55 lines
873 B
Go
55 lines
873 B
Go
|
package ucb
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"math"
|
||
|
|
||
|
"tuxpa.in/a/gambit/helper"
|
||
|
)
|
||
|
|
||
|
type UCB struct {
|
||
|
cr helper.CountReward
|
||
|
}
|
||
|
|
||
|
func (ucb *UCB) Select(r float64) int {
|
||
|
a := len(ucb.cr.Counts)
|
||
|
for _, v := range ucb.cr.Counts {
|
||
|
if v == 0 {
|
||
|
return a
|
||
|
}
|
||
|
}
|
||
|
sz := len(ucb.cr.Counts)
|
||
|
var res float64
|
||
|
for _, v := range ucb.cr.Counts {
|
||
|
ans := math.Sqrt((2.0 * math.Log(float64(sz))) / float64(v))
|
||
|
if ans > res {
|
||
|
res = ans
|
||
|
}
|
||
|
}
|
||
|
return int(res)
|
||
|
}
|
||
|
|
||
|
func (u *UCB) Update(a int, r float64) error {
|
||
|
if a < 0 || a >= len(u.cr.Rewards) || r < 0 {
|
||
|
return errors.New("TODO")
|
||
|
}
|
||
|
u.cr.Counts[a]++
|
||
|
dec := float64(u.cr.Counts[a])
|
||
|
u.cr.Rewards[a] = (u.cr.Rewards[a]*(dec-1) + r) / dec
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func (u *UCB) Reset(n int) error {
|
||
|
u.cr.ResetTo(n)
|
||
|
return nil
|
||
|
|
||
|
}
|
||
|
|
||
|
func (u *UCB) Count(res *[]int) {
|
||
|
u.cr.Count(res)
|
||
|
}
|
||
|
|
||
|
func (u *UCB) Reward(res *[]float64) {
|
||
|
u.cr.Reward(res)
|
||
|
}
|