gambit/algo/epsilon_greedy.go

43 lines
713 B
Go
Raw Normal View History

2023-07-06 08:59:17 +00:00
package algo
import (
"lukechampine.com/frand"
2023-07-06 09:48:20 +00:00
"tuxpa.in/a/gambit"
2023-07-06 08:59:17 +00:00
"tuxpa.in/a/gambit/helper"
)
2023-07-06 09:48:20 +00:00
// Compile-time assertion that *EpsilonGreedy implements gambit.Bandit.
var _ gambit.Bandit = (*EpsilonGreedy)(nil)
2023-07-06 08:59:17 +00:00
// EpsilonGreedy is a gambit.Bandit implementation whose Select method either
// returns the arm reported by its tracker's RewardMax or a random arm index,
// depending on how the value passed to Select compares with Epsilon.
type EpsilonGreedy struct {
// Epsilon is the threshold Select compares its argument against: arguments
// strictly greater than Epsilon take the RewardMax path, everything else
// takes the random path.
Epsilon float64
// cr is the tracker that every method of this type delegates to.
// NOTE(review): presumably holds per-arm counts and rewards — confirm in
// the helper package.
cr helper.CountReward
}
// Select returns the index of the arm to play next.
//
// r is a caller-supplied draw that is compared with Epsilon (presumably a
// uniform sample in [0, 1) — confirm against callers). If r is strictly
// greater than Epsilon, the result is the tracker's RewardMax converted to
// int. In every other case, including r == Epsilon, the result is
// frand.Intn(Size()).
func (u *EpsilonGreedy) Select(r float64) int {
	// Keep the comparison as "r > Epsilon" so that a NaN on either side
	// still falls through to the random branch.
	exploit := r > u.Epsilon
	if !exploit {
		arms := u.cr.Size()
		return frand.Intn(arms)
	}
	best := u.cr.RewardMax()
	return int(best)
}
// Update forwards the observation (arm a, reward r) to the underlying
// tracker and returns whatever error the tracker reports.
func (u *EpsilonGreedy) Update(a int, r float64) error {
	err := u.cr.Update(a, r)
	return err
}
// Reset calls ResetTo(n) on the underlying tracker and always returns nil.
// NOTE(review): n is presumably the number of arms to track — confirm against
// helper.CountReward.ResetTo.
func (u *EpsilonGreedy) Reset(n int) error {
u.cr.ResetTo(n)
return nil
}
// Size reports the size of the underlying tracker. The same value bounds the
// random index that Select can return.
func (u *EpsilonGreedy) Size() int {
	n := u.cr.Size()
	return n
}
// Count passes res to the underlying tracker's Count method.
// NOTE(review): presumably fills res with per-arm selection counts; the
// required length of res is not visible here — confirm in helper.CountReward.
func (u *EpsilonGreedy) Count(res []int) {
u.cr.Count(res)
}
// Reward passes res to the underlying tracker's Reward method.
// NOTE(review): presumably fills res with per-arm reward values; the required
// length of res is not visible here — confirm in helper.CountReward.
func (u *EpsilonGreedy) Reward(res []float64) {
u.cr.Reward(res)
}