@@ -33,7 +33,7 @@ class Bandit:
3333 A class to represent a multi-armed bandit.
3434 """
3535
36- def __init__ (self , probabilities : list [float ]):
36+ def __init__ (self , probabilities : list [float ]) -> None :
3737 """
3838 Initialize the bandit with a list of probabilities for each arm.
3939
@@ -72,7 +72,7 @@ class EpsilonGreedy:
7272 https://medium.com/analytics-vidhya/the-epsilon-greedy-algorithm-for-reinforcement-learning-5fe6f96dc870
7373 """
7474
75- def __init__ (self , epsilon : float , k : int ):
75+ def __init__ (self , epsilon : float , k : int ) -> None :
7676 """
7777 Initialize the Epsilon-Greedy strategy.
7878
@@ -85,7 +85,7 @@ def __init__(self, epsilon: float, k: int):
8585 self .counts = np .zeros (k )
8686 self .values = np .zeros (k )
8787
88- def select_arm (self ):
88+ def select_arm (self ) -> int :
8989 """
9090 Select an arm to pull.
9191
@@ -104,7 +104,7 @@ def select_arm(self):
104104 else :
105105 return np .argmax (self .values )
106106
107- def update (self , arm_index : int , reward : int ):
107+ def update (self , arm_index : int , reward : int ) -> None :
108108 """
109109 Update the strategy.
110110
@@ -133,7 +133,7 @@ class UCB:
133133 https://people.maths.bris.ac.uk/~maajg/teaching/stochopt/ucb.pdf
134134 """
135135
136- def __init__ (self , k : int ):
136+ def __init__ (self , k : int ) -> None :
137137 """
138138 Initialize the UCB strategy.
139139
@@ -145,7 +145,7 @@ def __init__(self, k: int):
145145 self .values = np .zeros (k )
146146 self .total_counts = 0
147147
148- def select_arm (self ):
148+ def select_arm (self ) -> int :
149149 """
150150 Select an arm to pull.
151151
@@ -159,10 +159,11 @@ def select_arm(self):
159159 """
160160 if self .total_counts < self .k :
161161 return self .total_counts
162- ucb_values = self .values + np .sqrt (2 * np .log (self .total_counts ) / self .counts )
162+ ucb_values = self .values + \
163+ np .sqrt (2 * np .log (self .total_counts ) / self .counts )
163164 return np .argmax (ucb_values )
164165
165- def update (self , arm_index : int , reward : int ):
166+ def update (self , arm_index : int , reward : int ) -> None :
166167 """
167168 Update the strategy.
168169
@@ -192,7 +193,7 @@ class ThompsonSampling:
192193 https://en.wikipedia.org/wiki/Thompson_sampling
193194 """
194195
195- def __init__ (self , k : int ):
196+ def __init__ (self , k : int ) -> None :
196197 """
197198 Initialize the Thompson Sampling strategy.
198199
@@ -203,7 +204,7 @@ def __init__(self, k: int):
203204 self .successes = np .zeros (k )
204205 self .failures = np .zeros (k )
205206
206- def select_arm (self ):
207+ def select_arm (self ) -> int :
207208 """
208209 Select an arm to pull.
209210
@@ -223,7 +224,7 @@ def select_arm(self):
223224 ]
224225 return np .argmax (samples )
225226
226- def update (self , arm_index : int , reward : int ):
227+ def update (self , arm_index : int , reward : int ) -> None :
227228 """
228229 Update the strategy.
229230
@@ -259,7 +260,7 @@ def __init__(self, k: int):
259260 """
260261 self .k = k
261262
262- def select_arm (self ):
263+ def select_arm (self ) -> int :
263264 """
264265 Select an arm to pull.
265266
@@ -274,7 +275,7 @@ def select_arm(self):
274275 rng = np .random .default_rng ()
275276 return rng .integers (self .k )
276277
277- def update (self , arm_index : int , reward : int ):
278+ def update (self , arm_index : int , reward : int ) -> None :
278279 """
279280 Update the strategy.
280281
@@ -308,7 +309,7 @@ def __init__(self, k: int):
308309 self .counts = np .zeros (k )
309310 self .values = np .zeros (k )
310311
311- def select_arm (self ):
312+ def select_arm (self ) -> int :
312313 """
313314 Select an arm to pull.
314315
@@ -322,7 +323,7 @@ def select_arm(self):
322323 """
323324 return np .argmax (self .values )
324325
325- def update (self , arm_index : int , reward : int ):
326+ def update (self , arm_index : int , reward : int ) -> None :
326327 """
327328 Update the strategy.
328329
0 commit comments