@@ -1,4 +1,7 @@
 import numpy as np
+from numpy.random import default_rng
+
+rng = default_rng(42)
 
 
 class Dataloader:
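Side note on the seeding change: `default_rng(42)` creates an isolated `Generator`, so results stay reproducible no matter what other code does to the legacy `np.random` global state. A minimal sketch of the guarantee this buys:

import numpy as np
from numpy.random import default_rng

# Two generators built from the same seed yield identical streams,
# independent of any other consumer of randomness in the process.
rng_a = default_rng(42)
rng_b = default_rng(42)
assert np.array_equal(rng_a.standard_normal(3), rng_b.standard_normal(3))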
@@ -39,7 +42,7 @@ def __init__(self, features: list[list[float]], labels: list[int]) -> None:
 
     def get_train_test_data(
         self,
-    ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
+    ) -> tuple[np.ndarray, np.ndarray, list[np.ndarray], list[np.ndarray]]:
         """
         Splits the data into training and testing sets.
         Here, we manually split the data.
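The tightened annotation says the first two returned values are single arrays rather than lists of arrays. For reference, a standalone sketch of a manual split consistent with those first two slots (the `manual_split` helper is illustrative, not the repository's method):

import numpy as np

def manual_split(features: np.ndarray, labels: np.ndarray, n_train: int):
    # Leading rows form the training set; the remainder is held out.
    return features[:n_train], labels[:n_train], features[n_train:], labels[n_train:]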
@@ -136,8 +139,13 @@ class MLP:
     """
 
     def __init__(
-        self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2
-    ):
+        self,
+        dataloader: Dataloader,
+        epoch: int,
+        learning_rate: float,
+        gamma: float = 1.0,
+        hidden_dim: int = 2,
+    ) -> None:
         self.learning_rate = learning_rate
         self.gamma = gamma  # learning_rate decay hyperparameter gamma
         self.epoch = epoch
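`gamma` is documented as a learning-rate decay hyperparameter. Assuming the common exponential schedule (the training loop itself is outside this hunk, so the exact rule is an assumption), the effective rate per epoch would look like:

# Hypothetical values; the decay rule assumed here is lr * gamma**epoch.
base_lr, gamma = 0.1, 0.9
for epoch in range(3):
    print(f"epoch {epoch}: lr = {base_lr * gamma**epoch:.3f}")  # 0.100, 0.090, 0.081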
@@ -173,23 +181,24 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]:
         """
         Initialize weights using He initialization.
 
-        :return: Tuple of weights (W1, W2) for the network.
+        :return: Tuple of weights (w1, w2) for the network.
 
         >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]]
         >>> y = [0, 1, 0, 0]
         >>> loader = Dataloader(X, y)
         >>> mlp = MLP(loader, 10, 0.1)
-        >>> W1, W2 = mlp.initialize()
-        >>> W1.shape
+        >>> w1, w2 = mlp.initialize()
+        >>> w1.shape
         (3, 2)
-        >>> W2.shape
+        >>> w2.shape
         (2, 3)
         """
 
         in_dim, out_dim = self.dataloader.get_inout_dim()
-        w1 = np.random.Generator(in_dim + 1, self.hidden_dim) * 0.01
-
-        w2 = np.random.Generator(self.hidden_dim, out_dim) * 0.01
+        w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim)
+        w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt(
+            2.0 / self.hidden_dim
+        )
         return w1, w2
 
     def relu(self, input_array: np.ndarray) -> np.ndarray:
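The old code called `np.random.Generator` directly, which expects a BitGenerator rather than shape arguments and fails at runtime, and its `* 0.01` scaling was not He initialization despite the docstring. The replacement draws from a standard normal and scales by sqrt(2 / fan_in), which is the He scheme for ReLU layers. A quick standalone check of the variance target:

import numpy as np

rng = np.random.default_rng(0)
fan_in = 512
w = rng.standard_normal((fan_in, 256)) * np.sqrt(2.0 / fan_in)
# The empirical variance should sit near the He target of 2 / fan_in.
print(w.var(), 2.0 / fan_in)  # ~0.0039 vs 0.00390625

One nuance worth flagging: `w1` has `in_dim + 1` rows because the bias is folded in, yet the scale uses `2.0 / in_dim`; excluding the bias row from fan-in is a common simplification.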
@@ -284,6 +293,7 @@ def back_prop(
         >>> mlp = MLP(None, 1, 0.1, hidden_dim=2)
         >>> x = np.array([[1.0, 2.0, 1.0]])  # batch_size=1, input_dim=2 + bias
         >>> y = np.array([[0.0, 1.0]])  # batch_size=1, output_dim=2
+        >>> w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
         >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]])  # (hidden_dim=2, output_dim=2)
         >>> _ = mlp.forward(x, w1, w2)  # Run forward to set inter_variable
         >>> grad_w1, grad_w2 = mlp.back_prop(x, y, w2)
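The added `w1` line fixes a doctest that previously referenced an undefined name. For intuition about what `back_prop` should produce on these exact inputs, here is a self-contained finite-difference check of a `w2` gradient under the module's MSE loss. The sketch re-implements the forward pass as ReLU hidden layer plus linear readout, which matches the doctest's shapes; it is an assumption about the network, not the repository's code:

import numpy as np

def forward(x, w1, w2):
    # ReLU hidden layer followed by a linear readout; x carries the bias
    # as its last column, matching the (batch, in_dim + 1) doctest shape.
    return np.maximum(x @ w1, 0.0) @ w2

def mse(y_hat, y):
    return np.sum((y_hat - y) ** 2) / (2 * y.shape[0])

x = np.array([[1.0, 2.0, 1.0]])
y = np.array([[0.0, 1.0]])
w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
w2 = np.array([[0.7, 0.8], [0.9, 1.0]])

# Analytic gradient of the MSE w.r.t. w2: h^T (y_hat - y) / batch_size.
h = np.maximum(x @ w1, 0.0)
grad_w2 = h.T @ (forward(x, w1, w2) - y) / y.shape[0]

# Numeric check of one entry via forward differences.
eps = 1e-6
w2_p = w2.copy()
w2_p[0, 0] += eps
numeric = (mse(forward(x, w1, w2_p), y) - mse(forward(x, w1, w2), y)) / eps
print(np.isclose(grad_w2[0, 0], numeric, atol=1e-5))  # True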
@@ -394,7 +404,7 @@ def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float:
         >>> label = np.array([[1, 0], [0, 1], [1, 0]])
         >>> y_hat = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])
         >>> mlp.accuracy(label, y_hat)
-        1.0
+        np.float64(1.0)
         """
         return (y_hat.argmax(axis=1) == label.argmax(axis=1)).mean()
 
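The expected output changes because NumPy 2.0 switched scalar reprs to the explicit `np.float64(...)` form, and doctests compare against the repr. The computation itself is unchanged:

import numpy as np

label = np.array([[1, 0], [0, 1], [1, 0]])
y_hat = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])
# Row-wise argmax maps one-hot labels and raw scores to class indices;
# accuracy is the mean of the elementwise matches (here 3/3 = 1.0).
print((y_hat.argmax(axis=1) == label.argmax(axis=1)).mean())  # 1.0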
@@ -415,7 +425,7 @@ def loss(output: np.ndarray, label: np.ndarray) -> float:
         >>> output = np.array([[0.9, 0.1], [0.2, 0.8]])
         >>> label = np.array([[1.0, 0.0], [0.0, 1.0]])
         >>> round(mlp.loss(output, label), 3)
-        0.025
+        np.float64(0.025)
         """
         return np.sum((output - label) ** 2) / (2 * label.shape[0])
 
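Same NumPy 2.0 repr story as the accuracy doctest. The value itself is easy to verify by hand:

import numpy as np

output = np.array([[0.9, 0.1], [0.2, 0.8]])
label = np.array([[1.0, 0.0], [0.0, 1.0]])
err = output - label                   # [[-0.1, 0.1], [0.2, -0.2]]
total = np.sum(err**2)                 # 0.01 + 0.01 + 0.04 + 0.04 = 0.10
print(round(total / (2 * label.shape[0]), 3))  # 0.025, matching the doctest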