"""

import numpy as np
from typing import Tuple


def mini_batch_gradient_descent(
    feature_matrix: np.ndarray,
    target_values: np.ndarray,
    learning_rate: float = 0.01,
    batch_size: int = 16,
    n_epochs: int = 50,
) -> Tuple[np.ndarray, float]:
    """
    Mini-Batch Gradient Descent for linear regression.

    Parameters
    ----------
    feature_matrix : np.ndarray
        Feature matrix of shape (n_samples, n_features).
    target_values : np.ndarray
        Target values of shape (n_samples,).
    learning_rate : float
        Learning rate (step size for each gradient update).
    batch_size : int
        Size of mini-batches.
    n_epochs : int
        Number of full passes over the training data.

    Returns
    -------
    Tuple[np.ndarray, float]
        Learned weight vector (one entry per feature) and the bias term.

    Examples
    --------
    >>> import numpy as np
    >>> X = np.array([[1], [2], [3], [4]])
    >>> y = np.array([2, 4, 6, 8])
    >>> w, b = mini_batch_gradient_descent(X, y, learning_rate=0.1, batch_size=2, n_epochs=100)
    >>> round(float(w[0]), 1)  # slope close to 2
    2.0
    """
    n_samples, n_features = feature_matrix.shape
    weights = np.zeros(n_features)
    # Start the bias as a float so the return value matches the annotation.
    bias = 0.0

    for _ in range(n_epochs):
        # Reshuffle every epoch so successive mini-batches are decorrelated.
        indices = np.random.permutation(n_samples)
        shuffled_features = feature_matrix[indices]
        shuffled_targets = target_values[indices]
        for start_idx in range(0, n_samples, batch_size):
            end_idx = start_idx + batch_size
            batch_features = shuffled_features[start_idx:end_idx]
            batch_targets = shuffled_targets[start_idx:end_idx]
            predictions = np.dot(batch_features, weights) + bias
            errors = predictions - batch_targets
            # Average the gradient over the actual batch length so the step
            # size stays consistent even for a smaller final batch.
            weights -= learning_rate * (batch_features.T @ errors) / len(batch_targets)
            bias -= learning_rate * np.mean(errors)
    return weights, bias
6067
6168
if __name__ == "__main__":
    # Run the embedded doctests when this module is executed as a script.
    from doctest import testmod

    testmod()