by splitting the data into small batches.
"""

from __future__ import annotations

import numpy as np


def mini_batch_gradient_descent(
    feature_matrix: np.ndarray,
    target_values: np.ndarray,
    learning_rate: float = 0.01,
    batch_size: int = 16,
    n_epochs: int = 50,
    random_seed: int | None = None,
) -> tuple[np.ndarray, float]:
    """
    Mini-Batch Gradient Descent for linear regression.

    Fits ``y ≈ X @ weights + bias`` by repeatedly shuffling the data and
    taking gradient steps on small batches.

    Parameters
    ----------
    feature_matrix : np.ndarray
        Training features of shape (n_samples, n_features).
    target_values : np.ndarray
        Target values of shape (n_samples,).
    learning_rate : float
        Step size for each gradient update.
    batch_size : int
        Size of mini-batches.
    n_epochs : int
        Number of training epochs.
    random_seed : int | None
        Random seed for reproducibility; ``None`` gives
        non-deterministic shuffling.

    Returns
    -------
    tuple[np.ndarray, float]
        Learned weights (shape (n_features,)) and bias.

    Examples
    --------
    >>> X = np.array([[1], [2], [3], [4]])
    >>> y = np.array([2, 4, 6, 8])
    >>> w, b = mini_batch_gradient_descent(
    ...     X, y, learning_rate=0.1, batch_size=2, n_epochs=100, random_seed=42
    ... )
    >>> round(float(w[0]), 1)  # slope close to 2
    2.0
    """
    n_samples, n_features = feature_matrix.shape
    weights = np.zeros(n_features)
    bias = 0.0  # float, so in-place updates never fall back to int arithmetic

    rng = np.random.default_rng(random_seed)

    for _ in range(n_epochs):
        # Reshuffle every epoch so batch composition varies between epochs.
        indices = rng.permutation(n_samples)
        feature_matrix_shuffled = feature_matrix[indices]
        target_values_shuffled = target_values[indices]

        for start_idx in range(0, n_samples, batch_size):
            # Slicing past the end is safe: the final batch may be smaller.
            end_idx = start_idx + batch_size
            feature_batch = feature_matrix_shuffled[start_idx:end_idx]
            target_batch = target_values_shuffled[start_idx:end_idx]

            predictions = np.dot(feature_batch, weights) + bias
            errors = predictions - target_batch

            # Average the gradient over the actual batch length, not
            # batch_size, so a short final batch is weighted correctly.
            weights -= learning_rate * (feature_batch.T @ errors) / len(target_batch)
            bias -= learning_rate * np.mean(errors)

    return weights, bias