"""
Mini-Batch Gradient Descent : https://en.wikipedia.org/wiki/Stochastic_gradient_descent
Mini-batch gradient descent is an optimization method for training models
by splitting the data into small batches.
"""
from __future__ import annotations
import numpy as np
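
# The update applied for each mini-batch B (features X_B, targets y_B) is the
# standard gradient step for linear least squares:
#
#     w <- w - learning_rate * X_B^T (X_B w + b - y_B) / |B|
#     b <- b - learning_rate * mean(X_B w + b - y_B)
#
# i.e. a step against the gradient of the batch's mean squared error (up to
# the conventional factor of 1/2), which is exactly what the function below
# computes.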
def mini_batch_gradient_descent(
    feature_matrix: np.ndarray,
    target_values: np.ndarray,
    learning_rate: float = 0.01,
    batch_size: int = 16,
    n_epochs: int = 50,
    random_seed: int | None = None,
) -> tuple[np.ndarray, float]:
    """
    Mini-batch gradient descent for linear regression.

    Parameters
    ----------
    feature_matrix : np.ndarray
        Feature matrix of shape (n_samples, n_features).
    target_values : np.ndarray
        Target values of shape (n_samples,).
    learning_rate : float
        Step size for each parameter update.
    batch_size : int
        Number of samples in each mini-batch.
    n_epochs : int
        Number of full passes over the training data.
    random_seed : int | None
        Seed for the shuffling RNG, for reproducibility.

    Returns
    -------
    weights : np.ndarray
        Learned weights, one per feature.
    bias : float
        Learned bias (intercept).

    Example
    -------
    >>> import numpy as np
    >>> X = np.array([[1], [2], [3], [4]])
    >>> y = np.array([2, 4, 6, 8])
    >>> w, b = mini_batch_gradient_descent(
    ...     X, y, learning_rate=0.1, batch_size=2, n_epochs=100, random_seed=42
    ... )
    >>> round(float(w[0]), 1)  # slope close to 2
    2.0
    """
    n_samples, n_features = feature_matrix.shape
    weights = np.zeros(n_features)
    bias = 0.0
    rng = np.random.default_rng(random_seed)

    for _ in range(n_epochs):
        # Shuffle once per epoch so every mini-batch draws a fresh mix of samples.
        indices = rng.permutation(n_samples)
        feature_matrix_shuffled = feature_matrix[indices]
        target_values_shuffled = target_values[indices]

        for start_idx in range(0, n_samples, batch_size):
            end_idx = start_idx + batch_size
            feature_batch = feature_matrix_shuffled[start_idx:end_idx]
            target_batch = target_values_shuffled[start_idx:end_idx]

            # Forward pass: linear prediction for this batch, then its error.
            predictions = feature_batch @ weights + bias
            errors = predictions - target_batch

            # Gradient step, averaged over the batch (len(target_batch) handles
            # a final batch smaller than batch_size).
            weights -= learning_rate * (feature_batch.T @ errors) / len(target_batch)
            bias -= learning_rate * np.mean(errors)

    return weights, bias


if __name__ == "__main__":
    import doctest

    doctest.testmod()
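
    # Usage sketch beyond the doctest above. The synthetic data, seed, and
    # hyperparameters here are illustrative assumptions, not values from the
    # original example: we fit y = 3x + 1.5 plus a little Gaussian noise and
    # check that the learned parameters land near the generating ones.
    rng = np.random.default_rng(0)
    x_demo = rng.uniform(0.0, 10.0, size=(200, 1))
    y_demo = 3.0 * x_demo[:, 0] + 1.5 + rng.normal(scale=0.1, size=200)
    w_demo, b_demo = mini_batch_gradient_descent(
        x_demo, y_demo, learning_rate=0.01, batch_size=16, n_epochs=500, random_seed=0
    )
    print(f"learned weight: {w_demo[0]:.3f} (generated with 3.0)")
    print(f"learned bias:   {b_demo:.3f} (generated with 1.5)")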