"""
Mini-Batch Gradient Descent : https://en.wikipedia.org/wiki/Stochastic_gradient_descent
Mini-batch gradient descent is an optimization method for training models
by splitting the data into small batches.
"""
from __future__ import annotations
import numpy as np
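
# The update applied for each mini-batch B (features X_B, targets y_B) is the
# standard gradient step for linear least squares:
#
#     w <- w - learning_rate * X_B^T (X_B w + b - y_B) / |B|
#     b <- b - learning_rate * mean(X_B w + b - y_B)
#
# i.e. a step against the gradient of the batch's mean squared error (up to
# the conventional factor of 1/2), which is exactly what the function below
# computes.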
def mini_batch_gradient_descent(
    feature_matrix: np.ndarray,
    target_values: np.ndarray,
    learning_rate: float = 0.01,
    batch_size: int = 16,
    n_epochs: int = 50,
    random_seed: int | None = None,
) -> tuple[np.ndarray, float]:
    """
    Mini-batch gradient descent for linear regression.

    Parameters
    ----------
    feature_matrix : np.ndarray
        Feature matrix of shape (n_samples, n_features).
    target_values : np.ndarray
        Target values of shape (n_samples,).
    learning_rate : float
        Step size for each parameter update.
    batch_size : int
        Number of samples in each mini-batch.
    n_epochs : int
        Number of full passes over the training data.
    random_seed : int | None
        Seed for the shuffling RNG, for reproducibility.

    Returns
    -------
    weights : np.ndarray
        Learned weights, one per feature.
    bias : float
        Learned bias (intercept).

    Example
    -------
    >>> import numpy as np
    >>> X = np.array([[1], [2], [3], [4]])
    >>> y = np.array([2, 4, 6, 8])
    >>> w, b = mini_batch_gradient_descent(
    ...     X, y, learning_rate=0.1, batch_size=2, n_epochs=100, random_seed=42
    ... )
    >>> round(float(w[0]), 1)  # slope close to 2
    2.0
    """
    n_samples, n_features = feature_matrix.shape
    weights = np.zeros(n_features)
    bias = 0.0
    rng = np.random.default_rng(random_seed)

    for _ in range(n_epochs):
        # Shuffle once per epoch so every mini-batch draws a fresh mix of samples.
        indices = rng.permutation(n_samples)
        feature_matrix_shuffled = feature_matrix[indices]
        target_values_shuffled = target_values[indices]

        for start_idx in range(0, n_samples, batch_size):
            end_idx = start_idx + batch_size
            feature_batch = feature_matrix_shuffled[start_idx:end_idx]
            target_batch = target_values_shuffled[start_idx:end_idx]

            # Forward pass: linear prediction for this batch, then its error.
            predictions = feature_batch @ weights + bias
            errors = predictions - target_batch

            # Gradient step, averaged over the batch (len(target_batch) handles
            # a final batch smaller than batch_size).
            weights -= learning_rate * (feature_batch.T @ errors) / len(target_batch)
            bias -= learning_rate * np.mean(errors)

    return weights, bias


if __name__ == "__main__":
    import doctest

    doctest.testmod()
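
    # Usage sketch beyond the doctest above. The synthetic data, seed, and
    # hyperparameters here are illustrative assumptions, not values from the
    # original example: we fit y = 3x + 1.5 plus a little Gaussian noise and
    # check that the learned parameters land near the generating ones.
    rng = np.random.default_rng(0)
    x_demo = rng.uniform(0.0, 10.0, size=(200, 1))
    y_demo = 3.0 * x_demo[:, 0] + 1.5 + rng.normal(scale=0.1, size=200)
    w_demo, b_demo = mini_batch_gradient_descent(
        x_demo, y_demo, learning_rate=0.01, batch_size=16, n_epochs=500, random_seed=0
    )
    print(f"learned weight: {w_demo[0]:.3f} (generated with 3.0)")
    print(f"learned bias:   {b_demo:.3f} (generated with 1.5)")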