"""
Mini-Batch Gradient Descent : https://en.wikipedia.org/wiki/Stochastic_gradient_descent
Mini-batch gradient descent is an optimization method for training models
by splitting the data into small batches.
"""

from __future__ import annotations

import numpy as np


def mini_batch_gradient_descent(
    feature_matrix: np.ndarray,
    target_values: np.ndarray,
    learning_rate: float = 0.01,
    batch_size: int = 16,
    n_epochs: int = 50,
    random_seed: int | None = None,
) -> tuple[np.ndarray, float]:
    """
    Fit a linear regression model with mini-batch gradient descent.

    Each epoch the samples are shuffled, split into consecutive batches of
    at most ``batch_size`` rows, and the weights and bias are moved against
    the gradient of the halved mean squared error of each batch.

    Parameters
    ----------
    feature_matrix : np.ndarray
        Feature matrix of shape (n_samples, n_features). Any array-like
        that converts to a 2-D float array is accepted.
    target_values : np.ndarray
        Target values of shape (n_samples,). A column vector of shape
        (n_samples, 1) is accepted as well and flattened.
    learning_rate : float
        Learning rate (step size of every update).
    batch_size : int
        Size of mini-batches. The last batch of an epoch may be smaller.
    n_epochs : int
        Number of training epochs (full passes over the data).
    random_seed : int | None
        Random seed for reproducibility. ``None`` draws fresh entropy.

    Returns
    -------
    weights : np.ndarray
        Learned weights, shape (n_features,).
    bias : float
        Learned bias.

    Raises
    ------
    ValueError
        If ``feature_matrix`` is not 2-D, if ``target_values`` does not
        hold exactly one value per sample, if ``batch_size`` is not
        positive, or if ``n_epochs`` is negative.

    Example
    -------
    >>> import numpy as np
    >>> X = np.array([[1], [2], [3], [4]])
    >>> y = np.array([2, 4, 6, 8])
    >>> w, b = mini_batch_gradient_descent(
    ...     X, y, learning_rate=0.1, batch_size=2, n_epochs=100, random_seed=42
    ... )
    >>> round(float(w[0]), 1)  # slope close to 2
    2.0
    >>> mini_batch_gradient_descent(X, y, batch_size=0)
    Traceback (most recent call last):
        ...
    ValueError: batch_size must be a positive integer, got 0
    """
    features = np.asarray(feature_matrix, dtype=float)
    if features.ndim != 2:
        raise ValueError(
            "feature_matrix must be 2-D (n_samples, n_features), "
            f"got {features.ndim} dimension(s)"
        )
    n_samples, n_features = features.shape

    targets = np.asarray(target_values, dtype=float)
    if targets.ndim == 2 and targets.shape[1] == 1:
        # A column vector would broadcast `predictions - targets` into a
        # (batch, batch) matrix, so reduce it to one value per sample.
        targets = targets[:, 0]
    if targets.shape != (n_samples,):
        raise ValueError(
            f"target_values must contain one value per sample ({n_samples}), "
            f"got shape {targets.shape}"
        )

    if batch_size <= 0:
        # range() rejects a zero step with an unrelated message, and a
        # negative step would skip training without any warning.
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
    if n_epochs < 0:
        raise ValueError(f"n_epochs must be non-negative, got {n_epochs}")

    weights = np.zeros(n_features)
    bias = 0.0

    rng = np.random.default_rng(random_seed)

    for _ in range(n_epochs):
        # Reshuffle every epoch so the batches differ between passes.
        indices = rng.permutation(n_samples)
        features_shuffled = features[indices]
        targets_shuffled = targets[indices]

        for start_idx in range(0, n_samples, batch_size):
            end_idx = start_idx + batch_size
            feature_batch = features_shuffled[start_idx:end_idx]
            target_batch = targets_shuffled[start_idx:end_idx]

            predictions = feature_batch @ weights + bias
            errors = predictions - target_batch

            # Gradients of 0.5 * mean(errors ** 2) w.r.t. weights and bias.
            weights -= learning_rate * (feature_batch.T @ errors) / len(target_batch)
            bias -= learning_rate * np.mean(errors)

    # The updates turn bias into a NumPy scalar; hand back a built-in float.
    return weights, float(bias)


if __name__ == "__main__":
    import doctest

    doctest.testmod()