Skip to content

Commit 3d13432

Browse files
Update linear_regression.py
1 parent a71618f commit 3d13432

1 file changed

Lines changed: 41 additions & 102 deletions

File tree

machine_learning/linear_regression.py

Lines changed: 41 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -21,127 +21,66 @@
2121

2222

2323
def collect_dataset():
    """Download the CSGO dataset (ADR vs Rating of a player).

    :return: the dataset as a 2-D float ``np.ndarray`` with one row per
        player; the CSV header row is dropped.
    :raises httpx.HTTPStatusError: if the server answers with a non-2xx
        status.
    """
    response = httpx.get(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
        "master/Week1/ADRvsRating.csv",
        timeout=10,
    )
    # Fail loudly on an HTTP error instead of parsing an error page as CSV,
    # which would otherwise surface later as a confusing float() failure.
    response.raise_for_status()
    lines = response.text.strip().splitlines()
    data = [line.split(",") for line in lines[1:]]  # lines[0] is the header
    return np.array(data, dtype=float)
32+
33+
34+
def run_steep_gradient_descent(data_x, data_y, alpha, theta):
    """Take a single batch gradient-descent step.

    :param data_x: feature matrix, shape (n_samples, n_features)
    :param data_y: target vector, shape (n_samples,)
    :param alpha: learning rate
    :param theta: current weights, shape (1, n_features)
    :return: updated weights, same shape as ``theta``
    """
    sample_count = data_x.shape[0]
    # Residuals of the current linear model on every sample.
    residuals = (data_x @ theta.T).flatten() - data_y
    # Average gradient of the squared-error cost w.r.t. the weights.
    grad = (1 / sample_count) * (residuals @ data_x)
    return theta - alpha * grad
6842

6943

70-
def sum_of_square_error(data_x, data_y, theta):
    """Return the half mean squared error of the model ``theta`` on the data.

    :param data_x: feature matrix, shape (n_samples, n_features)
    :param data_y: target vector, shape (n_samples,)
    :param theta: weights, shape (1, n_features)
    :return: sum of squared residuals divided by ``2 * n_samples``
    """
    residuals = np.ravel(data_x @ theta.T) - data_y
    return np.sum(residuals ** 2) / (2 * data_x.shape[0])
10450

105-
for i in range(iterations):
106-
theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
107-
error = sum_of_square_error(data_x, data_y, len_data, theta)
108-
print(f"At Iteration {i + 1} - Error is {error:.5f}")
10951

52+
def run_linear_regression(data_x, data_y, iterations=100000, alpha=0.000155):
    """Fit a linear model with batch gradient descent.

    :param data_x: feature matrix, shape (n_samples, n_features)
    :param data_y: target vector, shape (n_samples,)
    :param iterations: number of gradient-descent steps to run
    :param alpha: learning rate
    :return: learned weights, shape (1, n_features)
    """
    theta = np.zeros((1, data_x.shape[1]))
    for step in range(1, iterations + 1):
        theta = run_steep_gradient_descent(data_x, data_y, alpha, theta)
        # Report the cost after every step so convergence is visible.
        cost = sum_of_square_error(data_x, data_y, theta)
        print(f"Iteration {step}: Error = {cost:.5f}")
    return theta
11160

11261

11362
def mean_absolute_error(predicted_y, original_y):
    """Return the mean absolute error between two equal-length sequences.

    :param predicted_y: predicted values (any array-like)
    :param original_y: expected values (any array-like, same length)
    :return: mean of the element-wise absolute differences
    """
    diff = np.array(predicted_y) - np.array(original_y)
    return np.abs(diff).mean()
12667

12768

12869
def main():
    """Download the dataset, fit the model, and report parameters and MAE."""
    dataset = collect_dataset()

    # Column 0 (ADR) is the single feature; prepend a bias column of ones.
    features = np.c_[np.ones(dataset.shape[0]), dataset[:, 0]]
    targets = dataset[:, 1]  # Rating

    theta = run_linear_regression(features, targets)

    print("Learned Parameters (theta):")
    for coefficient in theta[0]:
        print(f"{coefficient:.5f}")

    fitted = features @ theta.T
    mae = mean_absolute_error(fitted.flatten(), targets)
    print(f"Mean Absolute Error: {mae:.5f}")
14283

143-
# Script entry point: fit the regression model on the downloaded dataset.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)