 
 
 def collect_dataset():
-    """Collect dataset of CSGO
-    The dataset contains ADR vs Rating of a Player
-    :return : dataset obtained from the link, as matrix
-    """
+    """Collect dataset of CSGO (ADR vs Rating)."""
     response = httpx.get(
-        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
-        "master/Week1/ADRvsRating.csv",
+        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/master/Week1/ADRvsRating.csv",
         timeout=10,
     )
-    lines = response.text.splitlines()
-    data = []
-    for item in lines:
-        item = item.split(",")
-        data.append(item)
-    data.pop(0)  # This is for removing the labels from the list
-    dataset = np.matrix(data)
-    return dataset
-
-
-def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
-    """Run steep gradient descent and updates the Feature vector accordingly_
-    :param data_x : contains the dataset
-    :param data_y : contains the output associated with each data-entry
-    :param len_data : length of the data_
-    :param alpha : Learning rate of the model
-    :param theta : Feature vector (weight's for our model)
-    ;param return : Updated Feature's, using
-                    curr_features - alpha_ * gradient(w.r.t. feature)
-    >>> import numpy as np
-    >>> data_x = np.array([[1, 2], [3, 4]])
-    >>> data_y = np.array([5, 6])
-    >>> len_data = len(data_x)
-    >>> alpha = 0.01
-    >>> theta = np.array([0.1, 0.2])
-    >>> run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
-    array([0.196, 0.343])
-    """
-    n = len_data
-
-    prod = np.dot(theta, data_x.transpose())
-    prod -= data_y.transpose()
-    sum_grad = np.dot(prod, data_x)
-    theta = theta - (alpha / n) * sum_grad
+    lines = response.text.strip().splitlines()
+    data = [line.split(",") for line in lines[1:]]  # skip header
+    return np.array(data, dtype=float)
+
+
+def run_steep_gradient_descent(data_x, data_y, alpha, theta):
+    """Perform one vectorized gradient-descent step and return the updated theta.
+    n = data_x.shape[0]
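+    # theta has shape (1, n_features); data_x @ theta.T gives an (n, 1) column of predictions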
+    predictions = data_x @ theta.T
+    errors = predictions.flatten() - data_y
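+    # Gradient of the half-MSE cost 1/(2n) * sum(errors**2) with respect to theta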
+    gradient = (1 / n) * (errors @ data_x)
+    theta = theta - alpha * gradient
     return theta
 
 
-def sum_of_square_error(data_x, data_y, len_data, theta):
-    """Return sum of square error for error calculation
-    :param data_x : contains our dataset
-    :param data_y : contains the output (result vector)
-    :param len_data : len of the dataset
-    :param theta : contains the feature vector
-    :return : sum of square error computed from given feature's
-
-    Example:
-    >>> vc_x = np.array([[1.1], [2.1], [3.1]])
-    >>> vc_y = np.array([1.2, 2.2, 3.2])
-    >>> round(sum_of_square_error(vc_x, vc_y, 3, np.array([1])),3)
-    np.float64(0.005)
-    """
-    prod = np.dot(theta, data_x.transpose())
-    prod -= data_y.transpose()
-    sum_elem = np.sum(np.square(prod))
-    error = sum_elem / (2 * len_data)
-    return error
-
-
-def run_linear_regression(data_x, data_y):
-    """Implement Linear regression over the dataset
-    :param data_x : contains our dataset
-    :param data_y : contains the output (result vector)
-    :return : feature for line of best fit (Feature vector)
-    """
-    iterations = 100000
-    alpha = 0.0001550
-
-    no_features = data_x.shape[1]
-    len_data = data_x.shape[0] - 1
-
-    theta = np.zeros((1, no_features))
+def sum_of_square_error(data_x, data_y, theta):
+    """Return half the mean squared error (the cost minimized by gradient descent).
+    n = data_x.shape[0]
+    predictions = data_x @ theta.T
+    errors = predictions.flatten() - data_y
+    return np.sum(errors ** 2) / (2 * n)
 
-    for i in range(iterations):
-        theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
-        error = sum_of_square_error(data_x, data_y, len_data, theta)
-        print(f"At Iteration {i + 1} - Error is {error:.5f}")
 
+def run_linear_regression(data_x, data_y, iterations=100000, alpha=0.000155):
+    """Run gradient descent to learn parameters."""
+    theta = np.zeros((1, data_x.shape[1]))
+    for i in range(iterations):
+        theta = run_steep_gradient_descent(data_x, data_y, alpha, theta)
+        error = sum_of_square_error(data_x, data_y, theta)
+        print(f"Iteration {i + 1}: Error = {error:.5f}")
     return theta
 
 
 def mean_absolute_error(predicted_y, original_y):
114 | | - """Return sum of square error for error calculation |
115 | | - :param predicted_y : contains the output of prediction (result vector) |
116 | | - :param original_y : contains values of expected outcome |
117 | | - :return : mean absolute error computed from given feature's |
118 | | -
|
119 | | - >>> predicted_y = [3, -0.5, 2, 7] |
120 | | - >>> original_y = [2.5, 0.0, 2, 8] |
121 | | - >>> mean_absolute_error(predicted_y, original_y) |
122 | | - 0.5 |
123 | | - """ |
124 | | - total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y)) |
125 | | - return total / len(original_y) |
| 63 | + """Compute MAE (fully vectorized).""" |
+    predicted_y = np.array(predicted_y)
+    original_y = np.array(original_y)
+    return np.mean(np.abs(predicted_y - original_y))
 
 
 def main():
-    """Driver function"""
     data = collect_dataset()
-
-    len_data = data.shape[0]
-    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
-    data_y = data[:, -1].astype(float)
+    data_x = np.c_[np.ones(data.shape[0]), data[:, 0]]  # Add bias term
+    data_y = data[:, 1]  # Rating
 
     theta = run_linear_regression(data_x, data_y)
-    len_result = theta.shape[1]
-    print("Resultant Feature vector : ")
-    for i in range(len_result):
-        print(f"{theta[0, i]:.5f}")
 
+    print("Learned Parameters (theta):")
+    for val in theta[0]:
+        print(f"{val:.5f}")
+
+    predictions = data_x @ theta.T
+    mae = mean_absolute_error(predictions.flatten(), data_y)
+    print(f"Mean Absolute Error: {mae:.5f}")
 
-if __name__ == "__main__":
-    import doctest
 
-    doctest.testmod()
+if __name__ == "__main__":
     main()