Skip to content

Commit a5fa33e

Browse files
authored
Update pearson_correlation.py
1 parent 5dfd2d5 commit a5fa33e

1 file changed

Lines changed: 31 additions & 37 deletions

File tree

Lines changed: 31 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,47 @@
11
import numpy as np
22

3-
4-
def pearson_correlation(data_x: np.ndarray, data_y: np.ndarray) -> float:
    """
    Calculate the Pearson correlation coefficient between two sets of data.

    The coefficient measures the strength of the linear relationship between
    two equally long sequences and lies in the closed interval [-1, 1]:
    1 is a perfect positive linear relationship, -1 a perfect negative one.

    Formula:
        r = Σ((x - mean(x)) * (y - mean(y)))
            / sqrt(Σ(x - mean(x))^2 * Σ(y - mean(y))^2)

    Parameters:
        data_x (np.ndarray): Array (or array-like, e.g. a list) of numeric
                             values representing the first column of data.
        data_y (np.ndarray): Array (or array-like) of numeric values
                             representing the second column of data to
                             compare with data_x.

    Returns:
        float: Pearson correlation coefficient between data_x and data_y.
               0.0 is returned when the inputs are empty or when either
               input has zero variance (the coefficient is mathematically
               undefined in those cases).

    Raises:
        ValueError: If data_x and data_y do not have the same length.

    Reference:
        https://en.wikipedia.org/wiki/Pearson_correlation_coefficient

    Example:
        >>> data_x = np.array([1, 2, 3, 4, 5])
        >>> data_y = np.array([2, 4, 6, 8, 10])
        >>> round(pearson_correlation(data_x, data_y), 2)
        1.0
        >>> round(pearson_correlation([1, 2, 3], [3, 2, 1]), 2)
        -1.0
        >>> pearson_correlation([1, 1, 1], [1, 2, 3])
        0.0
        >>> pearson_correlation(np.array([1, 2, 3]), np.array([1, 2]))
        Traceback (most recent call last):
            ...
        ValueError: data_x and data_y must have the same length
    """
    # Convert up front so plain sequences work and all arithmetic is float.
    values_x = np.asarray(data_x, dtype=float)
    values_y = np.asarray(data_y, dtype=float)

    if len(values_x) != len(values_y):
        raise ValueError("data_x and data_y must have the same length")

    if len(values_x) == 0:
        return 0.0

    deviations_x = values_x - np.mean(values_x)
    deviations_y = values_y - np.mean(values_y)

    # Plain Python floats: their multiplication saturates to 0.0 / inf
    # silently, which lets the under/overflow check below stay warning-free.
    numerator = float(np.sum(deviations_x * deviations_y))
    sum_sq_x = float(np.sum(deviations_x**2))
    sum_sq_y = float(np.sum(deviations_y**2))

    # Zero variance in either input: the coefficient is undefined; report 0.0.
    if sum_sq_x == 0 or sum_sq_y == 0:
        return 0.0

    product = sum_sq_x * sum_sq_y
    if product == 0 or not np.isfinite(product):
        # The product under/overflowed although both factors are usable;
        # taking the square roots first keeps the magnitude representable.
        denominator = np.sqrt(sum_sq_x) * np.sqrt(sum_sq_y)
    else:
        denominator = np.sqrt(product)

    # Rounding can push the ratio marginally past +/-1; np.clip (unlike the
    # min/max builtins) also propagates NaN unchanged. float() makes the
    # return value match the annotation instead of leaking np.float64.
    return float(np.clip(numerator / denominator, -1.0, 1.0))
43+
44+
45+
if __name__ == "__main__":
    # Executing this file directly runs the doctest examples embedded in the
    # module's docstrings; importing the module does not.
    import doctest

    doctest.testmod()

0 commit comments

Comments
 (0)