Skip to content

Commit 5dfd2d5

Browse files
authored
Create pearson_correlation.py
1 parent a71618f commit 5dfd2d5

1 file changed

Lines changed: 53 additions & 0 deletions

File tree

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import numpy as np
2+
3+
4+
def pearson_correlation(x: np.ndarray, y: np.ndarray) -> float:
    """
    Calculate the Pearson correlation coefficient (PCC) between two arrays.

    Pearson correlation measures the linear relationship between two datasets,
    returning a value between -1 and 1:
    - 1 indicates a perfect positive linear correlation
    - 0 indicates no linear correlation
    - -1 indicates a perfect negative linear correlation

    Formula:
    r = Σ((x - mean(x)) * (y - mean(y))) / sqrt(Σ(x - mean(x))^2 * Σ(y - mean(y))^2)

    Reference: https://en.wikipedia.org/wiki/Pearson_correlation_coefficient

    Parameters:
    - x: 1D numpy array (or array-like) of values
    - y: 1D numpy array (or array-like) of values, same length as x

    Returns:
    - The Pearson correlation coefficient as a Python float in [-1, 1]

    Raises:
    - ValueError: if the inputs do not have the same length (shape), are
      empty, or if either input has zero standard deviation (is constant)

    >>> a = np.array([1, 2, 3, 4, 5])
    >>> b = np.array([2, 4, 6, 8, 10])
    >>> float(np.round(pearson_correlation(a, b), 5))
    1.0
    >>> a = np.array([1, 2, 3, 4, 5])
    >>> b = np.array([10, 9, 2, 6, 4])
    >>> float(np.round(pearson_correlation(a, b), 5))
    -0.70868
    >>> a = np.array([1, 2, 3])
    >>> b = np.array([1, 2])
    >>> pearson_correlation(a, b)
    Traceback (most recent call last):
        ...
    ValueError: Input arrays must have the same length.
    >>> pearson_correlation(np.array([0.1, 0.1, 0.1]), np.array([1.0, 2.0, 4.0]))
    Traceback (most recent call last):
        ...
    ValueError: Standard deviation of input arrays must not be zero.
    """
    # Work in float64 so that lists, tuples and integer arrays behave alike.
    x = np.asarray(x, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64)

    # Compare full shapes rather than len(): shapes such as (3,) and (3, 1)
    # have equal len() but would silently broadcast to a 3x3 product below.
    if x.shape != y.shape:
        raise ValueError("Input arrays must have the same length.")

    if x.size == 0:
        raise ValueError("Input arrays must not be empty.")

    # Detect constant inputs exactly. Relying only on the denominator being
    # zero misses constant float arrays whose mean is not exactly
    # representable (the deviations come out as tiny non-zero values).
    if x.min() == x.max() or y.min() == y.max():
        raise ValueError("Standard deviation of input arrays must not be zero.")

    x_centered = x - x.mean()
    y_centered = y - y.mean()

    numerator = np.sum(x_centered * y_centered)
    denominator = np.sqrt(np.sum(x_centered**2) * np.sum(y_centered**2))

    # Still guard the division: the product of sums can underflow to zero
    # for inputs with extremely small spread.
    if denominator == 0:
        raise ValueError("Standard deviation of input arrays must not be zero.")

    # Rounding can push |r| marginally above 1; clip to the valid range
    # (NaN propagates through np.clip unchanged).
    return float(np.clip(numerator / denominator, -1.0, 1.0))

0 commit comments

Comments
 (0)