From 5dfd2d5a4e7429b3ea13e4be062ca78f43894a6d Mon Sep 17 00:00:00 2001
From: LuisMelendez <luiguimelendez5@gmail.com>
Date: Wed, 1 Oct 2025 22:50:58 -0600
Subject: [PATCH 1/5] Create pearson_correlation.py

---
 machine_learning/pearson_correlation.py | 53 +++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 machine_learning/pearson_correlation.py

diff --git a/machine_learning/pearson_correlation.py b/machine_learning/pearson_correlation.py
new file mode 100644
index 000000000000..3f71c8398efb
--- /dev/null
+++ b/machine_learning/pearson_correlation.py
@@ -0,0 +1,53 @@
+import numpy as np
+
+
+def pearson_correlation(x: np.ndarray, y: np.ndarray) -> float:
+    """
+    Calculate the Pearson correlation coefficient (PCC) between two arrays.
+
+    Pearson correlation measures the linear relationship between two datasets,
+    returning a value between -1 and 1:
+      - 1   indicates a perfect positive linear correlation
+      - 0   indicates no linear correlation
+      - -1  indicates a perfect negative linear correlation
+
+    Formula:
+    r = Σ((x - mean(x)) * (y - mean(y))) / sqrt(Σ(x - mean(x))^2 * Σ(y - mean(y))^2)
+
+    Reference: https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
+
+    Parameters:
+    - x: 1D numpy array of values
+    - y: 1D numpy array of values
+
+    Returns:
+    - The Pearson correlation coefficient (float)
+
+      a = np.array([1, 2, 3, 4, 5])
+      b = np.array([2, 4, 6, 8, 10])
+      float(np.round(pearson_correlation(a, b), 5))
+    1.0
+      a = np.array([1, 2, 3, 4, 5])
+      b = np.array([10, 9, 2, 6, 4])
+      float(np.round(pearson_correlation(a, b), 5))
+    -0.18845
+      a = np.array([1, 2, 3])
+      b = np.array([1, 2])
+      pearson_correlation(a, b)
+    Traceback (most recent call last):
+        ...
+    ValueError: Input arrays must have the same length.
+    """
+    if len(x) != len(y):
+        raise ValueError("Input arrays must have the same length.")
+
+    x_mean = np.mean(x)
+    y_mean = np.mean(y)
+
+    numerator = np.sum((x - x_mean) * (y - y_mean))
+    denominator = np.sqrt(np.sum((x - x_mean) ** 2) * np.sum((y - y_mean) ** 2))
+
+    if denominator == 0:
+        raise ValueError("Standard deviation of input arrays must not be zero.")
+
+    return numerator / denominator

From a5fa33e47e8ff9a679c4a4d2d6d670843661ad81 Mon Sep 17 00:00:00 2001
From: LuisMelendez <luiguimelendez5@gmail.com>
Date: Wed, 1 Oct 2025 23:24:25 -0600
Subject: [PATCH 2/5] Update pearson_correlation.py

---
 machine_learning/pearson_correlation.py | 68 +++++++++++--------------
 1 file changed, 31 insertions(+), 37 deletions(-)

diff --git a/machine_learning/pearson_correlation.py b/machine_learning/pearson_correlation.py
index 3f71c8398efb..ebf1483e164f 100644
--- a/machine_learning/pearson_correlation.py
+++ b/machine_learning/pearson_correlation.py
@@ -1,53 +1,47 @@
 import numpy as np
 
-
-def pearson_correlation(x: np.ndarray, y: np.ndarray) -> float:
+def pearson_correlation(data_x: np.ndarray, data_y: np.ndarray) -> float:
     """
-    Calculate the Pearson correlation coefficient (PCC) between two arrays.
-
-    Pearson correlation measures the linear relationship between two datasets,
-    returning a value between -1 and 1:
-      - 1   indicates a perfect positive linear correlation
-      - 0   indicates no linear correlation
-      - -1  indicates a perfect negative linear correlation
-
-    Formula:
-    r = Σ((x - mean(x)) * (y - mean(y))) / sqrt(Σ(x - mean(x))^2 * Σ(y - mean(y))^2)
-
-    Reference: https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
+    Calculate the Pearson correlation coefficient between two sets of data.
 
     Parameters:
-    - x: 1D numpy array of values
-    - y: 1D numpy array of values
+    data_x (np.ndarray): Array of numeric values representing a column of data 
+                         that will be compared with another column to determine
+                         how strongly the two vectors are related.
+    data_y (np.ndarray): Array of numeric values representing the second column 
+                         of data to compare with data_x.
 
     Returns:
-    - The Pearson correlation coefficient (float)
+    float: Pearson correlation coefficient between data_x and data_y.
 
-      a = np.array([1, 2, 3, 4, 5])
-      b = np.array([2, 4, 6, 8, 10])
-      float(np.round(pearson_correlation(a, b), 5))
+    Reference:
+    https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
+
+    Example:
+    >>> data_x = np.array([1, 2, 3, 4, 5])
+    >>> data_y = np.array([2, 4, 6, 8, 10])
+    >>> round(pearson_correlation(data_x, data_y), 2)
     1.0
-      a = np.array([1, 2, 3, 4, 5])
-      b = np.array([10, 9, 2, 6, 4])
-      float(np.round(pearson_correlation(a, b), 5))
-    -0.18845
-      a = np.array([1, 2, 3])
-      b = np.array([1, 2])
-      pearson_correlation(a, b)
-    Traceback (most recent call last):
-        ...
-    ValueError: Input arrays must have the same length.
     """
-    if len(x) != len(y):
-        raise ValueError("Input arrays must have the same length.")
+    if len(data_x) != len(data_y):
+        raise ValueError("data_x and data_y must have the same length")
+    
+    n = len(data_x)
+    if n == 0:
+        return 0.0
 
-    x_mean = np.mean(x)
-    y_mean = np.mean(y)
+    mean_x = np.mean(data_x)
+    mean_y = np.mean(data_y)
 
-    numerator = np.sum((x - x_mean) * (y - y_mean))
-    denominator = np.sqrt(np.sum((x - x_mean) ** 2) * np.sum((y - y_mean) ** 2))
+    numerator = np.sum((data_x - mean_x) * (data_y - mean_y))
+    denominator = np.sqrt(np.sum((data_x - mean_x)**2) * np.sum((data_y - mean_y)**2))
 
     if denominator == 0:
-        raise ValueError("Standard deviation of input arrays must not be zero.")
+        return 0.0
 
     return numerator / denominator
+
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()

From 86d7b1eb2b65503a621aaf840f82d3c99a5be699 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 2 Oct 2025 05:24:45 +0000
Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/pearson_correlation.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/machine_learning/pearson_correlation.py b/machine_learning/pearson_correlation.py
index ebf1483e164f..75a464046be0 100644
--- a/machine_learning/pearson_correlation.py
+++ b/machine_learning/pearson_correlation.py
@@ -1,14 +1,15 @@
 import numpy as np
 
+
 def pearson_correlation(data_x: np.ndarray, data_y: np.ndarray) -> float:
     """
     Calculate the Pearson correlation coefficient between two sets of data.
 
     Parameters:
-    data_x (np.ndarray): Array of numeric values representing a column of data 
+    data_x (np.ndarray): Array of numeric values representing a column of data
                          that will be compared with another column to determine
                          how strongly the two vectors are related.
-    data_y (np.ndarray): Array of numeric values representing the second column 
+    data_y (np.ndarray): Array of numeric values representing the second column
                          of data to compare with data_x.
 
     Returns:
@@ -25,7 +26,7 @@ def pearson_correlation(data_x: np.ndarray, data_y: np.ndarray) -> float:
     """
     if len(data_x) != len(data_y):
         raise ValueError("data_x and data_y must have the same length")
-    
+
     n = len(data_x)
     if n == 0:
         return 0.0
@@ -34,7 +35,9 @@ def pearson_correlation(data_x: np.ndarray, data_y: np.ndarray) -> float:
     mean_y = np.mean(data_y)
 
     numerator = np.sum((data_x - mean_x) * (data_y - mean_y))
-    denominator = np.sqrt(np.sum((data_x - mean_x)**2) * np.sum((data_y - mean_y)**2))
+    denominator = np.sqrt(
+        np.sum((data_x - mean_x) ** 2) * np.sum((data_y - mean_y) ** 2)
+    )
 
     if denominator == 0:
         return 0.0
@@ -44,4 +47,5 @@ def pearson_correlation(data_x: np.ndarray, data_y: np.ndarray) -> float:
 
 if __name__ == "__main__":
     import doctest
+
     doctest.testmod()

From 5cb554de4f37170dfdd90e9d32e22196ff332724 Mon Sep 17 00:00:00 2001
From: LuisMelendez <luiguimelendez5@gmail.com>
Date: Wed, 1 Oct 2025 23:32:45 -0600
Subject: [PATCH 4/5] Update pearson_correlation.py

---
 machine_learning/pearson_correlation.py | 66 +++++++++++++------------
 1 file changed, 34 insertions(+), 32 deletions(-)

diff --git a/machine_learning/pearson_correlation.py b/machine_learning/pearson_correlation.py
index 75a464046be0..c7b00119507d 100644
--- a/machine_learning/pearson_correlation.py
+++ b/machine_learning/pearson_correlation.py
@@ -3,49 +3,51 @@
 
 def pearson_correlation(data_x: np.ndarray, data_y: np.ndarray) -> float:
     """
-    Calculate the Pearson correlation coefficient between two sets of data.
+    Calculate the Pearson correlation coefficient (PCC) between two arrays.
+
+    Pearson correlation measures the linear relationship between two datasets,
+    returning a value between -1 and 1:
+      - 1   indicates a perfect positive linear correlation
+      - 0   indicates no linear correlation
+      - -1  indicates a perfect negative linear correlation
+
+    Formula:
+    r = Σ((x - mean(x)) * (y - mean(y))) / sqrt(Σ(x - mean(x))^2 * Σ(y - mean(y))^2)
+
+    Reference: https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
 
     Parameters:
-    data_x (np.ndarray): Array of numeric values representing a column of data
-                         that will be compared with another column to determine
-                         how strongly the two vectors are related.
-    data_y (np.ndarray): Array of numeric values representing the second column
-                         of data to compare with data_x.
+    - data_x: 1D numpy array of values
+    - data_y: 1D numpy array of values
 
     Returns:
-    float: Pearson correlation coefficient between data_x and data_y.
-
-    Reference:
-    https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
+    - The Pearson correlation coefficient (float)
 
-    Example:
-    >>> data_x = np.array([1, 2, 3, 4, 5])
-    >>> data_y = np.array([2, 4, 6, 8, 10])
-    >>> round(pearson_correlation(data_x, data_y), 2)
+    >>> a = np.array([1, 2, 3, 4, 5])
+    >>> b = np.array([2, 4, 6, 8, 10])
+    >>> float(np.round(pearson_correlation(a, b), 5))
     1.0
+    >>> a = np.array([1, 2, 3, 4, 5])
+    >>> b = np.array([10, 9, 2, 6, 4])
+    >>> float(np.round(pearson_correlation(a, b), 5))
+    -0.70868
+    >>> a = np.array([1, 2, 3])
+    >>> b = np.array([1, 2])
+    >>> pearson_correlation(a, b)
+    Traceback (most recent call last):
+        ...
+    ValueError: Input arrays must have the same length.
     """
     if len(data_x) != len(data_y):
-        raise ValueError("data_x and data_y must have the same length")
+        raise ValueError("Input arrays must have the same length.")
 
-    n = len(data_x)
-    if n == 0:
-        return 0.0
+    x_mean = np.mean(data_x)
+    y_mean = np.mean(data_y)
 
-    mean_x = np.mean(data_x)
-    mean_y = np.mean(data_y)
-
-    numerator = np.sum((data_x - mean_x) * (data_y - mean_y))
-    denominator = np.sqrt(
-        np.sum((data_x - mean_x) ** 2) * np.sum((data_y - mean_y) ** 2)
-    )
+    numerator = np.sum((data_x - x_mean) * (data_y - y_mean))
+    denominator = np.sqrt(np.sum((data_x - x_mean) ** 2) * np.sum((data_y - y_mean) ** 2))
 
     if denominator == 0:
-        return 0.0
+        raise ValueError("Standard deviation of input arrays must not be zero.")
 
     return numerator / denominator
-
-
-if __name__ == "__main__":
-    import doctest
-
-    doctest.testmod()

From 9a25b8312d24d0af5065bb8561604c20818ef625 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 2 Oct 2025 05:33:04 +0000
Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 machine_learning/pearson_correlation.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/machine_learning/pearson_correlation.py b/machine_learning/pearson_correlation.py
index c7b00119507d..91a16285a88e 100644
--- a/machine_learning/pearson_correlation.py
+++ b/machine_learning/pearson_correlation.py
@@ -45,7 +45,9 @@ def pearson_correlation(data_x: np.ndarray, data_y: np.ndarray) -> float:
     y_mean = np.mean(data_y)
 
     numerator = np.sum((data_x - x_mean) * (data_y - y_mean))
-    denominator = np.sqrt(np.sum((data_x - x_mean) ** 2) * np.sum((data_y - y_mean) ** 2))
+    denominator = np.sqrt(
+        np.sum((data_x - x_mean) ** 2) * np.sum((data_y - y_mean) ** 2)
+    )
 
     if denominator == 0:
         raise ValueError("Standard deviation of input arrays must not be zero.")