hnolCol · dweindl · May 6, 2026
diff --git a/README.md b/README.md
diff --git a/reference-data/Readme.md b/reference-data/Readme.md
@@ -24,7 +24,7 @@ ComplexFinder(
 
 ### Complex Portal
 
-Go the [Complex Portal Website](https://www.ebi.ac.uk/complexportal/home) and download the database (save it as HUMAN_COMPLEX_PORTAL.txt) for the utilized organismn.
+Go to the [Complex Portal Website](https://www.ebi.ac.uk/complexportal/home) and download the database (save it as HUMAN_COMPLEX_PORTAL.txt) for the utilized organism.
 
 
 ```python
@@ -40,7 +40,7 @@ ComplexFinder(
 
 ### hu.Map 2.0
 
-The hu.MAP 2.0 has recently beend published and is available at this [link](http://humap2.proteincomplexes.org).
+The hu.MAP 2.0 has recently been published and is available at this [link](http://humap2.proteincomplexes.org).
 
 ```python
 ComplexFinder(

diff --git a/src/main.py b/src/main.py
diff --git a/src/modules/Database.py b/src/modules/Database.py
@@ -52,7 +52,7 @@ class Database(object):
     def __init__(self, nJobs = 4, splitString = ";"):
         """Database Module.
 
-        The pipeline requires a database containing positve feature interactions.
+        The pipeline requires a database containing positive feature interactions.
         This module find interactions present in the dataset to be analysed,
         creates decoy interactions and matches metrices to databases.
 
@@ -115,7 +115,7 @@ def _filterDb(self,
                     raise ValueError("complexNameColumn not in database")
 
 
-    def pariwiseProteinInteractions(self,
+    def pairwiseProteinInteractions(self,
                                     complexIDsColumn,
                                     dbID = "20190823_CORUM.txt",
                                     filterDb = {'Organism': ["Human"]},
@@ -163,7 +163,7 @@ def addDecoy(self, sizeFraction = 1.2):
         Adds a decoy database to the module.
 
         Random entries from positive data are taken and Fake
-        complexes are build. Self-ineractions (x1 == x2) are
+        complexes are built. Self-interactions (x1 == x2) are
         not allowed and ignored. Duplicated interactions are
         also ignored as well as positive Interactions that is
         reported in a different positive complex.
@@ -277,7 +277,7 @@ def getInteractionClassByE1E2(self,E1E2s,E1s,E2s):
                 else:
                     E1E2Type.append("decoy")
             else:
-                #if we get here, those itneractions cannot be positive or decoy
+                #if we get here, those interactions cannot be positive or decoy
                 e1 = E1s[n]
                 e2 = E2s[n]
 
@@ -353,7 +353,7 @@ def _saveFilteredDf(self,fileName):
     def collectPairwiseInt(self,i,interactors,complexName,predictClass,splitString = ";"):
 
         collectedResult = []
-        for interaction in self._getPariwiseInteractions(interactors.split(splitString)):
+        for interaction in self._getPairwiseInteractions(interactors.split(splitString)):
                interaction = [e[:6] for e in interaction]
                collectedResult.append({"ComplexID":i,"E1":interaction[0],"E2":interaction[1],"E1E2":''.join(sorted(interaction)),"complexName":complexName,"Class":predictClass})
         return collectedResult
@@ -368,7 +368,7 @@ def _findPositiveInteractions(self,filteredDB, df, dbID, complexNameColumn):
         return df
 
 
-    def _getPariwiseInteractions(self,entryList):
+    def _getPairwiseInteractions(self, entryList):
         ""
         return itertools.combinations(entryList, 2)
 
@@ -426,23 +426,23 @@ def findMatch(self,x,metricDf, mCols):
             return metricDf.loc[metricDf["E2E1"] == search,mCols]
 
     @property
-    def indentifiedComplexes(self):
+    def identifiedComplexes(self):
         if hasattr(self,'uniqueComplexesIdentified'):
             return self.uniqueComplexesIdentified
 
     def identifiableComplexes(self,complexMemberIds, ID = "20190823_CORUM.txt"):
         ""
-        identifiableMebmers = OrderedDict()
+        identifiableMembers = OrderedDict()
         if hasattr(self,'uniqueComplexesIdentified'):
             for k in self.uniqueComplexesIdentified.keys():
-                identifiableMebmers[k] = {}
+                identifiableMembers[k] = {}
                 boolIdx = self.dbs[ID].index == k
                 complexData = self.dbs[ID][boolIdx]
                 cMembers = complexData[complexMemberIds].tolist()[0].split(";")
-                identifiableMebmers[k]["n"] = len(cMembers)
-                identifiableMebmers[k]["members"] = cMembers
+                identifiableMembers[k]["n"] = len(cMembers)
+                identifiableMembers[k]["members"] = cMembers
 
-        return identifiableMebmers
+        return identifiableMembers
 
 
     def assignComplexToProtein(self, e, complexMemberIds, complexIDColumn, ID = "20190823_CORUM.txt", filterDict = {'Organism': ["Human"]}):
@@ -553,12 +553,12 @@ def matchMetrices(self,pathToTmp,entriesInChunks,metricColumns,analysisName,forc
 
     def _createChunks(self,pathToTmp,entriesInChunks,metricColumns):
         """
-        Craetes chunks
+        Creates chunks
 
 
         To do:
 
-        Parellelerize.
+        Parallelize.
 
         Parameters
         ----------
@@ -728,10 +728,10 @@ def matchInteractions(self,columnLabel, distanceMatrix):
     def fillComplexMatrixFromData(self, X):
         ""
         if not isinstance(X, pd.DataFrame):
-            raise ValueError("X must be a pandas data frame with index and columns containg ID")
+            raise ValueError("X must be a pandas data frame with index and columns containing ID")
 
         return X.merge(self.df,how="left",left_index=True,right_on="E1;E2")
 
 
 if __name__ == "__main__":
-    Database().pariwiseProteinInteractions("subunits(UniProt IDs)")
+    Database().pairwiseProteinInteractions("subunits(UniProt IDs)")
diff --git a/src/modules/Distance.py b/src/modules/Distance.py
@@ -15,7 +15,7 @@
 
 
 def minMaxNorm(X,axis=0):
-    "Normalize array betweem 0 and 1"
+    "Normalize array between 0 and 1"
     Xmin = np.nanmin(X,axis=axis, keepdims=True)
     Xmax = np.nanmax(X,axis=axis,keepdims=True)
     X_transformed = (X - Xmin) / (Xmax-Xmin)
@@ -116,7 +116,7 @@ def _pearson(u,v):
 
 @jit()
 def pearson(nY,Ys):
-    "Calcualtes pearson correlation."
+    """Calculates pearson correlation."""
     return [_pearson(nY,Y) for Y in Ys]
 
 
@@ -217,16 +217,16 @@ def __init__(self,
             Identifier of E1
 
         E2 : obj:`list`of obj `np.array`
-            Signal intensity of E2s. Disntances
-            betwenn ID and E2 are calculated.
-            The intensitiy profiles of E2s are uploaded from source.npy.
+            Signal intensity of E2s. Distances
+            between ID and E2 are calculated.
+            The intensity profiles of E2s are loaded from source.npy.
 
         ownPeaks : obj:`list`of obj `dict`
             List of modelled peaks for Y. Required to calculate apex distance,
-            which is equal to the euclidean dinstance of the closest peaks.
+            which is equal to the euclidean distance of the closest peaks.
 
         metrices : obj:`list` of obj:`str` or obj`list` of obj`dict`
-            List of strings or dictionories of metrices used to calculate distance.
+            List of strings or dictionaries of metrices used to calculate distance.
             If dict is provided, two keys namely `fn`and `name`must be provided.
             The name must be unique (if more than one dict is provided.)
 

diff --git a/src/modules/Distance_archive.py b/src/modules/Distance_archive.py
@@ -43,21 +43,21 @@ def __init__(self,
             Identifier of E1
 
         E2 : obj:`list`of obj `np.array`
-            Signal intensity of E2s. Disntances
-            betwenn ID and E2 are calculated.
-            The intensitiy profiles of E2s are uploaded from source.npy.
+            Signal intensity of E2s. Distances
+            between ID and E2 are calculated.
+            The intensity profiles of E2s are loaded from source.npy.
 
         ownPeaks : obj:`list`of obj `dict`
             List of modelled peaks for Y. Required to calculate apex distance,
-            which is equal to the euclidean dinstance of the closest peaks.
+            which is equal to the euclidean distance of the closest peaks.
 
         metrices : obj:`list` of obj:`str` or obj`list` of obj`dict`
-            List of strings or dictionories of metrices used to calculate distance.
-            If dict is provided, two keys namely `fn`and `name`must be provided.
+            List of strings or dictionaries of metrices used to calculate distance.
+            If dict is provided, two keys namely `fn` and `name` must be provided.
             The name must be unique (if more than one dict is provided.)
 
         pathToTmp : string
-            Path to the temporary folder for the current anaylsis. Required to load
+            Path to the temporary folder for the current analysis. Required to load
             Signals (called Ys)
 
         chunkName : string

diff --git a/src/modules/Predictor.py b/src/modules/Predictor.py
@@ -77,7 +77,7 @@ def __init__(self, classifierClass = "random forest", n_jobs = 4, gridSearch = N
 
     def _initClassifier(self):
         """
-        Initiate Classifer
+        Initiate Classifier
 
         Parameters
         ----------
@@ -127,11 +127,11 @@ def _scaleFeatures(self,X):
         Feature scaling. Data are scaled by StandardScaler (0-1)
 
         Importantly, the scaler is not retrained once it was initiated
-        to ensure that the scaling remains similiar for predictors.
+        to ensure that the scaling remains similar for predictors.
 
         Parameters
         ----------
-        X : two dimensional numpy array (feature paris in rows)
+        X : two dimensional numpy array (feature pairs in rows)
             Distance matrix for feature pairs
 
 
@@ -153,7 +153,7 @@ def _gridOptimization(self,X,Y):
 
         Parameters
         ----------
-        X : two dimensional numpy array (feature paris in rows)
+        X : two dimensional numpy array (feature pairs in rows)
             Distance matrix for feature pairs
         Y : numpy array
             Array containing class labels of X (0,1)
@@ -181,7 +181,7 @@ def _gridOptimization(self,X,Y):
 
     def getFeatureImportance(self):
         """
-        Returns estimatore feature imporantance, if estimator allows for this.
+        Returns estimator feature importance, if estimator allows for this.
 
         Parameters
         ----------
@@ -215,7 +215,7 @@ def predict(self,X,scale=True):
         Returns
         -------
         Two dimensional array (n feature pairs x predictors)
-        containing the class proability
+        containing the class probability
         if predictors (default: 3 - see fit function)
 
         """
@@ -247,7 +247,7 @@ def fit(self, X, Y, kFold = 3, optimizedParams=None, pathToResults = '', plotROC
         X : two dimensional numpy array
             Distance matrix for feature pairs
         Y : np.array
-            Class labels (1 - 0) for postive
+            Class labels (1 - 0) for positive
             and negative interaction
         kFold : int
             Number of cross validations. Equals the number of predictors.
@@ -275,7 +275,7 @@ def fit(self, X, Y, kFold = 3, optimizedParams=None, pathToResults = '', plotROC
         if self.gridSerach is not None and optimizedParams is None:
             optimizedClassifier, optimizedParams = self._gridOptimization(X_train,y_train)
         else:
-            print("Info :: Grid serach skipped. Automatically skipped when using Guassian NB or parameter 'classiferGridSearch' is None.")
+            print("Info :: Grid search skipped. Automatically skipped when using Gaussian NB or parameter 'classiferGridSearch' is None.")
             optimizedClassifier = self.classifier
         #cv = StratifiedShuffleSplit(n_splits=10, test_size=0.2)
         if optimizedParams is not None:
@@ -284,7 +284,7 @@ def fit(self, X, Y, kFold = 3, optimizedParams=None, pathToResults = '', plotROC
 
         self.predictors = [optimizedClassifier]
         probasOut = optimizedClassifier.predict_proba(X)
-        #predict probabiliteis for complete data set to create a classfier report.
+        #predict probabilities for complete data set to create a classifier report.
         tprs = []
         aucs = []
         oobScore = np.nan

diff --git a/src/modules/Signal.py b/src/modules/Signal.py
@@ -40,12 +40,12 @@ def __init__(self,
         """Signal module for pre-processing and modeling
 
 
-        The Signal module allows to do severl pre-processing/modelling
+        The Signal module supports several pre-processing/modelling
         steps such as
             a) smoothing (rolling average)
             b) filtering by number of nonNaN values
             c) removal of single data points (surrounded by zeros or nans)
-            b) Peak detection (finds peaks) - required for further anaylsis
+            d) Peak detection (finds peaks) - required for further analysis
 
         The peak modelling allows for usage of `LorentzianModel` or `GaussianModel`
 
@@ -131,34 +131,34 @@ def _removeSingleDataPointPeaks(self):
 
         """
         peaksFiltered = 0
-        flilteredY = []
+        filteredY = []
 
         for i,x in enumerate(self.Y):
             if i == 0: #first item is different
                 if self.Y[i+1] == 0:
-                    flilteredY.append(0)
+                    filteredY.append(0)
                     if self.Y[i] > 0:
                         peaksFiltered += 1
                 else:
-                    flilteredY.append(x)
+                    filteredY.append(x)
 
             elif i == self.Y.size - 1: #last item also
                 if self.Y[-1] != 0 and self.Y[-1]:
-                    flilteredY.append(0)
+                    filteredY.append(0)
                     if self.Y[i] > 0:
                         peaksFiltered += 1
                 else:
-                    flilteredY.append(x)
+                    filteredY.append(x)
 
             else:
                 if self.Y[i-1] == 0 and self.Y[i+1] == 0:
-                    flilteredY.append(0)
+                    filteredY.append(0)
                     if self.Y[i] > 0:
                         peaksFiltered += 1
                 else:
-                    flilteredY.append(x)
+                    filteredY.append(x)
 
-        return np.array(flilteredY), peaksFiltered
+        return np.array(filteredY), peaksFiltered
 
     def isValid(self, nonZero = 4):
         """Returns true if signal contains more than
@@ -173,7 +173,7 @@ def isValid(self, nonZero = 4):
 
         Returns
         -------
-        boolean, True if vald
+        boolean, True if valid
 
         """
         valid = np.sum(self.Y > 0) > nonZero
@@ -241,15 +241,15 @@ def _addParams(self,modelParams,prefix,peakIdx,i):
         Parameters
         ----------
 
-        mdeolParams :
+        modelParams :
             modelParam object. Returned by model.make_params() (lmfit package)
             Documentation: https://lmfit.github.io/lmfit-py/model.html
 
         prefix : str
             Prefix for the model (e.g. peak), defaults to f'm{i}_'.format(i)
 
         peakIdx : int
-            Arary index at which the peak was detected in the Signal arary self.Y
+            Array index at which the peak was detected in the Signal array self.Y
 
         i : int
             index of detected models
@@ -263,7 +263,7 @@ def _addParams(self,modelParams,prefix,peakIdx,i):
 
 
         if self.avoidWideSmallPeaks and self.Y[peakIdx[i]] < np.max(self.Y) * 0.2:
-            #small peaks should not be to wide!
+            #small peaks should not be too wide!
             self._addParam(modelParams,
                             name=prefix+'amplitude',
                             max = self.Y[peakIdx[i]] * 1.2 * np.pi,
@@ -328,7 +328,7 @@ def _findParametersForModels(self,spec,peakIdx):
     def _checkPeakIdx(self,peakIdx, maxPeaks = 15):
         """
         Checks if number of peaks exceed the max number of
-        allwed peaks. (paramater: maxPeaks)
+        allowed peaks. (parameter: maxPeaks)
 
         If the number exceeds maxPeaks, the peaks with the
         highest value are taken. Others are removed
@@ -362,7 +362,7 @@ def fitModel(self):
         """
         Fits the model (ensemble of several peaks).
         The number of models equals the number of
-        detected peaks. Please not that that the maximum
+        detected peaks. Please note that the maximum
         number of peaks is limited by the parameter:
 
             maxPeaks (defaults to 12)
@@ -371,7 +371,7 @@ def fitModel(self):
 
             - peak models + signal profile are plotted and saved as pdf (folder modelPlots)
 
-            - if squaredR for the model fit is below threshold (r2Tresh - deufault 0.85), the
+            - if squaredR for the model fit is below threshold (r2Tresh - default 0.85), the
                 signal profile is ignored. A message is printed if this happens.
 
         Parameters