add option to exclude names in wildcard search

betaBison · betaBison · commit b35016770427 · 2023-04-26T18:09:50.000-07:00
diff --git a/gnss_lib_py/parsers/navdata.py b/gnss_lib_py/parsers/navdata.py
@@ -844,17 +844,26 @@ def in_rows(self, rows):
             raise KeyError(", ".join(missing_rows) + " row(s) are" \
                            + " missing from NavData object.")
 
-    def find_wildcard_indexes(self, wildcards, max_allow = None):
+    def find_wildcard_indexes(self, wildcards, max_allow = None,
+                              excludes = None):
         """Searches for indexes matching wildcard search input.
 
         For example, a search for ``x_*_m`` would find ``x_rx_m`` or
         ``x_sv_m`` or ``x_alpha_beta_gamma_m`` depending on the rows
         existing in the NavData instance.
 
+        The ``excludes`` variable allows you to exclude indexes when
+        trying to match a wildcard. For example, if there are rows named
+        ``pr_raw_m`` and ``pr_raw_sigma_m`` then the input
+        ``wildcards="pr_*_m", excludes=None`` would return
+        ``{"pr_*_m" : ["pr_raw_m","pr_raw_sigma_m"]}`` but with the excludes
+        parameter set, the input ``wildcards="pr_*_m", excludes="pr_*_sigma_m"``
+        would only return ``{"pr_*_m" : ["pr_raw_m"]}``.
+
         Will return an error no index is found matching the wildcard or
         if more than ``max_allow`` indexes are found.
 
-        Currently only allows for a single wildcard per index.
+        Currently only allows for a single wildcard '*' per index.
 
         Parameters
         ----------
@@ -863,6 +872,10 @@ def find_wildcard_indexes(self, wildcards, max_allow = None):
         max_allow : int or None
             Maximum number of valid indexes to allow before throwing an
             error. If None, then no limit is placed.
+        excludes : array-like, str, or None
+            Row name(s) to exclude for each wildcard in wildcards; if not
+            None, must be the same length as wildcards. Entries may
+            include a wildcard '*' character, but it is not required.
 
         Returns
         -------
@@ -879,10 +892,29 @@ def find_wildcard_indexes(self, wildcards, max_allow = None):
         if not (isinstance(max_allow,int) or max_allow is None):
             raise TypeError("max_allow input in find_wildcard_indexes" \
                           + " must be an integer or None.")
+        # handle exclude types
+        if isinstance(excludes,str):
+            excludes = [excludes]
+        if excludes is None:
+            excludes = [None] * len(wildcards)
+        if not isinstance(excludes, (list,tuple,np.ndarray,set)):
+            raise TypeError("excludes input in find_wildcard_indexes" \
+                         +  " must be array-like, single string, " \
+                         + "or None for each wildcard")
+        if len(excludes) != len(wildcards):
+            raise TypeError("excludes input must match length of " \
+                          + "wildcard input.")
+        for ex_idx, exclude in enumerate(excludes):
+            if exclude is None or isinstance(exclude,str):
+                excludes[ex_idx] = [exclude]
+            if not isinstance(excludes[ex_idx], (list,tuple,np.ndarray,set)):
+                raise TypeError("excludes input in find_wildcard_indexes" \
+                             +  " must be array-like, single string, " \
+                             + "or None for each wildcard")
 
         wildcard_indexes = {}
 
-        for wildcard in wildcards:
+        for wild_idx, wildcard in enumerate(wildcards):
             if not isinstance(wildcard,str):
                 raise TypeError("wildcards must be strings")
             if wildcard.count("*") != 1:
@@ -891,6 +923,15 @@ def find_wildcard_indexes(self, wildcards, max_allow = None):
             indexes = [row for row in self.rows
                    if row.startswith(wildcard.split("*",maxsplit=1)[0])
                     and row.endswith(wildcard.split("*",maxsplit=1)[1])]
+            if excludes[wild_idx] is not None:
+                for exclude in excludes[wild_idx]:
+                    if exclude is not None:
+                        if '*' in exclude:
+                            indexes = [row for row in indexes
+                                     if not (row.startswith(exclude.split("*",maxsplit=1)[0])
+                                     and row.endswith(exclude.split("*",maxsplit=1)[1]))]
+                        else:
+                            indexes = [row for row in indexes if exclude != row]
             if max_allow is not None and len(indexes) > max_allow:
                 raise KeyError("More than " + str(max_allow) \
                              + " possible row indexes for "  + wildcard)
diff --git a/tests/parsers/test_navdata.py b/tests/parsers/test_navdata.py
@@ -2125,6 +2125,85 @@ def test_find_wildcard_indexes(data):
             multi.find_wildcard_indexes("x_*_m",max_allow)
         assert "max_allow" in str(excinfo.value)
 
+def test_find_wildcard_excludes(data):
+    """Tests the excludes parameter of find_wildcard_indexes.
+
+    """
+    all_matching = data.rename({"names" : "x_alpha_m",
+                                "integers" : "x_beta_m",
+                                "floats" : "x_gamma_m",
+                                "strings" : "x_zeta_m"})
+
+    # no exclusion
+    indexes = all_matching.find_wildcard_indexes("x_*_m",excludes=None)
+    assert indexes["x_*_m"] == ["x_alpha_m","x_beta_m",
+                                "x_gamma_m","x_zeta_m"]
+    indexes = all_matching.find_wildcard_indexes("x_*_m",excludes=[None])
+    assert indexes["x_*_m"] == ["x_alpha_m","x_beta_m",
+                                "x_gamma_m","x_zeta_m"]
+
+    # single exclusion
+    indexes = all_matching.find_wildcard_indexes("x_*_m",excludes="x_beta_m")
+    assert indexes["x_*_m"] == ["x_alpha_m","x_gamma_m","x_zeta_m"]
+
+    # two exclusions
+    indexes = all_matching.find_wildcard_indexes("x_*_m",
+                                excludes=[["x_beta_m","x_zeta_m"]])
+    assert indexes["x_*_m"] == ["x_alpha_m","x_gamma_m"]
+
+    # all excluded
+    with pytest.raises(KeyError) as excinfo:
+        all_matching.find_wildcard_indexes("x_*_m",excludes=["x_*_m"])
+    assert "Missing " in str(excinfo.value)
+    assert "x_*_m" in str(excinfo.value)
+
+
+    multi = data.rename({"names" : "x_alpha_m",
+                         "integers" : "x_beta_m",
+                         "floats" : "y_alpha_deg",
+                         "strings" : "y_beta_deg"})
+
+    # no exclusion
+    indexes = multi.find_wildcard_indexes(["x_*_m","y_*_deg"],
+                                                excludes=None)
+    assert indexes["x_*_m"] == ["x_alpha_m","x_beta_m"]
+    assert indexes["y_*_deg"] == ["y_alpha_deg","y_beta_deg"]
+    indexes = multi.find_wildcard_indexes(["x_*_m","y_*_deg"],
+                                                excludes=[None,None])
+    assert indexes["x_*_m"] == ["x_alpha_m","x_beta_m"]
+    assert indexes["y_*_deg"] == ["y_alpha_deg","y_beta_deg"]
+
+    # single exclusion
+    indexes = multi.find_wildcard_indexes(["x_*_m","y_*_deg"],
+                                                excludes=["x_alpha*",None])
+    assert indexes["x_*_m"] == ["x_beta_m"]
+    assert indexes["y_*_deg"] == ["y_alpha_deg","y_beta_deg"]
+
+    # double exclusion
+    indexes = multi.find_wildcard_indexes(["x_*_m","y_*_deg"],
+                                                excludes=["x_alpha*","y_beta*"])
+    assert indexes["x_*_m"] == ["x_beta_m"]
+    assert indexes["y_*_deg"] == ["y_alpha_deg"]
+
+    # must match length
+    with pytest.raises(TypeError) as excinfo:
+        multi.find_wildcard_indexes(["x_*_m","y_*_deg"],
+                                    excludes=[None])
+    assert "match length" in str(excinfo.value)
+
+    # excludes must be array-like, a single string, or None
+    with pytest.raises(TypeError) as excinfo:
+        multi.find_wildcard_indexes(["x_*_m","y_*_deg"],
+                                    excludes={"a":"dictionary"})
+    assert "array-like" in str(excinfo.value)
+    # each entry of excludes must also be array-like, a string, or None
+    with pytest.raises(TypeError) as excinfo:
+        multi.find_wildcard_indexes(["x_*_m","y_*_deg"],
+                                    excludes=[None,{"a":"dictionary"}])
+    assert "array-like" in str(excinfo.value)
+
+
+
 @pytest.mark.parametrize('csv_path',
                         [
                          lazy_fixture("csv_dtypes"),