concat functionality

betaBison · betaBison · commit cf7fe1bf3236 · 2022-09-02T17:07:20.000-07:00
diff --git a/gnss_lib_py/parsers/navdata.py b/gnss_lib_py/parsers/navdata.py
@@ -116,50 +116,104 @@ def from_numpy_array(self, numpy_array):
 
         """
 
+
         if not isinstance(numpy_array, np.ndarray):
             raise TypeError("numpy_array must be np.ndarray")
 
+
         self._build_navdata()
 
+        numpy_array = np.atleast_2d(numpy_array)
         for row_num in range(numpy_array.shape[0]):
             self[str(row_num)] = numpy_array[row_num,:]
 
-    def add(self, csv_path=None, pandas_df=None, numpy_array=None):
-        """Add new timesteps to existing array
+    def concat(self, navdata=None, axis=1, inplace=False):
+        """Concatenates new rows or new columns to existing NavData.
+
+        Each type of data is included in a row, so adding new rows with
+        ``axis=0`` means adding new types of data. Concat requires that
+        the new NavData matches the length of the existing NavData. Row
+        concatenation assumes the same ordering across both NavData
+        instances (e.g. sorted by timestamp) and does not perform any
+        matching/sorting itself.
+
+        You can also concatenate new columns with ``axis=1``. If the row
+        names of the new NavData instance don't match the row names of
+        the existing NavData instance, the mismatched values will be
+        filled with np.nan.
 
         Parameters
         ----------
-        csv_path : string
-            Path to csv file containing data to add
-        pandas_df : pd.DataFrame
-            DataFrame containing data to add
-        numpy_array : np.ndarray
-            Array containing only numeric data to add
+        navdata : gnss_lib_py.parsers.navdata.NavData
+            Navdata instance to concatenate.
+        axis : int
+            Either add new rows (type) of data ``axis=0`` or new columns
+            (e.g. timesteps) of data ``axis=1``.
+        inplace : bool
+            If False, will return new concatenated NavData instance.
+            If True, will concatenate data to the current NavData
+            instance.
+
+        Returns
+        -------
+        new_navdata : gnss_lib_py.parsers.navdata.NavData or None
+            If inplace is False, returns a new NavData instance holding
+            the concatenated data. If inplace is True, returns
+            None.
+
         """
-        old_row_num = len(self.map)
-        old_len = len(self)
-        new_data_cols = slice(old_len, None)
-        if numpy_array is not None:
-            if old_row_num == 0:
-                self.from_numpy_array(numpy_array)
-            else:
-                if len(numpy_array.shape)==1:
-                    numpy_array = np.reshape(numpy_array, [1, -1])
-                self.array = np.hstack((self.array, np.empty_like(numpy_array,
-                                        dtype=self.arr_dtype)))
-                self[:, new_data_cols] = numpy_array
-        if csv_path is not None:
-            if old_row_num == 0:
-                self.from_csv_path(csv_path)
-            else:
-                pandas_df = pd.read_csv(csv_path)
-        if pandas_df is not None:
-            if old_row_num == 0:
-                self.from_pandas_df(pandas_df)
-            else:
-                self.array = np.hstack((self.array, np.empty(pandas_df.shape).T))
-                for col in pandas_df.columns:
-                    self[col, new_data_cols] = np.asarray(pandas_df[col].values)
+
+        if not isinstance(navdata,NavData):
+            raise TypeError("new concat data must be a NavData instance.")
+
+        if axis == 0: # concatenate new rows
+            if len(self) != len(navdata):
+                raise RuntimeError("new concat data must be same " \
+                                 + "length to concatenate new rows.")
+            if not inplace:
+                new_navdata = self.copy()
+            for row in navdata.rows:
+                new_row_name = row
+                suffix = None
+                while new_row_name in self.rows:
+                    if suffix is None:
+                        suffix = 0
+                    else:
+                        suffix += 1
+                    new_row_name = row + "_" + str(suffix)
+                if inplace:
+                    self[new_row_name] = navdata[row]
+                else:
+                    new_navdata[new_row_name] = navdata[row]
+
+        elif axis == 1: # concatenate new columns
+            new_navdata = NavData()
+            # get unique list of row names
+            combined_rows = set(self.rows + navdata.rows)
+
+            for row in combined_rows:
+                combined_row = np.array([])
+                # combine data from existing and new instance
+                for data in [self, navdata]:
+                    if row in data.rows:
+                        new_row = data[row]
+                    elif len(data) == 0:
+                        continue
+                    else:
+                        # add np.nan for missing values
+                        new_row = np.empty((len(data),))
+                        new_row.fill(np.nan)
+                    combined_row = np.concatenate((combined_row,
+                                                   new_row))
+                new_navdata[row] = combined_row
+            if inplace:
+                self.array = new_navdata.array
+                self.map = new_navdata.map
+                self.str_map = new_navdata.str_map
+
+        if inplace:
+            return None
+        return new_navdata
 
     def where(self, key_idx, value, condition="eq"):
         """Return NavData where conditions are met for the given row
diff --git a/tests/parsers/test_navdata.py b/tests/parsers/test_navdata.py
@@ -345,14 +345,18 @@ def test_init_only_header(csv_only_header, csv_simple):
     csv_data = NavData(csv_path=csv_only_header)
     assert csv_data.shape == (4,0)
     # test adding new data to empty NavData with column names
-    csv_data.add(csv_path=csv_simple)
+    csv_data.concat(NavData(csv_path=csv_simple),axis=1,inplace=True)
     assert csv_data.shape == (4,6)
+    pd.testing.assert_frame_equal(csv_data.pandas_df().sort_index(axis=1),
+                                  pd.read_csv(csv_simple).sort_index(axis=1),
+                                  check_dtype=False, check_names=True)
 
     # should work when DataFrame is passed
     pd_data = NavData(pandas_df=pd.read_csv(csv_only_header))
     assert pd_data.shape == (4,0)
     # test adding new data to empty NavData with column names
-    pd_data.add(pandas_df=pd.read_csv(csv_simple))
+    pd_data.concat(NavData(pandas_df=pd.read_csv(csv_simple)),axis=1,
+                   inplace=True)
     assert pd_data.shape == (4,6)
 
 @pytest.mark.parametrize('pandas_df',
@@ -1232,7 +1236,7 @@ def test_add_numpy(numpy_array, add_array):
         Array to add to NavData
     """
     data = NavData(numpy_array=numpy_array)
-    data.add(numpy_array=add_array)
+    data.concat(NavData(numpy_array=add_array),axis=1,inplace=True)
     new_col_num = np.shape(add_array)[1]
     np.testing.assert_array_equal(data[:, -new_col_num:], add_array)
 
@@ -1241,13 +1245,14 @@ def test_add_numpy_1d():
     """Test addition of a 1D numpy array to NavData with single row
     """
     data = NavData(numpy_array=np.zeros([1,6]))
-    data.add(numpy_array=np.ones(8))
+    data.concat(NavData(numpy_array=np.ones(8)),axis=1, inplace=True)
     np.testing.assert_array_equal(data[0, :], np.hstack((np.zeros(6),
                                   np.ones(8))))
 
     # test adding to empty NavData
     data_empty = NavData()
-    data_empty.add(numpy_array=np.ones((8,8)))
+    data_empty.concat(NavData(numpy_array=np.ones((8,8))),axis=1,
+                      inplace=True)
     np.testing.assert_array_equal(data_empty[:,:],np.ones((8,8)))
 
 def test_add_csv(df_simple, csv_simple):
@@ -1256,21 +1261,22 @@ def test_add_csv(df_simple, csv_simple):
     """
     # Create and add to NavData
     data = NavData(csv_path=csv_simple)
-    data.add(csv_path=csv_simple)
+    data.concat(NavData(csv_path=csv_simple),axis=1,inplace=True)
     data_df = data.pandas_df()
     # Set up dataframe for comparison
     df_types = {'names': object, 'integers': np.float64,
                 'floats': np.float64, 'strings': object}
     expected_df = pd.concat((df_simple,df_simple)).reset_index(drop=True)
     expected_df = expected_df.astype(df_types)
-    pd.testing.assert_frame_equal(data_df, expected_df,
+    pd.testing.assert_frame_equal(data_df.sort_index(axis=1),
+                                  expected_df.sort_index(axis=1),
                                   check_index_type=False)
 
     # test adding to empty NavData
     data_empty = NavData()
-    data_empty.add(csv_path=csv_simple)
-    pd.testing.assert_frame_equal(data_empty.pandas_df(),
-                                  df_simple.astype(df_types),
+    data_empty.concat(NavData(csv_path=csv_simple),axis=1,inplace=True)
+    pd.testing.assert_frame_equal(data_empty.pandas_df().sort_index(axis=1),
+                                  df_simple.astype(df_types).sort_index(axis=1),
                                   check_index_type=False)
 
 def test_add_pandas_df(df_simple, add_df):
@@ -1284,19 +1290,82 @@ def test_add_pandas_df(df_simple, add_df):
         pd.DataFrame to add to NavData
     """
     data = NavData(pandas_df=df_simple)
-    data.add(pandas_df=add_df)
+    data.concat(NavData(pandas_df=add_df),axis=1,inplace=True)
     new_df = data.pandas_df()
     add_row_num = add_df.shape[0]
     subset_df = new_df.iloc[-add_row_num:, :].reset_index(drop=True)
-    pd.testing.assert_frame_equal(subset_df, add_df,
+    pd.testing.assert_frame_equal(subset_df.sort_index(axis=1),
+                                  add_df.sort_index(axis=1),
                                   check_index_type=False)
 
     # test adding to empty NavData
     data_empty = NavData()
-    data_empty.add(pandas_df=add_df)
-    pd.testing.assert_frame_equal(add_df, data_empty.pandas_df(),
+    data_empty.concat(NavData(pandas_df=add_df),axis=1,inplace=True)
+    pd.testing.assert_frame_equal(add_df.sort_index(axis=1),
+                                  data_empty.pandas_df().sort_index(axis=1),
                                   check_index_type=False)
 
+def test_concat(df_simple):
+    """Test concat functionality.
+
+    Parameters
+    ----------
+    df_simple : pd.DataFrame
+        Simple pd.DataFrame with which to initialize NavData.
+
+    """
+
+    navdata_1 = NavData(pandas_df=df_simple)
+    navdata_2 = navdata_1.copy()
+    navdata_2.rename(mapper={"floats": "decimals", "names": "words"},
+                    inplace = True)
+
+    # add new columns
+    navdata = navdata_1.concat(navdata_1)
+    assert navdata.shape == (4,12)
+    # add new rows
+    navdata = navdata_1.concat(navdata_1,axis=0)
+    assert navdata.shape == (8,6)
+
+    # test multiple rows with the same name
+    navdata_long = navdata_1.copy()
+    for count in range(13):
+        navdata_long.concat(navdata_1,axis=0,inplace=True)
+        for word in ["names","integers","floats","strings"]:
+            assert word + "_" + str(count) in navdata_long.rows
+
+    # add semi new columns
+    navdata = navdata_1.concat(navdata_2)
+    assert navdata.shape == (6,12)
+
+    # add as new rows
+    navdata_b = navdata_1.concat(navdata_2,axis=0)
+    assert navdata_b.shape == (8,6)
+
+def test_concat_fails(df_simple):
+    """Test when concat should fail.
+
+    Parameters
+    ----------
+    df_simple : pd.DataFrame
+        Simple pd.DataFrame with which to initialize NavData.
+
+    """
+
+    navdata_1 = NavData(pandas_df=df_simple)
+
+    with pytest.raises(TypeError) as excinfo:
+        navdata_1.concat(np.array([]))
+    assert "concat" in str(excinfo.value)
+    assert "NavData" in str(excinfo.value)
+
+    navdata_2 = navdata_1.remove(cols=[0])
+
+    with pytest.raises(RuntimeError) as excinfo:
+        navdata_1.concat(navdata_2,axis=0)
+    assert "same length" in str(excinfo.value)
+    assert "concat" in str(excinfo.value)
+
 @pytest.mark.parametrize("rows",
                         [None,
                         ['names', 'integers', 'floats', 'strings'],