Skip to content

Commit cf7fe1b

Browse files
committed
concat functionality
1 parent 17fb4dd commit cf7fe1b

2 files changed

Lines changed: 169 additions & 46 deletions

File tree

gnss_lib_py/parsers/navdata.py

Lines changed: 86 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -116,50 +116,104 @@ def from_numpy_array(self, numpy_array):
116116
117117
"""
118118

119+
119120
if not isinstance(numpy_array, np.ndarray):
120121
raise TypeError("numpy_array must be np.ndarray")
121122

123+
122124
self._build_navdata()
123125

126+
numpy_array = np.atleast_2d(numpy_array)
124127
for row_num in range(numpy_array.shape[0]):
125128
self[str(row_num)] = numpy_array[row_num,:]
126129

127-
def add(self, csv_path=None, pandas_df=None, numpy_array=None):
128-
"""Add new timesteps to existing array
130+
def concat(self, navdata=None, axis=1, inplace=False):
131+
"""Concatenates new rows or new columns to existing NavData.
132+
133+
Each type of data is included in a row, so adding new rows with
134+
``axis=0`` means adding new types of data. Concat requires that
135+
the new NavData matches the length of the existing NavData. Row
136+
concatenation assumes the same ordering across both NavData
137+
instances (e.g. sorted by timestamp) and does not perform any
138+
matching/sorting itself.
139+
140+
You can also concatenate new columns with ``axis=1``. If the row
141+
names of the new NavData instance don't match the row names of
142+
the existing NavData instance, the mismatched values will be
143+
filled with np.nan.
129144
130145
Parameters
131146
----------
132-
csv_path : string
133-
Path to csv file containing data to add
134-
pandas_df : pd.DataFrame
135-
DataFrame containing data to add
136-
numpy_array : np.ndarray
137-
Array containing only numeric data to add
147+
navdata : gnss_lib_py.parsers.navdata.NavData
148+
Navdata instance to concatenate.
149+
axis : int
150+
Either add new rows (type) of data ``axis=0`` or new columns
151+
(e.g. timesteps) of data ``axis=1``.
152+
inplace : bool
153+
If False, will return new concatenated NavData instance.
154+
If True, will concatenate data to the current NavData
155+
instance.
156+
157+
Returns
158+
-------
159+
new_navdata : gnss_lib_py.parsers.navdata.NavData or None
160+
If inplace is False, returns a new NavData instance holding the
161+
concatenated data. If inplace is True, returns
162+
None.
163+
138164
"""
139-
old_row_num = len(self.map)
140-
old_len = len(self)
141-
new_data_cols = slice(old_len, None)
142-
if numpy_array is not None:
143-
if old_row_num == 0:
144-
self.from_numpy_array(numpy_array)
145-
else:
146-
if len(numpy_array.shape)==1:
147-
numpy_array = np.reshape(numpy_array, [1, -1])
148-
self.array = np.hstack((self.array, np.empty_like(numpy_array,
149-
dtype=self.arr_dtype)))
150-
self[:, new_data_cols] = numpy_array
151-
if csv_path is not None:
152-
if old_row_num == 0:
153-
self.from_csv_path(csv_path)
154-
else:
155-
pandas_df = pd.read_csv(csv_path)
156-
if pandas_df is not None:
157-
if old_row_num == 0:
158-
self.from_pandas_df(pandas_df)
159-
else:
160-
self.array = np.hstack((self.array, np.empty(pandas_df.shape).T))
161-
for col in pandas_df.columns:
162-
self[col, new_data_cols] = np.asarray(pandas_df[col].values)
165+
166+
if not isinstance(navdata,NavData):
167+
raise TypeError("new concat data must be a NavData instance.")
168+
169+
if axis == 0: # concatenate new rows
170+
if len(self) != len(navdata):
171+
raise RuntimeError("new concat data must be same " \
172+
+ "length to concatenate new rows.")
173+
if not inplace:
174+
new_navdata = self.copy()
175+
for row in navdata.rows:
176+
new_row_name = row
177+
suffix = None
178+
while new_row_name in self.rows:
179+
if suffix is None:
180+
suffix = 0
181+
else:
182+
suffix += 1
183+
new_row_name = row + "_" + str(suffix)
184+
if inplace:
185+
self[new_row_name] = navdata[row]
186+
else:
187+
new_navdata[new_row_name] = navdata[row]
188+
189+
elif axis == 1: # concatenate new columns
190+
new_navdata = NavData()
191+
# get unique list of row names
192+
combined_rows = set(self.rows + navdata.rows)
193+
194+
for row in combined_rows:
195+
combined_row = np.array([])
196+
# combine data from existing and new instance
197+
for data in [self, navdata]:
198+
if row in data.rows:
199+
new_row = data[row]
200+
elif len(data) == 0:
201+
continue
202+
else:
203+
# add np.nan for missing values
204+
new_row = np.empty((len(data),))
205+
new_row.fill(np.nan)
206+
combined_row = np.concatenate((combined_row,
207+
new_row))
208+
new_navdata[row] = combined_row
209+
if inplace:
210+
self.array = new_navdata.array
211+
self.map = new_navdata.map
212+
self.str_map = new_navdata.str_map
213+
214+
if inplace:
215+
return None
216+
return new_navdata
163217

164218
def where(self, key_idx, value, condition="eq"):
165219
"""Return NavData where conditions are met for the given row

tests/parsers/test_navdata.py

Lines changed: 83 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -345,14 +345,18 @@ def test_init_only_header(csv_only_header, csv_simple):
345345
csv_data = NavData(csv_path=csv_only_header)
346346
assert csv_data.shape == (4,0)
347347
# test adding new data to empty NavData with column names
348-
csv_data.add(csv_path=csv_simple)
348+
csv_data.concat(NavData(csv_path=csv_simple),axis=1,inplace=True)
349349
assert csv_data.shape == (4,6)
350+
pd.testing.assert_frame_equal(csv_data.pandas_df().sort_index(axis=1),
351+
pd.read_csv(csv_simple).sort_index(axis=1),
352+
check_dtype=False, check_names=True)
350353

351354
# should work when DataFrame is passed
352355
pd_data = NavData(pandas_df=pd.read_csv(csv_only_header))
353356
assert pd_data.shape == (4,0)
354357
# test adding new data to empty NavData with column names
355-
pd_data.add(pandas_df=pd.read_csv(csv_simple))
358+
pd_data.concat(NavData(pandas_df=pd.read_csv(csv_simple)),axis=1,
359+
inplace=True)
356360
assert pd_data.shape == (4,6)
357361

358362
@pytest.mark.parametrize('pandas_df',
@@ -1232,7 +1236,7 @@ def test_add_numpy(numpy_array, add_array):
12321236
Array to add to NavData
12331237
"""
12341238
data = NavData(numpy_array=numpy_array)
1235-
data.add(numpy_array=add_array)
1239+
data.concat(NavData(numpy_array=add_array),axis=1,inplace=True)
12361240
new_col_num = np.shape(add_array)[1]
12371241
np.testing.assert_array_equal(data[:, -new_col_num:], add_array)
12381242

@@ -1241,13 +1245,14 @@ def test_add_numpy_1d():
12411245
"""Test addition of a 1D numpy array to NavData with single row
12421246
"""
12431247
data = NavData(numpy_array=np.zeros([1,6]))
1244-
data.add(numpy_array=np.ones(8))
1248+
data.concat(NavData(numpy_array=np.ones(8)),axis=1, inplace=True)
12451249
np.testing.assert_array_equal(data[0, :], np.hstack((np.zeros(6),
12461250
np.ones(8))))
12471251

12481252
# test adding to empty NavData
12491253
data_empty = NavData()
1250-
data_empty.add(numpy_array=np.ones((8,8)))
1254+
data_empty.concat(NavData(numpy_array=np.ones((8,8))),axis=1,
1255+
inplace=True)
12511256
np.testing.assert_array_equal(data_empty[:,:],np.ones((8,8)))
12521257

12531258
def test_add_csv(df_simple, csv_simple):
@@ -1256,21 +1261,22 @@ def test_add_csv(df_simple, csv_simple):
12561261
"""
12571262
# Create and add to NavData
12581263
data = NavData(csv_path=csv_simple)
1259-
data.add(csv_path=csv_simple)
1264+
data.concat(NavData(csv_path=csv_simple),axis=1,inplace=True)
12601265
data_df = data.pandas_df()
12611266
# Set up dataframe for comparison
12621267
df_types = {'names': object, 'integers': np.float64,
12631268
'floats': np.float64, 'strings': object}
12641269
expected_df = pd.concat((df_simple,df_simple)).reset_index(drop=True)
12651270
expected_df = expected_df.astype(df_types)
1266-
pd.testing.assert_frame_equal(data_df, expected_df,
1271+
pd.testing.assert_frame_equal(data_df.sort_index(axis=1),
1272+
expected_df.sort_index(axis=1),
12671273
check_index_type=False)
12681274

12691275
# test adding to empty NavData
12701276
data_empty = NavData()
1271-
data_empty.add(csv_path=csv_simple)
1272-
pd.testing.assert_frame_equal(data_empty.pandas_df(),
1273-
df_simple.astype(df_types),
1277+
data_empty.concat(NavData(csv_path=csv_simple),axis=1,inplace=True)
1278+
pd.testing.assert_frame_equal(data_empty.pandas_df().sort_index(axis=1),
1279+
df_simple.astype(df_types).sort_index(axis=1),
12741280
check_index_type=False)
12751281

12761282
def test_add_pandas_df(df_simple, add_df):
@@ -1284,19 +1290,82 @@ def test_add_pandas_df(df_simple, add_df):
12841290
pd.DataFrame to add to NavData
12851291
"""
12861292
data = NavData(pandas_df=df_simple)
1287-
data.add(pandas_df=add_df)
1293+
data.concat(NavData(pandas_df=add_df),axis=1,inplace=True)
12881294
new_df = data.pandas_df()
12891295
add_row_num = add_df.shape[0]
12901296
subset_df = new_df.iloc[-add_row_num:, :].reset_index(drop=True)
1291-
pd.testing.assert_frame_equal(subset_df, add_df,
1297+
pd.testing.assert_frame_equal(subset_df.sort_index(axis=1),
1298+
add_df.sort_index(axis=1),
12921299
check_index_type=False)
12931300

12941301
# test adding to empty NavData
12951302
data_empty = NavData()
1296-
data_empty.add(pandas_df=add_df)
1297-
pd.testing.assert_frame_equal(add_df, data_empty.pandas_df(),
1303+
data_empty.concat(NavData(pandas_df=add_df),axis=1,inplace=True)
1304+
pd.testing.assert_frame_equal(add_df.sort_index(axis=1),
1305+
data_empty.pandas_df().sort_index(axis=1),
12981306
check_index_type=False)
12991307

1308+
def test_concat(df_simple):
1309+
"""Test concat functionality.
1310+
1311+
Parameters
1312+
----------
1313+
df_simple : pd.DataFrame
1314+
Simple pd.DataFrame with which to initialize NavData.
1315+
1316+
"""
1317+
1318+
navdata_1 = NavData(pandas_df=df_simple)
1319+
navdata_2 = navdata_1.copy()
1320+
navdata_2.rename(mapper={"floats": "decimals", "names": "words"},
1321+
inplace = True)
1322+
1323+
# add new columns
1324+
navdata = navdata_1.concat(navdata_1)
1325+
assert navdata.shape == (4,12)
1326+
# add new rows
1327+
navdata = navdata_1.concat(navdata_1,axis=0)
1328+
assert navdata.shape == (8,6)
1329+
1330+
# test multiple rows with the same name
1331+
navdata_long = navdata_1.copy()
1332+
for count in range(13):
1333+
navdata_long.concat(navdata_1,axis=0,inplace=True)
1334+
for word in ["names","integers","floats","strings"]:
1335+
assert word + "_" + str(count) in navdata_long.rows
1336+
1337+
# add semi new columns
1338+
navdata = navdata_1.concat(navdata_2)
1339+
assert navdata.shape == (6,12)
1340+
1341+
# add as new rows
1342+
navdata_b = navdata_1.concat(navdata_2,axis=0)
1343+
assert navdata_b.shape == (8,6)
1344+
1345+
def test_concat_fails(df_simple):
1346+
"""Test when concat should fail.
1347+
1348+
Parameters
1349+
----------
1350+
df_simple : pd.DataFrame
1351+
Simple pd.DataFrame with which to initialize NavData.
1352+
1353+
"""
1354+
1355+
navdata_1 = NavData(pandas_df=df_simple)
1356+
1357+
with pytest.raises(TypeError) as excinfo:
1358+
navdata_1.concat(np.array([]))
1359+
assert "concat" in str(excinfo.value)
1360+
assert "NavData" in str(excinfo.value)
1361+
1362+
navdata_2 = navdata_1.remove(cols=[0])
1363+
1364+
with pytest.raises(RuntimeError) as excinfo:
1365+
navdata_1.concat(navdata_2,axis=0)
1366+
assert "same length" in str(excinfo.value)
1367+
assert "concat" in str(excinfo.value)
1368+
13001369
@pytest.mark.parametrize("rows",
13011370
[None,
13021371
['names', 'integers', 'floats', 'strings'],

0 commit comments

Comments
 (0)