From a9ef620af688cc64c1cb1e53896f2112cd7e6ec1 Mon Sep 17 00:00:00 2001 From: mcarans Date: Fri, 13 Feb 2026 14:41:31 +1300 Subject: [PATCH] Make save_iterable make more sense by ordering according to headers if it is a list of strings. Also only the first row needs reordering for a dict. --- src/hdx/utilities/saver.py | 44 +++++++++++++------------------ tests/hdx/utilities/test_saver.py | 24 +++++++++++++++++ 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/hdx/utilities/saver.py b/src/hdx/utilities/saver.py index 41972ab..745a089 100644 --- a/src/hdx/utilities/saver.py +++ b/src/hdx/utilities/saver.py @@ -281,10 +281,13 @@ def save_iterable( row_function: Callable[[dict], dict | None] | None = None, no_empty: bool = True, ) -> list: - """Save an iterable of rows in dict or list form to a csv. (The headers + """Save an iterable of rows in dict or list form to a csv. The headers argument is either a row number (rows start counting at 1), or the actual - headers defined as a list of strings. If not set, all rows will be treated - as containing values.) + headers defined as a list of strings, the order of which defines the column order. + Columns not named in that list of strings will be dropped. If headers is not set, + all rows will be treated as containing values. The columns argument defines which + columns will be output. It can be used in conjunction with an integer headers with + columns selecting the columns and headers selecting the starting row. 
Args: filepath: Path to write to @@ -327,32 +330,23 @@ def write_rows(newrows, has_header, headers) -> None: if isinstance(row, dict): has_header = True row = row_function(row) + while row is None: + row = next(rows) + row = row_function(row) if columns: - if row is not None: - newrow = {} - for column in columns: - if column in row: - newrow[column] = row[column] - newrows.append(newrow) - for row in rows: - row = row_function(row) - if row is None: - continue - newrow = {} - for column in columns: - if column in row: - newrow[column] = row[column] - newrows.append(newrow) + row = {k: row[k] for k in columns if k in row} + newrows.append(row) if headers is None: headers = columns else: - if row is not None: - newrows.append(row) - for row in rows: - row = row_function(row) - if row is None: - continue - newrows.append(row) + if headers and isinstance(headers, list): + row = {k: row[k] for k in headers if k in row} + newrows.append(row) + for row in rows: + row = row_function(row) + if row is None: + continue + newrows.append(row) else: if headers is None: headers = 1 diff --git a/tests/hdx/utilities/test_saver.py b/tests/hdx/utilities/test_saver.py index 64847d5..840fc7c 100755 --- a/tests/hdx/utilities/test_saver.py +++ b/tests/hdx/utilities/test_saver.py @@ -353,6 +353,12 @@ def test_save_iterable(self): ["5", "6", "4"], ["8", "9", "7"], ] + save_iterable( + str(filepath), list_of_dicts, headers=2, columns=["h2", "h3", "h1"] + ) + newll = read_list_from_csv(filepath) + remove(filepath) + assert newll == [["2", "3", "1"], ["5", "6", "4"], ["8", "9", "7"]] xlfilepath = filepath.with_suffix(".xlsx") rows = save_iterable( @@ -465,6 +471,24 @@ def row_func(row): ["4", "5", "6", "b"], ["7", "8", "9", "c"], ] + save_iterable(filepath, list_of_dicts, headers=["h2", "h1", "h4", "h3"]) + newll = read_list_from_csv(filepath) + remove(filepath) + assert newll == [ + ["h2", "h1", "h4", "h3"], + ["2", "1", "a", "3"], + ["5", "4", "b", "6"], + ["8", "7", "c", "9"], + ] + 
save_iterable(filepath, list_of_dicts, headers=["h2", "h1", "h4"]) + newll = read_list_from_csv(filepath) + remove(filepath) + assert newll == [ + ["h2", "h1", "h4"], + ["2", "1", "a"], + ["5", "4", "b"], + ["8", "7", "c"], + ] save_iterable(filepath, list_of_dicts) newll = read_list_from_csv(filepath) remove(filepath)