From 010d2128305e720c77d8916e45408a29e06670b9 Mon Sep 17 00:00:00 2001 From: sainekk Date: Thu, 14 May 2026 10:59:02 +0300 Subject: [PATCH 1/2] fix csv parser when no separator detected --- src/harmony/parsing/text_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/harmony/parsing/text_parser.py b/src/harmony/parsing/text_parser.py index 5e7663c..ef71824 100644 --- a/src/harmony/parsing/text_parser.py +++ b/src/harmony/parsing/text_parser.py @@ -75,6 +75,7 @@ def convert_text_to_instruments(file: RawFile) -> List[Instrument]: elif "," in first_line: csv_sep = "," + if file.file_type == FileType.csv and csv_sep is not None: string_io = StringIO(page_text) df = pd.read_csv(string_io, sep=csv_sep) df.fillna("", inplace=True) From d3e847ea887fb5a74be31039538e38bd00979288 Mon Sep 17 00:00:00 2001 From: sainekk Date: Fri, 22 May 2026 18:57:15 +0300 Subject: [PATCH 2/2] add regression test for csv without detected separator --- tests/test_convert_text.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_convert_text.py b/tests/test_convert_text.py index 02776ef..160709f 100644 --- a/tests/test_convert_text.py +++ b/tests/test_convert_text.py @@ -135,6 +135,23 @@ def test_remove_both_ends_digits_from_csv(self): self.assertEqual("How are you today", questions[0].question_text) self.assertEqual("Are you feeling better", questions[1].question_text) + def test_csv_without_detected_separator_falls_back_to_line_parsing(self): + # Regression: pandas >=2.3 dropped support for pd.read_csv(sep=None). + # A CSV whose first line contains no tab or comma must parse via the + # txt branch (one question per line), not via pd.read_csv(sep=None). + no_sep_csv = RawFile.model_validate({ + "file_id": "no_sep_csv", + "file_name": "no_separator.csv", + "file_type": "csv", + "content": "I feel anxious\nI feel restless" + }) + instruments = convert_text_to_instruments(no_sep_csv) + self.assertEqual(1, len(instruments)) + questions = instruments[0].questions + self.assertEqual(2, len(questions)) + self.assertEqual("I feel anxious", questions[0].question_text) + self.assertEqual("I feel restless", questions[1].question_text) + if __name__ == '__main__': unittest.main()