From 2a4a85f01d2ff06048cbe6ea80b057dfb6cc7d89 Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Tue, 26 May 2026 10:06:48 +0200 Subject: [PATCH 1/2] Add initial support for writing fractional seconds pandas with pandas-3.0.3 passes tests --- pyreadstat/_readstat_writer.pyx | 20 +++++++++++--------- tests/test_narwhalified.py | 6 ++++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/pyreadstat/_readstat_writer.pyx b/pyreadstat/_readstat_writer.pyx index 361ef38..7d5e9b5 100644 --- a/pyreadstat/_readstat_writer.pyx +++ b/pyreadstat/_readstat_writer.pyx @@ -21,6 +21,7 @@ import sys import datetime #import calendar from datetime import timezone +from decimal import Decimal #from datetime import timezone as _timezone #from libc.math cimport round, NAN @@ -64,19 +65,19 @@ cdef object vectorized_convert_datetime_to_number(object df, dst_file_format fil transforms datetime64 columns in the dataframe to floats """ cdef dict convfacs - cdef double offset_secs - cdef double mulfac = 1.0 + cdef long long offset_secs + cdef int mulfac = 1 cdef int col_indx cdef list col_indxs - cdef double convfac + cdef int convfac if file_format == FILE_FORMAT_SAV or file_format == FILE_FORMAT_POR: - offset_secs = spss_offset_secs + offset_secs = int(spss_offset_secs) else: - offset_secs = sas_offset_secs + offset_secs = int(sas_offset_secs) if file_format == FILE_FORMAT_DTA: # stata stores in milliseconds - mulfac = 1000.0 + mulfac = 1000 convfacs = {'ns': 1e9, 'us': 1e6, 'ms': 1e3} col_indxs = list() @@ -87,8 +88,9 @@ cdef object vectorized_convert_datetime_to_number(object df, dst_file_format fil df = df.with_columns(nw.nth(col_indxs).cast(nw.Int64)) for col_indx in col_indxs: convfac = convfacs[pywriter_timeunits[col_indx]] - df = df.with_columns(nw.when(nw.nth(col_indx)!=-9223372036854775808).then(nw.nth(col_indx))) - df = df.with_columns((((nw.nth(col_indx).cast(nw.Float64))/convfac) + offset_secs).round() * mulfac) + df = df.with_columns(nw.when(nw.nth(col_indx) != 
-9223372036854775808).then( + ((nw.nth(col_indx) + offset_secs * convfac) * Decimal(mulfac) / convfac).round(6).cast( + nw.Float64))) return df @@ -138,7 +140,7 @@ cdef object vectorized_convert_time_to_number(object df, dst_file_format file_fo df = df.with_columns(nw.nth(col_indxs).cast(nw.Int64)) for col_indx in col_indxs: df = df.with_columns(nw.when(nw.nth(col_indx)!=-9223372036854775808).then(nw.nth(col_indx))) - df = df.with_columns((nw.nth(col_indx).cast(nw.Float64)/1e9).round() * mulfac) + df = df.with_columns((nw.nth(col_indx).cast(nw.Float64)/1e9).round(9) * mulfac) return df cdef double convert_datetimelike_to_number(dst_file_format file_format, pywriter_variable_type curtype, object curval) except *: diff --git a/tests/test_narwhalified.py b/tests/test_narwhalified.py index 3d9ec46..5e2e7b7 100644 --- a/tests/test_narwhalified.py +++ b/tests/test_narwhalified.py @@ -970,6 +970,12 @@ def test_xport_write_dates(self): df, meta = pyreadstat.read_xport(path, output_format=self.backend) self.assertTrue(df.equals(self.df_sas_dates2)) + def test_xport_write_fractional_seconds(self): + path = os.path.join(self.write_folder, "fractional_seconds.xpt") + pyreadstat.write_xport(self.df_sas_fractional_seconds, path) + df, meta = pyreadstat.read_xport(path, output_format=self.backend) + self.assertTrue(df.equals(self.df_sas_fractional_seconds)) + def test_sav_write_charnan(self): path = os.path.join(self.write_folder, "charnan.sav") pyreadstat.write_sav(self.df_charnan, path) From 4f9df0de87d998ebd2dfc6b68faaa8c360886f3d Mon Sep 17 00:00:00 2001 From: Julian Sikorski Date: Tue, 26 May 2026 21:06:47 +0200 Subject: [PATCH 2/2] Improve writing fractional seconds with polars Now "only" three out of 100 test datetime values fail the fractional seconds xpt write test. All are beyond year 2250 and thus beyond the range in which microsecond precision is reliable using 64-bit float. 
--- pyreadstat/_readstat_writer.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyreadstat/_readstat_writer.pyx b/pyreadstat/_readstat_writer.pyx index 7d5e9b5..9f97f08 100644 --- a/pyreadstat/_readstat_writer.pyx +++ b/pyreadstat/_readstat_writer.pyx @@ -88,9 +88,9 @@ cdef object vectorized_convert_datetime_to_number(object df, dst_file_format fil df = df.with_columns(nw.nth(col_indxs).cast(nw.Int64)) for col_indx in col_indxs: convfac = convfacs[pywriter_timeunits[col_indx]] + finfac = Decimal(mulfac) / Decimal(convfac) df = df.with_columns(nw.when(nw.nth(col_indx) != -9223372036854775808).then( - ((nw.nth(col_indx) + offset_secs * convfac) * Decimal(mulfac) / convfac).round(6).cast( - nw.Float64))) + ((nw.nth(col_indx) + offset_secs * convfac) * finfac).cast(nw.Float64))) return df