From 2a80cede6df34d482cd541acc732fbc9710078ab Mon Sep 17 00:00:00 2001 From: Vojtech Trefny Date: Wed, 13 May 2026 15:58:24 +0200 Subject: [PATCH 1/3] Fix case-insensitive parsing of non-ASCII (e.g. Cyrillic) unit names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit strncasecmp only handles ASCII case folding, so translated unit names like "миб" (lowercase Cyrillic for MiB) failed to match "МиБ". Replace with a new u8_casecmp helper that converts to wchar_t via mbstowcs and compares with wcsncasecmp, which handles multibyte case folding correctly. Co-Authored-By: Claude Opus 4.6 --- po/libbytesize.pot | 36 ++++++++++++++++---------------- src/bs_size.c | 39 +++++++++++++++++++++++++++++++---- tests/libbytesize_unittest.py | 27 ++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 22 deletions(-) diff --git a/po/libbytesize.pot b/po/libbytesize.pot index d9bc46b..af8d0e0 100644 --- a/po/libbytesize.pot +++ b/po/libbytesize.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: libbytesize 2.12\n" "Report-Msgid-Bugs-To: vtrefny@redhat.com\n" -"POT-Creation-Date: 2026-01-08 06:02-0800\n" +"POT-Creation-Date: 2026-01-16 12:41+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -18,86 +18,86 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" #. TRANSLATORS: 'B' for bytes -#: src/bs_size.c:52 src/bs_size.c:73 +#: src/bs_size.c:54 src/bs_size.c:75 msgid "B" msgstr "" #. TRANSLATORS: abbreviation for kibibyte, 2**10 bytes -#: src/bs_size.c:54 +#: src/bs_size.c:56 msgid "KiB" msgstr "" #. TRANSLATORS: abbreviation for mebibyte, 2**20 bytes -#: src/bs_size.c:56 +#: src/bs_size.c:58 msgid "MiB" msgstr "" #. TRANSLATORS: abbreviation for gibibyte, 2**30 bytes -#: src/bs_size.c:58 +#: src/bs_size.c:60 msgid "GiB" msgstr "" #. TRANSLATORS: abbreviation for tebibyte, 2**40 bytes -#: src/bs_size.c:60 +#: src/bs_size.c:62 msgid "TiB" msgstr "" #. 
TRANSLATORS: abbreviation for pebibyte, 2**50 bytes -#: src/bs_size.c:62 +#: src/bs_size.c:64 msgid "PiB" msgstr "" #. TRANSLATORS: abbreviation for exbibyte, 2**60 bytes -#: src/bs_size.c:64 +#: src/bs_size.c:66 msgid "EiB" msgstr "" #. TRANSLATORS: abbreviation for zebibyte, 2**70 bytes -#: src/bs_size.c:66 +#: src/bs_size.c:68 msgid "ZiB" msgstr "" #. TRANSLATORS: abbreviation for yobibyte, 2**80 bytes -#: src/bs_size.c:68 +#: src/bs_size.c:70 msgid "YiB" msgstr "" #. TRANSLATORS: abbreviation for kilobyte, 10**3 bytes -#: src/bs_size.c:75 +#: src/bs_size.c:77 msgid "KB" msgstr "" #. TRANSLATORS: abbreviation for megabyte, 10**6 bytes -#: src/bs_size.c:77 +#: src/bs_size.c:79 msgid "MB" msgstr "" #. TRANSLATORS: abbreviation for gigabyte, 10**9 bytes -#: src/bs_size.c:79 +#: src/bs_size.c:81 msgid "GB" msgstr "" #. TRANSLATORS: abbreviation for terabyte, 10**12 bytes -#: src/bs_size.c:81 +#: src/bs_size.c:83 msgid "TB" msgstr "" #. TRANSLATORS: abbreviation for petabyte, 10**15 bytes -#: src/bs_size.c:83 +#: src/bs_size.c:85 msgid "PB" msgstr "" #. TRANSLATORS: abbreviation for exabyte, 10**18 bytes -#: src/bs_size.c:85 +#: src/bs_size.c:87 msgid "EB" msgstr "" #. TRANSLATORS: abbreviation for zettabyte, 10**21 bytes -#: src/bs_size.c:87 +#: src/bs_size.c:89 msgid "ZB" msgstr "" #. TRANSLATORS: abbreviation for yottabyte, 10**24 bytes -#: src/bs_size.c:89 +#: src/bs_size.c:91 msgid "YB" msgstr "" diff --git a/src/bs_size.c b/src/bs_size.c index 7d17be2..cda12f3 100644 --- a/src/bs_size.c +++ b/src/bs_size.c @@ -8,6 +8,7 @@ #include #include #include +#include /* set code unit width to 8 so we can use generic macros like 'pcre2_compile' * instead of 'pcre2_compile_8' @@ -225,6 +226,36 @@ static void strstrip(char *str) { str[i-begin] = '\0'; } +/* Case-insensitive comparison that handles multibyte UTF-8 (e.g. 
Cyrillic) */ +static int u8_casecmp (const char *s1, const char *s2, size_t n1) { + wchar_t *w1 = NULL; + wchar_t *w2 = NULL; + size_t wlen1, wlen2; + int ret; + + wlen1 = mbstowcs (NULL, s1, 0); + wlen2 = mbstowcs (NULL, s2, 0); + if (wlen1 == (size_t) -1 || wlen2 == (size_t) -1) + return strncasecmp (s1, s2, n1); + + w1 = malloc ((wlen1 + 1) * sizeof (wchar_t)); + w2 = malloc ((wlen2 + 1) * sizeof (wchar_t)); + if (!w1 || !w2) { + free (w1); + free (w2); + return strncasecmp (s1, s2, n1); + } + + mbstowcs (w1, s1, wlen1 + 1); + mbstowcs (w2, s2, wlen2 + 1); + + ret = wcsncasecmp (w1, w2, wlen1); + + free (w1); + free (w2); + return ret; +} + static bool multiply_size_by_unit (mpq_t size, char *unit_str) { BSBunit bunit = BS_BUNIT_UNDEF; BSDunit dunit = BS_DUNIT_UNDEF; @@ -236,7 +267,7 @@ static bool multiply_size_by_unit (mpq_t size, char *unit_str) { unit_str_len = strlen (unit_str); for (bunit=BS_BUNIT_B; bunit < BS_BUNIT_UNDEF; bunit++) - if (strncasecmp (unit_str, b_units[bunit-BS_BUNIT_B], unit_str_len) == 0) { + if (u8_casecmp (unit_str, b_units[bunit-BS_BUNIT_B], unit_str_len) == 0) { pwr = (uint64_t) bunit - BS_BUNIT_B; mpz_mul_2exp (mpq_numref (size), mpq_numref (size), 10 * pwr); return true; @@ -245,7 +276,7 @@ static bool multiply_size_by_unit (mpq_t size, char *unit_str) { mpq_init (dec_mul); mpz_init (pow_1000); for (dunit=BS_DUNIT_B; dunit < BS_DUNIT_UNDEF; dunit++) - if (strncasecmp (unit_str, d_units[dunit-BS_DUNIT_B], unit_str_len) == 0) { + if (u8_casecmp (unit_str, d_units[dunit-BS_DUNIT_B], unit_str_len) == 0) { pwr = (uint64_t) (dunit - BS_DUNIT_B); mpz_ui_pow_ui (pow_1000, 1000, pwr); mpq_set_z (dec_mul, pow_1000); @@ -256,7 +287,7 @@ static bool multiply_size_by_unit (mpq_t size, char *unit_str) { } for (bunit=BS_BUNIT_B; bunit < BS_BUNIT_UNDEF; bunit++) - if (strncasecmp (unit_str, _(b_units[bunit-BS_BUNIT_B]), unit_str_len) == 0) { + if (u8_casecmp (unit_str, _(b_units[bunit-BS_BUNIT_B]), unit_str_len) == 0) { pwr = (uint64_t) bunit - 
BS_BUNIT_B; mpz_mul_2exp (mpq_numref (size), mpq_numref (size), 10 * pwr); mpz_clear (pow_1000); @@ -265,7 +296,7 @@ static bool multiply_size_by_unit (mpq_t size, char *unit_str) { } for (dunit=BS_DUNIT_B; dunit < BS_DUNIT_UNDEF; dunit++) - if (strncasecmp (unit_str, _(d_units[dunit-BS_DUNIT_B]), unit_str_len) == 0) { + if (u8_casecmp (unit_str, _(d_units[dunit-BS_DUNIT_B]), unit_str_len) == 0) { pwr = (uint64_t) (dunit - BS_DUNIT_B); mpz_ui_pow_ui (pow_1000, 1000, pwr); mpq_set_z (dec_mul, pow_1000); diff --git a/tests/libbytesize_unittest.py b/tests/libbytesize_unittest.py index 13a187e..898b259 100755 --- a/tests/libbytesize_unittest.py +++ b/tests/libbytesize_unittest.py @@ -140,6 +140,33 @@ def testNewFromStrLocalePsAF(self): expected = (1536, -1) self.assertEqual(actual, expected) + @requires_locales({'ru_RU.UTF-8'}) + def testNewFromStrLocaleRuRU(self): + locale.setlocale(locale.LC_ALL, 'ru_RU.UTF-8') + + # uppercase Cyrillic unit (canonical translation) + actual = SizeStruct.new_from_str('1 МиБ').get_bytes() + expected = (1048576, 1) + self.assertEqual(actual, expected) + + # lowercase Cyrillic unit -- case-insensitive matching for non-ASCII + actual = SizeStruct.new_from_str('1 миб').get_bytes() + expected = (1048576, 1) + self.assertEqual(actual, expected) + + actual = SizeStruct.new_from_str('2 гиб').get_bytes() + expected = (2147483648, 1) + self.assertEqual(actual, expected) + + # ASCII units should still work under Russian locale + actual = SizeStruct.new_from_str('1 MiB').get_bytes() + expected = (1048576, 1) + self.assertEqual(actual, expected) + + actual = SizeStruct.new_from_str('1 mib').get_bytes() + expected = (1048576, 1) + self.assertEqual(actual, expected) + #enddef def testNewFromBytes(self): From 7bccaf95aafe4702198e024af8545b78ff825851 Mon Sep 17 00:00:00 2001 From: Vojtech Trefny Date: Wed, 13 May 2026 16:27:09 +0200 Subject: [PATCH 2/3] Add GitHub Actions workflow for testing with musl libc (Alpine Linux) - Add misc/alpine.Dockerfile 
and .github/workflows/test-musl.yml to run tests in an Alpine container via podman - Pass LIBS="-lintl" to configure for musl where dgettext is in a separate libintl rather than libc - Use "C" as default test locale instead of "en_US.utf8" so non-locale-specific tests run on systems without glibc locale data Co-Authored-By: Claude Opus 4.6 --- .github/workflows/test-musl.yml | 38 +++++++++++++++++++++++++++++++++ misc/alpine.Dockerfile | 6 ++++++ tests/libbytesize_unittest.py | 2 +- 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/test-musl.yml create mode 100644 misc/alpine.Dockerfile diff --git a/.github/workflows/test-musl.yml b/.github/workflows/test-musl.yml new file mode 100644 index 0000000..b12b881 --- /dev/null +++ b/.github/workflows/test-musl.yml @@ -0,0 +1,38 @@ +name: Run tests with musl libc (Alpine Linux) + +on: + pull_request: + branches: + - main + +jobs: + build: + name: musl libc test + runs-on: ubuntu-latest + env: + CI_CONTAINER: libbytesize-ci-alpine + + steps: + - name: Checkout libbytesize repository + uses: actions/checkout@v6 + + - name: Install podman + run: | + sudo apt -qq update + sudo apt -y -qq install podman + + - name: Build the container + run: | + podman build --no-cache -t ${{ env.CI_CONTAINER }} -f misc/alpine.Dockerfile . 
+ + - name: Start the container + run: | + podman run -d -t --name ${{ env.CI_CONTAINER }} --volume "$(pwd):/app" --workdir "/app" ${{ env.CI_CONTAINER }} + + - name: Build in the container + run: | + podman exec -it ${{ env.CI_CONTAINER }} bash -c "./autogen.sh && LIBS='-lintl' ./configure --with-python3 --without-gtk-doc --without-tools && make" + + - name: Run tests in the container + run: | + podman exec -it ${{ env.CI_CONTAINER }} bash -c "top_srcdir=/app top_builddir=/app source tests/testenv.sh && python3 tests/libbytesize_unittest.py" diff --git a/misc/alpine.Dockerfile b/misc/alpine.Dockerfile new file mode 100644 index 0000000..c33119b --- /dev/null +++ b/misc/alpine.Dockerfile @@ -0,0 +1,6 @@ +FROM alpine:latest + +RUN apk add --no-cache \ + gcc make autoconf automake libtool pkgconf musl-dev bash git \ + gmp-dev pcre2-dev gettext-dev \ + python3 diff --git a/tests/libbytesize_unittest.py b/tests/libbytesize_unittest.py index 898b259..ea250dd 100755 --- a/tests/libbytesize_unittest.py +++ b/tests/libbytesize_unittest.py @@ -17,7 +17,7 @@ except ImportError: from bytesize.bytesize import SizeStruct -DEFAULT_LOCALE = "en_US.utf8" +DEFAULT_LOCALE = "C" class SizeTestCase(unittest.TestCase): From 67cfc5498b01fc1d3255d89d4dc6f0565ec63a27 Mon Sep 17 00:00:00 2001 From: Vojtech Trefny Date: Thu, 14 May 2026 11:22:37 +0200 Subject: [PATCH 3/3] tests: Clear LANGUAGE when running locale tests On Debian, the LANGUAGE environment variable is set to the default language (en_US in most cases). gettext gives LANGUAGE priority over the locale selected with setlocale() when it looks up translations, so the translated unit names for the locale under test would never be used. Clear LANGUAGE in setUp to make sure our locale tests actually work.
--- tests/libbytesize_unittest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/libbytesize_unittest.py b/tests/libbytesize_unittest.py index ea250dd..24dffeb 100755 --- a/tests/libbytesize_unittest.py +++ b/tests/libbytesize_unittest.py @@ -5,6 +5,7 @@ import unittest import sys import ctypes +import os from locale_utils import get_avail_locales, missing_locales, requires_locales @@ -32,6 +33,7 @@ def setUp(self): self.skipTest("requires missing locales: %s" % missing) locale.setlocale(locale.LC_ALL, DEFAULT_LOCALE) self.addCleanup(self._clean_up) + os.environ["LANGUAGE"] = "" def _clean_up(self): locale.setlocale(locale.LC_ALL, DEFAULT_LOCALE)