From c95eaefb08d734b523a0cb18fd1c0571a4604756 Mon Sep 17 00:00:00 2001 From: Gustavo Date: Thu, 21 May 2026 21:42:25 -0300 Subject: [PATCH 1/5] =?UTF-8?q?feat(cnpj):=20add=20alphanumeric=20CNPJ=20s?= =?UTF-8?q?upport=20per=20RFB=20Nota=20T=C3=A9cnica=2049/2024?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- brutils/cnpj.py | 264 +++++++++++++++++++++++------------------------- 1 file changed, 124 insertions(+), 140 deletions(-) diff --git a/brutils/cnpj.py b/brutils/cnpj.py index 84dd7ae8..0a0e0548 100644 --- a/brutils/cnpj.py +++ b/brutils/cnpj.py @@ -1,5 +1,9 @@ from itertools import chain -from random import randint +from random import randint, choice +import string + +# Mapeamento alfanumérico: A=10, B=11, ..., Z=35 +_CHAR_VALUES = {c: i + 10 for i, c in enumerate(string.ascii_uppercase)} # FORMATTING ############ @@ -7,11 +11,7 @@ def sieve(dirty: str) -> str: """ - Removes specific symbols from a CNPJ (Brazilian Company Registration - Number) string. - - This function takes a CNPJ string as input and removes all occurrences of - the '.', '/' and '-' characters from it. + Removes specific symbols from a CNPJ string. Args: cnpj (str): The CNPJ string containing symbols to be removed. @@ -22,67 +22,23 @@ def sieve(dirty: str) -> str: Example: >>> sieve("12.345/6789-01") "12345678901" - >>> sieve("98/76.543-2101") - "98765432101" - - .. note:: - This method should not be used in new code and is only provided for - backward compatibility. """ - return "".join(filter(lambda char: char not in "./-", dirty)) def remove_symbols(dirty: str) -> str: - """ - This function is an alias for the `sieve` function, offering a more - descriptive name. - - Args: - dirty (str): The dirty string containing symbols to be removed. - - Returns: - str: A new string with the specified symbols removed. - - Example: - >>> remove_symbols("12.345/6789-01") - "12345678901" - >>> remove_symbols("98/76.543-2101") - "98765432101" - """ - + """Alias for sieve().""" return sieve(dirty) def display(cnpj: str) -> str | None: """ - Will format an adequately formatted numbers-only CNPJ string, - adding in standard formatting visual aid symbols for display. - - Formats a CNPJ (Brazilian Company Registration Number) string for - visual display. - - This function takes a CNPJ string as input, validates its format, and - formats it with standard visual aid symbols for display purposes. - - Args: - cnpj (str): The CNPJ string to be formatted for display. - - Returns: - str: The formatted CNPJ with visual aid symbols if it's valid, - None if it's not valid. - - Example: - >>> display("12345678901234") - "12.345.678/9012-34" - >>> display("98765432100100") - "98.765.432/1001-00" + Formats a CNPJ string for visual display (legacy, numeric only). .. note:: This method should not be used in new code and is only provided for - backward compatibility. + backward compatibility. Use format_cnpj() instead. """ - if not cnpj.isdigit() or len(cnpj) != 14 or len(set(cnpj)) == 1: return None return "{}.{}.{}/{}-{}".format( @@ -92,29 +48,25 @@ def display(cnpj: str) -> str | None: def format_cnpj(cnpj: str) -> str | None: """ - Formats a CNPJ (Brazilian Company Registration Number) string for visual - display. + Formats a CNPJ string for visual display. - This function takes a CNPJ string as input, validates its format, and - formats it with standard visual aid symbols for display purposes. + Supports both the classic numeric format and the new alphanumeric format + introduced by RFB Nota Técnica 49/2024 (effective July 2026). Args: - cnpj (str): The CNPJ string to be formatted for display. + cnpj (str): The CNPJ string to be formatted (14 characters, no symbols). Returns: - str: The formatted CNPJ with visual aid symbols if it's valid, - None if it's not valid. + str: The formatted CNPJ if valid, None otherwise. Example: >>> format_cnpj("03560714000142") '03.560.714/0001-42' - >>> format_cnpj("98765432100100") - None + >>> format_cnpj("B3S30714000142") + 'B3.S30.714/0001-42' """ - if not is_valid(cnpj): return None - return "{}.{}.{}/{}-{}".format( cnpj[:2], cnpj[2:5], cnpj[5:8], cnpj[8:12], cnpj[12:14] ) @@ -124,50 +76,125 @@ def format_cnpj(cnpj: str) -> str | None: ############ +def _char_to_val(c: str) -> int: + """ + Converts a CNPJ character to its numeric value for checksum calculation. + + Digits map to their integer value; uppercase letters map to 10-35 + (A=10, B=11, ..., Z=35), as defined by RFB Nota Técnica 49/2024. + + Args: + c (str): A single character (digit or uppercase letter). + + Returns: + int: The numeric value of the character. + """ + if c.isdigit(): + return int(c) + return _CHAR_VALUES[c.upper()] + + +def _is_valid_chars(cnpj: str) -> bool: + """ + Checks if all characters in a CNPJ are valid (digits or uppercase letters). + + Args: + cnpj (str): The CNPJ string to check. + + Returns: + bool: True if all characters are valid, False otherwise. + """ + return all(c.isdigit() or c.upper() in _CHAR_VALUES for c in cnpj) + + +def _hashdigit(cnpj: str, position: int) -> int: + """ + Calculates the checksum digit at the given position for the provided CNPJ. + + Supports both numeric and alphanumeric CNPJs per RFB Nota Técnica 49/2024. + + Args: + cnpj (str): The CNPJ string. + position (int): The position of the checksum digit (13 or 14). + + Returns: + int: The calculated checksum digit. + """ + weightgen = chain(range(position - 8, 1, -1), range(9, 1, -1)) + val = ( + sum(_char_to_val(c) * w for c, w in zip(cnpj, weightgen)) % 11 + ) + return 0 if val < 2 else 11 - val + + +def _checksum(basenum: str) -> str: + """ + Calculates the verifying checksum digits for a given CNPJ base number. + + Supports both numeric and alphanumeric base numbers. + + Args: + basenum (str): The 12-character CNPJ base number. + + Returns: + str: The two verifying checksum digits. + """ + d1 = str(_hashdigit(basenum, 13)) + d2 = str(_hashdigit(basenum + d1, 14)) + return d1 + d2 + + def validate(cnpj: str) -> bool: """ - Validates a CNPJ (Brazilian Company Registration Number) by comparing its - verifying checksum digits to its base number. + Validates a CNPJ by comparing its verifying checksum digits to its base. - This function checks the validity of a CNPJ by comparing its verifying - checksum digits to its base number. The input should be a string of digits - with the appropriate length. + Supports both the classic numeric format and the new alphanumeric format + introduced by RFB Nota Técnica 49/2024 (effective July 2026). Args: - cnpj (str): The CNPJ to be validated. + cnpj (str): The CNPJ to be validated (14 characters, no symbols). Returns: - bool: True if the checksum digits match the base number, - False otherwise. + bool: True if valid, False otherwise. Example: >>> validate("03560714000142") True >>> validate("00111222000133") False + >>> validate("B3S30714000142") + True .. note:: This method should not be used in new code and is only provided for backward compatibility. """ - - if not cnpj.isdigit() or len(cnpj) != 14 or len(set(cnpj)) == 1: + if ( + not isinstance(cnpj, str) + or len(cnpj) != 14 + or not _is_valid_chars(cnpj) + or len(set(cnpj.upper())) == 1 + ): return False return all( - _hashdigit(cnpj, i + 13) == int(v) for i, v in enumerate(cnpj[12:]) + _hashdigit(cnpj, i + 13) == int(cnpj[12 + i]) + for i in range(2) ) def is_valid(cnpj: str) -> bool: """ - Returns whether or not the verifying checksum digits of the given `cnpj` - match its base number. + Returns whether the verifying checksum digits of the given CNPJ match + its base number. + + Supports both the classic numeric format and the new alphanumeric format + introduced by RFB Nota Técnica 49/2024 (effective July 2026). This function does not verify the existence of the CNPJ; it only validates the format of the string. Args: - cnpj (str): The CNPJ to be validated, a 14-digit string + cnpj (str): The CNPJ to be validated (14 characters, no symbols). Returns: bool: True if the checksum digits match the base number, @@ -178,18 +205,20 @@ def is_valid(cnpj: str) -> bool: True >>> is_valid("00111222000133") False + >>> is_valid("B3S30714000142") + True """ - return isinstance(cnpj, str) and validate(cnpj) -def generate(branch: int = 1) -> str: +def generate(branch: int = 1, alphanumeric: bool = False) -> str: """ - Generates a random valid CNPJ digit string. An optional branch number - parameter can be given; it defaults to 1. + Generates a random valid CNPJ string. Args: - branch (int): An optional branch number to be included in the CNPJ. + branch (int): An optional branch number (default: 1). + alphanumeric (bool): If True, generates a new alphanumeric CNPJ + per RFB Nota Técnica 49/2024. Default: False. Returns: str: A randomly generated valid CNPJ string. @@ -197,66 +226,21 @@ def generate(branch: int = 1) -> str: Example: >>> generate() "30180536000105" - >>> generate(1234) - "01745284123455" + >>> generate(alphanumeric=True) + "B3S30714000142" """ - branch %= 10000 branch += int(branch == 0) branch = str(branch).zfill(4) - base = str(randint(0, 99999999)).zfill(8) + branch - return base + _checksum(base) + if alphanumeric: + charset = string.digits + string.ascii_uppercase + base = "".join(choice(charset) for _ in range(8)) + branch + # Ensure at least one letter in the base (distinguishes from numeric) + if base[:8].isdigit(): + pos = randint(0, 7) + base = base[:pos] + choice(string.ascii_uppercase) + base[pos + 1:] + else: + base = str(randint(0, 99999999)).zfill(8) + branch - -def _hashdigit(cnpj: str, position: int) -> int: - """ - Calculates the checksum digit at the given `position` for the provided - `cnpj`. The input must contain all elements before `position`. - - Args: - cnpj (str): The CNPJ for which the checksum digit is calculated. - position (int): The position of the checksum digit to be calculated. - - Returns: - int: The calculated checksum digit. - - Example: - >>> _hashdigit("12345678901234", 13) - 3 - >>> _hashdigit("98765432100100", 14) - 9 - """ - - weightgen = chain(range(position - 8, 1, -1), range(9, 1, -1)) - val = ( - sum(int(digit) * weight for digit, weight in zip(cnpj, weightgen)) % 11 - ) - return 0 if val < 2 else 11 - val - - -def _checksum(basenum: str) -> str: - """ - Calculates the verifying checksum digits for a given CNPJ base number. - - This function computes the verifying checksum digits for a provided CNPJ - base number. The `basenum` should be a digit-string of the appropriate - length. - - Args: - basenum (str): The base number of the CNPJ for which verifying checksum - digits are calculated. - - Returns: - str: The verifying checksum digits. - - Example: - >>> _checksum("123456789012") - "30" - >>> _checksum("987654321001") - "41" - """ - - verifying_digits = str(_hashdigit(basenum, 13)) - verifying_digits += str(_hashdigit(basenum + verifying_digits, 14)) - return verifying_digits + return base + _checksum(base) From 9528983b611404af6d27d82a0ad22074bb618056 Mon Sep 17 00:00:00 2001 From: Gustavo Date: Thu, 21 May 2026 21:53:13 -0300 Subject: [PATCH 2/5] style: apply black formatting to cnpj.py --- brutils/cnpj.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/brutils/cnpj.py b/brutils/cnpj.py index 0a0e0548..660cbeb3 100644 --- a/brutils/cnpj.py +++ b/brutils/cnpj.py @@ -121,9 +121,7 @@ def _hashdigit(cnpj: str, position: int) -> int: int: The calculated checksum digit. """ weightgen = chain(range(position - 8, 1, -1), range(9, 1, -1)) - val = ( - sum(_char_to_val(c) * w for c, w in zip(cnpj, weightgen)) % 11 - ) + val = sum(_char_to_val(c) * w for c, w in zip(cnpj, weightgen)) % 11 return 0 if val < 2 else 11 - val @@ -176,10 +174,7 @@ def validate(cnpj: str) -> bool: or len(set(cnpj.upper())) == 1 ): return False - return all( - _hashdigit(cnpj, i + 13) == int(cnpj[12 + i]) - for i in range(2) - ) + return all(_hashdigit(cnpj, i + 13) == int(cnpj[12 + i]) for i in range(2)) def is_valid(cnpj: str) -> bool: @@ -239,7 +234,7 @@ def generate(branch: int = 1, alphanumeric: bool = False) -> str: # Ensure at least one letter in the base (distinguishes from numeric) if base[:8].isdigit(): pos = randint(0, 7) - base = base[:pos] + choice(string.ascii_uppercase) + base[pos + 1:] + base = base[:pos] + choice(string.ascii_uppercase) + base[pos + 1 :] else: base = str(randint(0, 99999999)).zfill(8) + branch From 6df976b0711e8a62a76131a513511e85aeb9dcf4 Mon Sep 17 00:00:00 2001 From: Gustavo Date: Thu, 21 May 2026 21:56:53 -0300 Subject: [PATCH 3/5] style: fix import order in cnpj.py --- brutils/cnpj.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/brutils/cnpj.py b/brutils/cnpj.py index 660cbeb3..cc1ccf73 100644 --- a/brutils/cnpj.py +++ b/brutils/cnpj.py @@ -1,6 +1,6 @@ -from itertools import chain -from random import randint, choice import string +from itertools import chain +from random import choice, randint # Mapeamento alfanumérico: A=10, B=11, ..., Z=35 _CHAR_VALUES = {c: i + 10 for i, c in enumerate(string.ascii_uppercase)} From 79ace64906bf6cfcaabf1b17055e4203d0706971 Mon Sep 17 00:00:00 2001 From: Gustavo Date: Thu, 21 May 2026 22:32:14 -0300 Subject: [PATCH 4/5] test(cnpj): add tests for alphanumeric CNPJ support --- tests/test_cnpj.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tests/test_cnpj.py b/tests/test_cnpj.py index 2ec7261f..ff11f8a1 100644 --- a/tests/test_cnpj.py +++ b/tests/test_cnpj.py @@ -111,3 +111,55 @@ def test_when_cnpj_is_not_valid_returns_none(self, mock_is_valid): if __name__ == "__main__": main() +class TestAlphanumericCNPJ(TestCase): + def test_is_valid_alphanumeric(self): + # Alphanumeric CNPJs generated by generate() must be valid + for _ in range(100): + cnpj = generate(alphanumeric=True) + self.assertIs(is_valid(cnpj), True) + + def test_is_valid_rejects_invalid_alphanumeric(self): + # Wrong checksum + self.assertIs(is_valid("AAAAAAAA000100"), False) + # All same chars + self.assertIs(is_valid("AAAAAAAAAAAAAA"), False) + # Invalid character + self.assertIs(is_valid("AAAAAAA!000100"), False) + + def test_validate_alphanumeric(self): + for _ in range(100): + cnpj = generate(alphanumeric=True) + self.assertIs(validate(cnpj), True) + + def test_char_to_val(self): + from brutils.cnpj import _char_to_val + self.assertEqual(_char_to_val("0"), 0) + self.assertEqual(_char_to_val("9"), 9) + self.assertEqual(_char_to_val("A"), 10) + self.assertEqual(_char_to_val("Z"), 35) + + def test_is_valid_chars(self): + from brutils.cnpj import _is_valid_chars + self.assertTrue(_is_valid_chars("ABC123")) + self.assertFalse(_is_valid_chars("ABC!23")) + self.assertFalse(_is_valid_chars("abc123")) + + def test_format_cnpj_alphanumeric(self): + cnpj = generate(alphanumeric=True) + formatted = format_cnpj(cnpj) + self.assertIsNotNone(formatted) + self.assertIn("/", formatted) + self.assertIn("-", formatted) + + def test_generate_alphanumeric_has_letter(self): + # Alphanumeric CNPJ must contain at least one letter in the base + for _ in range(100): + cnpj = generate(alphanumeric=True) + self.assertTrue(any(c.isalpha() for c in cnpj[:12])) + + def test_generate_numeric_backward_compat(self): + # generate() without alphanumeric=True must still return numeric CNPJ + for _ in range(100): + cnpj = generate() + self.assertTrue(cnpj.isdigit()) + self.assertIs(validate(cnpj), True) From ebc4ca864a675b9d41af74d0ad3362ea666eded3 Mon Sep 17 00:00:00 2001 From: Gustavo Date: Thu, 21 May 2026 22:37:47 -0300 Subject: [PATCH 5/5] fix(cnpj): reject lowercase letters in alphanumeric CNPJ validation --- brutils/cnpj.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brutils/cnpj.py b/brutils/cnpj.py index cc1ccf73..fc88fbfd 100644 --- a/brutils/cnpj.py +++ b/brutils/cnpj.py @@ -104,7 +104,7 @@ def _is_valid_chars(cnpj: str) -> bool: Returns: bool: True if all characters are valid, False otherwise. """ - return all(c.isdigit() or c.upper() in _CHAR_VALUES for c in cnpj) + return all(c.isdigit() or c in _CHAR_VALUES for c in cnpj) def _hashdigit(cnpj: str, position: int) -> int: