From b31c4d35ef76233751f3ddcd1d4d1661e8e7d5db Mon Sep 17 00:00:00 2001 From: Gustavo Date: Tue, 19 May 2026 23:49:24 -0300 Subject: [PATCH] =?UTF-8?q?feat(cnpj):=20add=20alphanumeric=20CNPJ=20suppo?= =?UTF-8?q?rt=20(RFB=20Nota=20T=C3=A9cnica=2049/2024)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Receita Federal will start issuing alphanumeric CNPJs in July 2026, per Nota Técnica Conjunta COCAD/SUARA/RFB nº 49/2024. Changes: - _char_value(): new helper converting chars via ord(c)-48 (RFB rule) - _is_valid_format(): centralised format check supporting [0-9A-Z] - _hashdigit(): uses _char_value() instead of int() — works for both - validate() / is_valid(): accept 14-char alphanumeric CNPJs - display() / format_cnpj(): format alphanumeric CNPJs correctly - generate(): new alphanumeric=False parameter for backward compat - All existing tests pass (10/10); 10 new tests added (20/20 total) Closes #685 --- brutils/cnpj.py | 246 +++++++++++++++++++++++++++------------------ tests/test_cnpj.py | 49 +++++++++ 2 files changed, 198 insertions(+), 97 deletions(-) diff --git a/brutils/cnpj.py b/brutils/cnpj.py index 84dd7ae8..d13f32fd 100644 --- a/brutils/cnpj.py +++ b/brutils/cnpj.py @@ -1,5 +1,23 @@ from itertools import chain -from random import randint +from random import choice, randint +from string import ascii_uppercase, digits + +# --------------------------------------------------------------------------- +# Conjunto de caracteres válidos para o CNPJ (dígitos + letras maiúsculas). +# A Receita Federal passa a emitir CNPJs alfanuméricos a partir de julho/2026, +# conforme Nota Técnica Conjunta COCAD/SUARA/RFB nº 49/2024. +# +# Regra de conversão (ASCII - 48): +# '0'...'9' → 0...9 +# 'A'...'Z' → 17...42 +# +# Os dois últimos caracteres (dígitos verificadores) permanecem sempre +# numéricos ('0'...'9'). +# --------------------------------------------------------------------------- + +_VALID_CHARS = set(digits + ascii_uppercase) +_ALPHANUM_CHARS = digits + ascii_uppercase + # FORMATTING ############ @@ -7,36 +25,33 @@ def sieve(dirty: str) -> str: """ - Removes specific symbols from a CNPJ (Brazilian Company Registration - Number) string. + Removes specific symbols from a CNPJ string. - This function takes a CNPJ string as input and removes all occurrences of - the '.', '/' and '-' characters from it. + Removes all occurrences of '.', '/' and '-' from the given string, + returning a raw alphanumeric CNPJ string. Args: - cnpj (str): The CNPJ string containing symbols to be removed. + dirty (str): The CNPJ string containing symbols to be removed. Returns: str: A new string with the specified symbols removed. Example: - >>> sieve("12.345/6789-01") - "12345678901" - >>> sieve("98/76.543-2101") - "98765432101" + >>> sieve("12.ABC.345/01DE-35") + "12ABC34501DE35" + >>> sieve("12.345.678/0001-42") + "12345678000142" .. note:: This method should not be used in new code and is only provided for backward compatibility. """ - return "".join(filter(lambda char: char not in "./-", dirty)) def remove_symbols(dirty: str) -> str: """ - This function is an alias for the `sieve` function, offering a more - descriptive name. + Alias for :func:`sieve` with a more descriptive name. Args: dirty (str): The dirty string containing symbols to be removed. @@ -45,45 +60,34 @@ def remove_symbols(dirty: str) -> str: str: A new string with the specified symbols removed. Example: - >>> remove_symbols("12.345/6789-01") - "12345678901" - >>> remove_symbols("98/76.543-2101") - "98765432101" + >>> remove_symbols("12.ABC.345/01DE-35") + "12ABC34501DE35" """ - return sieve(dirty) def display(cnpj: str) -> str | None: """ - Will format an adequately formatted numbers-only CNPJ string, - adding in standard formatting visual aid symbols for display. - - Formats a CNPJ (Brazilian Company Registration Number) string for - visual display. - - This function takes a CNPJ string as input, validates its format, and - formats it with standard visual aid symbols for display purposes. + Formats a raw CNPJ string (numeric or alphanumeric) for visual display. Args: - cnpj (str): The CNPJ string to be formatted for display. + cnpj (str): A 14-character raw CNPJ string (no symbols). Returns: - str: The formatted CNPJ with visual aid symbols if it's valid, - None if it's not valid. + str | None: The formatted CNPJ (``XX.XXX.XXX/XXXX-DD``) if valid, + ``None`` otherwise. Example: - >>> display("12345678901234") - "12.345.678/9012-34" - >>> display("98765432100100") - "98.765.432/1001-00" + >>> display("12ABC34501DE35") + "12.ABC.345/01DE-35" + >>> display("12345678000142") + "12.345.678/0001-42" .. note:: This method should not be used in new code and is only provided for backward compatibility. """ - - if not cnpj.isdigit() or len(cnpj) != 14 or len(set(cnpj)) == 1: + if not _is_valid_format(cnpj): return None return "{}.{}.{}/{}-{}".format( cnpj[:2], cnpj[2:5], cnpj[5:8], cnpj[8:12], cnpj[12:] @@ -92,29 +96,25 @@ def display(cnpj: str) -> str | None: def format_cnpj(cnpj: str) -> str | None: """ - Formats a CNPJ (Brazilian Company Registration Number) string for visual - display. - - This function takes a CNPJ string as input, validates its format, and - formats it with standard visual aid symbols for display purposes. + Formats a CNPJ string (numeric or alphanumeric) for visual display. Args: - cnpj (str): The CNPJ string to be formatted for display. + cnpj (str): A 14-character raw CNPJ string (no symbols). Returns: - str: The formatted CNPJ with visual aid symbols if it's valid, - None if it's not valid. + str | None: The formatted CNPJ (``XX.XXX.XXX/XXXX-DD``) if valid, + ``None`` otherwise. Example: + >>> format_cnpj("12ABC34501DE35") + '12.ABC.345/01DE-35' >>> format_cnpj("03560714000142") '03.560.714/0001-42' >>> format_cnpj("98765432100100") None """ - if not is_valid(cnpj): return None - return "{}.{}.{}/{}-{}".format( cnpj[:2], cnpj[2:5], cnpj[5:8], cnpj[8:12], cnpj[12:14] ) @@ -124,25 +124,66 @@ def format_cnpj(cnpj: str) -> str | None: ############ +def _char_value(char: str) -> int: + """ + Converts a CNPJ character to its numeric value using the RFB rule + (ASCII ordinal − 48). + + Args: + char (str): A single character ('0'–'9' or 'A'–'Z'). + + Returns: + int: The numeric value of the character. + + Example: + >>> _char_value('0') + 0 + >>> _char_value('9') + 9 + >>> _char_value('A') + 17 + >>> _char_value('Z') + 42 + """ + return ord(char) - 48 + + +def _is_valid_format(cnpj: str) -> bool: + """ + Returns True if *cnpj* has the correct raw format: 14 characters, each + one in ``[0-9A-Z]``, the last two being digits, and not all identical. + """ + if not isinstance(cnpj, str) or len(cnpj) != 14: + return False + if not all(c in _VALID_CHARS for c in cnpj): + return False + if not cnpj[12:].isdigit(): + return False + if len(set(cnpj)) == 1: + return False + return True + + def validate(cnpj: str) -> bool: """ - Validates a CNPJ (Brazilian Company Registration Number) by comparing its - verifying checksum digits to its base number. + Validates a CNPJ by verifying its checksum digits. - This function checks the validity of a CNPJ by comparing its verifying - checksum digits to its base number. The input should be a string of digits - with the appropriate length. + Supports both the classic all-numeric format and the new alphanumeric + format introduced by Receita Federal (Nota Técnica 49/2024, effective + July 2026). Args: - cnpj (str): The CNPJ to be validated. + cnpj (str): The raw 14-character CNPJ string (no symbols). Returns: - bool: True if the checksum digits match the base number, - False otherwise. + bool: ``True`` if the checksum digits match the base number, + ``False`` otherwise. Example: >>> validate("03560714000142") True + >>> validate("12ABC34501DE35") + True >>> validate("00111222000133") False @@ -150,8 +191,7 @@ def validate(cnpj: str) -> bool: This method should not be used in new code and is only provided for backward compatibility. """ - - if not cnpj.isdigit() or len(cnpj) != 14 or len(set(cnpj)) == 1: + if not _is_valid_format(cnpj): return False return all( _hashdigit(cnpj, i + 13) == int(v) for i, v in enumerate(cnpj[12:]) @@ -160,103 +200,115 @@ def validate(cnpj: str) -> bool: def is_valid(cnpj: str) -> bool: """ - Returns whether or not the verifying checksum digits of the given `cnpj` - match its base number. + Returns whether the verifying checksum digits of the given CNPJ match + its base number. + + Supports both the classic all-numeric format and the new alphanumeric + format introduced by Receita Federal (Nota Técnica 49/2024, effective + July 2026). - This function does not verify the existence of the CNPJ; it only - validates the format of the string. + This function does not verify the *existence* of the CNPJ; it only + validates the format and checksum. Args: - cnpj (str): The CNPJ to be validated, a 14-digit string + cnpj (str): The raw 14-character CNPJ string (no symbols). Returns: - bool: True if the checksum digits match the base number, - False otherwise. + bool: ``True`` if the checksum digits match the base number, + ``False`` otherwise. Example: >>> is_valid("03560714000142") True + >>> is_valid("12ABC34501DE35") + True >>> is_valid("00111222000133") False """ - return isinstance(cnpj, str) and validate(cnpj) -def generate(branch: int = 1) -> str: +def generate(branch: int = 1, alphanumeric: bool = False) -> str: """ - Generates a random valid CNPJ digit string. An optional branch number - parameter can be given; it defaults to 1. + Generates a random valid CNPJ string. Args: - branch (int): An optional branch number to be included in the CNPJ. + branch (int): An optional branch (filial) number, defaults to 1. + Must be between 0 and 9999. + alphanumeric (bool): When ``True``, generates a CNPJ using the new + alphanumeric format (RFB Nota Técnica 49/2024). Defaults to + ``False`` for backward compatibility. Returns: - str: A randomly generated valid CNPJ string. + str: A randomly generated valid CNPJ string (14 raw characters). Example: - >>> generate() - "30180536000105" - >>> generate(1234) - "01745284123455" + >>> len(generate()) + 14 + >>> is_valid(generate()) + True + >>> is_valid(generate(alphanumeric=True)) + True """ - branch %= 10000 branch += int(branch == 0) branch = str(branch).zfill(4) - base = str(randint(0, 99999999)).zfill(8) + branch + + if alphanumeric: + base = "".join(choice(_ALPHANUM_CHARS) for _ in range(8)) + branch + else: + base = str(randint(0, 99999999)).zfill(8) + branch return base + _checksum(base) def _hashdigit(cnpj: str, position: int) -> int: """ - Calculates the checksum digit at the given `position` for the provided - `cnpj`. The input must contain all elements before `position`. + Calculates the checksum digit at *position* for the provided CNPJ. + + Uses the RFB character-value rule (``ord(char) − 48``) so that the + function works for both numeric and alphanumeric CNPJs. Args: - cnpj (str): The CNPJ for which the checksum digit is calculated. - position (int): The position of the checksum digit to be calculated. + cnpj (str): The CNPJ string. Must contain all characters before + *position*. + position (int): The 1-based position of the checksum digit (13 or 14). Returns: - int: The calculated checksum digit. + int: The calculated checksum digit (0–9). Example: - >>> _hashdigit("12345678901234", 13) + >>> _hashdigit("03560714000142", 13) + 4 + >>> _hashdigit("12ABC34501DE35", 13) 3 - >>> _hashdigit("98765432100100", 14) - 9 """ - weightgen = chain(range(position - 8, 1, -1), range(9, 1, -1)) val = ( - sum(int(digit) * weight for digit, weight in zip(cnpj, weightgen)) % 11 + sum(_char_value(char) * weight for char, weight in zip(cnpj, weightgen)) + % 11 ) return 0 if val < 2 else 11 - val def _checksum(basenum: str) -> str: """ - Calculates the verifying checksum digits for a given CNPJ base number. + Calculates the two verifying checksum digits for a CNPJ base number. - This function computes the verifying checksum digits for a provided CNPJ - base number. The `basenum` should be a digit-string of the appropriate - length. + Works for both numeric and alphanumeric base numbers. Args: - basenum (str): The base number of the CNPJ for which verifying checksum - digits are calculated. + basenum (str): The 12-character base CNPJ (no checksum digits). Returns: - str: The verifying checksum digits. + str: The two checksum digits (always numeric, '00'–'99'). Example: >>> _checksum("123456789012") "30" - >>> _checksum("987654321001") - "41" + >>> _checksum("12ABC34501DE") + "35" """ - - verifying_digits = str(_hashdigit(basenum, 13)) - verifying_digits += str(_hashdigit(basenum + verifying_digits, 14)) - return verifying_digits + d1 = str(_hashdigit(basenum, 13)) + d2 = str(_hashdigit(basenum + d1, 14)) + return d1 + d2 diff --git a/tests/test_cnpj.py b/tests/test_cnpj.py index 2ec7261f..d82db01c 100644 --- a/tests/test_cnpj.py +++ b/tests/test_cnpj.py @@ -111,3 +111,52 @@ def test_when_cnpj_is_not_valid_returns_none(self, mock_is_valid): if __name__ == "__main__": main() + + +class TestCNPJAlphanumeric: + """Tests for alphanumeric CNPJ support (RFB Nota Técnica 49/2024).""" + + def test_is_valid_canonical_rfb_example(self): + """Canonical example published by RFB (Nota Técnica 49/2024, Q14).""" + assert is_valid("12ABC34501DE35") is True + + def test_is_valid_numeric_still_works(self): + """Numeric CNPJs must remain valid (backward compatibility).""" + assert is_valid("03560714000142") is True + assert is_valid("00111222000133") is False + + def test_is_valid_rejects_lowercase(self): + """Lowercase letters are not valid CNPJ characters.""" + assert is_valid("12abc34501de35") is False + + def test_is_valid_rejects_wrong_checksum(self): + """Alphanumeric CNPJ with wrong DVs must be rejected.""" + assert is_valid("12ABC34501DE00") is False + + def test_is_valid_rejects_all_same_char(self): + """All-identical characters must be rejected.""" + assert is_valid("AAAAAAAAAAAAAA") is False + + def test_format_cnpj_alphanumeric(self): + """Alphanumeric CNPJ must be formatted correctly.""" + assert format_cnpj("12ABC34501DE35") == "12.ABC.345/01DE-35" + + def test_format_cnpj_numeric_unchanged(self): + """Numeric formatting must remain unchanged.""" + assert format_cnpj("03560714000142") == "03.560.714/0001-42" + + def test_generate_alphanumeric_is_valid(self): + """Generated alphanumeric CNPJs must pass validation.""" + for _ in range(50): + cnpj = generate(alphanumeric=True) + assert is_valid(cnpj), f"Generated CNPJ failed validation: {cnpj}" + + def test_generate_numeric_still_valid(self): + """Generated numeric CNPJs must still pass validation.""" + for _ in range(50): + cnpj = generate() + assert is_valid(cnpj), f"Generated CNPJ failed validation: {cnpj}" + + def test_sieve_alphanumeric(self): + """sieve() must strip symbols from alphanumeric CNPJs.""" + assert sieve("12.ABC.345/01DE-35") == "12ABC34501DE35"