Skip to content

Commit 8f3ce5c

Browse files
committed
fix: Improved diagnostic messages, consistency and code documentation
1 parent 91f4188 commit 8f3ce5c

1 file changed

Lines changed: 23 additions & 19 deletions

File tree

tool-support/bnf_grammar_tools/bnf_grammar/bnf_grammar_processor.py

Lines changed: 23 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ def get_html(self) -> str:
228228
is_hyperlink_resolved = True
229229
break
230230
if not is_hyperlink_resolved:
231-
# Record unresolved non-terminal and line where it occurred
231+
# Record unresolved non-terminal and clause plus line where it occurred
232232
if token.text not in self.processor.unresolved_non_terminals_dict:
233233
self.processor.unresolved_non_terminals_dict[token.text] = []
234234
self.processor.unresolved_non_terminals_dict[token.text].append(f"{self.clause_id}: {html_line}")
@@ -320,7 +320,7 @@ class GrammarProcessor:
320320
- concrete textual notation
321321
- concrete graphical notation
322322
323-
Mistakes or ambiguous productions and notes are logged in the form of WARNING or ERROR messages.
323+
Mistakes or ambiguous productions and notes are logged in the form of ERROR or WARNING messages respectively.
324324
"""
325325
def __init__(self):
326326
self.start_timestamp = datetime.now(timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z")
@@ -345,7 +345,7 @@ def __init__(self):
345345
self.image_map: dict[str, str] = dict()
346346
self.unresolved_non_terminals_dict: dict[str, list[str]] = dict()
347347

348-
# Provide a backlink to this GrammarExtractor inside the GrammarElements
348+
# Provide a backlink to this GrammarProcessor inside the GrammarElement classes
349349
GrammarElement.processor = self
350350

351351
def extract_bnf_from_spec(self, input_dir: str, output_dir: str, input_file: str, syntax_kind: str, bnf_clause_id: str):
@@ -536,40 +536,40 @@ def extract_bnf_from_spec(self, input_dir: str, output_dir: str, input_file: str
536536
# Strip possibly trailing whitespace from the last candidate production
537537
candidate_productions[-1] = candidate_productions[-1].rstrip()
538538

539-
for candidate_prod in candidate_productions:
540-
if candidate_prod == "":
539+
for candidate_production in candidate_productions:
540+
if candidate_production == "":
541541
continue
542542

543543
# Collect keywords and symbols
544544
if clause_id.startswith(self.bnf_clause_id):
545545
# Store keywords and symbols appearing in textual notation productions
546-
matched_keywords = KEYWORD_PATTERN.findall(candidate_prod)
546+
matched_keywords = KEYWORD_PATTERN.findall(candidate_production)
547547
for matched_keyword in matched_keywords:
548548
self.extracted_keyword_set.add(matched_keyword[1:-1])
549-
matched_symbols = SYMBOL_PATTERN.findall(candidate_prod)
549+
matched_symbols = SYMBOL_PATTERN.findall(candidate_production)
550550
for matched_symbol in matched_symbols:
551551
self.extracted_symbol_set.add(matched_symbol[1:-1])
552552

553553
# Initialize count and index for productions with multiple <img...> terms
554-
img_count = candidate_prod.count("<img ")
554+
img_count = candidate_production.count("<img ")
555555
img_index = 0
556556

557557
# Process lines in each candidate production
558-
lines = candidate_prod.split("\n")
558+
lines = candidate_production.split("\n")
559559
line_number = 0
560560
current_note_ref: Optional[NoteRef] = None
561561
current_production: Optional[Production] = None
562562
production_name = None
563563
for line in lines:
564564
if line == "":
565-
LOGGER.error(f"Unexpected empty line in candidate production: {candidate_prod}")
565+
LOGGER.error(f"Unexpected empty line in candidate production: {candidate_production}")
566566
continue
567567
line_number += 1
568568
if line_number == 1:
569569
# Should be the start line of a production
570570
GRAPHICAL_GRAMMAR_NOTE_PATTERN = re.compile(r"^[ \t]*Note[.:].+", flags=re.IGNORECASE)
571571
if GRAPHICAL_GRAMMAR_NOTE_PATTERN.match(line):
572-
LOGGER.warning(f"Graphical note found in <pre>, but should be <p> element: {candidate_prod}")
572+
LOGGER.warning(f"Graphical note found in <pre>, but should be <p> element: {candidate_production}")
573573
current_note_ref = NoteRef(clause_id, lines=[line])
574574
self.elements.append(current_note_ref)
575575
else:
@@ -597,7 +597,7 @@ def extract_bnf_from_spec(self, input_dir: str, output_dir: str, input_file: str
597597
else:
598598
abstract_type = ""
599599
if production_name in self.grammars[-1].production_names:
600-
LOGGER.error(f"Non-unique production name: {production_name} in {self.grammars[-1].production_names}")
600+
LOGGER.error(f"Non-unique production name: {production_name} in {clause_id}:\n{candidate_production}")
601601
else:
602602
self.grammars[-1].production_names.add(production_name)
603603

@@ -607,9 +607,14 @@ def extract_bnf_from_spec(self, input_dir: str, output_dir: str, input_file: str
607607
if line[0] in (" ", "\t"):
608608
LOGGER.error(f"Production start line starts with a space or tab: {line}")
609609
line = line.strip()
610-
ONE_EQUALS_PATTERN = re.compile(r"( = | =$)")
611-
if not ONE_EQUALS_PATTERN.search(line):
612-
LOGGER.warning(f"Production start line does not contain exactly one '=': {line}")
610+
if self.syntax_kind == "textual-bnf":
611+
TEXTUAL_EQUALS_PATTERN = re.compile(r"( = | =$)")
612+
if not TEXTUAL_EQUALS_PATTERN.search(line):
613+
LOGGER.warning(f"Production start line does not contain exactly one '=': {line}")
614+
elif self.syntax_kind == "graphical-bnf":
615+
GRAPHICAL_EQUALS_PATTERN = re.compile(r"( = | =$| =\| | =\|$)")
616+
if not GRAPHICAL_EQUALS_PATTERN.search(line):
617+
LOGGER.warning(f"Production start line does not contain exactly one '=' or '=|': {line}")
613618
elif current_note_ref:
614619
current_note_ref.lines.append(line)
615620
elif "<img" in line:
@@ -640,8 +645,7 @@ def extract_bnf_from_spec(self, input_dir: str, output_dir: str, input_file: str
640645
except UnexpectedInput as e:
641646
LOGGER.error(f"Parse error in {self.input_path} {clause_id} in production:\n{current_production_text}\n{e}")
642647
else:
643-
log_level = logging.INFO if clause_id in ("8.2.3.6",) else logging.DEBUG
644-
LOGGER.log(log_level, f"Parsed successfully {clause_id}:\n{current_production_text}\n{parse_tree.pretty()}")
648+
LOGGER.debug(f"Parsed successfully {clause_id}:\n{current_production_text}\n{parse_tree.pretty()}")
645649

646650
for subtag in tag:
647651
if isinstance(subtag, Tag) and subtag.name in ("em", "strong", "img"):
@@ -815,7 +819,7 @@ def parse_marked_up_bnf(self, input_dir: str, output_dir: str, input_file: str,
815819
production_name = candidate_production.split(" ", 1)[0].strip()
816820

817821
if production_name in self.grammars[-1].production_names and not is_partial:
818-
LOGGER.error(f"Non-unique production name: {production_name} in {self.grammars[-1].production_names}")
822+
LOGGER.error(f"Non-unique production name: {production_name} in {clause_id}:\n{candidate_production}")
819823
else:
820824
self.grammars[-1].production_names.add(production_name)
821825
self.elements.append(Production(clause_id=clause_id, lines=candidate_production.split("\n"), name=production_name, abstract_syntax_type=abstract_syntax_type, is_partial=is_partial))
@@ -1044,7 +1048,7 @@ def report_checks(self) -> None:
10441048

10451049
if self.image_map:
10461050
map_string = "\n".join([f"{k} {v}" for k, v in sorted(self.image_map.items())])
1047-
LOGGER.info(f"Map from {len(self.image_map)} original to new SVG images:\n{map_string}")
1051+
LOGGER.info(f"Map from {len(self.image_map)} original to new image href attribute values:\n{map_string}")
10481052

10491053
LOGGER.info("===== End of Graphical Notation Grammar Checks")
10501054

0 commit comments

Comments
 (0)