From 6a6bb96a03b4a18a6dd1e3189f893e706badffd1 Mon Sep 17 00:00:00 2001 From: Bill Hlavacek Date: Sun, 10 May 2026 17:37:31 -0600 Subject: [PATCH] =?UTF-8?q?Lossless=20BNG-XML=20=E2=86=92=20BNGL=20round-t?= =?UTF-8?q?rip=20for=20tfun,=20rule=20modifiers,=20selectors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three related fidelity fixes when reading BNG2.pl-emitted BNG-XML and re-emitting BNGL: 1. tfun placeholder reconstruction. BNG2.pl serializes ``tfun(...)`` calls (both the inline-array form ``tfun([xs],[ys],ctr)`` and the file-based ``TFUN(ctr,"file")`` form) by replacing the function body with a ``__TFUN_VAL__`` / ``__TFUN__VAL__`` placeholder and stashing the real arguments on attributes of the ``<Function>`` element (``@ctrName``, ``@xData``, ``@yData``, ``@method``, ``@file``). ``FunctionBlockXML`` was reading the placeholder verbatim into the regenerated BNGL, which BNG2.pl can't re-parse. Reconstruct the call from the attributes via a new ``_resolve_expression`` helper. 2. ``RuleMod`` now supports multiple modifiers per rule. A rule can legitimately carry, say, ``DeleteMolecules`` together with ``include_reactants(...)``. ``RuleMod.modifiers: list[str]`` stores the BNGL serialization of each modifier in insertion order; ``__str__`` prefers the list when non-empty, falling back to the single ``self.type`` for backwards compatibility. 3. ``include_reactants`` / ``exclude_reactants`` / ``include_products`` / ``exclude_products`` are now emitted as rule modifiers instead of being dropped with a printed warning. BNG2.pl serializes these as ``ListOfInclude{Reactants,Products}`` / ``ListOfExclude{Reactants,Products}`` children on the rule, each carrying a pattern-index suffix (``_RP<n>`` / ``_PP<n>``) on the selector id and one or more ``<Pattern>`` entries. New helpers ``_build_selector_modifier`` and ``_format_selector_pattern`` on ``RuleBlockXML`` mirror those back as e.g. 
``include_reactants(1, MyMol)`` so the rule round-trips through BNG2.pl's BNGL parser. The ``get_rule_mod`` body is also tightened: it now tolerates rules that have no ``ListOfOperations`` at all (previously: ``KeyError``) and returns ``None`` only when there is genuinely no modifier to report, rather than dropping a real modifier on the floor when the ``ListOfOperations`` happens to be empty. --- bionetgen/modelapi/rulemod.py | 22 +++++- bionetgen/modelapi/xmlparsers.py | 124 +++++++++++++++++++++++++++---- 2 files changed, 130 insertions(+), 16 deletions(-) diff --git a/bionetgen/modelapi/rulemod.py b/bionetgen/modelapi/rulemod.py index 1e0da2be..4be1d333 100644 --- a/bionetgen/modelapi/rulemod.py +++ b/bionetgen/modelapi/rulemod.py @@ -1,22 +1,38 @@ class RuleMod: """ Rule modifiers class for storage and printing. + + A single rule may carry several modifiers — e.g. `DeleteMolecules` + together with `include_reactants(...)` / `exclude_products(...)`. + `self.modifiers` stores the BNGL serialization of each in insertion + order; `self.type` continues to track the single legacy modifier + name for backwards compatibility. 
""" - def __init__(self, mod_type=None) -> None: + def __init__(self, mod_type=None, modifiers=None) -> None: # valid mod types self.valid_mod_names = ["DeleteMolecules", "MoveConnected", "TotalRate"] + self.modifiers: list[str] = [] self.type = mod_type + if modifiers is not None: + for modifier in modifiers: + self.add_modifier(modifier) def __str__(self) -> str: + if len(self.modifiers) > 0: + return " ".join(self.modifiers) if self.type is None: return "" - else: - return self.type + return self.type def __repr__(self) -> str: return f"Rule modifier of type {self.type}" + def add_modifier(self, modifier) -> None: + text = str(modifier).strip() + if text and text not in self.modifiers: + self.modifiers.append(text) + @property def type(self): return self._type diff --git a/bionetgen/modelapi/xmlparsers.py b/bionetgen/modelapi/xmlparsers.py index bf529821..67d1b440 100644 --- a/bionetgen/modelapi/xmlparsers.py +++ b/bionetgen/modelapi/xmlparsers.py @@ -1,3 +1,5 @@ +import re + from .blocks import ParameterBlock, CompartmentBlock, ObservableBlock from .blocks import SpeciesBlock, MoleculeTypeBlock from .blocks import FunctionBlock, RuleBlock @@ -494,7 +496,7 @@ def parse_xml(self, xml): for f in xml: # add content to line fname = f["@id"] - expr = f["Expression"] + expr = self._resolve_expression(f) args = [] if "ListOfArguments" in f: args = self.get_arguments(f["ListOfArguments"]["Argument"]) @@ -502,7 +504,7 @@ def parse_xml(self, xml): block.add_function(fname, expr, args=args) else: fname = xml["@id"] - expr = xml["Expression"] + expr = self._resolve_expression(xml) args = [] if "ListOfArguments" in xml: args = self.get_arguments(xml["ListOfArguments"]["Argument"]) @@ -511,6 +513,40 @@ def parse_xml(self, xml): return block + def _resolve_expression(self, f) -> str: + """Return the BNGL expression body for a Function XML element. 
+ + Most functions serialize their body verbatim into ``<Expression>``, + but BNG2.pl rewrites ``tfun(...)`` calls (both the inline-array form + and the file-based ``TFUN(arg, "file")`` form) as the placeholder + ``__TFUN_VAL__`` and stashes the real arguments in attributes on + the ``<Function>`` element. Round-tripping the placeholder back + into BNGL is invalid — BNG2.pl can't re-parse it. Reconstruct the + call from the attributes when present. + """ + raw = str(f.get("Expression", "")) + if f.get("@type") != "TFUN": + return raw + + ctr = str(f.get("@ctrName", "")) + if "@xData" in f: + xs = str(f.get("@xData", "")) + ys = str(f.get("@yData", "")) + method = str(f.get("@method", "linear")) + body = f"tfun([{xs}],[{ys}],{ctr}" + if method and method != "linear": + body += f',method=>"{method}"' + body += ")" + elif "@file" in f: + body = f'TFUN({ctr},"{str(f.get("@file", ""))}")' + else: + body = raw + + for placeholder in ("__TFUN__VAL__", "__TFUN_VAL__"): + if placeholder in raw: + return raw.replace(placeholder, body).strip() + return body.strip() + def get_arguments(self, xml) -> list: args = [] if isinstance(xml, list): @@ -692,11 +728,11 @@ def get_operations(self, xml): return ops def get_rule_mod(self, xml): - # TODO: create working rule mods class rule_mod = RuleMod() - list_ops = xml["ListOfOperations"] + list_ops = xml.get("ListOfOperations") + had_explicit_ops = list_ops is not None if list_ops is None: - return None + list_ops = {} # determine which rule mod is being used, if any if "Delete" in list_ops: del_op = list_ops["Delete"] @@ -707,6 +743,7 @@ def get_rule_mod(self, xml): # it does not apply to the whole rule if all(val == "1" for val in dmvals): rule_mod.type = "DeleteMolecules" + rule_mod.add_modifier("DeleteMolecules") # JRF: I don't believe the id of the specific op rule_mod is currently used # rule_mod.id = op["@id"] elif "ChangeCompartment" in list_ops: @@ -717,6 +754,7 @@ def get_rule_mod(self, xml): # check if modifier was called or automatic if mod_call 
== "1": rule_mod.type = "MoveConnected" + rule_mod.add_modifier("MoveConnected") rule_mod.id = move_op["@id"] rule_mod.source = move_op["@source"] rule_mod.destination = move_op["@destination"] @@ -731,6 +769,7 @@ def get_rule_mod(self, xml): for mo in move_op: if mo["@moveConnected"] == "1": rule_mod.type = "MoveConnected" + rule_mod.add_modifier("MoveConnected") rule_mod.id.append(mo["@id"]) rule_mod.source.append(mo["@source"]) rule_mod.destination.append(mo["@destination"]) @@ -742,23 +781,82 @@ def get_rule_mod(self, xml): rate_type = ratelaw["@type"] if rate_type == "Function" and str(ratelaw.get("@totalrate", "0")) == "1": rule_mod.type = "TotalRate" + rule_mod.add_modifier("TotalRate") rule_mod.id = ratelaw["@id"] rule_mod.rate_type = ratelaw["@type"] rule_mod.name = ratelaw["@name"] rule_mod.call = ratelaw.get("@totalrate", "0") - # TODO: add support for include/exclude reactants/products + # Include / exclude reactants and products are also rule modifiers in BNGL. + # BNG2.pl emits them as ListOfInclude{Reactants,Products} / + # ListOfExclude{Reactants,Products} children on the rule, each carrying + # a pattern-index suffix (_RP<n> / _PP<n>) on the selector id and one + # or more Pattern entries. Mirror those back as + # include_reactants(<n>,<pattern>) + # exclude_products(<n>,<pattern>) + # etc., so the rule round-trips through BNG2.pl. 
+ for key, value in xml.items(): + if key not in ( + "ListOfIncludeReactants", + "ListOfIncludeProducts", + "ListOfExcludeReactants", + "ListOfExcludeProducts", + ): + continue + selectors = value if isinstance(value, list) else [value] + for selector in selectors: + call = self._build_selector_modifier(key, selector) + if call is not None: + rule_mod.add_modifier(call) + if ( - "ListOfIncludeReactants" in xml - or "ListOfIncludeProducts" in xml - or "ListOfExcludeReactants" in xml - or "ListOfExcludeProducts" in xml + rule_mod.type is None + and len(rule_mod.modifiers) == 0 + and not had_explicit_ops ): - print( - "WARNING: Include/Exclude Reactants/Products not currently supported as rule modifiers" - ) + return None return rule_mod + def _build_selector_modifier(self, key, selector_xml): + call_names = { + "ListOfIncludeReactants": "include_reactants", + "ListOfExcludeReactants": "exclude_reactants", + "ListOfIncludeProducts": "include_products", + "ListOfExcludeProducts": "exclude_products", + } + call_name = call_names.get(key) + if call_name is None: + return None + selector_id = str(selector_xml.get("@id", "")) + match = re.search(r"_(?:RP|PP)(\d+)$", selector_id) + if match is None: + return None + pattern_xml = selector_xml.get("Pattern") + if pattern_xml is None: + return None + patterns = pattern_xml if isinstance(pattern_xml, list) else [pattern_xml] + pattern_parts = [self._format_selector_pattern(pattern) for pattern in patterns] + pattern_str = " + ".join(pattern_parts) + return f"{call_name}({match.group(1)},{pattern_str})" + + def _format_selector_pattern(self, pattern_xml): + pattern = PatternXML(pattern_xml).parsed_obj + if len(pattern.molecules) == 1: + molecule = pattern.molecules[0] + if ( + molecule.name != "0" + and len(molecule.components) == 0 + and molecule.compartment is None + and molecule.label is None + and pattern.compartment is None + and not pattern.fixed + and not pattern.MatchOnce + and pattern.relation is None + and 
pattern.quantity is None + ): + return molecule.name + return str(pattern) + class EnergyPatternBlockXML(XMLObj): """