class ProtocolBlock(ActionBlock):
    """
    Protocol block object, subclass of ActionBlock.

    Holds the action lines found between ``begin protocol`` and
    ``end protocol``. Unlike top-level actions, these are rendered
    wrapped in their own begin/end markers.
    """

    def __init__(self) -> None:
        super().__init__()
        # Overrides whatever name the parent constructor assigned.
        self.name = "protocol"

    def add_item(self, item_tpl) -> None:
        """Store the value half of a ``(name, value)`` pair; the name is discarded."""
        _unused_name, action = item_tpl
        self.items.append(action)

    def gen_string(self) -> str:
        """Return the block as text, one ``print_line()`` result per stored item."""
        rendered = [action.print_line() for action in self.items]
        return "\n".join(["\nbegin protocol", *rendered, "end protocol\n"])
+ self.parsed_protocol_actions = [] def generate_xml(self, xml_file, model_file=None) -> bool: """ @@ -155,12 +160,36 @@ def strip_actions(self, model_path, folder) -> str: # to another line, so we can just remove the action lines mstr = re.sub(r"\\\n", "", mstr) mlines = mstr.split("\n") - stripped_lines = list(filter(lambda x: self._not_action(x), mlines)) - # remove spaces, actions don't allow them - self.parsed_actions = [ - x.replace(" ", "") - for x in filter(lambda x: not self._not_action(x), mlines) - ] + # Walk the lines once, separating non-action content (kept in the + # stripped output for BNG2.pl) from action-shaped lines, and + # further splitting action-shaped lines based on whether they sit + # inside a ``begin protocol``/``end protocol`` block. Protocol + # actions are tracked separately so BNGParser can round-trip them + # into a ProtocolBlock instead of the top-level ActionBlock. + self.parsed_actions = [] + self.parsed_protocol_actions = [] + stripped_lines = [] + in_protocol = False + for line in mlines: + if re.match(r"\s*(begin)\s+(protocol)\b", line): + in_protocol = True + stripped_lines.append(line) + continue + if re.match(r"\s*(end)\s+(protocol)\b", line): + in_protocol = False + stripped_lines.append(line) + continue + if self._not_action(line): + stripped_lines.append(line) + continue + # Hand the action line off to BNGParser intact — quoted + # spans (e.g. ``param=>"-v -gml 1000000"``) need to survive + # the whitespace-collapse pass, which `_normalize_action_text` + # does in a quote-aware way. 
+ if in_protocol: + self.parsed_protocol_actions.append(line) + else: + self.parsed_actions.append(line) # let's remove begin/end actions, rarely used but should be removed remove_from = -1 remove_to = -1 diff --git a/bionetgen/modelapi/bngparser.py b/bionetgen/modelapi/bngparser.py index a1d54215..33fab560 100644 --- a/bionetgen/modelapi/bngparser.py +++ b/bionetgen/modelapi/bngparser.py @@ -1,4 +1,4 @@ -import xmltodict, re +import xmltodict from bionetgen.main import BioNetGen from bionetgen.core.exc import BNGParseError, BNGModelError @@ -8,7 +8,7 @@ from .xmlparsers import ParameterBlockXML, CompartmentBlockXML, ObservableBlockXML from .xmlparsers import SpeciesBlockXML, MoleculeTypeBlockXML, FunctionBlockXML from .xmlparsers import RuleBlockXML, EnergyPatternBlockXML, PopulationMapBlockXML -from .blocks import ActionBlock +from .blocks import ActionBlock, ProtocolBlock from bionetgen.core.utils.utils import ActionList # This allows access to the CLIs config setup @@ -18,6 +18,74 @@ def_bng_path = conf["bngpath"] +def _normalize_action_text(action: str) -> str: + """Strip BNGL comments and unquoted whitespace, keep quoted spans intact. + + `BNGFile.strip_actions` already removed unquoted spaces line-by-line, + but quoted spans (e.g. ``param=>"-v -gml 1000000"`` in a simulate + action) need to survive — so we run a quote-aware pass here. 
+ """ + text = _strip_comment_outside_quotes(action) + text = _collapse_unquoted_whitespace(text) + return text.strip() + + +def _strip_comment_outside_quotes(text: str) -> str: + out = [] + in_single = False + in_double = False + escaped = False + for ch in text: + if escaped: + out.append(ch) + escaped = False + continue + if ch == "\\" and (in_single or in_double): + out.append(ch) + escaped = True + continue + if ch == '"' and not in_single: + in_double = not in_double + out.append(ch) + continue + if ch == "'" and not in_double: + in_single = not in_single + out.append(ch) + continue + if ch == "#" and not in_single and not in_double: + break + out.append(ch) + return "".join(out) + + +def _collapse_unquoted_whitespace(text: str) -> str: + out = [] + in_single = False + in_double = False + escaped = False + for ch in text: + if escaped: + out.append(ch) + escaped = False + continue + if ch == "\\" and (in_single or in_double): + out.append(ch) + escaped = True + continue + if ch == '"' and not in_single: + in_double = not in_double + out.append(ch) + continue + if ch == "'" and not in_double: + in_single = not in_single + out.append(ch) + continue + if ch.isspace() and not in_single and not in_double: + continue + out.append(ch) + return "".join(out) + + class BNGParser: """ Parser object that deals with reading in the BNGL file and @@ -106,148 +174,137 @@ def parse_actions(self, model_obj): """ Uses ActionList object to parse actions and turn them into action objects and fill up the ActionsBlock with them. + + Parses both top-level actions (BNGFile.parsed_actions) and + protocol-block actions (BNGFile.parsed_protocol_actions) — both + share the same action grammar; only the receiving block class + differs. 
""" - if len(self.bngfile.parsed_actions) > 0: - ablock = ActionBlock() - # we have actions in file, let's get them - # import ipdb;ipdb.set_trace() - left = [] - for action in self.bngfile.parsed_actions: - # some cleanup, first we remove comments - action = re.sub(r"\#.*", "", action) - # now we remove whitespaces - action = re.sub(r"\s", "", action) - # if we don't have anything left, move on - if len(action) == 0: - continue - # use pyparsing for parsing the action into a list - try: - action_list = self.alist.action_parser.parseString(action) - except Exception as e: + ablock = self._parse_action_block(self.bngfile.parsed_actions, ActionBlock) + if ablock is not None: + model_obj.add_block(ablock) + + protocol_actions = getattr(self.bngfile, "parsed_protocol_actions", []) + pblock = self._parse_action_block(protocol_actions, ProtocolBlock) + if pblock is not None: + model_obj.add_block(pblock) + + def _parse_action_block(self, action_lines, block_cls): + if len(action_lines) == 0: + return None + ablock = block_cls() + for action in action_lines: + self._parse_action_line(action, ablock) + if len(ablock) == 0: + return None + return ablock + + def _parse_action_line(self, action, ablock): + # Strip comments and unquoted whitespace. The walker preserves + # quoted spans so that e.g. ``param=>"-v -gml 1000000"`` keeps its + # internal spaces (the older ``re.sub(r"\s", "", action)`` pass + # would have collapsed them). 
+ action = _normalize_action_text(action) + if len(action) == 0: + return + try: + action_list = self.alist.action_parser.parseString(action) + except Exception as e: + raise BNGParseError( + self.bngfile.path, f"Failed to parse action {action}" + ) from e + if action_list[-1] == ";": + _ = action_list.pop(-1) + atype = action_list.pop(0) + # all actions have "()", remove + action_list = action_list[1:-1] + if len(action_list) == 0: + ablock.add_action(atype, {}) + return + if atype in self.alist.no_setter_syntax: + if len(action_list) == 1: + ablock.add_action(atype, {action_list[0]: None}) + return + if len(action_list) == 3 and action_list[1] == ",": + ablock.add_action(atype, {action_list[0]: None, action_list[2]: None}) + return + elif atype in self.alist.square_braces: + if action_list[0] == "[": + action_list = action_list[1:-1] + arg_dict = {} + for arg in action_list: + arg_dict[arg] = None + ablock.add_action(atype, arg_dict) + return + elif atype in self.alist.normal_types: + if action_list[0] == "{": + action_list = action_list[1:-1] + arg_dict = {} + if len(action_list) == 0: + ablock.add_action(atype, arg_dict) + return + while len(action_list) > 0: + arg_name = action_list.pop(0) + connector = action_list.pop(0) + if connector != "=>": raise BNGParseError( - self.bngfile.path, f"Failed to parse action {action}" + self.bngfile.path, f"Action {action} is malformed" ) - # we could have ";" in the action, so we need to remove it - if action_list[-1] == ";": - _ = action_list.pop(-1) - # we we move onto actually making the action object - # first value is always the action type, remove - atype = action_list.pop(0) - # all actions have "()", remove - action_list = action_list[1:-1] - # be done if we don't have anything left - if len(action_list) == 0: - # we don't have any arguments - ablock.add_action(atype, {}) - continue - # we have arguments now onto argument parsing - # we check the action type and process accordingly - if atype in 
self.alist.no_setter_syntax: - # these are actions like setParameter("test", 10), setModelName("name") - if len(action_list) == 1: - # this is of the form action("argument") - ablock.add_action(atype, {action_list[0]: None}) - continue - elif len(action_list) == 3: - # TODO: Error checking here! - if action_list[1] == ",": - # this is of the form action(argument, value) - ablock.add_action( - atype, {action_list[0]: None, action_list[2]: None} - ) - continue - elif atype in self.alist.square_braces: - # these are actions like saveParameters(["a","b"]) - # TODO: Error checking here! - if action_list[0] == "[": - # remove square braces - action_list = action_list[1:-1] - arg_dict = {} - for arg in action_list: - arg_dict[arg] = None - ablock.add_action(atype, arg_dict) - continue - elif atype in self.alist.normal_types: - # finally a normal action, we have {} and => syntax - # TODO: Error checking here! - if action_list[0] == "{": - # remove curly braces - action_list = action_list[1:-1] - arg_dict = {} - if len(action_list) == 0: - ablock.add_action(atype, arg_dict) - continue - while len(action_list) > 0: - arg_name = action_list.pop(0) - connector = action_list.pop(0) - if connector != "=>": + if arg_name in self.alist.irregular_args: + arg_type = self.alist.irregular_args[arg_name] + if arg_type == "dict": + start_curly = action_list.pop(0) + if start_curly != "{": raise BNGParseError( - self.bngfile.path, f"Action {action} is malformed" + self.bngfile.path, + f"Action {action} is malformed", ) - if arg_name in self.alist.irregular_args: - arg_type = self.alist.irregular_args[arg_name] - if arg_type == "dict": - # process dict - start_curly = action_list.pop(0) - # make sure we are actually reading a dict - if start_curly != "{": + value_str = "{" + end_curly = None + while end_curly is None: + dict_key = action_list.pop(0) + if dict_key == "}": + end_curly = dict_key + else: + if len(value_str) > 1: + value_str += "," + dict_conn = action_list.pop(0) + dict_val 
= action_list.pop(0) + if dict_conn != "=>": raise BNGParseError( self.bngfile.path, f"Action {action} is malformed", ) - value_str = "{" - end_curly = None - while end_curly is None: - # we are looping over A, =>, B and want to - # generate { A=>B, C=>D, etc } - dict_key = action_list.pop(0) - if dict_key == "}": - # we are done - end_curly = dict_key - else: - if len(value_str) > 1: - value_str += "," - dict_conn = action_list.pop(0) - dict_val = action_list.pop(0) - if dict_conn != "=>": - raise BNGParseError( - self.bngfile.path, - f"Action {action} is malformed", - ) - value_str += dict_key + dict_conn + dict_val - value_str += "}" - arg_value = value_str - elif arg_type == "list": - # process list - start_curly = action_list.pop(0) - # make sure we are actually reading a dict - if start_curly != "[": - raise BNGParseError( - self.bngfile.path, - f"Action {action} is malformed", - ) - value_str = "[" - end_curly = None - while end_curly is None: - argument_element = action_list.pop(0) - if argument_element == "]": - end_curly = argument_element - else: - if len(value_str) > 1: - value_str += "," - value_str += argument_element - value_str += "]" - arg_value = value_str - else: - arg_value = action_list.pop(0) - arg_dict[arg_name] = arg_value - ablock.add_action(atype, arg_dict) - continue + value_str += dict_key + dict_conn + dict_val + value_str += "}" + arg_value = value_str + elif arg_type == "list": + start_curly = action_list.pop(0) + if start_curly != "[": + raise BNGParseError( + self.bngfile.path, + f"Action {action} is malformed", + ) + value_str = "[" + end_curly = None + while end_curly is None: + argument_element = action_list.pop(0) + if argument_element == "]": + end_curly = argument_element + else: + if len(value_str) > 1: + value_str += "," + value_str += argument_element + value_str += "]" + arg_value = value_str else: - raise BNGParseError( - self.bngfile.path, f"Action type {atype} is not recognized." 
- ) - model_obj.add_block(ablock) + arg_value = action_list.pop(0) + arg_dict[arg_name] = arg_value + ablock.add_action(atype, arg_dict) + return + raise BNGParseError( + self.bngfile.path, f"Action type {atype} is not recognized." + ) def parse_xml(self, xml_str, model_obj) -> None: """ diff --git a/bionetgen/modelapi/model.py b/bionetgen/modelapi/model.py index ab294193..afe15e00 100644 --- a/bionetgen/modelapi/model.py +++ b/bionetgen/modelapi/model.py @@ -11,6 +11,7 @@ MoleculeTypeBlock, ObservableBlock, ParameterBlock, + ProtocolBlock, RuleBlock, SpeciesBlock, EnergyPatternBlock, @@ -87,6 +88,7 @@ def __init__( "energy_patterns", "population_maps", "rules", + "protocol", "actions", ] self.model_name = "" @@ -309,6 +311,24 @@ def add_population_maps_block(self, block=None): else: self.population_maps = PopulationMapBlock() + def add_protocol_block(self, block=None): + """ + Adds a protocol block to the model object. + + A protocol block lives inside ``begin model``/``end model`` and + holds a sequence of state-mutating action lines (e.g. + ``setParameter``, ``setConcentration``, ``simulate``) that BNG2.pl + executes when ``parameter_scan({method=>"protocol"})`` is invoked. + """ + if block is not None: + # TODO: Transition to BNGErrors and logging + assert isinstance(block, ProtocolBlock) + self.protocol = block + if "protocol" not in self.active_blocks: + self.active_blocks.append("protocol") + else: + self.protocol = ProtocolBlock() + def add_actions_block(self, block=None): """ Adds an actions block to the model object.