diff --git a/docxtpl/template.py b/docxtpl/template.py index f20280a..0a66d97 100644 --- a/docxtpl/template.py +++ b/docxtpl/template.py @@ -82,11 +82,39 @@ def write_xml(self, filename): with open(filename, "w") as fh: fh.write(self.get_xml()) - def patch_xml(self, src_xml): + @staticmethod + def _get_delim_repr(env, attr, default): + """Return the regex-escaped representation of a jinja2 delimiter.""" + if env is None: + return re.escape(default) + return re.escape(getattr(env, attr)) + + def patch_xml(self, src_xml, jinja_env=None): """Make a lots of cleaning to have a raw xml understandable by jinja2 : strip all unnecessary xml tags, manage table cell background color and colspan, unescape html entities, etc...""" + # Resolve delimiter strings (regex-escaped) for dynamic patterns. + # When jinja_env is None, defaults to standard Jinja2 delimiters. + vo = self._get_delim_repr(jinja_env, "variable_start_string", "{{") + vc = self._get_delim_repr(jinja_env, "variable_end_string", "}}") + bo = self._get_delim_repr(jinja_env, "block_start_string", "{%") + bc = self._get_delim_repr(jinja_env, "block_end_string", "%}") + co = self._get_delim_repr(jinja_env, "comment_start_string", "{#") + cc = self._get_delim_repr(jinja_env, "comment_end_string", "#}") + + # Build a union pattern matching any Jinja2 tag: + # block_start ... block_end | comment_start ... comment_end | variable_start ... variable_end + def _tag_union(): + parts = [] + if bo and bc: + parts.append(f"{bo}(?:(?!{bc}).)*") + if co and cc: + parts.append(f"{co}(?:(?!{cc}).)*") + if vo and vc: + parts.append(f"{vo}(?:(?!{vc}).)*") + return "|".join(parts) + # replace {{ by {{ ( works with {{ }} {% and %} {# and #}) src_xml = re.sub( r"(?<={)(<[^>]*>)+(?=[\{%\#])|(?<=[%\}\#])(<[^>]*>)+(?=\})", @@ -103,12 +131,14 @@ def striptags(m): ".*?(|]*>)", "", m.group(0), flags=re.DOTALL ) - src_xml = re.sub( - r"{%(?:(?!%}).)*|{#(?:(?!#}).)*|{{(?:(?!}}).)*", - striptags, - src_xml, - flags=re.DOTALL, - ) + tag_pat = _tag_union() + if tag_pat: + src_xml = re.sub( + tag_pat, + striptags, + src_xml, + flags=re.DOTALL, + ) # manage table cell colspan def colspan(m): @@ -286,19 +316,23 @@ def without_gridspan(m2): flags=re.DOTALL, ) - def clean_tags(m): + def _clean_inner(text): return ( - m.group(0) - .replace(r"‘", "'") - .replace("<", "<") - .replace(">", ">") - .replace("“", '"') - .replace("”", '"') - .replace("‘", "'") - .replace("’", "'") + text.replace("‘", "'").replace("<", "<").replace(">", ">").replace("“", '"').replace("”", '"').replace("‘", "'").replace("’", "'") ) - src_xml = re.sub(r"(?<=\{[\{%])(.*?)(?=[\}%]})", clean_tags, src_xml) + # Build a dynamic pattern to match content *inside* any Jinja2 tag + # (between start and end delimiters) and apply HTML entity cleanup. + # Uses capture groups to preserve delimiter boundaries since + # lookbehind/lookahead widths can vary with custom delimiters. + clean_start = f"({vo}|{bo}|{co})" + clean_end = f"({vc}|{bc}|{cc})" + src_xml = re.sub( + clean_start + r"(.*?)" + clean_end, + lambda m: m.group(1) + _clean_inner(m.group(2)) + m.group(3), + src_xml, + flags=re.DOTALL, + ) return src_xml @@ -372,7 +406,8 @@ def render_footnotes( xml = self.patch_xml( part.blob.decode("utf-8") if isinstance(part.blob, bytes) - else part.blob + else part.blob, + jinja_env, ) xml = self.render_xml_part(xml, part, context, jinja_env) part._blob = xml.encode("utf-8") @@ -432,7 +467,7 @@ def resolve_paragraph(m): def build_xml(self, context, jinja_env=None): xml = self.get_xml() - xml = self.patch_xml(xml) + xml = self.patch_xml(xml, jinja_env) xml = self.render_xml_part(xml, self.docx._part, context, jinja_env) return xml @@ -459,7 +494,7 @@ def build_headers_footers_xml(self, context, uri, jinja_env=None): for relKey, part in self.get_headers_footers(uri): xml = self.get_part_xml(part) encoding = self.get_headers_footers_encoding(xml) - xml = self.patch_xml(xml) + xml = self.patch_xml(xml, jinja_env) xml = self.render_xml_part(xml, part, context, jinja_env) yield relKey, xml.encode(encoding) @@ -901,14 +936,14 @@ def get_undeclared_template_variables( # Get XML from the temporary document xml = self.xml_to_string(temp_doc._element.body) - xml = self.patch_xml(xml) + xml = self.patch_xml(xml, jinja_env) # Add headers and footers for uri in [self.HEADER_URI, self.FOOTER_URI]: for relKey, val in temp_doc._part.rels.items(): if (val.reltype == uri) and (val.target_part.blob): _xml = self.xml_to_string(parse_xml(val.target_part.blob)) - xml += self.patch_xml(_xml) + xml += self.patch_xml(_xml, jinja_env) if jinja_env: env = jinja_env diff --git a/tests/custom_delimiters.py b/tests/custom_delimiters.py new file mode 100644 index 0000000..dbbd621 --- /dev/null +++ b/tests/custom_delimiters.py @@ -0,0 +1,98 @@ +"""Test that custom Jinja2 delimiters work with patch_xml. + +This verifies that patch_xml properly strips XML tags from inside +user-configured Jinja2 blocks when using non-default delimiters +(like single braces {} instead of double braces {{}}). +""" +import sys, os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from docxtpl import DocxTemplate +from jinja2 import Environment +import zipfile, re + +TEMPLATE = os.path.join(os.path.dirname(__file__), + "templates", "custom_delimiters_tpl.docx") +OUTPUT = os.path.join(os.path.dirname(__file__), + "output", "custom_delimiters.docx") + + +def _get_text_from_docx(path): + """Extract plain text from a docx file for assertion.""" + with zipfile.ZipFile(path, "r") as z: + xml = z.read("word/document.xml").decode("utf-8") + return re.sub(r"<[^>]+>", "", xml) + + +def test_custom_delimiters(): + """Custom { } delimiters should render correctly even when + variables are split across multiple XML runs.""" + tpl = DocxTemplate(TEMPLATE) + jinja_env = Environment( + variable_start_string="{", + variable_end_string="}", + ) + tpl.render({"name": "Alice", "score": "95"}, jinja_env) + tpl.save(OUTPUT) + + text = _get_text_from_docx(OUTPUT) + print("Rendered text:", repr(text)) + + # Both variables should be substituted + assert "{name}" not in text, "Variable {name} was not rendered!" + assert "{score}" not in text, "Variable {score} was not rendered!" + assert "Alice" in text, "Name should appear in output" + assert "95" in text, "Score should appear in output" + + # No leftover braces + assert "{" not in text, "Leftover { in output" + assert "}" not in text, "Leftover } in output" + + print("✅ custom_delimiters: PASS") + + +def test_default_delimiters_still_work(): + """Default {{ }} delimiters should still work (backward compat).""" + import io as _io + + # Create a template with default delimiters + default_xml = """ + + + + Hello {{ + name + }}! + + +""" + buf = _io.BytesIO() + with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf: + zf.writestr("[Content_Types].xml", """ + + + + +""") + zf.writestr("_rels/.rels", """ + + +""") + zf.writestr("word/document.xml", default_xml) + + out_default = os.path.join(os.path.dirname(__file__), + "output", "custom_delimiters_default.docx") + tpl = DocxTemplate(buf) + tpl.render({"name": "Bob"}) + tpl.save(out_default) + + text = _get_text_from_docx(out_default) + print("Rendered text (default):", repr(text)) + assert "Bob" in text, "Name should appear in output" + assert "{{" not in text, "Leftover {{ in output" + print("✅ default_delimiters: PASS") + + +if __name__ == "__main__": + test_custom_delimiters() + test_default_delimiters_still_work() diff --git a/tests/templates/custom_delimiters_tpl.docx b/tests/templates/custom_delimiters_tpl.docx new file mode 100644 index 0000000..3246910 Binary files /dev/null and b/tests/templates/custom_delimiters_tpl.docx differ