Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 57 additions & 22 deletions docxtpl/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,39 @@ def write_xml(self, filename):
with open(filename, "w") as fh:
fh.write(self.get_xml())

def patch_xml(self, src_xml):
@staticmethod
def _get_delim_repr(env, attr, default):
    """Return one Jinja2 delimiter, escaped for use inside a regex.

    ``env`` is the Jinja2 environment, or ``None`` when the caller supplied
    none. ``attr`` names the delimiter attribute to read from ``env`` and
    ``default`` is the delimiter used when ``env`` is ``None``.
    """
    # Pick the raw delimiter first, then escape it in a single place.
    delimiter = default if env is None else getattr(env, attr)
    return re.escape(delimiter)

def patch_xml(self, src_xml, jinja_env=None):
"""Make a lots of cleaning to have a raw xml understandable by jinja2 :
strip all unnecessary xml tags, manage table cell background color and colspan,
unescape html entities, etc..."""

# Resolve delimiter strings (regex-escaped) for dynamic patterns.
# When jinja_env is None, defaults to standard Jinja2 delimiters.
vo = self._get_delim_repr(jinja_env, "variable_start_string", "{{")
vc = self._get_delim_repr(jinja_env, "variable_end_string", "}}")
bo = self._get_delim_repr(jinja_env, "block_start_string", "{%")
bc = self._get_delim_repr(jinja_env, "block_end_string", "%}")
co = self._get_delim_repr(jinja_env, "comment_start_string", "{#")
cc = self._get_delim_repr(jinja_env, "comment_end_string", "#}")

# Build a union pattern matching any Jinja2 tag:
# block_start ... block_end | comment_start ... comment_end | variable_start ... variable_end
def _tag_union():
    """Return a regex alternation matching the opening part of any tag.

    Each alternative matches a start delimiter followed by every character
    up to, but not including, the matching end delimiter. Alternatives are
    emitted in the order block, comment, variable; a pair is left out when
    either of its delimiters is empty. Returns ``""`` when no pair is usable.
    """
    delimiter_pairs = ((bo, bc), (co, cc), (vo, vc))
    return "|".join(
        f"{opener}(?:(?!{closer}).)*"
        for opener, closer in delimiter_pairs
        if opener and closer
    )

# replace {<something>{ by {{ ( works with {{ }} {% and %} {# and #})
src_xml = re.sub(
r"(?<={)(<[^>]*>)+(?=[\{%\#])|(?<=[%\}\#])(<[^>]*>)+(?=\})",
Expand All @@ -103,12 +131,14 @@ def striptags(m):
"</w:t>.*?(<w:t>|<w:t [^>]*>)", "", m.group(0), flags=re.DOTALL
)

src_xml = re.sub(
r"{%(?:(?!%}).)*|{#(?:(?!#}).)*|{{(?:(?!}}).)*",
striptags,
src_xml,
flags=re.DOTALL,
)
tag_pat = _tag_union()
if tag_pat:
src_xml = re.sub(
tag_pat,
striptags,
src_xml,
flags=re.DOTALL,
)

# manage table cell colspan
def colspan(m):
Expand Down Expand Up @@ -286,19 +316,23 @@ def without_gridspan(m2):
flags=re.DOTALL,
)

def clean_tags(m):
def _clean_inner(text):
return (
m.group(0)
.replace(r"&#8216;", "'")
.replace("&lt;", "<")
.replace("&gt;", ">")
.replace("“", '"')
.replace("”", '"')
.replace("‘", "'")
.replace("’", "'")
text.replace("&#8216;", "'").replace("&lt;", "<").replace("&gt;", ">").replace("“", '"').replace("”", '"').replace("‘", "'").replace("’", "'")
)

src_xml = re.sub(r"(?<=\{[\{%])(.*?)(?=[\}%]})", clean_tags, src_xml)
# Build a dynamic pattern to match content *inside* any Jinja2 tag
# (between start and end delimiters) and apply HTML entity cleanup.
# Uses capture groups to preserve delimiter boundaries since
# lookbehind/lookahead widths can vary with custom delimiters.
clean_start = f"({vo}|{bo}|{co})"
clean_end = f"({vc}|{bc}|{cc})"
src_xml = re.sub(
clean_start + r"(.*?)" + clean_end,
lambda m: m.group(1) + _clean_inner(m.group(2)) + m.group(3),
src_xml,
flags=re.DOTALL,
)

return src_xml

Expand Down Expand Up @@ -372,7 +406,8 @@ def render_footnotes(
xml = self.patch_xml(
part.blob.decode("utf-8")
if isinstance(part.blob, bytes)
else part.blob
else part.blob,
jinja_env,
)
xml = self.render_xml_part(xml, part, context, jinja_env)
part._blob = xml.encode("utf-8")
Expand Down Expand Up @@ -432,7 +467,7 @@ def resolve_paragraph(m):

def build_xml(self, context, jinja_env=None):
    """Return the main document XML rendered against ``context``.

    The XML from ``self.get_xml()`` is cleaned once by ``patch_xml`` and
    then rendered by ``render_xml_part`` for the main document part.

    ``jinja_env`` is forwarded to both steps so that patching and rendering
    agree on the delimiters in use; it may be ``None``.
    """
    xml = self.get_xml()
    # Patch exactly once, with the caller's environment. A second call
    # without ``jinja_env`` would patch as if no environment had been given.
    xml = self.patch_xml(xml, jinja_env)
    xml = self.render_xml_part(xml, self.docx._part, context, jinja_env)
    return xml

Expand All @@ -459,7 +494,7 @@ def build_headers_footers_xml(self, context, uri, jinja_env=None):
for relKey, part in self.get_headers_footers(uri):
xml = self.get_part_xml(part)
encoding = self.get_headers_footers_encoding(xml)
xml = self.patch_xml(xml)
xml = self.patch_xml(xml, jinja_env)
xml = self.render_xml_part(xml, part, context, jinja_env)
yield relKey, xml.encode(encoding)

Expand Down Expand Up @@ -901,14 +936,14 @@ def get_undeclared_template_variables(

# Get XML from the temporary document
xml = self.xml_to_string(temp_doc._element.body)
xml = self.patch_xml(xml)
xml = self.patch_xml(xml, jinja_env)

# Add headers and footers
for uri in [self.HEADER_URI, self.FOOTER_URI]:
for relKey, val in temp_doc._part.rels.items():
if (val.reltype == uri) and (val.target_part.blob):
_xml = self.xml_to_string(parse_xml(val.target_part.blob))
xml += self.patch_xml(_xml)
xml += self.patch_xml(_xml, jinja_env)

if jinja_env:
env = jinja_env
Expand Down
98 changes: 98 additions & 0 deletions tests/custom_delimiters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""Test that custom Jinja2 delimiters work with patch_xml.

This verifies that patch_xml properly strips XML tags from inside
user-configured Jinja2 blocks when using non-default delimiters
(like single braces {} instead of double braces {{}}).
"""
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from docxtpl import DocxTemplate
from jinja2 import Environment
import zipfile, re

# Directory holding this test module; fixture and output paths hang off it.
_TESTS_DIR = os.path.dirname(__file__)

# Input template for the custom-delimiter test.
TEMPLATE = os.path.join(_TESTS_DIR, "templates", "custom_delimiters_tpl.docx")
# Destination of the rendered document.
OUTPUT = os.path.join(_TESTS_DIR, "output", "custom_delimiters.docx")


def _get_text_from_docx(path):
    """Return the text of ``word/document.xml`` with every XML tag removed.

    ``path`` is handed straight to ``zipfile.ZipFile``. The document part is
    decoded as UTF-8 and anything of the form ``<...>`` is stripped, leaving
    only the character data for assertions.
    """
    with zipfile.ZipFile(path, "r") as archive:
        raw_document = archive.read("word/document.xml")
    markup = raw_document.decode("utf-8")
    return re.sub(r"<[^>]+>", "", markup)


def test_custom_delimiters():
    """Render the fixture template using single-brace variable delimiters.

    The environment swaps the variable delimiters for ``{`` and ``}``. After
    rendering, the output text must contain the substituted values and no
    leftover placeholder or brace. The fixture is meant to cover variables
    that are split across several XML runs.
    """
    tpl = DocxTemplate(TEMPLATE)
    env = Environment(variable_start_string="{", variable_end_string="}")
    context = {"name": "Alice", "score": "95"}
    tpl.render(context, env)
    tpl.save(OUTPUT)

    text = _get_text_from_docx(OUTPUT)
    print("Rendered text:", repr(text))

    # Neither placeholder may survive rendering.
    for placeholder in ("{name}", "{score}"):
        assert placeholder not in text, f"Variable {placeholder} was not rendered!"

    # Each context value must show up in the rendered text.
    expected_values = (
        ("Alice", "Name should appear in output"),
        ("95", "Score should appear in output"),
    )
    for value, failure_message in expected_values:
        assert value in text, failure_message

    # No stray delimiter characters may remain.
    for brace in "{}":
        assert brace not in text, f"Leftover {brace} in output"

    print("✅ custom_delimiters: PASS")


def test_default_delimiters_still_work():
    """Check that the standard ``{{ }}`` delimiters still render.

    A minimal docx package is assembled in memory. Its document part holds a
    ``{{name}}`` variable split over three runs. The package is rendered
    without a custom environment and the output text is inspected.
    """
    from io import BytesIO

    # Main document part: the variable is deliberately split across runs.
    document_xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p>
<w:r><w:rPr></w:rPr><w:t>Hello {{</w:t></w:r>
<w:r><w:rPr></w:rPr><w:t>name</w:t></w:r>
<w:r><w:rPr></w:rPr><w:t>}}!</w:t></w:r>
</w:p>
</w:body>
</w:document>"""
    content_types_xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
</Types>"""
    root_rels_xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
</Relationships>"""

    # Write the three package members in the same order as before.
    package = BytesIO()
    with zipfile.ZipFile(package, "w", zipfile.ZIP_DEFLATED) as archive:
        for member, payload in (
            ("[Content_Types].xml", content_types_xml),
            ("_rels/.rels", root_rels_xml),
            ("word/document.xml", document_xml),
        ):
            archive.writestr(member, payload)

    here = os.path.dirname(__file__)
    out_default = os.path.join(here, "output", "custom_delimiters_default.docx")
    template = DocxTemplate(package)
    template.render({"name": "Bob"})
    template.save(out_default)

    rendered = _get_text_from_docx(out_default)
    print("Rendered text (default):", repr(rendered))
    assert "Bob" in rendered, "Name should appear in output"
    assert "{{" not in rendered, "Leftover {{ in output"
    print("✅ default_delimiters: PASS")


if __name__ == "__main__":
    # Run each check in declaration order when executed as a script.
    for check in (test_custom_delimiters, test_default_delimiters_still_work):
        check()
Binary file added tests/templates/custom_delimiters_tpl.docx
Binary file not shown.