Skip to content

Commit 9451bfa

Browse files
a.pirogov authored and apirogov committed
completed xmlproxy
1 parent cc39ed3 commit 9451bfa

6 files changed

Lines changed: 343 additions & 80 deletions

File tree

src/somesy/pom_xml/xmlproxy.py

Lines changed: 126 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@
1111
JSONLike = Any
1212

1313

14+
def load_xml(path: Path) -> ET.ElementTree:
15+
"""Parse an XML file into an ElementTree, preserving comments."""
16+
path = path if isinstance(path, Path) else Path(path)
17+
parser = DET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
18+
return DET.parse(path, parser=parser)
19+
20+
1421
class XMLProxy:
1522
"""Class providing dict-like access to edit XML via ElementTree.
1623
@@ -27,33 +34,38 @@ class XMLProxy:
2734
* Otherwise, the node is returned
2835
"""
2936

30-
def __init__(self, el: ET.Element, *, default_namespace: Optional[str] = None):
31-
"""Wrap an existing XML ElementTree Element."""
32-
self._node: ET.Element = el
33-
self._def_ns = default_namespace
34-
3537
def _wrap(self, el: ET.Element) -> XMLProxy:
36-
"""Wrap different element, inheriting the namespace."""
38+
"""Wrap a different element, inheriting the same namespace."""
3739
return XMLProxy(el, default_namespace=self._def_ns)
3840

41+
def _dump(self):
42+
"""Dump XML to stdout (for debugging)."""
43+
ET.dump(self._node)
44+
3945
def _qualified_key(self, key: str):
4046
"""If passed key is not qualified, prepends the default namespace (if set)."""
4147
if key[0] == "{" or not self._def_ns:
4248
return key
4349
return "{" + self._def_ns + "}" + key
4450

4551
def _shortened_key(self, key: str):
46-
"""Inverse of `_qualified_key`."""
52+
"""Inverse of `_qualified_key` (strips default namespace from element name)."""
4753
if key[0] != "{" or not self._def_ns or key.find(self._def_ns) < 0:
4854
return key
4955
return key[key.find("}") + 1 :]
5056

57+
# ----
58+
59+
def __init__(self, el: ET.Element, *, default_namespace: Optional[str] = None):
60+
"""Wrap an existing XML ElementTree Element."""
61+
self._node: ET.Element = el
62+
self._def_ns = default_namespace
63+
5164
@classmethod
5265
def parse(cls, path: Union[str, Path], **kwargs) -> XMLProxy:
53-
"""Parse an XML file into an ElementTree, preserving comments."""
66+
"""Parse an XML file into a wrapped ElementTree, preserving comments."""
5467
path = path if isinstance(path, Path) else Path(path)
55-
parser = DET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
56-
return cls(DET.parse(path, parser=parser).getroot(), **kwargs)
68+
return cls(load_xml(path).getroot(), **kwargs)
5769

5870
def write(self, path: Union[str, Path], *, header: bool = True, **kwargs):
5971
"""Write the XML DOM to an UTF-8 encoded file."""
@@ -78,25 +90,48 @@ def __iter__(self):
7890
"""Iterate the nested elements in-order."""
7991
return map(self._wrap, iter(self._node))
8092

81-
def _dump(self):
82-
"""Dump XML to stdout (for debugging)."""
83-
ET.dump(self._node)
93+
@property
94+
def namespace(self) -> Optional[str]:
95+
"""Default namespace of this node."""
96+
return self._def_ns
97+
98+
@property
99+
def is_comment(self):
100+
"""Return whether the current element node is an XML comment."""
101+
return not isinstance(self._node.tag, str)
102+
103+
@property
104+
def tag(self) -> Optional[str]:
105+
"""Return tag name of this element (unless it is a comment)."""
106+
if self.is_comment:
107+
return None
108+
return self._shortened_key(self._node.tag)
109+
110+
@tag.setter
111+
def tag(self, val: str):
112+
"""Set the tag of this element."""
113+
if self.is_comment:
114+
raise ValueError("Cannot set tag name for comment element!")
115+
self._node.tag = self._qualified_key(val)
84116

85117
# ---- helpers ----
86118

87119
def to_jsonlike(
88-
self, *, strip_default_ns: bool = True, keep_root: bool = False
120+
self,
121+
*,
122+
strip_default_ns: bool = True,
123+
keep_root: bool = False,
89124
) -> JSONLike:
90125
"""Convert XML node to a JSON-like primitive, array or dict (ignoring attributes).
91126
92-
Note that comments are ignored and all leaf values are strings.
127+
Note that all leaf values are strings (i.e. not parsed to bool/int/float etc.).
93128
94129
Args:
95130
strip_default_ns: Do not qualify keys from the default namespace
96131
keep_root: If true, the root tag name will be preserved (`{"root_tag": {...}}`)
97132
"""
98133
if not len(self): # leaf -> assume it's a primitive value
99-
return self._node.text.strip()
134+
return self._node.text or ""
100135

101136
dct = {}
102137
ccnt = 0
@@ -120,15 +155,15 @@ def to_jsonlike(
120155
return dct if not keep_root else {self._shortened_key(self._node.tag): dct}
121156

122157
@classmethod
123-
def from_jsonlike_primitive(
158+
def _from_jsonlike_primitive(
124159
cls, val, *, elem_name: Optional[str] = None, **kwargs
125160
) -> Union[str, XMLProxy]:
126161
"""Convert a leaf node into a string value (i.e. return inner text).
127162
128163
Returns a string (or an XML element, if elem_name is passed).
129164
"""
130165
if val is None:
131-
ret = "null" # turn None into Java null
166+
ret = "" # turn None into empty string
132167
elif isinstance(val, str):
133168
ret = val
134169
elif isinstance(val, bool):
@@ -163,7 +198,7 @@ def from_jsonlike(cls, val, *, root_name: Optional[str] = None, **kwargs):
163198
map(lambda x: cls.from_jsonlike(x, root_name=root_name, **kwargs), val)
164199
)
165200
if not isinstance(val, dict): # primitive val
166-
return cls.from_jsonlike_primitive(val, elem_name=root_name, **kwargs)
201+
return cls._from_jsonlike_primitive(val, elem_name=root_name, **kwargs)
167202

168203
# now the dict case remains
169204
elem = ET.Element(root_name or "root")
@@ -179,7 +214,8 @@ def from_jsonlike(cls, val, *, root_name: Optional[str] = None, **kwargs):
179214
elif not isinstance(v, dict): # primitive val
180215
# FIXME: use better case-splitting for type of function to avoid cast
181216
tmp = cast(
182-
XMLProxy, XMLProxy.from_jsonlike_primitive(v, elem_name=k, **kwargs)
217+
XMLProxy,
218+
XMLProxy._from_jsonlike_primitive(v, elem_name=k, **kwargs),
183219
)
184220
elem.append(tmp._node)
185221
else: # dict
@@ -200,23 +236,27 @@ def get(self, key: str, *, as_nodes: bool = False, deep: bool = False):
200236
as_nodes: If true, will *always* return a list of (zero or more) XML nodes
201237
deep: Expand nested XML elements instead of returning them as XML nodes
202238
"""
239+
# NOTE: could allow retrieving comments when an empty string or None is passed as key?
240+
203241
if as_nodes and deep:
204242
raise ValueError("as_nodes and deep are mutually exclusive!")
205243
if not key:
206244
raise ValueError("Key must not be an empty string!")
245+
key = self._qualified_key(key)
207246

208247
# if not fully qualified + default NS is given, use it for query
209-
if lst := self._node.findall(self._qualified_key(key)):
210-
ns: List[XMLProxy] = list(map(self._wrap, lst))
211-
if as_nodes: # return it as a list of xml nodes
212-
return ns
213-
214-
# apply canonical dict-ification
215-
ret: Union[List[XMLProxy], List[JSONLike]] = (
216-
ns if not deep else [x.to_jsonlike() for x in ns]
217-
)
218-
if ret: # if list has just one element -> return that
219-
return lst[0] if len(lst) == 1 else lst
248+
lst = self._node.findall(key)
249+
ns: List[XMLProxy] = list(map(self._wrap, lst))
250+
if as_nodes: # return it as a list of xml nodes
251+
return ns
252+
if not ns: # no element
253+
return None
254+
255+
ret = ns if not deep else [x.to_jsonlike() for x in ns]
256+
if len(ret) == 1:
257+
return ret[0] # single element
258+
else:
259+
return ret
220260

221261
def __getitem__(self, key: str):
222262
"""Acts like `dict.__getitem__`, implemented with `get`."""
@@ -259,18 +299,27 @@ def __delitem__(self, key: Union[str, XMLProxy]):
259299
if not nodes:
260300
raise KeyError(key)
261301

262-
self._node.text = ""
302+
if self._node.text is not None:
303+
self._node.text = ""
263304
for child in nodes:
264305
self._node.remove(child._node)
265306

307+
def _clear(self):
308+
"""Remove contents of this XML element (e.g. for overwriting in-place)."""
309+
self._node.text = ""
310+
children = list(iter(self._node)) # need to store, removal invalidates iterator
311+
for child in children:
312+
self._node.remove(child)
313+
266314
def __setitem__(self, key: Union[str, XMLProxy], val: Union[JSONLike, XMLProxy]):
267315
"""Add or overwrite an inner XML tag.
268316
269317
If there is exactly one matching tag, the value is substituted in-place.
270318
If the passed value is a list, all list entries are added in their own element.
271319
272-
If there are multiple existing matches, **all** existing elements are removed
273-
and the new value is added with as a new element (i.e. coming last)!
320+
If there are multiple existing matches or target values, then
321+
**all** existing elements are removed and the new value(s) are added in
322+
new element(s) (i.e. coming after other unrelated existing elements)!
274323
275324
To prevent this behavior, instead of a string tag name you can provide the
276325
exact element to be overwritten, i.e. if a node `node_a` represents the following XML:
@@ -290,38 +339,49 @@ def __setitem__(self, key: Union[str, XMLProxy], val: Union[JSONLike, XMLProxy])
290339
291340
Note that the passed value must be either an XML element already, or be a pure JSON-like object.
292341
"""
293-
# TODO: what about assigning a list of stuff? add that, then write tests
294-
295342
if isinstance(key, str):
296-
nodes = self.get(key, as_nodes=True) or []
297-
if (
298-
len(nodes) > 1
299-
): # delete all existing elements in case there are multiple
343+
nodes = self.get(key, as_nodes=True)
344+
# delete all existing elements if multiple exist or are passed
345+
if len(nodes) > 1 or isinstance(val, list):
300346
del self[key]
301347
nodes = []
302-
if not nodes: # create new element if there were multiple or none
303-
node = self._wrap(ET.SubElement(self._node, self._qualified_key(key)))
304-
else: # take the unique matching node, empty it out (text + inner tags)
305-
node = nodes[0]
306-
else: # an XMLProxy object was passed as key -> use that
307-
node = key
308-
309-
# ensure the target node is cleared out (e.g. when reusing existing element)
310-
node._node.text = ""
311-
for child in list(
312-
iter(node._node)
313-
): # need to store in list, removal invalidates iterator
314-
node._node.remove(child)
315-
316-
# ensure value is represented as an XML node
317-
if not isinstance(val, XMLProxy):
318-
val = self.from_jsonlike(val, root_name=self._shortened_key(self._node.tag))
319-
else:
320-
wrapped = self._wrap(ET.Element("dummy"))
321-
wrapped._node.append(val._node)
322-
val = wrapped
323-
324-
# transplant node contents into existing element (so it is inserted in-place)
325-
node._node.text = val._node.text
326-
for child in iter(val):
327-
node._node.append(child._node)
348+
# now we can assume there's zero or one suitable target elements
349+
if nodes: # if it is one, clear it out
350+
nodes[0]._clear()
351+
else: # an XMLProxy object was passed as key -> try to use that
352+
if isinstance(val, list):
353+
raise ValueError(
354+
"Cannot overwrite a single element with a list of values!"
355+
)
356+
# ensure the target node is cleared out and use it as target
357+
key._clear()
358+
nodes = [key]
359+
key = key.tag
360+
361+
# ensure key string is qualified with a namespace
362+
key_name: str = self._qualified_key(key)
363+
364+
# normalize passed value(s) to be list (general case)
365+
vals = val if isinstance(val, list) else [val]
366+
367+
# ensure there is the required number of target element nodes
368+
for _ in range(len(vals) - len(nodes)):
369+
nodes.append(self._wrap(ET.SubElement(self._node, key_name)))
370+
371+
# normalize values to XML element nodes
372+
nvals = []
373+
for val in vals:
374+
# ensure value is represented as an XML node
375+
if isinstance(val, XMLProxy):
376+
obj = self._wrap(ET.Element("dummy"))
377+
obj._node.append(val._node)
378+
else:
379+
obj = self.from_jsonlike(val, root_name=key_name)
380+
381+
nvals.append(obj)
382+
383+
for node, val in zip(nodes, nvals):
384+
# transplant node contents into existing element (so it is inserted in-place)
385+
node._node.text = val._node.text
386+
for child in iter(val):
387+
node._node.append(child._node)

tests/conftest.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@
1010
from somesy.package_json.writer import PackageJSON
1111
from somesy.pyproject import Pyproject
1212
from somesy.julia import Julia
13+
from somesy.pom_xml import load_xml
14+
15+
TEST_DIR = Path(__file__).resolve().parent
16+
17+
TEST_DATA_DIR = TEST_DIR / "data"
18+
"""Location of the test input data."""
1319

1420

1521
class FileTypes(Enum):
@@ -101,7 +107,7 @@ def _load_files(files: Set[FileTypes]):
101107
if not isinstance(file_type, FileTypes):
102108
raise ValueError(f"Invalid file type: {file_type}")
103109

104-
read_file_name = Path("tests/data")
110+
read_file_name = TEST_DATA_DIR
105111
if file_type == FileTypes.CITATION:
106112
read_file_name = read_file_name / Path("CITATION.cff")
107113
file_instances[file_type] = CFF(read_file_name)
@@ -137,3 +143,11 @@ def person() -> Person:
137143
ret = Person.model_validate(p)
138144
ret.set_key_order(list(p.keys())) # custom order!
139145
return ret
146+
147+
148+
@pytest.fixture
149+
def xml_examples():
150+
def _xml_loader(filename: str) -> Path:
151+
return TEST_DATA_DIR / filename
152+
153+
yield _xml_loader

tests/data/blank_pom.xml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
<?xml version="1.0" encoding="UTF-8"?>
1+
<?xml version='1.0' encoding='UTF-8'?>
22
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
3-
</project>
3+
<!-- contents of POM file -->
4+
</project>

tests/data/example_1.xml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
2+
<nothing />
3+
<emptyString></emptyString>
4+
<listEntry>foo</listEntry>
5+
<!-- comment 1 -->
6+
<aBool>true</aBool>
7+
<!-- comment 2 -->
8+
<anInt>42</anInt>
9+
<listEntry>
10+
<someDict>
11+
<a>x</a>
12+
<b>y</b>
13+
</someDict>
14+
<someValue>z</someValue>
15+
</listEntry>
16+
<aFloat>3.14</aFloat>
17+
</project>

0 commit comments

Comments
 (0)