1111JSONLike = Any
1212
1313
def load_xml(path: Union[str, Path]) -> ET.ElementTree:
    """Parse an XML file into an ElementTree, preserving comments.

    Args:
        path: Location of the XML file, given as a string or a `Path`.

    Returns:
        The parsed element tree (comment nodes are kept in the tree).
    """
    # a TreeBuilder created with insert_comments=True keeps XML comments
    # as nodes of the tree instead of discarding them while parsing
    parser = DET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
    return DET.parse(Path(path), parser=parser)
19+
20+
1421class XMLProxy :
1522 """Class providing dict-like access to edit XML via ElementTree.
1623
@@ -27,33 +34,38 @@ class XMLProxy:
2734 * Otherwise, the node is returned
2835 """
2936
30- def __init__ (self , el : ET .Element , * , default_namespace : Optional [str ] = None ):
31- """Wrap an existing XML ElementTree Element."""
32- self ._node : ET .Element = el
33- self ._def_ns = default_namespace
34-
def _wrap(self, el: ET.Element) -> XMLProxy:
    """Return a new proxy around `el` that shares this proxy's default namespace."""
    inherited_ns = self._def_ns
    return XMLProxy(el, default_namespace=inherited_ns)
3840
def _dump(self):
    """Print the XML of the wrapped element to stdout (debugging aid)."""
    wrapped = self._node
    ET.dump(wrapped)
44+
def _qualified_key(self, key: str) -> str:
    """Qualify a key with the default namespace, if that is needed.

    A key that is already qualified (i.e. starts with `{`) is returned
    unchanged, and so is every key if no default namespace is set.
    Otherwise `{<default namespace>}` is prepended.

    Raises:
        ValueError: If the key is empty (instead of an opaque IndexError).
    """
    if not key:
        raise ValueError("Key must not be an empty string!")
    if key.startswith("{") or not self._def_ns:
        return key
    return "{" + self._def_ns + "}" + key
4450
def _shortened_key(self, key: str) -> str:
    """Inverse of `_qualified_key` (strips default namespace from element name).

    Only a key qualified with exactly the default namespace is shortened.
    Unqualified keys, keys from any other namespace (even one that merely
    contains the default namespace as a substring) and all keys when no
    default namespace is set are returned unchanged.
    """
    if not self._def_ns:
        return key
    # match the complete `{namespace}` prefix, not just a substring of the key
    prefix = "{" + self._def_ns + "}"
    if key.startswith(prefix):
        return key[len(prefix):]
    return key
5056
57+ # ----
58+
def __init__(self, el: ET.Element, *, default_namespace: Optional[str] = None):
    """Create a proxy around an existing XML ElementTree Element.

    Args:
        el: The element to be wrapped.
        default_namespace: Namespace stored as default for this proxy (optional).
    """
    self._def_ns = default_namespace
    self._node: ET.Element = el
63+
@classmethod
def parse(cls, path: Union[str, Path], **kwargs) -> XMLProxy:
    """Load an XML file (keeping comments) and wrap its root element.

    Additional keyword arguments are passed on to the class constructor.
    """
    if not isinstance(path, Path):
        path = Path(path)
    tree = load_xml(path)
    return cls(tree.getroot(), **kwargs)
5769
5870 def write (self , path : Union [str , Path ], * , header : bool = True , ** kwargs ):
5971 """Write the XML DOM to an UTF-8 encoded file."""
@@ -78,25 +90,48 @@ def __iter__(self):
7890 """Iterate the nested elements in-order."""
7991 return map (self ._wrap , iter (self ._node ))
8092
81- def _dump (self ):
82- """Dump XML to stdout (for debugging)."""
83- ET .dump (self ._node )
@property
def namespace(self) -> Optional[str]:
    """Return the default namespace configured for this node (may be None)."""
    default_ns = self._def_ns
    return default_ns
97+
@property
def is_comment(self):
    """Return whether the current element node is an XML comment.

    An element is treated as a comment if its tag is not a string.
    """
    node_tag = self._node.tag
    return not isinstance(node_tag, str)
102+
@property
def tag(self) -> Optional[str]:
    """Return the tag name of this element, shortened via `_shortened_key`.

    For a comment node there is no tag name and None is returned.
    """
    return None if self.is_comment else self._shortened_key(self._node.tag)

@tag.setter
def tag(self, val: str):
    """Rename this element, qualifying the new name via `_qualified_key`.

    Raises:
        ValueError: If this node is a comment.
    """
    if self.is_comment:
        raise ValueError("Cannot set tag name for comment element!")
    qualified = self._qualified_key(val)
    self._node.tag = qualified
84116
85117 # ---- helpers ----
86118
87119 def to_jsonlike (
88- self , * , strip_default_ns : bool = True , keep_root : bool = False
120+ self ,
121+ * ,
122+ strip_default_ns : bool = True ,
123+ keep_root : bool = False ,
89124 ) -> JSONLike :
90125 """Convert XML node to a JSON-like primitive, array or dict (ignoring attributes).
91126
92- Note that comments are ignored and all leaf values are strings.
127+ Note that all leaf values are strings (i.e. not parsed to bool/int/float etc.) .
93128
94129 Args:
95130 strip_default_ns: Do not qualify keys from the default namespace
96131 keep_root: If true, the root tag name will be preserved (`{"root_tag": {...}}`)
97132 """
98133 if not len (self ): # leaf -> assume it's a primitive value
99- return self ._node .text . strip ()
134+ return self ._node .text or ""
100135
101136 dct = {}
102137 ccnt = 0
@@ -120,15 +155,15 @@ def to_jsonlike(
120155 return dct if not keep_root else {self ._shortened_key (self ._node .tag ): dct }
121156
122157 @classmethod
123- def from_jsonlike_primitive (
158+ def _from_jsonlike_primitive (
124159 cls , val , * , elem_name : Optional [str ] = None , ** kwargs
125160 ) -> Union [str , XMLProxy ]:
126161 """Convert a leaf node into a string value (i.e. return inner text).
127162
128163 Returns a string (or an XML element, if elem_name is passed).
129164 """
130165 if val is None :
131- ret = "null " # turn None into Java null
166+ ret = "" # turn None into empty string
132167 elif isinstance (val , str ):
133168 ret = val
134169 elif isinstance (val , bool ):
@@ -163,7 +198,7 @@ def from_jsonlike(cls, val, *, root_name: Optional[str] = None, **kwargs):
163198 map (lambda x : cls .from_jsonlike (x , root_name = root_name , ** kwargs ), val )
164199 )
165200 if not isinstance (val , dict ): # primitive val
166- return cls .from_jsonlike_primitive (val , elem_name = root_name , ** kwargs )
201+ return cls ._from_jsonlike_primitive (val , elem_name = root_name , ** kwargs )
167202
168203 # now the dict case remains
169204 elem = ET .Element (root_name or "root" )
@@ -179,7 +214,8 @@ def from_jsonlike(cls, val, *, root_name: Optional[str] = None, **kwargs):
179214 elif not isinstance (v , dict ): # primitive val
180215 # FIXME: use better case-splitting for type of function to avoid cast
181216 tmp = cast (
182- XMLProxy , XMLProxy .from_jsonlike_primitive (v , elem_name = k , ** kwargs )
217+ XMLProxy ,
218+ XMLProxy ._from_jsonlike_primitive (v , elem_name = k , ** kwargs ),
183219 )
184220 elem .append (tmp ._node )
185221 else : # dict
@@ -200,23 +236,27 @@ def get(self, key: str, *, as_nodes: bool = False, deep: bool = False):
200236 as_nodes: If true, will *always* return a list of (zero or more) XML nodes
201237 deep: Expand nested XML elements instead of returning them as XML nodes
202238 """
239+ # NOTE: could allow to retrieve comments when using empty string/none as key?
240+
203241 if as_nodes and deep :
204242 raise ValueError ("as_nodes and deep are mutually exclusive!" )
205243 if not key :
206244 raise ValueError ("Key must not be an empty string!" )
245+ key = self ._qualified_key (key )
207246
208247 # if not fully qualified + default NS is given, use it for query
209- if lst := self ._node .findall (self ._qualified_key (key )):
210- ns : List [XMLProxy ] = list (map (self ._wrap , lst ))
211- if as_nodes : # return it as a list of xml nodes
212- return ns
213-
214- # apply canonical dict-ification
215- ret : Union [List [XMLProxy ], List [JSONLike ]] = (
216- ns if not deep else [x .to_jsonlike () for x in ns ]
217- )
218- if ret : # if list has just one element -> return that
219- return lst [0 ] if len (lst ) == 1 else lst
248+ lst = self ._node .findall (key )
249+ ns : List [XMLProxy ] = list (map (self ._wrap , lst ))
250+ if as_nodes : # return it as a list of xml nodes
251+ return ns
252+ if not ns : # no element
253+ return None
254+
255+ ret = ns if not deep else [x .to_jsonlike () for x in ns ]
256+ if len (ret ) == 1 :
257+ return ret [0 ] # single element
258+ else :
259+ return ret
220260
221261 def __getitem__ (self , key : str ):
222262 """Acts like `dict.__getitem__`, implemented with `get`."""
@@ -259,18 +299,27 @@ def __delitem__(self, key: Union[str, XMLProxy]):
259299 if not nodes :
260300 raise KeyError (key )
261301
262- self ._node .text = ""
302+ if self ._node .text is not None :
303+ self ._node .text = ""
263304 for child in nodes :
264305 self ._node .remove (child ._node )
265306
def _clear(self):
    """Empty this XML element (e.g. so it can be refilled in-place).

    The text is reset to an empty string and all child nodes are removed.
    """
    target = self._node
    target.text = ""
    # slice deletion drops all children at once, no iteration while mutating
    del target[:]
313+
def __setitem__(self, key: Union[str, XMLProxy], val: Union[JSONLike, XMLProxy]):
    """Add or overwrite an inner XML tag.

    If there is exactly one matching tag, the value is substituted in-place.
    If the passed value is a list, all list entries are added in their own element.

    If there are multiple existing matches or target values, then
    **all** existing elements are removed and the new value(s) are added in
    new element(s) (i.e. coming after other unrelated existing elements)!

    To prevent this behavior, instead of a string tag name you can provide the
    exact element to be overwritten (as an `XMLProxy` node). Then only that
    element is cleared and refilled, all other elements are left untouched.

    Note that the passed value must be either an XML element already, or be a pure JSON-like object.

    Raises:
        ValueError: If an element is passed as key together with a list value,
            or if the element passed as key is a comment.
    """
    if isinstance(key, str):
        tag_name = key
        nodes = self.get(tag_name, as_nodes=True)
        # Delete all existing elements if multiple exist or are passed.
        # Deleting raises KeyError if there is nothing to delete, so this must
        # only happen if at least one element exists (otherwise assigning a
        # list to a not yet existing tag would fail).
        if len(nodes) > 1 or (nodes and isinstance(val, list)):
            del self[tag_name]
            nodes = []
        # now we can assume there's zero or one suitable target elements
        if nodes:  # if it is one, clear it out
            nodes[0]._clear()
    else:  # an XMLProxy object was passed as key -> try to use that
        if isinstance(val, list):
            raise ValueError(
                "Cannot overwrite a single element with a list of values!"
            )
        # a comment has no tag name, reject it before modifying anything
        if key.is_comment:
            raise ValueError("Cannot overwrite a comment element!")
        # ensure the target node is cleared out and use it as target
        key._clear()
        nodes = [key]
        tag_name = key.tag

    # ensure key string is qualified with a namespace
    key_name: str = self._qualified_key(tag_name)

    # normalize passed value(s) to be list (general case)
    vals = val if isinstance(val, list) else [val]

    # ensure there is the required number of target element nodes
    for _ in range(len(vals) - len(nodes)):
        nodes.append(self._wrap(ET.SubElement(self._node, key_name)))

    # normalize values to XML element nodes
    sources = []
    for item in vals:
        if isinstance(item, XMLProxy):
            # put the element into a container, so it is transplanted as a child
            holder = self._wrap(ET.Element("dummy"))
            holder._node.append(item._node)
        else:
            holder = self.from_jsonlike(item, root_name=key_name)
        sources.append(holder)

    for node, src in zip(nodes, sources):
        # transplant node contents into existing element (so it is inserted in-place)
        node._node.text = src._node.text
        for child in src._node:
            node._node.append(child)
0 commit comments