@@ -408,25 +408,6 @@ add_attrib(ImageSpec& spec, string_view xmlname, string_view xmlvalue,
408408
409409
410410
411- // Utility: Search str for the first substring in str (starting from
412- // position pos) that starts with startmarker and ends with endmarker.
413- // If not found, return false. If found, return true, store the
414- // beginning and ending indices in startpos and endpos.
415- static bool
416- extract_middle (string_view str, size_t pos, string_view startmarker,
417- string_view endmarker, size_t & startpos, size_t & endpos)
418- {
419- startpos = str.find (startmarker, pos);
420- if (startpos == std::string::npos)
421- return false ; // start marker not found
422- endpos = str.find (endmarker, startpos);
423- if (endpos == std::string::npos)
424- return false ; // end marker not found
425- endpos += endmarker.size ();
426- return true ;
427- }
428-
429-
430411// Decode one XMP node and its children.
431412// Return value is the size of the resulting attribute (can be used to
432413// catch runaway or corrupt XML).
@@ -533,35 +514,35 @@ decode_xmp(string_view xml, ImageSpec& spec)
533514#endif
534515 if (!xml.length ())
535516 return true ;
536- for (size_t startpos = 0 , endpos = 0 ;
537- extract_middle (xml, endpos, " <rdf:Description" , " </rdf:Description>" ,
538- startpos, endpos);) {
539- // Turn that middle section into an XML document
540- string_view rdf = xml.substr (startpos, endpos - startpos); // scooch in
541- #if DEBUG_XMP_READ
542- std::cerr << " RDF is:\n ---\n " << rdf.substr (0 , 4096 ) << " \n ---\n " ;
543- #endif
544- pugi::xml_document doc;
545- pugi::xml_parse_result parse_result
546- = doc.load_buffer (rdf.data (), rdf.size (),
547- pugi::parse_default | pugi::parse_fragment);
548- if (!parse_result) {
517+ // Some callers (e.g. JPEG APP1 markers) prepend a namespace URI before the
518+ // actual XML. Skip any leading non-XML content.
519+ auto xmlstart = xml.find (' <' );
520+ if (xmlstart == string_view::npos)
521+ return true ;
522+ xml = xml.substr (xmlstart);
523+ pugi::xml_document doc;
524+ pugi::xml_parse_result parse_result
525+ = doc.load_buffer (xml.data (), xml.size (),
526+ pugi::parse_default | pugi::parse_fragment);
527+ if (!parse_result) {
549528#if DEBUG_XMP_READ
550- std::cerr << " Error parsing XML @" << parse_result.offset << " : "
551- << parse_result.description () << " \n " ;
529+ std::cerr << " Error parsing XML @" << parse_result.offset << " : "
530+ << parse_result.description () << " \n " ;
552531#endif
553- // Instead of returning early here if there were errors parsing
554- // the XML -- I have noticed that very minor XML malformations
555- // are common in XMP found in files -- hope for the best and
556- // go ahead and assume that maybe it managed to put something
557- // useful in the resulting document.
558- #if 0
559- return true;
560- #endif
561- }
562- // Decode the contents of the XML document (it will recurse)
563- decode_xmp_node (doc.first_child (), spec);
532+ // Instead of returning early here if there were errors parsing
533+ // the XML -- I have noticed that very minor XML malformations
534+ // are common in XMP found in files -- hope for the best and
535+ // go ahead and assume that maybe it managed to put something
536+ // useful in the resulting document.
564537 }
538+ // Find the first rdf:Description node anywhere in the document.
539+ // decode_xmp_node() iterates siblings, so passing the first one processes
540+ // all rdf:Description siblings (i.e. all blocks in the rdf:RDF container).
541+ auto first_desc = doc.find_node ([](pugi::xml_node n) {
542+ return strcmp (n.name (), " rdf:Description" ) == 0 ;
543+ });
544+ if (first_desc)
545+ decode_xmp_node (first_desc, spec);
565546#if DEBUG_XMP_READ
566547 std::cerr << " XMP total parse time " << timer () << " \n " ;
567548#endif
0 commit comments