@@ -413,25 +413,6 @@ add_attrib(ImageSpec& spec, string_view xmlname, string_view xmlvalue,
413413
414414
415415
416- // Utility: Search str for the first substring in str (starting from
417- // position pos) that starts with startmarker and ends with endmarker.
418- // If not found, return false. If found, return true, store the
419- // beginning and ending indices in startpos and endpos.
420- static bool
421- extract_middle (string_view str, size_t pos, string_view startmarker,
422- string_view endmarker, size_t & startpos, size_t & endpos)
423- {
424- startpos = str.find (startmarker, pos);
425- if (startpos == std::string::npos)
426- return false ; // start marker not found
427- endpos = str.find (endmarker, startpos);
428- if (endpos == std::string::npos)
429- return false ; // end marker not found
430- endpos += endmarker.size ();
431- return true ;
432- }
433-
434-
435416// Decode one XMP node and its children.
436417// Return value is the size of the resulting attribute (can be used to
437418// catch runaway or corrupt XML).
@@ -541,35 +522,35 @@ decode_xmp(string_view xml, ImageSpec& spec)
541522#endif
542523 if (!xml.length ())
543524 return true ;
544- for (size_t startpos = 0 , endpos = 0 ;
545- extract_middle (xml, endpos, " <rdf:Description" , " </rdf:Description>" ,
546- startpos, endpos);) {
547- // Turn that middle section into an XML document
548- string_view rdf = xml.substr (startpos, endpos - startpos); // scooch in
549- #if DEBUG_XMP_READ
550- std::cerr << " RDF is:\n ---\n " << rdf.substr (0 , 4096 ) << " \n ---\n " ;
551- #endif
552- pugi::xml_document doc;
553- pugi::xml_parse_result parse_result
554- = doc.load_buffer (rdf.data (), rdf.size (),
555- pugi::parse_default | pugi::parse_fragment);
556- if (!parse_result) {
525+ // Some callers (e.g. JPEG APP1 markers) prepend a namespace URI before the
526+ // actual XML. Skip any leading non-XML content.
527+ auto xmlstart = xml.find (' <' );
528+ if (xmlstart == string_view::npos)
529+ return true ;
530+ xml = xml.substr (xmlstart);
531+ pugi::xml_document doc;
532+ pugi::xml_parse_result parse_result
533+ = doc.load_buffer (xml.data (), xml.size (),
534+ pugi::parse_default | pugi::parse_fragment);
535+ if (!parse_result) {
557536#if DEBUG_XMP_READ
558- std::cerr << " Error parsing XML @" << parse_result.offset << " : "
559- << parse_result.description () << " \n " ;
537+ std::cerr << " Error parsing XML @" << parse_result.offset << " : "
538+ << parse_result.description () << " \n " ;
560539#endif
561- // Instead of returning early here if there were errors parsing
562- // the XML -- I have noticed that very minor XML malformations
563- // are common in XMP found in files -- hope for the best and
564- // go ahead and assume that maybe it managed to put something
565- // useful in the resulting document.
566- #if 0
567- return true;
568- #endif
569- }
570- // Decode the contents of the XML document (it will recurse)
571- decode_xmp_node (doc.first_child (), spec);
540+ // Instead of returning early here if there were errors parsing
541+ // the XML -- I have noticed that very minor XML malformations
542+ // are common in XMP found in files -- hope for the best and
543+ // go ahead and assume that maybe it managed to put something
544+ // useful in the resulting document.
572545 }
546+ // Find the first rdf:Description node anywhere in the document.
547+ // decode_xmp_node() iterates siblings, so passing the first one processes
548+ // all rdf:Description siblings (i.e. all blocks in the rdf:RDF container).
549+ auto first_desc = doc.find_node ([](pugi::xml_node n) {
550+ return strcmp (n.name (), " rdf:Description" ) == 0 ;
551+ });
552+ if (first_desc)
553+ decode_xmp_node (first_desc, spec);
573554#if DEBUG_XMP_READ
574555 std::cerr << " XMP total parse time " << timer () << " \n " ;
575556#endif
0 commit comments