@@ -208,8 +208,11 @@ def _create_grounding_annotations(
208208 annotations : list [Annotation ] = []
209209
210210 # Cache the label across iterations so the happy path is one DB lookup,
211- # not N. Reset to None on savepoint rollback so a transient labelset
212- # failure can be retried on the next annotation.
211+ # not N. Reset on ANY savepoint rollback (not just label-lookup failures):
212+ # if the label/labelset was created inside this iteration's savepoint and
213+ # then rolled back due to a downstream failure, the cached reference is
214+ # stale. ensure_label_and_labelset is idempotent, so the cost of an
215+ # unnecessary re-fetch after, e.g., a page=None ValueError is one SELECT.
213216 cached_label : AnnotationLabel | None = None
214217
215218 for result in alignment_results :
@@ -336,6 +339,12 @@ def _create_span_annotation(
336339 offset. For DOCX in particular this is a known limitation; the field
337340 serves as a placeholder and the actual location is encoded by the
338341 character offsets in ``json``.
342+
343+ Identity key uses ``json={"start": ..., "end": ...}``. Keep the key
344+ order in this literal stable so ``get_or_create`` deduplicates on
345+ retry (NOTE(review): ``jsonb`` equality ignores key order; confirm the
346+ column type). Python 3.7+ guarantees dict-literal insertion order, and
347+ this is the only construction site, so the ordering is locally enforced.
339348 """
340349 from opencontractserver .annotations .models import SPAN_LABEL , Annotation
341350
0 commit comments