diff --git a/src/openpecha/buda/__init__.py b/src/openpecha/buda/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/openpecha/buda/api.py b/src/openpecha/buda/api.py
deleted file mode 100644
index 65e2b7ca..00000000
--- a/src/openpecha/buda/api.py
+++ /dev/null
@@ -1,214 +0,0 @@
-import codecs
-import csv
-import gzip
-import hashlib
-import io
-import json
-import logging
-from contextlib import closing
-
-import boto3
-import botocore
-import pyewts
-import rdflib
-import requests
-from rdflib import Literal
-from rdflib.namespace import SKOS, Namespace
-
-LDSPDIBASEURL = "https://ldspdi.bdrc.io/"
-CONVERTER = pyewts.pyewts()
-
-SESSION = boto3.Session()
-S3 = SESSION.client("s3")
-
-BDR = Namespace("http://purl.bdrc.io/resource/")
-BDO = Namespace("http://purl.bdrc.io/ontology/core/")
-BDA = Namespace("http://purl.bdrc.io/admindata/")
-ADM = Namespace("http://purl.bdrc.io/ontology/admin/")
-
-
-def fetch_op_commits(ldspdibaseurl="http://ldspdi.bdrc.io/"):
- """
- Fetches the list of all openpecha commits on BUDA
- """
- res = {}
- headers = {"Accept": "text/csv"}
- params = {"format": "csv"}
- with closing(
- requests.get(
- ldspdibaseurl + "/query/table/OP_allCommits",
- stream=True,
- headers=headers,
- params=params,
- )
- ) as r:
- reader = csv.reader(codecs.iterdecode(r.iter_lines(), "utf-8"))
- for row in reader:
- if not row[0].startswith("http://purl.bdrc.io/resource/IE0OP"):
- logging.error("cannot interpret csv line starting with " + row[0])
- continue
- res[row[0][34:]] = row[1]
- return res
-
-
-def get_s3_folder_prefix(wlname, image_group_lname):
- """
- gives the s3 prefix (~folder) in which the volume will be present.
- inpire from https://github.com/buda-base/buda-iiif-presentation/blob/master/src/main/java/
- io/bdrc/iiif/presentation/ImageInfoListService.java#L73
- Example:
- - wlname=W22084, image_group_lname=I0886
- - result = "Works/60/W22084/images/W22084-0886/
- where:
- - 60 is the first two characters of the md5 of the string W22084
- - 0886 is:
- * the image group ID without the initial "I" if the image group ID is in the form I\\d\\d\\d\\d
- * or else the full image group ID (incuding the "I")
- """
- md5 = hashlib.md5(str.encode(wlname))
- two = md5.hexdigest()[:2]
-
- pre, rest = image_group_lname[0], image_group_lname[1:]
- if pre == "I" and rest.isdigit() and len(rest) == 4:
- suffix = rest
- else:
- suffix = image_group_lname
-
- return "Works/{two}/{RID}/images/{RID}-{suffix}/".format(
- two=two, RID=wlname, suffix=suffix
- )
-
-
-def gets3blob(s3Key):
- f = io.BytesIO()
- try:
- S3.download_fileobj("archive.tbrc.org", s3Key, f)
- return f
- except botocore.exceptions.ClientError as e:
- if e.response["Error"]["Code"] == "404":
- return None
- else:
- raise
-
-
-def get_image_list_s3(wlname, image_group_lname):
- s3key = get_s3_folder_prefix(wlname, image_group_lname) + "dimensions.json"
- blob = gets3blob(s3key)
- if blob is None:
- return None
- blob.seek(0)
- b = blob.read()
- ub = gzip.decompress(b)
- s = ub.decode("utf8")
- data = json.loads(s)
- return data
-
-
-def get_image_list_iiifpres(wlname, image_group_lname):
- r = requests.get(f"http://iiifpres.bdrc.io/il/v:bdr:{wlname}")
- return r.json()
-
-
-def get_image_list(wlname, image_group_lname, source="s3", reorder_with_bvm=False):
- il = None
- if source == "s3":
- il = get_image_list_s3(wlname, image_group_lname)
- else:
- il = get_image_list_iiifpres(wlname, image_group_lname)
- return il
-
-
-def _res_from_model(g, wlname):
- res = {
- "source_metadata": {"id": "http://purl.bdrc.io/resource/" + wlname},
- "image_groups": {},
- }
- wres = BDR[wlname]
- try:
- adm = g.value(predicate=ADM.adminAbout, object=wres)
- res["source_metadata"]["status"] = str(g.value(adm, ADM.status))
- res["source_metadata"]["access"] = str(g.value(adm, ADM.access))
- if (adm, ADM.restrictedInChina, Literal(True)) in g:
- res["source_metadata"]["geo_restriction"] = ["CN"]
- mwres = g.value(wres, BDO.instanceReproductionOf)
- res["source_metadata"]["reproduction_of"] = str(mwres)
- for _, _, cs in g.triples((mwres, BDO.copyright, None)):
- res["source_metadata"]["copyright_status"] = str(cs)
- if "copyright_status" not in res["source_metadata"]:
- res["source_metadata"][
- "copyright_status"
- ] = "http://purl.bdrc.io/resource/CopyrightPublicDomain"
- res["source_metadata"]["reproduction_of"] = str(mwres)
- for _, _, l in g.triples((mwres, SKOS.prefLabel, None)):
- if l.language == "bo-x-ewts":
- res["source_metadata"]["title"] = CONVERTER.toUnicode(l.value)
- break
- else:
- res["source_metadata"]["title"] = l.value
- res["source_metadata"]["languages"] = set()
- for _, _, wa in g.triples((mwres, BDO.instanceOf, None)):
- for _, _, l in g.triples((wa, BDO.language, None)):
- for _, _, lt in g.triples((l, BDO.langBCP47Lang, None)):
- res["source_metadata"]["languages"].add(lt.value)
- for _, _, aac in g.triples((wa, BDO.creator, None)):
- if (aac, BDO.role, BDR.R0ER0009) or (aac, BDO.role, BDR.R0ER0009) in g:
- for _, _, p in g.triples((aac, BDO.agent, None)):
- for _, _, l in g.triples((p, SKOS.prefLabel, None)):
- if l.language == "bo-x-ewts":
- res["source_metadata"]["author"] = CONVERTER.toUnicode(
- l.value
- )
- break
- else:
- res["source_metadata"]["author"] = l.value
- res["source_metadata"]["languages"] = list(res["source_metadata"]["languages"])
- for _, _, ig in g.triples((wres, BDO.instanceHasVolume, None)):
- if (
- g.value(ig, BDO.volumeNumber) is None
- or g.value(ig, BDO.volumePagesTotal) is None
- ):
- continue
- iglname = str(ig)[str(ig).rfind("/") + 1 :]
- res["image_groups"][iglname] = {}
- iginfo = res["image_groups"][iglname]
- iginfo["id"] = str(ig)
- iginfo["total_pages"] = int(g.value(ig, BDO.volumePagesTotal))
- iginfo["volume_number"] = int(g.value(ig, BDO.volumeNumber))
- iginfo["volume_pages_bdrc_intro"] = int(
- g.value(ig, BDO.volumePagesTbrcIntro)
- )
- for _, _, l in g.triples((ig, SKOS.prefLabel, None)):
- if l.language == "bo-x-ewts":
- iginfo["title"] = CONVERTER.toUnicode(l.value)
- break
- else:
- iginfo["title"] = l.value
- finally:
- return res
-
-
-def get_buda_scan_info(wlname):
- headers = {"Accept": "text/turtle"}
- params = {"R_RES": "bdr:" + wlname}
- res = None
- g = rdflib.Graph()
- try:
- req = requests.get(
- LDSPDIBASEURL + "query/graph/OP_info",
- headers=headers,
- params=params,
- )
- g.parse(data=req.text, format="ttl")
- res = _res_from_model(g, wlname)
- except Exception as e:
- logging.error("get_buda_scan_info failed for " + wlname + ": " + str(e))
- finally:
- return res
-
-
-def image_group_to_folder_name(scan_id, image_group_id):
- image_group_folder_part = image_group_id
- pre, rest = image_group_id[0], image_group_id[1:]
- if pre == "I" and rest.isdigit() and len(rest) == 4:
- image_group_folder_part = rest
- return scan_id + "-" + image_group_folder_part
diff --git a/src/openpecha/pecha/__init__.py b/src/openpecha/pecha/__init__.py
index 834ccadc..16acde74 100644
--- a/src/openpecha/pecha/__init__.py
+++ b/src/openpecha/pecha/__init__.py
@@ -14,7 +14,6 @@
)
from openpecha.pecha.annotations import BaseAnnotation
from openpecha.pecha.layer import AnnotationType
-from openpecha.pecha.metadata import PechaMetaData
from openpecha.config import PECHAS_PATH
BASE_NAME = str
@@ -25,9 +24,8 @@ class Pecha:
def __init__(self, pecha_id: str, pecha_path: Path) -> None:
self.id = pecha_id
self.pecha_path = pecha_path
- self.metadata = self.load_metadata()
self.bases = self.load_bases()
- # self.annotations = self.load_annotations()
+ self.annotations = []
@classmethod
def from_path(cls, pecha_path: Path) -> "Pecha":
@@ -65,21 +63,28 @@ def create(cls, output_path: Optional[Path] = None, pecha_id: Optional[str] = No
return cls(pecha_id, pecha_path)
@classmethod
- def create_pecha(cls, pecha_id: str, base_text: str, annotation_id: str, annotation: List[BaseAnnotation]) -> "Pecha":
+ def create_pecha(cls, pecha_id: str, base_text: str, annotation_id: str, annotation: List[BaseAnnotation], annotation_type: AnnotationType) -> "Pecha":
pecha = cls.create(pecha_id=pecha_id)
base_name = pecha.set_base(base_text)
- ann_type = get_annotation_type(annotation)
- ann_store, _ = pecha.add_layer(base_name=base_name, layer_type=ann_type, annotation_id=annotation_id)
-
+ ann_store, _ = pecha.add_layer(base_name=base_name, layer_type=annotation_type, annotation_id=annotation_id)
for single_annotation in annotation:
- ann_store = pecha.add_annotation(ann_store=ann_store, annotation=single_annotation, layer_type=ann_type)
+ ann_store = pecha.add_annotation(ann_store=ann_store, annotation=single_annotation, layer_type=annotation_type)
ann_store.save()
+        # Keep each stored annotation's id and span on the instance; `ann` avoids rebinding the `annotation` parameter.
+        for ann in get_anns(ann_store, include_span=True):
+            pecha.annotations.append({
+                "span": {
+                    "start": ann["span"]["start"],
+                    "end": ann["span"]["end"],
+                },
+                "id": ann["id"],
+            })
return pecha
- def add(self, annotation_id: str, annotation: List[BaseAnnotation]) -> "Pecha":
+ def add(self, annotation_id: str, annotation: List[BaseAnnotation], annotation_type: AnnotationType) -> "Pecha":
base_name = next(iter(self.bases))
- ann_type = get_annotation_type(annotation)
+ ann_type = annotation_type
if check_annotation_exists(self.layer_path/base_name/f"{ann_type.value}-{annotation_id}.json"):
raise ValueError(f"Annotation with id {annotation_id} already exists")
ann_store, _ = self.add_layer(base_name=base_name, layer_type=ann_type, annotation_id=annotation_id)
@@ -102,20 +107,6 @@ def layer_path(self):
layer_path.mkdir(parents=True, exist_ok=True)
return layer_path
- @property
- def metadata_path(self):
- return self.pecha_path / "metadata.json"
-
-
- def load_metadata(self):
- if not self.metadata_path.exists():
- return None
-
- with open(self.metadata_path) as f:
- metadata = json.load(f)
-
- return PechaMetaData(**metadata)
-
def load_bases(self):
bases = {}
for base_file in self.base_path.rglob("*.txt"):
@@ -189,7 +180,6 @@ def add_annotation(
# Add Annotation Group Type
ann_group_type = layer_type.annotation_group_type
ann_data[ann_group_type.value] = layer_type.value
-
start, end = (
annotation.span.start,
annotation.span.end,
@@ -219,32 +209,9 @@ def add_annotation(
raise StamAddAnnotationError(
f"[Error] Failed to add annotation to STAM: {e}"
)
+
return ann_store
- def set_metadata(self, pecha_metadata: Dict):
- # Retrieve parser name
- parser_name = self.metadata.parser if self.metadata else None
- if "parser" not in pecha_metadata:
- pecha_metadata["parser"] = parser_name
-
- # Retrieve initial creation type name
- initial_creation_type = (
- self.metadata.initial_creation_type if self.metadata else None
- )
- if "initial_creation_type" not in pecha_metadata:
- pecha_metadata["initial_creation_type"] = initial_creation_type
-
- try:
- pecha_metadata = PechaMetaData(**pecha_metadata)
- except Exception as e:
- raise ValueError(f"Invalid metadata: {e}")
-
- self.metadata = pecha_metadata
- with open(self.metadata_path, "w") as f:
- json.dump(self.metadata.to_dict(), f, ensure_ascii=False, indent=2)
-
- return self.metadata
-
def get_segmentation_layer_path(self) -> str:
"""
1. Get the first layer file from the pecha
@@ -256,11 +223,6 @@ def get_segmentation_layer_path(self) -> str:
return relative_layer_path
- def get_first_layer_path(self) -> str:
- layer_path = list(self.layer_path.rglob("*.json"))[0]
- relative_layer_path = layer_path.relative_to(self.pecha_path.parent).as_posix()
-
- return relative_layer_path
def get_layer_by_ann_type(self, base_name: str, layer_type: AnnotationType):
"""
@@ -296,7 +258,10 @@ def get_anns(ann_store: AnnotationStore, include_span: bool = False):
for ann in ann_store:
ann_data = {}
for data in ann:
- ann_data[data.key().id()] = data.value().get()
+            key_id = data.key().id()
+            # "index" entries are skipped and never copied into the result.
+            if key_id != "index":
+                ann_data[key_id] = data.value().get()
curr_ann = {**ann_data, "text": str(ann)}
if include_span:
curr_ann["span"] = {
@@ -310,15 +275,6 @@ def get_anns(ann_store: AnnotationStore, include_span: bool = False):
def load_layer(path: Path) -> AnnotationStore:
return AnnotationStore(file=str(path))
-
-def get_annotation_type(annotation: List[BaseAnnotation]):
- if hasattr(annotation[0], "alignment_index") and hasattr(annotation[0], "index"):
- return AnnotationType.ALIGNMENT
- elif hasattr(annotation[0], "index") and not hasattr(annotation[0], "alignment_index"):
- return AnnotationType.SEGMENTATION
- else:
- raise ValueError("Invalid annotation type")
-
def check_annotation_exists(annotation_path: Path):
if annotation_path.exists():
return True
diff --git a/src/openpecha/pecha/annotations.py b/src/openpecha/pecha/annotations.py
index 44f48689..0d37c6b4 100644
--- a/src/openpecha/pecha/annotations.py
+++ b/src/openpecha/pecha/annotations.py
@@ -40,25 +40,26 @@ def end_must_not_be_less_than_start(self) -> "span":
class BaseAnnotation(BaseModel):
span: span
- metadata: Optional[Dict] = None
model_config = ConfigDict(extra="allow")
def get_dict(self):
res = self.model_dump()
# Remove span from the dictionary
- res.pop("span")
+ to_remove_keys = ["span"]
+ for key in to_remove_keys:
+ res.pop(key)
# Remove None values from the dictionary
res = {k: v for k, v in res.items() if v is not None}
return res
class SegmentationAnnotation(BaseAnnotation):
- index: int
+ id: str = Field(..., description="Annotation ID")
class AlignmentAnnotation(BaseAnnotation):
- index: int
+ id: str = Field(..., description="Annotation ID")
alignment_index: list[int] = Field(
description="Index of the alignment, which can be of translation or commentary"
)
diff --git a/src/openpecha/pecha/metadata.py b/src/openpecha/pecha/metadata.py
deleted file mode 100644
index 9dc4a40f..00000000
--- a/src/openpecha/pecha/metadata.py
+++ /dev/null
@@ -1,263 +0,0 @@
-from datetime import datetime
-from enum import Enum
-from typing import Dict, List, Optional
-
-from pydantic import BaseModel, ConfigDict, field_serializer, model_validator
-
-from openpecha.ids import get_initial_pecha_id
-
-
-class InitialCreationType(Enum):
- ocr = "ocr"
- ebook = "ebook"
- input = "input"
- tmx = "tmx"
- json = "json"
- google_docx = "google_docx"
-
-
-class Language(Enum):
- tibetan = "bo"
- english = "en"
- literal_chinese = "lzh"
- chinese = "zh"
- sanskrit = "sa"
- italian = "it"
- russian = "ru"
- hindi = "hi"
-
-
-class CopyrightStatus(Enum):
- UNKNOWN = "Unknown"
- COPYRIGHTED = "In copyright"
- PUBLIC_DOMAIN = "Public domain"
-
-
-class Copyright(BaseModel):
- status: CopyrightStatus = CopyrightStatus.UNKNOWN # noqa
- notice: Optional[str] = ""
- info_url: Optional[str] = None
-
- model_config = ConfigDict(extra="forbid")
-
-
-Copyright_copyrighted = Copyright(
- status=CopyrightStatus.COPYRIGHTED,
- notice="In copyright by the original author or editor",
- info_url="http://rightsstatements.org/vocab/InC/1.0/",
-)
-
-Copyright_unknown = Copyright(
- status=CopyrightStatus.UNKNOWN,
- notice="Copyright Undertermined",
- info_url="http://rightsstatements.org/vocab/UND/1.0/",
-)
-
-Copyright_public_domain = Copyright(
- status=CopyrightStatus.PUBLIC_DOMAIN,
- notice="Public domain",
- info_url="https://creativecommons.org/publicdomain/mark/1.0/",
-)
-
-
-class LicenseType(Enum):
- # based on https://creativecommons.org/licenses/
-
- CC0 = "CC0"
- PUBLIC_DOMAIN_MARK = "Public Domain Mark"
- CC_BY = "CC BY"
- CC_BY_SA = "CC BY-SA"
- CC_BY_ND = "CC BY-ND"
- CC_BY_NC = "CC BY-NC"
- CC_BY_NC_SA = "CC BY-NC-SA"
- CC_BY_NC_ND = "CC BY-NC-ND"
-
- UNDER_COPYRIGHT = "under copyright"
- UNKNOWN = "Unknown"
-
-
-class PechaMetaData(BaseModel):
- id: str
- title: Optional[Dict[str, str] | str] = None
- author: Optional[List[str] | Dict[str, str] | str] = None
- imported: Optional[datetime] = None
- source: Optional[str] = None
- toolkit_version: str
- parser: str
- initial_creation_type: InitialCreationType
- language: Optional[Language] = None
- source_metadata: Dict = {}
- bases: Dict = {}
- copyright: Copyright = Copyright()
- licence: LicenseType = LicenseType.UNKNOWN
-
- model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
- # Optional fields from both classes
- legacy_id: Optional[str] = None
- source_file: Optional[str] = None
-
- # Metadata fields
- ocr_import_info: Optional[Dict] = None
- statistics: Optional[Dict] = None
- quality: Optional[Dict] = None
-
- # Time tracking
- last_modified: Optional[datetime] = None
-
- # Validators from both classes
- @model_validator(mode="before")
- def set_id(cls, values):
- if "id" not in values or values["id"] is None:
- values["id"] = get_initial_pecha_id()
- return values
-
- # @model_validator(mode="before")
- # def validate_parser(cls, values):
- # if "parser" in values and values["parser"]:
- # parser_classes = cls.get_toolkit_parsers()
- # if values["parser"] not in [name for name, _ in parser_classes]:
- # raise ValueError(
- # f"Parser {values['parser']} not in the Toolkit parsers."
- # )
- # return values
-
- @model_validator(mode="before")
- def set_toolkit_version(cls, values):
- if "toolkit_version" not in values or values["toolkit_version"] is None:
- try:
- from importlib.metadata import PackageNotFoundError, version
-
- toolkit_version = version("openpecha")
- values["toolkit_version"] = toolkit_version
- except PackageNotFoundError as e:
- raise RuntimeError("Package 'openpecha' not found.") from e
- except Exception as e:
- raise RuntimeError(f"Error fetching toolkit version: {str(e)}") from e
- return values
-
- @model_validator(mode="before")
- def set_imported(cls, values):
- if "imported" not in values or values["imported"] is None:
- values["imported"] = datetime.now()
- return values
-
- @model_validator(mode="before")
- def set_last_modified(cls, values):
- if "last_modified" not in values or values["last_modified"] is None:
- values["last_modified"] = datetime.now()
- return values
-
- @model_validator(mode="before")
- def set_copyright_info(cls, values):
- if "copyright" not in values or values["copyright"] is None:
- values["copyright"] = Copyright()
- return values
-
- # Serializers for complex types
- @field_serializer("imported", mode="plain")
- def serialize_imported(self, value: Optional[datetime]) -> Optional[str]:
- return value.isoformat() if value else None
-
- @field_serializer("last_modified", mode="plain")
- def serialize_last_modified(self, value: Optional[datetime]) -> Optional[str]:
- return value.isoformat() if value else None
-
- @field_serializer("licence", mode="plain")
- def serialize_licence(self, value: Optional[LicenseType]) -> Optional[str]:
- return value.value if value else None
-
- @field_serializer("language", mode="plain")
- def serialize_language(self, value: Optional[Language]) -> Optional[str]:
- return value.value if value else None
-
- @field_serializer("initial_creation_type", mode="plain")
- def serialize_initial_creation_type(
- self, value: Optional[InitialCreationType]
- ) -> Optional[str]:
- return value.value if value else None
-
- @field_serializer("copyright", mode="plain")
- def serialize_copyright(self, value: Optional[Copyright]) -> Optional[Dict]:
- if not value:
- return None
- return {
- "status": value.status.value,
- "notice": value.notice,
- "info_url": value.info_url,
- }
-
- def update_last_modified_date(self):
- self.last_modified = datetime.now()
-
- # @classmethod
- # def get_toolkit_parsers(cls):
- # # List to store all classes from the package
- # all_classes = []
- # import sys
-
- # base_path = Path(__file__).parent / "parsers"
- # pecha_parser_path = "openpecha.pecha.parsers"
-
- # for py_file in base_path.rglob("*.py"):
- # path_parts = list(py_file.parts)
- # path_parts[-1] = path_parts[-1].replace(".py", "")
- # if path_parts[-1] == "__init__":
- # path_parts.pop()
-
- # if path_parts[0] == "/":
- # path_parts.pop(0)
-
- # start_index = path_parts.index(pecha_parser_path.split(".")[0])
- # parser_path = ".".join(path_parts[start_index:])
- # importlib.import_module(parser_path)
- # classes = inspect.getmembers(sys.modules[parser_path], inspect.isclass)
- # all_classes.extend(classes)
-
- # parsers = importlib.import_module("openpecha.pecha.parsers")
- # parser_classes = [
- # (name, class_)
- # for name, class_ in all_classes
- # if issubclass(class_, parsers.BaseParser)
- # or issubclass(class_, parsers.OCRBaseParser)
- # and class_ is not parsers.BaseParser
- # and class_ is not parsers.OCRBaseParser
- # ]
- # return parser_classes
-
- def to_dict(self):
- """
- Prepare PechaMetaData attribute to be JSON serializable
- """
- data = self.model_dump()
-
- # Dynamically get standard fields from the model
- standard_fields = list(set(type(self).model_fields.keys()))
-
- # Move any extra fields to source_metadata
- extra_fields = {}
- for k, v in data.items():
- if k not in standard_fields:
- if isinstance(v, Enum):
- extra_fields[k] = v.value
- elif isinstance(v, datetime):
- extra_fields[k] = v.isoformat()
- else:
- extra_fields[k] = v
-
- if "source_metadata" not in data:
- data["source_metadata"] = {}
- data["source_metadata"].update(extra_fields)
-
- # Remove extra fields from the top-level data
- for field in extra_fields:
- del data[field]
-
- return data
-
-
-class InitialPechaMetadata(PechaMetaData):
- @model_validator(mode="before")
- def set_id(cls, values):
- if "id" not in values or values["id"] is None:
- values["id"] = get_initial_pecha_id()
- return values
diff --git a/src/openpecha/pecha/parsers/edition.py b/src/openpecha/pecha/parsers/edition.py
index 27a028a1..7bb996e5 100644
--- a/src/openpecha/pecha/parsers/edition.py
+++ b/src/openpecha/pecha/parsers/edition.py
@@ -16,6 +16,7 @@
from openpecha.pecha.layer import AnnotationType
from openpecha.pecha.parsers import update_coords
from openpecha.pecha.serializers.json import JsonSerializer
+from openpecha.ids import get_annotation_id
logger = get_logger(__name__)
@@ -44,8 +45,8 @@ def parse_segmentation(self, segments: list[str]) -> list[SegmentationAnnotation
for index, segment in enumerate(segments, start=1):
anns.append(
SegmentationAnnotation(
+ id=str(index),
span=span(start=char_count, end=char_count + len(segment)),
- index=index,
)
)
char_count += len(segment) + 1
diff --git a/src/openpecha/utils.py b/src/openpecha/utils.py
index 750b8a49..b51baca0 100644
--- a/src/openpecha/utils.py
+++ b/src/openpecha/utils.py
@@ -27,7 +27,7 @@ def read_csv(file_path) -> List[List[str]]:
with open(file_path, newline="", encoding="utf-8") as file:
reader = csv.reader(file)
rows = list(reader)
- return rows
+    return rows
def write_csv(file_path, data) -> None:
@@ -58,5 +58,5 @@ def write_json(
def convert_to_base_annotation(raw_annotation):
span_data = raw_annotation["span"]
annotation_span = span(start=span_data["start"], end=span_data["end"])
- annotation_data = {k: v for k, v in raw_annotation.items() if k != "span"}
+    annotation_data = {k: v for k, v in raw_annotation.items() if k not in ("span", "index")}
return BaseAnnotation(span=annotation_span, **annotation_data)
\ No newline at end of file
diff --git a/tests/buda/data/OP_info-W12827.ttl b/tests/buda/data/OP_info-W12827.ttl
deleted file mode 100644
index 7440d773..00000000
--- a/tests/buda/data/OP_info-W12827.ttl
+++ /dev/null
@@ -1,191 +0,0 @@
-@prefix : .
-@prefix aut: .
-@prefix bdan: .
-@prefix bd: .
-@prefix bf: .
-@prefix owl: .
-@prefix tbr: .
-@prefix bdou: .
-@prefix rsh: .
-@prefix xsd: .
-@prefix admin: .
-@prefix skos: .
-@prefix rdfs: .
-@prefix bdac: .
-@prefix wd: .
-@prefix dr: .
-@prefix oa: .
-@prefix dila: .
-@prefix sh: .
-@prefix tmp: .
-@prefix dcterms: .
-@prefix text: .
-@prefix bda: .
-@prefix foaf: .
-@prefix bdd: .
-@prefix ad: .
-@prefix bdg: .
-@prefix f: .
-@prefix vcard: .
-@prefix adm: .
-@prefix bdo: .
-@prefix iiif2: .
-@prefix iiif3: .
-@prefix adr: .
-@prefix viaf: .
-@prefix bds: .
-@prefix eftr: .
-@prefix bdr: .
-@prefix bdu: .
-@prefix as: .
-@prefix rdf: .
-@prefix tm: .
-@prefix ldp: .
-
-bdr:I2062 a bdo:ImageGroup ;
- bdo:volumeNumber 2 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- skos:prefLabel "volume 1"@en ;
- bdo:volumePagesTotal 493 .
-
-bdr:I2068 a bdo:ImageGroup ;
- bdo:volumeNumber 8 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- skos:prefLabel "pod 1"@bo-x-ewts ;
- bdo:volumePagesTotal 535 .
-
-bdr:I2071 a bdo:ImageGroup ;
- bdo:volumeNumber 11 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- bdo:volumePagesTotal 525 .
-
-bdr:LangBo a bdo:Language , owl:Class ;
- rdfs:seeAlso , ;
- rdfs:subClassOf bdo:Language ;
- bdo:langBCP47Lang "bo" ;
- bdo:langMARCCode "tib" ;
- skos:prefLabel "藏文"@zh-hans , "bod yig"@bo-x-ewts , "Tibetan"@en .
-
-bdr:I2061 a bdo:ImageGroup ;
- bdo:volumeNumber 1 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- bdo:volumePagesTotal 459 .
-
-bdr:I2067 a bdo:ImageGroup ;
- bdo:volumeNumber 7 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- bdo:volumePagesTotal 645 .
-
-bdr:I2070 a bdo:ImageGroup ;
- bdo:volumeNumber 10 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- bdo:volumePagesTotal 547 .
-
-bdr:I2066 a bdo:ImageGroup ;
- bdo:volumeNumber 6 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- bdo:volumePagesTotal 729 .
-
-bdr:W12827 a bdo:DigitalInstance , bdo:Instance , bdo:ImageInstance ;
- bdo:inCollection bdr:PRHD01 , bdr:PR01DOR0 , bdr:PR01JW33478 , bdr:PR1PL480 ;
- bdo:instanceHasVolume bdr:I2072 , bdr:I2071 , bdr:I2066 , bdr:I2073 , bdr:I2062 , bdr:I2068 , bdr:I2067 , bdr:I2064 , bdr:I2061 , bdr:I2063 , bdr:I2070 , bdr:I2069 , bdr:I2065 ;
- bdo:instanceOf bdr:WA12827 ;
- bdo:instanceReproductionOf bdr:MW12827 ;
- bdo:isRoot true ;
- bdo:numberOfVolumes 13 ;
- tmp:thumbnailIIIFService .
-
-bda:W12827 a adm:AdminData ;
- adm:access bda:AccessOpen ;
- adm:adminAbout bdr:W12827 ;
- adm:contentLegal bda:LD_BDRC_PD ;
- adm:facetIndex 66 ;
- adm:gitPath "d3/W12827.trig" ;
- adm:gitRepo bda:GR0014 ;
- adm:gitRevision "e1b570dbf911313d270802fce1bb694a51d8a2e1" ;
- adm:graphId bdg:W12827 ;
- adm:metadataLegal bda:LD_BDRC_CC0 ;
- adm:restrictedInChina true ;
- adm:status bda:StatusReleased ;
- bdo:isRoot true .
-
-bdr:I2065 a bdo:ImageGroup ;
- bdo:volumeNumber 5 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- bdo:volumePagesTotal 617 .
-
-bdr:WA12827 a bdo:Work ;
- bdo:catalogInfo "The Nyingtik Yabzhi of Longchen Rabjam Drime Ozer (1308-1364). Collection of profound Dzogchen teachings. Consists of the Kandro Nyingtik, Kandro Yangtik, Bima Nyingtik, Lama Yangtik, and Zabmo Yangtik. Scanned with the generous support of Master Tam Shek-Wing of the Vajrayana Buddhist Association."@en ;
- bdo:creator bdr:CR55948F3FDC9CACD3 ;
- bdo:isRoot true ;
- bdo:language bdr:LangBo ;
- bdo:workHasInstance bdr:W1KG12048 , bdr:MW2PD19078 , bdr:MW1KG12048 , bdr:MW12827 , bdr:W3CN3025 , bdr:MW1KG9720 , bdr:W2PD19078 , bdr:MW4PD2049 , bdr:W4PD2043 , bdr:MW4PD2043 , bdr:W12827 , bdr:W4PD2049 , bdr:MW3CN3025 , bdr:W1KG9720 ;
- bdo:workIsAbout bdr:T354 , bdr:WA3JT13386 ;
- tmp:entityScore 28 ;
- skos:prefLabel "snying thig ya bzhi/"@bo-x-ewts .
-
-bdr:I2064 a bdo:ImageGroup ;
- bdo:volumeNumber 4 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- bdo:volumePagesTotal 597 .
-
-bdr:I2073 a bdo:ImageGroup ;
- bdo:volumeNumber 13 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- bdo:volumePagesTotal 571 .
-
-bdr:P1583 skos:prefLabel "klong chen rab 'byams pa dri med 'od zer/"@bo-x-ewts , "隆钦热降巴·赤墨俄色"@zh-hans .
-
-bdr:I2063 a bdo:ImageGroup ;
- bdo:volumeNumber 3 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- bdo:volumePagesTotal 451 .
-
-bdr:I2069 a bdo:ImageGroup ;
- bdo:volumeNumber 9 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- bdo:volumePagesTotal 559 .
-
-bdr:CR55948F3FDC9CACD3
- a bdo:AgentAsCreator ;
- bdo:agent bdr:P1583 ;
- bdo:role bdr:R0ER0019 .
-
-bdr:MW12827 a bdo:Instance ;
- bf:identifiedBy bdr:ID9E0972E23D10FF0F , bdr:ID4CF611CFA57E16D8 , bdr:IDEC1B3ED2EB801B0F ;
- bdo:authorshipStatement "arranged and structured by klon-chen rab-'byams-pa"@en ;
- bdo:biblioNote "reproduced from a set of prints from a-'dzom chos-sgar blocksv. 7-13. published by talung tsetrul pema wangyal, darjeeling, w.b."@en ;
- bdo:extentStatement "13 v." ;
- bdo:hasPart bdr:MW12827_BB8776 , bdr:MW12827_C93AD5 , bdr:MW12827_CBDB0C , bdr:MW12827_8E3796 , bdr:MW12827_58921B ;
- bdo:hasSourcePrintery bdr:G3JT12503 ;
- bdo:hasTitle bdr:TT7FFDCDE93527101E ;
- bdo:instanceEvent bdr:EVF192DCB4E6693489 ;
- bdo:instanceHasReproduction bdr:W12827 ;
- bdo:instanceOf bdr:WA12827 ;
- bdo:isRoot true ;
- bdo:note bdr:NT7E4CD6992DD16DE1 ;
- bdo:numberOfVolumes 13 ;
- bdo:printMethod bdr:PrintMethod_Relief_WoodBlock ;
- bdo:publisherLocation "delhi"@en ;
- bdo:publisherName "sherab gyaltsen lama"@en ;
- bdo:script bdr:ScriptTibt ;
- tmp:thumbnailIIIFService ;
- skos:prefLabel "snying thig ya bzhi/"@bo-x-ewts .
-
-bdr:I2072 a bdo:ImageGroup ;
- bdo:volumeNumber 12 ;
- bdo:volumeOf bdr:W12827 ;
- bdo:volumePagesTbrcIntro 0 ;
- bdo:volumePagesTotal 489 .
diff --git a/tests/buda/data/expected-W12827.json b/tests/buda/data/expected-W12827.json
deleted file mode 100644
index 83d1ec25..00000000
--- a/tests/buda/data/expected-W12827.json
+++ /dev/null
@@ -1,95 +0,0 @@
-{
- "source_metadata":{
- "id":"http://purl.bdrc.io/resource/W12827",
- "status":"http://purl.bdrc.io/admindata/StatusReleased",
- "access":"http://purl.bdrc.io/admindata/AccessOpen",
- "reproduction_of":"http://purl.bdrc.io/resource/MW12827",
- "copyright_status":"http://purl.bdrc.io/resource/CopyrightPublicDomain",
- "languages": ["bo"],
- "title":"སྙིང་ཐིག་ཡ་བཞི།",
- "author":"ཀློང་ཆེན་རབ་འབྱམས་པ་དྲི་མེད་འོད་ཟེར།",
- "geo_restriction": ["CN"]
- },
- "image_groups":{
- "I2072":{
- "id":"http://purl.bdrc.io/resource/I2072",
- "total_pages":489,
- "volume_number":12,
- "volume_pages_bdrc_intro":0
- },
- "I2071":{
- "id":"http://purl.bdrc.io/resource/I2071",
- "total_pages":525,
- "volume_number":11,
- "volume_pages_bdrc_intro":0
- },
- "I2066":{
- "id":"http://purl.bdrc.io/resource/I2066",
- "total_pages":729,
- "volume_number":6,
- "volume_pages_bdrc_intro":0
- },
- "I2073":{
- "id":"http://purl.bdrc.io/resource/I2073",
- "total_pages":571,
- "volume_number":13,
- "volume_pages_bdrc_intro":0
- },
- "I2062":{
- "id":"http://purl.bdrc.io/resource/I2062",
- "total_pages":493,
- "volume_number":2,
- "volume_pages_bdrc_intro":0,
- "title":"volume 1"
- },
- "I2068":{
- "id":"http://purl.bdrc.io/resource/I2068",
- "total_pages":535,
- "volume_number":8,
- "volume_pages_bdrc_intro":0,
- "title":"པོད ༡"
- },
- "I2067":{
- "id":"http://purl.bdrc.io/resource/I2067",
- "total_pages":645,
- "volume_number":7,
- "volume_pages_bdrc_intro":0
- },
- "I2064":{
- "id":"http://purl.bdrc.io/resource/I2064",
- "total_pages":597,
- "volume_number":4,
- "volume_pages_bdrc_intro":0
- },
- "I2061":{
- "id":"http://purl.bdrc.io/resource/I2061",
- "total_pages":459,
- "volume_number":1,
- "volume_pages_bdrc_intro":0
- },
- "I2063":{
- "id":"http://purl.bdrc.io/resource/I2063",
- "total_pages":451,
- "volume_number":3,
- "volume_pages_bdrc_intro":0
- },
- "I2070":{
- "id":"http://purl.bdrc.io/resource/I2070",
- "total_pages":547,
- "volume_number":10,
- "volume_pages_bdrc_intro":0
- },
- "I2069":{
- "id":"http://purl.bdrc.io/resource/I2069",
- "total_pages":559,
- "volume_number":9,
- "volume_pages_bdrc_intro":0
- },
- "I2065":{
- "id":"http://purl.bdrc.io/resource/I2065",
- "total_pages":617,
- "volume_number":5,
- "volume_pages_bdrc_intro":0
- }
- }
-}
\ No newline at end of file
diff --git a/tests/buda/test_buda_api.py b/tests/buda/test_buda_api.py
deleted file mode 100644
index f147bafd..00000000
--- a/tests/buda/test_buda_api.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import json
-from pathlib import Path
-
-import rdflib
-
-from openpecha.buda.api import _res_from_model
-
-
-def test_buda_info_from_model():
- ttl_path = Path(__file__).parent / "data" / "OP_info-W12827.ttl"
- g = rdflib.Graph().parse(str(ttl_path), format="ttl")
- res = _res_from_model(g, "W12827")
- expected_path = Path(__file__).parent / "data" / "expected-W12827.json"
- with open(expected_path) as expected_file:
- expected = json.load(expected_file)
- assert res == expected
diff --git a/tests/pecha/parser/edition/test_edition.py b/tests/pecha/parser/edition/test_edition.py
index 847152f3..b3386939 100644
--- a/tests/pecha/parser/edition/test_edition.py
+++ b/tests/pecha/parser/edition/test_edition.py
@@ -14,6 +14,7 @@
from openpecha.pecha.parsers.edition import EditionParser
from openpecha.pecha.serializers.json import JsonSerializer
from openpecha.utils import read_json
+from openpecha.pecha.annotations import VersionVariantOperations
class TestEditionParser(TestCase):
@@ -36,16 +37,16 @@ def test_segmentation_parse(self):
anns = parser.parse_segmentation(segments)
expected_anns = [
- SegmentationAnnotation(span=span(start=0, end=87), index=1),
- SegmentationAnnotation(span=span(start=88, end=207), index=2),
- SegmentationAnnotation(span=span(start=208, end=283), index=3),
- SegmentationAnnotation(span=span(start=284, end=361), index=4),
- SegmentationAnnotation(span=span(start=362, end=508), index=5),
- SegmentationAnnotation(span=span(start=509, end=844), index=6),
- SegmentationAnnotation(span=span(start=845, end=1129), index=7),
- SegmentationAnnotation(span=span(start=1130, end=1217), index=8),
- SegmentationAnnotation(span=span(start=1218, end=1409), index=9),
- SegmentationAnnotation(span=span(start=1410, end=1605), index=10),
+ SegmentationAnnotation(span=span(start=0, end=87), id="1"),
+ SegmentationAnnotation(span=span(start=88, end=207), id="2"),
+ SegmentationAnnotation(span=span(start=208, end=283), id="3"),
+ SegmentationAnnotation(span=span(start=284, end=361), id="4"),
+ SegmentationAnnotation(span=span(start=362, end=508), id="5"),
+ SegmentationAnnotation(span=span(start=509, end=844), id="6"),
+ SegmentationAnnotation(span=span(start=845, end=1129), id="7"),
+ SegmentationAnnotation(span=span(start=1130, end=1217), id="8"),
+ SegmentationAnnotation(span=span(start=1218, end=1409), id="9"),
+ SegmentationAnnotation(span=span(start=1410, end=1605), id="10"),
]
assert anns == expected_anns
@@ -59,34 +60,34 @@ def test_segmentation_parse(self):
updated_anns = update_coords(anns, old_base, new_base)
expected_updated_anns = [
SegmentationAnnotation(
- span=span(start=0, end=87, errors=None), metadata=None, index=1
+ span=span(start=0, end=87, errors=None), id="1"
),
SegmentationAnnotation(
- span=span(start=88, end=208, errors=None), metadata=None, index=2
+ span=span(start=88, end=208, errors=None), id="2"
),
SegmentationAnnotation(
- span=span(start=209, end=284, errors=None), metadata=None, index=3
+ span=span(start=209, end=284, errors=None), id="3"
),
SegmentationAnnotation(
- span=span(start=285, end=363, errors=None), metadata=None, index=4
+ span=span(start=285, end=363, errors=None), id="4"
),
SegmentationAnnotation(
- span=span(start=364, end=511, errors=None), metadata=None, index=5
+ span=span(start=364, end=511, errors=None), id="5"
),
SegmentationAnnotation(
- span=span(start=512, end=843, errors=None), metadata=None, index=6
+ span=span(start=512, end=843, errors=None), id="6"
),
SegmentationAnnotation(
- span=span(start=843, end=1089, errors=None), metadata=None, index=7
+ span=span(start=843, end=1089, errors=None), id="7"
),
SegmentationAnnotation(
- span=span(start=1090, end=1221, errors=None), metadata=None, index=8
+ span=span(start=1090, end=1221, errors=None), id="8"
),
SegmentationAnnotation(
- span=span(start=1222, end=1411, errors=None), metadata=None, index=9
+ span=span(start=1222, end=1411, errors=None), id="9"
),
SegmentationAnnotation(
- span=span(start=1412, end=1626, errors=None), metadata=None, index=10
+ span=span(start=1412, end=1626, errors=None), id="10"
),
]
@@ -100,37 +101,37 @@ def test_version_parse(self):
new_base = "Hello World"
diffs = parser.parse_version(old_base, new_base)
assert diffs == [
- Version(span=span(start=5, end=5), operation="insertion", text=" World")
+ Version(span=span(start=5, end=5), operation=VersionVariantOperations.INSERTION, text=" World")
]
# Deletion
old_base = "Hello World"
new_base = "Hello"
diffs = parser.parse_version(old_base, new_base)
- assert diffs == [Version(span=span(start=5, end=11), operation="deletion")]
+ assert diffs == [Version(span=span(start=5, end=11), operation=VersionVariantOperations.DELETION)]
# Insertion in Between
old_base = "Hello World"
new_base = "Hello!! World"
diffs = parser.parse_version(old_base, new_base)
assert diffs == [
- Version(span=span(start=5, end=5), operation="insertion", text="!!")
+ Version(span=span(start=5, end=5), operation=VersionVariantOperations.INSERTION, text="!!")
]
# Deletion in Between
old_base = "Good morning, Everyone"
new_base = "Good Everyone"
diffs = parser.parse_version(old_base, new_base)
- assert diffs == [Version(span=span(start=4, end=13), operation="deletion")]
+ assert diffs == [Version(span=span(start=4, end=13), operation=VersionVariantOperations.DELETION)]
# Insertion and Deletion
old_base = "Good morning, Ladies and Gentlemen"
new_base = "Good Attractive Ladies and Gentlemen"
diffs = parser.parse_version(old_base, new_base)
assert diffs == [
- Version(span=span(start=5, end=13), operation="deletion"),
+ Version(span=span(start=5, end=13), operation=VersionVariantOperations.DELETION),
Version(
- span=span(start=13, end=13), operation="insertion", text="Attractive"
+ span=span(start=13, end=13), operation=VersionVariantOperations.INSERTION, text="Attractive"
),
]
@@ -141,95 +142,81 @@ def test_version_parse(self):
segments = self.txt_file.read_text(encoding="utf-8").splitlines()
new_base = "\n".join(segments)
diffs = parser.parse_version(old_base, new_base)
+
assert diffs == [
Version(
span=span(start=87, end=87, errors=None),
- metadata=None,
- operation="insertion",
+ operation=VersionVariantOperations.INSERTION,
text="\n",
),
Version(
span=span(start=282, end=282, errors=None),
- metadata=None,
- operation="insertion",
+ operation=VersionVariantOperations.INSERTION,
text="\n",
),
Version(
span=span(start=673, end=674, errors=None),
- metadata=None,
- operation="deletion",
+ operation=VersionVariantOperations.DELETION,
text="",
),
Version(
span=span(start=888, end=888, errors=None),
- metadata=None,
- operation="insertion",
+ operation=VersionVariantOperations.INSERTION,
text=" རྟག་ཏུ་ཚུལ་ཁྲིམས་ཡང་དག་བླངས་ནས་གནས་པར་འགྱུར།",
),
Version(
span=span(start=1034, end=1080, errors=None),
- metadata=None,
- operation="deletion",
+ operation=VersionVariantOperations.DELETION,
text="",
),
Version(
span=span(start=1080, end=1080, errors=None),
- metadata=None,
- operation="insertion",
+ operation=VersionVariantOperations.INSERTION,
text="འགྲོ་བ་དགྲོལ་བར་བྱ་ཕྱིར་ཡོངས་སུ་བསྔོ་བྱེད་ཅིང༌",
),
Version(
span=span(start=1083, end=1083, errors=None),
- metadata=None,
- operation="insertion",
+ operation=VersionVariantOperations.INSERTION,
text="\n",
),
Version(
span=span(start=1170, end=1213, errors=None),
- metadata=None,
- operation="deletion",
+ operation=VersionVariantOperations.DELETION,
text="",
),
Version(
span=span(start=1279, end=1279, errors=None),
- metadata=None,
- operation="insertion",
+ operation=VersionVariantOperations.INSERTION,
text="པར་",
),
Version(
span=span(start=1322, end=1323, errors=None),
- metadata=None,
- operation="deletion",
+ operation=VersionVariantOperations.DELETION,
text="",
),
Version(
span=span(start=1323, end=1323, errors=None),
- metadata=None,
- operation="insertion",
+ operation=VersionVariantOperations.INSERTION,
text="བ",
),
Version(
span=span(start=1441, end=1444, errors=None),
- metadata=None,
- operation="deletion",
+ operation=VersionVariantOperations.DELETION,
text="",
),
Version(
span=span(start=1497, end=1500, errors=None),
- metadata=None,
- operation="deletion",
+ operation=VersionVariantOperations.DELETION,
text="",
),
Version(
span=span(start=1573, end=1585, errors=None),
- metadata=None,
- operation="deletion",
+ operation=VersionVariantOperations.DELETION,
text="",
),
Version(
span=span(start=1616, end=1617, errors=None),
- metadata=None,
- operation="deletion",
+ operation=VersionVariantOperations.DELETION,
text="",
),
]
@@ -244,70 +231,71 @@ def test_parse(self):
ann_store=AnnotationStore(file=str(self.pecha.layer_path / seg_layer_path)),
include_span=True,
)
+
expected_seg_anns = [
{
- "index": 1,
+ "id": "1",
"segmentation_type": "segmentation",
"text": "བུ་མ་འཇུག་པ་ལས་སེམས་བསྐྱེད་དྲུག་པ། ཤོ་ལོ་ཀ ༡-༦༤ མངོན་དུ་ཕྱོགས་པར་མཉམ་བཞག་སེམས་གནས་ཏེ། །",
"span": {"start": 0, "end": 87},
},
{
- "index": 2,
+ "id": "2",
"segmentation_type": "segmentation",
"text": "ྫོགས་པའི་སངས་རྒྱས་ཆོས་ལ་མངོན་ཕྱོགས་ཤིང༌། །འདི་བརྟེན་འབྱུང་བའི་དེ་ཉིད་མཐོང་བ་དེས། །ཤེས་རབ་གནས་པས་འགོག་པ་ཐོབ་པར་འགྱུར། །",
"span": {"start": 88, "end": 206},
},
{
- "index": 3,
+ "id": "3",
"segmentation_type": "segmentation",
"text": "ཇི་ལྟར་ལོང་བའི་ཚོགས་ཀུན་བདེ་བླག་ཏུ། །མིག་ལྡན་སྐྱེས་བུ་གཅིག་གིས་འདོད་པ་ཡི། །",
"span": {"start": 207, "end": 282},
},
{
- "index": 4,
+ "id": "4",
"segmentation_type": "segmentation",
"text": "ུལ་དུ་འཁྲིད་པ་དེ་བཞིན་འདིར་ཡང་བློས། །མིག་ཉམས་ཡོན་ཏན་བླངས་ཏེ་རྒྱལ་ཉིད་འགྲོ། །",
"span": {"start": 283, "end": 359},
},
{
- "index": 5,
+ "id": "5",
"segmentation_type": "segmentation",
"text": "ཇི་ལྟར་དེ་ཡིས་ཆེས་ཟབ་ཆོས་རྟོགས་པ། །ལུང་དང་གཞན་ཡང་རིགས་པས་ཡིན་པས་ན། །དེ་ལྟར་འཕགས་པ་ཀླུ་སྒྲུབ་གཞུང་ལུགས་ལས། །ཇི་ལྟར་གནས་པའི་ལུགས་བཞིན་བརྗོད་པར་བྱ། ",
"span": {"start": 360, "end": 505},
},
{
- "index": 6,
+ "id": "6",
"segmentation_type": "segmentation",
"text": "\nསོ་སོ་སྐྱེ་བོའི་དུས་ནའང་སྟོང་པ་ཉིད་ཐོས་ནས། །ནང་དུ་རབ་ཏུ་དགའ་བ་ཡང་དང་ཡང་དུ་འབྱུང༌། །རབ་ཏུ་དགའ་བ་ལས་བྱུང་མཆི་མས་མིག་བརླན་ཞིང༌། །ལུས་ཀྱི་བ་སྤུ་ལྡང་པར་འགྱུར་པ་གང་ཡིན་པ། །\nདེ་ལ་རྫོགས་པའི་སངས་རྒྱས་བློ་ཡི་ས་བོན་ཡོད། །དེ་ཉིད་ཉེ་བར་བསྟན་པའི་སྣོད་ནི་དེ་ཡིན་ཏེ། །དེ་ལ་དམ་པའི་དོན་གྱི་བདེན་པ་བསྟན་པར་བྱ། །དེ་ལ་དེ་ཡི་རྗེས་སུ་འགྲོ་བའི་ཡོན་ཏན་འབྱུང༌། །\nརྟག་ཏུ་ཚུལ་ཁྲིམས་ཡང་དག་བླངས་ནས་གནས་པར་འག",
"span": {"start": 506, "end": 884},
},
{
- "index": 7,
+ "id": "7",
"segmentation_type": "segmentation",
"text": "ུར། །སྦྱིན་པ་གཏོང་བར་འགྱུར་ཞིང་སྙིང་རྗེ་བསྟེན་པར་བྱེད། །བཟོད་པ་སྒོམ་བྱེད་དེ་ཡི་དགེ་བའང་བྱང་ཆུབ་ཏུ། །འགྲོ་བ་དགྲོལ་བར་བྱ་ཕྱིར་ཡོངས་སུ་བསྔོ་བྱེད་ཅིང༌། །\nརྫོགས་པའི་བྱང་ཆུབ་སེམས་དཔའ་རྣམས་ལ་གུས་པར་བྱེད། །ཟབ་ཅིང་རྒྱ་ཆེའི་ཚུལ་ལ་མཁས་པའི་སྐྱེ་བོས་ནི། །རིམ་གྱིས་རབ་ཏུ་དགའ་བའི་ས་ནི་འཐོབ་འགྱུར་བས།",
"span": {"start": 885, "end": 1169},
},
{
- "index": 8,
+ "id": "8",
"segmentation_type": "segmentation",
"text": "།དེ་ནི་དོན་དུ་གཉེར་བས་ལམ་འདི་མཉན་པར་གྱིས། །",
"span": {"start": 1170, "end": 1213},
},
{
- "index": 9,
+ "id": "9",
"segmentation_type": "segmentation",
"text": "དེ་ཉིད་དེ་ལས་འབྱུང་མིན་གཞན་དག་ལས་ལྟ་ག་ལ་ཞིག །གཉིས་ཀ་ལས་ཀྱང་མ་ཡིན་རྒྱུ་མེད་པར་ནི་ག་ལ་ཡོད། །དེ་ནི་དེ་ལས་འབྱུང་ན་ཡོན་ཏན་འགའ་ཡང་ཡོད་མ་ཡིན། །སྐྱེས་པར་གྱུར་པ་སླར་ཡང་སྐྱེ་བར་རིགས་པའང་མ་ཡིན་ཉིད། །\nསྐྱེ",
"span": {"start": 1214, "end": 1407},
},
{
- "index": 10,
+ "id": "10",
"segmentation_type": "segmentation",
"text": "་ཟིན་སླར་ཡང་སྐྱེ་བར་ཡོངས་སུ་རྟོག་པར་འགྱུར་ན་ནི། །མྱུ་གུ་ལ་སོགས་རྣམས་ཀྱི་སྐྱེ་བ་འདིར་རྙེད་མི་འགྱུར་ཞིང༌། །ས་བོན་སྲིད་མཐར་ཐུག་པར་རབ་ཏུ་སྐྱེ་བ་ཉིད་དུ་འགྱུར། །ཇི་ལྟར་དེ་ཉིད་ཀྱིས་དེ་",
"span": {"start": 1408, "end": 1585},
},
]
assert seg_anns == expected_seg_anns
-
+
version_anns = get_anns(
ann_store=AnnotationStore(file=str(self.pecha.layer_path / version_path)),
include_span=True,
diff --git a/tests/pecha/test_annotation.py b/tests/pecha/test_annotation.py
index 87868116..bf23821b 100644
--- a/tests/pecha/test_annotation.py
+++ b/tests/pecha/test_annotation.py
@@ -1,16 +1,10 @@
-import json
-
import pytest
from pydantic import ValidationError
from openpecha.pecha.annotations import (
- AnnotationModel,
BaseAnnotation,
- PechaAlignment,
- PechaId,
span,
)
-from openpecha.pecha.layer import AnnotationType
def test_span_end_must_not_be_less_than_start():
@@ -21,362 +15,4 @@ def test_span_end_must_not_be_less_than_start():
def test_annotation_id():
ann = BaseAnnotation(span=span(start=10, end=20))
assert ann.span.start == 10
- assert ann.span.end == 20
- assert ann.metadata is None
-
-
-def test_pechaid_valid():
- pid = PechaId.validate("I1234ABCD")
- assert pid == "I1234ABCD"
-
-
-def test_pechaid_invalid():
- with pytest.raises(ValueError):
- PechaId.validate("X1234ABCD")
- with pytest.raises(ValueError):
- PechaId.validate("I1234ABC") # too short
- with pytest.raises(ValueError):
- PechaId.validate("I1234ABCDE") # too long
- with pytest.raises(ValueError):
- PechaId.validate("I1234abcD") # lowercase
-
-
-def test_pecha_alignment_fields():
- pa = PechaAlignment(pecha_id="I1234ABCD", alignment_id="align1")
- assert pa.pecha_id == "I1234ABCD"
- assert pa.alignment_id == "align1"
-
-
-def test_annotation_model_minimal_alignment():
- align = PechaAlignment(pecha_id="I1234ABCD", alignment_id="align1")
- am = AnnotationModel(
- pecha_id="I1234ABCD",
- type=AnnotationType.ALIGNMENT,
- document_id="doc1",
- path="ann1",
- title="Test",
- aligned_to=align,
- )
- assert am.pecha_id == "I1234ABCD"
- assert am.type == AnnotationType.ALIGNMENT
- assert am.aligned_to == align
-
-
-def test_annotation_model_minimal_non_alignment():
- am = AnnotationModel(
- pecha_id="I1234ABCD",
- type=AnnotationType.SEGMENTATION,
- document_id="doc1",
- path="ann1",
- title="Test",
- )
- assert am.pecha_id == "I1234ABCD"
- assert am.type == AnnotationType.SEGMENTATION
- assert am.aligned_to is None
-
-
-def test_annotation_model_with_alignment():
- align = PechaAlignment(pecha_id="I1234ABCD", alignment_id="align1")
- am = AnnotationModel(
- pecha_id="I1234ABCD",
- type=AnnotationType.ALIGNMENT,
- document_id="doc1",
- path="ann1",
- title="Test",
- aligned_to=align,
- )
- assert am.aligned_to is not None
- assert am.aligned_to.pecha_id == "I1234ABCD"
- assert am.aligned_to.alignment_id == "align1"
-
-
-def test_annotation_model_invalid_pechaid():
- with pytest.raises(ValidationError):
- AnnotationModel(
- pecha_id="BADID",
- type=AnnotationType.ALIGNMENT,
- document_id="doc1",
- path="ann1",
- title="Test",
- )
-
-
-def test_annotation_model_missing_required():
- with pytest.raises(ValidationError):
- AnnotationModel(
- pecha_id="I1234ABCD",
- type=AnnotationType.ALIGNMENT,
- document_id="doc1",
- # path missing
- title="Test",
- )
-
-
-class TestValidAnnotationModel:
- """Tests for valid annotation models in different scenarios."""
-
- def test_valid_annotation_minimal(self):
- """Test minimal valid annotation with default values."""
- input_data = {
- "pecha_id": "I12345678",
- "document_id": "DOC123",
- "title": "Test Annotation",
- "path": "E11/layer.json",
- }
-
- model = AnnotationModel(**input_data)
- assert str(model.pecha_id) == "I12345678"
- assert model.document_id == "DOC123"
- assert model.title == "Test Annotation"
- assert model.type == AnnotationType.SEGMENTATION
- assert model.aligned_to is None
-
- def test_valid_annotation_with_type(self):
- """Test valid annotation with explicit type."""
- input_data = {
- "pecha_id": "I12345678",
- "document_id": "DOC123",
- "title": "Test Alignment Annotation",
- "type": "alignment",
- "path": "E11/layer.json",
- }
-
- model = AnnotationModel(**input_data)
- assert str(model.pecha_id) == "I12345678"
- assert model.document_id == "DOC123"
- assert model.title == "Test Alignment Annotation"
- assert model.type == AnnotationType.ALIGNMENT
- assert model.aligned_to is None
-
- def test_valid_annotation_with_alignment(self):
- """Test valid annotation with alignment information."""
- input_data = {
- "pecha_id": "I12345678",
- "document_id": "DOC123",
- "title": "Test Annotation with Alignment",
- "type": "alignment",
- "path": "E11/layer.json",
- "aligned_to": {
- "pecha_id": "I87654321",
- "alignment_id": "ALIGN001",
- },
- }
-
- model = AnnotationModel(**input_data)
- assert str(model.pecha_id) == "I12345678"
- assert model.document_id == "DOC123"
- assert model.title == "Test Annotation with Alignment"
- assert model.type == AnnotationType.ALIGNMENT
- assert model.aligned_to is not None
- assert model.model_dump()["aligned_to"]["pecha_id"] == "I87654321"
- assert model.model_dump()["aligned_to"]["alignment_id"] == "ALIGN001"
-
- def test_valid_annotation_from_dict(self):
- """Test creating a valid annotation from a dictionary."""
- input_data = {
- "pecha_id": "I12345678",
- "document_id": "DOC123",
- "title": "Test Dict Annotation",
- "path": "E11/layer.json",
- }
-
- model = AnnotationModel.model_validate(input_data)
- assert str(model.pecha_id) == "I12345678"
- assert model.document_id == "DOC123"
- assert model.title == "Test Dict Annotation"
- assert model.type == AnnotationType.SEGMENTATION
-
-
-class TestInvalidAnnotationModel:
- """Tests for invalid annotation models that should raise validation errors."""
-
- def test_invalid_pecha_id_format(self):
- """Test that invalid pecha_id format raises ValidationError."""
- with pytest.raises(ValidationError) as exc_info:
- AnnotationModel(
- pecha_id="invalid_id", # Should start with I and contain 8 hex chars
- document_id="DOC123",
- title="Invalid ID Test",
- path="E11/layer.json",
- )
-
- # Check the specific validation error message
- errors = exc_info.value.errors()
- assert any(
- err["loc"] == ("pecha_id",)
- and "PechaId must start with 'I' followed by 8 uppercase hex characters"
- in err["msg"]
- for err in errors
- )
-
- def test_missing_document_id(self):
- """Test that missing document_id raises ValidationError."""
- with pytest.raises(ValidationError) as exc_info:
- AnnotationModel(
- pecha_id="I12345678",
- # Missing document_id
- title="Missing Document ID Test",
- )
-
- errors = exc_info.value.errors()
- assert any("document_id" in str(err) for err in errors)
- assert any("Field required" in str(err) for err in errors)
-
- def test_missing_title(self):
- """Test that missing title raises ValidationError."""
- with pytest.raises(ValidationError) as exc_info:
- AnnotationModel(
- pecha_id="I12345678",
- document_id="DOC123",
- # Missing title
- )
-
- errors = exc_info.value.errors()
- assert any("title" in str(err) for err in errors)
- assert any("Field required" in str(err) for err in errors)
-
- def test_empty_title(self):
- """Test that empty title raises ValidationError."""
- with pytest.raises(ValidationError) as exc_info:
- AnnotationModel(
- pecha_id="I12345678",
- document_id="DOC123",
- title="", # Empty title
- )
-
- errors = exc_info.value.errors()
- assert any("title" in str(err) for err in errors)
- assert any("min_length" in str(err) for err in errors)
-
- def test_invalid_document_id(self):
- """Test that invalid document_id raises ValidationError."""
- with pytest.raises(ValidationError) as exc_info:
- AnnotationModel(
- pecha_id="I12345678",
- document_id="", # Empty document_id
- title="Invalid Document ID Test",
- )
-
- errors = exc_info.value.errors()
- assert any("document_id" in str(err) for err in errors)
- assert any("pattern" in str(err) for err in errors)
-
- def test_invalid_type(self):
- """Test that invalid type raises ValidationError."""
- with pytest.raises(ValidationError) as exc_info:
- AnnotationModel(
- pecha_id="I12345678",
- document_id="DOC123",
- title="Invalid Type Test",
- type="invalid_type", # Not in AnnotationType enum
- )
-
- errors = exc_info.value.errors()
- assert any("type" in str(err) for err in errors)
- assert any("enum" in str(err) for err in errors)
-
- def test_invalid_aligned_to(self):
- """Test that invalid aligned_to raises ValidationError."""
- with pytest.raises(ValidationError) as exc_info:
- AnnotationModel(
- pecha_id="I12345678",
- document_id="DOC123",
- title="Invalid Alignment Test",
- type="Alignment",
- path="E11/layer.json",
- aligned_to={
- "pecha_id": "invalid_id", # Invalid pecha_id format
- "alignment_id": "ALIGN001",
- },
- )
-
- errors = exc_info.value.errors()
- assert any("aligned_to" in str(err) for err in errors)
- assert any(
- err["loc"] == ("aligned_to", "pecha_id")
- and "PechaId must start with 'I' followed by 8 uppercase hex characters"
- in err["msg"]
- for err in errors
- )
-
- def test_missing_alignment_id(self):
- """Test that missing alignment_id in aligned_to raises ValidationError."""
- with pytest.raises(ValidationError) as exc_info:
- AnnotationModel(
- pecha_id="I12345678",
- document_id="DOC123",
- title="Missing Alignment ID Test",
- type="alignment",
- aligned_to={
- "pecha_id": "I87654321",
- # Missing alignment_id
- },
- )
-
- errors = exc_info.value.errors()
- assert any("aligned_to" in str(err) for err in errors)
-
-
-class TestAnnotationModelSerialization:
- """Tests for serialization of annotation models."""
-
- def test_model_dump(self):
- """Test model_dump() produces the expected dictionary."""
- model = AnnotationModel(
- pecha_id="I12345678",
- document_id="DOC123",
- title="Serialization Test",
- type="alignment",
- path="E11/layer.json",
- aligned_to={
- "pecha_id": "I87654321",
- "alignment_id": "ALIGN001",
- },
- )
-
- data = model.model_dump()
- assert data["pecha_id"] == "I12345678" # Should be string not nested object
- assert data["document_id"] == "DOC123"
- assert data["title"] == "Serialization Test"
- assert data["type"] == AnnotationType.ALIGNMENT
- assert data["path"] == "E11/layer.json"
- assert data["aligned_to"]["pecha_id"] == "I87654321"
- assert data["aligned_to"]["alignment_id"] == "ALIGN001"
-
- def test_model_dump_json(self):
- """Test model_dump_json() produces valid JSON with expected structure."""
- model = AnnotationModel(
- pecha_id="I12345678",
- document_id="DOC123",
- title="JSON Serialization Test",
- path="E11/layer.json",
- )
-
- json_str = model.model_dump_json()
- data = json.loads(json_str)
-
- assert data["pecha_id"] == "I12345678"
- assert data["document_id"] == "DOC123"
- assert data["title"] == "JSON Serialization Test"
- assert data["type"] == "segmentation"
- assert data["path"] == "E11/layer.json"
- assert data["aligned_to"] is None
-
- def test_json_schema(self):
- """Test the JSON schema is correctly generated."""
- schema = AnnotationModel.model_json_schema()
-
- # Check basic schema structure
- assert "properties" in schema
- assert "pecha_id" in schema["properties"]
- assert "document_id" in schema["properties"]
- assert "title" in schema["properties"]
- assert "type" in schema["properties"]
- assert "aligned_to" in schema["properties"]
-
- # Check required fields
- assert "required" in schema
- required_fields = schema["required"]
- assert "pecha_id" in required_fields
- assert "document_id" in required_fields
- assert "title" in required_fields
+ assert ann.span.end == 20
\ No newline at end of file
diff --git a/tests/pecha/test_create_pecha.py b/tests/pecha/test_create_pecha.py
index 6f12eff6..a711c3e3 100644
--- a/tests/pecha/test_create_pecha.py
+++ b/tests/pecha/test_create_pecha.py
@@ -1,4 +1,4 @@
-from openpecha.pecha import Pecha, get_anns, get_annotation_type
+from openpecha.pecha import Pecha, get_anns
from openpecha.utils import read_json, convert_to_base_annotation
from pathlib import Path
from openpecha.pecha.layer import AnnotationType
@@ -9,25 +9,37 @@ def test_create_pecha():
data = read_json("tests/pecha/data/ITEST001.json")
annotation = [convert_to_base_annotation(ann) for ann in data["annotation"]]
annotation_id = generate_id()
- pecha = Pecha.create_pecha(pecha_id=data["pecha_id"], base_text=data["base_text"], annotation_id=annotation_id, annotation=annotation)
-
+ pecha = Pecha.create_pecha(pecha_id=data["pecha_id"], base_text=data["base_text"], annotation_id=annotation_id, annotation=annotation, annotation_type=AnnotationType.ALIGNMENT)
+ # The created pecha must keep the requested id and base text.
assert pecha.id == data["pecha_id"]
base_name = list(pecha.bases.keys())[0]
assert pecha.bases[base_name] == data["base_text"]
ann_store, _ = pecha.get_layer_by_ann_type(base_name=base_name, layer_type=AnnotationType.ALIGNMENT)
+
# ann_store is a list, we need to use the first AnnotationStore
created_annotations = get_anns(ann_store[0] if isinstance(ann_store, list) else ann_store, include_span=True)
assert len(created_annotations) == len(data["annotation"])
first_created = created_annotations[0]
+
first_original = data["annotation"][0]
assert first_created["span"]["start"] == first_original["span"]["start"]
assert first_created["span"]["end"] == first_original["span"]["end"]
- assert first_created["index"] == first_original["index"]
+ assert not first_created.get("index")
assert first_created["alignment_index"] == first_original["alignment_index"]
+
+ pecha_annotation = pecha.annotations[0]
+ assert pecha_annotation["span"]["start"] == first_original["span"]["start"]
+ assert pecha_annotation["span"]["end"] == first_original["span"]["end"]
+ assert pecha_annotation.get("id") is not None
+ # pecha_annotation must expose exactly the keys "span" and "id" (no extras, none missing)
+ expected_keys = {"span", "id"}
+ actual_keys = set(pecha_annotation.keys())
+ assert actual_keys <= expected_keys, f"Unexpected keys found: {actual_keys - expected_keys}"
+ assert expected_keys <= actual_keys, f"Missing expected keys: {expected_keys - actual_keys}"
def test_add():
data = read_json("tests/pecha/data/ITEST001_alignment.json")
@@ -36,7 +48,7 @@ def test_add():
base_name = next(iter(pecha.bases))
annotation_id = generate_id()
- annotation_id = pecha.add(annotation_id=annotation_id, annotation=annotation)
+ annotation_id = pecha.add(annotation_id=annotation_id, annotation=annotation, annotation_type=AnnotationType.ALIGNMENT)
ann_store, _ = pecha.get_layer_by_ann_type(base_name=base_name, layer_type=AnnotationType.ALIGNMENT)
@@ -45,14 +57,15 @@ def test_add():
assert len(created_annotations) == len(data["annotation"])
first_created = created_annotations[0]
+
first_original = data["annotation"][0]
assert first_created["span"]["start"] == first_original["span"]["start"]
assert first_created["span"]["end"] == first_original["span"]["end"]
- assert first_created["index"] == first_original["index"]
+ assert not first_created.get("index")
assert first_created["alignment_index"] == first_original["alignment_index"]
# Clean up - remove the added annotation layer to keep test data clean
- ann_type = get_annotation_type(annotation)
+ ann_type = AnnotationType.ALIGNMENT
annotation_layer_file = pecha.layer_path / base_name / f"{ann_type.value}-{annotation_id}.json"
if annotation_layer_file.exists():
annotation_layer_file.unlink()
\ No newline at end of file
diff --git a/tests/pecha/test_update.py b/tests/pecha/test_update.py
deleted file mode 100644
index 839f6729..00000000
--- a/tests/pecha/test_update.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from openpecha.pecha import Pecha
-from pathlib import Path
-from openpecha.utils import read_json, convert_to_base_annotation
-import subprocess
-from openpecha.pecha.layer import AnnotationType
-from openpecha.pecha import get_anns
-
-
-
-pecha = Pecha.from_path(Path(f"tests/pecha/update/data/ID8Sv2ynVKZX8wIt"))
-annotation_id = "Tm3Uewnh3ySsvgIE"
-annotation = [convert_to_base_annotation(ann) for ann in read_json("tests/pecha/update/data/updated_segmentation.json")]
-layer_type = AnnotationType.SEGMENTATION
-
-
-def test_update_annotation():
- updated_pecha = pecha.update_annotation(annotation_id=annotation_id, annotation=annotation, layer_type=layer_type)
- assert updated_pecha.id == pecha.id
- base_name = list(pecha.bases.keys())[0]
- ann_store, _ = pecha.get_layer_by_ann_type(base_name=base_name, layer_type=layer_type)
-
- created_annotations = get_anns(ann_store[0] if isinstance(ann_store, list) else ann_store, include_span=True)
-
- assert len(created_annotations) == len(annotation)
- subprocess.run("rm -rf tests/pecha/update/data/ID8Sv2ynVKZX8wIt", shell=True)
- subprocess.run("cp -r tests/pecha/serializers/json/data/ID8Sv2ynVKZX8wIt tests/pecha/update/data/ID8Sv2ynVKZX8wIt", shell=True)
\ No newline at end of file
diff --git a/tests/pecha/update/data/ID8Sv2ynVKZX8wIt/layers/26E4/segmentation-Tm3Uewnh3ySsvgIE.json b/tests/pecha/update/data/ID8Sv2ynVKZX8wIt/layers/26E4/segmentation-Tm3Uewnh3ySsvgIE.json
deleted file mode 100644
index bce267dc..00000000
--- a/tests/pecha/update/data/ID8Sv2ynVKZX8wIt/layers/26E4/segmentation-Tm3Uewnh3ySsvgIE.json
+++ /dev/null
@@ -1,1320 +0,0 @@
-{
- "@type": "AnnotationStore",
- "@id": "ID8Sv2ynVKZX8wIt",
- "resources": [
- {
- "@type": "TextResource",
- "@id": "26E4",
- "@include": "../../base/26E4.txt"
- }
- ],
- "annotationsets": [
- {
- "@type": "AnnotationDataSet",
- "@id": "segmentation_annotation",
- "keys": [
- {
- "@type": "DataKey",
- "@id": "index"
- },
- {
- "@type": "DataKey",
- "@id": "segmentation_type"
- }
- ],
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "84A8849AB4",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 1
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "key": "segmentation_type",
- "value": {
- "@type": "String",
- "value": "segmentation"
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "222D05AA0A",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 2
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "E3785540BA",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 3
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "418F632528",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 4
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "9E813CA691",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 5
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "95ABD8599D",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 6
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "9AE80BBF3E",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 7
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "EC333836BE",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 8
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "A561344039",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 9
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "DBECC4EF6F",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 10
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "4F48498554",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 11
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "4B3DB7A8E7",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 12
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "3DDFE49DED",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 13
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "B6C64F6BBB",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 14
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "FFAF411C2C",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 15
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "232E82AD4C",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 16
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "F290DD99A9",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 17
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "CE5C4051BA",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 18
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "DE143AE56E",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 19
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "D49C52979C",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 20
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "4DF7691F34",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 21
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "0D98A8BED4",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 22
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "BC498F81BD",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 23
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "CC3CCCF793",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 24
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "6661E11FF6",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 25
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "E38E1956F9",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 26
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "91D6998190",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 27
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "34B618B6DD",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 28
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "10F713FAC8",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 29
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "DE82D2441F",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 30
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "C13C1ACA0C",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 31
- }
- },
- {
- "@type": "AnnotationData",
- "@id": "07FB3155B3",
- "key": "index",
- "value": {
- "@type": "Int",
- "value": 32
- }
- }
- ]
- }
- ],
- "annotations": [
- {
- "@type": "Annotation",
- "@id": "B07549701B",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 0
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 54
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "84A8849AB4",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "60010AB6CD",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 55
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 110
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "222D05AA0A",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "C8B26CFFA4",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 111
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 175
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "E3785540BA",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "5C519C091F",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 176
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 193
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "418F632528",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "A99CFE3DC3",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 194
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 251
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "9E813CA691",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "BCBF7B0B44",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 252
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 287
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "95ABD8599D",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "74CAE409E9",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 288
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 428
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "9AE80BBF3E",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "2E2FB82547",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 429
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 527
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "EC333836BE",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "43ECDEE9B0",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 528
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 669
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "A561344039",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "AA18B6218B",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 670
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 730
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "DBECC4EF6F",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "418BE00783",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 731
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 861
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "4F48498554",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "DA91DA4BE4",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 862
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 1321
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "4B3DB7A8E7",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "B4245065D7",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 1322
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 1362
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "3DDFE49DED",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "0F1030EE43",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 1363
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 1363
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "B6C64F6BBB",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "97F7C93C66",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 1364
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 1435
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "FFAF411C2C",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "3A307BA7F7",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 1436
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 1516
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "232E82AD4C",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "B8EE2A4C35",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 1517
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 1667
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "F290DD99A9",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "80D90F8088",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 1668
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 1888
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "CE5C4051BA",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "7BBBEC254B",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 1889
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 1976
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "DE143AE56E",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "7CE016E497",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 1977
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 2155
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "D49C52979C",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "CEDCB38435",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 2156
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 2269
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "4DF7691F34",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "8FF1A6F3F3",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 2270
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 2366
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "0D98A8BED4",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "671711C44C",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 2367
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 2527
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "BC498F81BD",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "0CFE1EF97C",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 2528
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 2763
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "CC3CCCF793",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "0BFEA4BBEF",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 2764
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 2821
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "6661E11FF6",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "6D8BDFC8FD",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 2822
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 2923
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "E38E1956F9",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "74E5403E03",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 2924
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 3106
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "91D6998190",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "3F2B818452",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 3107
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 3216
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "34B618B6DD",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "D050E18B90",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 3217
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 3262
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "10F713FAC8",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "4544050EE3",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 3263
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 3305
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "DE82D2441F",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "9853B76832",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 3306
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 3548
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "C13C1ACA0C",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- },
- {
- "@type": "Annotation",
- "@id": "769E1974BB",
- "target": {
- "@type": "TextSelector",
- "resource": "26E4",
- "offset": {
- "@type": "Offset",
- "begin": {
- "@type": "BeginAlignedCursor",
- "value": 3549
- },
- "end": {
- "@type": "BeginAlignedCursor",
- "value": 3606
- }
- }
- },
- "data": [
- {
- "@type": "AnnotationData",
- "@id": "07FB3155B3",
- "set": "segmentation_annotation"
- },
- {
- "@type": "AnnotationData",
- "@id": "B144C11491",
- "set": "segmentation_annotation"
- }
- ]
- }
- ]
-}
\ No newline at end of file
diff --git a/tests/test_ids.py b/tests/test_ids.py
index 037eee7e..69140a7e 100644
--- a/tests/test_ids.py
+++ b/tests/test_ids.py
@@ -3,8 +3,6 @@
from openpecha.ids import (
get_annotation_id,
get_base_id,
- get_id,
- get_initial_pecha_id,
get_layer_id,
get_uuid,
)
@@ -16,16 +14,6 @@ def test_get_uuid():
r"^[0-9a-fA-F]{32}$", uuid
), f"UUID {uuid} is not in the correct format"
-
-def test_get_id():
- prefix = "T"
- length = 4
- generated_id = get_id(prefix, length)
- assert re.match(
- r"^T[0-9A-F]{4}$", generated_id
- ), f"ID {generated_id} is not in the correct format"
-
-
def test_get_base_id():
base_id = get_base_id()
assert re.match(
@@ -40,13 +28,6 @@ def test_get_layer_id():
), f"Layer ID {layer_id} is not in the correct format"
-def test_get_initial_pecha_id():
- initial_pecha_id = get_initial_pecha_id()
- assert re.match(
- r"^I[0-9A-F]{8}$", initial_pecha_id
- ), f"Initial Pecha ID {initial_pecha_id} is not in the correct format"
-
-
def test_get_annotation_id():
ann_id = get_annotation_id()
assert len(ann_id) == 10