Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8c72cf1
refactor: deleted tests and modified files
ta4tsering Oct 21, 2025
556efcf
removed index if annotation type is alignment or segmentation
tentse Oct 22, 2025
d55f43c
test parse
tentse Oct 22, 2025
97c5ae0
fix expected test parse annotation
tentse Oct 22, 2025
d3457b1
fix test_segmentation in edition parse
tentse Oct 22, 2025
bc64778
change operation=Enum selection rather than magic string
tentse Oct 22, 2025
5d0c6e8
fixed test_version test case
tentse Oct 22, 2025
1b9dfda
added id and span information in return part when creating pecha
tentse Oct 22, 2025
6a7b333
check index not present in annotation
tentse Oct 22, 2025
d3a530f
removed pip install -e '.[github]' causing test issue on github workflow
tentse Oct 22, 2025
d2214fc
added back the pip install -e '.[github]
tentse Oct 22, 2025
7d30d8a
added print statement to check first_created
tentse Oct 22, 2025
3700864
removed popping of index since already removed from segmentation and …
tentse Oct 22, 2025
1d4cb66
removed index from BaseAnnotation class itself
tentse Oct 22, 2025
14d83dd
removed annotation layer distinguisher function
tentse Oct 22, 2025
0d8e315
exclude 'index' key from annotation data in get_anns function
tentse Oct 22, 2025
71a3c0c
enhance test_create_pecha to validate annotation keys and span values
tentse Oct 23, 2025
c67fdea
removed
tentse Oct 24, 2025
a9b66ae
removed load_metadata and metadata_path from Pecha class
tentse Oct 24, 2025
c7e1718
removed set_metadata from Pecha class
tentse Oct 24, 2025
715405e
removed get_annotation_type from Pecha
tentse Oct 24, 2025
d257bcd
remove metadata.py
tentse Oct 24, 2025
6a9b0a4
removed BUDA folder
tentse Oct 24, 2025
62a2171
removed all methods from utils.py from openpecha
tentse Oct 24, 2025
6005d8c
restored utils method in openpecha
tentse Oct 24, 2025
fae1d7a
final restore utils method in openpecha
tentse Oct 24, 2025
e6968fd
removed BUDA from test file
tentse Oct 24, 2025
0f9aff5
added metadata.py file back
tentse Oct 24, 2025
27edc9b
remove import metadata.py from pecha.__init__.py and testing
tentse Oct 24, 2025
4820a14
removed metadata.py after testing
tentse Oct 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file removed src/openpecha/buda/__init__.py
Empty file.
214 changes: 0 additions & 214 deletions src/openpecha/buda/api.py

This file was deleted.

84 changes: 20 additions & 64 deletions src/openpecha/pecha/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
)
from openpecha.pecha.annotations import BaseAnnotation
from openpecha.pecha.layer import AnnotationType
from openpecha.pecha.metadata import PechaMetaData
from openpecha.config import PECHAS_PATH

BASE_NAME = str
Expand All @@ -25,9 +24,8 @@ class Pecha:
def __init__(self, pecha_id: str, pecha_path: Path) -> None:
self.id = pecha_id
self.pecha_path = pecha_path
self.metadata = self.load_metadata()
self.bases = self.load_bases()
# self.annotations = self.load_annotations()
self.annotations = []

@classmethod
def from_path(cls, pecha_path: Path) -> "Pecha":
Expand Down Expand Up @@ -65,21 +63,28 @@ def create(cls, output_path: Optional[Path] = None, pecha_id: Optional[str] = No
return cls(pecha_id, pecha_path)

@classmethod
def create_pecha(cls, pecha_id: str, base_text: str, annotation_id: str, annotation: List[BaseAnnotation]) -> "Pecha":
def create_pecha(cls, pecha_id: str, base_text: str, annotation_id: str, annotation: List[BaseAnnotation], annotation_type: AnnotationType) -> "Pecha":
pecha = cls.create(pecha_id=pecha_id)
base_name = pecha.set_base(base_text)
ann_type = get_annotation_type(annotation)
ann_store, _ = pecha.add_layer(base_name=base_name, layer_type=ann_type, annotation_id=annotation_id)

ann_store, _ = pecha.add_layer(base_name=base_name, layer_type=annotation_type, annotation_id=annotation_id)
for single_annotation in annotation:
ann_store = pecha.add_annotation(ann_store=ann_store, annotation=single_annotation, layer_type=ann_type)
ann_store = pecha.add_annotation(ann_store=ann_store, annotation=single_annotation, layer_type=annotation_type)
ann_store.save()
annotations = get_anns(ann_store, include_span=True)
for annotation in annotations:
pecha.annotations.append({
"span": {
"start": annotation["span"]["start"],
"end": annotation["span"]["end"],
},
"id": annotation["id"]
})
return pecha


def add(self, annotation_id: str, annotation: List[BaseAnnotation]) -> "Pecha":
def add(self, annotation_id: str, annotation: List[BaseAnnotation], annotation_type: AnnotationType) -> "Pecha":
base_name = next(iter(self.bases))
ann_type = get_annotation_type(annotation)
ann_type = annotation_type
if check_annotation_exists(self.layer_path/base_name/f"{ann_type.value}-{annotation_id}.json"):
raise ValueError(f"Annotation with id {annotation_id} already exists")
ann_store, _ = self.add_layer(base_name=base_name, layer_type=ann_type, annotation_id=annotation_id)
Expand All @@ -102,20 +107,6 @@ def layer_path(self):
layer_path.mkdir(parents=True, exist_ok=True)
return layer_path

@property
def metadata_path(self):
    """Return the location of this pecha's ``metadata.json`` file.

    The path is built by joining ``self.pecha_path`` with the fixed file
    name ``metadata.json``; nothing is read from or written to disk here.
    """
    metadata_file_name = "metadata.json"
    return self.pecha_path / metadata_file_name


def load_metadata(self):
    """Load this pecha's metadata from ``self.metadata_path``.

    Returns:
        ``None`` when the metadata file does not exist; otherwise a
        ``PechaMetaData`` built from the JSON object stored in the file.
    """
    if not self.metadata_path.exists():
        return None

    # Decode explicitly as UTF-8. Relying on the platform default encoding
    # fails on non-UTF-8 locales as soon as the metadata holds non-ASCII text.
    with open(self.metadata_path, encoding="utf-8") as f:
        metadata = json.load(f)

    return PechaMetaData(**metadata)

def load_bases(self):
bases = {}
for base_file in self.base_path.rglob("*.txt"):
Expand Down Expand Up @@ -189,7 +180,6 @@ def add_annotation(
# Add Annotation Group Type
ann_group_type = layer_type.annotation_group_type
ann_data[ann_group_type.value] = layer_type.value

start, end = (
annotation.span.start,
annotation.span.end,
Expand Down Expand Up @@ -219,32 +209,9 @@ def add_annotation(
raise StamAddAnnotationError(
f"[Error] Failed to add annotation to STAM: {e}"
)

return ann_store

def set_metadata(self, pecha_metadata: Dict):
    """Validate, store and persist new metadata for this pecha.

    ``parser`` and ``initial_creation_type`` are carried over from the
    currently loaded metadata (or set to ``None`` when there is none)
    whenever the caller does not supply them.

    Args:
        pecha_metadata: Raw metadata fields. The mapping is copied before
            defaults are filled in, so the caller's dictionary is not
            modified.

    Returns:
        The ``PechaMetaData`` instance now held in ``self.metadata``.

    Raises:
        ValueError: If ``PechaMetaData`` rejects the supplied fields.
    """
    current = self.metadata

    # Work on a copy so filling in defaults does not leak into the caller.
    fields = dict(pecha_metadata)
    fields.setdefault("parser", current.parser if current else None)
    fields.setdefault(
        "initial_creation_type",
        current.initial_creation_type if current else None,
    )

    try:
        validated = PechaMetaData(**fields)
    except Exception as e:
        # Keep the original failure attached as the cause for debugging.
        raise ValueError(f"Invalid metadata: {e}") from e

    self.metadata = validated
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 rather than with the platform default encoding.
    with open(self.metadata_path, "w", encoding="utf-8") as f:
        json.dump(validated.to_dict(), f, ensure_ascii=False, indent=2)

    return validated

def get_segmentation_layer_path(self) -> str:
"""
1. Get the first layer file from the pecha
Expand All @@ -256,11 +223,6 @@ def get_segmentation_layer_path(self) -> str:

return relative_layer_path

def get_first_layer_path(self) -> str:
    """Return the path of the first ``*.json`` layer file that is found.

    ``self.layer_path`` is searched recursively. The first match, in the
    order ``rglob`` yields results, is expressed relative to the parent
    directory of ``self.pecha_path`` and returned as a POSIX-style string.

    Raises:
        IndexError: If no ``*.json`` file exists under ``self.layer_path``.
    """
    layer_files = list(self.layer_path.rglob("*.json"))
    first_layer_file = layer_files[0]
    pecha_parent = self.pecha_path.parent
    return first_layer_file.relative_to(pecha_parent).as_posix()

def get_layer_by_ann_type(self, base_name: str, layer_type: AnnotationType):
"""
Expand Down Expand Up @@ -296,7 +258,10 @@ def get_anns(ann_store: AnnotationStore, include_span: bool = False):
for ann in ann_store:
ann_data = {}
for data in ann:
ann_data[data.key().id()] = data.value().get()
k = data.key().id()
if k in ["index"]:
continue
ann_data[k] = data.value().get()
curr_ann = {**ann_data, "text": str(ann)}
if include_span:
curr_ann["span"] = {
Expand All @@ -310,15 +275,6 @@ def get_anns(ann_store: AnnotationStore, include_span: bool = False):
def load_layer(path: Path) -> AnnotationStore:
    """Open the annotation store saved at ``path``.

    ``path`` is converted to ``str`` and handed to ``AnnotationStore`` as
    its ``file`` argument.
    """
    store_file = str(path)
    return AnnotationStore(file=store_file)


def get_annotation_type(annotation: List[BaseAnnotation]):
    """Infer the annotation type from the first annotation in the list.

    Only the first element is inspected:

    * it has both ``index`` and ``alignment_index`` ->
      ``AnnotationType.ALIGNMENT``
    * it has ``index`` but no ``alignment_index`` ->
      ``AnnotationType.SEGMENTATION``

    Raises:
        ValueError: If the list is empty or the first element has no
            ``index`` attribute.
    """
    # An empty list has no first element to inspect. Report it with the same
    # error as any other unrecognised input instead of an IndexError.
    if not annotation:
        raise ValueError("Invalid annotation type")

    first = annotation[0]
    if not hasattr(first, "index"):
        raise ValueError("Invalid annotation type")

    if hasattr(first, "alignment_index"):
        return AnnotationType.ALIGNMENT
    return AnnotationType.SEGMENTATION

def check_annotation_exists(annotation_path: Path):
if annotation_path.exists():
return True
Expand Down
Loading