From a43a9f4a17af455cdbd263090dc0b9e22e08f712 Mon Sep 17 00:00:00 2001
From: KrishnaRani <thakurkrishnarani@gmail.com>
Date: Tue, 26 May 2026 18:21:43 +0200
Subject: [PATCH 1/3] PropMatch Aligner extension: adding feature for LLM/RAG
 alignment technique

---
 examples/property_alignment/propmatch_llm.py | 142 +++++++++++++++++++
 examples/property_alignment/propmatch_rag.py |  96 +++++++++++++
 ontoaligner/aligner/llm/dataset.py           | 105 ++++++++++++++
 ontoaligner/aligner/rag/dataset.py           | 113 ++++++++++++++-
 ontoaligner/aligner/rag/rag.py               |   2 +-
 ontoaligner/encoder/llm.py                   |  89 ++++++++++++
 ontoaligner/encoder/rag.py                   |  42 +++++-
 7 files changed, 586 insertions(+), 3 deletions(-)
 create mode 100644 examples/property_alignment/propmatch_llm.py
 create mode 100644 examples/property_alignment/propmatch_rag.py

diff --git a/examples/property_alignment/propmatch_llm.py b/examples/property_alignment/propmatch_llm.py
new file mode 100644
index 0000000..32163ff
--- /dev/null
+++ b/examples/property_alignment/propmatch_llm.py
@@ -0,0 +1,142 @@
+import json
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+from sklearn.linear_model import LogisticRegression
+
+from ontoaligner.ontology import PropertyOMDataset
+from ontoaligner.encoder import PropMatchEncoder
+from ontoaligner.aligner import AutoModelDecoderLLM
+from ontoaligner.aligner import PropertyFullTextLLMDataset
+
+from ontoaligner.postprocess import TFIDFLabelMapper, llm_postprocessor
+from ontoaligner.utils import metrics, xmlify
+
+# ---------------------------------------------------------
+# Step 1: Initialize the property ontology matching task
+# ---------------------------------------------------------
+task = PropertyOMDataset()
+
+print("Property Matching Task:", task)
+
+# ---------------------------------------------------------
+# Step 2: Collect source ontology, target ontology, and references
+# ---------------------------------------------------------
+dataset = task.collect(
+    source_ontology_path="../assets/MI-MatOnto/mi_ontology.xml",
+    target_ontology_path="../assets/MI-MatOnto/matonto_ontology.xml",
+    reference_matching_path="../assets/MI-MatOnto/property_matchings.xml",
+)
+
+# ---------------------------------------------------------
+# Step 3: Encode properties
+# ---------------------------------------------------------
+# PropMatchEncoder should produce property dictionaries containing:
+# iri, label, domain, range, inverse
+#
+# These fields are used by PropertyFullTextLLMDataset.
+encoder_model = PropMatchEncoder()
+
+source_onto, target_onto = encoder_model(
+    source=dataset["source"],
+    target=dataset["target"],
+)
+# ---------------------------------------------------------
+# Step 4: Prepare property LLM dataset
+# ---------------------------------------------------------
+llm_dataset = PropertyFullTextLLMDataset(
+    source_onto=source_onto,
+    target_onto=target_onto,
+)
+print("Number of property pairs:", len(llm_dataset))
+
+# ---------------------------------------------------------
+# Step 5: Create DataLoader
+# ---------------------------------------------------------
+dataloader = DataLoader(
+    llm_dataset,
+    batch_size=128,
+    shuffle=False,
+    collate_fn=llm_dataset.collate_fn,
+)
+
+# ---------------------------------------------------------
+# Step 6: Initialize LLM model
+# ---------------------------------------------------------
+model = AutoModelDecoderLLM(
+    device="cpu",        # Use "cpu" if GPU is not available
+    max_length=300,
+    max_new_tokens=10,
+)
+
+# ---------------------------------------------------------
+# Step 7: Load LLM
+# ---------------------------------------------------------
+model.load(
+    path="Qwen/Qwen2.5-0.5B-Instruct"
+)
+
+# ---------------------------------------------------------
+# Step 8: Generate LLM predictions
+# ---------------------------------------------------------
+predictions = []
+
+for batch in tqdm(dataloader):
+    prompts = batch["prompts"]
+    sequences = model.generate(prompts)
+    predictions.extend(sequences)
+
+print("Number of predictions:", len(predictions))
+
+
+# ---------------------------------------------------------
+# Step 9: Build the mapper that converts LLM outputs to yes/no
+# ---------------------------------------------------------
+label_dict = {
+    "yes": ["yes", "correct", "true", "positive", "valid"],
+    "no": ["no", "incorrect", "false", "negative", "invalid"],
+}
+
+mapper = TFIDFLabelMapper(
+    classifier=LogisticRegression(),
+    ngram_range=(1, 1),
+    label_dict=label_dict,
+)
+
+# ---------------------------------------------------------
+# Step 10: Post-process LLM predictions
+# ---------------------------------------------------------
+# llm_postprocessor keeps predicted "yes" pairs as final matchings.
+matchings = llm_postprocessor(
+    predicts=predictions,
+    mapper=mapper,
+    dataset=llm_dataset,
+)
+
+# ---------------------------------------------------------
+# Step 11: Evaluate property matchings
+# ---------------------------------------------------------
+evaluation = metrics.evaluation_report(
+    predicts=matchings,
+    references=dataset["reference"],
+)
+print("Property LLM Matching Evaluation Report:")
+print(json.dumps(evaluation, indent=4))
+
+# ---------------------------------------------------------
+# Step 12: Save XML matchings
+# ---------------------------------------------------------
+xml_str = xmlify.xml_alignment_generator(matchings=matchings)
+xml_output_file = "property_llm_matchings.xml"
+with open(xml_output_file, "w", encoding="utf-8") as xml_file:
+    xml_file.write(xml_str)
+print(f"Saved property LLM matchings XML to: {xml_output_file}")
+
+# ---------------------------------------------------------
+# Step 13: Save JSON matchings
+# ---------------------------------------------------------
+json_output_file = "property_llm_matchings.json"
+
+with open(json_output_file, "w", encoding="utf-8") as json_file:
+    json.dump(matchings, json_file, indent=4, ensure_ascii=False)
+
+print(f"Saved property LLM matchings JSON to: {json_output_file}")
\ No newline at end of file
diff --git a/examples/property_alignment/propmatch_rag.py b/examples/property_alignment/propmatch_rag.py
new file mode 100644
index 0000000..bb2537e
--- /dev/null
+++ b/examples/property_alignment/propmatch_rag.py
@@ -0,0 +1,96 @@
+import json
+
+from ontoaligner.ontology import PropertyOMDataset
+from ontoaligner.utils import metrics, xmlify
+from ontoaligner.aligner import FalconLLMBERTRetrieverRAG
+from ontoaligner.encoder import PropertyFullTextRAGEncoder
+from ontoaligner.postprocess import rag_hybrid_postprocessor
+
+# Step 1: Initialize the property ontology matching task
+task = PropertyOMDataset()
+print("Property Matching Task:", task)
+
+# Step 2: Collect source ontology, target ontology, and reference property alignments
+dataset = task.collect(
+    source_ontology_path="../assets/MI-MatOnto/mi_ontology.xml",
+    target_ontology_path="../assets/MI-MatOnto/matonto_ontology.xml",
+    reference_matching_path="../assets/MI-MatOnto/property_matchings.xml",
+)
+
+# Step 3: Initialize the property RAG encoder
+# This encoder should use:
+#   retrieval_encoder = PropMatchEncoder
+#   llm_encoder = "PropertyFullTextRAGDataset"
+encoder_model = PropertyFullTextRAGEncoder()
+
+# Step 4: Encode the property ontologies
+encoded_ontology = encoder_model(
+    source=dataset["source"],
+    target=dataset["target"],
+    reference=dataset["reference"],
+)
+
+# Step 5: Define model configuration
+config = {
+    "retriever_config": {
+        "device": "cpu",
+        "top_k": 5,
+        "threshold": 0.1,
+    },
+    "llm_config": {
+        "device": "cpu",
+        "max_length": 300,
+        "max_new_tokens": 5,
+        "huggingface_access_token": "",
+        "device_map": "auto",
+        "batch_size": 8,
+        "answer_set": {
+            "yes": ["yes", "correct", "true", "positive", "valid"],
+            "no": ["no", "incorrect", "false", "negative", "invalid"],
+        },
+    },
+}
+
+# Step 6: Initialize the normal RAG model
+model = FalconLLMBERTRetrieverRAG(**config)
+
+# Step 7: Load small LLM and retriever model
+model.load(
+    llm_path="Qwen/Qwen2.5-0.5B-Instruct",
+    ir_path="all-MiniLM-L6-v2",
+)
+
+# Step 8: Generate property matching predictions
+predicts = model.generate(input_data=encoded_ontology)
+
+
+# Step 9: Apply hybrid postprocessing
+hybrid_matchings, hybrid_configs = rag_hybrid_postprocessor(
+    predicts=predicts,
+    ir_score_threshold=0.4,
+    llm_confidence_th=0.5,
+)
+
+# Step 10: Evaluate property matchings
+evaluation = metrics.evaluation_report(
+    predicts=hybrid_matchings,
+    references=dataset["reference"],
+)
+
+print("Property Hybrid Matching Evaluation Report:")
+print(json.dumps(evaluation, indent=4))
+
+# Step 11: Print hybrid postprocessing configuration
+print("Property Hybrid Matching Obtained Configuration:")
+print(hybrid_configs)
+
+# Step 12: Convert final property matchings to XML
+xml_str = xmlify.xml_alignment_generator(matchings=hybrid_matchings)
+
+# Step 13: Save XML output
+output_file_path = "property_matchings.xml"
+
+with open(output_file_path, "w", encoding="utf-8") as xml_file:
+    xml_file.write(xml_str)
+
+print(f"Saved property matchings to: {output_file_path}")
\ No newline at end of file
diff --git a/ontoaligner/aligner/llm/dataset.py b/ontoaligner/aligner/llm/dataset.py
index 38d8abb..6a7dbf2 100644
--- a/ontoaligner/aligner/llm/dataset.py
+++ b/ontoaligner/aligner/llm/dataset.py
@@ -117,3 +117,108 @@ def fill_one_sample(self, input_data: Any) -> str:
             .replace("{target_children}", target_children)
         )
         return template
+
+class PropertyLLMDataset(LLMDataset):
+    prompt = """Determine whether the following two ontology properties represent the same semantic relation. Respond with "yes" or "no" only.
+### Property 1:
+{source}
+### Property 2:
+{target}
+### Your Answer:"""
+
+    def fill_one_sample(self, input_data: Any) -> str:
+        template = self.prompt
+
+        source = self.preprocess(input_data["source"].get("label", ""))
+        target = self.preprocess(input_data["target"].get("label", ""))
+
+        template = (
+            template.replace("{source}", source)
+            .replace("{target}", target)
+        )
+
+        return template
+
+class PropertyFullTextLLMDataset(LLMDataset):
+    prompt = """Determine whether the following two ontology properties represent the same semantic relation. Respond with "yes" or "no" only.
+### Property 1:
+{source}
+**Domain**: {source_domain}
+**Range**: {source_range}
+**Inverse**: {source_inverse}
+
+### Property 2:
+{target}
+**Domain**: {target_domain}
+**Range**: {target_range}
+**Inverse**: {target_inverse}
+
+### Your Answer:"""
+
+    def fill_one_sample(self, input_data: Any) -> str:
+        template = self.prompt
+
+        source = self.preprocess(input_data["source"].get("label", ""))
+        target = self.preprocess(input_data["target"].get("label", ""))
+
+        source_domain = (
+            " ".join(input_data["source"].get("domain_text", []))
+            if len(input_data["source"].get("domain_text", [])) > 0
+            else ""
+        )
+
+        target_domain = (
+            " ".join(input_data["target"].get("domain_text", []))
+            if len(input_data["target"].get("domain_text", [])) > 0
+            else ""
+        )
+
+        source_range = (
+            " ".join(input_data["source"].get("range_text", []))
+            if len(input_data["source"].get("range_text", [])) > 0
+            else ""
+        )
+
+        target_range = (
+            " ".join(input_data["target"].get("range_text", []))
+            if len(input_data["target"].get("range_text", [])) > 0
+            else ""
+        )
+
+        source_inverse = ""
+        if input_data["source"].get("inverse_of"):
+            source_inverse = (
+                " ".join(input_data["source"].get("inverse_label", []))
+                if len(input_data["source"].get("inverse_label", [])) > 0
+                else ""
+            )
+
+        target_inverse = ""
+        if input_data["target"].get("inverse_of"):
+            target_inverse = (
+                " ".join(input_data["target"].get("inverse_label", []))
+                if len(input_data["target"].get("inverse_label", [])) > 0
+                else ""
+            )
+
+        source_domain = self.preprocess(source_domain)
+        target_domain = self.preprocess(target_domain)
+
+        source_range = self.preprocess(source_range)
+        target_range = self.preprocess(target_range)
+
+        source_inverse = self.preprocess(source_inverse)
+        target_inverse = self.preprocess(target_inverse)
+
+        template = (
+            template.replace("{source}", source)
+            .replace("{target}", target)
+            .replace("{source_domain}", source_domain)
+            .replace("{target_domain}", target_domain)
+            .replace("{source_range}", source_range)
+            .replace("{target_range}", target_range)
+            .replace("{source_inverse}", source_inverse)
+            .replace("{target_inverse}", target_inverse)
+        )
+
+        return template
\ No newline at end of file
diff --git a/ontoaligner/aligner/rag/dataset.py b/ontoaligner/aligner/rag/dataset.py
index 135a745..d929155 100644
--- a/ontoaligner/aligner/rag/dataset.py
+++ b/ontoaligner/aligner/rag/dataset.py
@@ -26,7 +26,6 @@
 
 from torch.utils.data import Dataset
 
-
 class RAGDataset(Dataset):
     """
     A base dataset class for handling real-world entity classification tasks. This class preprocesses data and formats it into
@@ -229,3 +228,115 @@ def fill_one_sample(self, input_data: Any) -> str:
             .replace("{target_children}", target_children)
         )
         return template
+
+class PropertyRAGDataset(RAGDataset):
+    """
+    A subclass of RAGDataset used for ontology property matching using only property labels.
+    """
+
+    prompt = """Classify if two ontology properties represent the same semantic relation or not (answer only yes or no).
+### First property:
+{source}
+### Second property:
+{target}
+### Answer:"""
+
+    def fill_one_sample(self, input_data: Any) -> str:
+        template = self.prompt
+
+        source = self.preprocess(input_data["source"]["label"])
+        target = self.preprocess(input_data["target"]["label"])
+
+        template = (
+            template.replace("{source}", source)
+            .replace("{target}", target)
+        )
+
+        return template
+    
+class PropertyFullTextRAGDataset(RAGDataset):
+    """
+    A subclass of RAGDataset used for ontology property matching using property label,
+    domain, range, and inverse property.
+    """
+
+    prompt = """Classify if two ontology properties represent the same semantic relation or not (answer only yes or no).
+### First property:
+{source}
+Domain: {source_domain}
+Range: {source_range}
+Inverse: {source_inverse}
+
+### Second property:
+{target}
+Domain: {target_domain}
+Range: {target_range}
+Inverse: {target_inverse}
+
+### Answer:"""
+
+    def fill_one_sample(self, input_data: Any) -> str:
+        template = self.prompt
+
+        source = self.preprocess(input_data["source"]["label"])
+        target = self.preprocess(input_data["target"]["label"])
+
+        source_domain = (
+            " ".join(input_data["source"]["domain_text"])
+            if len(input_data["source"]["domain_text"]) > 0
+            else ""
+        )
+
+        target_domain = (
+            " ".join(input_data["target"]["domain_text"])
+            if len(input_data["target"]["domain_text"]) > 0
+            else ""
+        )
+
+        source_range = (
+            " ".join(input_data["source"]["range_text"])
+            if len(input_data["source"]["range_text"]) > 0
+            else ""
+        )
+
+        target_range = (
+            " ".join(input_data["target"]["range_text"])
+            if len(input_data["target"]["range_text"]) > 0
+            else ""
+        )
+
+        source_inverse = ""
+        if input_data["source"]["inverse_of"]:
+            source_inverse = (
+                " ".join(input_data["source"]["inverse_label"])
+                if len(input_data["source"]["inverse_label"]) > 0
+                else ""
+            )
+
+        target_inverse = ""
+        if input_data["target"]["inverse_of"]:
+            target_inverse = (
+                " ".join(input_data["target"]["inverse_label"])
+                if len(input_data["target"]["inverse_label"]) > 0
+                else ""
+            )
+
+        source_domain = self.preprocess(source_domain)
+        target_domain = self.preprocess(target_domain)
+        source_range = self.preprocess(source_range)
+        target_range = self.preprocess(target_range)
+        source_inverse = self.preprocess(source_inverse)
+        target_inverse = self.preprocess(target_inverse)
+
+        template = (
+            template.replace("{source}", source)
+            .replace("{target}", target)
+            .replace("{source_domain}", source_domain)
+            .replace("{target_domain}", target_domain)
+            .replace("{source_range}", source_range)
+            .replace("{target_range}", target_range)
+            .replace("{source_inverse}", source_inverse)
+            .replace("{target_inverse}", target_inverse)
+        )
+
+        return template
\ No newline at end of file
diff --git a/ontoaligner/aligner/rag/rag.py b/ontoaligner/aligner/rag/rag.py
index 33aea82..78ae2ca 100644
--- a/ontoaligner/aligner/rag/rag.py
+++ b/ontoaligner/aligner/rag/rag.py
@@ -235,7 +235,7 @@ def __init__(self, retriever = None, llm = None, retriever_config=None, llm_conf
             self.Retrieval = retriever(**self.kwargs["retriever_config"])
         if not llm:
             try:
-                self.Retrieval = self.LLM(**self.kwargs["llm_config"])
+                self.LLM = self.LLM(**self.kwargs["llm_config"])
             except Exception as error:
                 raise ValueError(f"{error}\n LLM model must be provided.")
         else:
diff --git a/ontoaligner/encoder/llm.py b/ontoaligner/encoder/llm.py
index 388e02d..60dbaad 100644
--- a/ontoaligner/encoder/llm.py
+++ b/ontoaligner/encoder/llm.py
@@ -157,3 +157,92 @@ def get_owl_items(self, owl: Dict) -> Any:
         """
         parents = ", ".join([parent["label"] for parent in owl["parents"]])
         return {"iri": owl["iri"], "concept": owl["label"], "parents": str(parents)}
+
+class PropertyLLMEncoder(LLMEncoder):
+    """
+    Encodes OWL/RDF items that represent properties.
+
+    This class inherits from the `LLMEncoder` class and is designed to encode OWL/RDF property items.
+    The `get_owl_items` method retrieves the IRI, label, and definition of the property.
+
+    Attributes:
+        items_in_owl (str): Specifies the type of OWL items being encoded, in this case, a Property.
+    """
+    items_in_owl: str = "(Property)"
+
+    def get_owl_items(self, prop: Dict) -> Any:
+        """
+        Extracts the IRI, label, and definition of a property from the given OWL item.
+
+        Parameters:
+            owl (Dict): A dictionary representing an OWL/RDF property item, expected to contain
+                        'iri', 'label', and optionally 'definition' keys.
+
+        Returns:
+            Dict: A dictionary containing the IRI, label, definition, and combined text of the property.
+        """        
+        label = prop.get("label", "")
+
+        combined_text = label
+
+        return {
+            "iri": prop["iri"],
+            "label": label,
+            "text": combined_text,
+        }
+
+class PropertyFullTextLLMEncoder(LLMEncoder):
+    """
+    Encodes OWL/RDF items that represent properties with domain, range, inverse property, and definition.
+
+    This class inherits from the `LLMEncoder` class and is designed to encode OWL/RDF property items.
+    The `get_owl_items` method retrieves the IRI, label, definition, domain, range, and inverse property information.
+
+    Attributes:
+        items_in_owl (str): Specifies the type of OWL items being encoded, in this case,
+                            a Property with Definition, Domain, Range, and Inverse.
+    """
+    items_in_owl: str = "(Property, Domain, Range, Inverse)"
+
+    def get_owl_items(self, prop: Dict) -> Any:
+        label = prop.get("label", "")
+
+        domain_text = (
+            " ".join(prop.get("domain_text", []))
+            if len(prop.get("domain_text", [])) > 0
+            else ""
+        )
+
+        range_text = (
+            " ".join(prop.get("range_text", []))
+            if len(prop.get("range_text", [])) > 0
+            else ""
+        )
+
+        inverse_text = ""
+        if prop.get("inverse_of"):
+            inverse_text = (
+                " ".join(prop.get("inverse_label", []))
+                if len(prop.get("inverse_label", [])) > 0
+                else ""
+            )
+
+        combined_text = label
+
+        if domain_text:
+            combined_text += "  " + domain_text
+
+        if range_text:
+            combined_text += "  " + range_text
+
+        if inverse_text:
+            combined_text += "  inverse: " + inverse_text
+
+        return {
+            "iri": prop["iri"],
+            "label": label,
+            "domain": domain_text,
+            "range": range_text,
+            "inverse": inverse_text,
+            "text": combined_text,
+        }
\ No newline at end of file
diff --git a/ontoaligner/encoder/rag.py b/ontoaligner/encoder/rag.py
index 4c42c37..7ea7075 100644
--- a/ontoaligner/encoder/rag.py
+++ b/ontoaligner/encoder/rag.py
@@ -26,7 +26,7 @@
 
 from ..base import BaseEncoder
 from .lightweight import ConceptLightweightEncoder
-
+from .property import PropertyEncoder, PropMatchEncoder
 
 class RAGEncoder(BaseEncoder):
     """
@@ -287,3 +287,43 @@ def __str__(self):
             dict: A dictionary with the encoder name and items in OWL.
         """
         return f"OLaLaEncoder{self.items_in_owl}"
+
+class PropertyRAGEncoder(RAGEncoder):
+    """
+    Encodes OWL/RDF items representing a Property using retrieval-based and language model encoders.
+
+    This class extends the `RAGEncoder` class and is specialized in encoding OWL/RDF items that consist of
+    a Property. The retrieval encoder uses the `PropertyEncoder` class to retrieve the necessary property items,
+    while the language model encoder is set to "PropertyRAGDataset".
+
+    Attributes:
+        items_in_owl (str): Specifies the type of OWL items being encoded, in this case, a Property.
+        retrieval_encoder (Any): The retrieval encoder used for fetching OWL/RDF property items,
+                                 set to `PropertyEncoder`.
+        llm_encoder (str): The language model encoder used, set to "PropertyRAGDataset".
+    """    
+    items_in_owl: str = "(Property)"
+    retrieval_encoder: Any = PropertyEncoder
+    llm_encoder: str = "PropertyRAGDataset"
+
+
+class PropertyRAGEncoder(RAGEncoder):
+    """
+    Encodes OWL/RDF items representing a Property with its Domain, Range, and Inverse property using
+    retrieval-based and language model encoders.
+
+    This class extends the `RAGEncoder` class and is specialized in encoding OWL/RDF items that consist of
+    a Property, its Domain, Range, and Inverse property information. The retrieval encoder uses the
+    `PropMatchEncoder` class to retrieve the necessary property items, while the language model encoder is
+    set to "PropertyFullTextRAGDataset".
+
+    Attributes:
+        items_in_owl (str): Specifies the type of OWL items being encoded, in this case,
+                            a Property with Domain, Range, and Inverse property.
+        retrieval_encoder (Any): The retrieval encoder used for fetching OWL/RDF property items,
+                                 set to `PropMatchEncoder`.
+        llm_encoder (str): The language model encoder used, set to "PropertyFullTextRAGDataset".
+    """    
+    items_in_owl: str = "(Property, Domain, Range, Inverse)"
+    retrieval_encoder: Any = PropMatchEncoder
+    llm_encoder: str = "PropertyFullTextRAGDataset"
\ No newline at end of file

From ebd4d250378a8d29e16edc84d187e984f278bf31 Mon Sep 17 00:00:00 2001
From: KrishnaRani <thakurkrishnarani@gmail.com>
Date: Sat, 30 May 2026 00:24:55 +0200
Subject: [PATCH 2/3] restructured code

---
 ontoaligner/aligner/llm/dataset.py        | 105 --------
 ontoaligner/aligner/llm/models.py         |   2 +-
 ontoaligner/aligner/propmatch/__init__.py |   2 +
 ontoaligner/aligner/propmatch/llm.py      | 285 +++++++++++++++++++++
 ontoaligner/aligner/propmatch/rag.py      | 298 ++++++++++++++++++++++
 ontoaligner/aligner/rag/dataset.py        | 112 --------
 ontoaligner/aligner/rag/rag.py            |   2 +-
 ontoaligner/encoder/llm.py                |  89 -------
 ontoaligner/encoder/property.py           | 133 +++++++++-
 ontoaligner/encoder/rag.py                |  41 ---
 10 files changed, 719 insertions(+), 350 deletions(-)
 create mode 100644 ontoaligner/aligner/propmatch/llm.py
 create mode 100644 ontoaligner/aligner/propmatch/rag.py

diff --git a/ontoaligner/aligner/llm/dataset.py b/ontoaligner/aligner/llm/dataset.py
index 6a7dbf2..38d8abb 100644
--- a/ontoaligner/aligner/llm/dataset.py
+++ b/ontoaligner/aligner/llm/dataset.py
@@ -117,108 +117,3 @@ def fill_one_sample(self, input_data: Any) -> str:
             .replace("{target_children}", target_children)
         )
         return template
-
-class PropertyLLMDataset(LLMDataset):
-    prompt = """Determine whether the following two ontology properties represent the same semantic relation. Respond with "yes" or "no" only.
-### Property 1:
-{source}
-### Property 2:
-{target}
-### Your Answer:"""
-
-    def fill_one_sample(self, input_data: Any) -> str:
-        template = self.prompt
-
-        source = self.preprocess(input_data["source"].get("label", ""))
-        target = self.preprocess(input_data["target"].get("label", ""))
-
-        template = (
-            template.replace("{source}", source)
-            .replace("{target}", target)
-        )
-
-        return template
-
-class PropertyFullTextLLMDataset(LLMDataset):
-    prompt = """Determine whether the following two ontology properties represent the same semantic relation. Respond with "yes" or "no" only.
-### Property 1:
-{source}
-**Domain**: {source_domain}
-**Range**: {source_range}
-**Inverse**: {source_inverse}
-
-### Property 2:
-{target}
-**Domain**: {target_domain}
-**Range**: {target_range}
-**Inverse**: {target_inverse}
-
-### Your Answer:"""
-
-    def fill_one_sample(self, input_data: Any) -> str:
-        template = self.prompt
-
-        source = self.preprocess(input_data["source"].get("label", ""))
-        target = self.preprocess(input_data["target"].get("label", ""))
-
-        source_domain = (
-            " ".join(input_data["source"].get("domain_text", []))
-            if len(input_data["source"].get("domain_text", [])) > 0
-            else ""
-        )
-
-        target_domain = (
-            " ".join(input_data["target"].get("domain_text", []))
-            if len(input_data["target"].get("domain_text", [])) > 0
-            else ""
-        )
-
-        source_range = (
-            " ".join(input_data["source"].get("range_text", []))
-            if len(input_data["source"].get("range_text", [])) > 0
-            else ""
-        )
-
-        target_range = (
-            " ".join(input_data["target"].get("range_text", []))
-            if len(input_data["target"].get("range_text", [])) > 0
-            else ""
-        )
-
-        source_inverse = ""
-        if input_data["source"].get("inverse_of"):
-            source_inverse = (
-                " ".join(input_data["source"].get("inverse_label", []))
-                if len(input_data["source"].get("inverse_label", [])) > 0
-                else ""
-            )
-
-        target_inverse = ""
-        if input_data["target"].get("inverse_of"):
-            target_inverse = (
-                " ".join(input_data["target"].get("inverse_label", []))
-                if len(input_data["target"].get("inverse_label", [])) > 0
-                else ""
-            )
-
-        source_domain = self.preprocess(source_domain)
-        target_domain = self.preprocess(target_domain)
-
-        source_range = self.preprocess(source_range)
-        target_range = self.preprocess(target_range)
-
-        source_inverse = self.preprocess(source_inverse)
-        target_inverse = self.preprocess(target_inverse)
-
-        template = (
-            template.replace("{source}", source)
-            .replace("{target}", target)
-            .replace("{source_domain}", source_domain)
-            .replace("{target_domain}", target_domain)
-            .replace("{source_range}", source_range)
-            .replace("{target_range}", target_range)
-            .replace("{source_inverse}", source_inverse)
-            .replace("{target_inverse}", target_inverse)
-        )
-
-        return template
\ No newline at end of file
diff --git a/ontoaligner/aligner/llm/models.py b/ontoaligner/aligner/llm/models.py
index bde3e3f..61eff58 100644
--- a/ontoaligner/aligner/llm/models.py
+++ b/ontoaligner/aligner/llm/models.py
@@ -20,7 +20,7 @@
 from transformers import (AutoTokenizer, AutoModelForCausalLM, T5Tokenizer, T5ForConditionalGeneration)
 
 from .llm import EncoderDecoderLLMArch, DecoderLLMArch, OpenAILLMArch
-
+from ..propmatch import PropertyLLMDataset, PropertyFullTextLLMDataset
 
 class FlanT5LEncoderDecoderLM(EncoderDecoderLLMArch):
     """
diff --git a/ontoaligner/aligner/propmatch/__init__.py b/ontoaligner/aligner/propmatch/__init__.py
index 0d49dab..9822f86 100644
--- a/ontoaligner/aligner/propmatch/__init__.py
+++ b/ontoaligner/aligner/propmatch/__init__.py
@@ -12,3 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .propmatch import * # NOQA
+from .rag import * # NOQA
+from .llm import * # NOQA
\ No newline at end of file
diff --git a/ontoaligner/aligner/propmatch/llm.py b/ontoaligner/aligner/propmatch/llm.py
new file mode 100644
index 0000000..f9c7ad1
--- /dev/null
+++ b/ontoaligner/aligner/propmatch/llm.py
@@ -0,0 +1,285 @@
+# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This script defines custom dataset classes for property-level language model (LLM) ontology matching tasks.
+These datasets preprocess source and target ontology properties and format them into structured prompts for a language model,
+with variations on how much property information is included, such as labels only or full metadata with domain, range, and inverse-property context.
+
+Classes:
+    - BasePropertyLLMDataset: The base class for creating property-level LLM datasets from source and target ontology properties.
+    - PropertyLLMDataset: A subclass of BasePropertyLLMDataset that creates prompts using only source and target property labels.
+    - PropertyFullTextLLMDataset: A subclass of BasePropertyLLMDataset that creates prompts using labels, domain, range, and inverse-property information.
+"""
+
+from typing import Any, Dict, List
+
+from torch.utils.data import Dataset
+
+
+class BasePropertyLLMDataset(Dataset):
+    """
+    Base dataset class for property-level LLM ontology matching.
+
+    This class creates all possible source-target property pairs from two ontology
+    property collections. It also provides shared helper methods for text
+    preprocessing, field extraction, sample formatting, and batch collation.
+
+    Attributes:
+        prompt: Prompt template used by subclasses.
+        data: List of source-target property-pair dictionaries.
+        len: Number of source-target property pairs in the dataset.
+    """
+
+    prompt: str = None
+
+    def __init__(self, source_onto: Any, target_onto: Any) -> None:
+        """
+        Initialize the dataset from source and target ontology properties.
+
+        Args:
+            source_onto: Iterable containing source ontology property dictionaries.
+            target_onto: Iterable containing target ontology property dictionaries.
+        """
+        # Materialize the targets once so a one-shot iterator (e.g. a generator)
+        # is not exhausted after the first source property.
+        targets = list(target_onto)
+
+        self.data = [
+            {"source": source, "target": target}
+            for source in source_onto
+            for target in targets
+        ]
+        self.len = len(self.data)
+
+    def preprocess(self, text: Any) -> str:
+        """
+        Normalize text before inserting it into a prompt.
+
+        The method converts the input value to a string, replaces underscores with
+        spaces, lowercases the text, and converts None values to an empty string.
+
+        Args:
+            text: Text or value to normalize.
+
+        Returns:
+            Normalized text as a string.
+        """
+        if text is None:
+            return ""
+
+        text = str(text)
+        text = text.replace("_", " ")
+        text = text.lower()
+        return text
+
+    def join_text_list(self, value: Any) -> str:
+        """
+        Convert a text value or list/tuple of text values into a single string.
+
+        Args:
+            value: A text value, list or tuple of text values, or None.
+
+        Returns:
+            A single string. Lists and tuples are joined with spaces (skipping
+            None entries), and None is converted to an empty string.
+        """
+        if value is None:
+            return ""
+
+        if isinstance(value, (list, tuple)):
+            return " ".join(str(v) for v in value if v is not None)
+
+        return str(value)
+
+    def get_text(self, item: Dict, key: str) -> str:
+        """
+        Extract and normalize a text field from an ontology property item.
+
+        Args:
+            item: Ontology property dictionary.
+            key: Dictionary key to extract.
+
+        Returns:
+            Normalized text for the requested field.
+        """
+        return self.preprocess(self.join_text_list(item.get(key, "")))
+
+    def __getitem__(self, index: int) -> Dict:
+        """
+        Return one formatted dataset sample.
+
+        Args:
+            index: Index of the source-target property pair.
+
+        Returns:
+            Dictionary containing the generated prompt and the corresponding
+            source and target property IRIs.
+        """
+        sample = self.data[index]
+
+        return {
+            "prompts": self.fill_one_sample(sample),
+            "iris": [
+                sample["source"]["iri"],
+                sample["target"]["iri"],
+            ],
+        }
+
+    def __len__(self) -> int:
+        """
+        Return the number of source-target property pairs.
+
+        Returns:
+            Dataset length.
+        """
+        return self.len
+
+    def fill_one_sample(self, input_data: Any) -> str:
+        """
+        Convert one source-target property pair into a prompt.
+
+        Subclasses must override this method to define the specific prompt format.
+
+        Args:
+            input_data: Dictionary containing source and target property data.
+
+        Raises:
+            NotImplementedError: This method must be implemented by subclasses.
+        """
+        raise NotImplementedError
+
+    def collate_fn(self, batchs: List[Dict]) -> Dict:
+        """
+        Collate multiple dataset samples into a batch.
+
+        Args:
+            batchs: List of samples returned by __getitem__.
+
+        Returns:
+            Dictionary containing batched prompts and source-target IRI pairs.
+        """
+        batchs_clear = {
+            "prompts": [],
+            "iris": [],
+        }
+
+        for batch in batchs:
+            batchs_clear["prompts"].append(batch["prompts"])
+            batchs_clear["iris"].append(batch["iris"])
+
+        return batchs_clear
+
+
+class PropertyLLMDataset(BasePropertyLLMDataset):
+    """
+    Dataset class for label-only property-level LLM ontology matching.
+
+    This class creates prompts that compare two ontology properties using only
+    their labels and asks the model whether they represent the same semantic relation.
+    """
+
+    prompt = """Determine whether the following two ontology properties represent the same semantic relation. Respond with "yes" or "no" only.
+### Property 1:
+{source}
+### Property 2:
+{target}
+### Your Answer:"""
+
+    def fill_one_sample(self, input_data: Any) -> str:
+        """
+        Fill the ``{source}`` and ``{target}`` placeholders of ``prompt`` with the pair's labels.
+
+        Args:
+            input_data: Dictionary with "source" and "target" property dictionaries; a missing "label" becomes "".
+
+        Returns:
+            Prompt string with both labels normalized by ``preprocess``.
+        """
+        source = self.preprocess(input_data["source"].get("label", ""))
+        target = self.preprocess(input_data["target"].get("label", ""))
+
+        return (
+            self.prompt
+            .replace("{source}", source)
+            .replace("{target}", target)
+        )
+
+
+class PropertyFullTextLLMDataset(BasePropertyLLMDataset):
+    """
+    Dataset class for metadata-rich property-level LLM ontology matching.
+
+    This class creates prompts that compare two ontology properties using their
+    labels, domain information, range information, and inverse-property labels
+    when available.
+    """
+
+    prompt = """Determine whether the following two ontology properties represent the same semantic relation. Respond with "yes" or "no" only.
+### Property 1:
+{source}
+**Domain**: {source_domain}
+**Range**: {source_range}
+**Inverse**: {source_inverse}
+
+### Property 2:
+{target}
+**Domain**: {target_domain}
+**Range**: {target_range}
+**Inverse**: {target_inverse}
+
+### Your Answer:"""
+
+    def fill_one_sample(self, input_data: Any) -> str:
+        """
+        Build a metadata-rich prompt for one source-target property pair.
+
+        Args:
+            input_data: Dictionary with "source" and "target" property dictionaries; the keys read are
+                "label", "domain_text", "range_text", "inverse_of" and "inverse_label" (all optional).
+
+        Returns:
+            Prompt string with every placeholder filled; absent metadata is inserted as an empty string.
+        """
+        source_item = input_data["source"]
+        target_item = input_data["target"]
+
+        source = self.preprocess(source_item.get("label", ""))
+        target = self.preprocess(target_item.get("label", ""))
+
+        source_domain = self.get_text(source_item, "domain_text")
+        target_domain = self.get_text(target_item, "domain_text")
+
+        source_range = self.get_text(source_item, "range_text")
+        target_range = self.get_text(target_item, "range_text")
+
+        source_inverse = ""  # stays empty unless "inverse_of" is set (truthy)
+        if source_item.get("inverse_of"):
+            source_inverse = self.get_text(source_item, "inverse_label")
+
+        target_inverse = ""  # stays empty unless "inverse_of" is set (truthy)
+        if target_item.get("inverse_of"):
+            target_inverse = self.get_text(target_item, "inverse_label")
+
+        return (
+            self.prompt
+            .replace("{source}", source)
+            .replace("{target}", target)
+            .replace("{source_domain}", source_domain)
+            .replace("{target_domain}", target_domain)
+            .replace("{source_range}", source_range)
+            .replace("{target_range}", target_range)
+            .replace("{source_inverse}", source_inverse)
+            .replace("{target_inverse}", target_inverse)
+        )
\ No newline at end of file
diff --git a/ontoaligner/aligner/propmatch/rag.py b/ontoaligner/aligner/propmatch/rag.py
new file mode 100644
index 0000000..cf304f0
--- /dev/null
+++ b/ontoaligner/aligner/propmatch/rag.py
@@ -0,0 +1,298 @@
+# Copyright 2025 Scientific Knowledge Organization (SciKnowOrg) Research Group.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This script defines custom dataset classes for property-level retrieval-augmented generation (RAG) ontology matching tasks.
+These datasets preprocess source and target ontology properties and format them into structured prompts for a language model,
+with variations on how much property information is included, such as labels only or full metadata with domain, range, and inverse-property context.
+
+Classes:
+    - BasePropertyRAGDataset: The base class for creating property-level RAG datasets from source-target property pairs.
+    - PropertyRAGDataset: A subclass of BasePropertyRAGDataset that creates prompts using only source and target property labels.
+    - PropertyFullTextRAGDataset: A subclass of BasePropertyRAGDataset that creates prompts using labels, domain, range, and inverse-property information.
+"""
+
+from typing import Any, Dict, List
+
+from torch.utils.data import Dataset
+
+class BasePropertyRAGDataset(Dataset):
+    """Base dataset for property-level RAG ontology matching.
+
+    This class prepares source-target property pairs and provides shared helper
+    methods for text normalization, field extraction, batching, and indexing.
+    Subclasses must implement :meth:`fill_one_sample` to convert each property
+    pair into a model prompt.
+
+    Attributes:
+        prompt: Prompt template used by subclasses.
+        data: List of source-target property-pair dictionaries.
+        len: Number of property pairs in the dataset.
+    """
+
+    prompt: str = None
+
+    def __init__(
+        self,
+        data: Any = None,
+        source_onto: Any = None,
+        target_onto: Any = None,
+    ) -> None:
+        """Initialize the dataset from explicit pairs or ontology collections.
+
+        Args:
+            data: Optional precomputed list of source-target property pairs.
+            source_onto: Optional iterable of source ontology properties.
+            target_onto: Optional iterable of target ontology properties.
+
+        Raises:
+            ValueError: If neither ``data`` nor both ``source_onto`` and
+                ``target_onto`` are provided.
+        """
+        if data is not None:
+            self.data = data
+
+        elif source_onto is not None and target_onto is not None:
+            # Materialize the targets once so a one-shot iterator (e.g. a
+            # generator) is not exhausted after the first source property,
+            # which would silently drop every remaining pair.
+            targets = list(target_onto)
+
+            self.data = [
+                {"source": source, "target": target}
+                for source in source_onto
+                for target in targets
+            ]
+
+        else:
+            raise ValueError(
+                "BasePropertyRAGDataset requires either data=... or source_onto=... and target_onto=..."
+            )
+
+        self.len = len(self.data)
+
+    def preprocess(self, text: Any) -> str:
+        """Normalize text for prompt construction.
+
+        The method converts input values to strings, replaces underscores with
+        spaces, lowercases the text, and converts ``None`` values to an empty
+        string.
+
+        Args:
+            text: Input text or value to normalize.
+
+        Returns:
+            A normalized string.
+        """
+        if text is None:
+            return ""
+
+        text = str(text)
+        text = text.replace("_", " ")
+        text = text.lower()
+        return text
+
+    def join_text_list(self, value: Any) -> str:
+        """Convert a scalar or list/tuple text field into a string.
+
+        Args:
+            value: A text value, list or tuple of text values, or ``None``.
+
+        Returns:
+            A single string. Lists and tuples are joined with spaces (skipping
+            ``None`` entries), and ``None`` is converted to an empty string.
+        """
+        if value is None:
+            return ""
+
+        if isinstance(value, (list, tuple)):
+            return " ".join(str(v) for v in value if v is not None)
+
+        return str(value)
+
+    def get_text(self, item: Dict, key: str) -> str:
+        """Extract and normalize a text field from an ontology item.
+
+        Args:
+            item: Ontology property dictionary.
+            key: Field name to extract from the dictionary.
+
+        Returns:
+            The normalized field value.
+        """
+        return self.preprocess(self.join_text_list(item.get(key, "")))
+
+    def __getitem__(self, index: int) -> Dict:
+        """Return a single prompt-ready dataset item.
+
+        Args:
+            index: Index of the source-target property pair.
+
+        Returns:
+            A dictionary containing the generated prompt and the corresponding
+            source and target IRIs.
+        """
+        sample = self.data[index]
+
+        return {
+            "prompts": self.fill_one_sample(sample),
+            "iris": [
+                sample["source"]["iri"],
+                sample["target"]["iri"],
+            ],
+        }
+
+    def __len__(self) -> int:
+        """Return the number of property pairs in the dataset.
+
+        Returns:
+            Dataset length.
+        """
+        return self.len
+
+    def fill_one_sample(self, input_data: Any) -> str:
+        """Convert a source-target property pair into a prompt.
+
+        Subclasses must override this method with a concrete prompt generation
+        strategy.
+
+        Args:
+            input_data: Source-target property-pair dictionary.
+
+        Raises:
+            NotImplementedError: Always raised by the base implementation.
+        """
+        raise NotImplementedError
+
+    def collate_fn(self, batchs: List[Dict]) -> Dict:
+        """Collate dataset samples into a batch dictionary.
+
+        Args:
+            batchs: List of dataset samples returned by :meth:`__getitem__`.
+
+        Returns:
+            A dictionary containing batched prompts and source-target IRI pairs.
+        """
+        batchs_clear = {
+            "prompts": [],
+            "iris": [],
+        }
+
+        for batch in batchs:
+            batchs_clear["prompts"].append(batch["prompts"])
+            batchs_clear["iris"].append(batch["iris"])
+
+        return batchs_clear
+
+
+class PropertyRAGDataset(BasePropertyRAGDataset):
+    """Prompt dataset using only source and target property labels.
+
+    This dataset creates binary yes/no prompts that ask whether two ontology
+    properties represent the same semantic relation using only their labels.
+    """
+
+    prompt = """Determine whether the following two ontology properties represent the same semantic relation. Respond with "yes" or "no" only.
+
+### Property 1:
+{source}
+
+### Property 2:
+{target}
+
+### Your Answer:"""
+
+    def fill_one_sample(self, input_data: Any) -> str:
+        """Fill the ``{source}`` and ``{target}`` placeholders of ``prompt`` with the pair's labels.
+
+        Args:
+            input_data: Dictionary containing ``source`` and ``target`` property
+                dictionaries; a missing ``label`` becomes an empty string.
+
+        Returns:
+            Prompt string with both labels normalized by ``preprocess``.
+        """
+        source = self.preprocess(input_data["source"].get("label", ""))
+        target = self.preprocess(input_data["target"].get("label", ""))
+
+        return (
+            self.prompt
+            .replace("{source}", source)
+            .replace("{target}", target)
+        )
+
+
+class PropertyFullTextRAGDataset(BasePropertyRAGDataset):
+    """Prompt dataset using labels and additional property metadata.
+
+    This dataset creates binary yes/no prompts using property labels together
+    with domain, range, and inverse-property information when available.
+    """
+
+    prompt = """Determine whether the following two ontology properties represent the same semantic relation. Respond with "yes" or "no" only.
+
+### Property 1:
+{source}
+**Domain**: {source_domain}
+**Range**: {source_range}
+**Inverse**: {source_inverse}
+
+### Property 2:
+{target}
+**Domain**: {target_domain}
+**Range**: {target_range}
+**Inverse**: {target_inverse}
+
+### Your Answer:"""
+
+    def fill_one_sample(self, input_data: Any) -> str:
+        """Build a metadata-rich matching prompt for one property pair.
+
+        Args:
+            input_data: Dictionary containing ``source`` and ``target`` property dictionaries; the keys read are
+                ``label``, ``domain_text``, ``range_text``, ``inverse_of`` and ``inverse_label`` (all optional).
+
+        Returns:
+            Prompt string with every placeholder filled; absent metadata is inserted as an empty string.
+        """
+        source_item = input_data["source"]
+        target_item = input_data["target"]
+
+        source = self.preprocess(source_item.get("label", ""))
+        target = self.preprocess(target_item.get("label", ""))
+
+        source_domain = self.get_text(source_item, "domain_text")
+        target_domain = self.get_text(target_item, "domain_text")
+
+        source_range = self.get_text(source_item, "range_text")
+        target_range = self.get_text(target_item, "range_text")
+
+        source_inverse = ""  # stays empty unless "inverse_of" is set (truthy)
+        if source_item.get("inverse_of"):
+            source_inverse = self.get_text(source_item, "inverse_label")
+
+        target_inverse = ""  # stays empty unless "inverse_of" is set (truthy)
+        if target_item.get("inverse_of"):
+            target_inverse = self.get_text(target_item, "inverse_label")
+
+        return (
+            self.prompt
+            .replace("{source}", source)
+            .replace("{target}", target)
+            .replace("{source_domain}", source_domain)
+            .replace("{target_domain}", target_domain)
+            .replace("{source_range}", source_range)
+            .replace("{target_range}", target_range)
+            .replace("{source_inverse}", source_inverse)
+            .replace("{target_inverse}", target_inverse)
+        )
\ No newline at end of file
diff --git a/ontoaligner/aligner/rag/dataset.py b/ontoaligner/aligner/rag/dataset.py
index d929155..788377a 100644
--- a/ontoaligner/aligner/rag/dataset.py
+++ b/ontoaligner/aligner/rag/dataset.py
@@ -228,115 +228,3 @@ def fill_one_sample(self, input_data: Any) -> str:
             .replace("{target_children}", target_children)
         )
         return template
-
-class PropertyRAGDataset(RAGDataset):
-    """
-    A subclass of RAGDataset used for ontology property matching using only property labels.
-    """
-
-    prompt = """Classify if two ontology properties represent the same semantic relation or not (answer only yes or no).
-### First property:
-{source}
-### Second property:
-{target}
-### Answer:"""
-
-    def fill_one_sample(self, input_data: Any) -> str:
-        template = self.prompt
-
-        source = self.preprocess(input_data["source"]["label"])
-        target = self.preprocess(input_data["target"]["label"])
-
-        template = (
-            template.replace("{source}", source)
-            .replace("{target}", target)
-        )
-
-        return template
-    
-class PropertyFullTextRAGDataset(RAGDataset):
-    """
-    A subclass of RAGDataset used for ontology property matching using property label,
-    domain, range, and inverse property.
-    """
-
-    prompt = """Classify if two ontology properties represent the same semantic relation or not (answer only yes or no).
-### First property:
-{source}
-Domain: {source_domain}
-Range: {source_range}
-Inverse: {source_inverse}
-
-### Second property:
-{target}
-Domain: {target_domain}
-Range: {target_range}
-Inverse: {target_inverse}
-
-### Answer:"""
-
-    def fill_one_sample(self, input_data: Any) -> str:
-        template = self.prompt
-
-        source = self.preprocess(input_data["source"]["label"])
-        target = self.preprocess(input_data["target"]["label"])
-
-        source_domain = (
-            " ".join(input_data["source"]["domain_text"])
-            if len(input_data["source"]["domain_text"]) > 0
-            else ""
-        )
-
-        target_domain = (
-            " ".join(input_data["target"]["domain_text"])
-            if len(input_data["target"]["domain_text"]) > 0
-            else ""
-        )
-
-        source_range = (
-            " ".join(input_data["source"]["range_text"])
-            if len(input_data["source"]["range_text"]) > 0
-            else ""
-        )
-
-        target_range = (
-            " ".join(input_data["target"]["range_text"])
-            if len(input_data["target"]["range_text"]) > 0
-            else ""
-        )
-
-        source_inverse = ""
-        if input_data["source"]["inverse_of"]:
-            source_inverse = (
-                " ".join(input_data["source"]["inverse_label"])
-                if len(input_data["source"]["inverse_label"]) > 0
-                else ""
-            )
-
-        target_inverse = ""
-        if input_data["target"]["inverse_of"]:
-            target_inverse = (
-                " ".join(input_data["target"]["inverse_label"])
-                if len(input_data["target"]["inverse_label"]) > 0
-                else ""
-            )
-
-        source_domain = self.preprocess(source_domain)
-        target_domain = self.preprocess(target_domain)
-        source_range = self.preprocess(source_range)
-        target_range = self.preprocess(target_range)
-        source_inverse = self.preprocess(source_inverse)
-        target_inverse = self.preprocess(target_inverse)
-
-        template = (
-            template.replace("{source}", source)
-            .replace("{target}", target)
-            .replace("{source_domain}", source_domain)
-            .replace("{target_domain}", target_domain)
-            .replace("{source_range}", source_range)
-            .replace("{target_range}", target_range)
-            .replace("{source_inverse}", source_inverse)
-            .replace("{target_inverse}", target_inverse)
-        )
-
-        return template
\ No newline at end of file
diff --git a/ontoaligner/aligner/rag/rag.py b/ontoaligner/aligner/rag/rag.py
index 78ae2ca..bfc79c8 100644
--- a/ontoaligner/aligner/rag/rag.py
+++ b/ontoaligner/aligner/rag/rag.py
@@ -40,7 +40,7 @@
 from ..llm import DecoderLLMArch, OpenAILLMArch
 from .dataset import * # NOQA
 from ...postprocess import process
-
+from ..propmatch import PropertyRAGDataset, PropertyFullTextRAGDataset
 
 class RAGBasedDecoderLLMArch(DecoderLLMArch):
     """
diff --git a/ontoaligner/encoder/llm.py b/ontoaligner/encoder/llm.py
index 60dbaad..388e02d 100644
--- a/ontoaligner/encoder/llm.py
+++ b/ontoaligner/encoder/llm.py
@@ -157,92 +157,3 @@ def get_owl_items(self, owl: Dict) -> Any:
         """
         parents = ", ".join([parent["label"] for parent in owl["parents"]])
         return {"iri": owl["iri"], "concept": owl["label"], "parents": str(parents)}
-
-class PropertyLLMEncoder(LLMEncoder):
-    """
-    Encodes OWL/RDF items that represent properties.
-
-    This class inherits from the `LLMEncoder` class and is designed to encode OWL/RDF property items.
-    The `get_owl_items` method retrieves the IRI, label, and definition of the property.
-
-    Attributes:
-        items_in_owl (str): Specifies the type of OWL items being encoded, in this case, a Property.
-    """
-    items_in_owl: str = "(Property)"
-
-    def get_owl_items(self, prop: Dict) -> Any:
-        """
-        Extracts the IRI, label, and definition of a property from the given OWL item.
-
-        Parameters:
-            owl (Dict): A dictionary representing an OWL/RDF property item, expected to contain
-                        'iri', 'label', and optionally 'definition' keys.
-
-        Returns:
-            Dict: A dictionary containing the IRI, label, definition, and combined text of the property.
-        """        
-        label = prop.get("label", "")
-
-        combined_text = label
-
-        return {
-            "iri": prop["iri"],
-            "label": label,
-            "text": combined_text,
-        }
-
-class PropertyFullTextLLMEncoder(LLMEncoder):
-    """
-    Encodes OWL/RDF items that represent properties with domain, range, inverse property, and definition.
-
-    This class inherits from the `LLMEncoder` class and is designed to encode OWL/RDF property items.
-    The `get_owl_items` method retrieves the IRI, label, definition, domain, range, and inverse property information.
-
-    Attributes:
-        items_in_owl (str): Specifies the type of OWL items being encoded, in this case,
-                            a Property with Definition, Domain, Range, and Inverse.
-    """
-    items_in_owl: str = "(Property, Domain, Range, Inverse)"
-
-    def get_owl_items(self, prop: Dict) -> Any:
-        label = prop.get("label", "")
-
-        domain_text = (
-            " ".join(prop.get("domain_text", []))
-            if len(prop.get("domain_text", [])) > 0
-            else ""
-        )
-
-        range_text = (
-            " ".join(prop.get("range_text", []))
-            if len(prop.get("range_text", [])) > 0
-            else ""
-        )
-
-        inverse_text = ""
-        if prop.get("inverse_of"):
-            inverse_text = (
-                " ".join(prop.get("inverse_label", []))
-                if len(prop.get("inverse_label", [])) > 0
-                else ""
-            )
-
-        combined_text = label
-
-        if domain_text:
-            combined_text += "  " + domain_text
-
-        if range_text:
-            combined_text += "  " + range_text
-
-        if inverse_text:
-            combined_text += "  inverse: " + inverse_text
-
-        return {
-            "iri": prop["iri"],
-            "label": label,
-            "domain": domain_text,
-            "range": range_text,
-            "inverse": inverse_text,
-            "text": combined_text,
-        }
\ No newline at end of file
diff --git a/ontoaligner/encoder/property.py b/ontoaligner/encoder/property.py
index 6cac3e8..32935a7 100644
--- a/ontoaligner/encoder/property.py
+++ b/ontoaligner/encoder/property.py
@@ -21,7 +21,8 @@
 from typing import Any, Dict
 
 from ..base import BaseEncoder
-
+from .llm import LLMEncoder
+from .rag import RAGEncoder
 
 class PropertyEncoder(BaseEncoder):
     """
@@ -155,3 +156,133 @@ def get_encoder_info(self) -> str:
     def __str__(self):
         """Returns a string representation of the encoder."""
         return {"PropMatchEncoder": self.items_in_owl}
+
+class PropertyRAGEncoder(RAGEncoder):
+    """
+    Encodes OWL/RDF items representing a Property using retrieval-based and language model encoders.
+
+    This class extends the `RAGEncoder` class and is specialized in encoding OWL/RDF items that consist of
+    a Property. The retrieval encoder uses the `PropertyEncoder` class to retrieve the necessary property items,
+    while the language model encoder is set to "PropertyRAGDataset".
+
+    Attributes:
+        items_in_owl (str): Specifies the type of OWL items being encoded, in this case, a Property.
+        retrieval_encoder (Any): The retrieval encoder used for fetching OWL/RDF property items,
+                                 set to `PropertyEncoder`.
+        llm_encoder (str): The language model encoder used, set to "PropertyRAGDataset".
+    """
+    items_in_owl: str = "(Property)"
+    retrieval_encoder: Any = PropertyEncoder
+    llm_encoder: str = "PropertyRAGDataset"
+
+
+class PropertyFullTextRAGEncoder(RAGEncoder):
+    """
+    Encodes OWL/RDF items representing a Property with its Domain, Range, and Inverse property using
+    retrieval-based and language model encoders.
+
+    This class extends the `RAGEncoder` class and is specialized in encoding OWL/RDF items that consist of
+    a Property, its Domain, Range, and Inverse property information. The retrieval encoder uses the
+    `PropMatchEncoder` class to retrieve the necessary property items, while the language model encoder is
+    set to "PropertyFullTextRAGDataset".
+
+    Attributes:
+        items_in_owl (str): Specifies the type of OWL items being encoded, in this case,
+                            a Property with Domain, Range, and Inverse property.
+        retrieval_encoder (Any): The retrieval encoder used for fetching OWL/RDF property items,
+                                 set to `PropMatchEncoder`.
+        llm_encoder (str): The language model encoder used, set to "PropertyFullTextRAGDataset".
+    """
+    items_in_owl: str = "(Property, Domain, Range, Inverse)"
+    retrieval_encoder: Any = PropMatchEncoder
+    llm_encoder: str = "PropertyFullTextRAGDataset"
+
+
+class PropertyLLMEncoder(LLMEncoder):
+    """
+    Encodes OWL/RDF items that represent properties.
+
+    This class inherits from the `LLMEncoder` class and is designed to encode OWL/RDF property items.
+    The `get_owl_items` method retrieves the IRI and label of the property.
+
+    Attributes:
+        items_in_owl (str): Specifies the type of OWL items being encoded, in this case, a Property.
+    """
+    items_in_owl: str = "(Property)"
+
+    def get_owl_items(self, prop: Dict) -> Any:
+        """
+        Extracts the IRI and label of a property from the given OWL item.
+
+        Parameters:
+            prop (Dict): A dictionary representing an OWL/RDF property item, expected to contain
+                         an 'iri' key and optionally a 'label' key (defaults to "").
+
+        Returns:
+            Dict: A dictionary containing the IRI, label, and text (identical to the label) of the property.
+        """
+        label = prop.get("label", "")
+
+        combined_text = label
+
+        return {
+            "iri": prop["iri"],
+            "label": label,
+            "text": combined_text,
+        }
+
+class PropertyFullTextLLMEncoder(LLMEncoder):
+    """
+    Encodes OWL/RDF items that represent properties with domain, range, and inverse property.
+
+    This class inherits from the `LLMEncoder` class and is designed to encode OWL/RDF property items.
+    The `get_owl_items` method retrieves the IRI, label, domain, range, and inverse property information.
+
+    Attributes:
+        items_in_owl (str): Specifies the type of OWL items being encoded, in this case,
+                            a Property with Domain, Range, and Inverse.
+    """
+    items_in_owl: str = "(Property, Domain, Range, Inverse)"
+
+    def get_owl_items(self, prop: Dict) -> Any:
+        """
+        Extract the label and domain/range/inverse text of a property.
+
+        Parameters:
+            prop (Dict): Property item with an 'iri' key and optional 'label',
+                         'domain_text', 'range_text', 'inverse_of' and
+                         'inverse_label' keys.
+
+        Returns:
+            Dict: IRI, label, domain, range, inverse and combined text.
+        """
+        def joined(key: str) -> str:
+            # Missing keys and explicit None both mean "no text"; a bare
+            # string is kept whole instead of being joined char by char.
+            value = prop.get(key) or []
+            return value if isinstance(value, str) else " ".join(value)
+
+        label = prop.get("label", "")
+
+        domain_text = joined("domain_text")
+        range_text = joined("range_text")
+
+        # Inverse labels are only used when the property declares an inverse.
+        inverse_text = joined("inverse_label") if prop.get("inverse_of") else ""
+
+        combined_text = label
+        if domain_text:
+            combined_text += "  " + domain_text
+        if range_text:
+            combined_text += "  " + range_text
+        if inverse_text:
+            combined_text += "  inverse: " + inverse_text
+
+        return {
+            "iri": prop["iri"],
+            "label": label,
+            "domain": domain_text,
+            "range": range_text,
+            "inverse": inverse_text,
+            "text": combined_text,
+        }
\ No newline at end of file
diff --git a/ontoaligner/encoder/rag.py b/ontoaligner/encoder/rag.py
index 7ea7075..899b776 100644
--- a/ontoaligner/encoder/rag.py
+++ b/ontoaligner/encoder/rag.py
@@ -26,7 +26,6 @@
 
 from ..base import BaseEncoder
 from .lightweight import ConceptLightweightEncoder
-from .property import PropertyEncoder, PropMatchEncoder
 
 class RAGEncoder(BaseEncoder):
     """
@@ -287,43 +286,3 @@ def __str__(self):
             dict: A dictionary with the encoder name and items in OWL.
         """
         return f"OLaLaEncoder{self.items_in_owl}"
-
-class PropertyRAGEncoder(RAGEncoder):
-    """
-    Encodes OWL/RDF items representing a Property using retrieval-based and language model encoders.
-
-    This class extends the `RAGEncoder` class and is specialized in encoding OWL/RDF items that consist of
-    a Property. The retrieval encoder uses the `PropertyEncoder` class to retrieve the necessary property items,
-    while the language model encoder is set to "PropertyRAGDataset".
-
-    Attributes:
-        items_in_owl (str): Specifies the type of OWL items being encoded, in this case, a Property.
-        retrieval_encoder (Any): The retrieval encoder used for fetching OWL/RDF property items,
-                                 set to `PropertyEncoder`.
-        llm_encoder (str): The language model encoder used, set to "PropertyRAGDataset".
-    """    
-    items_in_owl: str = "(Property)"
-    retrieval_encoder: Any = PropertyEncoder
-    llm_encoder: str = "PropertyRAGDataset"
-
-
-class PropertyRAGEncoder(RAGEncoder):
-    """
-    Encodes OWL/RDF items representing a Property with its Domain, Range, and Inverse property using
-    retrieval-based and language model encoders.
-
-    This class extends the `RAGEncoder` class and is specialized in encoding OWL/RDF items that consist of
-    a Property, its Domain, Range, and Inverse property information. The retrieval encoder uses the
-    `PropMatchEncoder` class to retrieve the necessary property items, while the language model encoder is
-    set to "PropertyFullTextRAGDataset".
-
-    Attributes:
-        items_in_owl (str): Specifies the type of OWL items being encoded, in this case,
-                            a Property with Domain, Range, and Inverse property.
-        retrieval_encoder (Any): The retrieval encoder used for fetching OWL/RDF property items,
-                                 set to `PropMatchEncoder`.
-        llm_encoder (str): The language model encoder used, set to "PropertyFullTextRAGDataset".
-    """    
-    items_in_owl: str = "(Property, Domain, Range, Inverse)"
-    retrieval_encoder: Any = PropMatchEncoder
-    llm_encoder: str = "PropertyFullTextRAGDataset"
\ No newline at end of file

From 6d7e5f20c32f9c3e021781d51b7f2658ed744bfc Mon Sep 17 00:00:00 2001
From: KrishnaRani <thakurkrishnarani@gmail.com>
Date: Sat, 30 May 2026 00:28:59 +0200
Subject: [PATCH 3/3] propMatch Aligner extension

---
 ontoaligner/aligner/rag/dataset.py | 1 +
 ontoaligner/encoder/rag.py         | 1 +
 2 files changed, 2 insertions(+)

diff --git a/ontoaligner/aligner/rag/dataset.py b/ontoaligner/aligner/rag/dataset.py
index 788377a..135a745 100644
--- a/ontoaligner/aligner/rag/dataset.py
+++ b/ontoaligner/aligner/rag/dataset.py
@@ -26,6 +26,7 @@
 
 from torch.utils.data import Dataset
 
+
 class RAGDataset(Dataset):
     """
     A base dataset class for handling real-world entity classification tasks. This class preprocesses data and formats it into
diff --git a/ontoaligner/encoder/rag.py b/ontoaligner/encoder/rag.py
index 899b776..4c42c37 100644
--- a/ontoaligner/encoder/rag.py
+++ b/ontoaligner/encoder/rag.py
@@ -27,6 +27,7 @@
 from ..base import BaseEncoder
 from .lightweight import ConceptLightweightEncoder
 
+
 class RAGEncoder(BaseEncoder):
     """
     A retrieval-augmented generation (RAG) encoder for ontology mapping.