Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: Python FastAPI CI

on:
push:
branches: [ main ]
branches: [ release ]
pull_request:
branches: [ main ]
branches: [ release ]

jobs:
build:
Expand Down
64 changes: 56 additions & 8 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import uvicorn
from mongodb import users_collection
from typing import Optional, List
from rank_bm25 import BM25Okapi
import re
from string import punctuation

app = FastAPI()

Expand All @@ -13,6 +16,20 @@ class InputData(BaseModel):
user_ID: str
context: Optional[List[str]] = None

# Common English function words that carry little meaning for keyword
# matching; simple_tokenize() drops them when stopword removal is enabled.
STOPWORDS = {
    # Personal, possessive and reflexive pronouns
    'i', 'me', 'my', 'myself',
    'we', 'our', 'ours', 'ourselves',
    'you', 'your', 'yours', 'yourself', 'yourselves',
    'he', 'him', 'his', 'himself',
    'she', 'her', 'hers', 'herself',
    'it', 'its', 'itself',
    'they', 'them', 'their', 'theirs', 'themselves',
    # Interrogatives and demonstratives
    'what', 'which', 'who', 'whom',
    'this', 'that', 'these', 'those',
    # Auxiliary verbs
    'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'having',
    'do', 'does', 'did', 'doing',
    # Articles and conjunctions
    'a', 'an', 'the',
    'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while',
    # Prepositions and particles
    'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between',
    'into', 'through', 'during', 'before', 'after', 'above', 'below',
    'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under',
    # Adverbs of time, place and manner
    'again', 'further', 'then', 'once', 'here', 'there',
    'when', 'where', 'why', 'how',
    # Quantifiers
    'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some',
    'such',
    # Negation, degree and comparison
    'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very',
    # Contraction fragments and common modals
    's', 't', 'can', 'will', 'just', 'don', 'should', 'now',
}

@app.get("/")
def read_root():
    """Answer GET requests on "/" with a fixed greeting payload."""
    greeting = {"message": "Hello, world!"}
    return greeting
Expand All @@ -32,18 +49,49 @@ def run_script():
except Exception as e:
return {"error": str(e)}

# Regex-based tokenizer with optional stopword removal
def simple_tokenize(text, remove_stopwords=True):
    """Split *text* into lowercase word tokens.

    The text is lowercased and every maximal run of word characters
    (letters, digits, underscore) becomes one token; all other characters
    act as separators and never appear in the result.

    Args:
        text: The string to tokenize.
        remove_stopwords: When true (the default), tokens present in the
            module-level ``STOPWORDS`` set are dropped.

    Returns:
        A list of tokens in their original order.
    """
    words = re.findall(r"\b\w+\b", text.lower())
    return [
        word
        for word in words
        # "\w" also matches "_", so runs such as "_" or "__" come out of the
        # regex as tokens even though they are pure punctuation; drop them.
        if word.strip("_")
        # The short-circuit keeps STOPWORDS untouched when filtering is off.
        and not (remove_stopwords and word in STOPWORDS)
    ]

@app.post("/ask-ai")
def ask_ai(data: InputData):
try:
docs = list(users_collection.find({"user_id": data.user_ID}).sort("_id", -1).limit(5))
# Fetch a larger window of history
docs = list(users_collection.find({"user_id": data.user_ID}).sort("_id", -1).limit(50))
if docs:
history = []
for doc in docs[::-1]:
history.append({
"user_prompt": doc.get("user_promt", ""),
"AI": doc.get("AI", "")
})
print("Last five history loaded from MongoDB:", history)
# Prepare documents for BM25
history_texts = [
f"{doc.get('user_promt', '')} {doc.get('AI', '')}" for doc in docs
]
tokenized_corpus = [simple_tokenize(text, remove_stopwords=True) for text in history_texts]
tokenized_query = simple_tokenize(data.query, remove_stopwords=False)
print("Query:", data.query)
# print("Tokenized query:", tokenized_query)
# print("History texts:", history_texts)
# print("Tokenized corpus:", tokenized_corpus)
# Check for empty query or empty corpus
if not tokenized_query or not any(tokenized_corpus):
history = []
print("BM25 skipped: empty query or corpus.")
else:
bm25 = BM25Okapi(tokenized_corpus)
scores = bm25.get_scores(tokenized_query)
top_n = 5
top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_n]
# Select the most relevant history
relevant_docs = [docs[i] for i in top_indices]
history = []
for doc in relevant_docs:
history.append({
"user_prompt": doc.get("user_promt", ""),
"AI": doc.get("AI", "")
})
print("Top 5 relevant history loaded from MongoDB using BM25:", history)
else:
history = []

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ fastapi
uvicorn
python-dotenv
google-generativeai
pymongo
pymongo
rank_bm25