Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 55 additions & 8 deletions backend/ai_brain/llm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
pass

from openai import OpenAI
import google.generativeai as genai
from google import genai

logger = logging.getLogger(__name__)

Expand All @@ -37,9 +37,8 @@ def __init__(self, provider="openai"):
http_client=httpx.Client()
)
elif self.provider == "gemini" and self.gemini_key:
genai.configure(api_key=self.gemini_key)
# gemini-2.0-flash is available for this key
self.model = genai.GenerativeModel('gemini-2.0-flash')
self.client = genai.Client(api_key=self.gemini_key)
self.model_name = 'gemini-2.5-flash'
else:
logger.warning("No API Key found for AI Brain. AI features will fail.")

Expand Down Expand Up @@ -133,7 +132,14 @@ def generate_linkedin_comment(self, image_path):
Do not sound like a bot. Be encouraging or ask a relevant question.
"""

response = self.model.generate_content([prompt, img])
try:
response = self.client.models.generate_content(
model=self.model_name,
contents=[prompt, img]
)
except Exception as api_err:
raise api_err

return response.text
except Exception as e:
logger.error(f"Gemini Error: {e}")
Expand Down Expand Up @@ -217,9 +223,10 @@ def solve_form(self, image_path, profile_context):
img = PIL.Image.open(image_path)

# Check for other models if flash fails
# Try standard gemini-pro-vision if flash is 404ing?
# Or gemini-1.5-flash-latest
response = self.model.generate_content([prompt, img])
response = self.client.models.generate_content(
model=self.model_name,
contents=[prompt, img]
)

answer_json_str = response.text.strip()
# Clean JSON markdown
Expand All @@ -235,3 +242,43 @@ def solve_form(self, image_path, profile_context):
return {}

return {} # Fallback

def enhance_prompt(self, raw_prompt):
    """
    Expand a raw, sparse user prompt into a descriptive, structured prompt.

    The rewrite is delegated to the configured provider ("openai" or
    "gemini"). Provider failures never propagate to the caller: whenever the
    prompt cannot be enhanced, the original ``raw_prompt`` is returned.

    Args:
        raw_prompt: The user's original prompt text.

    Returns:
        The rewritten prompt with surrounding whitespace stripped, or
        ``raw_prompt`` unchanged when it is blank, the provider is not
        handled here, the API call fails, or the model returns no text.
    """
    # Nothing to enhance: skip the API round-trip entirely for blank input.
    if not raw_prompt or not str(raw_prompt).strip():
        return raw_prompt

    system_instructions = (
        "You are an expert Prompt Engineer. The user will give you a raw, short, or confusing prompt. "
        "Your job is to understand their intent and rewrite it into a highly descriptive, "
        "structured, and effective prompt that could be used for an AI agent or LLM.\n"
        "Return ONLY the rewritten prompt. Do not include introductory text."
    )

    if self.provider == "openai":
        try:
            response = self.client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": system_instructions},
                    {"role": "user", "content": f"Raw prompt: {raw_prompt}"}
                ]
            )
            enhanced = response.choices[0].message.content
        except Exception as e:
            logger.error(f"OpenAI enhance_prompt failed: {e}")
            return raw_prompt

    elif self.provider == "gemini":
        try:
            full_prompt = f"{system_instructions}\n\nRaw prompt: {raw_prompt}"
            response = self.client.models.generate_content(
                model=self.model_name,
                contents=full_prompt
            )
            enhanced = response.text
        except Exception as e:
            logger.error(f"Gemini enhance_prompt failed: {e}")
            return raw_prompt

    else:
        # Unknown provider: nothing can be enhanced.
        return raw_prompt

    # The model may answer with None or whitespace only; an empty result must
    # not replace the user's prompt, so fall back to the raw input instead.
    enhanced = (enhanced or "").strip()
    if not enhanced:
        logger.warning("enhance_prompt received an empty response; returning the raw prompt.")
        return raw_prompt
    return enhanced
33 changes: 33 additions & 0 deletions backend/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import time
import os
from dotenv import load_dotenv
from pydantic import BaseModel
import backend.ai_brain.llm_client as llm_client

# Load environment variables
load_dotenv()
Expand Down Expand Up @@ -86,3 +88,34 @@ def emit(self, record):
async def get_logs():
"""Returns the last 50 logs."""
return {"logs": list(reversed(memory_handler.log_records))}

# Request body accepted by POST /api/save_prompt.
class PromptRequest(BaseModel):
    # Prompt text exactly as submitted by the client; the handler passes it
    # to the AI Brain for enhancement.
    raw_prompt: str

@app.post("/api/save_prompt")
async def save_prompt(request: PromptRequest):
    """
    Receives a raw prompt, enhances it via AI Brain,
    and saves both to a local markdown file.

    Args:
        request: Request body carrying the ``raw_prompt`` string.

    Returns:
        A dict with ``status`` ("success"), the ``raw`` prompt and the
        ``enhanced`` prompt.
    """
    # Function-scope imports keep this handler self-contained.
    import asyncio
    import datetime

    raw = request.raw_prompt
    logger.info(f"Processing raw prompt: {raw}")

    # Init Brain
    brain = llm_client.AIBrain(provider='gemini')

    # enhance_prompt performs a synchronous API call. Running it directly in
    # this coroutine would stall the event loop for every other request, so
    # it is handed to the loop's default thread pool instead.
    loop = asyncio.get_running_loop()
    enhanced = await loop.run_in_executor(None, brain.enhance_prompt, raw)

    # Save to file
    file_path = os.path.join(os.path.dirname(__file__), "saved_prompts.md")
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Build the whole record first and append it with a single write so one
    # entry is never split across several small writes.
    entry = (
        f"## Saved on {timestamp}\n"
        f"**Raw Input:** {raw}\n\n"
        f"**Descriptive Prompt:**\n```\n{enhanced}\n```\n\n"
        "---\n\n"
    )
    with open(file_path, "a", encoding="utf-8") as f:
        f.write(entry)

    logger.info("Prompt successfully enhanced and saved.")
    return {"status": "success", "raw": raw, "enhanced": enhanced}
12 changes: 10 additions & 2 deletions backend/appium_service/adb_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def input_text(self, text):
return self._run_command(["shell", "input", "text", escaped_text])

def press_home(self):
    """Simulate the Home button by sending the KEYCODE_HOME key event.

    Returns:
        Whatever ``_run_command`` returns for the ``input keyevent`` call.
    """
    # "KEYWORDS_HOME" is not a key code name; KEYCODE_HOME is the one to send.
    return self._run_command(["shell", "input", "keyevent", "KEYCODE_HOME"])

def press_back(self):
"""Simulate Back button."""
Expand All @@ -64,7 +64,7 @@ def wake_screen(self):
# Check if screen is on
dump = self._run_command(["shell", "dumpsys", "power"])
if "mWakefulness=Awake" not in dump:
self._run_command(["shell", "input", "keyevent", "KW_POWER"])
self._run_command(["shell", "input", "keyevent", "KEYCODE_POWER"])
self._run_command(["shell", "input", "keyevent", "82"]) # Unlock

def get_screenshot(self, save_path="screen.png"):
Expand All @@ -76,3 +76,11 @@ def get_screenshot(self, save_path="screen.png"):
def start_app(self, package_name):
    """Launch ``package_name`` through monkey.

    Monkey is used because it is often more reliable than ``am start`` for
    some apps. A single LAUNCHER-category event is injected; nothing is
    returned.
    """
    monkey_args = [
        "shell", "monkey",
        "-p", package_name,
        "-c", "android.intent.category.LAUNCHER",
        "1",
    ]
    self._run_command(monkey_args)

def get_clipboard(self):
    """Best-effort read of the Android clipboard over plain ADB.

    Sends the ``com.android.clipboard.GET_TEXT`` broadcast and extracts the
    ``data="..."`` payload from the ``am broadcast`` output. Reading the
    clipboard without extra helpers is tricky over direct ADB; the Appium
    path (``driver.get_clipboard_text()`` in MobileDriverService) is the
    one used in the final implementation.

    Returns:
        The clipboard text reported in the broadcast result, or ``None``
        when the output carries no ``data`` payload.
    """
    import re

    # NOTE(review): presumably requires a receiver for this action on the
    # device; confirm which helper is expected to answer it.
    output = self._run_command(
        ["shell", "am", "broadcast", "-a", "com.android.clipboard.GET_TEXT"]
    )
    if not isinstance(output, str):
        return None

    # Without a payload the output is only the "Broadcasting: ..." and
    # "Broadcast completed: result=N" status lines. Returning those would
    # hand callers status text disguised as clipboard content.
    match = re.search(r'data="(.*)"(?:, extras: .*)?\s*$', output, re.DOTALL)
    return match.group(1) if match else None
10 changes: 10 additions & 0 deletions backend/appium_service/mobile_driver_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,16 @@ def stop_session(self):
self.discard_recording() # Cleanup if not already done
self.driver.quit()

def get_clipboard_text(self):
    """Fetch the text currently on the device clipboard via the driver.

    A session is started first when no driver is attached yet.

    Returns:
        The clipboard text, or ``None`` if the driver call fails (the
        failure is logged).
    """
    has_driver = bool(self.driver)
    if not has_driver:
        self.start_session()

    try:
        clipboard_text = self.driver.get_clipboard_text()
    except Exception as e:
        logger.error(f"Failed to get clipboard: {e}")
        clipboard_text = None
    return clipboard_text

def find_and_click(self, xpath, timeout=10):
"""Robust click with wait."""
if not self.driver:
Expand Down
86 changes: 82 additions & 4 deletions backend/appium_service/pages/linkedin/feed_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,25 @@ class LinkedInFeedPage(BasePage):
Responsible for: Verifying feed state, scrolling, and basic interaction.
"""

def dismiss_popups(self):
    """Dismiss blocking popups such as ads or Premium prompts.

    The known close-button resource ID is tried first, then a list of
    dismissal labels; only the first label that can be clicked is used.

    Returns:
        True if anything was clicked, otherwise False.
    """
    close_button_id = "com.linkedin.android:id/ad_non_modal_dialog_close_button"
    dismissed_by_id = bool(self.device.click_element_by_id(close_button_id))
    if dismissed_by_id:
        logger.warning("⚠️ Dismissed Popup via ID.")
        time.sleep(1)

    # Lazily probe the labels so clicking stops at the first hit.
    candidate_labels = ("No thanks", "Not now", "Skip", "Close", "Got it")
    clicked_label = next(
        (label for label in candidate_labels if self.device.click_text(label)),
        None,
    )
    if clicked_label is None:
        return dismissed_by_id

    logger.warning(f"⚠️ Dismissed Popup via text '{clicked_label}'.")
    time.sleep(1)
    return True

def ensure_app_open(self):
"""
Ensures the app is open and on the Feed.
Expand All @@ -25,10 +44,7 @@ def ensure_app_open(self):

# 2. Check for Popup
logger.info("Feed not found. Checking for popups...")
popup_id = "com.linkedin.android:id/ad_non_modal_dialog_close_button"
if self.device.click_element_by_id(popup_id):
logger.info("⚠️ Dismissed Popup in Feed Page.")
time.sleep(2)
if self.dismiss_popups():
if self.device.wait_for_text(["Search", "Messaging", "Home"], timeout=5):
return True

Expand All @@ -37,6 +53,8 @@ def ensure_app_open(self):
def scroll_feed(self, swipes=3):
"""Performs scrolling action on the feed."""
logger.info(f"Scrolling feed {swipes} times...")
self.dismiss_popups()

for i in range(swipes):
self.scroll_down()
time.sleep(1.5) # Wait for content to settle
Expand Down Expand Up @@ -126,3 +144,63 @@ def leave_comment(self, comment_text):

logger.warning("Could not find 'Post' or 'Comment' or 'Send' button.")
return False

def get_post_link(self):
    """
    Copy the share link of the post at the top of the feed and return it.

    Flow: click 'Send', click 'Copy link' in the share menu, then read the
    clipboard through the device.

    Returns:
        The post URL with everything from the first '?' removed, or None
        if a step fails or the clipboard does not hold a post link.
    """
    logger.info("Attempting to get post link...")
    device = self.device

    # Step 1: open the share menu through the post's 'Send' action.
    send_clicked = device.click_text("Send", timeout=5)
    if not send_clicked:
        logger.error("Could not find 'Send' button for the post.")
        return None

    time.sleep(2)  # Give the share menu time to appear

    # Step 2: ask for the link to be copied.
    copy_clicked = device.click_text("Copy link", timeout=5)
    if not copy_clicked:
        logger.error("Could not find 'Copy link' in the share menu.")
        # Back out in case the share menu is still covering the feed
        device.adb.press_back()
        return None

    # Step 3: read the copied value back from the clipboard.
    time.sleep(1)
    clipboard = device.get_clipboard_text()

    looks_like_post = bool(clipboard) and "linkedin.com/posts" in clipboard
    if not looks_like_post:
        logger.warning(f"Unexpected clipboard content: {clipboard}")
        return None

    # Drop tracking parameters: keep only what precedes the first '?'.
    cleaned = clipboard.split("?", 1)[0]
    logger.info(f"✅ Extracted post link (cleaned): {cleaned}")
    return cleaned

def get_feed_links(self, count=5):
    """
    Walk the feed and collect up to ``count`` distinct post links.

    Each attempt extracts the current post's link and then scrolls to the
    next post. At most ``count * 2`` attempts are made, so fewer than
    ``count`` links may be returned.

    Returns:
        The unique links in the order they were found.
    """
    logger.info(f"Scanning feed for {count} post links...")

    # A dict keeps insertion order and doubles as the "already seen" lookup.
    collected = {}
    attempts_left = count * 2

    while len(collected) < count and attempts_left > 0:
        candidate = self.get_post_link()
        if candidate and candidate not in collected:
            collected[candidate] = True
            logger.info(f"Progress: {len(collected)}/{count}")

        # Move on to the next post regardless of the outcome.
        logger.info("Scrolling to next post...")
        self.scroll_down()
        time.sleep(2)
        attempts_left -= 1

    return list(collected)
34 changes: 26 additions & 8 deletions backend/appium_service/pages/linkedin/job_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,34 @@ class LinkedInJobPage(BasePage):
Responsible for: Navigating to jobs, searching, and filtering.
"""

def go_to_jobs(self):
"""Clicks the 'Jobs' tab. Handles Payment Popup if present."""
logger.info("Navigating to Jobs tab...")
def dismiss_popups(self):
    """Identify and dismiss blocking popups like ads or Premium prompts.

    The known close-button resource ID is tried first, then a list of
    dismissal labels; clicking stops at the first label that succeeds.

    Returns:
        True if any popup control was clicked, otherwise False.
    """
    popups_dismissed = False

    # 1. Close button by ID (e.g. ad_non_modal_dialog_close_button)
    popup_id = "com.linkedin.android:id/ad_non_modal_dialog_close_button"
    if self.device.click_element_by_id(popup_id):
        # Logged once; the older duplicate info-level message is dropped.
        logger.warning("⚠️ Dismissed Ad/Promo Popup via ID.")
        popups_dismissed = True
        time.sleep(1)

    # 2. Text based dismissals. "No thanks" is handled by this loop only:
    # a separate earlier click never set the result flag and let the loop
    # click a second button afterwards.
    dismiss_texts = ["No thanks", "Not now", "Skip", "Close", "Got it"]
    for text in dismiss_texts:
        if self.device.click_text(text):
            logger.warning(f"⚠️ Dismissed Popup via text '{text}'.")
            popups_dismissed = True
            time.sleep(1)
            break

    return popups_dismissed

def go_to_jobs(self):
"""Clicks the 'Jobs' tab. Handles Payment Popup if present."""
logger.info("Navigating to Jobs tab...")

# 0. Check for blocking Popups
self.dismiss_popups()

# 1. Look for the Jobs tab
# ID is usually stable: com.linkedin.android:id/tab_jobs
Expand All @@ -49,6 +64,8 @@ def search_jobs(self, keyword):
"""
logger.info(f"Searching for jobs: '{keyword}'")

self.dismiss_popups()

# 1. Find Search Entry Point (Top Bar)
# Often "Search jobs" text or ID com.linkedin.android:id/search_bar_text
search_clicked = False
Expand Down Expand Up @@ -92,6 +109,7 @@ def filter_easy_apply(self):
Clicks the 'Easy Apply' filter button.
"""
logger.info("Filtering for Easy Apply...")
self.dismiss_popups()

# Method: Look for "Easy Apply" text in the horizontal scroll view of filters
# It usually appears at the top after search.
Expand Down
Loading