Skip to content

Commit 6a12e34

Browse files
Refactor fetch_jobs function to include job_title parameter
Updated the fetch_jobs function to accept job_title as an argument and improved the docstring for clarity.
1 parent c0ad5bb commit 6a12e34

1 file changed

Lines changed: 34 additions & 12 deletions

File tree

web_programming/fetch_jobs.py

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""
2-
Scraping jobs given job title and location from indeed website
2+
Scraping jobs given job title and location from Indeed website
33
"""
44

55
# /// script
@@ -11,24 +11,46 @@
1111
# ///
1212

1313
from __future__ import annotations
14-
1514
from collections.abc import Generator
16-
1715
import httpx
1816
from bs4 import BeautifulSoup
1917

20-
url = "https://www.indeed.co.in/jobs?q=mobile+app+development&l="
18+
# Indeed (India) job-search endpoint; the `q` (title) and `l` (location)
# query parameters are supplied per request in fetch_jobs().
BASE_URL = "https://www.indeed.co.in/jobs"
19+
20+
21+
def fetch_jobs(
    job_title: str = "mobile app development", location: str = "mumbai"
) -> Generator[tuple[str, str], None, None]:
    """
    Scrape job postings from Indeed for a given job title and location.

    Args:
        job_title: Keywords to search for (default: "mobile app development").
        location: City or region to search jobs in (default: "mumbai").

    Yields:
        Tuples of (job title, company name).

    Raises:
        httpx.HTTPStatusError: If Indeed responds with a 4xx/5xx status.

    Example:
        >>> jobs = list(fetch_jobs("python developer", "Bangalore"))  # doctest: +SKIP
        >>> isinstance(jobs[0], tuple)  # doctest: +SKIP
        True
    """
    # Identify ourselves politely; some sites reject requests with no UA.
    headers = {"User-Agent": "Mozilla/5.0 (compatible; JobScraper/1.0)"}
    params = {"q": job_title, "l": location}

    response = httpx.get(BASE_URL, params=params, headers=headers, timeout=10)
    # Fail fast on HTTP errors instead of silently parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    # Each organic (non-sponsored) posting is wrapped in this div.
    for job in soup.find_all("div", attrs={"data-tn-component": "organicJob"}):
        title_tag = job.find("a", attrs={"data-tn-element": "jobTitle"})
        company_tag = job.find("span", {"class": "company"})
        # Skip malformed entries missing either field rather than crashing.
        if title_tag and company_tag:
            yield title_tag.text.strip(), company_tag.text.strip()
3052

3153

3254
if __name__ == "__main__":
    # Demo run: list Python-developer openings in Bangalore, numbered from 1.
    results = fetch_jobs("python developer", "Bangalore")
    for index, (job_name, employer) in enumerate(results, start=1):
        print(f"Job {index:>2} is {job_name} at {employer}")

0 commit comments

Comments
 (0)