|
1 | 1 | """ |
2 | | -Scraping jobs given job title and location from indeed website |
| 2 | +Scrape jobs for a given job title and location from the Indeed website |
3 | 3 | """ |
4 | 4 |
|
5 | 5 | # /// script |
|
11 | 11 | # /// |
12 | 12 |
|
13 | 13 | from __future__ import annotations |
14 | | - |
15 | 14 | from collections.abc import Generator |
16 | | - |
17 | 15 | import httpx |
18 | 16 | from bs4 import BeautifulSoup |
19 | 17 |
|
20 | | -url = "https://www.indeed.co.in/jobs?q=mobile+app+development&l=" |
| 18 | +BASE_URL = "https://www.indeed.co.in/jobs" |
| 19 | + |
| 20 | + |
| 21 | +def fetch_jobs( |
| 22 | + job_title: str = "mobile app development", location: str = "mumbai" |
| 23 | +) -> Generator[tuple[str, str], None, None]: |
| 24 | + """ |
| 25 | + Scrape job postings from Indeed for a given job title and location. |
| 26 | +
|
| 27 | + Args: |
| 28 | + job_title: Keywords to search for (default: "mobile app development"). |
| 29 | + location: City or region to search jobs in (default: "mumbai"). |
| 30 | +
|
| 31 | + Yields: |
| 32 | + Tuples of (job title, company name). |
| 33 | +
|
| 34 | + Example: |
| 35 | + >>> jobs = list(fetch_jobs("python developer", "Bangalore")) |
| 36 | + >>> isinstance(jobs[0], tuple) |
| 37 | + True |
| 38 | + """ |
| 39 | + headers = {"User-Agent": "Mozilla/5.0 (compatible; JobScraper/1.0)"} |
| 40 | + params = {"q": job_title, "l": location} |
| 41 | + |
| 42 | + response = httpx.get(BASE_URL, params=params, headers=headers, timeout=10) |
| 43 | + response.raise_for_status() |
21 | 44 |
|
| 45 | + soup = BeautifulSoup(response.content, "html.parser") |
22 | 46 |
|
23 | | -def fetch_jobs(location: str = "mumbai") -> Generator[tuple[str, str]]: |
24 | | - soup = BeautifulSoup(httpx.get(url + location, timeout=10).content, "html.parser") |
25 | | - # This attribute finds out all the specifics listed in a job |
26 | 47 | for job in soup.find_all("div", attrs={"data-tn-component": "organicJob"}): |
27 | | - job_title = job.find("a", attrs={"data-tn-element": "jobTitle"}).text.strip() |
28 | | - company_name = job.find("span", {"class": "company"}).text.strip() |
29 | | - yield job_title, company_name |
| 48 | + title_tag = job.find("a", attrs={"data-tn-element": "jobTitle"}) |
| 49 | + company_tag = job.find("span", {"class": "company"}) |
| 50 | + if title_tag and company_tag: |
| 51 | + yield title_tag.text.strip(), company_tag.text.strip() |
30 | 52 |
|
31 | 53 |
|
32 | 54 | if __name__ == "__main__": |
33 | | - for i, job in enumerate(fetch_jobs("Bangalore"), 1): |
34 | | - print(f"Job {i:>2} is {job[0]} at {job[1]}") |
| 55 | + for i, (title, company) in enumerate(fetch_jobs("python developer", "Bangalore"), 1): |
| 56 | + print(f"Job {i:>2} is {title} at {company}") |
0 commit comments