From b2537c6f9848e18b89895dad6b29c1af69be6f55 Mon Sep 17 00:00:00 2001 From: jairomelo Date: Mon, 14 Jul 2025 22:48:31 -0400 Subject: [PATCH 1/8] minor fix: right url for Documentation --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3ed6d1e..dc0eaed 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ![CI](https://github.com/jairomelo/GeoResolver/actions/workflows/ci.yml/badge.svg) ![License](https://img.shields.io/pypi/l/georesolver) ![Downloads](https://static.pepy.tech/badge/georesolver) -[![Documentation](https://img.shields.io/badge/docs-online-blue)](https://jairomelo.com/Georesolver/) +[![Documentation](https://img.shields.io/badge/docs-online-blue)](http://jairomelo.com/GeoResolver/) [![Issues](https://img.shields.io/github/issues/jairomelo/Georesolver)](https://github.com/jairomelo/Georesolver/issues) From 8a4070be7a38c69a716ef0752bdff49141f6ca13 Mon Sep 17 00:00:00 2001 From: jairomelo Date: Tue, 26 May 2026 15:01:57 -0700 Subject: [PATCH 2/8] fix: update version to 0.2.3 and correct documentation URL in pyproject.toml --- CHANGELOG.md | 5 +++++ pyproject.toml | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11c704d..649a57b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [v0.2.3] - 2026-05-26 +- Fixed documentation url in `pyproject.toml` [Fix issue [#4](https://github.com/jairomelo/GeoResolver/issues/4)] + +--- + ## [v0.2.2] - 2025-07-14 ### Added diff --git a/pyproject.toml b/pyproject.toml index 9d433c6..2a7496d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "georesolver" -version = "0.2.2" +version = "0.2.3" description = "Multi-source place name to coordinates resolver using TGN, WHG, GeoNames, and Wikidata" authors = [ {name="Jairo Antonio Melo Florez", email="jairoantoniomelo@gmail.com"} @@ -51,7 +51,7 @@ classifiers = [ [project.urls] Homepage = "https://github.com/jairomelo/Georesolver" Issues = "https://github.com/jairomelo/Georesolver/issues" -Documentation = "https://jairomelo.com/Georesolver/" +Documentation = "https://jairomelo.com/GeoResolver/georesolver.html" [tool.setuptools.package-data] "georesolver" = ["data/mappings/places_map.json"] From f56d4dabced8a1e8f1c557d03498cbb9746b335c Mon Sep 17 00:00:00 2001 From: jairomelo Date: Tue, 26 May 2026 15:04:45 -0700 Subject: [PATCH 3/8] fix: update documentation URL in README.md to point to the correct path --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index dc0eaed..4b3fb17 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ![CI](https://github.com/jairomelo/GeoResolver/actions/workflows/ci.yml/badge.svg) ![License](https://img.shields.io/pypi/l/georesolver) ![Downloads](https://static.pepy.tech/badge/georesolver) -[![Documentation](https://img.shields.io/badge/docs-online-blue)](http://jairomelo.com/GeoResolver/) +[![Documentation](https://img.shields.io/badge/docs-online-blue)](https://jairomelo.com/GeoResolver/georesolver.html) [![Issues](https://img.shields.io/github/issues/jairomelo/Georesolver)](https://github.com/jairomelo/Georesolver/issues) From 0e7d79521bbd103ed5b3d1a4b9b11b728b7893f6 Mon Sep 17 00:00:00 2001 From: jairomelo
Date: Tue, 26 May 2026 15:25:44 -0700 Subject: [PATCH 4/8] fix: enhance BaseQuery to use session with custom headers and retry logic --- src/georesolver/base.py | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/georesolver/base.py b/src/georesolver/base.py index 6717a4a..beeb1fe 100644 --- a/src/georesolver/base.py +++ b/src/georesolver/base.py @@ -3,6 +3,9 @@ from ratelimit import limits, sleep_and_retry import requests import requests_cache +import os +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry from georesolver.utils.LoggerHandler import setup_logger class BaseQuery(ABC): @@ -24,6 +27,27 @@ def __init__( self.base_url = base_url.rstrip("/") self.calls, self.period = rate_limit + # A non-default User-Agent is required by some services (e.g., Wikidata/WHG). + custom_ua = os.getenv("GEORESOLVER_USER_AGENT", "georesolver/0.2 (+https://pypi.org/project/georesolver)") + self.default_headers = { + "User-Agent": custom_ua, + "Accept": "application/json", + "Accept-Language": "en-US,en;q=0.9" + } + + self.session = requests.Session() + self.session.headers.update(self.default_headers) + + retry = Retry( + total=3, + backoff_factor=0.5, + status_forcelist=[429, 500, 502, 503, 504], + allowed_methods={"GET"} + ) + adapter = HTTPAdapter(max_retries=retry) + self.session.mount("https://", adapter) + self.session.mount("http://", adapter) + if enable_cache: requests_cache.install_cache(cache_name, expire_after=cache_expiry) self.logger.info(f"Installed cache '{cache_name}' (expires after {cache_expiry}s)") @@ -32,13 +56,18 @@ def __init__( @limits(calls=30, period=1) def _limited_get(self, url: str, - params: Optional[Dict[str, Any]] = None) -> requests.Response: + params: Optional[Dict[str, Any]] = None, + headers: Optional[Dict[str, str]] = None, + timeout: int = 20) -> requests.Response: """ Internal method to perform a GET request with rate limiting. 
""" full_url = f"{self.base_url}{url}" if not url.startswith("http") else url try: - response = requests.get(full_url, params=params) + merged_headers = self.default_headers.copy() + if headers: + merged_headers.update(headers) + response = self.session.get(full_url, params=params, headers=merged_headers, timeout=timeout) response.raise_for_status() if getattr(response, "from_cache", False): self.logger.info(f"[CACHE HIT] {response.url}") From e7fa2d1a7096c4ecc27d92d653767311064e823a Mon Sep 17 00:00:00 2001 From: jairomelo Date: Tue, 26 May 2026 15:26:24 -0700 Subject: [PATCH 5/8] fix: implement customizable User-Agent for API requests in BaseQuery --- README.md | 15 +++++++++++++++ src/georesolver/base.py | 18 +++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4b3fb17..60e7122 100644 --- a/README.md +++ b/README.md @@ -265,6 +265,21 @@ Each service-specific list should contain valid place type codes or labels expec This library queries the Wikidata MediaWiki API via the endpoint: `https://www.wikidata.org/w/api.php` +### Request identification (recommended) + +Some public APIs (including Wikidata and, in some scenarios, WHG) may reject requests sent with the default Python user agent. +GeoResolver sends an identifiable `User-Agent` by default, and you can customize it with environment variables: + +```bash +# Full override +GEORESOLVER_USER_AGENT="georesolver/0.2 (+https://your-project.example; contact: you@example.org)" + +# Or append contact info to the default GeoResolver user agent +GEORESOLVER_USER_AGENT_CONTACT="you@example.org" +``` + +For production pipelines, it is recommended to provide a contact email or URL to align with service policies and simplify troubleshooting with providers. + It does not use the SPARQL endpoint (`https://query.wikidata.org/sparql`), as this approach is faster and more reliable for simple place lookups. 
The library performs entity searches by name and retrieves coordinates, country (P17), and administrative data from the entity information. **Enhanced in v0.2.0**: WikidataQuery now provides better country and administrative entity data retrieval, with improved matching against the BaseQuery interface for consistency across all services. diff --git a/src/georesolver/base.py b/src/georesolver/base.py index beeb1fe..27ed66a 100644 --- a/src/georesolver/base.py +++ b/src/georesolver/base.py @@ -14,6 +14,22 @@ class BaseQuery(ABC): Handles caching, rate limiting, and basic GET requests. """ + @staticmethod + def _build_default_user_agent() -> str: + """ + Build an identifiable User-Agent required by some public APIs. + Users can fully override it with GEORESOLVER_USER_AGENT. + """ + configured_ua = os.getenv("GEORESOLVER_USER_AGENT") + if configured_ua: + return configured_ua + + contact = os.getenv("GEORESOLVER_USER_AGENT_CONTACT", "").strip() + base_ua = "georesolver/0.2 (+https://pypi.org/project/georesolver)" + if contact: + return f"{base_ua}; contact: {contact}" + return base_ua + def __init__( self, base_url: str, @@ -28,7 +44,7 @@ def __init__( self.calls, self.period = rate_limit # A non-default User-Agent is required by some services (e.g., Wikidata/WHG). 
- custom_ua = os.getenv("GEORESOLVER_USER_AGENT", "georesolver/0.2 (+https://pypi.org/project/georesolver)") + custom_ua = self._build_default_user_agent() self.default_headers = { "User-Agent": custom_ua, "Accept": "application/json", From d999100719548da3859c4efbcce1f2fa444a5690 Mon Sep 17 00:00:00 2001 From: jairomelo Date: Tue, 26 May 2026 15:40:28 -0700 Subject: [PATCH 6/8] fix: add example environment variables for configuration --- .env.example | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..2f46b09 --- /dev/null +++ b/.env.example @@ -0,0 +1,3 @@ +GEONAMES_USERNAME="your-geonames-username--do-not-use-demo" +GEORESOLVER_USER_AGENT="georesolver/0.2 (+https://your-project.example; contact: you@example.org)" +GEORESOLVER_USER_AGENT_CONTACT="you@example.org" \ No newline at end of file From 26cb5b1ecde4942b8641f8bbc2f45d4e26e1ba3e Mon Sep 17 00:00:00 2001 From: jairomelo Date: Tue, 26 May 2026 15:41:13 -0700 Subject: [PATCH 7/8] fix: update changelog for version 0.2.4 with new features and enhancements --- CHANGELOG.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 649a57b..6f4ca83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [v0.2.3] - 2026-05-26 +## [v0.2.4] - 2026-05-26 - Fixed documentation url in `pyproject.toml` [Fix issue [#4](https://github.com/jairomelo/GeoResolver/issues/4)] +- Added a shared requests Session with default headers: + - Custom User-Agent + - Accept: application/json + - Accept-Language +- Added a conservative retry adapter for transient statuses (429/5xx). +- Routed BaseQuery HTTP calls through that session. 
+- Added timeout and optional per-call header override support in the shared GET helper. +- Added a configurable User-Agent builder (`GEORESOLVER_USER_AGENT` for a full override, `GEORESOLVER_USER_AGENT_CONTACT` to append contact info to the default). --- From dadc357b0c2d55818e4c2329d106b665a7ad4e7a Mon Sep 17 00:00:00 2001 From: jairomelo Date: Tue, 26 May 2026 15:46:23 -0700 Subject: [PATCH 8/8] fix: update version to 0.2.4 in pyproject.toml and changelog --- CHANGELOG.md | 3 ++- pyproject.toml | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f4ca83..39a964a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -101,5 +101,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Previous stable release. See git history for details of earlier versions. -[Unreleased]: https://github.com/jairomelo/georesolver/compare/v0.1.4...HEAD +[Unreleased]: https://github.com/jairomelo/georesolver/compare/v0.2.4...HEAD +[v0.2.4]: https://github.com/jairomelo/georesolver/releases/tag/v0.2.4 [v0.1.4]: https://github.com/jairomelo/georesolver/releases/tag/v0.1.4 diff --git a/pyproject.toml b/pyproject.toml index 2a7496d..376023a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,13 +4,13 @@ build-backend = "setuptools.build_meta" [project] name = "georesolver" -version = "0.2.3" +version = "0.2.4" description = "Multi-source place name to coordinates resolver using TGN, WHG, GeoNames, and Wikidata" authors = [ {name="Jairo Antonio Melo Florez", email="jairoantoniomelo@gmail.com"} ] readme = {file = "README.md", content-type = "text/markdown"} -license = {text = "GPL-3.0-only"} +license = "GPL-3.0-only" requires-python = ">=3.9" dependencies = [ "SPARQLWrapper~=2.0.0", @@ -34,7 +34,6 @@ keywords = [ classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", "Natural Language :: English", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9",