Skip to content

Commit e8c3a43

Browse files
author
Tom McCormick
committed
Fix file system location parsing: use environment variables to set the default scheme and netloc when they are not specified in the file path
1 parent c3c314d commit e8c3a43

1 file changed

Lines changed: 20 additions & 6 deletions

File tree

pyiceberg/io/pyarrow.py

Lines changed: 20 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -393,14 +393,28 @@ def __init__(self, properties: Properties = EMPTY_DICT):
393393

394394
@staticmethod
395395
def parse_location(location: str) -> Tuple[str, str, str]:
396-
"""Return the path without the scheme."""
396+
"""Return (scheme, netloc, path) for the given location.
397+
Uses environment variables DEFAULT_SCHEME and DEFAULT_NETLOC
398+
if scheme/netloc are missing.
399+
"""
397400
uri = urlparse(location)
398-
if not uri.scheme:
399-
return "file", uri.netloc, os.path.abspath(location)
400-
elif uri.scheme in ("hdfs", "viewfs"):
401-
return uri.scheme, uri.netloc, uri.path
401+
402+
# Load defaults from environment
403+
default_scheme = os.getenv("DEFAULT_SCHEME", "file")
404+
default_netloc = os.getenv("DEFAULT_NETLOC", "")
405+
406+
# Fall back to the environment defaults when the location omits a scheme or netloc
407+
scheme = uri.scheme or default_scheme
408+
netloc = uri.netloc or default_netloc
409+
410+
if scheme in ("hdfs", "viewfs"):
411+
return scheme, netloc, uri.path
402412
else:
403-
return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}"
413+
# For non-HDFS URIs, include netloc in the path if present
414+
path = uri.path if uri.scheme else os.path.abspath(location)
415+
if netloc and not path.startswith(netloc):
416+
path = f"{netloc}{path}"
417+
return scheme, netloc, path
404418

405419
def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSystem:
406420
"""Initialize FileSystem for different scheme."""

0 commit comments

Comments
 (0)