From 33867ab2bd974cb3847eb9fcf701068a4e519b04 Mon Sep 17 00:00:00 2001 From: Aldrich_CC <109075336+Chen17-sq@users.noreply.github.com> Date: Mon, 27 Apr 2026 19:53:11 +0800 Subject: [PATCH] fix(security): validate file paths in /api/documents/upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /api/documents/upload endpoint accepted any absolute path on the host and forwarded the file's contents to the configured VLM / embedding provider for processing. Combined with api_auth being disabled by default, any process that could reach 127.0.0.1:1733 — for example a browser extension with host_permissions, a malicious npm/pip dependency, or another container sharing the host network — could exfiltrate arbitrary user files (SSH keys, cloud credentials, etc.) by submitting their paths and letting the document pipeline ship the contents to the cloud LLM endpoint. This change rejects upload paths that are not under an allowed root, with a defense-in-depth deny list for clearly sensitive locations regardless of configuration: * Allow-list sources, in order: 1. capture.folder_monitor.watch_folder_paths (already opted-in) 2. security.document_upload_allowed_paths (new, explicit extension) 3. Fallback to ~/Documents, ~/Downloads, ~/Desktop when present. * Hard deny: paths whose components include .ssh / .aws / .gnupg / .azure / .gcloud / .kube / .docker / .password-store / Keychains; filenames .env / id_rsa / id_ed25519 / id_ecdsa / id_dsa / shadow; and paths under /etc, /proc, /sys, /dev, /root, /var/log, /var/db (and the /private equivalents on macOS). Paths are resolved (with symlinks followed) before checks, so the allow-list cannot be escaped via `..` traversal or symlink tricks. 
--- config/config.yaml | 17 +++ opencontext/server/context_operations.py | 171 ++++++++++++++++++++++- 2 files changed, 185 insertions(+), 3 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 4b0984af..5e0ca265 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -209,6 +209,23 @@ api_auth: - "/assistant" - "/vaults" +# Security-related settings +security: + # Allow-list for the /api/documents/upload endpoint. Any file path passed + # to this endpoint must resolve to a location under one of these roots, + # otherwise the upload is rejected. This prevents the endpoint from being + # abused to read arbitrary files (e.g. ~/.ssh/id_rsa) and ship their + # contents to the configured VLM/embedding provider. + # + # Paths under capture.folder_monitor.watch_folder_paths are implicitly + # allowed and do not need to be repeated here. + # + # If both this list and watch_folder_paths are empty, the upload endpoint + # falls back to ~/Documents, ~/Downloads and ~/Desktop (when they exist). + document_upload_allowed_paths: [] + # - "${CONTEXT_PATH:.}/uploads" + # - "~/Documents/MineContext" + # Prompts configuration prompts: language: "zh" diff --git a/opencontext/server/context_operations.py b/opencontext/server/context_operations.py index ca9bd53b..8d7e09ad 100644 --- a/opencontext/server/context_operations.py +++ b/opencontext/server/context_operations.py @@ -11,7 +11,8 @@ import datetime import os -from typing import Any, Dict, List, Optional +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple from opencontext.models.context import ProcessedContext, RawContextProperties, Vectorize from opencontext.models.enums import ( @@ -26,6 +27,135 @@ logger = get_logger(__name__) +# Path components / locations that must never be ingested by /api/documents/upload, +# regardless of the configured allow-list. 
# Deny-list data for /api/documents/upload. These locations hold credentials,
# keys, or system state whose contents would be sent to the configured
# (potentially remote) VLM/embedding provider for processing, so they are
# rejected regardless of the configured allow-list.
_SENSITIVE_PATH_COMPONENTS = frozenset(
    {
        ".ssh",
        ".aws",
        ".gnupg",
        ".azure",
        ".gcloud",
        ".kube",
        ".docker",
        ".password-store",
        "Keychains",
    }
)

_SENSITIVE_FILENAMES = frozenset(
    {
        ".env",
        "id_rsa",
        "id_ed25519",
        "id_ecdsa",
        "id_dsa",
        "shadow",
    }
)

_SENSITIVE_PATH_PREFIXES = (
    "/etc",
    "/proc",
    "/sys",
    "/dev",
    "/root",
    "/var/log",
    "/var/db",
    "/private/etc",
    "/private/var/log",
    "/private/var/db",
)

# Case-folded views of the deny lists. A filesystem may be case-insensitive
# (the macOS default) and Path.resolve() is not guaranteed to canonicalise
# letter case, so "~/.SSH/ID_RSA" must be treated exactly like "~/.ssh/id_rsa".
_SENSITIVE_PATH_COMPONENTS_FOLDED = frozenset(
    name.casefold() for name in _SENSITIVE_PATH_COMPONENTS
)
_SENSITIVE_FILENAMES_FOLDED = frozenset(name.casefold() for name in _SENSITIVE_FILENAMES)

# Errors that expanding/resolving a filesystem path can raise: OS-level
# failures, an undeterminable home directory or symlink loop (RuntimeError),
# and malformed input such as an embedded NUL byte (ValueError).
_PATH_RESOLUTION_ERRORS = (OSError, RuntimeError, ValueError)


def _is_sensitive_path(path: Path) -> Tuple[bool, str]:
    """Defense-in-depth deny check for clearly sensitive locations.

    Matching is case-insensitive so that the deny list cannot be sidestepped
    with a differently-cased spelling on a case-insensitive filesystem.

    Args:
        path: The candidate path. Callers are expected to pass an already
            resolved absolute path; this function performs no resolution.

    Returns:
        ``(True, reason)`` when the path hits the deny list, where ``reason``
        is a human-readable explanation; ``(False, "")`` otherwise.
    """
    # Every component is checked, including the final one, so a file that is
    # itself named like a sensitive directory is rejected as well.
    for part in path.parts:
        if part.casefold() in _SENSITIVE_PATH_COMPONENTS_FOLDED:
            return True, f"path contains sensitive directory '{part}'"

    if path.name.casefold() in _SENSITIVE_FILENAMES_FOLDED:
        return True, f"file '{path.name}' is on sensitive-filename deny list"

    folded = str(path).casefold()
    for prefix in _SENSITIVE_PATH_PREFIXES:
        needle = prefix.casefold()
        # Require an exact match or a separator after the prefix so that
        # e.g. "/etcetera" is not mistaken for something under "/etc".
        if folded == needle or folded.startswith(needle + os.sep):
            return True, f"path is under sensitive system directory '{prefix}'"

    return False, ""


def _resolve_paths_from_config(values: Any) -> List[Path]:
    """Coerce a config value into a list of resolved absolute paths.

    Args:
        values: A single path string, a list/tuple of path strings, or
            ``None``. Any other type yields an empty list.

    Returns:
        One ``Path`` per usable entry, with ``~`` expanded and symlinks
        resolved. Entries that are not non-empty strings, or that fail to
        resolve, are skipped silently (best-effort by design).
    """
    if isinstance(values, str):
        values = [values]
    if not isinstance(values, (list, tuple)):
        return []

    resolved: List[Path] = []
    for raw in values:
        # An empty string would resolve to the current working directory and
        # silently allow it, so it is dropped together with non-string entries.
        if not isinstance(raw, str) or not raw:
            continue
        try:
            resolved.append(Path(raw).expanduser().resolve())
        except _PATH_RESOLUTION_ERRORS:
            continue
    return resolved


def _config_section(container: Any, key: str) -> Any:
    """Return ``container[key]`` if it is mapping-like, otherwise ``{}``.

    Guards against malformed configuration where a section that should be a
    mapping holds a scalar or a list instead.
    """
    section = container.get(key) if hasattr(container, "get") else None
    return section if hasattr(section, "get") else {}


def _resolve_allowed_upload_roots(config: Optional[Dict[str, Any]]) -> List[Path]:
    """Compute the directories under which an uploaded document may live.

    Sources, in order:
      1. ``capture.folder_monitor.watch_folder_paths`` - directories the user
         has already opted in to having watched/processed.
      2. ``security.document_upload_allowed_paths`` - explicit allow-list
         extension point for the upload endpoint.
      3. Fallback when neither yields a usable path: ``~/Documents``,
         ``~/Downloads`` and ``~/Desktop``, for those that are directories.

    Args:
        config: The application configuration mapping, or ``None``.

    Returns:
        Resolved root directories, de-duplicated with first-seen order kept.
        May be empty when nothing is configured and no fallback directory
        exists.
    """
    cfg = config or {}
    folder_monitor_cfg = _config_section(_config_section(cfg, "capture"), "folder_monitor")
    security_cfg = _config_section(cfg, "security")

    roots: List[Path] = []
    roots.extend(_resolve_paths_from_config(folder_monitor_cfg.get("watch_folder_paths")))
    roots.extend(_resolve_paths_from_config(security_cfg.get("document_upload_allowed_paths")))

    if not roots:
        try:
            home = Path.home().resolve()
        except _PATH_RESOLUTION_ERRORS:
            home = None
        if home is not None:
            for sub in ("Documents", "Downloads", "Desktop"):
                candidate = home / sub
                try:
                    if candidate.is_dir():
                        # Resolve the candidate itself: uploaded paths are
                        # compared in fully resolved form, so a symlinked
                        # ~/Documents would otherwise never match.
                        roots.append(candidate.resolve())
                except _PATH_RESOLUTION_ERRORS:
                    continue

    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(roots))


def _is_path_under_any_root(path: Path, roots: List[Path]) -> bool:
    """Return True if ``path`` equals, or lies beneath, any of ``roots``.

    The comparison is purely lexical, so both ``path`` and ``roots`` should
    already be resolved for the answer to be meaningful.
    """
    for root in roots:
        try:
            path.relative_to(root)
        except ValueError:
            continue
        return True
    return False
f"Document path {file_path} does not exist" if not path.is_file(): return f"Path {file_path} is not a file" + # The contents of any file accepted here are forwarded to the configured + # VLM / embedding provider for processing, so this endpoint must not be + # usable as an arbitrary file-read primitive against the host. + sensitive, reason = _is_sensitive_path(path) + if sensitive: + logger.warning( + "Rejected document upload from sensitive path: %s (%s)", file_path, reason + ) + return f"Document path is not allowed: {reason}" + + allowed_roots = _resolve_allowed_upload_roots(get_config()) + if not _is_path_under_any_root(path, allowed_roots): + roots_pretty = ", ".join(str(r) for r in allowed_roots) if allowed_roots else "" + logger.warning( + "Rejected document upload from path outside allow-list: %s " + "(allowed roots: [%s])", + file_path, + roots_pretty, + ) + return ( + f"Document path is not within an allowed directory. " + f"Allowed roots: [{roots_pretty}]. " + f"To permit additional directories, set " + f"'security.document_upload_allowed_paths' in config.yaml." + ) + try: # Create RawContextProperties object_id = f"doc_{uuid.uuid4()}"