Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 158 additions & 10 deletions app/src/ai/skills/file_watchers/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,47 @@ use std::{
sync::LazyLock,
};

/// Directory names conventionally used for third-party dependencies, caches,
/// and build output.
///
/// A lazy-loaded directory whose name is in this set is not probed for provider
/// skill directories in Pass 2 Case (b): skills found inside such trees are not
/// authored by the repo owner and must not be auto-loaded.
static DEPENDENCY_DIR_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        // JavaScript / Node
        "node_modules",
        "bower_components",
        "jspm_packages",
        ".yarn",
        ".pnp",
        // Rust
        "target",
        // Go
        "vendor",
        // Python
        "__pycache__",
        ".venv",
        "venv",
        "env",
        ".eggs",
        "site-packages",
        // Java / Kotlin / Gradle
        ".gradle",
        ".m2",
        // iOS / macOS
        "Pods",
        "DerivedData",
        // Ruby
        "gems",
        // Generic build / dist artefacts
        "dist",
        "build",
        ".build",
        "out",
        ".cache",
        ".tmp",
    ])
});

use ai::skills::{
home_skills_path, read_skills, ParsedSkill, SkillProvider, SKILL_PROVIDER_DEFINITIONS,
};
Expand All @@ -17,32 +58,73 @@ use crate::warp_managed_paths_watcher::warp_managed_skill_dirs;
/// Finds all skill directories in a repository by querying the RepoMetadataModel tree.
///
/// Returns a list of paths to skill directories (e.g., `/repo/.agents/skills/`, `/repo/sub/.claude/skills/`).
///
/// Two passes are used to handle gitignored provider directories:
///
/// **Pass 1 — loaded skill dirs:** Standard tree traversal collecting directories that
/// end with a known provider skills path (e.g. `.agents/skills`). Gitignored directories
/// are skipped here because they are lazy-loaded with empty children in the tree.
///
/// **Pass 2 — lazy-loaded directories:** Traversal with `include_ignored: true` to find
/// directories that are lazy-loaded (`loaded: false`). Two sub-cases are handled:
///
/// - **Case (a) — provider root is lazy:** The lazy dir is named like a provider root
/// (`.agents`, `.claude`, …), e.g. `sub-project/.agents/` is gitignored. A single
/// `is_dir()` check is performed for `{provider_dir}/skills`.
///
/// - **Case (b) — parent of provider root is lazy:** The lazy dir is not a provider
/// root but could be a parent of one, e.g. `sub-project/` is gitignored so `.agents/`
/// is never in the tree at all. For each known provider, `{dir}/{provider_path}` is
/// checked with `is_dir()`. Directories whose names appear in `DEPENDENCY_DIR_NAMES`
/// (e.g. `node_modules`, `target`, `vendor`) are skipped to prevent loading untrusted
/// skills from dependency trees.
///
/// In both cases only directories already registered in the tree are examined, keeping
/// the scope bounded. Additionally, Case (b) explicitly skips well-known
/// dependency/cache directory names so that gitignored package trees (e.g.
/// `node_modules/`) cannot be used to inject untrusted skills.
pub fn find_skill_directories_in_tree(
repo_path: &Path,
repo_metadata: &RepoMetadataModel,
ctx: &AppContext,
) -> Vec<PathBuf> {
// Collect provider skills paths (e.g., ".agents/skills", ".claude/skills")
let skill_path_suffixes: Vec<&Path> = SKILL_PROVIDER_DEFINITIONS
let Some(id) = repo_metadata::RepositoryIdentifier::try_local(repo_path) else {
return Vec::new();
};

// Collect provider skills paths (e.g. ".agents/skills", ".claude/skills") and the
// corresponding provider root names (e.g. ".agents", ".claude") for the second pass.
let skill_path_suffixes: Vec<String> = SKILL_PROVIDER_DEFINITIONS
.iter()
.map(|p| p.skills_path.as_path())
.map(|p| p.skills_path.to_string_lossy().into_owned())
.collect();

let provider_root_names: HashSet<String> = SKILL_PROVIDER_DEFINITIONS
.iter()
.filter_map(|p| {
p.skills_path
.parent()
.and_then(Path::file_name)
.and_then(|n| n.to_str())
.map(str::to_owned)
})
.collect();

// ── Pass 1: find fully-loaded skill directories ───────────────────────────
//
// Filter during traversal: only collect directories that end with a skill provider path.
// The filter rejects files and non-matching directories, avoiding intermediate allocations.
let suffixes_1 = skill_path_suffixes.clone();
let args = GetContentsArgs::default().with_filter(move |content| {
let RepoContent::Directory(dir) = content else {
return false;
};
skill_path_suffixes
suffixes_1
.iter()
.any(|suffix| dir.path.ends_with(&suffix.to_string_lossy()))
.any(|suffix| dir.path.ends_with(suffix.as_str()))
});

let Some(id) = repo_metadata::RepositoryIdentifier::try_local(repo_path) else {
return Vec::new();
};
repo_metadata
let mut result: Vec<PathBuf> = repo_metadata
.get_repo_contents(&id, args, ctx)
.unwrap_or_default()
.into_iter()
Expand All @@ -52,7 +134,73 @@ pub fn find_skill_directories_in_tree(
RepoContent::Directory(dir) => dir.path.to_local_path_lossy(),
RepoContent::File(f) => f.path.to_local_path_lossy(),
})
.collect()
.collect();

// ── Pass 2: check lazy-loaded directories ────────────────────────────────
//
// Gitignored directories appear in the tree with `loaded: false` and no
// children. Two sub-cases are handled with targeted `is_dir()` probes:
//
// Case (a) — provider root is lazy (e.g. `sub-project/.agents/` is
// gitignored): probe `{dir}/skills`.
//
// Case (b) — parent of provider root is lazy (e.g. `sub-project/` is
// gitignored, so `.agents/` is never in the tree at all): probe
// `{dir}/{provider_path}` for every known provider.
//
// Only directories already registered in the tree are examined, keeping
// the scope bounded. A gitignored dependency directory such as
// `node_modules` is itself registered in the tree as a lazy dir, so
// Case (b) skips any lazy dir whose name is in `DEPENDENCY_DIR_NAMES`
// instead of probing for a `node_modules/.agents/` entry inside it.
let mut result_set: HashSet<PathBuf> = result.iter().cloned().collect();
let args_lazy = GetContentsArgs::default()
.include_ignored()
.with_filter(move |content| {
let RepoContent::Directory(dir) = content else {
return false;
};
!dir.loaded
Comment thread
rajgandhi1 marked this conversation as resolved.
});

let lazy_dirs: Vec<PathBuf> = repo_metadata
.get_repo_contents(&id, args_lazy, ctx)
.unwrap_or_default()
.into_iter()
.map(|content| match content {
RepoContent::Directory(dir) => dir.path.to_local_path_lossy(),
RepoContent::File(f) => f.path.to_local_path_lossy(),
})
.collect();

for lazy_dir in lazy_dirs {
let dir_name = lazy_dir.file_name().and_then(|n| n.to_str()).unwrap_or("");
if provider_root_names.contains(dir_name) {
// Case (a): the lazy dir is itself a provider root (e.g. `.agents/`).
// Probe one level deeper for the `skills` subdirectory.
let skills_path = lazy_dir.join("skills");
if !result_set.contains(&skills_path) && skills_path.is_dir() {
result_set.insert(skills_path.clone());
result.push(skills_path);
}
} else if !DEPENDENCY_DIR_NAMES.contains(dir_name) {
Comment thread
rajgandhi1 marked this conversation as resolved.
Outdated
// Case (b): the lazy dir is a parent of a potential provider root
// (e.g. `sub-project/` is gitignored, so `.agents/` was never
// indexed). Probe `{dir}/{provider_path}` for every known provider.
//
// Dependency/cache directories (node_modules, target, vendor, …) are
// excluded via DEPENDENCY_DIR_NAMES: skills inside those trees are not
// authored by the repo owner and must not be auto-loaded.
for provider in SKILL_PROVIDER_DEFINITIONS.iter() {
let skills_path = lazy_dir.join(&provider.skills_path);
Comment thread
rajgandhi1 marked this conversation as resolved.
Outdated
if !result_set.contains(&skills_path) && skills_path.is_dir() {
result_set.insert(skills_path.clone());
result.push(skills_path);
}
}
}
}

result
}

/// Reads all skills from the given skill directories.
Expand Down
Loading