From d4988aa89fd942dd1a854386c3066f4cc71a78c3 Mon Sep 17 00:00:00 2001 From: Tyrie Vella Date: Tue, 9 Jun 2026 11:40:32 -0700 Subject: [PATCH 1/2] Use git ls-files -s instead of ls-tree for full-tree enumeration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When no previous commit exists to diff against (sourceTreeSha == null), DiffHelper.PerformDiff previously ran 'git ls-tree -r -t HEAD' which walks all tree objects. On a large repo with ~2.5M files, this takes ~24s. Replace with 'git ls-files -s' which reads the index instead of walking tree objects. Benchmarked at ~6.5s on the same repo — a 3.7x speedup. The optimization is only applied when targetTreeSha matches HEAD's tree, since ls-files reads the index (which reflects HEAD). When they differ (e.g., FastFetch checking out a non-HEAD commit), falls back to ls-tree to preserve correctness. Also falls back to ls-tree if ls-files fails (e.g., index does not exist on fresh git init before first checkout). Assisted-by: Claude Opus 4.6 Signed-off-by: Tyrie Vella --- GVFS/GVFS.Common/Git/DiffTreeResult.cs | 41 ++++++++ GVFS/GVFS.Common/Git/GitProcess.cs | 12 +++ GVFS/GVFS.Common/Prefetch/Git/DiffHelper.cs | 98 ++++++++++++++++--- .../Prefetch/DiffTreeResultTests.cs | 72 ++++++++++++++ 4 files changed, 211 insertions(+), 12 deletions(-) diff --git a/GVFS/GVFS.Common/Git/DiffTreeResult.cs b/GVFS/GVFS.Common/Git/DiffTreeResult.cs index abafa4597..a4adfb17f 100644 --- a/GVFS/GVFS.Common/Git/DiffTreeResult.cs +++ b/GVFS/GVFS.Common/Git/DiffTreeResult.cs @@ -195,6 +195,47 @@ public static bool IsLsTreeLineOfType(string line, string typeMarker) return line.IndexOf(typeMarker, TypeMarkerStartIndex, typeMarker.Length, StringComparison.OrdinalIgnoreCase) == TypeMarkerStartIndex; } + /// + /// Parse the output of calling git ls-files -s (staging info). + /// This reads from the index, which is much faster than ls-tree on large repos. + /// ls-files only returns file entries (no tree entries). + /// + public static DiffTreeResult ParseFromLsFilesStagingLine(string line) + { + if (string.IsNullOrEmpty(line)) + { + throw new ArgumentException("Line to parse cannot be null or empty", nameof(line)); + } + + /* + * Example output lines from ls-files -s + * + * 100644 44c5f5cba4b29d31c2ad06eed51ea02af76c27c0 0\tReadme.md + * 100755 196142fbb753c0a3c7c6690323db7aa0a11f41ec 0\tScripts/BuildGVFSForMac.sh + * ^-mode ^-sha ^stage + * ^-tab + * ^-path + * + * Format: \t + * Mode is 6 chars, space, SHA is 40 chars, space, stage digit(s), tab, path + */ + + int tabIndex = line.IndexOf('\t'); + if (tabIndex < 0 || line.Length < 50) + { + return null; + } + + DiffTreeResult blobAdd = new DiffTreeResult(); + blobAdd.TargetMode = Convert.ToUInt16(line.Substring(0, 6), 8); + blobAdd.TargetIsSymLink = blobAdd.TargetMode == SymLinkFileIndexEntry; + blobAdd.TargetSha = line.Substring(7, GVFSConstants.ShaStringLength); + blobAdd.TargetPath = ConvertPathToUtf8Path(line.Substring(tabIndex + 1)); + blobAdd.Operation = Operations.Add; + + return blobAdd; + } + private static string AppendPathSeparatorIfNeeded(string path) { return path.Last() == Path.DirectorySeparatorChar ? path : path + Path.DirectorySeparatorChar; diff --git a/GVFS/GVFS.Common/Git/GitProcess.cs b/GVFS/GVFS.Common/Git/GitProcess.cs index b818fd915..055c6f272 100644 --- a/GVFS/GVFS.Common/Git/GitProcess.cs +++ b/GVFS/GVFS.Common/Git/GitProcess.cs @@ -760,6 +760,18 @@ public Result LsTree(string treeish, Action parseStdOutLine, bool recurs parseStdOutLine); } + /// + /// Runs git ls-files -s to list all tracked files with their mode, SHA, and path. + /// Reads from the index (fast) rather than walking tree objects (slow). + /// + public Result LsFilesStaging(Action parseStdOutLine) + { + return this.InvokeGitInWorkingDirectoryRoot( + "ls-files -s", + useReadObjectHook: false, + parseStdOutLine: parseStdOutLine); + } + public Result LsFiles(Action parseStdOutLine) { return this.InvokeGitInWorkingDirectoryRoot( diff --git a/GVFS/GVFS.Common/Prefetch/Git/DiffHelper.cs b/GVFS/GVFS.Common/Prefetch/Git/DiffHelper.cs index 386e4c214..b4de70eb2 100644 --- a/GVFS/GVFS.Common/Prefetch/Git/DiffHelper.cs +++ b/GVFS/GVFS.Common/Prefetch/Git/DiffHelper.cs @@ -119,21 +119,46 @@ public void PerformDiff(string sourceTreeSha, string targetTreeSha) { this.UpdatedWholeTree = true; - // Nothing is checked out (fresh git init), so we must search the entire tree. - GitProcess.Result result = this.git.LsTree( - targetTreeSha, - line => this.EnqueueOperationsFromLsTreeLine(activity, line), - recursive: true, - showAllTrees: true); - - if (result.ExitCodeIsFailure) + // Prefer ls-files -s over ls-tree -r -t for full-tree enumeration. + // ls-files reads the git index (~6.5s on a 2.5M-file repo) while + // ls-tree walks every tree object (~24s on the same repo). + // ls-files reflects the index (HEAD), so we can only use it when + // targetTreeSha matches HEAD's tree. When they differ (e.g., + // FastFetch checking out a different commit), fall back to ls-tree. + bool usedLsFiles = false; + if (this.TargetMatchesHeadTree(targetTreeSha)) { - this.HasFailures = true; - metadata.Add("Errors", result.Errors); - metadata.Add("Output", result.Output.Length > 1024 ? result.Output.Substring(1024) : result.Output); + GitProcess.Result result = this.git.LsFilesStaging( + line => this.EnqueueOperationsFromLsFilesStagingLine(activity, line)); + + if (result.ExitCodeIsSuccess) + { + usedLsFiles = true; + metadata.Add("Operation", "LsFilesStaging"); + } + else + { + this.tracer.RelatedWarning("ls-files -s failed, falling back to ls-tree: " + result.Errors); + } } - metadata.Add("Operation", "LsTree"); + if (!usedLsFiles) + { + GitProcess.Result result = this.git.LsTree( + targetTreeSha, + line => this.EnqueueOperationsFromLsTreeLine(activity, line), + recursive: true, + showAllTrees: true); + + if (result.ExitCodeIsFailure) + { + this.HasFailures = true; + metadata.Add("Errors", result.Errors); + metadata.Add("Output", result.Output.Length > 1024 ? result.Output.Substring(1024) : result.Output); + } + + metadata.Add("Operation", "LsTree"); + } } else { @@ -235,6 +260,37 @@ private void FlushStagedQueues() } } + /// + /// Check whether targetTreeSha matches HEAD's tree SHA so we can safely + /// use git ls-files -s (which reads the index reflecting HEAD) instead of + /// git ls-tree (which walks a specific tree object). + /// + private bool TargetMatchesHeadTree(string targetTreeSha) + { + try + { + using (LibGit2Repo repo = new LibGit2Repo(this.tracer, this.enlistment.WorkingDirectoryBackingRoot)) + { + string headTreeSha = repo.GetTreeSha("HEAD"); + if (headTreeSha != null && string.Equals(headTreeSha, targetTreeSha, StringComparison.OrdinalIgnoreCase)) + { + return true; + } + + this.tracer.RelatedInfo( + "TargetMatchesHeadTree: target {0} != HEAD {1}, will use ls-tree", + targetTreeSha, + headTreeSha ?? "(null)"); + return false; + } + } + catch (Exception e) + { + this.tracer.RelatedWarning("TargetMatchesHeadTree: failed to resolve HEAD tree: " + e.Message); + return false; + } + } + private void EnqueueOperationsFromLsTreeLine(ITracer activity, string line) { DiffTreeResult result = DiffTreeResult.ParseFromLsTreeLine(line); @@ -268,6 +324,24 @@ private void EnqueueOperationsFromLsTreeLine(ITracer activity, string line) } } + private void EnqueueOperationsFromLsFilesStagingLine(ITracer activity, string line) + { + DiffTreeResult result = DiffTreeResult.ParseFromLsFilesStagingLine(line); + if (result == null) + { + this.tracer.RelatedError("Unrecognized ls-files -s line: {0}", line); + return; + } + + if (!this.ShouldIncludeResult(result)) + { + return; + } + + // ls-files -s only returns file entries, never trees + this.EnqueueFileAddOperation(activity, result); + } + private void EnqueueOperationsFromDiffTreeLine(ITracer activity, string line) { if (!line.StartsWith(":")) diff --git a/GVFS/GVFS.UnitTests/Prefetch/DiffTreeResultTests.cs b/GVFS/GVFS.UnitTests/Prefetch/DiffTreeResultTests.cs index 5dd1a9dad..f70d30efc 100644 --- a/GVFS/GVFS.UnitTests/Prefetch/DiffTreeResultTests.cs +++ b/GVFS/GVFS.UnitTests/Prefetch/DiffTreeResultTests.cs @@ -38,6 +38,12 @@ public class DiffTreeResultTests private static readonly string InvalidLineFromLsTree = $"040000 bad {TestSha1}\t{TestTreePath1}"; private static readonly string SymLinkLineFromLsTree = $"120000 blob {TestSha1}\t{TestTreePath1}"; + // ls-files -s test data + private static readonly string BlobLineFromLsFilesStaging = $"100644 {TestSha1} 0\t{TestTreePath1}"; + private static readonly string ExecutableBlobFromLsFilesStaging = $"100755 {TestSha1} 0\t{TestBlobPath1}"; + private static readonly string SymLinkFromLsFilesStaging = $"120000 {TestSha1} 0\t{TestTreePath1}"; + private static readonly string BlobWithSpacesFromLsFilesStaging = $"100644 {TestSha1} 0\t{TestBlobPath1}"; + [TestCase] [Category(CategoryConstants.ExceptionExpected)] public void ParseFromDiffTreeLine_NullLine() @@ -341,6 +347,72 @@ public void ParseFromDiffTreeLine_BlobLineWithTreePath() this.ValidateDiffTreeResult(expected, result); } + [TestCase] + [Category(CategoryConstants.ExceptionExpected)] + public void ParseFromLsFilesStagingLine_NullLine() + { + Assert.Throws(() => DiffTreeResult.ParseFromLsFilesStagingLine(null)); + } + + [TestCase] + [Category(CategoryConstants.ExceptionExpected)] + public void ParseFromLsFilesStagingLine_EmptyLine() + { + Assert.Throws(() => DiffTreeResult.ParseFromLsFilesStagingLine(string.Empty)); + } + + [TestCase] + public void ParseFromLsFilesStagingLine_InvalidLine() + { + DiffTreeResult.ParseFromLsFilesStagingLine("short").ShouldBeNull(); + } + + [TestCase] + public void ParseFromLsFilesStagingLine_BlobLine() + { + DiffTreeResult expected = new DiffTreeResult() + { + Operation = DiffTreeResult.Operations.Add, + SourceIsDirectory = false, + TargetIsDirectory = false, + TargetPath = TestTreePath1.Replace('/', Path.DirectorySeparatorChar), + SourceSha = null, + TargetSha = TestSha1 + }; + + DiffTreeResult result = DiffTreeResult.ParseFromLsFilesStagingLine(BlobLineFromLsFilesStaging); + this.ValidateDiffTreeResult(expected, result); + } + + [TestCase] + public void ParseFromLsFilesStagingLine_ExecutableBlob() + { + DiffTreeResult result = DiffTreeResult.ParseFromLsFilesStagingLine(ExecutableBlobFromLsFilesStaging); + result.ShouldNotBeNull(); + result.Operation.ShouldEqual(DiffTreeResult.Operations.Add); + result.TargetMode.ShouldEqual(Convert.ToUInt16("100755", 8)); + result.TargetSha.ShouldEqual(TestSha1); + result.TargetPath.ShouldEqual(TestBlobPath1.Replace('/', Path.DirectorySeparatorChar)); + } + + [TestCase] + public void ParseFromLsFilesStagingLine_SymLink() + { + DiffTreeResult result = DiffTreeResult.ParseFromLsFilesStagingLine(SymLinkFromLsFilesStaging); + result.ShouldNotBeNull(); + result.TargetIsSymLink.ShouldBeTrue(); + result.TargetSha.ShouldEqual(TestSha1); + } + + [TestCase] + public void ParseFromLsFilesStagingLine_PathWithSpaces() + { + DiffTreeResult result = DiffTreeResult.ParseFromLsFilesStagingLine(BlobWithSpacesFromLsFilesStaging); + result.ShouldNotBeNull(); + result.TargetPath.ShouldEqual(TestBlobPath1.Replace('/', Path.DirectorySeparatorChar)); + result.TargetSha.ShouldEqual(TestSha1); + } + [TestCase("040000 tree 73b881d52b607b0f3e9e620d36f556d3d233a11d\tGVFS", DiffTreeResult.TreeMarker, true)] [TestCase("040000 tree 73b881d52b607b0f3e9e620d36f556d3d233a11d\tGVFS", DiffTreeResult.BlobMarker, false)] [TestCase("100644 blob 44c5f5cba4b29d31c2ad06eed51ea02af76c27c0\tReadme.md", DiffTreeResult.BlobMarker, true)] From 62ff7fa50edc67674bcba44d026345764e78e1af Mon Sep 17 00:00:00 2001 From: Tyrie Vella Date: Thu, 11 Jun 2026 15:17:44 -0700 Subject: [PATCH 2/2] Address review feedback: fix tree/commit SHA comparison, filter unmerged entries Fix three issues raised in PR review: 1. TargetMatchesHeadTree compared HEAD's tree SHA against a commit SHA (callers pass commit IDs, not tree IDs). The comparison never matched, so the optimization silently fell back to ls-tree every time. Fix by resolving targetTreeSha to its tree SHA via GetTreeSha() before comparing. 2. Add comments clarifying that the ls-files path intentionally skips directory operations. This is safe because the path only fires for gvfs prefetch on GVFS-mounted repos where directories are virtualized by PrjFlt. FastFetch force-checkout (which needs directory ops) targets a non-HEAD commit and falls back to ls-tree. 3. Filter out non-zero stage entries (unmerged) in the ls-files parser. During merge conflicts the same path appears at stages 1/2/3 with different SHAs. While GVFS repos shouldn't have conflicts, filtering defensively avoids duplicate adds with wrong blob SHAs. Assisted-by: Claude Opus 4.6 Signed-off-by: Tyrie Vella --- GVFS/GVFS.Common/Git/DiffTreeResult.cs | 15 +++++++++++++ GVFS/GVFS.Common/Prefetch/Git/DiffHelper.cs | 20 ++++++++++++++++-- .../Prefetch/DiffTreeResultTests.cs | 21 +++++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/GVFS/GVFS.Common/Git/DiffTreeResult.cs b/GVFS/GVFS.Common/Git/DiffTreeResult.cs index a4adfb17f..d3969f27e 100644 --- a/GVFS/GVFS.Common/Git/DiffTreeResult.cs +++ b/GVFS/GVFS.Common/Git/DiffTreeResult.cs @@ -199,6 +199,7 @@ public static bool IsLsTreeLineOfType(string line, string typeMarker) /// Parse the output of calling git ls-files -s (staging info). /// This reads from the index, which is much faster than ls-tree on large repos. /// ls-files only returns file entries (no tree entries). + /// Entries with stage != 0 (unmerged) are skipped to avoid duplicate/conflicting adds. /// public static DiffTreeResult ParseFromLsFilesStagingLine(string line) { @@ -218,6 +219,11 @@ public static DiffTreeResult ParseFromLsFilesStagingLine(string line) * * Format: \t * Mode is 6 chars, space, SHA is 40 chars, space, stage digit(s), tab, path + * + * During a merge conflict, the same path can appear multiple times with + * stage 1 (common ancestor), 2 (ours), and 3 (theirs). We only want + * stage 0 (normal) entries. In GVFS-mounted repos merge conflicts should + * not occur, but we filter defensively. */ int tabIndex = line.IndexOf('\t'); @@ -226,6 +232,15 @@ public static DiffTreeResult ParseFromLsFilesStagingLine(string line) return null; } + // Stage is between the SHA and the tab: " \t" + // Position 48 = 6 (mode) + 1 (space) + 40 (sha) + 1 (space) + int stageStart = 7 + GVFSConstants.ShaStringLength + 1; + string stageStr = line.Substring(stageStart, tabIndex - stageStart); + if (stageStr != "0") + { + return null; + } + DiffTreeResult blobAdd = new DiffTreeResult(); blobAdd.TargetMode = Convert.ToUInt16(line.Substring(0, 6), 8); blobAdd.TargetIsSymLink = blobAdd.TargetMode == SymLinkFileIndexEntry; diff --git a/GVFS/GVFS.Common/Prefetch/Git/DiffHelper.cs b/GVFS/GVFS.Common/Prefetch/Git/DiffHelper.cs index b4de70eb2..0f51056b6 100644 --- a/GVFS/GVFS.Common/Prefetch/Git/DiffHelper.cs +++ b/GVFS/GVFS.Common/Prefetch/Git/DiffHelper.cs @@ -125,6 +125,12 @@ public void PerformDiff(string sourceTreeSha, string targetTreeSha) // ls-files reflects the index (HEAD), so we can only use it when // targetTreeSha matches HEAD's tree. When they differ (e.g., // FastFetch checking out a different commit), fall back to ls-tree. + // + // ls-files only returns file entries (not tree/directory entries). + // This is safe because the ls-files path only fires for gvfs prefetch + // on a GVFS-mounted repo where directories are virtualized by PrjFlt + // and don't need explicit creation. FastFetch force-checkout (which + // needs directory operations) won't match HEAD and falls back to ls-tree. bool usedLsFiles = false; if (this.TargetMatchesHeadTree(targetTreeSha)) { @@ -264,6 +270,10 @@ private void FlushStagedQueues() /// Check whether targetTreeSha matches HEAD's tree SHA so we can safely /// use git ls-files -s (which reads the index reflecting HEAD) instead of /// git ls-tree (which walks a specific tree object). + /// + /// Note: callers may pass either a tree SHA or a commit SHA as targetTreeSha + /// (git ls-tree auto-peels commits). We resolve both sides to tree SHAs + /// before comparing. /// private bool TargetMatchesHeadTree(string targetTreeSha) { @@ -272,14 +282,20 @@ private bool TargetMatchesHeadTree(string targetTreeSha) using (LibGit2Repo repo = new LibGit2Repo(this.tracer, this.enlistment.WorkingDirectoryBackingRoot)) { string headTreeSha = repo.GetTreeSha("HEAD"); - if (headTreeSha != null && string.Equals(headTreeSha, targetTreeSha, StringComparison.OrdinalIgnoreCase)) + + // targetTreeSha may be a commit SHA (callers like BlobPrefetcher + // pass commit IDs). Resolve it to a tree SHA for comparison. + string targetResolvedTreeSha = repo.GetTreeSha(targetTreeSha) ?? targetTreeSha; + + if (headTreeSha != null && string.Equals(headTreeSha, targetResolvedTreeSha, StringComparison.OrdinalIgnoreCase)) { return true; } this.tracer.RelatedInfo( - "TargetMatchesHeadTree: target {0} != HEAD {1}, will use ls-tree", + "TargetMatchesHeadTree: target {0} (tree {1}) != HEAD tree {2}, will use ls-tree", targetTreeSha, + targetResolvedTreeSha, headTreeSha ?? "(null)"); return false; } diff --git a/GVFS/GVFS.UnitTests/Prefetch/DiffTreeResultTests.cs b/GVFS/GVFS.UnitTests/Prefetch/DiffTreeResultTests.cs index f70d30efc..e5aef689d 100644 --- a/GVFS/GVFS.UnitTests/Prefetch/DiffTreeResultTests.cs +++ b/GVFS/GVFS.UnitTests/Prefetch/DiffTreeResultTests.cs @@ -43,6 +43,9 @@ public class DiffTreeResultTests private static readonly string ExecutableBlobFromLsFilesStaging = $"100755 {TestSha1} 0\t{TestBlobPath1}"; private static readonly string SymLinkFromLsFilesStaging = $"120000 {TestSha1} 0\t{TestTreePath1}"; private static readonly string BlobWithSpacesFromLsFilesStaging = $"100644 {TestSha1} 0\t{TestBlobPath1}"; + private static readonly string UnmergedStage1FromLsFilesStaging = $"100644 {TestSha1} 1\t{TestTreePath1}"; + private static readonly string UnmergedStage2FromLsFilesStaging = $"100644 {Test2Sha1} 2\t{TestTreePath1}"; + private static readonly string UnmergedStage3FromLsFilesStaging = $"100644 {TestSha1} 3\t{TestTreePath1}"; [TestCase] [Category(CategoryConstants.ExceptionExpected)] @@ -413,6 +416,24 @@ public void ParseFromLsFilesStagingLine_PathWithSpaces() result.TargetSha.ShouldEqual(TestSha1); } + [TestCase] + public void ParseFromLsFilesStagingLine_UnmergedStage1_ReturnsNull() + { + DiffTreeResult.ParseFromLsFilesStagingLine(UnmergedStage1FromLsFilesStaging).ShouldBeNull(); + } + + [TestCase] + public void ParseFromLsFilesStagingLine_UnmergedStage2_ReturnsNull() + { + DiffTreeResult.ParseFromLsFilesStagingLine(UnmergedStage2FromLsFilesStaging).ShouldBeNull(); + } + + [TestCase] + public void ParseFromLsFilesStagingLine_UnmergedStage3_ReturnsNull() + { + DiffTreeResult.ParseFromLsFilesStagingLine(UnmergedStage3FromLsFilesStaging).ShouldBeNull(); + } + [TestCase("040000 tree 73b881d52b607b0f3e9e620d36f556d3d233a11d\tGVFS", DiffTreeResult.TreeMarker, true)] [TestCase("040000 tree 73b881d52b607b0f3e9e620d36f556d3d233a11d\tGVFS", DiffTreeResult.BlobMarker, false)] [TestCase("100644 blob 44c5f5cba4b29d31c2ad06eed51ea02af76c27c0\tReadme.md", DiffTreeResult.BlobMarker, true)]