Skip to content

Commit 78315ce

Browse files
committed
src: optimize NormalizeString to cut allocations and copies
Rewrite the parent-dir ("..") handling in NormalizeString() to use a write-position rewind with a "dotdot" floor index, the approach used by Go's path/filepath.Clean, and build segments with in-place appends instead of temporary-string concatenation. Previously each ".." did `res = res.substr(0, idx)`, which allocated a new string and copied the whole surviving prefix (O(n^2) copy volume on deep-backtrack paths), plus two find_last_of() scans. Each normal segment built two or three temporary std::strings, and res was never reserve()d. Now ".." rewinds the write position to the previous separator and resize()s, so there is no allocation, no prefix copy and no find_last_of call. Segments are written with push_back() and append(). The output is byte for byte identical to the previous implementation.
1 parent 17e4196 commit 78315ce

2 files changed

Lines changed: 71 additions & 33 deletions

File tree

src/path.cc

Lines changed: 30 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,15 @@ constexpr bool IsPathSeparator(const char c) noexcept {
2121
std::string NormalizeString(const std::string_view path,
2222
bool allowAboveRoot,
2323
const std::string_view separator) {
24+
const char separator_char = separator[0];
2425
std::string res;
25-
int lastSegmentLength = 0;
26+
res.reserve(path.size());
27+
// `dotdot` is the floor in `res` below which a `..` segment cannot
28+
// backtrack: it sits just past any leading `..` run that could not be
29+
// resolved. This lets `..` rewind the write position to the preceding
30+
// separator without rescanning or reallocating the whole prefix. Same
31+
// approach as Go's path/filepath.Clean.
32+
int dotdot = 0;
2633
int lastSlash = -1;
2734
int dots = 0;
2835
char code = 0;
@@ -37,45 +44,35 @@ std::string NormalizeString(const std::string_view path,
3744

3845
if (IsPathSeparator(code)) {
3946
if (lastSlash == static_cast<int>(i - 1) || dots == 1) {
40-
// NOOP
47+
// NOOP: empty segment (e.g. `//`) or a `.` segment.
4148
} else if (dots == 2) {
42-
int len = res.length();
43-
if (len < 2 || lastSegmentLength != 2 || res[len - 1] != '.' ||
44-
res[len - 2] != '.') {
45-
if (len > 2) {
46-
auto lastSlashIndex = res.find_last_of(separator);
47-
if (lastSlashIndex == std::string::npos) {
48-
res = "";
49-
lastSegmentLength = 0;
50-
} else {
51-
res = res.substr(0, lastSlashIndex);
52-
len = res.length();
53-
lastSegmentLength = len - 1 - res.find_last_of(separator);
54-
}
55-
lastSlash = i;
56-
dots = 0;
57-
continue;
58-
} else if (len != 0) {
59-
res = "";
60-
lastSegmentLength = 0;
61-
lastSlash = i;
62-
dots = 0;
63-
continue;
49+
int w = static_cast<int>(res.length());
50+
if (w > dotdot) {
51+
// Drop the previous segment by rewinding the write position to the
52+
// separator that precedes it.
53+
w--;
54+
while (w > dotdot && res[w] != separator_char) {
55+
w--;
6456
}
65-
}
66-
67-
if (allowAboveRoot) {
68-
res += res.length() > 0 ? std::string(separator) + ".." : "..";
69-
lastSegmentLength = 2;
57+
res.resize(static_cast<size_t>(w));
58+
lastSlash = i;
59+
dots = 0;
60+
continue;
61+
} else if (allowAboveRoot) {
62+
// Cannot backtrack past the floor; keep the `..` and raise the floor.
63+
if (!res.empty()) {
64+
res.push_back(separator_char);
65+
}
66+
res.append("..", 2);
67+
dotdot = static_cast<int>(res.length());
7068
}
7169
} else {
7270
if (!res.empty()) {
73-
res += std::string(separator) +
74-
std::string(path.substr(lastSlash + 1, i - (lastSlash + 1)));
71+
res.push_back(separator_char);
72+
res.append(path.data() + lastSlash + 1, i - (lastSlash + 1));
7573
} else {
76-
res = path.substr(lastSlash + 1, i - (lastSlash + 1));
74+
res.assign(path.data() + lastSlash + 1, i - (lastSlash + 1));
7775
}
78-
lastSegmentLength = i - lastSlash - 1;
7976
}
8077
lastSlash = i;
8178
dots = 0;

test/cctest/test_path.cc

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "v8.h"
99

1010
using node::BufferValue;
11+
using node::NormalizeString;
1112
using node::PathResolve;
1213
using node::ToNamespacedPath;
1314

@@ -43,6 +44,13 @@ TEST_F(PathTest, PathResolve) {
4344
"\\\\.\\PHYSICALDRIVE0");
4445
EXPECT_EQ(PathResolve(*env, {"\\\\?\\PHYSICALDRIVE0"}),
4546
"\\\\?\\PHYSICALDRIVE0");
47+
// Backtracking past the drive root stays clamped at the drive root.
48+
EXPECT_EQ(PathResolve(*env, {"c:/a/b/c", "..\\..\\..\\.."}), "c:\\");
49+
// UNC root is preserved when backtracking past it. The UNC share
50+
// \\server\share is the root, so "..","..","x" cannot escape it and the
51+
// remaining segment "x" is appended to the share root.
52+
EXPECT_EQ(PathResolve(*env, {"//server/share", "..", "..", "x"}),
53+
"\\\\server\\share\\x");
4654
#else
4755
EXPECT_EQ(PathResolve(*env, {"/var/lib", "../", "file/"}), "/var/file");
4856
EXPECT_EQ(PathResolve(*env, {"/var/lib", "/../", "file/"}), "/file");
@@ -51,9 +59,42 @@ TEST_F(PathTest, PathResolve) {
5159
EXPECT_EQ(PathResolve(*env, {"/some/dir", ".", "/absolute/"}), "/absolute");
5260
EXPECT_EQ(PathResolve(*env, {"/foo/tmp.3/", "../tmp.3/cycles/root.js"}),
5361
"/foo/tmp.3/cycles/root.js");
62+
// Backtracking past the root stays clamped at the root.
63+
EXPECT_EQ(PathResolve(*env, {"/a/b/c/d/e", "../../../../.."}), "/");
64+
EXPECT_EQ(PathResolve(*env, {"/a/b/c", "../../../../.."}), "/");
65+
// Mixed current-dir and parent-dir segments.
66+
EXPECT_EQ(PathResolve(*env, {"/a/./b/../c/./d"}), "/a/c/d");
67+
// Collapsing of repeated separators.
68+
EXPECT_EQ(PathResolve(*env, {"/a//b///c"}), "/a/b/c");
69+
// Single parent-dir traversal.
70+
EXPECT_EQ(PathResolve(*env, {"/a/../b"}), "/b");
71+
EXPECT_EQ(PathResolve(*env, {"/a/b/../../c"}), "/c");
72+
// Trailing separator is stripped.
73+
EXPECT_EQ(PathResolve(*env, {"/a/b/c/"}), "/a/b/c");
74+
// Single absolute segment.
75+
EXPECT_EQ(PathResolve(*env, {"/single"}), "/single");
5476
#endif
5577
}
5678

79+
TEST_F(PathTest, NormalizeString) {
80+
// allowAboveRoot = false (absolute context): ".." that cannot be resolved is
81+
// dropped, "." segments and repeated/trailing separators are collapsed.
82+
EXPECT_EQ(NormalizeString("a/b/../../../c", false, "/"), "c");
83+
EXPECT_EQ(NormalizeString("a/b/c/d/e/../../../../..", false, "/"), "");
84+
EXPECT_EQ(NormalizeString("a/./b//c/", false, "/"), "a/b/c");
85+
EXPECT_EQ(NormalizeString("./foo/./bar/", false, "/"), "foo/bar");
86+
// allowAboveRoot = true (relative context): leading ".." is preserved.
87+
EXPECT_EQ(NormalizeString("a/b/../../../c", true, "/"), "../c");
88+
EXPECT_EQ(NormalizeString("../../a", true, "/"), "../../a");
89+
EXPECT_EQ(NormalizeString("foo/..", true, "/"), "");
90+
EXPECT_EQ(NormalizeString("foo/../..", true, "/"), "..");
91+
#ifdef _WIN32
92+
// The Windows separator is handled the same way.
93+
EXPECT_EQ(NormalizeString("a\\b\\..\\..\\..\\c", false, "\\"), "c");
94+
EXPECT_EQ(NormalizeString("..\\..\\a", true, "\\"), "..\\..\\a");
95+
#endif // _WIN32
96+
}
97+
5798
TEST_F(PathTest, ToNamespacedPath) {
5899
const v8::HandleScope handle_scope(isolate_);
59100
Argv argv;

0 commit comments

Comments
 (0)