Skip to content

Commit 8b1b28a

Browse files
akx and stkao05 committed
Improve extract performance via ignoring directories early during os.walk
Co-authored-by: Steven Kao <st.kao.05@gmail.com>
1 parent 0c4f378 commit 8b1b28a

2 files changed

Lines changed: 41 additions & 7 deletions

File tree

babel/messages/extract.py

Lines changed: 38 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
import os
2424
import sys
2525
import tokenize
26+
import warnings
2627
from collections.abc import (
2728
Callable,
2829
Collection,
@@ -114,7 +115,33 @@ def _strip(line: str):
114115
comments[:] = [_strip(c) for c in comments]
115116

116117

117-
def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:
118+
def make_default_directory_filter(
119+
method_map: Iterable[tuple[str, str]],
120+
root_dir: str | os.PathLike[str],
121+
):
122+
def directory_filter(dirpath: str | os.PathLike[str]) -> bool:
123+
subdir = os.path.basename(dirpath)
124+
# Legacy default behavior: ignore dot and underscore directories
125+
if subdir.startswith('.') or subdir.startswith('_'):
126+
return False
127+
128+
dir_rel = os.path.relpath(dirpath, root_dir).replace(os.sep, '/')
129+
130+
for pattern, method in method_map:
131+
if method == "ignore" and pathmatch(pattern, dir_rel):
132+
return False
133+
134+
return True
135+
136+
return directory_filter
137+
138+
139+
def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool: # pragma: no cover
140+
warnings.warn(
141+
"`default_directory_filter` is deprecated and will be removed in a future version of Babel.",
142+
DeprecationWarning,
143+
stacklevel=2,
144+
)
118145
subdir = os.path.basename(dirpath)
119146
# Legacy default behavior: ignore dot and underscore directories
120147
return not (subdir.startswith('.') or subdir.startswith('_'))
@@ -201,13 +228,19 @@ def extract_from_dir(
201228
"""
202229
if dirname is None:
203230
dirname = os.getcwd()
231+
204232
if options_map is None:
205233
options_map = {}
234+
235+
dirname = os.path.abspath(dirname)
236+
206237
if directory_filter is None:
207-
directory_filter = default_directory_filter
238+
directory_filter = make_default_directory_filter(
239+
method_map=method_map,
240+
root_dir=dirname,
241+
)
208242

209-
absname = os.path.abspath(dirname)
210-
for root, dirnames, filenames in os.walk(absname):
243+
for root, dirnames, filenames in os.walk(dirname):
211244
dirnames[:] = [
212245
subdir for subdir in dirnames if directory_filter(os.path.join(root, subdir))
213246
]
@@ -224,7 +257,7 @@ def extract_from_dir(
224257
keywords,
225258
comment_tags,
226259
strip_comment_tags,
227-
dirpath=absname,
260+
dirpath=dirname,
228261
)
229262

230263

tests/messages/frontend/test_extract.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -202,10 +202,11 @@ def test_extraction_with_mapping_file(extract_cmd, pot_file):
202202

203203

204204
@freeze_time("1994-11-11")
205-
def test_extraction_with_mapping_dict(extract_cmd, pot_file):
205+
@pytest.mark.parametrize("ignore_pattern", ['**/ignored/**.*', 'ignored'])
206+
def test_extraction_with_mapping_dict(extract_cmd, pot_file, ignore_pattern):
206207
extract_cmd.distribution.message_extractors = {
207208
'project': [
208-
('**/ignored/**.*', 'ignore', None),
209+
(ignore_pattern, 'ignore', None),
209210
('**.py', 'python', None),
210211
],
211212
}

0 commit comments

Comments (0)