Skip to content

Commit 0614703

Browse files
committed
improve scanning speed with savepoint transactions
1 parent dcf8fd0 commit 0614703

1 file changed

Lines changed: 59 additions & 51 deletions

File tree

comiclib/scan.py

Lines changed: 59 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -36,57 +36,65 @@
3636

3737
def scan(paths):
    """Scan *paths* for comic archives and synchronize their metadata into the DB.

    Each path is processed inside its own SAVEPOINT (``begin_nested``), so a
    failure while handling one archive discards only that archive's pending
    changes; everything scanned before it is still persisted by the outer
    ``finally`` commit. Note that an exception still propagates and stops the
    remainder of the scan.
    """
    with Session(engine) as db:
        try:
            for p in paths:
                checkpoint = db.begin_nested()  # one SAVEPOINT per archive
                try:
                    p = Path(os.path.relpath(p, settings.content))
                    if p.is_relative_to('thumb'):
                        continue  # generated thumbnails are not archives
                    old_a = db.scalar(select(Archive).where(
                        Archive.path == p.as_posix()))
                    if old_a is None:
                        a = Archive(path=p.as_posix())
                        # "00" prefix stands for ID type "origin"; blake2b of the
                        # POSIX path yields a stable 38-hex suffix (40 chars total).
                        archive_id = '00' + hashlib.blake2b(p.as_posix().encode(), digest_size=19).hexdigest()
                    elif settings.skip_exits:
                        continue  # already in the DB and re-scanning is disabled
                    else:
                        a = old_a
                        archive_id = old_a.id
                    metadata = {
                        "id": archive_id,
                        "title": a.title,
                        "subtitle": a.subtitle,
                        "source": a.source,
                        "pagecount": a.pagecount,
                        # date_added is re-derived below, so drop it from the snapshot.
                        "tags": set(t.tag for t in a.tags if not t.tag.startswith("date_added:")),
                        "categories": set(c.name for c in a.categories),
                    }
                    real_path = Path(settings.content) / p
                    prev_scanners = []
                    for scanner, name in scanners:
                        # Snapshot so a scanner that reports failure cannot leave
                        # partial edits behind in `metadata`.
                        prev_metadata = copy.deepcopy(metadata)
                        if scanner.scan(real_path, archive_id, metadata, prev_scanners):
                            prev_scanners.append(name)
                        else:
                            metadata = prev_metadata
                    if not prev_scanners:
                        continue  # no scanner recognized this path
                    if not any(tag.startswith("date_added:") for tag in metadata["tags"]):
                        # Directory modification times are often difficult to synchronize,
                        # so use the mtime of the first file inside a directory instead.
                        mtime_path = next(real_path.iterdir()) if real_path.is_dir() else real_path
                        metadata["tags"].add(f"date_added:{int(mtime_path.stat().st_mtime)}")
                    logging.debug(pformat(metadata))
                    a.title = metadata["title"]
                    a.subtitle = metadata["subtitle"]
                    a.source = metadata["source"]
                    a.pagecount = metadata["pagecount"]
                    # NOTE(review): "thumb" is only present if some scanner set it;
                    # a KeyError here would abort the scan — confirm every successful
                    # scanner chain populates it.
                    a.thumb = metadata["thumb"]
                    # Reconcile tags: materialize the stale list FIRST — removing
                    # while lazily iterating a.tags (the old filter(...) form) can
                    # skip elements as the collection shifts underneath the iterator.
                    for tag in [t for t in a.tags if t.tag not in metadata["tags"]]:
                        a.tags.remove(tag)
                    for tag in metadata["tags"] - set(t.tag for t in a.tags):
                        a.tags.append(Tag(archive_id=metadata["id"], tag=tag))
                    # Reconcile categories likewise, creating missing Category rows.
                    for category in [c for c in a.categories if c.name not in metadata["categories"]]:
                        a.categories.remove(category)
                    for category in metadata["categories"] - set(c.name for c in a.categories):
                        if (c := db.scalar(select(Category).where(Category.name == category))) is None:
                            c = Category(name=category, pinned=0)
                            db.add(c)
                        a.categories.append(c)
                    if old_a is None:
                        # Raise instead of assert: this check must survive `python -O`,
                        # since a malformed ID would corrupt the archive table.
                        if len(metadata["id"]) != 40:
                            raise ValueError(f'The length of ID {metadata["id"]} is incorrect.')
                        a.id = metadata["id"]
                        db.add(a)
                    checkpoint.commit()  # release this archive's savepoint
                finally:
                    # Reached on `continue` (nothing written yet, rollback is a no-op)
                    # or on an exception (discard this archive's partial changes).
                    if checkpoint.is_active:
                        checkpoint.rollback()
        finally:
            # Persist every successfully scanned archive even if a later one failed.
            db.commit()

92100
def get_files_inuse():

0 commit comments

Comments
 (0)