Skip to content

Commit 0627fcb

Browse files
committed
experimental: custom ID
1 parent d2cfbee commit 0627fcb

7 files changed

Lines changed: 28 additions & 14 deletions

File tree

comiclib/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.0.2"
1+
__version__ = "0.0.3.dev"
22
print(f" >>> ComicLib v{__version__}")
33

44
from .scan import watch, scannow

comiclib/scan.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,13 @@ def scan(paths):
4444
Archive.path == p.as_posix()))
4545
if old_a is None:
4646
a = Archive(path=p.as_posix())
47-
archive_id = hashlib.blake2b(
48-
p.as_posix().encode(), digest_size=20).hexdigest()
47+
archive_id = '00' + hashlib.blake2b(p.as_posix().encode(), digest_size=19).hexdigest() # 00 stands for ID type origin
4948
elif settings.skip_exits:
5049
continue
5150
else:
5251
a = old_a
5352
archive_id = old_a.id
54-
metadata = {"title": a.title, "subtitle": a.subtitle, "source": a.source, "pagecount": a.pagecount, "tags": set(
53+
metadata = {"id": archive_id, "title": a.title, "subtitle": a.subtitle, "source": a.source, "pagecount": a.pagecount, "tags": set(
5554
t.tag for t in a.tags if not t.tag.startswith("date_added:")), "categories": set(c.name for c in a.categories)}
5655
real_path = Path(settings.content) / p
5756
prev_scanners = []
@@ -76,7 +75,7 @@ def scan(paths):
7675
for tag in filter(lambda t: not t.tag in metadata["tags"], a.tags):
7776
a.tags.remove(tag)
7877
for tag in metadata["tags"] - set(t.tag for t in a.tags):
79-
a.tags.append(Tag(archive_id=archive_id, tag=tag))
78+
a.tags.append(Tag(archive_id=metadata["id"], tag=tag))
8079
for category in filter(lambda c: not c.name in metadata["categories"], a.categories):
8180
a.categories.remove(category)
8281
for category in metadata["categories"] - set(c.name for c in a.categories):
@@ -85,7 +84,7 @@ def scan(paths):
8584
db.add(c)
8685
a.categories.append(c)
8786
if old_a is None:
88-
a.id = archive_id
87+
a.id = metadata["id"]
8988
db.add(a)
9089
db.commit()
9190

comiclib/scanner/30-importEHdb.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,10 @@ def scan(self, path: Path, id: str, metadata: dict, prev_scanners: list[str]) ->
7676
return False
7777
elif prev_scanners and not (gid := self.get_gid(metadata)) is None:
7878
logger.info(f' <- {path}')
79-
res = self.con.execute("SELECT title, title_jpn, category, posted, thumb, artist, `group`, parody, character, female, male, language, mixed, other, cosplayer, rest FROM gallery WHERE gid == ?", (gid,)).fetchone()
79+
res = self.con.execute("SELECT title, title_jpn, category, posted, thumb, token, artist, `group`, parody, character, female, male, language, mixed, other, cosplayer, rest FROM gallery WHERE gid == ?", (gid,)).fetchone()
8080
if res is None: return False
81+
token = res.pop('token')
82+
metadata["id"] = f"EH{gid:>018}{token}{id[-10:]}"
8183
metadata["title"] = res.pop("title")
8284
metadata["subtitle"] = res.pop("title_jpn")
8385
thumb = res.pop("thumb")
@@ -90,7 +92,6 @@ def scan(self, path: Path, id: str, metadata: dict, prev_scanners: list[str]) ->
9092
if res[namespace] is None: continue
9193
metadata["tags"] |= set(map(lambda v: namespace+':'+v, ast.literal_eval(res[namespace])))
9294
if metadata["source"] is None or not re.fullmatch(r"https?://e[x-]hentai\.org/g/(\d+)/", metadata["source"]) is None:
93-
token = self.con.execute("SELECT token FROM gallery WHERE gid == ?", (gid,)).fetchone()['token']
9495
metadata["source"] = f"https://exhentai.org/g/{gid}/{token}/"
9596
return True
9697
else:

comiclib/scanner/40-thumb.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@ def scan(self, path: Path, id: str, metadata: dict, prev_scanners: list[str]) ->
1111
if not prev_scanners or not metadata.get('thumb') is None:
1212
return False
1313
logger.info(f' <- {path}')
14-
thumb = extract_thumbnail(path, id, 1, cover=True)
14+
thumb = extract_thumbnail(path, metadata['id'], 1, cover=True)
1515
metadata['thumb'] = str(thumb)
1616
return True

docs/en/docs/scanner.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,13 @@ This return value is also used as a reference for subsequent scripts.
104104
Parameters of `Scanner.scan`:
105105

106106
* `path`: file/directory path for the input comic.
107-
* `id`: Unique ID generated by ComicLib, do not attempt to write this value.
108-
* `metadata`: Metadata obtained after processing by the previous script. The fields include `title`, `subtitle` `source`, `pagecount`, `tags`, `categories`. The initial values are `None` or `set()`. scanners write the resulting metadata into this `dict`.
107+
* `id`: The unique ID pre-generated by ComicLib: a hash of the comic's path relative to `CONTENT`. The database stores the ID given by `metadata["id"]` instead; see the description of custom IDs below.
108+
* `metadata`: Metadata obtained after processing by the previous scripts. The fields include `id`, `title`, `subtitle`, `source`, `pagecount`, `tags`, `categories`. Except for `id`, the initial values are `None` or `set()`. Scanners write the resulting metadata into this `dict`.
109109
* `prev_scanners`: The name of the script that previously returned `True`.
110+
111+
!!! example "custom ID (Experimental)"
112+
ComicLib first pre-generates a unique ID from the path. This ID starts with `00` and is passed as the parameter `id`. Initially, `metadata["id"]` holds the same value.
113+
A scanner can generate a new ID based on `id`, on the `metadata["id"]` value possibly modified by previous scanners, and on other information, and write it into `metadata["id"]`.
114+
By convention, the first two characters of the ID indicate the type of the ID. For example, the built-in scanner 30-importEHdb.py uses `EH` to mark its own ID format, which carries the ehentai gid information.
115+
The final `metadata["id"]` is written to the database as the unique identifier of the comic.
116+
An ID must be unique and exactly 40 characters long. Custom IDs have no effect when metadata is updated during a rescan.

docs/zh/docs/scanner.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,13 @@ class Scanner:
103103
`Scanner.scan` 的参数:
104104

105105
* `path`: 文件/目录路径
106-
* `id`: 由 ComicLib 生成的唯一 ID,不要试图写入此值
107-
* `metadata`: 由前面脚本处理后得到的元数据,字段有 `title`, `subtitle` `source`, `pagecount`, `tags`, `categories`最初值皆为 `None``set()`。扫描脚本将得到的元数据写入该 `dict`
106+
* `id`: 由 ComicLib 预生成的唯一 ID,是相对于 `CONTENT` 的路径的散列值,但最终数据库使用的是 `metadata["id"]` 给出的 ID,见下面自定义 ID 的说明。
107+
* `metadata`: 由前面脚本处理后得到的元数据,字段有 `id`, `title`, `subtitle`, `source`, `pagecount`, `tags`, `categories`。除 `id` 外,最初值皆为 `None` 或 `set()`。扫描脚本将得到的元数据写入该 `dict`。
108108
* `prev_scanners`: 前面返回 `True` 的脚本名称。
109+
110+
!!! example "自定义ID(试验性)"
111+
ComicLib 先根据路径预生成一个唯一 ID,以 `00` 开头,作为参数 `id` 的值。最初这一 ID 与 `metadata["id"]` 相同。
112+
扫描脚本可以根据 `id`、前面扫描脚本修改过的 `metadata["id"]` 和其他信息生成一个新的 ID,并写入 `metadata["id"]`。
113+
一般约定 ID 的前两个字符表示 ID 的含义,如内置脚本 30-importEHdb.py 用 `EH` 表示其设计的带有 ehentai gid 信息的 ID。
114+
最终的 `metadata["id"]` 作为漫画的唯一标识符写入数据库。
115+
ID 必须保证唯一,且为 40 个字符。自定义 ID 对重新扫描更新元数据无效。

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "comiclib"
7-
version = "0.0.2"
7+
version = "0.0.3.dev"
88
authors = [
99
{ name="URenko" },
1010
]

0 commit comments

Comments
 (0)