Skip to content

Commit 5104dbb

Browse files
committed
support general archive format
1 parent 37d7e97 commit 5104dbb

6 files changed

Lines changed: 130 additions & 20 deletions

File tree

comiclib/main.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@
33

44
from .scan import watch, scannow
55
from .config import settings
6-
from .utils import extract_thumbnail, convert_image
6+
from .utils import is_image, extract_thumbnail, convert_image, ArchiveFile
77
from typing import Union, Annotated
88
from enum import Enum
99
from pathlib import Path
10-
from zipfile import ZipFile
1110
import re
1211
import base64
1312
import tempfile
@@ -267,11 +266,11 @@ def extract_archive(id: str, force: bool = True, db: Session = Depends(get_db)):
267266
return JSONResponse({"operation": "", "error": "This ID doesn't exist on the server.", "success": 0}, status.HTTP_400_BAD_REQUEST)
268267
path = Path(settings.content) / a.path
269268
if path.is_dir():
270-
pages = [f"./api/archives/{id}/page?path="+quote(p.name, safe='') for p in sorted(path.iterdir()) if p.suffix != '.txt' and not p.name.startswith('.')]
271-
elif path.suffix == '.zip':
272-
with ZipFile(path) as z:
269+
pages = [f"./api/archives/{id}/page?path="+quote(p.name, safe='') for p in sorted(path.iterdir()) if is_image(p)]
270+
elif ArchiveFile.support_formats.fullmatch(path.name):
271+
with ArchiveFile(path) as z:
273272
pages = [f"./api/archives/{id}/page?path="+quote(z_info.filename, safe='') for z_info in filter(
274-
lambda z_info: not z_info.is_dir(), z.infolist())]
273+
lambda z_info: not z_info.is_dir() and is_image(z_info.filename), z.infolist())]
275274
else:
276275
raise NotImplementedError
277276
return {"job": -1, "pages": pages}
@@ -292,15 +291,15 @@ def get_archive_page(request: Request, id: str, path: str, db: Session = Depends
292291
saveto.parent.mkdir(parents=True, exist_ok=True)
293292
if p.is_dir():
294293
convert_image(p / path, saveto)
295-
elif p.suffix == '.zip':
296-
with ZipFile(p) as z, z.open(path) as f:
294+
elif ArchiveFile.support_formats.fullmatch(path.name):
295+
with ArchiveFile(p) as z, z.open(path) as f:
297296
convert_image(f, saveto)
298297
else:
299298
raise NotImplementedError
300299
return FileResponse(saveto)
301-
if p.suffix == '.zip':
300+
if ArchiveFile.support_formats.fullmatch(str(p)):
302301
def iterfile():
303-
with ZipFile(p) as z, z.open(path) as f:
302+
with ArchiveFile(p) as z, z.open(path) as f:
304303
yield from f
305304
return StreamingResponse(iterfile())
306305
else:

comiclib/scanner/10-zip.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from pathlib import Path
2-
32
from zipfile import ZipFile
43

4+
from comiclib.utils import is_image
5+
56
import logging
67
logger = logging.getLogger(__name__)
78

@@ -14,7 +15,7 @@ def scan(self, path: Path, id: str, metadata: dict, prev_scanners: list[str]) ->
1415
metadata["title"] = path.stem
1516
with ZipFile(path) as z:
1617
metadata["pagecount"] = len(
17-
list(filter(lambda z_info: not z_info.is_dir(), z.infolist())))
18+
list(filter(lambda z_info: not z_info.is_dir() and is_image(z_info.filename), z.infolist())))
1819
assert metadata["pagecount"] > 0
1920
return True
2021
else:

comiclib/scanner/11-archive.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from pathlib import Path
2+
3+
from comiclib.utils import is_image, ArchiveFile
4+
5+
import logging
6+
logger = logging.getLogger(__name__)
7+
8+
class Scanner:
    '''Handle regular archive files other than zip, using the file name as the title.'''

    def scan(self, path: Path, id: str, metadata: dict, prev_scanners: list[str]) -> bool:
        '''Fill in ``title`` and ``pagecount`` for a supported archive file.

        Returns True when this scanner handled ``path`` and False when it
        does not apply: the ``10-zip`` scanner already ran, the file name is
        not matched by ``ArchiveFile.support_formats``, or ``path`` is a
        directory.

        Raises AssertionError when the archive contains no image entries.
        '''
        if '10-zip' in prev_scanners:
            return False
        if ArchiveFile.support_formats.fullmatch(path.name) is None:
            return False
        if path.is_dir():
            return False

        logger.info(f' <- {path}')
        metadata["title"] = path.stem
        with ArchiveFile(path) as z:
            # Only non-directory entries that look like images count as pages.
            metadata["pagecount"] = sum(
                1 for z_info in z.infolist()
                if not z_info.is_dir() and is_image(z_info.filename))
        # Raised explicitly instead of via `assert`, which is stripped under
        # `python -O` and would let an archive without pages through.
        if not metadata["pagecount"] > 0:
            raise AssertionError(f'no image found in {path}')
        return True

comiclib/utils.py

Lines changed: 89 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,31 @@
11
from typing import Union
22
from pathlib import Path
33
from zipfile import ZipFile
4-
import asyncio
4+
import io
5+
import re
56
import subprocess
67
import tempfile
78
import shutil
9+
import mimetypes
10+
import itertools
811
import PIL
912
from PIL import Image
1013
try:
1114
from jxlpy import JXLImagePlugin
1215
except ModuleNotFoundError:
1316
pass
1417

18+
import logging
19+
logger = logging.getLogger(__name__)
20+
1521
from .config import settings
1622

23+
24+
mimetypes.add_type('image/jxl', '.jxl')
25+
def is_image(p: Union[str, Path]):
    '''Tell whether the name of ``p`` maps to an ``image/*`` MIME type.

    Only the file name is inspected (through ``mimetypes.guess_type`` in
    non-strict mode); the file is never opened.
    '''
    guessed, _encoding = mimetypes.guess_type(p, strict=False)
    if guessed is None:
        # Unknown or missing extension.
        return False
    major_type, _, _ = guessed.partition('/')
    return major_type == 'image'
28+
1729
def convert_image(f_or_path, saveto: str, thumbnail=False):
1830
try:
1931
with Image.open(f_or_path) as im:
@@ -40,10 +52,10 @@ def extract_thumbnail(path: Union[str, Path], id: str, page: int, cache=False, c
4052
return saveto.relative_to(settings.thumb)
4153
saveto.parent.mkdir(parents=True, exist_ok=True)
4254
if path.is_dir():
43-
convert_image(sorted(filter(lambda p:p.suffix != '.txt' and not p.name.startswith('.'), path.iterdir()))[page-1], saveto, thumbnail=True)
44-
elif path.suffix == '.zip':
45-
with ZipFile(path) as z:
46-
with z.open(list(filter(lambda z_info: not z_info.is_dir(), z.infolist()))[page-1].filename) as f:
55+
convert_image(sorted(filter(is_image, path.iterdir()))[page-1], saveto, thumbnail=True)
56+
elif ArchiveFile.support_formats.fullmatch(path.name):
57+
with ArchiveFile(path) as z:
58+
with z.open(list(filter(lambda z_info: not z_info.is_dir() and is_image(z_info.filename), z.infolist()))[page-1].filename) as f:
4759
convert_image(f, saveto, thumbnail=True)
4860
else:
4961
raise NotImplementedError
@@ -52,4 +64,75 @@ def extract_thumbnail(path: Union[str, Path], id: str, page: int, cache=False, c
5264
cover_path.parent.mkdir(parents=True, exist_ok=True)
5365
shutil.copy2(saveto, cover_path)
5466
return cover_path.name
55-
return saveto.relative_to(settings.thumb)
67+
return saveto.relative_to(settings.thumb)
68+
69+
class ArchiveInfo:
    '''Description of one archive entry: its name and whether it is a directory.

    Exposes ``filename`` and ``is_dir()``, the two members the archive
    listing code reads from an entry.
    '''

    def __init__(self, filename, is_dir=False):
        '''Store the entry name and its directory flag.

        ``is_dir`` defaults to False so that ``is_dir()`` is always usable,
        even when the caller never sets the private ``_is_dir`` attribute.
        '''
        self.filename = filename
        self._is_dir = is_dir

    def is_dir(self):
        '''Return True when the entry is a directory.'''
        return self._is_dir
75+
76+
class ArchiveFile:
    '''Read-only archive reader exposing a small ``ZipFile``-like interface.

    Files with a ``.zip`` suffix are handled by ``zipfile.ZipFile``. Any other
    file is listed and extracted by running an external 7-Zip executable.

    Used as a context manager, a zip archive yields the underlying ``ZipFile``
    and any other archive yields this object. ``infolist``, ``namelist`` and
    ``open`` also work when called on this object directly.
    '''

    # File names this class is meant to open (matched case-insensitively).
    support_formats = re.compile(r'.+\.(zip|rar|7z)$', re.IGNORECASE)
    # 7-Zip executable name; discovered on first non-zip use and cached on the
    # class so later instances do not probe again.
    executable = None
    # Line that brackets the entry table in the output of `7z l`.
    _list_separator = '------------------- ----- ------------ ------------ ------------------------'
    # One table row: two space-free columns, the attribute column (a leading
    # `D` marks a directory), a size, an optional second size, then the name.
    _list_row = re.compile(r'\S+ \S+ ([\.D])\S{4} +\d+ +\d* (.+)')

    def __init__(self, file):
        '''Open ``file``; raises FileNotFoundError if 7-Zip is needed but missing.'''
        self.file = Path(file)
        # Set for every instance so the listing methods never hit a missing attribute.
        self._infolist = None
        if self.file.suffix.lower() == '.zip':
            self.zipfile = ZipFile(file)
            return
        self.zipfile = None
        if self.executable is None:
            # Stored on the class, not the instance, so the probe runs once.
            ArchiveFile.executable = self._find_executable()
        logger.debug(f"Use 7-Zip {self.executable}")

    @staticmethod
    def _find_executable():
        '''Return the first 7-Zip executable that runs successfully.

        Candidates in the working directory are tried before those on PATH.
        '''
        sevenzip = ('7zzs', '7zz', '7z', '7za', '7zr')
        for executable in itertools.chain(('./' + _s for _s in sevenzip), sevenzip):
            try:
                p = subprocess.run([executable, 'i'], capture_output=True)
            except OSError:
                # Missing or not executable: try the next candidate.
                continue
            if p.returncode != 0:
                continue
            if b'Rar' not in p.stdout:
                logger.warning("Your version of 7-Zip does not support rar files, please download the correct version from https://7-zip.org/download.html.")
            return executable
        raise FileNotFoundError("We encountered a non-zip archive, this requires 7-Zip, but you do not have it installed, please download from https://7-zip.org/download.html and make sure 7zzs or 7zz or 7z is in the working directory or the directory indicated by PATH.")

    def infolist(self):
        '''Return the archive entries as objects with ``filename`` and ``is_dir()``.

        Raises subprocess.CalledProcessError when 7-Zip fails and
        NotImplementedError when a listing row cannot be parsed.
        '''
        if self.zipfile is not None:
            return self.zipfile.infolist()
        if self._infolist is not None:
            return self._infolist
        # `--` stops switch parsing so a path starting with `-` is not read as an option.
        stdout = subprocess.run([self.executable, 'l', '--', str(self.file)], check=True, capture_output=True, text=True).stdout
        entries = []
        list_start = False
        for line in stdout.splitlines():
            if list_start:
                if line == self._list_separator:
                    # Second separator: end of the entry table.
                    break
                m = self._list_row.match(line)
                if m is None:
                    raise NotImplementedError
                archive_info = ArchiveInfo(m[2])
                archive_info._is_dir = m[1] == 'D'
                entries.append(archive_info)
            if line == self._list_separator:
                list_start = True
        # Cache only a complete listing; a failure above leaves nothing cached.
        self._infolist = entries
        return self._infolist

    def namelist(self):
        '''Return the names of all archive entries.'''
        if self.zipfile is not None:
            return self.zipfile.namelist()
        return (archive_info.filename for archive_info in self.infolist())

    def open(self, name):
        '''Return a binary file-like object with the content of member ``name``.

        For non-zip archives the member is extracted to memory by 7-Zip;
        subprocess.CalledProcessError is raised when extraction fails.
        '''
        if self.zipfile is not None:
            return self.zipfile.open(name)
        # `name` can come from a request, so `--` keeps a leading `-` or `@`
        # from being parsed as a switch or list file.
        p = subprocess.run([self.executable, 'e', '-so', '--', str(self.file), name], check=True, capture_output=True)
        return io.BytesIO(p.stdout)

    def close(self):
        '''Close the underlying ``ZipFile``, if there is one.'''
        if self.zipfile is not None:
            self.zipfile.close()

    def __enter__(self):
        if self.zipfile is None:
            return self
        else:
            return self.zipfile.__enter__()

    def __exit__(self, *args):
        if self.zipfile is not None:
            self.zipfile.__exit__(*args)

docs/en/docs/supported-formats.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
| File extension | Download method | Corresponding scanner |
44
| -------------- | --------------- | ----------- |
55
| zip | general | 10-zip.py |
6+
| rar, 7z [^1] | general | 11-archive.py |
67
| zip | [ccloli/E-Hentai-Downloader](https://github.com/ccloli/E-Hentai-Downloader) | 20-ccloli.py |
78
| folder | [Hentai@Home](https://ehwiki.org/wiki/Hentai@Home#H.40H_Downloader) | 21-hath.py |
8-
| folder | [EhViewer](https://github.com/seven332/EhViewer) | 22-ehviewer.py |
9+
| folder | [EhViewer](https://github.com/seven332/EhViewer) | 22-ehviewer.py |
10+
11+
[^1]: These formats require 7-Zip. Download it from [https://7-zip.org/download.html](https://7-zip.org/download.html) and make sure that `7zzs`, `7zz`, or `7z` is in the working directory or in a directory listed in PATH. Builds of 7-Zip obtained from other sources may not support rar files.

docs/zh/docs/supported-formats.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
| 文件扩展名 | 下载方式 | 对应扫描脚本 |
44
| --------- | ------- | ----------- |
55
| zip | 一般 | 10-zip.py |
6+
| rar, 7z [^1] | 一般 | 11-archive.py |
67
| zip | [ccloli/E-Hentai-Downloader](https://github.com/ccloli/E-Hentai-Downloader) | 20-ccloli.py |
78
| 文件夹 | [Hentai@Home](https://ehwiki.org/wiki/Hentai@Home#H.40H_Downloader) | 21-hath.py |
8-
| 文件夹 | [EhViewer](https://github.com/seven332/EhViewer) | 22-ehviewer.py |
9+
| 文件夹 | [EhViewer](https://github.com/seven332/EhViewer) | 22-ehviewer.py |
10+
11+
[^1]: 需要安装 7-Zip,请从 [https://7-zip.org/download.html](https://7-zip.org/download.html) 下载并确保 7zzs 或 7zz 或 7z 处在工作目录或 PATH 指示的目录内。从其他一些渠道获取的 7-Zip 可能不支持 rar 文件。

0 commit comments

Comments
 (0)