Skip to content

Commit 8eb135e

Browse files
authored Mar 1, 2025
Merge pull request #26 from saveweb/clean
Add Clean
2 parents ed179e0 + c9ba360 commit 8eb135e

File tree

10 files changed

+1840
-868
lines changed

10 files changed

+1840
-868
lines changed
 

‎.github/workflows/python-package.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
python-version: ["3.9", "3.10", "3.11", "3.12"]
19+
python-version: ["3.9", "3.11", "3.13"]
2020

2121
steps:
2222
- uses: actions/checkout@v3
@@ -47,6 +47,7 @@ jobs:
4747
python -m biliarchiver.cli_tools.biliarchiver get --help
4848
python -m biliarchiver.cli_tools.biliarchiver up --help
4949
python -m biliarchiver.cli_tools.biliarchiver config --help
50+
python -m biliarchiver.cli_tools.biliarchiver clean --help
5051
# - name: Test with pytest
5152
# run: |
5253
# pytest

‎biliarchiver/_biliarchiver_upload_bvid.py

+30
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ def upload_bvid(
3434
try:
3535
lock_dir = config.storage_home_dir / ".locks" / bvid
3636
lock_dir.mkdir(parents=True, exist_ok=True)
37+
videos_basepath = (
38+
config.storage_home_dir
39+
/ "videos"
40+
/ f"{bvid}-{human_readable_upper_part_map(string=bvid, backward=True)}"
41+
)
42+
if os.path.exists(videos_basepath / "_spam.mark"):
43+
print(_("{} 被标记为垃圾内容,跳过").format(bvid))
44+
return
3745
with UploadLock(lock_dir): # type: ignore
3846
_upload_bvid(
3947
bvid,
@@ -49,6 +57,18 @@ def upload_bvid(
4957
print(_("{} 的视频还没有下载完成,跳过".format(bvid)))
5058
except Exception as e:
5159
print(_("上传 {} 时出错:".format(bvid)))
60+
error_msg = str(e)
61+
if "appears to be spam" in error_msg:
62+
print(_("{} 被标记为垃圾内容,创建标记文件").format(bvid))
63+
videos_basepath = (
64+
config.storage_home_dir
65+
/ "videos"
66+
/ f"{bvid}-{human_readable_upper_part_map(string=bvid, backward=True)}"
67+
)
68+
if videos_basepath.exists():
69+
with open(videos_basepath / "_spam.mark", "w", encoding="utf-8") as f:
70+
f.write(error_msg)
71+
5272
raise e
5373

5474

@@ -84,6 +104,9 @@ def _upload_bvid(
84104
)
85105
)
86106
continue
107+
if os.path.exists(f"{videos_basepath}/{local_identifier}/_spam.mark"):
108+
print(_("{} 被标记为垃圾内容,跳过").format(local_identifier))
109+
continue
87110
if local_identifier.startswith("_"):
88111
print(_("跳过带 _ 前缀的 local_identifier: {}").format(local_identifier))
89112
continue
@@ -256,6 +279,13 @@ def _upload_bvid(
256279
print(f"Upload failed, retrying ({upload_retry}) ...")
257280
time.sleep(min(30 * (6 - upload_retry), 240))
258281
continue
282+
if "appears to be spam" in str(e):
283+
print(_("{} 被标记为垃圾内容,创建标记文件").format(bvid))
284+
with open(
285+
videos_basepath / "_spam.mark", "w", encoding="utf-8"
286+
) as f:
287+
f.write(str(e))
288+
raise e
259289
else:
260290
raise e
261291
tries = 100

‎biliarchiver/archive_bvid.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,7 @@ def delete_cache(reason: str = ""):
223223
for result, cor in zip(results, coroutines):
224224
if isinstance(result, Exception):
225225
print(_("出错,其他任务完成后将抛出异常..."))
226-
for task in tasks:
227-
task.cancel()
228-
await asyncio.sleep(3)
226+
# No need to modify other code since asyncio.gather already waited for all tasks
229227
traceback.print_exception(result)
230228
raise result
231229

‎biliarchiver/cli_tools/bili_archive_bvids.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -112,25 +112,29 @@ def check_free_space():
112112
tasks: List[asyncio.Task] = []
113113

114114
def tasks_check():
115+
failed_tasks = []
115116
for task in tasks:
116117
if task.done():
117118
_task_exception = task.exception()
118119
if isinstance(_task_exception, BaseException):
119120
import traceback
120121
traceback.print_exc()
121-
print(f"任务 {task} 出错,即将异常退出...")
122-
for task in tasks:
123-
task.cancel()
124-
raise _task_exception
122+
print(f"任务 {task} 出错,但其他任务将继续执行...")
123+
failed_tasks.append((task, _task_exception))
125124
# print(f'任务 {task} 已完成')
126125
tasks.remove(task)
126+
127127
if not check_free_space():
128128
s = _("剩余空间不足 {} GiB").format(min_free_space_gb)
129129
print(s)
130130
for task in tasks:
131131
task.cancel()
132132
raise RuntimeError(s)
133133

134+
if failed_tasks:
135+
print(f"完成所有任务,但有 {len(failed_tasks)} 个任务失败")
136+
raise failed_tasks[0][1]
137+
134138
for index, bvid in enumerate(bvids_list):
135139
if index < skip_to:
136140
print(f"跳过 {bvid} ({index+1}/{len(bvids_list)})", end="\r")

‎biliarchiver/cli_tools/biliarchiver.py

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from biliarchiver.cli_tools.down_command import down
66
from biliarchiver.cli_tools.get_command import get
77
from biliarchiver.cli_tools.conf_command import config
8+
from biliarchiver.cli_tools.clean_command import clean
89
from biliarchiver.version import BILI_ARCHIVER_VERSION
910

1011

@@ -69,6 +70,7 @@ def init():
6970
biliarchiver.add_command(down)
7071
biliarchiver.add_command(get)
7172
biliarchiver.add_command(config)
73+
biliarchiver.add_command(clean)
7274

7375

7476
@biliarchiver.command(help=click.style(_("配置账号信息"), fg="cyan"))
biliarchiver/cli_tools/clean_command.py

+200
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
import os
2+
import click
3+
import asyncio
4+
from pathlib import Path
5+
from rich import print
6+
7+
from biliarchiver.utils.storage import get_free_space
8+
from biliarchiver.i18n import _
9+
10+
11+
@click.command(help=click.style(_("清理并尝试修复未完成的任务"), fg="cyan"))
@click.option(
    "--try-upload",
    "-u",
    is_flag=True,
    default=False,
    help=_("尝试上传下载完成的视频"),
)
@click.option(
    "--try-download",
    "-d",
    is_flag=True,
    default=False,
    help=_("尝试继续下载未完成的视频"),
)
@click.option(
    "--clean-locks",
    "-l",
    is_flag=True,
    default=False,
    help=_("清理锁文件"),
)
@click.option(
    "--collection",
    "-c",
    default="opensource_movies",
    help=_("欲上传至的 collection"),
)
@click.option(
    "--all",
    "-a",
    is_flag=True,
    default=False,
    help=_("执行所有清理操作"),
)
@click.option(
    "--min-free-space-gb",
    "-m",
    type=int,
    default=10,
    help=_("最小剩余空间 (GB),少于此值时将中止下载"),
)
def clean(try_upload, try_download, clean_locks, collection, all, min_free_space_gb):
    """Entry point of the ``clean`` command.

    Depending on the selected flags this removes lock files, uploads videos
    whose download has finished, and resumes downloads that never completed.

    Args:
        try_upload: Upload fully downloaded videos that still have parts
            without an upload marker.
        try_download: Resume videos lacking the ``_all_downloaded.mark`` file.
        clean_locks: Remove everything under the ``.locks`` directory.
        collection: Target collection passed through to the uploader.
        all: Shorthand that switches on the three actions above.
        min_free_space_gb: Downloads are skipped when the measured free space
            is below this value.
    """
    if all:
        try_upload = try_download = clean_locks = True

    # Nothing selected: tell the user and stop before touching the config.
    if not (try_upload or try_download or clean_locks):
        print(_("请指定至少一项清理操作,或使用 --all/-a 执行所有清理操作"))
        return

    # Imported lazily, only once we know there is work to do.
    from biliarchiver.config import config

    # Report free disk space (get_free_space presumably returns bytes -- the
    # value is divided by 1024**3 before being shown).
    bytes_per_unit = 1024 * 1024 * 1024
    free_space_gb = get_free_space(config.storage_home_dir) / bytes_per_unit
    print(_("当前剩余磁盘空间: {:.2f} GB").format(free_space_gb))

    if clean_locks:
        clean_lock_files(config)

    videos_dir = config.storage_home_dir / "videos"
    if not videos_dir.exists():
        print(_("视频目录不存在: {}").format(videos_dir))
        return

    pending_bvids = []

    for entry in videos_dir.iterdir():
        dir_name = entry.name
        # Only directories named "<bvid>-<suffix>" are considered.
        if not entry.is_dir() or "-" not in dir_name:
            continue

        bvid = dir_name.partition("-")[0]
        if not bvid.startswith("BV"):
            continue

        if (entry / "_all_downloaded.mark").exists():
            # Download finished: the only remaining action is an upload.
            if try_upload:
                process_finished_download(entry, bvid, collection)
        elif try_download:
            print(_("发现未完成下载的视频: {}").format(bvid))
            pending_bvids.append(bvid)

    if not (try_download and pending_bvids):
        return

    # The free-space figure was sampled once at the start of the command.
    if free_space_gb < min_free_space_gb:
        print(_("剩余空间不足 {} GB,跳过下载操作").format(min_free_space_gb))
    else:
        download_unfinished_videos(config, pending_bvids, min_free_space_gb)
92+
93+
94+
def clean_lock_files(config):
    """Delete all lock files and prune the directories they leave empty.

    Every regular file found anywhere below ``<storage_home_dir>/.locks`` is
    removed, then sub-directories are removed bottom-up when they are empty.
    The ``.locks`` directory itself is kept. A summary with the number of
    removed files and their combined size is printed at the end.

    Args:
        config: Object exposing ``storage_home_dir`` as a path-like value
            that supports the ``/`` operator.
    """
    lock_dir = config.storage_home_dir / ".locks"
    if not lock_dir.exists():
        print(_("锁文件目录不存在: {}").format(lock_dir))
        return

    total_locks = 0
    total_size = 0

    for lock_path in lock_dir.glob("**/*"):
        try:
            if not lock_path.is_file():
                continue
            size = lock_path.stat().st_size
            lock_path.unlink()
        except FileNotFoundError:
            # Another process released the lock between listing and removal;
            # it is already gone, so there is nothing left to clean here.
            continue
        total_size += size
        total_locks += 1

    # Walk bottom-up so children are handled before their parents.
    for dirpath, dirnames, _filenames in os.walk(lock_dir, topdown=False):
        for dirname in dirnames:
            try:
                # rmdir only succeeds on empty directories, so no separate
                # (and racy) emptiness check is needed.
                (Path(dirpath) / dirname).rmdir()
            except OSError:
                # Non-empty, already removed, or not removable: leave it.
                pass

    print(
        _("已清理 {} 个锁文件,释放 {:.2f} MiB 空间").format(
            total_locks, total_size / (1024 * 1024)
        )
    )
126+
127+
128+
def process_finished_download(video_dir, bvid, collection):
    """Upload a fully downloaded video if any of its parts is still pending.

    A video directory carrying a ``_spam.mark`` file is skipped. Otherwise the
    upload is attempted when at least one sub-directory has a
    ``_downloaded.mark`` file but no ``_uploaded.mark`` file. Upload errors
    are reported rather than raised; an error whose text contains
    "appears to be spam" additionally writes ``_spam.mark`` holding that text.

    Args:
        video_dir: Path of the video's directory.
        bvid: Identifier handed to the uploader.
        collection: Target collection handed to the uploader.
    """
    spam_marker = video_dir / "_spam.mark"
    if spam_marker.exists():
        print(_("{} 已被标记为垃圾,跳过").format(bvid))
        return

    # A part is pending when it was downloaded but never marked as uploaded.
    upload_pending = any(
        entry.is_dir()
        and (entry / "_downloaded.mark").exists()
        and not (entry / "_uploaded.mark").exists()
        for entry in video_dir.iterdir()
    )
    if not upload_pending:
        return

    print(_("尝试上传 {}").format(bvid))
    from biliarchiver._biliarchiver_upload_bvid import upload_bvid

    try:
        upload_bvid(
            bvid,
            update_existing=False,
            collection=collection,
            delete_after_upload=True,
        )
    except Exception as e:
        error_str = str(e)
        if "appears to be spam" not in error_str:
            print(_("上传 {} 时出错: {}").format(bvid, e))
            return

        # Remember the rejection so the video is skipped next time.
        print(_("{} 被检测为垃圾,标记并跳过").format(bvid))
        spam_marker.write_text(error_str, encoding="utf-8")
166+
167+
168+
def download_unfinished_videos(config, bvids, min_free_space_gb):
    """Resume downloading the given videos through the regular download routine.

    The identifiers are written, one per line, to a uniquely named temporary
    file inside ``config.storage_home_dir``; its path is handed to ``_down``
    as the ``bvids`` argument. The file is always removed afterwards. Errors
    raised while importing or running the downloader are printed, not raised.

    Args:
        config: Object exposing ``storage_home_dir``.
        bvids: Identifiers of the videos to resume; nothing happens when empty.
        min_free_space_gb: Forwarded to the downloader unchanged.
    """
    if not bvids:
        return

    import tempfile

    # A unique file name keeps concurrent invocations from overwriting or
    # deleting each other's list, and never clobbers an existing user file.
    fd, temp_name = tempfile.mkstemp(
        prefix="_temp_bvids_", suffix=".txt", dir=config.storage_home_dir
    )
    temp_file = Path(temp_name)

    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write("\n".join(bvids))

        print(_("尝试继续下载 {} 个未完成的视频").format(len(bvids)))

        kwargs = {
            "bvids": str(temp_file),
            "skip_ia_check": True,
            "from_browser": None,
            "min_free_space_gb": min_free_space_gb,
            "skip_to": 0,
            "disable_version_check": False,
        }

        try:
            # _down is a coroutine function, so drive it with asyncio.run.
            from biliarchiver.cli_tools.bili_archive_bvids import _down

            asyncio.run(_down(**kwargs))
        except Exception as e:
            print(_("下载过程中出错: {}").format(e))
    finally:
        # Also reached when writing the list fails, so the file never leaks.
        temp_file.unlink(missing_ok=True)

‎biliarchiver/locales/biliarchiver.pot

+251-161
Large diffs are not rendered by default.

‎biliarchiver/locales/en/LC_MESSAGES/biliarchiver.po

+270-177
Large diffs are not rendered by default.

‎biliarchiver/locales/en/LC_MESSAGES/biliarchiver.po~

+571
Large diffs are not rendered by default.

‎poetry.lock

+505-522
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)
Please sign in to comment.