Skip to content

Commit 6f3e031

Browse files
committed
impl local mirror use
1 parent e005c72 commit 6f3e031

8 files changed

+853604
-64
lines changed

Asset_Extract.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,18 @@ def pattern(arg):
2727
parser.add_argument("-ex", "--extract_dir", type=str, help="Extract directory, 0 to skip")
2828
parser.add_argument("-ex_img", "--extract_img_dir", type=str, help="Extract image directory, 0 to skip")
2929
parser.add_argument("-ex_media", "--extract_media_dir", type=str, help="Extract media directory, 0 to skip")
30+
parser.add_argument("-local", "--local_mirror", type=str, help="Use assets from local dir")
3031
parser.add_argument("-m", "--manifest", type=str, help="Manifest directory")
3132
parser.add_argument("-r", "--region", type=str, help="Region {!r}".format(MANIFESTS.keys()))
3233
parser.add_argument("patterns", type=pattern, nargs="*", help="Extract patterns. Syntax: {regex}, {regex}:{target}, {regex}:{target}:{region}")
3334
args = parser.parse_args()
3435

3536
ex_kwargs = {}
36-
for key, arg in (("dl_dir", args.download_dir), ("ex_dir", args.extract_dir), ("ex_img_dir", args.extract_img_dir), ("ex_media_dir", args.extract_media_dir)):
37+
for key, arg in (("dl_dir", args.download_dir), ("ex_dir", args.extract_dir), ("ex_img_dir", args.extract_img_dir), ("ex_media_dir", args.extract_media_dir), ("local_mirror", args.local_mirror)):
3738
if arg == "0":
38-
ex_kwargs["dl_dir"] = None
39+
ex_kwargs[key] = None
3940
elif arg:
40-
ex_kwargs["dl_dir"] = arg
41+
ex_kwargs[key] = arg
4142

4243
if args.manifest and args.operation in ("extract", "mirror"):
4344
if args.manifest == "ALLTIME":
@@ -57,7 +58,10 @@ def pattern(arg):
5758
for region in MANIFESTS.keys():
5859
ex.download_and_extract_by_diff(region=region)
5960
elif args.operation == "mirror":
60-
ex.mirror_files()
61+
if args.local_mirror:
62+
ex.mirror_files(mirror_dir=args.local_mirror)
63+
else:
64+
ex.mirror_files()
6165
elif args.operation == "report":
6266
ex.report_diff()
6367
else:
@@ -68,5 +72,5 @@ def pattern(arg):
6872
ex_patterns[reg][exp] = target
6973
else:
7074
ex_patterns[region][exp] = target
71-
print(f"Patterns {dict(ex_patterns)}")
75+
print(f"Patterns\n{dict(ex_patterns)}")
7276
ex.download_and_extract_by_pattern(ex_patterns)

loader/AssetExtractor.py

+37-57
Original file line numberDiff line numberDiff line change
@@ -614,28 +614,32 @@ def mp_extract(ex_dir, ex_img_dir, ex_target, dl_filelist):
614614
json.dump(path_id_to_string, fn, indent=2)
615615

616616

617+
def requests_download(url, target, max_retries=None):
    """Download ``url`` into the file ``target``.

    The payload is first streamed into a per-process temporary file next to
    ``target`` and only renamed into place once the transfer has completed.
    Callers in this module treat "``target`` exists" as "already downloaded",
    so a truncated file must never be left under the final name.

    Args:
        url: Address to fetch with ``requests.get``.
        target: Destination file path. ``check_target_path`` is called on it
            first (presumably to create the parent directory -- confirm
            against its definition).
        max_retries: Maximum number of retries after a
            ``requests.exceptions.ConnectionError``. ``None`` (the default)
            keeps the original behaviour of retrying indefinitely.

    Returns:
        True when the file was fully written to ``target``; False when the
        server answered with a non-200 status, the retry budget was
        exhausted, or any other exception occurred (the exception or status
        is printed).
    """
    check_target_path(target)
    # Per-process suffix: several pool workers may fetch the same hash.
    partial = f"{target}.{os.getpid()}.part"

    def discard_partial():
        # Best-effort cleanup; the temp file may not have been created yet.
        try:
            os.remove(partial)
        except OSError:
            pass

    failures = 0
    while True:
        try:
            with requests.get(url, stream=True) as req:
                if req.status_code != 200:
                    # Retrying will not change the answer; report and give up
                    # instead of spinning or pretending success.
                    print(f"{url}: HTTP {req.status_code}")
                    return False
                with open(partial, "wb") as fn:
                    # Iterating the response directly yields 128-byte chunks;
                    # request larger ones to cut per-chunk overhead.
                    for chunk in req.iter_content(chunk_size=1 << 16):
                        fn.write(chunk)
            # Rename only after the stream finished and the file is closed.
            os.replace(partial, target)
            return True
        except requests.exceptions.ConnectionError as e:
            discard_partial()
            failures += 1
            if max_retries is not None and failures > max_retries:
                print(e)
                return False
            continue
        except Exception as e:
            print(e)
            discard_partial()
            return False
632+
633+
617634
def mp_download_to_hash(source, dl_dir):
618635
dl_target = os.path.join(dl_dir, source.hash)
619-
if not os.path.exists(dl_target):
620-
while True:
621-
try:
622-
with requests.get(source.url, stream=True) as req:
623-
if req.status_code == 200:
624-
with open(dl_target, "wb") as fn:
625-
for chunk in req:
626-
fn.write(chunk)
627-
print("-", end="", flush=True)
628-
break
629-
except requests.exceptions.ConnectionError:
630-
continue
631-
except Exception as e:
632-
print(e)
633-
return
636+
if not os.path.exists(dl_target) and requests_download(source.url, dl_target):
637+
print("-", end="", flush=True)
634638
else:
635639
print(".", end="", flush=True)
636640

637641

638-
def mp_download(target, source, extract, region, dl_dir, overwrite):
642+
def mp_download(target, source, extract, region, dl_dir, overwrite, local_mirror):
639643
# dl_target = os.path.join(dl_dir, region, target.replace("/", "_"))
640644
if source.raw:
641645
if source.ver:
@@ -644,25 +648,22 @@ def mp_download(target, source, extract, region, dl_dir, overwrite):
644648
dl_target = os.path.join(dl_dir, region, target.replace("/", "_"))
645649
else:
646650
dl_target = os.path.join(dl_dir, source.hash)
647-
check_target_path(dl_target)
648651

649652
if overwrite or not os.path.exists(dl_target):
650-
while True:
651-
try:
652-
with requests.get(source.url, stream=True) as req:
653-
if req.status_code == 200:
654-
with open(dl_target, "wb") as fn:
655-
for chunk in req:
656-
fn.write(chunk)
657-
print("-", end="", flush=True)
658-
break
659-
except ConnectionError as e:
660-
if e.errno == -3:
661-
continue
662-
return
663-
except Exception as e:
664-
print(e)
653+
if local_mirror is not None:
654+
if not os.path.exists(link_src := os.path.join(local_mirror, source.hash)):
655+
if not requests_download(source.url, link_src):
656+
return
657+
if source.raw:
658+
check_target_path(dl_target)
659+
# symlink is no good with wine stuff
660+
os.link(link_src, dl_target)
661+
else:
662+
dl_target = link_src
663+
else:
664+
if not requests_download(source.url, dl_target):
665665
return
666+
print("-", end="", flush=True)
666667
else:
667668
print(".", end="", flush=True)
668669

@@ -748,7 +749,7 @@ def crid_mod_usm(source, ex_target, dl_target):
748749

749750

750751
class Extractor:
751-
def __init__(self, dl_dir="./_download", ex_dir="./_extract", ex_img_dir="./_images", ex_media_dir="./_media", overwrite=False, manifest_override=MANIFESTS):
752+
def __init__(self, dl_dir="./_download", ex_dir="./_extract", ex_img_dir="./_images", ex_media_dir="./_media", overwrite=False, manifest_override=MANIFESTS, local_mirror="../archives/cdn"):
752753
self.pm = {}
753754
self.pm_old = {}
754755
if manifest_override == "ALLTIME":
@@ -769,6 +770,7 @@ def __init__(self, dl_dir="./_download", ex_dir="./_extract", ex_img_dir="./_ima
769770
self.ex_media_dir = ex_media_dir
770771
self.extract_list = []
771772
self.overwrite = overwrite
773+
self.local_mirror = local_mirror
772774

773775
def raw_extract(self, source, ex_target, dl_target):
774776
if self.ex_media_dir:
@@ -793,31 +795,9 @@ def pool_download_and_extract(self, download_list, region=None):
793795
NUM_WORKERS = multiprocessing.cpu_count()
794796
pool = multiprocessing.Pool(processes=NUM_WORKERS)
795797
if region is None:
796-
dl_args = [
797-
(
798-
target,
799-
source,
800-
extract,
801-
region,
802-
self.dl_dir,
803-
self.overwrite,
804-
)
805-
for region, extract, matched in download_list
806-
for target, source in matched
807-
]
798+
dl_args = [(target, source, extract, region, self.dl_dir, self.overwrite, self.local_mirror) for region, extract, matched in download_list for target, source in matched]
808799
else:
809-
dl_args = [
810-
(
811-
target,
812-
source,
813-
extract,
814-
region,
815-
self.dl_dir,
816-
self.overwrite,
817-
)
818-
for extract, matched in download_list
819-
for target, source in matched
820-
]
800+
dl_args = [(target, source, extract, region, self.dl_dir, self.overwrite, self.local_mirror) for extract, matched in download_list for target, source in matched]
821801
print(f"Download {len(dl_args)}", flush=True) # tqdm(dl_args, desc="download", total=len(dl_args))
822802
downloaded = list(filter(None, pool.starmap(mp_download, dl_args)))
823803
pool.close()

0 commit comments

Comments
 (0)