@@ -614,28 +614,32 @@ def mp_extract(ex_dir, ex_img_dir, ex_target, dl_filelist):
614
614
json .dump (path_id_to_string , fn , indent = 2 )
615
615
616
616
617
def _discard_partial(path):
    """Remove a leftover partial download, ignoring a file that is already gone."""
    try:
        os.remove(path)
    except OSError:
        pass


def requests_download(url, target, max_retries=None, retry_delay=1.0):
    """Stream ``url`` into the file ``target``.

    The body is first written to a per-process temporary file next to
    ``target`` and only renamed into place once the transfer has finished,
    so an interrupted download never leaves a truncated ``target`` behind
    (callers use ``os.path.exists(target)`` to decide whether to skip work).

    Args:
        url: Address to fetch with ``requests.get``.
        target: Destination path. ``check_target_path`` is called on it first
            (presumably to make sure the parent directory exists - confirm
            against its definition).
        max_retries: How many times a ``ConnectionError`` may be retried.
            ``None`` (the default) retries without limit, matching the
            previous behaviour.
        retry_delay: Seconds to wait between connection retries, so an
            unreachable host does not turn into a busy loop.

    Returns:
        True when the file was downloaded and moved into place, False when
        the server answered with a non-200 status, retries were exhausted,
        or any other error occurred (the error is printed).
    """
    import time  # function-scope import: the file's import block is not visible here

    check_target_path(target)
    # Include the pid so pool workers fetching the same target do not share a temp file.
    partial = f"{target}.{os.getpid()}.part"
    attempts = 0
    while True:
        try:
            with requests.get(url, stream=True) as req:
                if req.status_code != 200:
                    # Previously this case fell through and re-requested forever.
                    print(f"{url}: HTTP {req.status_code}")
                    return False
                with open(partial, "wb") as fn:
                    # Iterating the response directly yields 128-byte chunks;
                    # request larger ones to cut per-chunk overhead.
                    for chunk in req.iter_content(chunk_size=64 * 1024):
                        fn.write(chunk)
            os.replace(partial, target)
            return True
        except requests.exceptions.ConnectionError as e:
            attempts += 1
            if max_retries is not None and attempts > max_retries:
                print(e)
                _discard_partial(partial)
                return False
            time.sleep(retry_delay)
        except Exception as e:
            print(e)
            _discard_partial(partial)
            return False
632
+
633
+
617
634
def mp_download_to_hash(source, dl_dir):
    """Download ``source.url`` to ``dl_dir/<source.hash>`` unless it is already there.

    Progress markers are written to stdout without a newline:
    "." when the file already exists and nothing is fetched, "-" when the
    file was downloaded successfully. A failed download prints no marker,
    so failures are not mistaken for cache hits.

    Args:
        source: Object exposing ``hash`` (used as the file name) and ``url``.
        dl_dir: Directory that holds the hash-named downloads.
    """
    dl_target = os.path.join(dl_dir, source.hash)
    if os.path.exists(dl_target):
        print(".", end="", flush=True)
        return
    if requests_download(source.url, dl_target):
        print("-", end="", flush=True)
636
640
637
641
638
- def mp_download (target , source , extract , region , dl_dir , overwrite ):
642
+ def mp_download (target , source , extract , region , dl_dir , overwrite , local_mirror ):
639
643
# dl_target = os.path.join(dl_dir, region, target.replace("/", "_"))
640
644
if source .raw :
641
645
if source .ver :
@@ -644,25 +648,22 @@ def mp_download(target, source, extract, region, dl_dir, overwrite):
644
648
dl_target = os .path .join (dl_dir , region , target .replace ("/" , "_" ))
645
649
else :
646
650
dl_target = os .path .join (dl_dir , source .hash )
647
- check_target_path (dl_target )
648
651
649
652
if overwrite or not os .path .exists (dl_target ):
650
- while True :
651
- try :
652
- with requests .get (source .url , stream = True ) as req :
653
- if req .status_code == 200 :
654
- with open (dl_target , "wb" ) as fn :
655
- for chunk in req :
656
- fn .write (chunk )
657
- print ("-" , end = "" , flush = True )
658
- break
659
- except ConnectionError as e :
660
- if e .errno == - 3 :
661
- continue
662
- return
663
- except Exception as e :
664
- print (e )
653
+ if local_mirror is not None :
654
+ if not os .path .exists (link_src := os .path .join (local_mirror , source .hash )):
655
+ if not requests_download (source .url , link_src ):
656
+ return
657
+ if source .raw :
658
+ check_target_path (dl_target )
659
+ # symlink is no good with wine stuff
660
+ os .link (link_src , dl_target )
661
+ else :
662
+ dl_target = link_src
663
+ else :
664
+ if not requests_download (source .url , dl_target ):
665
665
return
666
+ print ("-" , end = "" , flush = True )
666
667
else :
667
668
print ("." , end = "" , flush = True )
668
669
@@ -748,7 +749,7 @@ def crid_mod_usm(source, ex_target, dl_target):
748
749
749
750
750
751
class Extractor :
751
- def __init__ (self , dl_dir = "./_download" , ex_dir = "./_extract" , ex_img_dir = "./_images" , ex_media_dir = "./_media" , overwrite = False , manifest_override = MANIFESTS ):
752
+ def __init__ (self , dl_dir = "./_download" , ex_dir = "./_extract" , ex_img_dir = "./_images" , ex_media_dir = "./_media" , overwrite = False , manifest_override = MANIFESTS , local_mirror = "../archives/cdn" ):
752
753
self .pm = {}
753
754
self .pm_old = {}
754
755
if manifest_override == "ALLTIME" :
@@ -769,6 +770,7 @@ def __init__(self, dl_dir="./_download", ex_dir="./_extract", ex_img_dir="./_ima
769
770
self .ex_media_dir = ex_media_dir
770
771
self .extract_list = []
771
772
self .overwrite = overwrite
773
+ self .local_mirror = local_mirror
772
774
773
775
def raw_extract (self , source , ex_target , dl_target ):
774
776
if self .ex_media_dir :
@@ -793,31 +795,9 @@ def pool_download_and_extract(self, download_list, region=None):
793
795
NUM_WORKERS = multiprocessing .cpu_count ()
794
796
pool = multiprocessing .Pool (processes = NUM_WORKERS )
795
797
if region is None :
796
- dl_args = [
797
- (
798
- target ,
799
- source ,
800
- extract ,
801
- region ,
802
- self .dl_dir ,
803
- self .overwrite ,
804
- )
805
- for region , extract , matched in download_list
806
- for target , source in matched
807
- ]
798
+ dl_args = [(target , source , extract , region , self .dl_dir , self .overwrite , self .local_mirror ) for region , extract , matched in download_list for target , source in matched ]
808
799
else :
809
- dl_args = [
810
- (
811
- target ,
812
- source ,
813
- extract ,
814
- region ,
815
- self .dl_dir ,
816
- self .overwrite ,
817
- )
818
- for extract , matched in download_list
819
- for target , source in matched
820
- ]
800
+ dl_args = [(target , source , extract , region , self .dl_dir , self .overwrite , self .local_mirror ) for extract , matched in download_list for target , source in matched ]
821
801
print (f"Download { len (dl_args )} " , flush = True ) # tqdm(dl_args, desc="download", total=len(dl_args))
822
802
downloaded = list (filter (None , pool .starmap (mp_download , dl_args )))
823
803
pool .close ()
0 commit comments