#!/usr/bin/env python3
"""Crawl PikPak share listings through a local proxy into a ``.raw`` file.

Usage: ``getsearchtxt.py SEARCH_JSON``

``SEARCH_JSON`` is a JSON array of objects; each object's ``type_id`` names a
share, either as a URL containing ``/s/<share_id>[/<file_id>]`` or as a bare
``<share_id>[/<file_id>]`` string.  Every listing line returned by the proxy is
appended to ``SEARCH_JSON.raw``.

If ``SEARCH_JSON.raw.gz`` exists it is first unpacked over ``SEARCH_JSON.raw``.
Entries already present in ``SEARCH_JSON.raw`` are remembered in ``sharedict``
so that they are not fetched or written again.
"""

import gzip
import json
import re
import sys
import time
import traceback

# Captures everything after "/s/" in a share URL: "<share_id>[/<file_id>]".
p = re.compile(r'.*/s/(.*)')
# Listing lines matching this pattern are neither recorded nor descended into.
skipp = re.compile(r'.*(cover|screen|频道).*', re.IGNORECASE)
# Number of proxy requests issued so far (starts at 1, bumped before each one).
reqcount = 1
# "<share_id>/<file_id>" keys loaded from a previous run's .raw file.
sharedict = set()


def _split_share_path(path):
    """Return the ``/``-separated id components of a share reference.

    *path* is either a URL containing ``/s/`` (only the part after it is
    used) or an already bare ``share_id[/file_id]`` string.
    """
    m = p.match(path)
    tail = m.group(1) if m else path
    return tail.split("/")


def _fetch_listing(shareid, fileid, morepage):
    """Request one listing page from the proxy and return its decoded text."""
    global reqcount
    # Imported here rather than at module top so the file-handling logic can
    # be imported and tested on machines without the third-party package.
    import requests

    reqcount += 1
    if reqcount % 5 == 0:
        print(f"reqcount:{reqcount} shareid:{shareid} fileid:{fileid}", file=sys.stderr)
    url = f'http://192.168.101.188:9978/proxy?do=pikpak&type=list&share_id={shareid}&file_id={fileid}&pass_code=&morepage={morepage}'
    print(f"url: {url}", file=sys.stderr)
    # NOTE(review): no timeout is passed, so a stalled proxy blocks forever.
    resp = requests.get(url)
    return resp.content.decode('utf-8')


def getlist(w, shareid, fileid, morepage):
    """Recursively write the listing of a share/folder to *w*.

    Args:
        w: Writable text file; every accepted listing line is written to it
            followed by a newline and flushed immediately.
        shareid: Share id sent to the proxy as ``share_id``.
        fileid: File/folder id sent as ``file_id`` ("" for the share root).
        morepage: Value sent as the ``morepage`` query parameter.

    A listing line is accepted when it does not match ``skipp``, has more
    than two tab-separated fields, and its id (parsed from field 0) is not in
    ``sharedict``.  Lines whose third field is ``"folder"`` are descended
    into.  A response that has at most 4 lines and does not contain the text
    ``folder`` ends the walk for this branch.
    """
    content = _fetch_listing(shareid, fileid, morepage)
    lines = content.split("\n")
    if "folder" not in content and len(lines) <= 4:
        return

    # str.split always yields at least one element, so lines[0] exists.
    print(f"first line:{lines[0]}", file=sys.stderr)
    for line in lines:
        if skipp.match(line):
            continue
        linearr = line.split('\t')
        if len(linearr) <= 2:
            continue
        arr = _split_share_path(linearr[0])
        shareid = arr[0]
        fileid = arr[1] if len(arr) > 1 else ""
        if shareid + "/" + fileid in sharedict:
            print(f"skip shareid{shareid} fileid:{fileid}", file=sys.stderr)
            continue
        w.write(line + "\n")
        w.flush()
        if linearr[2] == "folder":
            getlist(w, shareid, fileid, False)

    # NOTE(review): shareid/fileid were overwritten inside the loop, so this
    # follow-up request uses the ids of the last parsed entry rather than the
    # ids this call was given.  That looks accidental, but the proxy's
    # "morepage" semantics are not visible here -- behaviour kept as-is until
    # confirmed.  Each follow-up page also adds one stack frame, so very long
    # paginations are bounded by the interpreter's recursion limit.
    getlist(w, shareid, fileid, True)


def _extract_gz_snapshot(base):
    """Unpack ``<base>.raw.gz`` over ``<base>.raw`` if the archive exists.

    Each line is stripped of surrounding whitespace before being written.
    Extraction is best effort: a missing archive is skipped silently, any
    other failure is reported with a traceback and the run continues.
    """
    try:
        with gzip.open(base + ".raw.gz", mode="rt", encoding="utf-8") as src:
            print(f"found gz raw file:{base}.raw.gz, extract it", file=sys.stderr)
            with open(base + ".raw", "w", encoding="utf-8") as dst:
                for line in src:
                    dst.write(line.strip() + "\n")
    except FileNotFoundError:
        # No archive is the normal case; nothing to report.
        pass
    except Exception:
        traceback.print_exc()


def _load_known_ids(base):
    """Fill ``sharedict`` with the ids already recorded in ``<base>.raw``."""
    try:
        f = open(base + ".raw", "r", encoding="utf-8")
    except OSError:
        print("no old raw file")
        return

    print("found old raw file")
    with f:
        for line in f:
            # Drop the line terminator so it cannot leak into the key when
            # the line has no tab-separated fields after the first one.
            first_field = line.rstrip("\n").split("\t")[0]
            arr = _split_share_path(first_field)
            if len(arr) > 1:
                sharedict.add(arr[0] + "/" + arr[1])
    print(f"old raw file record:{len(sharedict)}")


def main():
    """Crawl every share named in the JSON file given as ``sys.argv[1]``.

    Exits with status 2 and a usage message when no argument is given.
    """
    if len(sys.argv) < 2:
        print(f"usage: {sys.argv[0]} SEARCH_JSON", file=sys.stderr)
        sys.exit(2)
    base = sys.argv[1]

    _extract_gz_snapshot(base)
    _load_known_ids(base)

    with open(base + ".raw", "a+", encoding="utf-8") as w, \
            open(base, "r", encoding="utf-8") as f:
        j = json.load(f)
        for c in j:
            type_id = c.get("type_id")
            if not isinstance(type_id, str) or not type_id:
                # Without a usable id there is nothing to ask the proxy for.
                print(f"skip entry without type_id:{c}", file=sys.stderr)
                continue
            arr = _split_share_path(type_id)
            shareid = arr[0]
            fileid = arr[1] if len(arr) > 1 else ""
            if shareid + "/" + fileid in sharedict:
                continue
            getlist(w, shareid, fileid, False)


if __name__ == "__main__":
    main()