删除 getsearchtxt.py
This commit is contained in:
parent
d5cc14f8f5
commit
1d6626a6c3
118
getsearchtxt.py
118
getsearchtxt.py
@ -1,118 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
import requests
|
|
||||||
import time
|
|
||||||
import traceback
|
|
||||||
import gzip
|
|
||||||
|
|
||||||
p=re.compile(r'.*/s/(.*)')
|
|
||||||
skipp = re.compile(r'.*(cover|screen|频道).*',re.IGNORECASE)
|
|
||||||
reqcount=1
|
|
||||||
sharedict=set()
|
|
||||||
|
|
||||||
def getlist(w,shareid, fileid,morepage):
|
|
||||||
global p
|
|
||||||
global skipp
|
|
||||||
global reqcount
|
|
||||||
global sharedict
|
|
||||||
|
|
||||||
reqcount += 1
|
|
||||||
if reqcount % 5 == 0:
|
|
||||||
print(f"reqcount:{reqcount} shareid:{shareid} fileid:{fileid}",file=sys.stderr)
|
|
||||||
#time.sleep(1)
|
|
||||||
url = f'http://192.168.101.188:9978/proxy?do=pikpak&type=list&share_id={shareid}&file_id={fileid}&pass_code=&morepage={morepage}'
|
|
||||||
print(f"url: {url}",file=sys.stderr)
|
|
||||||
resp = requests.get(url)
|
|
||||||
content = resp.content.decode('utf-8')
|
|
||||||
lines = content.split("\n")
|
|
||||||
if "folder" not in content and len(lines)<=4:
|
|
||||||
return
|
|
||||||
isfirst=True
|
|
||||||
for line in lines:
|
|
||||||
if isfirst:
|
|
||||||
isfirst=False
|
|
||||||
print(f"first line:{line}",file=sys.stderr)
|
|
||||||
if skipp.match(line):
|
|
||||||
continue
|
|
||||||
linearr = line.split('\t')
|
|
||||||
if len(linearr)>2:
|
|
||||||
m = p.match(linearr[0])
|
|
||||||
if m:
|
|
||||||
arr = m.group(1).split("/")
|
|
||||||
else:
|
|
||||||
arr = linearr[0].split("/")
|
|
||||||
shareid=arr[0]
|
|
||||||
fileid=arr[1] if len(arr)>1 else ""
|
|
||||||
if shareid+"/"+fileid in sharedict:
|
|
||||||
print(f"skip shareid{shareid} fileid:{fileid}", file=sys.stderr)
|
|
||||||
continue
|
|
||||||
w.write(line+"\n")
|
|
||||||
w.flush()
|
|
||||||
if linearr[2] == "folder":
|
|
||||||
getlist(w,shareid,fileid,False)
|
|
||||||
|
|
||||||
if len(lines)>0:
|
|
||||||
getlist(w,shareid,fileid,True)
|
|
||||||
|
|
||||||
def main():
|
|
||||||
try:
|
|
||||||
f = gzip.open(sys.argv[1]+".raw.gz",mode="rt",encoding="utf-8")
|
|
||||||
if f is not None:
|
|
||||||
print(f"found gz raw file:{sys.argv[1]}.raw.gz, extract it",file=sys.stderr)
|
|
||||||
with(open(sys.argv[1]+".raw","w",encoding="utf-8")) as w:
|
|
||||||
while(True):
|
|
||||||
lines = f.readlines()
|
|
||||||
if len(lines)<=0:
|
|
||||||
break
|
|
||||||
for line in lines:
|
|
||||||
line=line.strip()
|
|
||||||
w.write(line+"\n")
|
|
||||||
f.seek(0)
|
|
||||||
except:
|
|
||||||
traceback.print_exc()
|
|
||||||
try:
|
|
||||||
f = open(sys.argv[1]+".raw","r",encoding="utf-8")
|
|
||||||
except:
|
|
||||||
f = None
|
|
||||||
if f is not None:
|
|
||||||
print("found old raw file")
|
|
||||||
while True:
|
|
||||||
lines = f.readlines()
|
|
||||||
if len(lines)<=0:
|
|
||||||
break
|
|
||||||
for line in lines:
|
|
||||||
linearr = line.split("\t")
|
|
||||||
m = p.match(linearr[0])
|
|
||||||
if m:
|
|
||||||
arr = m.group(1).split("/")
|
|
||||||
else:
|
|
||||||
arr = linearr[0].split("/")
|
|
||||||
if len(arr)>1:
|
|
||||||
shareid = arr[0]
|
|
||||||
fileid = arr[1]
|
|
||||||
sharedict.add(shareid+"/"+fileid)
|
|
||||||
f.close()
|
|
||||||
print(f"old raw file record:{len(sharedict)}")
|
|
||||||
else:
|
|
||||||
print("no old raw file")
|
|
||||||
with(open(sys.argv[1]+".raw","a+",encoding="utf-8")) as w:
|
|
||||||
with(open(sys.argv[1],"r",encoding="utf-8")) as f:
|
|
||||||
j = json.load(f)
|
|
||||||
for c in j:
|
|
||||||
shareid=c.get("type_id")
|
|
||||||
fileid=""
|
|
||||||
m = p.match(shareid)
|
|
||||||
if m:
|
|
||||||
arr = m.group(1).split("/")
|
|
||||||
else:
|
|
||||||
arr = shareid.split("/")
|
|
||||||
shareid=arr[0]
|
|
||||||
fileid=arr[1] if len(arr)>1 else ""
|
|
||||||
if shareid+"/"+fileid in sharedict:
|
|
||||||
continue
|
|
||||||
getlist(w,shareid,fileid,False)
|
|
||||||
|
|
||||||
main()
|
|
Loading…
x
Reference in New Issue
Block a user