Skip to content

Commit fe48004

Browse files
committed
first commit
0 parents  commit fe48004

21 files changed

+5332
-0
lines changed

algoroc.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import os,sys,glob,re
2+
def false_negative_rate(positive_ids, result_ids):
    """Return the percentage of result ids that are not in the positive set.

    The value is ``(1 - hits / total) * 100`` where ``total`` is the number
    of entries in *result_ids* (duplicates are counted every time) and
    ``hits`` is how many of those entries appear in *positive_ids*.

    Raises:
        ValueError: if *result_ids* is empty, since the rate is undefined.
    """
    # A set makes each membership test O(1) instead of scanning a list.
    known = set(positive_ids)
    total = 0
    hits = 0
    for item in result_ids:
        total += 1
        if item in known:
            hits += 1
    if total == 0:
        raise ValueError("no results to evaluate; FNR is undefined")
    return (1 - float(hits) / float(total)) * 100


def read_lines(path):
    """Return the lines of *path* with the trailing newline removed."""
    with open(path, "r") as handle:
        return [line.strip("\n") for line in handle]


if __name__ == "__main__":
    positives = read_lines("positiveset.txt")
    results = read_lines("positive_result_to_choose.txt")
    # The original script ended with a stray open("") that always raised
    # after the result was printed; it has been dropped.
    print("FNR= %s" % false_negative_rate(positives, results))

allocatekeys2slave.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import os,sys
2+
from subprocess import Popen, PIPE
3+
def split_counts(total_keys, weights):
    """Split *total_keys* across slaves in proportion to *weights*.

    Every slave except the last gets ``floor(total_keys * weight / sum)``
    keys; the last slave gets whatever is left, so the counts always add up
    to *total_keys*.  An empty *weights* sequence yields an empty list.

    Args:
        total_keys: number of keys to distribute.
        weights: per-slave memory sizes (ints or numeric strings).

    Raises:
        ValueError: if the weights do not sum to a positive number.
    """
    sizes = [int(w) for w in weights]
    if not sizes:
        return []
    total_weight = sum(sizes)
    if total_weight <= 0:
        raise ValueError("total slave memory must be positive")
    counts = [total_keys * size // total_weight for size in sizes]
    # Rounding leftovers go to the last slave.
    counts[-1] = total_keys - sum(counts[:-1])
    return counts


if __name__ == "__main__":
    # usage: allocatekeys2slave.py <keys list file> <slave list file>
    # Each line of the slave list is "<ip> <memory>".
    argv = sys.argv
    totalkeyslst = argv[1]
    print(totalkeyslst)
    slavelst = argv[2]
    print(slavelst)

    ips = []
    smap = []
    with open(slavelst, "r") as sl:
        for raw in sl:
            line = raw.strip("\n")
            if not line.strip():
                # A blank or trailing empty line would break the unpack below.
                continue
            ip, mem = line.split(" ")
            ips.append(ip)
            smap.append(mem)

    with open(totalkeyslst, "r") as keys:
        tk = sum(1 for _ in keys)

    alkmap = split_counts(tk, smap)
    for count in alkmap:
        print(count)

    # Per-slave output files: <name>1<ext>, <name>2<ext>, ...
    d, ext = os.path.splitext(totalkeyslst)
    onlst = [d + str(n + 1) + ext for n in range(len(ips))]

    ct = 0  # number of keys already handed out
    for j, count in enumerate(alkmap):
        if j == 0:
            cmd = "head -" + str(count) + " " + totalkeyslst + " >" + onlst[j]
        else:
            cmd = "head -" + str(ct + count) + " " + totalkeyslst + "|tail -" + str(count) + " >" + onlst[j]
        os.system(cmd)
        print(cmd)
        if j != 0:
            # The first slave's share is only written locally; every other
            # share is copied to its slave under the original list name.
            cmd = "scp " + onlst[j] + " is_admin@" + ips[j] + ":" + totalkeyslst
            os.system(cmd)
        ct = ct + count
56+
57+
58+
59+

cp2fingertest.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import os,sys,glob,time
2+
from subprocess import Popen, PIPE
3+
# scp target for finished clips.  The leading space separates it from the
# file name when the command line is assembled below.
# NOTE(review): the user@host part looks like a redacted placeholder --
# confirm the real address before running.
dest=" [email protected]:/home/is_admin/videos/negative/"

# Poll the working directory once a minute and ship every *.f4v file that no
# process is using any more.
while True:
    for name in glob.glob("*.f4v"):
        # Empty stdout from fuser is taken to mean nobody has the file open.
        holders = Popen(['fuser', name], stdout=PIPE).communicate()[0]
        if holders:
            continue
        status = os.system("scp " + name + dest)
        if status != 0:
            # Keep the local copy so the upload is retried on the next pass
            # instead of losing the clip when the copy failed.
            continue
        os.system("rm -f " + name)

    time.sleep(60)
15+

distriKeys.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from hash_ring import *
2+
# Nodes the keys are distributed over.
memcache_servers = ['127.0.0.1','10.25.13.152','10.25.13.154']
ring = HashRing(memcache_servers)

# Number of keys the ring assigned to each server.
d = dict.fromkeys(memcache_servers, 0)

# Map every key in keyslist-all.txt to a server and record "<key> <server>"
# lines in distrikeys.txt.
with open("keyslist-all.txt","r") as keys_in:
    with open("distrikeys.txt","w") as mapping_out:
        for raw in keys_in:
            itemId = raw.strip("\n")
            server = ring.get_node(itemId)
            d[server] += 1
            mapping_out.write(itemId+" "+server+"\n")

# Print the per-server totals in the order the servers were declared.
for node in memcache_servers:
    print(d[node])
21+

downVideo.py

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import urllib,cookielib,re,os
2+
from xml.etree.ElementTree import ElementTree,fromstring,tostring
3+
def getVideo(id,done):
    """Download the Tudou item *id* to ``E:/newvideos/<id>.f4v``.

    Nothing is fetched when the target file already exists or *id* is in
    *done*.  The function also returns quietly when the lookup response has
    no ``http://...</f>`` entry, is not well-formed XML, or the download
    fails with an I/O error, so one bad item does not abort a batch.

    Args:
        id: item id, optionally followed by a newline.
        done: container of ids to skip (anything supporting ``in``).
    """
    from xml.etree.ElementTree import ParseError

    id=id.strip('\n')
    video_name="E:/newvideos/"+id+".f4v"
    if os.path.exists(video_name) or id in done:
        return

    url="http://v2.tudou.com/v?it="+id
    r=urllib.urlopen(url)
    try:
        html=r.read()
    finally:
        r.close()

    ma=re.search("http://(?:.|\n)*?</f>",html)
    if ma is None:
        return
    try:
        # Parsed only to check that the response is well-formed XML.
        fromstring(html)
    except ParseError:
        return

    # The match ends with the closing "</f>" tag; drop those four characters.
    video_url=ma.group(0)[:-4]
    try:
        urllib.urlretrieve(video_url,video_name)
    except IOError:
        # Best-effort download: skip this item, but let KeyboardInterrupt
        # and programming errors propagate.
        return
36+
37+
if __name__=='__main__':
    with open("keyslist-all.txt") as f:
        # Ids that must not be fetched: everything listed in the two files
        # below.  A set keeps the per-key "in" test inside getVideo cheap.
        done=set()
        for skip_path in ("D:/videos/downloaded.txt","D:/refer/refer.txt"):
            with open(skip_path,"r") as fo:
                for line in fo:
                    done.add(line.strip("\n"))

        # getVideo strips the trailing newline itself.
        for line in f:
            getVideo(line,done)

genapp.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import os,sys,glob,time,json
2+
# Popen/PIPE are used below but were never imported by this script.
from subprocess import Popen, PIPE

slavelst=['127.0.0.1','10.25.13.152','10.25.13.153','10.25.13.154']
loadkeydir={}
# testkeydir was assigned to without ever being created.
testkeydir={}
loadkeydir['VF12']='/home/is_admin/MRPlatform/src/Test/keys/keyslist-all.txt'
testkeydir['VF6']='/home/is_admin/videos/'
loadkeydir['AF']='/home/is_admin/MRPlatform/src/Test/keys/keyslist-all.txt'
method=['VF12','VF6','AF']
min_score=range(5,26)
seg=[100,200,400,600,800,1000,1200,1400,1600]
neighborhood=range(1,3)


def count_lines(path):
    """Return the line count of *path* as reported by ``cat path |wc -l``."""
    pc = Popen("cat "+ path + " |wc -l", stdin = PIPE, stdout = PIPE, stderr = None, shell = True)
    # communicate() also closes the pipes and reaps the child process.
    return int(pc.communicate()[0])


# One app-config path per line.
cfg_list=[]
with open("./config/cfg.lst","r") as f:
    for line in f:
        cfg_list.append(line.strip("\n"))

for i in cfg_list:
    #change app config files and switch parameters
    # NOTE(review): pmaster is not defined anywhere in this script, so this
    # line raises NameError; the master process handle still has to be
    # created before the loop.
    pmaster.stdin.write("")
    #generate
    lt=0
    lr=1
    # Poll until the result file has as many lines as the load list.
    while(lt!=lr):
        #get the tested keys number from appconfig file i
        with open(i) as json_data:
            data = json.load(json_data)
        loadlist = str(data["KeysLocation"]["LoadKeysList"])
        print(loadlist)
        #%sms%2.2fseg%dsample%dcutoff%2.2fv%dn%d%s_result.txt
        pon = os.path.basename(str(data["KeysLocation"]["TestKeysList"]))[0:8]
        algo = data["AlgoParams"]
        of = (str(algo["method"])
              + "ms" + '%2.2f'%float(str(algo["em_min_score"]))
              + "seg" + str(algo["keys_num_a_seg"])
              + "sample" + str(algo["samplingNum"])
              + "cutoff" + str(algo["cutoffpercent"])
              + "v" + str(algo["verifymethodid"])
              + "n" + str(algo["neighbormethodid"])
              + pon + "_result.txt")
        print(of)
        lt = count_lines(loadlist)
        print(lt)
        lr = count_lines(of)
        print(lr)
        time.sleep(60)

getTilelist.py

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import urllib,cookielib,re,os
2+
from xml.etree.ElementTree import ElementTree,fromstring,tostring
3+
def getVideo(fo,id):
    """Look up the title of Tudou item *id* and append it to *fo*.

    A line ``"<id> <title>"`` (title encoded as UTF-8) is written and the
    pair is printed.  Nothing happens when ``E:/videos/negative/<id>.f4v``
    already exists, when the lookup response has no ``http://...</f>``
    entry, when it is not well-formed XML, or when its root element has no
    ``title`` attribute.

    Args:
        fo: writable file object that receives the "<id> <title>" line.
        id: item id, optionally followed by a newline.
    """
    from xml.etree.ElementTree import ParseError

    id=id.strip('\n')
    video_name="E:/videos/negative/"+id+".f4v"
    if os.path.exists(video_name):
        return

    url="http://v2.tudou.com/v?it="+id
    r=urllib.urlopen(url)
    try:
        html=r.read()
    finally:
        r.close()

    if re.search("http://(?:.|\n)*?</f>",html) is None:
        return
    try:
        myxml=fromstring(html)
    except ParseError:
        return
    # Element.get returns None for a missing attribute instead of raising,
    # so the absence has to be checked explicitly before encoding.
    title=myxml.get('title')
    if title is None:
        return

    fo.write(id+" "+title.encode("utf-8")+"\n")
    print("%s %s" % (id, title))
33+
34+
if __name__=='__main__':
    # Read one item id per line and record "<id> <title>" for each of them.
    # Both files are closed even if a lookup raises.
    with open("E:/false_positive.txt") as f:
        with open("E:/false_positive_title.txt","w") as fo:
            for line in f:
                getVideo(fo,line)

gettudouv.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import urllib,cookielib,re,os
2+
import os.path
3+
from xml.etree.ElementTree import ElementTree,fromstring,tostring
4+
5+
def getVideo(id,video_name):
    """Download the Tudou item *id* to the path *video_name*.

    Nothing is fetched when *video_name* already exists.  The function also
    returns quietly when the lookup response has no ``http://...</f>``
    entry, is not well-formed XML, or the download fails with an I/O error.

    Args:
        id: item id, optionally followed by a newline.
        video_name: destination path of the downloaded file.
    """
    from xml.etree.ElementTree import ParseError

    id=id.strip('\n')
    if os.path.exists(video_name):
        return

    url="http://v2.tudou.com/v?it="+id
    r=urllib.urlopen(url)
    try:
        html=r.read()
    finally:
        r.close()

    ma=re.search("http://(?:.|\n)*?</f>",html)
    if ma is None:
        return
    try:
        # Parsed only to check that the response is well-formed XML.
        fromstring(html)
    except ParseError:
        return

    # The match ends with the closing "</f>" tag; drop those four characters.
    video_url=ma.group(0)[:-4]
    try:
        urllib.urlretrieve(video_url,video_name)
    except IOError:
        # Best-effort download: skip this item, but let KeyboardInterrupt
        # and programming errors propagate.
        return
35+
36+
if __name__=='__main__':
    # For every directory found under the positive set, take the first nine
    # characters of its name as the item id and download that item into the
    # directory as "<id>tudou.f4v".
    for parent, subdirs, _files in os.walk("E:/videos/positive"):
        for folder in subdirs:
            itemId = folder[:9]
            target = "/".join((parent, folder, itemId+"tudou.f4v"))
            getVideo(itemId, target)

getvideolist.py

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from BeautifulSoup import BeautifulSoup
2+
import mechanize,re,urllib
3+
def getVideo(sw,fo):
    """Search Bing video for *sw* and write the result page URLs to *fo*.

    The search word is URL-quoted and the results page is fetched with a
    mechanize browser that has robots.txt handling switched off.  The search
    URL and the number of ``purl="http://..."`` matches are printed.  Every
    match whose text contains none of "tudou", "cntv" or "xinhuanet" is
    printed and written to *fo*, one URL per line.

    Args:
        sw: search word (raw, not yet URL-quoted).
        fo: writable file object receiving the URLs.
    """
    br = mechanize.Browser()
    br.set_handle_robots(False)

    params = {
        "sw": urllib.quote(sw),
        "longtime": urllib.quote(" filterui:duration-long"),
    }
    surl = "http://cn.bing.com/videos/search?q=%(sw)s&go=&qs=n&form=Z9LH1&pq=%(sw)s%(longtime)s&sc=1-7&sp=-1&sk="%params
    print(surl)

    content = br.open(surl).read()
    hits = re.findall("purl=\"http://(?:.|\n)*?\"", content)
    print(len(hits))

    skipped_sites = ("tudou", "cntv", "xinhuanet")
    for hit in hits:
        if any(site in hit for site in skipped_sites):
            continue
        # Strip the leading 'purl="' and the closing quote.
        page_url = hit[6:-1]
        print(page_url)
        fo.write(page_url+"\n")
43+
44+
def extract_title(line):
    """Return the title part of an ``"<id> <title>"`` line, or None.

    The title is everything after the first space, with surrounding
    whitespace (including the line break) removed.  None is returned when
    the line contains no space or the remaining title is empty.
    """
    _item_id, separator, rest = line.partition(" ")
    title = rest.strip()
    if not separator or not title:
        return None
    return title


def main():
    """Search for every title in idtilte2.lst and store hits in videopage2.lst."""
    with open("idtilte2.lst") as f:
        with open("videopage2.lst","w") as fo:
            for sw in f:
                title = extract_title(sw)
                if title is None:
                    # Blank or malformed line: there is nothing to search for.
                    continue
                getVideo(title,fo)


if __name__=="__main__":
    main()
55+
56+
#import os
57+
#os.chdir("E:/videos")
58+
#from kw2vlBaidu import *

jujilist.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import re,urllib
2+
import io

# One or more characters in the U+2E80..U+FFFF range followed by two digits.
# This has to be a unicode pattern applied to decoded text: in a Python 2
# byte string "\u2e80-\uffff" is not a code point range at all.
pr=re.compile(u"[\u2e80-\uffff]+\\d\\d")


def is_episode_title(title):
    """Return True if *title* looks like a short "<name><two digits>" title.

    *title* is the decoded text after the id, still carrying its leading
    space and line break; it qualifies when it matches ``pr`` and is shorter
    than 10 characters.
    """
    return bool(pr.search(title)) and len(title)<10


if __name__=="__main__":
    # NOTE(review): the input is assumed to be UTF-8 -- confirm against the
    # tool that produces id-title.txt.
    with io.open("id-title.txt","r",encoding="utf-8") as f:
        with io.open("juji.lst","w",encoding="utf-8") as fo:
            for i in f:
                l=i.find(u" ")
                k=i[:l]
                title=i[l:]
                if is_episode_title(title):
                    # Output layout is unchanged: title keeps its leading
                    # space and line break, so records are separated by an
                    # empty line.
                    fo.write(k+u" "+title+u"\n")

keygen.py

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import os,sys,re
2+
3+
FFMPEG="ffmpeg"
CODEWAV="/root/tmp/musicretr-2.2.6/codewav"


def key_path_for(video_path):
    """Return *video_path* with its extension replaced by ``.key``."""
    return os.path.splitext(video_path)[0]+".key"


def generate_key(inputfn):
    """Convert *inputfn* to tmp.wav and run codewav on it.

    Returns True when both commands exit with status 0.  codewav is not run
    when ffmpeg fails, because tmp.wav would then still hold the audio of a
    previous video.
    """
    cmd = FFMPEG+" -i "+inputfn+" -y -ar 11025 -ab 128k tmp.wav"
    if os.system(cmd) != 0:
        sys.stderr.write("ffmpeg failed for "+inputfn+"\n")
        return False
    cmd = CODEWAV+" boostextdescr.txt 1 tmp.wav "+key_path_for(inputfn)
    if os.system(cmd) != 0:
        sys.stderr.write("codewav failed for "+inputfn+"\n")
        return False
    return True


if __name__=="__main__":
    with open("video_list.lst","r") as f:
        for vn in f:
            # Strip the newline before deriving the output name; otherwise a
            # path without an extension keeps the line break inside it.
            inputfn=vn.strip("\n")
            if not inputfn:
                continue
            generate_key(inputfn)

0 commit comments

Comments
 (0)