|
| 1 | +from BeautifulSoup import BeautifulSoup |
| 2 | +import mechanize,re,urllib |
# Substrings identifying video hosts whose result links are not recorded.
_EXCLUDED_HOSTS = ("tudou", "cntv", "xinhuanet")

# Captures the URL inside every purl="http://..." attribute of the result
# page.  [^"]* stops at the first closing quote and also spans newlines.
_PURL_RE = re.compile(r'purl="(http://[^"]*)"')


def getVideo(sw, fo):
    """Search cn.bing.com videos for *sw* and write the result URLs to *fo*.

    The search word is URL-quoted and sent to the Bing video search page
    together with a quoted " filterui:duration-long" filter.  Every
    purl="http://..." value found in the returned page is written to *fo*,
    one URL per line, except URLs containing "tudou", "cntv" or "xinhuanet".
    The request URL, the number of matches and each written URL are also
    printed to stdout.

    Args:
        sw: Search word as a byte string.  Surrounding whitespace (such as
            the newline left over from reading a file line) is stripped
            before quoting; a blank search word is skipped without issuing
            a request.
        fo: Writable file-like object that receives the URLs.

    Returns:
        None.
    """
    sw = sw.strip()
    if not sw:
        return

    br = mechanize.Browser()
    br.set_handle_robots(False)  # do not fetch or obey robots.txt

    # urllib.quote is the Python 2 spelling; this file targets Python 2.
    query = urllib.quote(sw)
    longtime = urllib.quote(" filterui:duration-long")
    # NOTE(review): the duration filter is appended to the `pq` parameter,
    # while a sample Bing URL previously recorded in this file carried it on
    # `q` -- confirm which one is intended before changing it.
    surl = (
        "http://cn.bing.com/videos/search?q=%(sw)s&go=&qs=n&form=Z9LH1"
        "&pq=%(sw)s%(longtime)s&sc=1-7&sp=-1&sk="
        % {"sw": query, "longtime": longtime}
    )
    print(surl)

    rb = br.open(surl)
    try:
        content = rb.read()
    finally:
        # Release the connection even if reading the body fails.
        rb.close()

    urls = _PURL_RE.findall(content)
    print(len(urls))
    for url in urls:
        if any(host in url for host in _EXCLUDED_HOSTS):
            continue
        print(url)
        fo.write(url + "\n")
| 43 | + |
def main():
    """Run a video search for every title listed in idtilte2.lst.

    Each input line is split at its first space; the text after that space
    is stripped of surrounding whitespace and passed to getVideo as the
    search word.  Lines that contain no space, or nothing but whitespace
    after it, are skipped.  All URLs found are written to videopage2.lst,
    which is created or truncated.
    """
    # The with-statement closes both files even when a search raises.
    with open("idtilte2.lst") as f, open("videopage2.lst", "w") as fo:
        for line in f:
            _, sep, title = line.partition(" ")
            title = title.strip()
            if not sep or not title:
                # No separator or empty title: nothing to search for.
                continue
            getVideo(title, fo)
| 52 | + |
# Script entry point: run the scrape only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
| 55 | + |
| 56 | +#import os |
| 57 | +#os.chdir("E:/videos") |
| 58 | +#from kw2vlBaidu import * |
0 commit comments