原文地址:http://bbs.csdn.net/topics/390361293
View Code
# -*- coding: utf-8 -*-
# Scrape the lecture download links from a NetEase (163.com) Open Course page
# and download every lecture video into the current directory.
# By  : hnicypb@126.com
# Ver : 1.0
# Time: 2013-12-30
# Python 2.7 + BeautifulSoup 3.03 (the author reports garbled text with the
# newer 4.1 release that could not be resolved, hence the fallback to 3.03)
# e.g.: python <this_script>.py http://v.163.com/special/opencourse/paradigms.html

import os
import re
import sys
import urllib

try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    # Reported with a clear message by downlaod(), the only place that
    # needs the parser.
    BeautifulSoup = None


def rpb(blocknum, blocksize, totalsize):
    """Print the download progress as a percentage.

    Used as the ``reporthook`` callback of ``urllib.urlretrieve``.

    blocknum  -- number of blocks transferred so far
    blocksize -- size of one block, in bytes
    totalsize -- total size of the file in bytes; values <= 0 mean the
                 size is unknown, in which case nothing is printed
    """
    if totalsize <= 0:
        # No usable total size: a percentage cannot be computed (dividing
        # would raise ZeroDivisionError or yield a negative number).
        return
    # blocknum * blocksize can overshoot on the last block, so cap at 100.
    percent = min(100.0, 100.0 * blocknum * blocksize / totalsize)
    print("%.2f%%" % percent)


def _clean(text):
    """Strip surrounding whitespace and trailing commas from *text*."""
    return text.strip().rstrip(',')


def downlaod(url):
    """Print the course info found at *url* and download its lectures.

    The page is fetched, parsed with BeautifulSoup, the course heading and
    its first three paragraphs are printed, and then every lecture row that
    carries a download link is saved as ``<lecture name>.mp4`` in the
    current directory. Lectures whose ``.mp4`` file already exists are
    skipped.

    Raises ImportError if BeautifulSoup 3 is not installed.
    """
    if BeautifulSoup is None:
        raise ImportError(
            "BeautifulSoup 3.x (module 'BeautifulSoup') is required")

    # Fetch the course page, always releasing the connection.
    page = urllib.urlopen(url)
    try:
        html = page.read()
    finally:
        page.close()

    # Load the markup into BeautifulSoup.
    soup = BeautifulSoup(html)

    # Course information: title followed by up to three paragraphs.
    info = soup.find('div', {"class": "m-cdes"})
    if info is not None:
        if info.h2 is not None:
            print(info.h2.string)
        for paragraph in info.findAll('p')[:3]:
            print(paragraph.string)

    # Detailed lecture list.
    # NOTE(review): only rows with class "u-even" are processed; if the page
    # marks other rows with a different class they are skipped -- confirm
    # against the page markup.
    for row in soup.findAll('tr', {"class": "u-even"}):
        title_cell = row.find('td', {"class": "u-ctitle"})
        down_cell = row.find('td', {"class": "u-cdown"})
        link_tag = down_cell.a if down_cell is not None else None
        if title_cell is None or title_cell.a is None or link_tag is None:
            # Row without a title or without a download link: nothing to do.
            continue
        down_link = link_tag.get('href')
        if not down_link:
            continue

        # Lecture name = text before the anchor + the anchor text.
        # Assumes the first child of the cell is a text node.
        file_name = (_clean(title_cell.contents[0])
                     + _clean(title_cell.a.string or ""))

        print(file_name)
        print(down_link)

        # Check the path that is actually written (with the extension),
        # otherwise every lecture would be downloaded again on each run.
        target = file_name + ".mp4"
        if not os.path.exists(target):
            # Download under a temporary name and rename on success, so an
            # interrupted transfer is not mistaken for a finished file.
            partial = target + ".part"
            urllib.urlretrieve(down_link, partial, rpb)
            os.rename(partial, target)


def main(argv):
    """Download the course whose page URL is ``argv[1]``.

    Prints a usage line to stderr when no URL is supplied.
    """
    if len(argv) >= 2:
        downlaod(argv[1])
    else:
        prog = argv[0] if argv else "script"
        sys.stderr.write("usage: python %s <course page url>\n" % prog)


if __name__ == "__main__":
    main(sys.argv)