Python札记1-HTTP Download
程序员文章站
2024-02-01 10:14:22
...
# -*- coding: utf-8 -*-
__author__ = 'gull'

import os
import urllib2
from urlparse import urlsplit

import log_factory


def get(url, filePath, fileName = None, buffer = 16 * 1024):
    """Download ``url`` over HTTP and store the response body in ``filePath``.

    Args:
        url: Address to request.
        filePath: Directory the downloaded file is written into.
        fileName: Name of the file to create. When empty, the name is taken
            from the response's Content-Disposition header, and failing
            that from the last segment of the final URL's path.
        buffer: Number of bytes requested per read while copying.

    Returns:
        A tuple ``(fileName, totalLength, fullfileName)`` where
        ``totalLength`` is the raw Content-Length header value (``None``
        when the server did not send one) and ``fullfileName`` is the path
        that was written.
    """
    log = log_factory.getLogger()
    log.info("send http request to %s", url)

    def writefile(fsrc, fdst, totalLength):
        """Copy data from file-like object fsrc to file-like object fdst."""
        # A missing or malformed Content-Length only affects the progress
        # message, so treat both cases as "unknown" instead of failing.
        try:
            total = float(totalLength) if totalLength else None
        except ValueError:
            total = None

        bytesRead = 0.0
        while True:
            buf = fsrc.read(buffer)
            if not buf:
                break
            fdst.write(buf)
            bytesRead += len(buf)
            if total is not None:
                log.info("%s: %.02f/%.02f kb (%d%%)",
                         fileName,
                         bytesRead / 1024.0,
                         total / 1024.0,
                         100 * bytesRead / total)
            else:
                log.info("%s: %.02f/? kb (?%%)",
                         fileName,
                         bytesRead / 1024.0)

    def getFileName(openUrl):
        """Pick a file name from the response headers, else from the URL."""
        disposition = openUrl.info().getheader('Content-Disposition')
        if disposition:
            # Parse "attachment; filename=foo.txt" style parameters. Split
            # on the first '=' only so values containing '=' stay intact.
            cd = {}
            for part in disposition.split(';'):
                key, _, value = part.strip().partition('=')
                cd[key.strip().lower()] = value.strip()
            filename = cd.get('filename', '').strip("\"'")
            # The header is server-controlled: keep only the final path
            # component so it cannot point outside of filePath.
            filename = os.path.basename(filename.replace('\\', '/'))
            if filename:
                return filename
        # If no filename was found above, parse it out of the final URL.
        return os.path.basename(urlsplit(openUrl.url)[2])

    def getFileLength(openUrl):
        """Return the Content-Length header value, or None if absent."""
        return openUrl.info().getheader("Content-Length")

    r = urllib2.urlopen(urllib2.Request(url), timeout = 120)  # timeout is 120s
    try:
        fileName = fileName or getFileName(r)
        fullfileName = "%s%s%s" % (filePath, os.path.sep, fileName)
        totalLength = getFileLength(r)
        log.info("write response date to %s", fullfileName)
        with open(fullfileName, 'wb') as f:
            writefile(r, f, totalLength)
        return fileName, totalLength, fullfileName
    finally:
        r.close()
        log.info("http request finished.")
参数说明:
- url:即下载路径,如http://apache.etoak.com/tomcat/tomcat-7/v7.0.20/bin/apache-tomcat-7.0.20.tar.gz
- filePath:下载文件保存的文件夹
- fileName:下载后保存的文件名,可选参数。若为空,则会取response header(Content-Disposition)中的filename信息(如下图);若仍为空,则取url路径的最后一段作为文件名(如:apache-tomcat-7.0.20.tar.gz)
- buffer:下载缓冲区大小,默认16k
可继续加入以下特性:
- 支持代理
- 文件分块,多线程下载
- 异步下载,回调机制
- ...
上一篇: http传输报文过大
下一篇: 包装类和基本类型的转换