wsgi 协议
wsgi 协议
本来没打算这么早就学习 wsgi 的,因为想要学习python 是如何处理网络请求的绕不开 wsgi,所以只好先学习一下 wsgi。先对 wsgi 有个印象,到了学习 django 运行方式以及如何处理网络请求数据的时候就会感觉很顺畅了。本文参考
什么是 wsgi
wsgi 的全称是 Web Server Gateway Interface,这是一个规范,描述了 web server 如何与 web application 交互、web application 如何处理请求。该规范的具体描述在 PEP 3333。wsgi 既要实现 web server,也要实现 web application。在 django 中的 app 其实就是 web application,而 web server 其实在使用命令行输入 python manage.py runserver
或者使用 pycharm 开启 django 项目的时候就已经通过 runserver 命令启动了。manage.py 中会调用 execute_from_command_line(sys.argv)
,sys.argv 里带的就是 runserver 命令,进入该函数,发现执行了 utility.execute():
def execute(self):
    """
    Given the command-line arguments, figure out which subcommand is being
    run, create a parser appropriate to that command, and run it.
    """
    try:
        subcommand = self.argv[1]
    except IndexError:
        subcommand = 'help'  # Display help if no arguments were given.

    # Preprocess options to extract --settings and --pythonpath.
    # These options could affect the commands that are available, so they
    # must be processed early.
    parser = CommandParser(None, usage="%(prog)s subcommand [options] [args]", add_help=False)
    parser.add_argument('--settings')
    parser.add_argument('--pythonpath')
    parser.add_argument('args', nargs='*')  # catch-all
    try:
        options, args = parser.parse_known_args(self.argv[2:])
        handle_default_options(options)
    except CommandError:
        pass  # Ignore any option errors at this point.

    try:
        settings.INSTALLED_APPS
    except ImproperlyConfigured as exc:
        self.settings_exception = exc

    if settings.configured:
        # Start the auto-reloading dev server even if the code is broken.
        # The hardcoded condition is a code smell but we can't rely on a
        # flag on the command class because we haven't located it yet.
        if subcommand == 'runserver' and '--noreload' not in self.argv:
            try:
                autoreload.check_errors(django.setup)()
            except Exception:
                # The exception will be raised later in the child process
                # started by the autoreloader. Pretend it didn't happen by
                # loading an empty list of applications.
                apps.all_models = defaultdict(OrderedDict)
                apps.app_configs = OrderedDict()
                apps.apps_ready = apps.models_ready = apps.ready = True

                # Remove options not compatible with the built-in runserver
                # (e.g. options for the contrib.staticfiles' runserver).
                # Changes here require manually testing as described in
                # #27522.
                _parser = self.fetch_command('runserver').create_parser('django', 'runserver')
                _options, _args = _parser.parse_known_args(self.argv[2:])
                for _arg in _args:
                    self.argv.remove(_arg)

        # In all other cases, django.setup() is required to succeed.
        else:
            django.setup()

    self.autocomplete()

    if subcommand == 'help':
        if '--commands' in args:
            sys.stdout.write(self.main_help_text(commands_only=True) + '\n')
        elif len(options.args) < 1:
            sys.stdout.write(self.main_help_text() + '\n')
        else:
            self.fetch_command(options.args[0]).print_help(self.prog_name, options.args[0])
    # Special-cases: We want 'django-admin --version' and
    # 'django-admin --help' to work, for backwards compatibility.
    elif subcommand == 'version' or self.argv[1:] == ['--version']:
        sys.stdout.write(django.get_version() + '\n')
    elif self.argv[1:] in (['--help'], ['-h']):
        sys.stdout.write(self.main_help_text() + '\n')
    else:
        self.fetch_command(subcommand).run_from_argv(self.argv)
if settings.configured:
    # Start the auto-reloading dev server even if the code is broken.
    # The hardcoded condition is a code smell but we can't rely on a
    # flag on the command class because we haven't located it yet.
    if subcommand == 'runserver' and '--noreload' not in self.argv:
        try:
            autoreload.check_errors(django.setup)()
        except Exception:
            # The exception will be raised later in the child process
            # started by the autoreloader. Pretend it didn't happen by
            # loading an empty list of applications.
            apps.all_models = defaultdict(OrderedDict)
            apps.app_configs = OrderedDict()
            apps.apps_ready = apps.models_ready = apps.ready = True

            # Remove options not compatible with the built-in runserver
            # (e.g. options for the contrib.staticfiles' runserver).
            # Changes here require manually testing as described in
            # #27522.
            _parser = self.fetch_command('runserver').create_parser('django', 'runserver')
            _options, _args = _parser.parse_known_args(self.argv[2:])
            for _arg in _args:
                self.argv.remove(_arg)

    # In all other cases, django.setup() is required to succeed.
    else:
        django.setup()
这段代码等学习 django 处理流程的时候再详细解释,反正只要知道目前经过这个函数的执行,django 的 web server 成功运行了。
实现了 wsgi 的模块/库有 wsgiref(python 内置,下面也是用这个来举例)、werkzeug.serving、twisted.web等。
当前运行在 wsgi 之上的 web 框架有 bottle、flask、django 等。wsgi server 所做的工作仅仅是将从客户端收到的请求传递给 wsgi application,然后将 wsgi application 的返回值作为响应传给客户端。wsgi application 可以是栈式的,这个栈的中间部分叫做中间件
,两端是必须要实现的 application 和 server。所以对客户端来说,中间件扮演服务器;对服务器来说,中间件扮演客户端。在 django 中wsgi 收到的数据用 request对象表示,要传给客户端的数据用 httpresponse对象表示。
搭建一个 wsgi 服务
在上章节说了 python 有个内置的 wsgi 库叫 wsgiref。
# templates 为模板(html)文件夹 # start.py 为项目入口 # urls.py 为路由配置 # views.py 为具体处理路由逻辑代码
start 文件
# start.py
from wsgiref.simple_server import make_server

from urls import urls


def app(env, response):
    """WSGI application: dispatch the request path to a view function.

    Args:
        env: The WSGI environ dict for the current request.
        response: The WSGI ``start_response`` callable; it is called once
            with the status line and the list of response headers.

    Returns:
        A one-element list holding the response body as bytes. Wrapping the
        body in a list lets the server send it in a single write.
    """
    # Debug output kept from the article: show what the server hands us.
    print(env)
    print(response)

    # CGI-style environ keys are upper-case (PEP 3333).
    route = env['PATH_INFO']

    handler = urls.get(route)
    if handler is None:
        # Unknown route: fall back to the error view and report it as 404
        # instead of answering "200 OK" for a page that does not exist.
        status = '404 Not Found'
        handler = urls['/error']
    else:
        status = '200 OK'

    # Render first so that a failing view does not leave headers half-sent.
    data = handler()

    # Send the status line and the response headers.
    response(status, [('Content-Type', 'text/html; charset=utf-8')])

    # Return the binary response body.
    return [data]


if __name__ == '__main__':
    # Create the server object.
    server = make_server('', 8808, app)
    print('服务:http://localhost:8808')
    # Keep the server running and handling requests.
    server.serve_forever()

# A WSGI server is a web server; it handles one HTTP request roughly like this:
#     iterable = app(env, response)
#     for data in iterable:
#         send data to client
其实这个模块底层使用了 socketserver 模块,我前面的博客也有介绍。经过 make_server
就成功创建了 wsgi server。
server.serve_forever() 是为了让服务器持续接收客户端请求,采用的是轮询方法,该方法里面的参数 poll_interval=0.5
,也就是每 0.5 秒轮询一次,轮询采用的是 selectors 模块中的 selector。
urls 文件
# urls.py
# Import the views explicitly instead of ``from views import *`` so it is
# clear where every name in the routing table comes from.
from views import error, index

# Routing table: maps a request path to the view function (the function
# object itself, not its result) that produces the response body.
urls = {
    '/index': index,
    '/error': error,
}
views 文件
# views.py
# View functions: each one handles a request and returns the response body
# as bytes. jinja2 renders the templates so that back-end data can be passed
# into the page.
from jinja2 import Template


def index():
    """Render the home page template and return it as UTF-8 encoded bytes."""
    # Read the template as UTF-8 explicitly; the platform default encoding
    # is not guaranteed to be UTF-8.
    with open('templates/index.html', 'r', encoding='utf-8') as f:
        source = f.read()
    # Pass back-end data to the page through template rendering.
    page = Template(source).render(name='主页')
    return page.encode('utf-8')


def ico():
    """Return the raw bytes of the site icon."""
    with open('favicon.ico', 'rb') as f:
        return f.read()


def error():
    """Return the body used for requests that match no route."""
    return b'404'
- index 测试
- error 测试
wsgi application接口
在上面 wsgi 服务中的 app 就是 wsgi 中的 application,该接口应该实现为一个可调用对象,例如函数、方法、类、含 __call__ 方法的实例。这个可调用对象需要接收两个参数:
- 一个字典,该字典可以包含了客户端请求的信息以及其他信息,可以认为是请求上下文,一般叫做 environment(在这里我取名为 env);
- 一个用于发送 http 状态码与响应头的回调函数。(具体怎么回调的还不清楚)
同时,可调用对象的返回值是响应体(response body),响应体必须是可迭代对象,其中的每个元素都是字节串(bytes)。(把整个响应体放进一个列表里返回,可以减少迭代次数,提高效率)
把上面的 app 代码拷下来:
def app(env, response):
    """WSGI application: dispatch the request path to a view function.

    ``urls`` is the routing table (path -> view function) defined in urls.py.

    Args:
        env: The WSGI environ dict for the current request.
        response: The WSGI ``start_response`` callable; it is called once
            with the status line and the list of response headers.

    Returns:
        A one-element list holding the response body as bytes.
    """
    # Debug output kept from the article: show what the server hands us.
    print(env)
    print(response)

    # CGI-style environ keys are upper-case (PEP 3333).
    route = env['PATH_INFO']

    handler = urls.get(route)
    if handler is None:
        # Unknown route: use the error view and report it as 404 rather
        # than "200 OK".
        status = '404 Not Found'
        handler = urls['/error']
    else:
        status = '200 OK'

    # Render first so that a failing view does not leave headers half-sent.
    data = handler()

    # Send the status line and the response headers.
    response(status, [('Content-Type', 'text/html; charset=utf-8')])

    # Return the binary response body.
    return [data]
当我对服务端发起请求时,会打印出 env,如下:
{'path': '/users/jingxing/virtualenv/py3-env1/bin:/users/jingxing/.nvm/versions/node/v4.9.1/bin:/library/frameworks/python.framework/versions/3.6/bin:/python_study/mongodb/bin://volumes/python_study/mongodb/bin:/library/frameworks/python.framework/versions/3.6/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/applications/vmware', 'ps1': '(py3-env1) ', 'versioner_python_version': '2.7', 'ls_options': '--color=auto', 'logname': 'jingxing', 'xpc_service_name': 'com.jetbrains.pycharm.23248', 'pwd': '/users/jingxing/django_project/day01', 'pycharm_hosted': '1', 'node_path': '/users/jingxing/.nvm/versions/node/v4.9.1/lib/node_modules', 'pycharm_matplotlib_port': '62845', 'pythonpath': '/users/jingxing/django_project/day01:/users/jingxing/django_project/day04:/users/jingxing/django_project/day02:/users/jingxing/pycharmprojects/youku/youkusecond:/users/jingxing/django_project/day03:/applications/', 'nvm_cd_flags': '', 'nvm_dir': '/users/jingxing/.nvm', 'shell': '/bin/bash', 'lscolors': 'cxfxcxdxbxegedabaggxgx', 'pythonioencoding': 'utf-8', 'versioner_python_prefer_32_bit': 'no', 'user': 'jingxing', 'clicolor': 'yes', 'tmpdir': '/var/folders/yl/3drd7wf93f90sfkgpc2zg9cr0000gn/t/', 'ssh_auth_sock': '/private/tmp/', 'virtual_env': '/users/jingxing/virtualenv/py3-env1', 'xpc_flags': '0x0', 'pythonunbuffered': '1', '__cf_user_text_encoding': '0x1f5:0x0:0x0', 'apple_pubsub_socket_render': '/private/tmp/', 'lc_ctype': 'en_us.utf-8', 'nvm_bin': '/users/jingxing/.nvm/versions/node/v4.9.1/bin', 'home': '/users/jingxing', 'server_name': 'jingxingdemacbook-pro.local', 'gateway_interface': 'cgi/1.1', 'server_port': '8808', 'remote_host': '', 'content_length': '', 'script_name': '', 'server_protocol': 'http/1.1', 'server_software': 'wsgiserver/0.2', 'request_method': 'get', 'path_info': '/', 'query_string': '', 'remote_addr': '', 'content_type': 'text/plain', 'http_host': '', 'http_connection': 'keep-alive', 'http_upgrade_insecure_requests': '1', 'http_user_agent': 'mozilla/5.0 
(macintosh; intel mac os x 10_14_2) applewebkit/537.36 (khtml, like gecko) chrome/71.0.3578.98 safari/537.36', 'http_accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'http_accept_encoding': 'gzip, deflate, br', 'http_accept_language': 'en-us,en;q=0.9,zh-cn;q=0.8,zh;q=0.7', 'http_cookie': 'csrftoken=yjpgsyb6tw4fen2fxjy6dhzzyflbu4ssaue9avqwrjlihymealukqjvbpl7ktpph', 'wsgi.input': <_io.bufferedreader name=7>, 'wsgi.errors': <_io.textiowrapper name='<stderr>' mode='w' encoding='utf-8'>, 'wsgi.version': (1, 0), 'wsgi.run_once': false, 'wsgi.url_scheme': 'http', 'wsgi.multithread': true, 'wsgi.multiprocess': false, 'wsgi.file_wrapper': <class 'wsgiref.util.filewrapper'>}
- path_info:路由信息;
- server_port:端口;
- http_host:请求头 Host 的值(主机名和端口),并不是客户端 ip;
- server_protocol:服务器端通信协议
在 app 中向客户端返回数据时,写的为
return [data]
,如果改为 return data
,这将会导致 wsgi 程序的响应变慢。原因是字符串 data
也是可迭代的,它的每一次迭代只能得到 1 byte 的数据量,这也意味着每一次只向客户端发送 1 byte 的数据,直到发送完毕为止(在 Python 3 中迭代 bytes 得到的是 int,wsgiref 会直接报错)。所以推荐使用 return [data]
解析 get 请求
运行 start.py文件,在浏览器中访问http://localhost:8808/?id=1&name=musibii
'query_string': 'id=1&name=musibii' 'request_method': 'get'
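parse_qs() 函数可以很方便的处理 query_string,同时需要 escape() 对特殊字符进行转义,防止脚本注入(注意:cgi.parse_qs 和 cgi.escape 在 Python 3.8 中已被移除,应改用 urllib.parse.parse_qs 和 html.escape)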
# cgi.parse_qs and cgi.escape were removed in Python 3.8; these are the
# supported replacements.
from html import escape
from urllib.parse import parse_qs

query_string = 'id=1&name=musibii'
# parse_qs maps every key to a *list* of values, because a key may repeat.
d = parse_qs(query_string)
# [''] is the default: used when the key is missing from query_string.
print(d.get('id', [''])[0])
print(d.get('name', []))
# Escape HTML special characters so the text cannot be run as markup.
print(escape('<script>alert(123);</script>'))
1 ['musibii'] &lt;script&gt;alert(123);&lt;/script&gt;
处理 get 请求的动态网页
# cgi.parse_qs / cgi.escape were removed in Python 3.8; use the supported
# replacements from html and urllib.parse.
from html import escape
from urllib.parse import parse_qs
from wsgiref.simple_server import make_server

# The form's method is GET and its action is empty, so it submits back to
# the current page.
html = '''
<html>
<body>
   <form method="get" action="">
      <p>
         age: <input type="text" name="age" value="%(age)s">
      </p>
      <p>
         hobbies:
         <input name="hobbies" type="checkbox" value="software" %(checked-software)s > software
         <input name="hobbies" type="checkbox" value="tunning" %(checked-tunning)s > auto tunning
      </p>
      <p>
         <input type="submit" value="submit">
      </p>
   </form>
   <p>
      age: %(age)s<br>
      hobbies: %(hobbies)s
   </p>
</body>
</html>
'''


def app(env, response):
    """WSGI application that echoes the submitted GET form back in the page.

    Args:
        env: The WSGI environ dict; the form data is read from QUERY_STRING.
        response: The WSGI ``start_response`` callable.

    Returns:
        A one-element list holding the rendered page as UTF-8 bytes.
    """
    # Parse the query string; QUERY_STRING may be empty or absent.
    d = parse_qs(env.get('QUERY_STRING', ''))
    age = d.get('age', [''])[0]       # first value submitted for "age"
    hobbies = d.get('hobbies', [])    # every submitted hobby, as a list

    # Escape user input before putting it into HTML (prevents script injection).
    age = escape(age)
    hobbies = [escape(hobby) for hobby in hobbies]

    page = html % {
        'checked-software': 'checked' if 'software' in hobbies else '',
        'checked-tunning': 'checked' if 'tunning' in hobbies else '',
        'age': age or 'empty',
        'hobbies': ','.join(hobbies or ['no hobbies?']),
    }
    # WSGI bodies are bytes; encode once and measure the encoded length so
    # Content-Length is correct for non-ASCII input too.
    response_body = page.encode('utf-8')

    status = '200 OK'
    response_headers = [
        ('Content-Type', 'text/html; charset=utf-8'),
        ('Content-Length', str(len(response_body))),
    ]
    response(status, response_headers)
    return [response_body]


if __name__ == '__main__':
    # Only start the blocking server when run as a script, not on import.
    httpd = make_server('', 8088, app)
    httpd.serve_forever()
处理 post 请求的动态网页
对于 post 请求,表单数据(格式与查询字符串相同)是放在 http 请求正文(request body)中的,而不是显式地出现在 url 中。请求正文在 env 字典变量中键为wsgi.input
对应的值中,这是一个类似 file 的变量:
'wsgi.input': <_io.bufferedreader name=7>
我看源码看晕了还是没找到这个 name 具体是什么意思,经过 google 猜测这个应该是个标识符。实际上这里的 name 就是底层 socket 的文件描述符(file descriptor)编号。
# cgi.parse_qs / cgi.escape were removed in Python 3.8; use the supported
# replacements from html and urllib.parse.
from html import escape
from urllib.parse import parse_qs
from wsgiref.simple_server import make_server

# The form's method is POST.
html = """
<html>
<body>
   <form method="post" action="">
      <p>
         age: <input type="text" name="age" value="%(age)s">
      </p>
      <p>
         hobbies:
         <input name="hobbies" type="checkbox" value="software" %(checked-software)s > software
         <input name="hobbies" type="checkbox" value="tunning" %(checked-tunning)s > auto tunning
      </p>
      <p>
         <input type="submit" value="submit">
      </p>
   </form>
   <p>
      age: %(age)s<br>
      hobbies: %(hobbies)s
   </p>
</body>
</html>
"""


def application(environ, start_response):
    """WSGI application that echoes the submitted POST form back in the page.

    Args:
        environ: The WSGI environ dict; the form data is read from the
            ``wsgi.input`` stream, CONTENT_LENGTH bytes long.
        start_response: The WSGI ``start_response`` callable.

    Returns:
        A one-element list holding the rendered page as UTF-8 bytes.
    """
    # CONTENT_LENGTH may be missing or an empty string.
    try:
        request_body_size = int(environ.get('CONTENT_LENGTH') or 0)
    except ValueError:
        request_body_size = 0
    # A negative size would make read() wait for EOF on the socket.
    request_body_size = max(request_body_size, 0)

    # wsgi.input yields bytes; decode before parsing so the parsed keys are
    # str and the lookups below actually match.
    request_body = environ['wsgi.input'].read(request_body_size)
    d = parse_qs(request_body.decode('utf-8', errors='replace'))

    # Extract the submitted values.
    age = d.get('age', [''])[0]
    hobbies = d.get('hobbies', [])

    # Escape user input before putting it into HTML (prevents script injection).
    age = escape(age)
    hobbies = [escape(hobby) for hobby in hobbies]

    page = html % {
        'checked-software': 'checked' if 'software' in hobbies else '',
        'checked-tunning': 'checked' if 'tunning' in hobbies else '',
        'age': age or 'empty',
        'hobbies': ', '.join(hobbies or ['no hobbies?']),
    }
    # WSGI bodies are bytes; Content-Length must count encoded bytes.
    response_body = page.encode('utf-8')

    status = '200 OK'
    response_headers = [
        ('Content-Type', 'text/html; charset=utf-8'),
        ('Content-Length', str(len(response_body))),
    ]
    start_response(status, response_headers)
    return [response_body]


if __name__ == '__main__':
    # Only start the blocking server when run as a script, not on import.
    httpd = make_server('localhost', 8051, application)
    httpd.serve_forever()
中间件位于 wsgi server 和 wsgi application 之间。所以对客户端来说,中间件扮演服务器;对服务器来说,中间件扮演客户端。在 django 中wsgi 收到的数据用 request对象表示,要传给客户端的数据用 httpresponse对象表示。
from wsgiref.simple_server import make_server


def application(environ, start_response):
    """Minimal WSGI application that answers every request with a greeting.

    Args:
        environ: The WSGI environ dict (unused).
        start_response: The WSGI ``start_response`` callable.

    Returns:
        A one-element list holding the plain-text body as bytes.
    """
    # WSGI bodies must be bytes, not str.
    response_body = b'hello world!'
    status = '200 OK'
    response_headers = [
        ('Content-Type', 'text/plain'),
        ('Content-Length', str(len(response_body))),
    ]
    start_response(status, response_headers)
    return [response_body]


class Upperware:
    """WSGI middleware that upper-cases every chunk of the wrapped app's body.

    To the server it looks like an application (it is callable with
    ``environ`` and ``start_response``); to the wrapped application it looks
    like the server (it is the one that calls it).
    """

    def __init__(self, app):
        # The WSGI application this middleware sits in front of.
        self.wrapped_app = app

    def __call__(self, environ, start_response):
        response = self.wrapped_app(environ, start_response)
        try:
            for data in response:
                yield data.upper()
        finally:
            # PEP 3333: if the wrapped iterable has close(), the caller
            # must invoke it once the response is finished.
            close = getattr(response, 'close', None)
            if close is not None:
                close()


# Backward-compatible alias for the original lower-case class name.
upperware = Upperware

wrapped_app = Upperware(application)

if __name__ == '__main__':
    # Only start the blocking server when run as a script, not on import.
    httpd = make_server('localhost', 8051, wrapped_app)
    httpd.serve_forever()
上一篇: 一份详细的报表,揭秘*职业生存现状