# SimpleHTTPServerWithUpload.py
# This code tweaked from https://github.com/tualatrix/tools/blob/master/SimpleHTTPServerWithUpload.py
"""Simple HTTP Server With Upload.
This module builds on BaseHTTPServer by implementing the standard GET
and HEAD requests in a fairly straightforward manner.
__version__ = "0.1"
__all__ = ["SimpleHTTPRequestHandler"]
__author__ = "bones7456"
__home_page__ = "http://li2z.cn/"
import os
import posixpath
import BaseHTTPServer
import urllib
import cgi
import shutil
import mimetypes
import re
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
"""Simple HTTP request handler with GET/HEAD/POST commands.
This serves files from the current directory and any of its
subdirectories. The MIME type for files is determined by
calling the .guess_type() method. And can reveive file uploaded
by client.
The GET/HEAD/POST requests are identical except that the HEAD
request omits the actual contents of the file.
server_version = "SimpleHTTPWithUpload/" + __version__
def do_GET(self):
"""Serve a GET request."""
f = self.send_head()
if f:
self.copyfile(f, self.wfile)
f.close()
def do_HEAD(self):
"""Serve a HEAD request."""
f = self.send_head()
if f:
f.close()
def do_POST(self):
"""Serve a POST request."""
r, info = self.deal_post_data()
print r, info, "by: ", self.client_address
f = StringIO()
f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
f.write("<html>\n<title>Upload Result Page</title>\n")
f.write("<body>\n<h2>Upload Result Page</h2>\n")
f.write("<hr>\n")
if r:
f.write("<strong>Success:</strong> ")
else:
f.write("<strong>Failed:</strong> ")
f.write(info)
f.write("<br><a href=\"%s\">back</a>" % self.headers['referer'])
f.write("<hr><small>Powered By: bones7456, check new version at ")
f.write("<a href=\"http://li2z.cn/?s=SimpleHTTPServerWithUpload\">")
f.write("here</a>.</small></body>\n</html>\n")
length = f.tell()
f.seek(0)
self.send_response(200)
self.send_header("Content-type", "text/html")
self.send_header("Content-Length", str(length))
self.end_headers()
if f:
self.copyfile(f, self.wfile)
f.close()
def deal_post_data(self):
boundary = self.headers.plisttext.split("=")[1]
remainbytes = int(self.headers['content-length'])
line = self.rfile.readline()
remainbytes -= len(line)
if not boundary in line:
return (False, "Content NOT begin with boundary")
line = self.rfile.readline()
remainbytes -= len(line)
fn = re.findall(r'Content-Disposition.*name="file"; filename="(.*)"', line)
if not fn:
return (False, "Can't find out file name...")
elif not fn[0]:
return (False, "No file chosen.")
path = self.translate_path(self.path)
fn = os.path.join(path, fn[0])
while os.path.exists(fn):
fn += "_"
line = self.rfile.readline()
remainbytes -= len(line)
line = self.rfile.readline()
remainbytes -= len(line)
out = open(fn, 'wb')
except IOError:
return (False, "Can't create file to write, do you have permission to write?")
if line.strip():
preline = line
else:
preline = self.rfile.readline()
remainbytes -= len(preline)
while remainbytes > 0:
line = self.rfile.readline()
remainbytes -= len(line)
if boundary in line:
preline = preline[0:-1]
if preline.endswith('\r'):
preline = preline[0:-1]
out.write(preline)
out.close()
return (True, "File '%s' upload success!" % fn)
else:
out.write(preline)
preline = line
return (False, "Unexpect Ends of data.")
def send_head(self):
"""Common code for GET and HEAD commands.
This sends the response code and MIME headers.
Return value is either a file object (which has to be copied
to the outputfile by the caller unless the command was HEAD,
and must be closed by the caller under all circumstances), or
None, in which case the caller has nothing further to do.
path = self.translate_path(self.path)
f = None
if os.path.isdir(path):
if not self.path.endswith('/'):
# redirect browser - doing basically what apache does
self.send_response(301)
self.send_header("Location", self.path + "/")
self.end_headers()
return None
for index in "index.html", "index.htm":
index = os.path.join(path, index)
if os.path.exists(index):
path = index
break
else:
return self.list_directory(path)
ctype = self.guess_type(path)
# Always read in binary mode. Opening files in text mode may cause
# newline translations, making the actual size of the content
# transmitted *less* than the content-length!
f = open(path, 'rb')
except IOError:
self.send_error(404, "File not found")
return None
self.send_response(200)
self.send_header("Content-type", ctype)
fs = os.fstat(f.fileno())
self.send_header("Content-Length", str(fs[6]))
self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
self.end_headers()
return f
def list_directory(self, path):
"""Helper to produce a directory listing (absent index.html).
Return value is either a file object, or None (indicating an
error). In either case, the headers are sent, making the
interface the same as for send_head().
list = os.listdir(path)
except os.error:
self.send_error(404, "No permission to list directory")
return None
list.sort(key=lambda a: a.lower())
f = StringIO()
displaypath = cgi.escape(urllib.unquote(self.path))
f.write('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
f.write("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
f.write("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
f.write("<hr>\n")
f.write("<form ENCTYPE=\"multipart/form-data\" method=\"post\">")
f.write("<input name=\"file\" type=\"file\"/>")
f.write("<input type=\"submit\" value=\"upload\"/></form>\n")
f.write("<hr>\n<ul>\n")
for name in list:
fullname = os.path.join(path, name)
displayname = linkname = name
# Append / for directories or @ for symbolic links
if os.path.isdir(fullname):
displayname = name + "/"
linkname = name + "/"
if os.path.islink(fullname):
displayname = name + "@"
# Note: a link to a directory displays with @ and links with /
f.write('<li><a href="%s">%s</a>\n'
% (urllib.quote(linkname), cgi.escape(displayname)))
f.write("</ul>\n<hr>\n</body>\n</html>\n")
length = f.tell()
f.seek(0)
self.send_response(200)
self.send_header("Content-type", "text/html")
self.send_header("Content-Length", str(length))
self.end_headers()
return f
def translate_path(self, path):
"""Translate a /-separated PATH to the local filename syntax.
Components that mean special things to the local file system
(e.g. drive or directory names) are ignored. (XXX They should
probably be diagnosed.)
# abandon query parameters
path = path.split('?',1)[0]
path = path.split('#',1)[0]
path = posixpath.normpath(urllib.unquote(path))
words = path.split('/')
words = filter(None, words)
path = os.getcwd()
for word in words:
drive, word = os.path.splitdrive(word)
head, word = os.path.split(word)
if word in (os.curdir, os.pardir): continue
path = os.path.join(path, word)
return path
def copyfile(self, source, outputfile):
"""Copy all data between two file objects.
The SOURCE argument is a file object open for reading
(or anything with a read() method) and the DESTINATION
argument is a file object open for writing (or
anything with a write() method).
The only reason for overriding this would be to change
the block size or perhaps to replace newlines by CRLF
-- note however that this the default server uses this
to copy binary data as well.
shutil.copyfileobj(source, outputfile)
def guess_type(self, path):
"""Guess the type of a file.
Argument is a PATH (a filename).
Return value is a string of the form type/subtype,
usable for a MIME Content-type header.
The default implementation looks the file's extension
up in the table self.extensions_map, using application/octet-stream
as a default; however it would be permissible (if
slow) to look inside the data to make a better guess.
base, ext = posixpath.splitext(path)
if ext in self.extensions_map:
return self.extensions_map[ext]
ext = ext.lower()
if ext in self.extensions_map:
return self.extensions_map[ext]
else:
return self.extensions_map['']
if not mimetypes.inited:
mimetypes.init() # try to read system mime.types
extensions_map = mimetypes.types_map.copy()
extensions_map.update({
'': 'application/octet-stream', # Default
'.py': 'text/plain',
'.c': 'text/plain',
'.h': 'text/plain',
def test(HandlerClass = SimpleHTTPRequestHandler,
ServerClass = BaseHTTPServer.HTTPServer):
BaseHTTPServer.test(HandlerClass, ServerClass)
if __name__ == '__main__':
test()
将其保存为 SimpleHTTPServerWithUpload.py,采用如下方式创建使用 8080 端口的 Web 服务器:
python SimpleHTTPServerWithUpload.py 8080
参考资料:
1、非常简单的Python HTTP服务
https://coolshell.cn/articles/1480.html
2、What is the Python 3 equivalent of “python -m SimpleHTTPServer”
http://stackoverflow.com/questions/7943751/what-is-the-python-3-equivalent-of-python-m-simplehttpserver
listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind((HOST, PORT))
listen_socket.listen(1)
print 'Serving HTTP on port %s ...' % PORT
while True:
client_connection, client_address = listen_socket.accept()
request = client_connection.recv(1024)
print request
http_response = """
HTTP/1.1 200 OK
Hello, World!
client_connection.sendall(http_response)
client_connection.close()
def __init__(self, server_address):
# Create a listening socket
self.listen_socket = listen_socket = socket.socket(
self.address_family,
self.socket_type
# Allow to reuse the same address
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
# Bind
listen_socket.bind(server_address)
# Activate
listen_socket.listen(self.request_queue_size)
# Get server host name and port
host, port = self.listen_socket.getsockname()[:2]
self.server_name = socket.getfqdn(host)
self.server_port = port
# Return headers set by Web framework/Web application
self.headers_set = []
def set_app(self, application):
self.application = application
def serve_forever(self):
listen_socket = self.listen_socket
while True:
# New client connection
self.client_connection, client_address = listen_socket.accept()
# Handle one request and close the client connection. Then
# loop over to wait for another client connection
self.handle_one_request()
def handle_one_request(self):
self.request_data = request_data = self.client_connection.recv(1024)
# Print formatted request data a la 'curl -v'
print(''.join(
'< {line}\n'.format(line=line)
for line in request_data.splitlines()
self.parse_request(request_data)
# Construct environment dictionary using request data
env = self.get_environ()
# It's time to call our application callable and get
# back a result that will become HTTP response body
result = self.application(env, self.start_response)
# Construct a response and send it back to the client
self.finish_response(result)
def parse_request(self, text):
request_line = text.splitlines()[0]
request_line = request_line.rstrip('\r\n')
# Break down the request line into components
(self.request_method, # GET
self.path, # /hello
self.request_version # HTTP/1.1
) = request_line.split()
def get_environ(self):
env = {}
# The following code snippet does not follow PEP8 conventions
# but it's formatted the way it is for demonstration purposes
# to emphasize the required variables and their values
# Required WSGI variables
env['wsgi.version'] = (1, 0)
env['wsgi.url_scheme'] = 'http'
env['wsgi.input'] = StringIO.StringIO(self.request_data)
env['wsgi.errors'] = sys.stderr
env['wsgi.multithread'] = False
env['wsgi.multiprocess'] = False
env['wsgi.run_once'] = False
# Required CGI variables
env['REQUEST_METHOD'] = self.request_method # GET
env['PATH_INFO'] = self.path # /hello
env['SERVER_NAME'] = self.server_name # localhost
env['SERVER_PORT'] = str(self.server_port) # 8888
return env
def start_response(self, status, response_headers, exc_info=None):
# Add necessary server headers
server_headers = [
('Date', 'Tue, 31 Mar 2015 12:54:48 GMT'),
('Server', 'WSGIServer 0.2'),
self.headers_set = [status, response_headers + server_headers]
# To adhere to WSGI specification the start_response must return
# a 'write' callable. We simplicity's sake we'll ignore that detail
# for now.
# return self.finish_response
def finish_response(self, result):
status, response_headers = self.headers_set
response = 'HTTP/1.1 {status}\r\n'.format(status=status)
for header in response_headers:
response += '{0}: {1}\r\n'.format(*header)
response += '\r\n'
for data in result:
response += data
# Print formatted response data a la 'curl -v'
print(''.join(
'> {line}\n'.format(line=line)
for line in response.splitlines()
self.client_connection.sendall(response)
finally:
self.client_connection.close()
SERVER_ADDRESS = (HOST, PORT) = '', 8888
def make_server(server_address, application):
server = WSGIServer(server_address)
server.set_app(application)
return server
if __name__ == '__main__':
if len(sys.argv) < 2:
sys.exit('Provide a WSGI application object as module:callable')
app_path = sys.argv[1]
module, application = app_path.split(':')
module = __import__(module)
application = getattr(module, application)
httpd = make_server(SERVER_ADDRESS, application)
print('WSGIServer: Serving HTTP on port {port} ...\n'.format(port=PORT))
httpd.serve_forever()
它明显比本系列第一部分中的服务器代码大,但为了方便你理解,而不陷入具体细节,它也足够小了(只有150行不到)。上面的服务器还做了别的事 – 它可以运行你喜欢的Web框架写的基本的Web应用,可以是Pyramid,Flask,Django,或者其他的Python WSGI框架。
不信?自己试试看。把上面的代码保存成webserver2.py或者直接从Github上下载。如果你不带参数地直接运行它,它就会报怨然后退出。 Python
12
|
$ python webserver2.pyProvide a WSGI application object as module:callable
|
它真的想给Web框架提供服务,从这开始有趣起来。要运行服务器你唯一需要做的是安装Python。但是要运行使用Pyramid,Flask,和Django写的应用,你得先安装这些框架。一起安装这三个吧。我比较喜欢使用virtualenv。跟着以下步骤来创建和激活一个虚拟环境,然后安装这三个Web框架。
$ [sudo] pip install virtualenv
$ mkdir ~/envs
$ virtualenv ~/envs/lsbaws/
$ cd ~/envs/lsbaws/
bin include lib
$ source bin/activate
(lsbaws) $ pip install pyramid
(lsbaws) $ pip install flask
(lsbaws) $ pip install django
此时你需要创建一个Web应用。我们先拿Pyramid开始吧。保存以下代码到保存webserver2.py时相同的目录。命名为pyramidapp.py。或者直接从Github上下载:
from pyramid.config import Configurator
from pyramid.response import Response
def hello_world(request):
return Response(
'Hello world from Pyramid!\n',
content_type='text/plain',
config = Configurator()
config.add_route('hello', '/hello')
config.add_view(hello_world, route_name='hello')
app = config.make_wsgi_app()
现在你已经准备好用完全属于自己的Web服务器来运行Pyramid应用了:
12
|
(lsbaws) $ python webserver2.py pyramidapp:appWSGIServer: Serving HTTP on port 8888 …
|
刚才你告诉你的服务器从python模块‘pyramidapp’中加载可调用的‘app’,现在你的服务器准备好了接受请求然后转发它们给你的Pyramid应用。目前应用只处理一个路由:/hello 路由。在浏览器里输入http://localhost:8888/hello地址,按回车键,观察结果:
def run_application(application):
"""Server code."""
# This is where an application/framework stores
# an HTTP status and HTTP response headers for the server
# to transmit to the client
headers_set = []
# Environment dictionary with WSGI/CGI variables
environ = {}
def start_response(status, response_headers, exc_info=None):
headers_set[:] = [status, response_headers]
# Server invokes the ‘application' callable and gets back the
# response body
result = application(environ, start_response)
# Server builds an HTTP response and transmits it to the client
def app(environ, start_response):
"""A barebones WSGI app."""
start_response('200 OK', [('Content-Type', 'text/plain')])
return ['Hello world!']
run_application(app)
以下是它如何工作的:
-
1.框架提供一个可调用的’应用’(WSGI规格并没有要求如何实现)
-
2.服务器每次接收到HTTP客户端请求后,执行可调用的’应用’。服务器把一个包含了WSGI/CGI变量的字典和一个可调用的’start_response’做为参数给可调用的’application’。
-
3.框架/应用生成HTTP状态和HTTP响应头,然后把它们传给可调用的’start_response’,让服务器保存它们。框架/应用也返回一个响应体。
-
4.服务器把状态,响应头,响应体合并到HTTP响应里,然后传给(HTTP)客户端(这步不是(WSGI)规格里的一部分,但它是后面流程中的一步,为了解释清楚我加上了这步)
status = '200 OK'
response_headers = [('Content-Type', 'text/plain')]
start_response(status, response_headers)
return ['Hello world from a simple WSGI application!\n']
#####################################################################
# Iterative server - webserver3a.py #
# #
# Tested with Python 2.7.9 & Python 3.4 on Ubuntu 14.04 & Mac OS X #
#####################################################################
import socket
SERVER_ADDRESS = (HOST, PORT) = '', 8888
REQUEST_QUEUE_SIZE = 5
def handle_request(client_connection):
request = client_connection.recv(1024)
print(request.decode())
http_response = b"""
HTTP/1.1 200 OK
Hello, World!
client_connection.sendall(http_response)
def serve_forever():
listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind(SERVER_ADDRESS)
listen_socket.listen(REQUEST_QUEUE_SIZE)
print('Serving HTTP on port {port} ...'.format(port=PORT))
while True:
client_connection, client_address = listen_socket.accept()
handle_request(client_connection)
client_connection.close()
if __name__ == '__main__':
serve_forever()
要观察服务器同一时间只处理一个客户端请求,稍微修改一下服务器,在每次发送给客户端响应后添加一个60秒的延迟。添加这行代码就是告诉服务器睡眠60秒。
#########################################################################
# Iterative server - webserver3b.py #
# #
# Tested with Python 2.7.9 & Python 3.4 on Ubuntu 14.04 & Mac OS X #
# #
# - Server sleeps for 60 seconds after sending a response to a client #
#########################################################################
import socket
import time
SERVER_ADDRESS = (HOST, PORT) = '', 8888
REQUEST_QUEUE_SIZE = 5
def handle_request(client_connection):
request = client_connection.recv(1024)
print(request.decode())
http_response = b"""
HTTP/1.1 200 OK
Hello, World!
client_connection.sendall(http_response)
time.sleep(60) # sleep and block the process for 60 seconds
def serve_forever():
listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind(SERVER_ADDRESS)
listen_socket.listen(REQUEST_QUEUE_SIZE)
print('Serving HTTP on port {port} ...'.format(port=PORT))
while True:
client_connection, client_address = listen_socket.accept()
handle_request(client_connection)
client_connection.close()
if __name__ == '__main__':
serve_forever()
启动服务器:
1
|
$ python webserver3b.py
|
现在打开一个新的控制台窗口,运行以下curl命令。你应该立即就会看到屏幕上打印出了“Hello, World!”字符串:
$ curl http://localhost:8888/hello
Hello, World!
And without delay open up a second terminal window and run the same curl command:
立刻再打开一个控制台窗口,然后运行相同的curl命令:
1
|
$ curl http://localhost:8888/hello
|
如果你是在60秒内做的,那么第二个curl应该不会立刻产生任何输出,而是挂起。而且服务器也不会在标准输出打印出新请求体。在我的Mac上看起来像这样(在右下角的黄色高亮窗口表示第二个curl命令正挂起,等待服务器接受这个连接):
-
服务器创建一个TCP/IP socket。在Python里使用下面的语句即可:
Python 1listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-
服务器可能会设置一些socket选项(这是可选的,上面的代码就设置了,为了在杀死或重启服务器后,立马就能再次重用相同的地址)。
Python 1listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-
然后,服务器绑定指定地址,bind函数分配一个本地地址给socket。在TCP中,调用bind可以指定一个端口号,一个IP地址,两者都,或者两者都不指定。
Python 1listen_socket.bind(SERVER_ADDRESS)
-
然后,服务器让这个socket成为监听socket。
Python 1listen_socket.listen(REQUEST_QUEUE_SIZE)
listen方法只会被服务器调用。它告诉内核它要接受这个socket上的到来的连接请求了。
做完这些后,服务器开始循环地一次接受一个客户端连接。当有连接到达时,aceept调用返回已连接的客户端socket。然后,服务器从这个socket读取请求数据,在标准输出上把数据打印出来,并回发一个消息给客户端。然后,服务器关闭客户端连接,准备好再次接受新的客户端连接。
下面是客户端使用TCP/IP和服务器通信要做的:
# create a socket and connect to a server
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(('localhost', 8888))
# send and receive some data
sock.sendall(b'test')
data = sock.recv(1024)
print(data.decode())
创建socket后,客户端需要连接服务器。这是通过connect调用做到的: Python
1
|
sock.connect((‘localhost’, 8888))
|
客户端仅需提供要连接的远程IP地址或主机名和远程端口号即可。
可能你注意到了,客户端不用调用bind和accept。客户端没必要调用bind,是因为客户端不关心本地IP地址和本地端口号。当客户端调用connect时内核的TCP/IP栈自动分配一个本地IP址地和本地端口。本地端口被称为暂时端口( ephemeral port),也就是,short-lived 端口。
>>> import socket
>>> sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
>>> sock.connect(('localhost', 8888))
>>> host, port = sock.getsockname()[:2]
>>> host, port
('127.0.0.1', 60589)
上面这个例子中,内核分配了60589这个暂时端口。
在我开始回答第二部分提出的问题前,我需要快速讲一下几个重要的概念。你很快就知道为什么重要了。两个概念是进程和文件描述符。
什么是进程?进程就是一个正在运行的程序的实例。比如,当服务器代码执行时,它被加载进内存,运行起来的程序实例被称为进程。内核记录了进程的一堆信息用于跟踪,进程ID就是一个例子。当你运行服务器 webserver3a.py 或 webserver3b.py 时,你就在运行一个进程了。
###########################################################################
# Concurrent server - webserver3c.py #
# #
# Tested with Python 2.7.9 & Python 3.4 on Ubuntu 14.04 & Mac OS X #
# #
# - Child process sleeps for 60 seconds after handling a client's request #
# - Parent and child processes close duplicate descriptors #
# #
###########################################################################
import os
import socket
import time
SERVER_ADDRESS = (HOST, PORT) = '', 8888
REQUEST_QUEUE_SIZE = 5
def handle_request(client_connection):
request = client_connection.recv(1024)
print(
'Child PID: {pid}. Parent PID {ppid}'.format(
pid=os.getpid(),
ppid=os.getppid(),
print(request.decode())
http_response = b"""
HTTP/1.1 200 OK
Hello, World!
client_connection.sendall(http_response)
time.sleep(60)
def serve_forever():
listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind(SERVER_ADDRESS)
listen_socket.listen(REQUEST_QUEUE_SIZE)
print('Serving HTTP on port {port} ...'.format(port=PORT))
print('Parent PID (PPID): {pid}n'.format(pid=os.getpid()))
while True:
client_connection, client_address = listen_socket.accept()
pid = os.fork()
if pid == 0: # child
listen_socket.close() # close child copy
handle_request(client_connection)
client_connection.close()
os._exit(0) # child exits here
else: # parent
client_connection.close() # close parent copy and loop over
if __name__ == '__main__':
serve_forever()
在深入讨论for如何工作之前,先自己试试,看看服务器确实可以同时处理多个请求,不像webserver3a.py和webserver3b.py。用下面命令启动服务器: Python
1
|
$ python webserver3c.py
|
像你以前那样试试用两个curl命令,自己看看,现在虽然服务器子进程在处理客户端请求时睡眠60秒,但不影响别的客户端,因为它们是被不同的完全独立的进程处理的。你应该能看到curl命令立刻就输出了“Hello, World!”,然后挂起60秒。你可以接着想运行多少curl命令就运行多少(嗯,几乎是任意多),它们都会立刻输出服务器的响应“Hello, Wrold”,而且不会有明显的延迟。试试看。
理解fork()的最重要的点是,你fork了一次,但它返回了两次:一个是在父进程里,一个是在子进程里。当你fork了一个新进程,子进程返回的进程ID是0。父进程里fork返回的是子进程的PID。
那么,如果它的父进程关闭了同一个socket,子进程为什么还能从客户端socket读取数据呢?答案就在上图。内核使用描述符引用计数来决定是否关闭socket。只有当描述符引用计数为0时才关闭socket。当服务器创建一个子进程,子进程获取了父进程的文件描述符拷贝,内核增加了这些描述符的引用计数。在一个父进程和一个子进程的场景中,客户端socket的描述符引用计数就成了2,当父进程关闭了客户端连接socket,它仅仅把引用计数减为1,不会引发内核关闭这个socket。子进程也把父进程的listen_socket拷贝给关闭了,因为子进程不用管接受新连接,它只关心处理已经连接的客户端的请求: Python
1
|
listen_socket.close() # close child copy
|
本文后面我会讲下如果不关闭复制的描述符会发生什么。
你从并发服务器源码看到啦,现在服务器父进程唯一的角色就是接受一个新的客户端连接,fork一个新的子进程来处理客户端请求,然后重复接受另一个客户端连接,就没有别的事做啦。服务器父进程不处理客户端请求——它的小弟(子进程)干这事。
跑个题,我们说两个事件并发到底是什么意思呢?
###########################################################################
# Concurrent server - webserver3d.py #
# #
# Tested with Python 2.7.9 & Python 3.4 on Ubuntu 14.04 & Mac OS X #
###########################################################################
import os
import socket
SERVER_ADDRESS = (HOST, PORT) = '', 8888
REQUEST_QUEUE_SIZE = 5
def handle_request(client_connection):
request = client_connection.recv(1024)
http_response = b"""
HTTP/1.1 200 OK
Hello, World!
client_connection.sendall(http_response)
def serve_forever():
listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind(SERVER_ADDRESS)
listen_socket.listen(REQUEST_QUEUE_SIZE)
print('Serving HTTP on port {port} ...'.format(port=PORT))
clients = []
while True:
client_connection, client_address = listen_socket.accept()
# store the reference otherwise it's garbage collected
# on the next loop run
clients.append(client_connection)
pid = os.fork()
if pid == 0: # child
listen_socket.close() # close child copy
handle_request(client_connection)
client_connection.close()
os._exit(0) # child exits here
else: # parent
# client_connection.close()
print(len(clients))
if __name__ == '__main__':
serve_forever()
启动服务器:
1
|
$ python webserver3d.py
|
使用curl去连接服务器:
12
|
$ curl http://localhost:8888/helloHello, World!
|
好的,curl打印出来并发服务器的响应,但是它不终止,一直挂起。发生了什么?服务器不再睡眠60秒了:它的子进程开心地处理了客户端请求,关闭了客户端连接然后退出啦,但是客户端curl仍然不终止。
$ ulimit -a
core file size (blocks, -c) 0
data seg size (kbytes, -d) unlimited
scheduling priority (-e) 0
file size (blocks, -f) unlimited
pending signals (-i) 3842
max locked memory (kbytes, -l) 64
max memory size (kbytes, -m) unlimited
open files (-n) 1024
pipe size (512 bytes, -p) 8
POSIX message queues (bytes, -q) 819200
real-time priority (-r) 0
stack size (kbytes, -s) 8192
cpu time (seconds, -t) unlimited
max user processes (-u) 3842
virtual memory (kbytes, -v) unlimited
file locks (-x) unlimited
看到上面的了咩,我的Ubuntu上,进程的最大可打开文件描述符是1024。
现在咱们看看怎么让服务器耗尽可用文件描述符。在已存在或新的控制台窗口,调用服务器最大可打开文件描述符为256: Python
在同一个控制台上启动webserver3d.py: Python
1
|
$ python webserver3d.py
|
使用下面的client3.py客户端来测试服务器。
#####################################################################
# Test client - client3.py #
# #
# Tested with Python 2.7.9 & Python 3.4 on Ubuntu 14.04 & Mac OS X #
#####################################################################
import argparse
import errno
import os
import socket
SERVER_ADDRESS = 'localhost', 8888
REQUEST = b"""
GET /hello HTTP/1.1
Host: localhost:8888
def main(max_clients, max_conns):
socks = []
for client_num in range(max_clients):
pid = os.fork()
if pid == 0:
for connection_num in range(max_conns):
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(SERVER_ADDRESS)
sock.sendall(REQUEST)
socks.append(sock)
print(connection_num)
os._exit(0)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='Test client for LSBAWS.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
parser.add_argument(
'--max-conns',
type=int,
default=1024,
help='Maximum number of connections per client.'
parser.add_argument(
'--max-clients',
type=int,
default=1,
help='Maximum number of clients.'
args = parser.parse_args()
main(args.max_clients, args.max_conns)
在新的控制台窗口里,启动client3.py,让它创建300个连接同时连接服务器。 Python
1
|
$ python client3.py –max-clients=300
|
很快服务器就崩了。下面是我电脑上抛异常的截图:
$ ps auxw | grep -i python | grep -v grep
vagrant 9099 0.0 1.2 31804 6256 pts/0 S+ 16:33 0:00 python webserver3d.py
vagrant 9102 0.0 0.0 0 0 pts/0 Z+ 16:33 0:00 [python] <defunct>
你看到上面第二行了咩?它说PId为9102的进程的状态是Z+,进程的名称是。这个就是僵尸啦。僵尸的问题在于,你杀死不了他们啊。
###########################################################################
# Concurrent server - webserver3e.py #
# #
# Tested with Python 2.7.9 & Python 3.4 on Ubuntu 14.04 & Mac OS X #
###########################################################################
import os
import signal
import socket
import time
SERVER_ADDRESS = (HOST, PORT) = '', 8888
REQUEST_QUEUE_SIZE = 5
def grim_reaper(signum, frame):
pid, status = os.wait()
print(
'Child {pid} terminated with status {status}'
'n'.format(pid=pid, status=status)
def handle_request(client_connection):
request = client_connection.recv(1024)
print(request.decode())
http_response = b"""
HTTP/1.1 200 OK
Hello, World!
client_connection.sendall(http_response)
# sleep to allow the parent to loop over to 'accept' and block there
time.sleep(3)
def serve_forever():
listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind(SERVER_ADDRESS)
listen_socket.listen(REQUEST_QUEUE_SIZE)
print('Serving HTTP on port {port} ...'.format(port=PORT))
signal.signal(signal.SIGCHLD, grim_reaper)
while True:
client_connection, client_address = listen_socket.accept()
pid = os.fork()
if pid == 0: # child
listen_socket.close() # close child copy
handle_request(client_connection)
client_connection.close()
os._exit(0)
else: # parent
client_connection.close()
if __name__ == '__main__':
serve_forever()
启动服务器:
1
|
$ python webserver3e.py
|
使用老朋友curl给修改后的并发服务器发送请求: Python
1
|
$ curl http://localhost:8888/hello
|
观察服务器:
###########################################################################
# Concurrent server - webserver3f.py #
# #
# Tested with Python 2.7.9 & Python 3.4 on Ubuntu 14.04 & Mac OS X #
###########################################################################
import errno
import os
import signal
import socket
SERVER_ADDRESS = (HOST, PORT) = '', 8888
REQUEST_QUEUE_SIZE = 1024
def grim_reaper(signum, frame):
pid, status = os.wait()
def handle_request(client_connection):
request = client_connection.recv(1024)
print(request.decode())
http_response = b"""
HTTP/1.1 200 OK
Hello, World!
client_connection.sendall(http_response)
def serve_forever():
listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind(SERVER_ADDRESS)
listen_socket.listen(REQUEST_QUEUE_SIZE)
print('Serving HTTP on port {port} ...'.format(port=PORT))
signal.signal(signal.SIGCHLD, grim_reaper)
while True:
client_connection, client_address = listen_socket.accept()
except IOError as e:
code, msg = e.args
# restart 'accept' if it was interrupted
if code == errno.EINTR:
continue
else:
raise
pid = os.fork()
if pid == 0: # child
listen_socket.close() # close child copy
handle_request(client_connection)
client_connection.close()
os._exit(0)
else: # parent
client_connection.close() # close parent copy and loop over
if __name__ == '__main__':
serve_forever()
启动修改后的webserver3f.py: Python
1
|
$ python webserver3f.py
|
使用curl给修改后的服务器发送请求: Python
1
|
$ curl http://localhost:8888/hello
|
看到了吗?没有EINTR异常啦。现在,验证一下吧,没有僵尸了,带wait的SIGCHLD事件处理器也能处理好子进程了。怎么验证呢?只要运行ps命令,看看没有Z+状态的进程(没有进程)。太棒啦!没有僵尸在四周跳的感觉真安全呢!
###########################################################################
# Concurrent server - webserver3g.py #
# #
# Tested with Python 2.7.9 & Python 3.4 on Ubuntu 14.04 & Mac OS X #
###########################################################################
import errno
import os
import signal
import socket
SERVER_ADDRESS = (HOST, PORT) = '', 8888
REQUEST_QUEUE_SIZE = 1024
def grim_reaper(signum, frame):
while True:
pid, status = os.waitpid(
-1, # Wait for any child process
os.WNOHANG # Do not block and return EWOULDBLOCK error
except OSError:
return
if pid == 0: # no more zombies
return
def handle_request(client_connection):
request = client_connection.recv(1024)
print(request.decode())
http_response = b"""
HTTP/1.1 200 OK
Hello, World!
client_connection.sendall(http_response)
def serve_forever():
listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
listen_socket.bind(SERVER_ADDRESS)
listen_socket.listen(REQUEST_QUEUE_SIZE)
print('Serving HTTP on port {port} ...'.format(port=PORT))
signal.signal(signal.SIGCHLD, grim_reaper)
while True:
client_connection, client_address = listen_socket.accept()
except IOError as e:
code, msg = e.args
# restart 'accept' if it was interrupted
if code == errno.EINTR:
continue
else:
raise
pid = os.fork()
if pid == 0: # child
listen_socket.close() # close child copy
handle_request(client_connection)
client_connection.close()
os._exit(0)
else: # parent
client_connection.close() # close parent copy and loop over
if __name__ == '__main__':
serve_forever()
启动服务器:
1
|
$ python webserver3g.py
|
使用测试客户端client3.py: Python
1
|
$ python client3.py –max-clients 128
|
现在验证一下没有僵尸了吧。哈!没有僵尸的日子真好!
近期评论