comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/http/server.py @ 68:5028fdace37b

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 16:23:26 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 68:5028fdace37b
1 """HTTP server classes.
2
3 Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
4 SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
5 and CGIHTTPRequestHandler for CGI scripts (which also handles POST).
6
7 It does, however, optionally implement HTTP/1.1 persistent connections,
8 as of version 0.3.
9
10 Notes on CGIHTTPRequestHandler
11 ------------------------------
12
13 This class implements GET and POST requests to cgi-bin scripts.
14
15 If the os.fork() function is not present (e.g. on Windows),
16 subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18 In all cases, the implementation is intentionally naive -- all
19 requests are executed synchronously.
20
21 SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22 -- it may execute arbitrary Python code or external programs.
23
24 Note that status code 200 is sent prior to execution of a CGI script, so
25 scripts cannot send other status codes such as 302 (redirect).
26
27 XXX To do:
28
29 - log requests even later (to capture byte count)
30 - log user-agent header and other interesting goodies
31 - send error log to separate file
32 """
33
34
35 # See also:
36 #
37 # HTTP Working Group T. Berners-Lee
38 # INTERNET-DRAFT R. T. Fielding
39 # <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen
40 # Expires September 8, 1995 March 8, 1995
41 #
42 # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43 #
44 # and
45 #
46 # Network Working Group R. Fielding
47 # Request for Comments: 2616 et al
48 # Obsoletes: 2068 June 1999
49 # Category: Standards Track
50 #
51 # URL: http://www.faqs.org/rfcs/rfc2616.html
52
53 # Log files
54 # ---------
55 #
56 # Here's a quote from the NCSA httpd docs about log file format.
57 #
58 # | The logfile format is as follows. Each line consists of:
59 # |
60 # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61 # |
62 # | host: Either the DNS name or the IP number of the remote client
63 # | rfc931: Any information returned by identd for this person,
64 # | - otherwise.
65 # | authuser: If user sent a userid for authentication, the user name,
66 # | - otherwise.
67 # | DD: Day
68 # | Mon: Month (calendar name)
69 # | YYYY: Year
70 # | hh: hour (24-hour format, the machine's timezone)
71 # | mm: minutes
72 # | ss: seconds
73 # | request: The first line of the HTTP request as sent by the client.
74 # | ddd: the status code returned by the server, - if not available.
75 # | bbbb: the total number of bytes sent,
76 # | *not including the HTTP/1.0 header*, - if not available
77 # |
78 # | You can determine the name of the file accessed through request.
79 #
80 # (Actually, the latter is only true if you know the server configuration
81 # at the time the request was made!)
82
83 __version__ = "0.6"
84
85 __all__ = [
86 "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
87 "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
88 ]
89
90 import copy
91 import datetime
92 import email.utils
93 import html
94 import http.client
95 import io
96 import mimetypes
97 import os
98 import posixpath
99 import select
100 import shutil
101 import socket # For gethostbyaddr()
102 import socketserver
103 import sys
104 import time
105 import urllib.parse
106 from functools import partial
107
108 from http import HTTPStatus
109
110
111 # Default error message template
112 DEFAULT_ERROR_MESSAGE = """\
113 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
114 "http://www.w3.org/TR/html4/strict.dtd">
115 <html>
116 <head>
117 <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
118 <title>Error response</title>
119 </head>
120 <body>
121 <h1>Error response</h1>
122 <p>Error code: %(code)d</p>
123 <p>Message: %(message)s.</p>
124 <p>Error code explanation: %(code)s - %(explain)s.</p>
125 </body>
126 </html>
127 """
128
129 DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
130
class HTTPServer(socketserver.TCPServer):
    """TCP server that remembers its own host name and port after binding."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then store the server name and port.

        ``server_name`` is the fully qualified domain name resolved from
        the bound host; ``server_port`` is the bound port.
        """
        socketserver.TCPServer.server_bind(self)
        bound = self.server_address[:2]
        host, port = bound
        self.server_name = socket.getfqdn(host)
        self.server_port = port
141
142
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with ThreadingMixIn, using daemon threads."""

    daemon_threads = True
145
146
147 class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
148
149 """HTTP request handler base class.
150
151 The following explanation of HTTP serves to guide you through the
152 code as well as to expose any misunderstandings I may have about
153 HTTP (so you don't need to read the code to figure out I'm wrong
154 :-).
155
156 HTTP (HyperText Transfer Protocol) is an extensible protocol on
157 top of a reliable stream transport (e.g. TCP/IP). The protocol
158 recognizes three parts to a request:
159
160 1. One line identifying the request type and path
161 2. An optional set of RFC-822-style headers
162 3. An optional data part
163
164 The headers and data are separated by a blank line.
165
166 The first line of the request has the form
167
168 <command> <path> <version>
169
170 where <command> is a (case-sensitive) keyword such as GET or POST,
171 <path> is a string containing path information for the request,
172 and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
173 <path> is encoded using the URL encoding scheme (using %xx to signify
174 the ASCII character with hex code xx).
175
176 The specification specifies that lines are separated by CRLF but
177 for compatibility with the widest range of clients recommends
178 servers also handle LF. Similarly, whitespace in the request line
179 is treated sensibly (allowing multiple spaces between components
180 and allowing trailing whitespace).
181
182 Similarly, for output, lines ought to be separated by CRLF pairs
183 but most clients grok LF characters just fine.
184
185 If the first line of the request has the form
186
187 <command> <path>
188
189 (i.e. <version> is left out) then this is assumed to be an HTTP
190 0.9 request; this form has no optional headers and data part and
191 the reply consists of just the data.
192
193 The reply form of the HTTP 1.x protocol again has three parts:
194
195 1. One line giving the response code
196 2. An optional set of RFC-822-style headers
197 3. The data
198
199 Again, the headers and data are separated by a blank line.
200
201 The response code line has the form
202
203 <version> <responsecode> <responsestring>
204
205 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
206 <responsecode> is a 3-digit response code indicating success or
207 failure of the request, and <responsestring> is an optional
208 human-readable string explaining what the response code means.
209
210 This server parses the request and the headers, and then calls a
211 function specific to the request type (<command>). Specifically,
212 a request SPAM will be handled by a method do_SPAM(). If no
213 such method exists the server sends an error response to the
214 client. If it exists, it is called with no arguments:
215
216 do_SPAM()
217
218 Note that the request name is case sensitive (i.e. SPAM and spam
219 are different requests).
220
221 The various request details are stored in instance variables:
222
223 - client_address is the client IP address in the form (host,
224 port);
225
226 - command, path and version are the broken-down request line;
227
228 - headers is an instance of email.message.Message (or a derived
229 class) containing the header information;
230
231 - rfile is a file object open for reading positioned at the
232 start of the optional input data part;
233
234 - wfile is a file object open for writing.
235
236 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
237
238 The first thing to be written must be the response line. Then
239 follow 0 or more header lines, then a blank line, and then the
240 actual data (if any). The meaning of the header lines depends on
241 the command executed by the server; in most cases, when data is
242 returned, there should be at least one header line of the form
243
244 Content-type: <type>/<subtype>
245
246 where <type> and <subtype> should be registered MIME types,
247 e.g. "text/html" or "text/plain".
248
249 """
250
251 # The Python system version, truncated to its first component.
252 sys_version = "Python/" + sys.version.split()[0]
253
254 # The server software version. You may want to override this.
255 # The format is multiple whitespace-separated strings,
256 # where each string is of the form name[/version].
257 server_version = "BaseHTTP/" + __version__
258
259 error_message_format = DEFAULT_ERROR_MESSAGE
260 error_content_type = DEFAULT_ERROR_CONTENT_TYPE
261
262 # The default request version. This only affects responses up until
263 # the point where the request line is parsed, so it mainly decides what
264 # the client gets back when sending a malformed request line.
265 # Most web servers default to HTTP 0.9, i.e. don't send a status line.
266 default_request_version = "HTTP/0.9"
267
def parse_request(self):
    """Parse a request (internal).

    The request should be stored in self.raw_requestline; the results
    are in self.command, self.path, self.request_version and
    self.headers.  self.close_connection is updated from the request
    version and the Connection header.

    Return True for success, False for failure; on failure, any relevant
    error response has already been sent back (an empty request line
    fails without sending anything).

    """
    self.command = None  # set in case of error on the first line
    self.request_version = version = self.default_request_version
    self.close_connection = True
    requestline = str(self.raw_requestline, 'iso-8859-1')
    requestline = requestline.rstrip('\r\n')
    self.requestline = requestline
    words = requestline.split()
    if not words:
        return False

    if len(words) >= 3:  # Enough to determine protocol version
        version = words[-1]
        try:
            if not version.startswith('HTTP/'):
                raise ValueError
            base_version_number = version.split('/', 1)[1]
            version_number = base_version_number.split(".")
            # RFC 2145 section 3.1 says there can be only one "." and
            #   - major and minor numbers MUST be treated as
            #      separate integers;
            #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
            #      turn is lower than HTTP/12.3;
            #   - Leading zeros MUST be ignored by recipients.
            if len(version_number) != 2:
                raise ValueError
            # int() alone also accepts signs, surrounding whitespace and
            # underscores; a version component must be plain digits.
            if any(not component.isdigit() for component in version_number):
                raise ValueError("non digit in http version")
            if any(len(component) > 10 for component in version_number):
                raise ValueError("unreasonable length http version")
            version_number = int(version_number[0]), int(version_number[1])
        except (ValueError, IndexError):
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad request version (%r)" % version)
            return False
        if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
            self.close_connection = False
        if version_number >= (2, 0):
            self.send_error(
                HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
                "Invalid HTTP version (%s)" % base_version_number)
            return False
        self.request_version = version

    if not 2 <= len(words) <= 3:
        self.send_error(
            HTTPStatus.BAD_REQUEST,
            "Bad request syntax (%r)" % requestline)
        return False
    command, path = words[:2]
    if len(words) == 2:
        # No version given: treat as an HTTP/0.9 request, which only
        # supports GET and never keeps the connection open.
        self.close_connection = True
        if command != 'GET':
            self.send_error(
                HTTPStatus.BAD_REQUEST,
                "Bad HTTP/0.9 request type (%r)" % command)
            return False
    self.command, self.path = command, path

    # Clients treat a leading '//' as a scheme-less absolute URI
    # ("//host/path") rather than a path, so echoing such a path back in
    # a redirect would send them to another host.  Collapse the leading
    # slashes to a single one to prevent open redirects.
    if self.path.startswith('//'):
        self.path = '/' + self.path.lstrip('/')

    # Examine the headers and look for a Connection directive.
    try:
        self.headers = http.client.parse_headers(self.rfile,
                                                 _class=self.MessageClass)
    except http.client.LineTooLong as err:
        self.send_error(
            HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
            "Line too long",
            str(err))
        return False
    except http.client.HTTPException as err:
        self.send_error(
            HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
            "Too many headers",
            str(err)
        )
        return False

    conntype = self.headers.get('Connection', "")
    if conntype.lower() == 'close':
        self.close_connection = True
    elif (conntype.lower() == 'keep-alive' and
          self.protocol_version >= "HTTP/1.1"):
        self.close_connection = False
    # Examine the headers and look for an Expect directive
    expect = self.headers.get('Expect', "")
    if (expect.lower() == "100-continue" and
            self.protocol_version >= "HTTP/1.1" and
            self.request_version >= "HTTP/1.1"):
        if not self.handle_expect_100():
            return False
    return True
366
def handle_expect_100(self):
    """Answer an "Expect: 100-continue" request header.

    A client that sent this header waits for either a 100 Continue or
    a final response before it transmits the request body.  This
    default implementation always sends 100 Continue.  Override it to
    behave differently (for example, to reject unauthorized requests).

    An override should either return True (possibly after sending a
    100 Continue response) or send an error response and return False.

    """
    status = HTTPStatus.CONTINUE
    self.send_response_only(status)
    self.end_headers()
    return True
384
def handle_one_request(self):
    """Read, parse and dispatch a single HTTP request.

    The request is dispatched to the method named ``do_<COMMAND>``; if
    no such method exists a 501 error is sent.  You normally don't need
    to override this method; see the class __doc__ string for
    information on how to handle specific HTTP commands such as GET
    and POST.

    """
    try:
        line = self.rfile.readline(65537)
        self.raw_requestline = line
        if len(line) > 65536:
            # Request line too long: reset request state so the error
            # response can be produced and logged.
            self.requestline = ''
            self.request_version = ''
            self.command = ''
            self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
            return
        if not line:
            # Nothing was read; stop serving this connection.
            self.close_connection = True
            return
        if not self.parse_request():
            # An error code has been sent, just exit
            return
        handler_name = 'do_' + self.command
        if hasattr(self, handler_name):
            getattr(self, handler_name)()
            # actually send the response if not already done.
            self.wfile.flush()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Unsupported method (%r)" % self.command)
    except socket.timeout as e:
        # a read or a write timed out.  Discard this connection
        self.log_error("Request timed out: %r", e)
        self.close_connection = True
421
def handle(self):
    """Handle requests until the connection is flagged for closing.

    At least one request is always handled.
    """
    self.close_connection = True
    while True:
        self.handle_one_request()
        if self.close_connection:
            break
429
def send_error(self, code, message=None, explain=None):
    """Send and log an error reply.

    Arguments are
    * code:    an HTTP error code
               3 digits
    * message: a simple optional 1 line reason phrase.
               *( HTAB / SP / VCHAR / %x80-FF )
               defaults to short entry matching the response code
    * explain: a detailed message defaults to the long entry
               matching the response code.

    This sends an error response (so it must be called before any
    output has been generated), logs the error, and finally sends
    a piece of HTML explaining the error to the user.

    """
    try:
        shortmsg, longmsg = self.responses[code]
    except KeyError:
        shortmsg, longmsg = '???', '???'
    message = shortmsg if message is None else message
    explain = longmsg if explain is None else explain
    self.log_error("code %d, message %s", code, message)
    self.send_response(code, message)
    self.send_header('Connection', 'close')

    # Message body is omitted for cases described in:
    #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
    #  - RFC7231: 6.3.6. 205(Reset Content)
    bodiless = (HTTPStatus.NO_CONTENT,
                HTTPStatus.RESET_CONTENT,
                HTTPStatus.NOT_MODIFIED)
    body = None
    if code >= 200 and code not in bodiless:
        # HTML encode to prevent Cross Site Scripting attacks
        # (see bug #1100201)
        fields = {
            'code': code,
            'message': html.escape(message, quote=False),
            'explain': html.escape(explain, quote=False),
        }
        page = self.error_message_format % fields
        body = page.encode('UTF-8', 'replace')
        self.send_header("Content-Type", self.error_content_type)
        self.send_header('Content-Length', str(len(body)))
    self.end_headers()

    # A HEAD response carries the headers only.
    if self.command != 'HEAD' and body:
        self.wfile.write(body)
482
def send_response(self, code, message=None):
    """Log the response code and buffer the status line.

    Two standard headers are buffered as well: ``Server`` with the
    server software version and ``Date`` with the current date.

    """
    self.log_request(code)
    self.send_response_only(code, message)
    standard_headers = (
        ('Server', self.version_string()),
        ('Date', self.date_time_string()),
    )
    for keyword, value in standard_headers:
        self.send_header(keyword, value)
495
def send_response_only(self, code, message=None):
    """Buffer the status line only (no logging, no standard headers).

    Nothing is buffered for HTTP/0.9 requests.  When *message* is None
    the short phrase from ``self.responses`` is used, or an empty
    string if the code is unknown.
    """
    if self.request_version == 'HTTP/0.9':
        return
    if message is None:
        message = self.responses[code][0] if code in self.responses else ''
    if not hasattr(self, '_headers_buffer'):
        self._headers_buffer = []
    status_line = "%s %d %s\r\n" % (self.protocol_version, code, message)
    self._headers_buffer.append(status_line.encode('latin-1', 'strict'))
509
def send_header(self, keyword, value):
    """Buffer one MIME header line.

    Nothing is buffered for HTTP/0.9 requests.  A ``Connection`` header
    also updates ``self.close_connection`` ('close' sets it, 'keep-alive'
    clears it), whatever the request version.
    """
    if self.request_version != 'HTTP/0.9':
        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []
        header_line = "%s: %s\r\n" % (keyword, value)
        self._headers_buffer.append(header_line.encode('latin-1', 'strict'))

    if keyword.lower() != 'connection':
        return
    directive = value.lower()
    if directive == 'close':
        self.close_connection = True
    elif directive == 'keep-alive':
        self.close_connection = False
523
def end_headers(self):
    """Buffer the blank line ending the MIME headers and flush them.

    Does nothing for HTTP/0.9 requests.
    """
    if self.request_version == 'HTTP/0.9':
        return
    self._headers_buffer.append(b"\r\n")
    self.flush_headers()
529
def flush_headers(self):
    """Write the buffered header lines to wfile and empty the buffer.

    Does nothing if no header buffer has been created yet.
    """
    if not hasattr(self, '_headers_buffer'):
        return
    pending = b"".join(self._headers_buffer)
    self.wfile.write(pending)
    self._headers_buffer = []
534
def log_request(self, code='-', size='-'):
    """Log an accepted request.

    This is called by send_response().  An HTTPStatus member is logged
    by its numeric value.

    """
    status = code.value if isinstance(code, HTTPStatus) else code
    self.log_message('"%s" %s %s',
                     self.requestline, str(status), str(size))
545
def log_error(self, format, *args):
    """Log an error.

    Called when a request cannot be fulfilled.  The default simply
    forwards to log_message(), so the arguments are the same as for
    log_message().

    XXX This should go to the separate error log.

    """
    forward = self.log_message
    forward(format, *args)
559
def log_message(self, format, *args):
    """Log an arbitrary message to sys.stderr.

    This is used by all other logging functions.  Override
    it if you have specific logging wishes.

    The first argument, FORMAT, is a format string for the
    message to be logged.  If the format string contains
    any % escapes requiring parameters, they should be
    specified as subsequent arguments (it's just like
    printf!).

    The client ip and current date/time are prefixed to
    every message.

    Control characters in the formatted message are written as
    ``\\xNN`` escapes (and a backslash as ``\\\\``), because the message
    usually contains text supplied by the client, such as the request
    line.

    """
    # Escape C0/C1 control characters, plus the backslash itself so the
    # escaped output stays unambiguous.
    escapes = {c: r'\x%02x' % c for c in (*range(0x20), *range(0x7f, 0xa0))}
    escapes[ord('\\')] = r'\\'

    sys.stderr.write("%s - - [%s] %s\n" %
                     (self.address_string(),
                      self.log_date_time_string(),
                      (format % args).translate(escapes)))
581
def version_string(self):
    """Return the server software version string.

    It is ``server_version`` and ``sys_version`` joined by one space.
    """
    return ' '.join((self.server_version, self.sys_version))
585
def date_time_string(self, timestamp=None):
    """Return a date and time formatted for a message header.

    *timestamp* is seconds since the epoch; the current time is used
    when it is None.  The result is expressed in GMT.
    """
    when = time.time() if timestamp is None else timestamp
    return email.utils.formatdate(when, usegmt=True)
591
def log_date_time_string(self):
    """Return the current local time formatted for logging.

    The format is ``DD/Mon/YYYY hh:mm:ss`` with the month name taken
    from ``self.monthname``.
    """
    local = time.localtime(time.time())
    return "%02d/%3s/%04d %02d:%02d:%02d" % (
        local.tm_mday, self.monthname[local.tm_mon], local.tm_year,
        local.tm_hour, local.tm_min, local.tm_sec)
599
600 weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
601
602 monthname = [None,
603 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
604 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
605
def address_string(self):
    """Return the client address (first item of ``client_address``)."""
    host = self.client_address[0]
    return host
610
611 # Essentially static class variables
612
613 # The version of the HTTP protocol we support.
614 # Set this to HTTP/1.1 to enable automatic keepalive
615 protocol_version = "HTTP/1.0"
616
617 # MessageClass used to parse headers
618 MessageClass = http.client.HTTPMessage
619
620 # hack to maintain backwards compatibility
621 responses = {
622 v: (v.phrase, v.description)
623 for v in HTTPStatus.__members__.values()
624 }
625
626
627 class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
628
629 """Simple HTTP request handler with GET and HEAD commands.
630
631 This serves files from the current directory and any of its
632 subdirectories. The MIME type for files is determined by
633 calling the .guess_type() method.
634
635 The GET and HEAD requests are identical except that the HEAD
636 request omits the actual contents of the file.
637
638 """
639
640 server_version = "SimpleHTTP/" + __version__
641
def __init__(self, *args, directory=None, **kwargs):
    """Set the directory to serve, then initialise the base handler.

    *directory* defaults to the current working directory at the time
    the handler is created.  It is stored before the base initialiser
    is called.
    """
    self.directory = os.getcwd() if directory is None else directory
    super().__init__(*args, **kwargs)
647
def do_GET(self):
    """Serve a GET request.

    The headers are sent by send_head(); if it returns a file object,
    its contents are copied to wfile and the file is always closed.
    """
    f = self.send_head()
    if not f:
        return
    try:
        self.copyfile(f, self.wfile)
    finally:
        f.close()
656
def do_HEAD(self):
    """Serve a HEAD request.

    Only the headers are sent; a file object returned by send_head()
    is closed without being copied.
    """
    f = self.send_head()
    if not f:
        return
    f.close()
662
def send_head(self):
    """Common code for GET and HEAD commands.

    This sends the response code and MIME headers.

    Return value is either a file object (which has to be copied
    to the outputfile by the caller unless the command was HEAD,
    and must be closed by the caller under all circumstances), or
    None, in which case the caller has nothing further to do.

    A directory requested without a trailing slash is answered with a
    301 redirect.  A directory containing a regular file index.html or
    index.htm is answered with that file; any other directory is
    answered with a listing.

    """
    path = self.translate_path(self.path)
    f = None
    if os.path.isdir(path):
        parts = urllib.parse.urlsplit(self.path)
        if not parts.path.endswith('/'):
            # redirect browser - doing basically what apache does
            self.send_response(HTTPStatus.MOVED_PERMANENTLY)
            new_parts = (parts[0], parts[1], parts[2] + '/',
                         parts[3], parts[4])
            new_url = urllib.parse.urlunsplit(new_parts)
            self.send_header("Location", new_url)
            # The redirect has no body; say so explicitly so clients on
            # a persistent connection do not wait for one.
            self.send_header("Content-Length", "0")
            self.end_headers()
            return None
        for index in "index.html", "index.htm":
            index = os.path.join(path, index)
            # Only a regular file can serve as the index page; a
            # directory with that name must not shadow the listing.
            if os.path.isfile(index):
                path = index
                break
        else:
            return self.list_directory(path)
    ctype = self.guess_type(path)
    # check for trailing "/" which should return 404. See Issue17324
    # The test for this was added in test_httpserver.py
    # However, some OS platforms accept a trailing slash as a filename
    # See discussion on python-dev and Issue34711 regarding
    # parsing and rejection of filenames with a trailing slash
    if path.endswith("/"):
        self.send_error(HTTPStatus.NOT_FOUND, "File not found")
        return None
    try:
        f = open(path, 'rb')
    except OSError:
        self.send_error(HTTPStatus.NOT_FOUND, "File not found")
        return None

    try:
        fs = os.fstat(f.fileno())
        # Use browser cache if possible
        if ("If-Modified-Since" in self.headers
                and "If-None-Match" not in self.headers):
            # compare If-Modified-Since and time of last file modification
            try:
                ims = email.utils.parsedate_to_datetime(
                    self.headers["If-Modified-Since"])
            except (TypeError, IndexError, OverflowError, ValueError):
                # ignore ill-formed values
                pass
            else:
                if ims.tzinfo is None:
                    # obsolete format with no timezone, cf.
                    # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
                    ims = ims.replace(tzinfo=datetime.timezone.utc)
                if ims.tzinfo is datetime.timezone.utc:
                    # compare to UTC datetime of last modification
                    last_modif = datetime.datetime.fromtimestamp(
                        fs.st_mtime, datetime.timezone.utc)
                    # remove microseconds, like in If-Modified-Since
                    last_modif = last_modif.replace(microsecond=0)

                    if last_modif <= ims:
                        self.send_response(HTTPStatus.NOT_MODIFIED)
                        self.end_headers()
                        f.close()
                        return None

        self.send_response(HTTPStatus.OK)
        self.send_header("Content-type", ctype)
        self.send_header("Content-Length", str(fs.st_size))
        self.send_header("Last-Modified",
                         self.date_time_string(fs.st_mtime))
        self.end_headers()
        return f
    except:
        # Deliberately bare: the file must be closed on any failure,
        # and the exception is re-raised unchanged.
        f.close()
        raise
749
def list_directory(self, path):
    """Helper to produce a directory listing (absent index.html).

    Return value is either a file object, or None (indicating an
    error).  In either case, the headers are sent, making the
    interface the same as for send_head().

    """
    try:
        entries = os.listdir(path)
    except OSError:
        self.send_error(
            HTTPStatus.NOT_FOUND,
            "No permission to list directory")
        return None
    entries.sort(key=lambda entry: entry.lower())
    try:
        displaypath = urllib.parse.unquote(self.path,
                                           errors='surrogatepass')
    except UnicodeDecodeError:
        displaypath = urllib.parse.unquote(path)
    displaypath = html.escape(displaypath, quote=False)
    enc = sys.getfilesystemencoding()
    title = 'Directory listing for %s' % displaypath
    page = [
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
        '"http://www.w3.org/TR/html4/strict.dtd">',
        '<html>\n<head>',
        '<meta http-equiv="Content-Type" '
        'content="text/html; charset=%s">' % enc,
        '<title>%s</title>\n</head>' % title,
        '<body>\n<h1>%s</h1>' % title,
        '<hr>\n<ul>',
    ]
    for name in entries:
        fullname = os.path.join(path, name)
        displayname = linkname = name
        # Append / for directories or @ for symbolic links
        if os.path.isdir(fullname):
            displayname = linkname = name + "/"
        if os.path.islink(fullname):
            displayname = name + "@"
            # Note: a link to a directory displays with @ and links with /
        href = urllib.parse.quote(linkname, errors='surrogatepass')
        label = html.escape(displayname, quote=False)
        page.append('<li><a href="%s">%s</a></li>' % (href, label))
    page.append('</ul>\n<hr>\n</body>\n</html>\n')
    encoded = '\n'.join(page).encode(enc, 'surrogateescape')
    f = io.BytesIO(encoded)
    self.send_response(HTTPStatus.OK)
    self.send_header("Content-type", "text/html; charset=%s" % enc)
    self.send_header("Content-Length", str(len(encoded)))
    self.end_headers()
    return f
807
def translate_path(self, path):
    """Translate a /-separated PATH to the local filename syntax.

    The result is rooted at ``self.directory``.  Components that mean
    special things to the local file system (e.g. drive or directory
    names) are ignored.  (XXX They should probably be diagnosed.)

    """
    # abandon query parameters
    path = path.split('?', 1)[0].split('#', 1)[0]
    # Don't forget explicit trailing slash when normalizing. Issue17324
    trailing_slash = path.rstrip().endswith('/')
    try:
        path = urllib.parse.unquote(path, errors='surrogatepass')
    except UnicodeDecodeError:
        path = urllib.parse.unquote(path)
    normalized = posixpath.normpath(path)
    translated = self.directory
    for component in normalized.split('/'):
        if not component:
            continue
        if os.path.dirname(component) or component in (os.curdir, os.pardir):
            # Ignore components that are not a simple file/directory name
            continue
        translated = os.path.join(translated, component)
    if trailing_slash:
        translated += '/'
    return translated
837
def copyfile(self, source, outputfile):
    """Copy all data between two file objects.

    The SOURCE argument is a file object open for reading
    (or anything with a read() method) and the OUTPUTFILE
    argument is a file object open for writing (or
    anything with a write() method).

    The only reason for overriding this would be to change
    the block size or perhaps to replace newlines by CRLF
    -- note however that the default server uses this
    to copy binary data as well.

    """
    copy = shutil.copyfileobj
    copy(source, outputfile)
853
def guess_type(self, path):
    """Guess the type of a file.

    Argument is a PATH (a filename).

    Return value is a string of the form type/subtype,
    usable for a MIME Content-type header.

    The file's extension is looked up in self.extensions_map, first as
    written and then lower-cased; the entry for '' is the fallback.

    """
    extension = posixpath.splitext(path)[1]
    table = self.extensions_map
    for candidate in (extension, extension.lower()):
        if candidate in table:
            return table[candidate]
    return table['']
877
878 if not mimetypes.inited:
879 mimetypes.init() # try to read system mime.types
880 extensions_map = mimetypes.types_map.copy()
881 extensions_map.update({
882 '': 'application/octet-stream', # Default
883 '.py': 'text/plain',
884 '.c': 'text/plain',
885 '.h': 'text/plain',
886 })
887
888
889 # Utilities for CGIHTTPRequestHandler
890
891 def _url_collapse_path(path):
892 """
893 Given a URL path, remove extra '/'s and '.' path elements and collapse
894 any '..' references and returns a collapsed path.
895
896 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
897 The utility of this function is limited to is_cgi method and helps
898 preventing some security attacks.
899
900 Returns: The reconstituted URL, which will always start with a '/'.
901
902 Raises: IndexError if too many '..' occur within the path.
903
904 """
905 # Query component should not be involved.
906 path, _, query = path.partition('?')
907 path = urllib.parse.unquote(path)
908
909 # Similar to os.path.split(os.path.normpath(path)) but specific to URL
910 # path semantics rather than local operating system semantics.
911 path_parts = path.split('/')
912 head_parts = []
913 for part in path_parts[:-1]:
914 if part == '..':
915 head_parts.pop() # IndexError if more '..' than prior parts
916 elif part and part != '.':
917 head_parts.append( part )
918 if path_parts:
919 tail_part = path_parts.pop()
920 if tail_part:
921 if tail_part == '..':
922 head_parts.pop()
923 tail_part = ''
924 elif tail_part == '.':
925 tail_part = ''
926 else:
927 tail_part = ''
928
929 if query:
930 tail_part = '?'.join((tail_part, query))
931
932 splitpath = ('/' + '/'.join(head_parts), tail_part)
933 collapsed_path = "/".join(splitpath)
934
935 return collapsed_path
936
937
938
# Cached result of nobody_uid().
nobody = None


def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns -1 when the pwd module cannot be imported.  Otherwise the
    uid of the 'nobody' account is returned, or one more than the
    highest uid in the password database if that account is missing.
    The value is cached in the module-level ``nobody``.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody')[2]
        except KeyError:
            nobody = 1 + max(entry[2] for entry in pwd.getpwall())
    return nobody
955
956
def executable(path):
    """Test for executable file (os.access with os.X_OK)."""
    mode = os.X_OK
    return os.access(path, mode)
960
961
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # URL path prefixes that is_cgi() accepts as CGI script directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with a 501 (Not Implemented) error.

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi() (whose return value is
        passed through); everything else is delegated to
        SimpleHTTPRequestHandler.send_head().

        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Split at the second '/' so that head is the first path
        # component including its leading slash (e.g. '/cgi-bin').
        dir_sep = collapsed_path.find('/', 1)
        head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
        if head in self.cgi_directories:
            self.cgi_info = head, tail
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script.

        Only the file extension is examined (".py" or ".pyw", compared
        case-insensitively).

        """
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def _build_cgi_environ(self, scriptname, rest, query):
        """Return the environment mapping handed to a CGI script.

        scriptname -- URL path of the script (becomes SCRIPT_NAME).
        rest       -- extra URL path following the script name, still
                      percent-encoded (unquoted into PATH_INFO).
        query      -- raw query string, possibly empty.

        The result is a deep copy of os.environ with the CGI meta
        variables derived from the current request added to it.

        """
        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]

        # An Authorization header of the form "<scheme> <credentials>".
        auth_fields = (self.headers.get("authorization") or "").split()
        if len(auth_fields) == 2:
            import base64
            import binascii
            scheme, credentials = auth_fields
            env['AUTH_TYPE'] = scheme
            if scheme.lower() == "basic":
                try:
                    decoded = base64.decodebytes(
                        credentials.encode('ascii')).decode('ascii')
                except (binascii.Error, UnicodeError):
                    pass
                else:
                    # Split at the FIRST colon only: the password part
                    # may itself contain ':' characters.
                    user, colon, _ = decoded.partition(':')
                    if colon:
                        env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        # get_all() returns the value of every Accept header; the
        # previously used getallmatchingheaders() never matched anything,
        # which left HTTP_ACCEPT permanently empty.
        env['HTTP_ACCEPT'] = ','.join(self.headers.get_all('accept', ()))
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")
        return env

    def run_cgi(self):
        """Execute a CGI script.

        Uses self.cgi_info as set by is_cgi().  Sends a 404/403 error
        when the script is missing, not a plain file or not executable;
        otherwise sends a 200 status line and runs the script, via
        os.fork()/os.execve() when fork is available and via
        subprocess.Popen otherwise.

        """
        script_dir, rest = self.cgi_info
        path = script_dir + '/' + rest
        # Move leading components of rest into script_dir for as long
        # as they name existing directories on disk.
        i = path.find('/', len(script_dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            if os.path.isdir(self.translate_path(nextdir)):
                script_dir, rest = nextdir, nextrest
                i = path.find('/', len(script_dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = script_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are started through the
        # interpreter, so only then is the executable check skipped.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        env = self._build_cgi_environ(scriptname, rest, query)

        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush()  # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except OSError:
                    pass
                # Wire the connection to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catch everything: whatever went wrong,
                # the child reports it and then leaves via os._exit().
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            # Note: the raw query (without '+' decoding) is passed here.
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            length = self.headers.get('content-length')
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1225
1226
def _get_best_family(*address):
    """Return (family, sockaddr) of the first getaddrinfo() result.

    The positional arguments are forwarded unchanged to
    socket.getaddrinfo(), which is asked for passive stream sockets.
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    best = next(iter(candidates))
    # Each entry is (family, type, proto, canonname, sockaddr).
    family, _type, _proto, _canonname, sockaddr = best
    return family, sockaddr
1235
1236
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Run an HTTP server with the given handler until interrupted.

    The server listens on port 8000 (or the port argument) of the
    address chosen by _get_best_family(bind, port).  Note that the
    address family is stored on ServerClass and the protocol version on
    HandlerClass, i.e. both objects passed in are modified.

    A KeyboardInterrupt ends the server and exits the process with
    status 0.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol

    with ServerClass(addr, HandlerClass) as httpd:
        # Report the address actually bound, not the one requested.
        host, port = httpd.socket.getsockname()[:2]
        if ':' in host:
            # Hosts containing ':' are bracketed inside the URL.
            url_host = f'[{host}]'
        else:
            url_host = host
        banner = (
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        print(banner)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1260
if __name__ == '__main__':
    # Command-line entry point: parse the options and start the server
    # through test().
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cgi', action='store_true',
                        help='Run as CGI Server')
    parser.add_argument('--bind', '-b', metavar='ADDRESS',
                        help='Specify alternate bind address '
                             '[default: all interfaces]')
    parser.add_argument('--directory', '-d', default=os.getcwd(),
                        help='Specify alternative directory '
                             '[default:current directory]')
    parser.add_argument('port', action='store',
                        default=8000, type=int,
                        nargs='?',
                        help='Specify alternate port [default: 8000]')
    args = parser.parse_args()
    if args.cgi:
        base_handler = CGIHTTPRequestHandler
    else:
        base_handler = SimpleHTTPRequestHandler
    # Bind the directory for both handler kinds so that --directory is
    # no longer silently ignored when combined with --cgi.  The default
    # is the current directory, so plain invocations behave as before.
    handler_class = partial(base_handler, directory=args.directory)
    test(HandlerClass=handler_class, port=args.port, bind=args.bind)