Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/http/cookiejar.py @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 r"""HTTP cookie handling for web clients. | |
2 | |
3 This module has (now fairly distant) origins in Gisle Aas' Perl module | |
4 HTTP::Cookies, from the libwww-perl library. | |
5 | |
6 Docstrings, comments and debug strings in this code refer to the | |
7 attributes of the HTTP cookie system as cookie-attributes, to distinguish | |
8 them clearly from Python attributes. | |
9 | |
10 Class diagram (note that BSDDBCookieJar and the MSIE* classes are not | |
11 distributed with the Python standard library, but are available from | |
12 http://wwwsearch.sf.net/): | |
13 | |
14 CookieJar____ | |
15 / \ \ | |
16 FileCookieJar \ \ | |
17 / | \ \ \ | |
18 MozillaCookieJar | LWPCookieJar \ \ | |
19 | | \ | |
20 | ---MSIEBase | \ | |
21 | / | | \ | |
22 | / MSIEDBCookieJar BSDDBCookieJar | |
23 |/ | |
24 MSIECookieJar | |
25 | |
26 """ | |
27 | |
28 __all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', | |
29 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] | |
30 | |
31 import os | |
32 import copy | |
33 import datetime | |
34 import re | |
35 import time | |
36 import urllib.parse, urllib.request | |
37 import threading as _threading | |
38 import http.client # only for the default HTTP port | |
39 from calendar import timegm | |
40 | |
debug = False   # flip to True to route debug output through the logging module
logger = None   # created lazily by _debug() the first time it is needed

def _debug(*args):
    """Forward *args* to the module logger's debug() when debugging is on.

    Does nothing (and returns None) while the module-level ``debug`` flag is
    false.  The "http.cookiejar" logger is looked up on first use and cached
    in the module-level ``logger`` variable.
    """
    global logger
    if debug:
        if not logger:
            import logging
            logger = logging.getLogger("http.cookiejar")
        return logger.debug(*args)
    return None
52 | |
53 | |
# The default HTTP port, kept as a string because cookie ports are compared
# as strings throughout this module.
DEFAULT_HTTP_PORT = "%d" % http.client.HTTP_PORT
# Error text used when a file-based operation has no filename to work with.
MISSING_FILENAME_TEXT = (
    "a filename was not supplied (nor was the CookieJar "
    "instance initialised with one)"
)
57 | |
def _warn_unhandled_exception():
    """Emit a warning containing the traceback of the current exception.

    Intended to be called from the catch-all ``except`` clauses in this
    module, which guard against input that is bad in unexpected ways.
    """
    import io, warnings, traceback
    buf = io.StringIO()
    traceback.print_exc(None, buf)
    # stacklevel=2 attributes the warning to the caller of this helper.
    warnings.warn("http.cookiejar bug!\n%s" % buf.getvalue(), stacklevel=2)
67 | |
68 | |
69 # Date/time conversion | |
70 # ----------------------------------------------------------------------------- | |
71 | |
EPOCH_YEAR = 1970
def _timegm(tt):
    """Return seconds since the epoch for UTC time tuple *tt*, or None.

    Only the first six fields (year, month, day, hour, minute, second) are
    range-checked; None is returned when any of them is out of range or the
    year precedes EPOCH_YEAR.  Hour 24 and seconds up to 61 are tolerated.
    """
    year, month, mday, hour, minute, second = tt[:6]
    in_range = (
        (year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31)
        and (0 <= hour <= 24) and (0 <= minute <= 59) and (0 <= second <= 61)
    )
    return timegm(tt) if in_range else None
80 | |
# Abbreviated English day and month names used when formatting and parsing
# cookie dates.  DAYS is indexed by datetime.weekday() (Monday == 0).
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased month names for case-insensitive lookup.  Built with a
# comprehension so that no stray loop variable ("month") is left behind in
# the module namespace.
MONTHS_LOWER = [name.lower() for name in MONTHS]
86 | |
def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    # Use timezone-aware constructors: the naive utcnow()/utcfromtimestamp()
    # helpers are deprecated in newer Python releases.  The formatted fields
    # are identical because the aware datetime is expressed in UTC.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
105 | |
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    # Use timezone-aware constructors: the naive utcnow()/utcfromtimestamp()
    # helpers are deprecated in newer Python releases.  The formatted fields
    # are identical because the aware datetime is expressed in UTC.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1],
        dt.year, dt.hour, dt.minute, dt.second)
124 | |
125 | |
# Zone names treated as zero offset from UTC (only the keys are used).
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)
def offset_from_tz_string(tz):
    """Return the UTC offset, in seconds, described by timezone string *tz*.

    Names listed in UTC_ZONES give 0.  Numeric forms such as "-0800",
    "+01:00" or "5" are converted to a signed number of seconds.  Anything
    else yields None.
    """
    if tz in UTC_ZONES:
        return 0
    match = TIMEZONE_RE.search(tz)
    if not match:
        return None
    sign, hours, minutes = match.groups()
    seconds = 3600 * int(hours)
    if minutes:
        seconds = seconds + 60 * int(minutes)
    return -seconds if sign == '-' else seconds
142 | |
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert parsed date components to seconds since the epoch.

    The arguments are the (string or None) groups produced by the date
    regular expressions in this module.  *mon* may be an English month
    abbreviation or a month number; *hr*, *min* and *sec* default to 0 when
    None; *tz* is a timezone string understood by offset_from_tz_string()
    and defaults to UTC when None.

    Returns None when the year exceeds datetime.MAXYEAR, the month is not
    recognised, the fields are out of range, or the timezone is unknown.
    """
    yr = int(yr)
    if yr > datetime.MAXYEAR:
        return None

    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    day = int(day)
    hr = int(hr)
    min = int(min)
    # The ISO 8601 pattern accepts fractional seconds ("29.5"); int() alone
    # would raise ValueError on those, so go through float() and truncate.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year: pick the century that puts the (short) year
        # within 50 years of the current date
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t
198 | |
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    (?:
       ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+) # timezone
       \s*
    )?
    (?:
       \(\w+\)         # ASCII representation of timezone in parens.
       \s*
    )?$""", re.X | re.ASCII)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        try:
            mon = MONTHS_LOWER.index(g[1].lower()) + 1
        except ValueError:
            # The strict pattern only checks the *shape* of the month token
            # ("Foo" matches); an unknown name is an unrecognised date, which
            # this function reports as None rather than by raising.
            return None
        # The seconds group is exactly two digits, so int() is sufficient and
        # keeps the documented integer return type.
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), int(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
280 | |
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   (?:
      ([-+]?\d\d?:?(:?\d\d)?
       |Z|z)             # timezone  (Z is "zero meridian", i.e. GMT)
      \s*
   )?$""", re.X | re. ASCII)
def iso2time(text):
    """
    Like http2time, but for ISO 8601 style dates.  Accepted forms include:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    Leading whitespace is ignored.  None is returned when the text does not
    match the expected pattern.
    """
    match = ISO_DATE_RE.search(text.lstrip())
    if match is None:
        return None  # bad format

    # The last group is an inner fragment of the timezone group; the full
    # timezone string is already captured in tz, so it is discarded here.
    yr, mon, day, hr, min, sec, tz, _ = match.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
327 | |
328 | |
329 # Header parsing | |
330 # ----------------------------------------------------------------------------- | |
331 | |
def unmatched(match):
    """Return the searched string with the matched span removed."""
    begin, finish = match.span(0)
    subject = match.string
    return subject[:begin] + subject[finish:]
336 | |
HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    *header_values* is an iterable of header strings (not a single string).
    The parser understands ",", ";" and "=" plus quoted values after "=".
    Space separated tokens are treated as if separated by ";", and several
    header strings are treated as one value joined by ",".

    The accepted syntax is a relaxed form of this HTTP/1.1 BNF:

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header becomes a list of (key, value) pairs; a lone token (one not
    followed by "=") gets the value None.  Syntactically incorrect headers
    will not necessarily be parsed as you would want.

    Examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, str)
    parsed = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            token = HEADER_TOKEN_RE.search(text)
            if token:
                # Consume the name, then look for an optional "=value".
                text = unmatched(token)
                name = token.group(1)
                quoted = HEADER_QUOTED_VALUE_RE.search(text)
                if quoted:
                    # quoted value: drop the quotes and undo backslash escapes
                    text = unmatched(quoted)
                    value = HEADER_ESCAPE_RE.sub(r"\1", quoted.group(1))
                else:
                    plain = HEADER_VALUE_RE.search(text)
                    if plain:
                        # unquoted value
                        text = unmatched(plain)
                        value = plain.group(1).rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
                continue

            stripped = text.lstrip()
            if stripped.startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = stripped[1:]
                if pairs:
                    parsed.append(pairs)
                pairs = []
                continue

            # skip junk
            non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
            assert nr_junk_chars > 0, (
                "split_header_words bug: '%s', '%s', %s" %
                (orig_text, text, pairs))
            text = non_junk
        if pairs:
            parsed.append(pairs)
    return parsed
425 | |
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Pairs within one list are joined with "; " and the lists with
    ", ".  A value of None emits the bare key; any value that is not made up
    solely of word characters is quoted, with embedded quotes and
    backslashes escaped.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]])
    'text/plain; charset="iso-8859-1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]])
    'text/plain, charset="iso-8859-1"'

    """
    def render(key, value):
        # Format one (key, value) pair as it appears in the header.
        if value is None:
            return key
        if not re.search(r"^\w+$", value):
            escaped = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", value)  # escape " and \
            value = '"%s"' % escaped
        return "%s=%s" % (key, value)

    headers = []
    for pairs in lists:
        rendered = [render(key, value) for key, value in pairs]
        if rendered:
            headers.append("; ".join(rendered))
    return ", ".join(headers)
451 | |
def strip_quotes(text):
    """Remove one leading and one trailing double quote from *text*, if present.

    The two ends are handled independently, so unbalanced quotes are
    stripped too.
    """
    start = 1 if text.startswith('"') else 0
    text = text[start:]
    if text.endswith('"'):
        return text[:-1]
    return text
458 | |
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    Netscape-style Set-Cookie headers may contain an unquoted "," in the
    expires field, so split_header_words cannot be used for them.  This
    parser is also used for RFC 2109 cookies.

    Each header is split on ";".  The first parameter is the cookie's
    name/value and is kept verbatim; for later parameters, recognised
    attribute names are lower-cased, a "version" value has its quotes
    stripped, and an "expires" value is converted to seconds since the epoch
    (None if the date is invalid).  A value of None means the parameter had
    no "=" at all, as opposed to an empty value.

    Returns a list with one list of (key, value) pairs per usable header;
    ("version", "0") is appended when the header did not set a version.
    A header whose first parameter has an empty name is dropped entirely;
    later parameters with empty names are skipped.
    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    parsed = []
    for header in ns_headers:
        pairs = []
        saw_version = False

        # Not strictly RFC-conformant (empty names and values are legal
        # there); kept this way mostly for backwards compatibility.
        for index, chunk in enumerate(header.split(';')):
            name, eq, raw = chunk.strip().partition('=')
            name = name.strip()

            if not name:
                if index == 0:
                    break
                continue

            # distinguish "present but empty" from "missing altogether"
            value = raw.strip() if eq else None

            if index != 0:
                lowered = name.lower()
                if lowered in known_attrs:
                    name = lowered

                if name == "version":
                    # This is an RFC 2109 cookie.
                    saw_version = True
                    if value is not None:
                        value = strip_quotes(value)
                elif name == "expires" and value is not None:
                    # convert expires date to seconds since epoch
                    value = http2time(strip_quotes(value))  # None if invalid
            pairs.append((name, value))

        if not pairs:
            continue
        if not saw_version:
            pairs.append(("version", "0"))
        parsed.append(pairs)

    return parsed
525 | |
526 | |
IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text) or text == "":
        # looks like a dotted-numeric address, or is empty
        return False
    # a name may neither start nor end with a dot
    return not (text[0] == "." or text[-1] == ".")
542 | |
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # "A has the form NB" means B must be a *suffix* of A.  Searching for B
    # anywhere in A would wrongly accept e.g. "x.y.com.evil.org" for
    # ".y.com".  Since A != B at this point, a suffix match also guarantees
    # that N is non-empty.
    if not A.endswith(B):
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
581 | |
def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    Used when accepting/blocking domains: anything that does not end in a
    dotted run of digits (per IPV4_RE) counts.

    """
    return not IPV4_RE.search(text)
591 | |
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.  The comparison is
    case-insensitive.  If either side looks like an IP address only exact
    equality matches.  Otherwise a pattern B with a leading dot matches any
    A that ends with it, and a pattern without one must equal A.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # at least one IP address: only identical strings match
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B
611 | |
cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    The host is taken from the network-location part of the request's full
    URL, falling back to the request's "Host" header when the URL has none.
    A trailing ":port" is removed.

    """
    netloc = urllib.parse.urlparse(request.get_full_url())[1]
    if netloc == "":
        netloc = request.get_header("Host", "")

    # remove port, if present
    without_port = cut_port_re.sub("", netloc, 1)
    return without_port.lower()
628 | |
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  A request-host
    containing no dot (and not matching IPV4_RE) gets ".local" appended to
    form the effective name; otherwise the two are the same.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
639 | |
def request_path(request):
    """Path component of request-URI, as defined by RFC 2965.

    The path is escaped with escape_path() and always starts with "/".
    """
    raw_path = urllib.parse.urlsplit(request.get_full_url()).path
    escaped = escape_path(raw_path)
    if escaped.startswith("/"):
        return escaped
    # fix bad RFC 2396 absoluteURI
    return "/" + escaped
649 | |
def request_port(request):
    """Return the port of the request's host, as a string.

    The port is whatever follows the first ":" in ``request.host``;
    DEFAULT_HTTP_PORT is returned when there is no ":".  If the text after
    the colon is not an integer, a debug message is logged and None is
    returned.
    """
    _, colon, port = request.host.partition(':')
    if not colon:
        return DEFAULT_HTTP_PORT
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
663 | |
664 # Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't | |
665 # need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). | |
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %-escape captured by *match* with its hex digits uppercased."""
    hex_digits = match.group(1)
    return "%" + hex_digits.upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # The character encoding used to create a URL containing %-escapes is
    # unknowable, but one has to be picked in order to escape invalid path
    # characters.  UTF-8 (urllib.parse.quote's default) is used, as
    # recommended in the HTML 4.0 specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    quoted = urllib.parse.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
683 | |
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # Split at the first dot: everything after it is B.
    _, dot, b = h.partition(".")
    if dot and is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h
718 | |
def is_third_party(request):
    """Return True if *request* is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    origin_reach = reach(request.origin_req_host)
    return not domain_match(request_host(request), origin_reach)
734 | |
735 | |
class Cookie:
    """HTTP Cookie.

    Represents both Netscape and RFC 2965 cookies.

    This is deliberately a plain attribute holder, so it is possible to
    build instances that do not comply with the cookie standards.
    CookieJar.make_cookies is the factory for Cookie objects (parsing,
    supplying defaults, normalising to the representation used here), and
    CookiePolicy decides whether a cookie is accepted from, or returned to,
    a server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than"Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie attributes.

        *version* is coerced to int and *expires* to int (via float) unless
        None.  *domain* is lower-cased.  *rest* (non-standard attributes) is
        shallow-copied.  Raises ValueError if port is None while
        port_specified is True.
        """
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(float(expires))
        if port_specified is True and port is None:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value
        self.port = port
        self.port_specified = port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Whether the domain cookie-attribute had an initial dot has to be
        # remembered in order to follow RFC 2965 (as clarified in draft
        # errata); it is needed for the returned $Domain value.
        self.domain_initial_dot = domain_initial_dot
        self.path = path
        self.path_specified = path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return whether non-standard attribute *name* is set."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return non-standard attribute *name*, or *default* if unset."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set non-standard attribute *name* to *value*."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time at or before *now*.

        *now* defaults to the current time; a cookie whose expires is None
        never counts as expired.
        """
        if now is None:
            now = time.time()
        if self.expires is None:
            return False
        if self.expires <= now:
            return True
        return False

    def __str__(self):
        port_part = "" if self.port is None else ":"+self.port
        limit = self.domain + port_part + self.path
        if self.value is None:
            namevalue = self.name
        else:
            namevalue = "%s=%s" % (self.name, self.value)
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        fields = ("version", "name", "value",
                  "port", "port_specified",
                  "domain", "domain_specified", "domain_initial_dot",
                  "path", "path_specified",
                  "secure", "expires", "discard", "comment", "comment_url",
                  )
        args = ["%s=%s" % (field, repr(getattr(self, field)))
                for field in fields]
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "%s(%s)" % (self.__class__.__name__, ", ".join(args))
832 | |
833 | |
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    This base class leaves set_ok() and return_ok() unimplemented and
    permits every domain and path.  The subclass DefaultCookiePolicy defines
    the standard rules for Netscape and RFC 2965 cookies -- override that if
    you want a customized policy.

    """

    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        Must be provided by subclasses; this version always raises
        NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        Must be provided by subclasses; this version always raises
        NotImplementedError.
        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation permits every domain.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation permits every path.
        """
        return True
865 | |
866 | |
class DefaultCookiePolicy(CookiePolicy):
    """Implements the standard rules for accepting and returning cookies.

    Acceptance is decided by set_ok() and returning by return_ok(); each of
    them runs a fixed sequence of set_ok_*/return_ok_* checks and fails as
    soon as one check returns false.
    """

    # Bit flags that may be combined in the strict_ns_domain setting.
    DomainStrictNoDots = 1
    DomainStrictNonDomain = 2
    DomainRFC2965Match = 4

    # Ready-made combinations of the flags above.
    DomainLiberal = 0
    DomainStrict = DomainStrictNoDots|DomainStrictNonDomain

    def __init__(self,
                 blocked_domains=None, allowed_domains=None,
                 netscape=True, rfc2965=False,
                 rfc2109_as_netscape=None,
                 hide_cookie2=False,
                 strict_domain=False,
                 strict_rfc2965_unverifiable=True,
                 strict_ns_unverifiable=False,
                 strict_ns_domain=DomainLiberal,
                 strict_ns_set_initial_dollar=False,
                 strict_ns_set_path=False,
                 secure_protocols=("https", "wss")
                 ):
        """Constructor arguments should be passed as keyword arguments only.

        Every switch is stored unchanged on an instance attribute of the same
        name.  blocked_domains is stored as a tuple (empty when None is
        given); allowed_domains is stored as a tuple, or as None when no
        allow-list is wanted.
        """
        self.netscape = netscape
        self.rfc2965 = rfc2965
        self.rfc2109_as_netscape = rfc2109_as_netscape
        self.hide_cookie2 = hide_cookie2
        self.strict_domain = strict_domain
        self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
        self.strict_ns_unverifiable = strict_ns_unverifiable
        self.strict_ns_domain = strict_ns_domain
        self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
        self.strict_ns_set_path = strict_ns_set_path
        self.secure_protocols = secure_protocols

        if blocked_domains is not None:
            self._blocked_domains = tuple(blocked_domains)
        else:
            self._blocked_domains = ()

        # None (rather than an empty tuple) means "no allow-list": see
        # is_not_allowed().
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def blocked_domains(self):
        """Return the sequence of blocked domains (as a tuple)."""
        return self._blocked_domains
    def set_blocked_domains(self, blocked_domains):
        """Set the sequence of blocked domains."""
        self._blocked_domains = tuple(blocked_domains)

    def is_blocked(self, domain):
        """Return True if domain matches any entry of the block-list.

        Matching is delegated to user_domain_match().
        """
        for blocked_domain in self._blocked_domains:
            if user_domain_match(domain, blocked_domain):
                return True
        return False

    def allowed_domains(self):
        """Return None, or the sequence of allowed domains (as a tuple)."""
        return self._allowed_domains
    def set_allowed_domains(self, allowed_domains):
        """Set the sequence of allowed domains, or None."""
        if allowed_domains is not None:
            allowed_domains = tuple(allowed_domains)
        self._allowed_domains = allowed_domains

    def is_not_allowed(self, domain):
        """Return True if an allow-list is set and domain matches none of it.

        Returns False when no allow-list is configured (it is None).
        Matching is delegated to user_domain_match().
        """
        if self._allowed_domains is None:
            return False
        for allowed_domain in self._allowed_domains:
            if user_domain_match(domain, allowed_domain):
                return False
        return True

    def set_ok(self, cookie, request):
        """
        If you override .set_ok(), be sure to call this method.  If it returns
        false, so should your subclass (assuming your subclass wants to be more
        strict about which cookies to accept).

        Runs the set_ok_version, set_ok_verifiability, set_ok_name,
        set_ok_path, set_ok_domain and set_ok_port checks in that order and
        returns False at the first one that fails, True otherwise.

        """
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        assert cookie.name is not None

        # Each check is looked up by name, so a subclass can override an
        # individual set_ok_* method.
        for n in "version", "verifiability", "name", "path", "domain", "port":
            fn_name = "set_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False

        return True

    def set_ok_version(self, cookie, request):
        """Reject cookies with no version, or of a protocol switched off."""
        if cookie.version is None:
            # Version is always set to 0 by parse_ns_headers if it's a Netscape
            # cookie, so this must be an invalid RFC 2965 cookie.
            _debug(" Set-Cookie2 without version attribute (%s=%s)",
                   cookie.name, cookie.value)
            return False
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def set_ok_verifiability(self, cookie, request):
        """Reject third-party cookies on unverifiable requests, if so configured.

        The strict_rfc2965_unverifiable switch applies to cookies with
        version > 0 and strict_ns_unverifiable to version 0 cookies.
        """
        if request.unverifiable and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug(" third-party RFC 2965 cookie during "
                       "unverifiable transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug(" third-party Netscape cookie during "
                       "unverifiable transaction")
                return False
        return True

    def set_ok_name(self, cookie, request):
        """Reject version 0 cookies named "$..." if strict_ns_set_initial_dollar."""
        # Try and stop servers setting V0 cookies designed to hack other
        # servers that know both V0 and V1 protocols.
        if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
            cookie.name.startswith("$")):
            _debug(" illegal name (starts with '$'): '%s'", cookie.name)
            return False
        return True

    def set_ok_path(self, cookie, request):
        """Reject an explicit path that does not path-match the request.

        Only enforced for cookies with version > 0, or for version 0 cookies
        when strict_ns_set_path is set; the match itself is done by
        path_return_ok().
        """
        if cookie.path_specified:
            # req_path is only needed for the debug message below.
            req_path = request_path(request)
            if ((cookie.version > 0 or
                 (cookie.version == 0 and self.strict_ns_set_path)) and
                not self.path_return_ok(cookie.path, request)):
                _debug(" path attribute %s is not a prefix of request "
                       "path %s", cookie.path, req_path)
                return False
        return True

    def set_ok_domain(self, cookie, request):
        """Decide whether the cookie's domain is acceptable for this request.

        The block-list and allow-list are always consulted.  The remaining
        checks only apply when the cookie carried an explicit domain
        (cookie.domain_specified).
        """
        if self.is_blocked(cookie.domain):
            _debug(" domain %s is in user block-list", cookie.domain)
            return False
        if self.is_not_allowed(cookie.domain):
            _debug(" domain %s is not in user allow-list", cookie.domain)
            return False
        if cookie.domain_specified:
            req_host, erhn = eff_request_host(request)
            domain = cookie.domain
            if self.strict_domain and (domain.count(".") >= 2):
                # XXX This should probably be compared with the Konqueror
                # (kcookiejar.cpp) and Mozilla implementations, but it's a
                # losing battle.
                i = domain.rfind(".")
                j = domain.rfind(".", 0, i)
                if j == 0:  # domain like .foo.bar
                    tld = domain[i+1:]
                    sld = domain[j+1:i]
                    if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
                       "gov", "mil", "int", "aero", "biz", "cat", "coop",
                       "info", "jobs", "mobi", "museum", "name", "pro",
                       "travel", "eu") and len(tld) == 2:
                        # domain like .co.uk
                        _debug(" country-code second level domain %s", domain)
                        return False
            # Ignoring one leading dot, the domain must still contain a dot
            # (".local" is the single exception).
            if domain.startswith("."):
                undotted_domain = domain[1:]
            else:
                undotted_domain = domain
            embedded_dots = (undotted_domain.find(".") >= 0)
            if not embedded_dots and domain != ".local":
                _debug(" non-local domain %s contains no embedded dot",
                       domain)
                return False
            if cookie.version == 0:
                # Plain suffix comparison, retried with a dot prepended to the
                # effective request-host when it does not already start with one.
                if (not erhn.endswith(domain) and
                    (not erhn.startswith(".") and
                     not ("."+erhn).endswith(domain))):
                    _debug(" effective request-host %s (even with added "
                           "initial dot) does not end with %s",
                           erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainRFC2965Match)):
                if not domain_match(erhn, domain):
                    _debug(" effective request-host %s does not domain-match "
                           "%s", erhn, domain)
                    return False
            if (cookie.version > 0 or
                (self.strict_ns_domain & self.DomainStrictNoDots)):
                # What is left of the request host once the trailing
                # len(domain) characters are removed must not contain a dot,
                # unless the request host matches IPV4_RE.
                host_prefix = req_host[:-len(domain)]
                if (host_prefix.find(".") >= 0 and
                    not IPV4_RE.search(req_host)):
                    _debug(" host prefix %s for domain %s contains a dot",
                           host_prefix, domain)
                    return False
        return True

    def set_ok_port(self, cookie, request):
        """Reject a cookie whose explicit port list is invalid for the request.

        Every entry seen in the comma-separated cookie.port up to the match
        must be numeric, and one entry must equal the request port ("80" is
        assumed when request_port() returns None).
        """
        if cookie.port_specified:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            else:
                req_port = str(req_port)
            for p in cookie.port.split(","):
                try:
                    int(p)
                except ValueError:
                    _debug(" bad port %s (not numeric)", p)
                    return False
                if p == req_port:
                    break
            else:
                # Loop finished without a break: no entry matched.
                _debug(" request port (%s) not found in %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok(self, cookie, request):
        """
        If you override .return_ok(), be sure to call this method.  If it
        returns false, so should your subclass (assuming your subclass wants to
        be more strict about which cookies to return).

        Runs the return_ok_version, return_ok_verifiability, return_ok_secure,
        return_ok_expires, return_ok_port and return_ok_domain checks in that
        order and returns False at the first one that fails, True otherwise.

        """
        # Path has already been checked by .path_return_ok(), and domain
        # blocking done by .domain_return_ok().
        _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

        for n in "version", "verifiability", "secure", "expires", "port", "domain":
            fn_name = "return_ok_"+n
            fn = getattr(self, fn_name)
            if not fn(cookie, request):
                return False
        return True

    def return_ok_version(self, cookie, request):
        """Refuse to return cookies of a protocol that is switched off."""
        if cookie.version > 0 and not self.rfc2965:
            _debug(" RFC 2965 cookies are switched off")
            return False
        elif cookie.version == 0 and not self.netscape:
            _debug(" Netscape cookies are switched off")
            return False
        return True

    def return_ok_verifiability(self, cookie, request):
        """Refuse third-party cookies on unverifiable requests, if so configured.

        Uses the same two switches as set_ok_verifiability().
        """
        if request.unverifiable and is_third_party(request):
            if cookie.version > 0 and self.strict_rfc2965_unverifiable:
                _debug(" third-party RFC 2965 cookie during unverifiable "
                       "transaction")
                return False
            elif cookie.version == 0 and self.strict_ns_unverifiable:
                _debug(" third-party Netscape cookie during unverifiable "
                       "transaction")
                return False
        return True

    def return_ok_secure(self, cookie, request):
        """Return secure cookies only when request.type is in secure_protocols."""
        if cookie.secure and request.type not in self.secure_protocols:
            _debug(" secure cookie with non-secure request")
            return False
        return True

    def return_ok_expires(self, cookie, request):
        """Refuse cookies that have expired as of self._now.

        self._now is not set by this class; CookieJar assigns it on the
        policy before consulting it.
        """
        if cookie.is_expired(self._now):
            _debug(" cookie expired")
            return False
        return True

    def return_ok_port(self, cookie, request):
        """Refuse a cookie whose port list does not contain the request port.

        Cookies with a false cookie.port are returned on any port.  "80" is
        assumed when request_port() returns None.
        """
        if cookie.port:
            req_port = request_port(request)
            if req_port is None:
                req_port = "80"
            for p in cookie.port.split(","):
                if p == req_port:
                    break
            else:
                # Loop finished without a break: no entry matched.
                _debug(" request port %s does not match cookie port %s",
                       req_port, cookie.port)
                return False
        return True

    def return_ok_domain(self, cookie, request):
        """Decide whether the cookie's domain matches the request host."""
        req_host, erhn = eff_request_host(request)
        domain = cookie.domain

        # Compare on a dot boundary: both sides get a leading dot below.
        if domain and not domain.startswith("."):
            dotdomain = "." + domain
        else:
            dotdomain = domain

        # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
        if (cookie.version == 0 and
            (self.strict_ns_domain & self.DomainStrictNonDomain) and
            not cookie.domain_specified and domain != erhn):
            _debug(" cookie with unspecified domain does not string-compare "
                   "equal to request domain")
            return False

        if cookie.version > 0 and not domain_match(erhn, domain):
            _debug(" effective request-host name %s does not domain-match "
                   "RFC 2965 cookie domain %s", erhn, domain)
            return False
        if cookie.version == 0 and not ("."+erhn).endswith(dotdomain):
            _debug(" request-host %s does not match Netscape cookie domain "
                   "%s", req_host, domain)
            return False
        return True

    def domain_return_ok(self, domain, request):
        """Return false if no cookie stored under domain should be returned.

        Does a suffix comparison on dot-prefixed names, then consults the
        block-list and the allow-list.
        """
        # Liberal check of the domain.  This is here as an optimization to
        # avoid having to load lots of MSIE cookie files unless necessary.
        req_host, erhn = eff_request_host(request)
        if not req_host.startswith("."):
            req_host = "."+req_host
        if not erhn.startswith("."):
            erhn = "."+erhn
        if domain and not domain.startswith("."):
            dotdomain = "." + domain
        else:
            dotdomain = domain
        if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)):
            #_debug(" request domain %s does not match cookie domain %s",
            #       req_host, domain)
            return False

        if self.is_blocked(domain):
            _debug(" domain %s is in user block-list", domain)
            return False
        if self.is_not_allowed(domain):
            _debug(" domain %s is not in user allow-list", domain)
            return False

        return True

    def path_return_ok(self, path, request):
        """Return True if the request path equals path or lies beneath it.

        A prefix only counts when it ends on a "/" boundary: either path
        itself ends with "/", or the request path has "/" immediately after
        the prefix.
        """
        _debug("- checking cookie path=%s", path)
        req_path = request_path(request)
        pathlen = len(path)
        if req_path == path:
            return True
        elif (req_path.startswith(path) and
              (path.endswith("/") or req_path[pathlen:pathlen+1] == "/")):
            return True

        _debug(" %s does not path-match %s", req_path, path)
        return False
1218 | |
def vals_sorted_by_key(adict):
    """Return a lazy iterator over adict's values, ordered by sorted key.

    The key order is fixed when this function is called; each value is only
    fetched (with adict.get) when the iterator reaches it.
    """
    ordered_keys = sorted(adict)
    return map(adict.get, ordered_keys)
1222 | |
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    A value is treated as a nested mapping when it has an `items` attribute;
    such values are descended into rather than yielded themselves.
    """
    for entry in vals_sorted_by_key(mapping):
        if hasattr(entry, "items"):
            yield from deepvalues(entry)
        else:
            yield entry
1237 | |
1238 | |
class Absent:
    """Marker for a missing dictionary key.

    Used as second parameter to dict.get() method, to distinguish absent
    dict key from one with a None value.
    """
1242 | |
class CookieJar:
    """Collection of HTTP cookies.

    You may not need to know about this class: try
    urllib.request.build_opener(HTTPCookieProcessor).open(url).

    Cookies are kept in a three-level dictionary, self._cookies, indexed by
    domain, then path, then name.  Methods that take self._cookies_lock (a
    re-entrant lock) hold it while reading or changing that structure.
    """

    non_word_re = re.compile(r"\W")
    quote_re = re.compile(r"([\"\\])")
    strict_domain_re = re.compile(r"\.?[^.]*")
    domain_re = re.compile(r"[^.]*")
    dots_re = re.compile(r"^\.+")

    magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII)

    def __init__(self, policy=None):
        """Create an empty jar governed by policy.

        A DefaultCookiePolicy instance is used when policy is None.
        """
        if policy is None:
            policy = DefaultCookiePolicy()
        self._policy = policy

        self._cookies_lock = _threading.RLock()
        self._cookies = {}

    def set_policy(self, policy):
        """Replace the policy object consulted by this jar."""
        self._policy = policy

    def _cookies_for_domain(self, domain, request):
        """Return the cookies stored under domain that the policy lets through."""
        if not self._policy.domain_return_ok(domain, request):
            return []
        _debug("Checking %s for cookies to return", domain)
        cookies = []
        for path, cookies_by_name in self._cookies[domain].items():
            if not self._policy.path_return_ok(path, request):
                continue
            for cookie in cookies_by_name.values():
                if not self._policy.return_ok(cookie, request):
                    _debug(" not returning cookie")
                    continue
                _debug(" it's a match")
                cookies.append(cookie)
        return cookies

    def _cookies_for_request(self, request):
        """Return a list of cookies to be returned to server."""
        cookies = []
        for domain in self._cookies.keys():
            cookies.extend(self._cookies_for_domain(domain, request))
        return cookies

    def _cookie_attrs(self, cookies):
        """Return a list of cookie-attributes to be returned to server.

        like ['foo="bar"; $Path="/"', ...]

        The $Version attribute is also added when appropriate (currently only
        once per request).

        Note that the cookies list is sorted in place.

        """
        # add cookies in order of most specific (ie. longest) path first
        cookies.sort(key=lambda a: len(a.path), reverse=True)

        version_set = False

        attrs = []
        for cookie in cookies:
            # set version of Cookie header
            # XXX
            # What should it be if multiple matching Set-Cookie headers have
            #  different versions themselves?
            # Answer: there is no answer; was supposed to be settled by
            #  RFC 2965 errata, but that may never appear...
            version = cookie.version
            if not version_set:
                version_set = True
                if version > 0:
                    attrs.append("$Version=%s" % version)

            # quote cookie value if necessary
            # (not for Netscape protocol, which already has any quotes
            #  intact, due to the poorly-specified Netscape Cookie: syntax)
            if ((cookie.value is not None) and
                self.non_word_re.search(cookie.value) and version > 0):
                value = self.quote_re.sub(r"\\\1", cookie.value)
            else:
                value = cookie.value

            # add cookie-attributes to be returned in Cookie header
            if cookie.value is None:
                attrs.append(cookie.name)
            else:
                attrs.append("%s=%s" % (cookie.name, value))
            if version > 0:
                if cookie.path_specified:
                    attrs.append('$Path="%s"' % cookie.path)
                if cookie.domain.startswith("."):
                    domain = cookie.domain
                    if (not cookie.domain_initial_dot and
                        domain.startswith(".")):
                        domain = domain[1:]
                    attrs.append('$Domain="%s"' % domain)
                if cookie.port is not None:
                    p = "$Port"
                    if cookie.port_specified:
                        p = p + ('="%s"' % cookie.port)
                    attrs.append(p)

        return attrs

    def add_cookie_header(self, request):
        """Add correct Cookie: header to request (urllib.request.Request object).

        The Cookie2 header is also added unless policy.hide_cookie2 is true.

        Expired cookies are cleared from the jar afterwards.

        """
        _debug("add_cookie_header")
        with self._cookies_lock:
            # The policy's expiry check reads _now, so keep both in step.
            self._policy._now = self._now = int(time.time())

            cookies = self._cookies_for_request(request)

            attrs = self._cookie_attrs(cookies)
            if attrs:
                if not request.has_header("Cookie"):
                    request.add_unredirected_header(
                        "Cookie", "; ".join(attrs))

            # if necessary, advertise that we know RFC 2965
            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
                not request.has_header("Cookie2")):
                for cookie in cookies:
                    if cookie.version != 1:
                        request.add_unredirected_header("Cookie2", '$Version="1"')
                        break

        self.clear_expired_cookies()

    def _normalized_cookie_tuples(self, attrs_set):
        """Return list of tuples containing normalised cookie information.

        attrs_set is the list of lists of key,value pairs extracted from
        the Set-Cookie or Set-Cookie2 headers.

        Tuples are name, value, standard, rest, where name and value are the
        cookie name and value, standard is a dictionary containing the standard
        cookie-attributes (discard, secure, version, expires or max-age,
        domain, path and port) and rest is a dictionary containing the rest of
        the cookie-attributes.

        A cookie with an unusable attribute (for example a Max-Age that is
        missing or not a number) is left out of the result; the other
        cookies are unaffected.

        """
        cookie_tuples = []

        boolean_attrs = "discard", "secure"
        value_attrs = ("version",
                       "expires", "max-age",
                       "domain", "path", "port",
                       "comment", "commenturl")

        for cookie_attrs in attrs_set:
            name, value = cookie_attrs[0]

            # Build dictionary of standard cookie-attributes (standard) and
            # dictionary of other cookie-attributes (rest).

            # Note: expiry time is normalised to seconds since epoch.  V0
            # cookies should have the Expires cookie-attribute, and V1 cookies
            # should have Max-Age, but since V1 includes RFC 2109 cookies (and
            # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
            # accept either (but prefer Max-Age).
            max_age_set = False

            bad_cookie = False

            standard = {}
            rest = {}
            for k, v in cookie_attrs[1:]:
                lc = k.lower()
                # don't lose case distinction for unknown fields
                if lc in value_attrs or lc in boolean_attrs:
                    k = lc
                if k in boolean_attrs and v is None:
                    # boolean cookie-attribute is present, but has no value
                    # (like "discard", rather than "port=80")
                    v = True
                if k in standard:
                    # only first value is significant
                    continue
                if k == "domain":
                    if v is None:
                        _debug(" missing value for domain attribute")
                        bad_cookie = True
                        break
                    # RFC 2965 section 3.3.3
                    v = v.lower()
                if k == "expires":
                    if max_age_set:
                        # Prefer max-age to expires (like Mozilla)
                        continue
                    if v is None:
                        _debug(" missing or invalid value for expires "
                               "attribute: treating as session cookie")
                        continue
                if k == "max-age":
                    max_age_set = True
                    try:
                        v = int(v)
                    except (TypeError, ValueError):
                        # TypeError covers a Max-Age with no value at all
                        # (v is None).  Without it the error would escape and
                        # the caller would drop every cookie in the response.
                        _debug(" missing or invalid (non-numeric) value for "
                               "max-age attribute")
                        bad_cookie = True
                        break
                    # convert RFC 2965 Max-Age to seconds since epoch
                    # XXX Strictly you're supposed to follow RFC 2616
                    #   age-calculation rules.  Remember that zero Max-Age
                    #   is a request to discard (old and new) cookie, though.
                    k = "expires"
                    v = self._now + v
                if (k in value_attrs) or (k in boolean_attrs):
                    if (v is None and
                        k not in ("port", "comment", "commenturl")):
                        _debug(" missing value for %s attribute", k)
                        bad_cookie = True
                        break
                    standard[k] = v
                else:
                    rest[k] = v

            if bad_cookie:
                continue

            cookie_tuples.append((name, value, standard, rest))

        return cookie_tuples

    def _cookie_from_cookie_tuple(self, tup, request):
        """Build a Cookie from one normalised tuple, filling in defaults.

        Returns None when the version is not an integer, or when the expiry
        time is not in the future; in the latter case any stored cookie with
        the same domain, path and name is removed from the jar.
        """
        # standard is dict of standard cookie-attributes, rest is dict of the
        # rest of them
        name, value, standard, rest = tup

        domain = standard.get("domain", Absent)
        path = standard.get("path", Absent)
        port = standard.get("port", Absent)
        expires = standard.get("expires", Absent)

        # set the easy defaults
        version = standard.get("version", None)
        if version is not None:
            try:
                version = int(version)
            except ValueError:
                return None  # invalid version, ignore cookie
        secure = standard.get("secure", False)
        # (discard is also set if expires is Absent)
        discard = standard.get("discard", False)
        comment = standard.get("comment", None)
        comment_url = standard.get("commenturl", None)

        # set default path
        if path is not Absent and path != "":
            path_specified = True
            path = escape_path(path)
        else:
            path_specified = False
            path = request_path(request)
            i = path.rfind("/")
            if i != -1:
                if version == 0:
                    # Netscape spec parts company from reality here
                    path = path[:i]
                else:
                    path = path[:i+1]
            if len(path) == 0: path = "/"

        # set default domain
        domain_specified = domain is not Absent
        # but first we have to remember whether it starts with a dot
        domain_initial_dot = False
        if domain_specified:
            domain_initial_dot = bool(domain.startswith("."))
        if domain is Absent:
            req_host, erhn = eff_request_host(request)
            domain = erhn
        elif not domain.startswith("."):
            domain = "."+domain

        # set default port
        port_specified = False
        if port is not Absent:
            if port is None:
                # Port attr present, but has no value: default to request port.
                # Cookie should then only be sent back on that port.
                port = request_port(request)
            else:
                port_specified = True
                port = re.sub(r"\s+", "", port)
        else:
            # No port attr present.  Cookie can be sent back on any port.
            port = None

        # set default expires and discard
        if expires is Absent:
            expires = None
            discard = True
        elif expires <= self._now:
            # Expiry date in past is request to delete cookie.  This can't be
            # in DefaultCookiePolicy, because can't delete cookies there.
            try:
                self.clear(domain, path, name)
            except KeyError:
                pass
            _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
                   domain, path, name)
            return None

        return Cookie(version,
                      name, value,
                      port, port_specified,
                      domain, domain_specified, domain_initial_dot,
                      path, path_specified,
                      secure,
                      expires,
                      discard,
                      comment,
                      comment_url,
                      rest)

    def _cookies_from_attrs_set(self, attrs_set, request):
        """Turn parsed header attributes into a list of Cookie objects."""
        cookie_tuples = self._normalized_cookie_tuples(attrs_set)

        cookies = []
        for tup in cookie_tuples:
            cookie = self._cookie_from_cookie_tuple(tup, request)
            if cookie: cookies.append(cookie)
        return cookies

    def _process_rfc2109_cookies(self, cookies):
        """Flag version 1 cookies as RFC 2109, optionally downgrading them.

        The cookies are downgraded to version 0 when the policy's
        rfc2109_as_netscape is true or, if that is None or missing, when the
        policy has RFC 2965 handling switched off.
        """
        rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
        if rfc2109_as_ns is None:
            rfc2109_as_ns = not self._policy.rfc2965
        for cookie in cookies:
            if cookie.version == 1:
                cookie.rfc2109 = True
                if rfc2109_as_ns:
                    # treat 2109 cookies as Netscape cookies rather than
                    # as RFC2965 cookies
                    cookie.version = 0

    def make_cookies(self, response, request):
        """Return sequence of Cookie objects extracted from response object."""
        # get cookie-attributes for RFC 2965 and Netscape protocols
        headers = response.info()
        rfc2965_hdrs = headers.get_all("Set-Cookie2", [])
        ns_hdrs = headers.get_all("Set-Cookie", [])
        self._policy._now = self._now = int(time.time())

        rfc2965 = self._policy.rfc2965
        netscape = self._policy.netscape

        if ((not rfc2965_hdrs and not ns_hdrs) or
            (not ns_hdrs and not rfc2965) or
            (not rfc2965_hdrs and not netscape) or
            (not netscape and not rfc2965)):
            return []  # no relevant cookie headers: quick exit

        try:
            cookies = self._cookies_from_attrs_set(
                split_header_words(rfc2965_hdrs), request)
        except Exception:
            _warn_unhandled_exception()
            cookies = []

        if ns_hdrs and netscape:
            try:
                # RFC 2109 and Netscape cookies
                ns_cookies = self._cookies_from_attrs_set(
                    parse_ns_headers(ns_hdrs), request)
            except Exception:
                _warn_unhandled_exception()
                ns_cookies = []
            self._process_rfc2109_cookies(ns_cookies)

            # Look for Netscape cookies (from Set-Cookie headers) that match
            # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
            # For each match, keep the RFC 2965 cookie and ignore the Netscape
            # cookie (RFC 2965 section 9.1).  Actually, RFC 2109 cookies are
            # bundled in with the Netscape cookies for this purpose, which is
            # reasonable behaviour.
            if rfc2965:
                rfc2965_keys = {(cookie.domain, cookie.path, cookie.name)
                                for cookie in cookies}
                # A real list (not a lazy filter object) so that the
                # emptiness test below means something.
                ns_cookies = [
                    ns_cookie for ns_cookie in ns_cookies
                    if (ns_cookie.domain, ns_cookie.path, ns_cookie.name)
                    not in rfc2965_keys]

            if ns_cookies:
                cookies.extend(ns_cookies)

        return cookies

    def set_cookie_if_ok(self, cookie, request):
        """Set a cookie if policy says it's OK to do so."""
        with self._cookies_lock:
            self._policy._now = self._now = int(time.time())

            if self._policy.set_ok(cookie, request):
                self.set_cookie(cookie)

    def set_cookie(self, cookie):
        """Set a cookie, without checking whether or not it should be set."""
        with self._cookies_lock:
            # Read self._cookies only once the lock is held.
            by_path = self._cookies.setdefault(cookie.domain, {})
            by_name = by_path.setdefault(cookie.path, {})
            by_name[cookie.name] = cookie

    def extract_cookies(self, response, request):
        """Extract cookies from response, where allowable given the request."""
        _debug("extract_cookies: %s", response.info())
        with self._cookies_lock:
            for cookie in self.make_cookies(response, request):
                if self._policy.set_ok(cookie, request):
                    _debug(" setting cookie: %s", cookie)
                    self.set_cookie(cookie)

    def clear(self, domain=None, path=None, name=None):
        """Clear some cookies.

        Invoking this method without arguments will clear all cookies.  If
        given a single argument, only cookies belonging to that domain will be
        removed.  If given two arguments, cookies belonging to the specified
        path within that domain are removed.  If given three arguments, then
        the cookie with the specified name, path and domain is removed.

        Raises KeyError if no matching cookie exists.  Raises ValueError if
        name is given without domain and path, or path without domain.

        """
        if name is not None:
            if (domain is None) or (path is None):
                raise ValueError(
                    "domain and path must be given to remove a cookie by name")
            del self._cookies[domain][path][name]
        elif path is not None:
            if domain is None:
                raise ValueError(
                    "domain must be given to remove cookies by path")
            del self._cookies[domain][path]
        elif domain is not None:
            del self._cookies[domain]
        else:
            self._cookies = {}

    def clear_session_cookies(self):
        """Discard all session cookies.

        Note that the .save() method won't save session cookies anyway, unless
        you ask otherwise by passing a true ignore_discard argument.

        """
        with self._cookies_lock:
            for cookie in self:
                if cookie.discard:
                    self.clear(cookie.domain, cookie.path, cookie.name)

    def clear_expired_cookies(self):
        """Discard all expired cookies.

        You probably don't need to call this method: expired cookies are never
        sent back to the server (provided you're using DefaultCookiePolicy),
        this method is called by CookieJar itself every so often, and the
        .save() method won't save expired cookies anyway (unless you ask
        otherwise by passing a true ignore_expires argument).

        """
        with self._cookies_lock:
            now = time.time()
            for cookie in self:
                if cookie.is_expired(now):
                    self.clear(cookie.domain, cookie.path, cookie.name)

    def __iter__(self):
        """Iterate over every stored cookie (see deepvalues for the order)."""
        return deepvalues(self._cookies)

    def __len__(self):
        """Return number of contained cookies."""
        return sum(1 for _ in self)

    def __repr__(self):
        return "<%s[%s]>" % (self.__class__.__name__,
                             ", ".join(repr(cookie) for cookie in self))

    def __str__(self):
        return "<%s[%s]>" % (self.__class__.__name__,
                             ", ".join(str(cookie) for cookie in self))
1768 | |
1769 | |
# derives from OSError for backwards-compatibility with Python 2.4.0
class LoadError(OSError):
    """Error type for failures while loading cookies from a file."""
1772 | |
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file."""

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename may be None or any path-like object; it is stored as returned
        by os.fspath().  delayload is stored as a bool.

        """
        CookieJar.__init__(self, policy)
        self.filename = None if filename is None else os.fspath(filename)
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file."""
        # Not implemented at this level of the hierarchy.
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            # Fall back on the name given at construction time.
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        with open(filename) as cookie_file:
            self._really_load(cookie_file, filename,
                              ignore_discard, ignore_expires)

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or OSError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            # Fall back on the name given at construction time.
            filename = self.filename
            if filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)

        with self._cookies_lock:
            # Keep a copy so the jar can be put back if loading fails.
            saved_cookies = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except OSError:
                self._cookies = saved_cookies
                raise
1826 | |
1827 | |
def lwp_cookie_str(cookie):
    """Render a Cookie as one header value in the LWP cookie file format.

    The format is actually extended a little -- see module docstring.

    """
    pairs = [
        (cookie.name, cookie.value),
        ("path", cookie.path),
        ("domain", cookie.domain),
    ]
    if cookie.port is not None:
        pairs.append(("port", cookie.port))

    # Flag attributes are written as bare keys (paired with None).
    flags = (
        ("path_spec", cookie.path_specified),
        ("port_spec", cookie.port_specified),
        ("domain_dot", cookie.domain_initial_dot),
        ("secure", cookie.secure),
    )
    pairs.extend((key, None) for key, enabled in flags if enabled)

    # A false expiry value (None or 0) produces no "expires" entry.
    if cookie.expires:
        pairs.append(("expires", time2isoz(float(cookie.expires))))
    if cookie.discard:
        pairs.append(("discard", None))

    optional_text = (
        ("comment", cookie.comment),
        ("commenturl", cookie.comment_url),
    )
    pairs.extend((key, text) for key, text in optional_text if text)

    # Non-standard attributes follow in sorted key order, stringified.
    pairs.extend((key, str(cookie._rest[key])) for key in sorted(cookie._rest))

    pairs.append(("version", str(cookie.version)))

    return join_header_words([pairs])
1855 | |
class LWPCookieJar(FileCookieJar):
    """
    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
    "Set-Cookie3" is the format used by the libwww-perl library, not known
    to be compatible with any browser, but which is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.

        Cookies marked discard are left out when ignore_discard is false, and
        cookies that have expired are left out when ignore_expires is false.
        Every header, including the last one, is followed by a newline.

        """
        now = time.time()
        lines = []
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue
            lines.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
        return "\n".join(lines + [""])

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies to a file as "Set-Cookie3" lines.

        filename defaults to self.filename; ValueError is raised when neither
        is available.  ignore_discard and ignore_expires are passed on to
        as_lwp_str().  A file created by this call gets mode 0o600 (further
        restricted by the umask); an existing file keeps its permissions.

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        # Cookie files contain session secrets, so do not create them
        # readable by group/others as a plain open(filename, "w") would.
        fd = os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            f.write("#LWP-Cookies-2.0\n")
            f.write(self.as_lwp_str(ignore_discard, ignore_expires))

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read "Set-Cookie3" lines from the open file f into this jar.

        The first line must match self.magic_re, otherwise LoadError is
        raised.  Lines that do not start with "Set-Cookie3:" are skipped.
        filename is only used in error messages.  Cookies marked discard
        (unless ignore_discard) and expired cookies (unless ignore_expires)
        are not stored.  OSError propagates unchanged; any other exception
        while parsing is reported and turned into LoadError.

        """
        magic = f.readline()
        if not self.magic_re.search(magic):
            msg = ("%r does not look like a Set-Cookie3 (LWP) format "
                   "file" % filename)
            raise LoadError(msg)

        now = time.time()

        header = "Set-Cookie3:"
        boolean_attrs = ("port_spec", "path_spec", "domain_dot",
                         "secure", "discard")
        value_attrs = ("version",
                       "port", "path", "domain",
                       "expires",
                       "comment", "commenturl")

        # Bound before the loop so the error report below cannot itself fail
        # with an unbound name when the very first readline() raises.
        line = ""
        try:
            for line in iter(f.readline, ""):
                if not line.startswith(header):
                    continue
                line = line[len(header):].strip()

                for data in split_header_words([line]):
                    name, value = data[0]
                    standard = dict.fromkeys(boolean_attrs, False)
                    rest = {}
                    for k, v in data[1:]:
                        lc = k.lower() if k is not None else None
                        # don't lose case distinction for unknown fields
                        if (lc in value_attrs) or (lc in boolean_attrs):
                            k = lc
                        if k in boolean_attrs:
                            # a bare flag (no value) means "true"
                            if v is None:
                                v = True
                            standard[k] = v
                        elif k in value_attrs:
                            standard[k] = v
                        else:
                            rest[k] = v

                    h = standard.get
                    expires = h("expires")
                    discard = h("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    if expires is None:
                        # no (usable) expiry: treat as a session cookie
                        discard = True
                    domain = h("domain")
                    domain_specified = domain.startswith(".")
                    c = Cookie(h("version"), name, value,
                               h("port"), h("port_spec"),
                               domain, domain_specified, h("domain_dot"),
                               h("path"), h("path_spec"),
                               h("secure"),
                               expires,
                               discard,
                               h("comment"),
                               h("commenturl"),
                               rest)
                    if not ignore_discard and c.discard:
                        continue
                    if not ignore_expires and c.is_expired(now):
                        continue
                    self.set_cookie(c)
        except OSError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Set-Cookie3 format file %r: %r" %
                            (filename, line))
1974 | |
1975 | |
class MozillaCookieJar(FileCookieJar):
    """

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Mozilla/Netscape
    `cookies.txt' format.  lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    # The first line of a file must match this for it to be loaded.
    magic_re = re.compile("#( Netscape)? HTTP Cookie File")
    # Written verbatim at the top of every saved file.
    header = """\
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.

"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read tab-separated cookie lines from the open file f into this jar.

        The first line must match self.magic_re, otherwise LoadError is
        raised.  Blank lines and lines starting with "#" or "$" are skipped;
        every other line must hold exactly seven tab-separated fields.
        filename is only used in error messages.  Cookies marked discard
        (unless ignore_discard) and expired cookies (unless ignore_expires)
        are not stored.  OSError propagates unchanged; any other exception
        while parsing is reported and turned into LoadError.

        """
        now = time.time()

        magic = f.readline()
        if not self.magic_re.search(magic):
            raise LoadError(
                "%r does not look like a Netscape format cookies file" %
                filename)

        # Bound before the loop so the error report below cannot itself fail
        # with an unbound name when the very first readline() raises.
        line = ""
        try:
            for line in iter(f.readline, ""):
                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"):
                    line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                stripped = line.strip()
                if stripped == "" or stripped.startswith(("#", "$")):
                    continue

                (domain, domain_specified, path, secure, expires,
                 name, value) = line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                # Explicit check instead of an assert, so the validation is
                # not stripped when Python runs with -O.  The ValueError is
                # converted to LoadError by the handler below.
                if domain_specified != initial_dot:
                    raise ValueError(
                        "domain flag does not match leading dot of domain %r"
                        % domain)

                discard = False
                if expires == "":
                    # no expiry: treat as a session cookie
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except OSError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Netscape format cookies file %r: %r" %
                            (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies to a file in the Netscape cookies.txt format.

        filename defaults to self.filename; ValueError is raised when neither
        is available.  Cookies marked discard (unless ignore_discard) and
        expired cookies (unless ignore_expires) are left out.  A file created
        by this call gets mode 0o600 (further restricted by the umask); an
        existing file keeps its permissions.

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        # Cookie files contain session secrets, so do not create them
        # readable by group/others as a plain open(filename, "w") would.
        fd = os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = "TRUE" if cookie.secure else "FALSE"
                initial_dot = "TRUE" if cookie.domain.startswith(".") else "FALSE"
                expires = "" if cookie.expires is None else str(cookie.expires)
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) +
                    "\n")