tests versuch 2
This commit is contained in:
parent
fdf385fe06
commit
c88f7df83a
2363 changed files with 408191 additions and 0 deletions
|
@ -0,0 +1,18 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
"""CacheControl import Interface.
|
||||
|
||||
Make it easy to import from cachecontrol without long namespaces.
|
||||
"""
|
||||
__author__ = "Eric Larson"
|
||||
__email__ = "eric@ionrock.org"
|
||||
__version__ = "0.12.11"
|
||||
|
||||
from .wrapper import CacheControl
|
||||
from .adapter import CacheControlAdapter
|
||||
from .controller import CacheController
|
||||
|
||||
import logging
|
||||
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,61 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import logging
|
||||
|
||||
from pip._vendor import requests
|
||||
|
||||
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
|
||||
from pip._vendor.cachecontrol.cache import DictCache
|
||||
from pip._vendor.cachecontrol.controller import logger
|
||||
|
||||
from argparse import ArgumentParser
|
||||
|
||||
|
||||
def setup_logging():
|
||||
logger.setLevel(logging.DEBUG)
|
||||
handler = logging.StreamHandler()
|
||||
logger.addHandler(handler)
|
||||
|
||||
|
||||
def get_session():
|
||||
adapter = CacheControlAdapter(
|
||||
DictCache(), cache_etags=True, serializer=None, heuristic=None
|
||||
)
|
||||
sess = requests.Session()
|
||||
sess.mount("http://", adapter)
|
||||
sess.mount("https://", adapter)
|
||||
|
||||
sess.cache_controller = adapter.controller
|
||||
return sess
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("url", help="The URL to try and cache")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main(args=None):
|
||||
args = get_args()
|
||||
sess = get_session()
|
||||
|
||||
# Make a request to get a response
|
||||
resp = sess.get(args.url)
|
||||
|
||||
# Turn on logging
|
||||
setup_logging()
|
||||
|
||||
# try setting the cache
|
||||
sess.cache_controller.cache_response(resp.request, resp.raw)
|
||||
|
||||
# Now try to get it
|
||||
if sess.cache_controller.cached_request(resp.request):
|
||||
print("Cached!")
|
||||
else:
|
||||
print("Not cached :(")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,137 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import types
|
||||
import functools
|
||||
import zlib
|
||||
|
||||
from pip._vendor.requests.adapters import HTTPAdapter
|
||||
|
||||
from .controller import CacheController, PERMANENT_REDIRECT_STATUSES
|
||||
from .cache import DictCache
|
||||
from .filewrapper import CallbackFileWrapper
|
||||
|
||||
|
||||
class CacheControlAdapter(HTTPAdapter):
|
||||
invalidating_methods = {"PUT", "PATCH", "DELETE"}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
cache=None,
|
||||
cache_etags=True,
|
||||
controller_class=None,
|
||||
serializer=None,
|
||||
heuristic=None,
|
||||
cacheable_methods=None,
|
||||
*args,
|
||||
**kw
|
||||
):
|
||||
super(CacheControlAdapter, self).__init__(*args, **kw)
|
||||
self.cache = DictCache() if cache is None else cache
|
||||
self.heuristic = heuristic
|
||||
self.cacheable_methods = cacheable_methods or ("GET",)
|
||||
|
||||
controller_factory = controller_class or CacheController
|
||||
self.controller = controller_factory(
|
||||
self.cache, cache_etags=cache_etags, serializer=serializer
|
||||
)
|
||||
|
||||
def send(self, request, cacheable_methods=None, **kw):
|
||||
"""
|
||||
Send a request. Use the request information to see if it
|
||||
exists in the cache and cache the response if we need to and can.
|
||||
"""
|
||||
cacheable = cacheable_methods or self.cacheable_methods
|
||||
if request.method in cacheable:
|
||||
try:
|
||||
cached_response = self.controller.cached_request(request)
|
||||
except zlib.error:
|
||||
cached_response = None
|
||||
if cached_response:
|
||||
return self.build_response(request, cached_response, from_cache=True)
|
||||
|
||||
# check for etags and add headers if appropriate
|
||||
request.headers.update(self.controller.conditional_headers(request))
|
||||
|
||||
resp = super(CacheControlAdapter, self).send(request, **kw)
|
||||
|
||||
return resp
|
||||
|
||||
def build_response(
|
||||
self, request, response, from_cache=False, cacheable_methods=None
|
||||
):
|
||||
"""
|
||||
Build a response by making a request or using the cache.
|
||||
|
||||
This will end up calling send and returning a potentially
|
||||
cached response
|
||||
"""
|
||||
cacheable = cacheable_methods or self.cacheable_methods
|
||||
if not from_cache and request.method in cacheable:
|
||||
# Check for any heuristics that might update headers
|
||||
# before trying to cache.
|
||||
if self.heuristic:
|
||||
response = self.heuristic.apply(response)
|
||||
|
||||
# apply any expiration heuristics
|
||||
if response.status == 304:
|
||||
# We must have sent an ETag request. This could mean
|
||||
# that we've been expired already or that we simply
|
||||
# have an etag. In either case, we want to try and
|
||||
# update the cache if that is the case.
|
||||
cached_response = self.controller.update_cached_response(
|
||||
request, response
|
||||
)
|
||||
|
||||
if cached_response is not response:
|
||||
from_cache = True
|
||||
|
||||
# We are done with the server response, read a
|
||||
# possible response body (compliant servers will
|
||||
# not return one, but we cannot be 100% sure) and
|
||||
# release the connection back to the pool.
|
||||
response.read(decode_content=False)
|
||||
response.release_conn()
|
||||
|
||||
response = cached_response
|
||||
|
||||
# We always cache the 301 responses
|
||||
elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
|
||||
self.controller.cache_response(request, response)
|
||||
else:
|
||||
# Wrap the response file with a wrapper that will cache the
|
||||
# response when the stream has been consumed.
|
||||
response._fp = CallbackFileWrapper(
|
||||
response._fp,
|
||||
functools.partial(
|
||||
self.controller.cache_response, request, response
|
||||
),
|
||||
)
|
||||
if response.chunked:
|
||||
super_update_chunk_length = response._update_chunk_length
|
||||
|
||||
def _update_chunk_length(self):
|
||||
super_update_chunk_length()
|
||||
if self.chunk_left == 0:
|
||||
self._fp._close()
|
||||
|
||||
response._update_chunk_length = types.MethodType(
|
||||
_update_chunk_length, response
|
||||
)
|
||||
|
||||
resp = super(CacheControlAdapter, self).build_response(request, response)
|
||||
|
||||
# See if we should invalidate the cache.
|
||||
if request.method in self.invalidating_methods and resp.ok:
|
||||
cache_url = self.controller.cache_url(request.url)
|
||||
self.cache.delete(cache_url)
|
||||
|
||||
# Give the request a from_cache attr to let people use it
|
||||
resp.from_cache = from_cache
|
||||
|
||||
return resp
|
||||
|
||||
def close(self):
|
||||
self.cache.close()
|
||||
super(CacheControlAdapter, self).close()
|
|
@ -0,0 +1,65 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
"""
|
||||
The cache object API for implementing caches. The default is a thread
|
||||
safe in-memory dictionary.
|
||||
"""
|
||||
from threading import Lock
|
||||
|
||||
|
||||
class BaseCache(object):
|
||||
|
||||
def get(self, key):
|
||||
raise NotImplementedError()
|
||||
|
||||
def set(self, key, value, expires=None):
|
||||
raise NotImplementedError()
|
||||
|
||||
def delete(self, key):
|
||||
raise NotImplementedError()
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
|
||||
class DictCache(BaseCache):
|
||||
|
||||
def __init__(self, init_dict=None):
|
||||
self.lock = Lock()
|
||||
self.data = init_dict or {}
|
||||
|
||||
def get(self, key):
|
||||
return self.data.get(key, None)
|
||||
|
||||
def set(self, key, value, expires=None):
|
||||
with self.lock:
|
||||
self.data.update({key: value})
|
||||
|
||||
def delete(self, key):
|
||||
with self.lock:
|
||||
if key in self.data:
|
||||
self.data.pop(key)
|
||||
|
||||
|
||||
class SeparateBodyBaseCache(BaseCache):
|
||||
"""
|
||||
In this variant, the body is not stored mixed in with the metadata, but is
|
||||
passed in (as a bytes-like object) in a separate call to ``set_body()``.
|
||||
|
||||
That is, the expected interaction pattern is::
|
||||
|
||||
cache.set(key, serialized_metadata)
|
||||
cache.set_body(key)
|
||||
|
||||
Similarly, the body should be loaded separately via ``get_body()``.
|
||||
"""
|
||||
def set_body(self, key, body):
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_body(self, key):
|
||||
"""
|
||||
Return the body as file-like object.
|
||||
"""
|
||||
raise NotImplementedError()
|
|
@ -0,0 +1,9 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from .file_cache import FileCache, SeparateBodyFileCache
|
||||
from .redis_cache import RedisCache
|
||||
|
||||
|
||||
__all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"]
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,188 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
from textwrap import dedent
|
||||
|
||||
from ..cache import BaseCache, SeparateBodyBaseCache
|
||||
from ..controller import CacheController
|
||||
|
||||
try:
|
||||
FileNotFoundError
|
||||
except NameError:
|
||||
# py2.X
|
||||
FileNotFoundError = (IOError, OSError)
|
||||
|
||||
|
||||
def _secure_open_write(filename, fmode):
|
||||
# We only want to write to this file, so open it in write only mode
|
||||
flags = os.O_WRONLY
|
||||
|
||||
# os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only
|
||||
# will open *new* files.
|
||||
# We specify this because we want to ensure that the mode we pass is the
|
||||
# mode of the file.
|
||||
flags |= os.O_CREAT | os.O_EXCL
|
||||
|
||||
# Do not follow symlinks to prevent someone from making a symlink that
|
||||
# we follow and insecurely open a cache file.
|
||||
if hasattr(os, "O_NOFOLLOW"):
|
||||
flags |= os.O_NOFOLLOW
|
||||
|
||||
# On Windows we'll mark this file as binary
|
||||
if hasattr(os, "O_BINARY"):
|
||||
flags |= os.O_BINARY
|
||||
|
||||
# Before we open our file, we want to delete any existing file that is
|
||||
# there
|
||||
try:
|
||||
os.remove(filename)
|
||||
except (IOError, OSError):
|
||||
# The file must not exist already, so we can just skip ahead to opening
|
||||
pass
|
||||
|
||||
# Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
|
||||
# race condition happens between the os.remove and this line, that an
|
||||
# error will be raised. Because we utilize a lockfile this should only
|
||||
# happen if someone is attempting to attack us.
|
||||
fd = os.open(filename, flags, fmode)
|
||||
try:
|
||||
return os.fdopen(fd, "wb")
|
||||
|
||||
except:
|
||||
# An error occurred wrapping our FD in a file object
|
||||
os.close(fd)
|
||||
raise
|
||||
|
||||
|
||||
class _FileCacheMixin:
|
||||
"""Shared implementation for both FileCache variants."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
directory,
|
||||
forever=False,
|
||||
filemode=0o0600,
|
||||
dirmode=0o0700,
|
||||
use_dir_lock=None,
|
||||
lock_class=None,
|
||||
):
|
||||
|
||||
if use_dir_lock is not None and lock_class is not None:
|
||||
raise ValueError("Cannot use use_dir_lock and lock_class together")
|
||||
|
||||
try:
|
||||
from lockfile import LockFile
|
||||
from lockfile.mkdirlockfile import MkdirLockFile
|
||||
except ImportError:
|
||||
notice = dedent(
|
||||
"""
|
||||
NOTE: In order to use the FileCache you must have
|
||||
lockfile installed. You can install it via pip:
|
||||
pip install lockfile
|
||||
"""
|
||||
)
|
||||
raise ImportError(notice)
|
||||
|
||||
else:
|
||||
if use_dir_lock:
|
||||
lock_class = MkdirLockFile
|
||||
|
||||
elif lock_class is None:
|
||||
lock_class = LockFile
|
||||
|
||||
self.directory = directory
|
||||
self.forever = forever
|
||||
self.filemode = filemode
|
||||
self.dirmode = dirmode
|
||||
self.lock_class = lock_class
|
||||
|
||||
@staticmethod
|
||||
def encode(x):
|
||||
return hashlib.sha224(x.encode()).hexdigest()
|
||||
|
||||
def _fn(self, name):
|
||||
# NOTE: This method should not change as some may depend on it.
|
||||
# See: https://github.com/ionrock/cachecontrol/issues/63
|
||||
hashed = self.encode(name)
|
||||
parts = list(hashed[:5]) + [hashed]
|
||||
return os.path.join(self.directory, *parts)
|
||||
|
||||
def get(self, key):
|
||||
name = self._fn(key)
|
||||
try:
|
||||
with open(name, "rb") as fh:
|
||||
return fh.read()
|
||||
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
|
||||
def set(self, key, value, expires=None):
|
||||
name = self._fn(key)
|
||||
self._write(name, value)
|
||||
|
||||
def _write(self, path, data: bytes):
|
||||
"""
|
||||
Safely write the data to the given path.
|
||||
"""
|
||||
# Make sure the directory exists
|
||||
try:
|
||||
os.makedirs(os.path.dirname(path), self.dirmode)
|
||||
except (IOError, OSError):
|
||||
pass
|
||||
|
||||
with self.lock_class(path) as lock:
|
||||
# Write our actual file
|
||||
with _secure_open_write(lock.path, self.filemode) as fh:
|
||||
fh.write(data)
|
||||
|
||||
def _delete(self, key, suffix):
|
||||
name = self._fn(key) + suffix
|
||||
if not self.forever:
|
||||
try:
|
||||
os.remove(name)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
|
||||
class FileCache(_FileCacheMixin, BaseCache):
|
||||
"""
|
||||
Traditional FileCache: body is stored in memory, so not suitable for large
|
||||
downloads.
|
||||
"""
|
||||
|
||||
def delete(self, key):
|
||||
self._delete(key, "")
|
||||
|
||||
|
||||
class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache):
|
||||
"""
|
||||
Memory-efficient FileCache: body is stored in a separate file, reducing
|
||||
peak memory usage.
|
||||
"""
|
||||
|
||||
def get_body(self, key):
|
||||
name = self._fn(key) + ".body"
|
||||
try:
|
||||
return open(name, "rb")
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
|
||||
def set_body(self, key, body):
|
||||
name = self._fn(key) + ".body"
|
||||
self._write(name, body)
|
||||
|
||||
def delete(self, key):
|
||||
self._delete(key, "")
|
||||
self._delete(key, ".body")
|
||||
|
||||
|
||||
def url_to_file_path(url, filecache):
|
||||
"""Return the file cache path based on the URL.
|
||||
|
||||
This does not ensure the file exists!
|
||||
"""
|
||||
key = CacheController.cache_url(url)
|
||||
return filecache._fn(key)
|
|
@ -0,0 +1,39 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from __future__ import division
|
||||
|
||||
from datetime import datetime
|
||||
from pip._vendor.cachecontrol.cache import BaseCache
|
||||
|
||||
|
||||
class RedisCache(BaseCache):
|
||||
|
||||
def __init__(self, conn):
|
||||
self.conn = conn
|
||||
|
||||
def get(self, key):
|
||||
return self.conn.get(key)
|
||||
|
||||
def set(self, key, value, expires=None):
|
||||
if not expires:
|
||||
self.conn.set(key, value)
|
||||
elif isinstance(expires, datetime):
|
||||
expires = expires - datetime.utcnow()
|
||||
self.conn.setex(key, int(expires.total_seconds()), value)
|
||||
else:
|
||||
self.conn.setex(key, expires, value)
|
||||
|
||||
def delete(self, key):
|
||||
self.conn.delete(key)
|
||||
|
||||
def clear(self):
|
||||
"""Helper for clearing all the keys in a database. Use with
|
||||
caution!"""
|
||||
for key in self.conn.keys():
|
||||
self.conn.delete(key)
|
||||
|
||||
def close(self):
|
||||
"""Redis uses connection pooling, no need to close the connection."""
|
||||
pass
|
|
@ -0,0 +1,32 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
try:
|
||||
from urllib.parse import urljoin
|
||||
except ImportError:
|
||||
from urlparse import urljoin
|
||||
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError:
|
||||
import pickle
|
||||
|
||||
# Handle the case where the requests module has been patched to not have
|
||||
# urllib3 bundled as part of its source.
|
||||
try:
|
||||
from pip._vendor.requests.packages.urllib3.response import HTTPResponse
|
||||
except ImportError:
|
||||
from pip._vendor.urllib3.response import HTTPResponse
|
||||
|
||||
try:
|
||||
from pip._vendor.requests.packages.urllib3.util import is_fp_closed
|
||||
except ImportError:
|
||||
from pip._vendor.urllib3.util import is_fp_closed
|
||||
|
||||
# Replicate some six behaviour
|
||||
try:
|
||||
text_type = unicode
|
||||
except NameError:
|
||||
text_type = str
|
|
@ -0,0 +1,439 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
"""
|
||||
The httplib2 algorithms ported for use with requests.
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
import calendar
|
||||
import time
|
||||
from email.utils import parsedate_tz
|
||||
|
||||
from pip._vendor.requests.structures import CaseInsensitiveDict
|
||||
|
||||
from .cache import DictCache, SeparateBodyBaseCache
|
||||
from .serialize import Serializer
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
|
||||
|
||||
PERMANENT_REDIRECT_STATUSES = (301, 308)
|
||||
|
||||
|
||||
def parse_uri(uri):
|
||||
"""Parses a URI using the regex given in Appendix B of RFC 3986.
|
||||
|
||||
(scheme, authority, path, query, fragment) = parse_uri(uri)
|
||||
"""
|
||||
groups = URI.match(uri).groups()
|
||||
return (groups[1], groups[3], groups[4], groups[6], groups[8])
|
||||
|
||||
|
||||
class CacheController(object):
|
||||
"""An interface to see if request should cached or not."""
|
||||
|
||||
def __init__(
|
||||
self, cache=None, cache_etags=True, serializer=None, status_codes=None
|
||||
):
|
||||
self.cache = DictCache() if cache is None else cache
|
||||
self.cache_etags = cache_etags
|
||||
self.serializer = serializer or Serializer()
|
||||
self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308)
|
||||
|
||||
@classmethod
|
||||
def _urlnorm(cls, uri):
|
||||
"""Normalize the URL to create a safe key for the cache"""
|
||||
(scheme, authority, path, query, fragment) = parse_uri(uri)
|
||||
if not scheme or not authority:
|
||||
raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
|
||||
|
||||
scheme = scheme.lower()
|
||||
authority = authority.lower()
|
||||
|
||||
if not path:
|
||||
path = "/"
|
||||
|
||||
# Could do syntax based normalization of the URI before
|
||||
# computing the digest. See Section 6.2.2 of Std 66.
|
||||
request_uri = query and "?".join([path, query]) or path
|
||||
defrag_uri = scheme + "://" + authority + request_uri
|
||||
|
||||
return defrag_uri
|
||||
|
||||
@classmethod
|
||||
def cache_url(cls, uri):
|
||||
return cls._urlnorm(uri)
|
||||
|
||||
def parse_cache_control(self, headers):
|
||||
known_directives = {
|
||||
# https://tools.ietf.org/html/rfc7234#section-5.2
|
||||
"max-age": (int, True),
|
||||
"max-stale": (int, False),
|
||||
"min-fresh": (int, True),
|
||||
"no-cache": (None, False),
|
||||
"no-store": (None, False),
|
||||
"no-transform": (None, False),
|
||||
"only-if-cached": (None, False),
|
||||
"must-revalidate": (None, False),
|
||||
"public": (None, False),
|
||||
"private": (None, False),
|
||||
"proxy-revalidate": (None, False),
|
||||
"s-maxage": (int, True),
|
||||
}
|
||||
|
||||
cc_headers = headers.get("cache-control", headers.get("Cache-Control", ""))
|
||||
|
||||
retval = {}
|
||||
|
||||
for cc_directive in cc_headers.split(","):
|
||||
if not cc_directive.strip():
|
||||
continue
|
||||
|
||||
parts = cc_directive.split("=", 1)
|
||||
directive = parts[0].strip()
|
||||
|
||||
try:
|
||||
typ, required = known_directives[directive]
|
||||
except KeyError:
|
||||
logger.debug("Ignoring unknown cache-control directive: %s", directive)
|
||||
continue
|
||||
|
||||
if not typ or not required:
|
||||
retval[directive] = None
|
||||
if typ:
|
||||
try:
|
||||
retval[directive] = typ(parts[1].strip())
|
||||
except IndexError:
|
||||
if required:
|
||||
logger.debug(
|
||||
"Missing value for cache-control " "directive: %s",
|
||||
directive,
|
||||
)
|
||||
except ValueError:
|
||||
logger.debug(
|
||||
"Invalid value for cache-control directive " "%s, must be %s",
|
||||
directive,
|
||||
typ.__name__,
|
||||
)
|
||||
|
||||
return retval
|
||||
|
||||
def cached_request(self, request):
|
||||
"""
|
||||
Return a cached response if it exists in the cache, otherwise
|
||||
return False.
|
||||
"""
|
||||
cache_url = self.cache_url(request.url)
|
||||
logger.debug('Looking up "%s" in the cache', cache_url)
|
||||
cc = self.parse_cache_control(request.headers)
|
||||
|
||||
# Bail out if the request insists on fresh data
|
||||
if "no-cache" in cc:
|
||||
logger.debug('Request header has "no-cache", cache bypassed')
|
||||
return False
|
||||
|
||||
if "max-age" in cc and cc["max-age"] == 0:
|
||||
logger.debug('Request header has "max_age" as 0, cache bypassed')
|
||||
return False
|
||||
|
||||
# Request allows serving from the cache, let's see if we find something
|
||||
cache_data = self.cache.get(cache_url)
|
||||
if cache_data is None:
|
||||
logger.debug("No cache entry available")
|
||||
return False
|
||||
|
||||
if isinstance(self.cache, SeparateBodyBaseCache):
|
||||
body_file = self.cache.get_body(cache_url)
|
||||
else:
|
||||
body_file = None
|
||||
|
||||
# Check whether it can be deserialized
|
||||
resp = self.serializer.loads(request, cache_data, body_file)
|
||||
if not resp:
|
||||
logger.warning("Cache entry deserialization failed, entry ignored")
|
||||
return False
|
||||
|
||||
# If we have a cached permanent redirect, return it immediately. We
|
||||
# don't need to test our response for other headers b/c it is
|
||||
# intrinsically "cacheable" as it is Permanent.
|
||||
#
|
||||
# See:
|
||||
# https://tools.ietf.org/html/rfc7231#section-6.4.2
|
||||
#
|
||||
# Client can try to refresh the value by repeating the request
|
||||
# with cache busting headers as usual (ie no-cache).
|
||||
if int(resp.status) in PERMANENT_REDIRECT_STATUSES:
|
||||
msg = (
|
||||
"Returning cached permanent redirect response "
|
||||
"(ignoring date and etag information)"
|
||||
)
|
||||
logger.debug(msg)
|
||||
return resp
|
||||
|
||||
headers = CaseInsensitiveDict(resp.headers)
|
||||
if not headers or "date" not in headers:
|
||||
if "etag" not in headers:
|
||||
# Without date or etag, the cached response can never be used
|
||||
# and should be deleted.
|
||||
logger.debug("Purging cached response: no date or etag")
|
||||
self.cache.delete(cache_url)
|
||||
logger.debug("Ignoring cached response: no date")
|
||||
return False
|
||||
|
||||
now = time.time()
|
||||
date = calendar.timegm(parsedate_tz(headers["date"]))
|
||||
current_age = max(0, now - date)
|
||||
logger.debug("Current age based on date: %i", current_age)
|
||||
|
||||
# TODO: There is an assumption that the result will be a
|
||||
# urllib3 response object. This may not be best since we
|
||||
# could probably avoid instantiating or constructing the
|
||||
# response until we know we need it.
|
||||
resp_cc = self.parse_cache_control(headers)
|
||||
|
||||
# determine freshness
|
||||
freshness_lifetime = 0
|
||||
|
||||
# Check the max-age pragma in the cache control header
|
||||
if "max-age" in resp_cc:
|
||||
freshness_lifetime = resp_cc["max-age"]
|
||||
logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime)
|
||||
|
||||
# If there isn't a max-age, check for an expires header
|
||||
elif "expires" in headers:
|
||||
expires = parsedate_tz(headers["expires"])
|
||||
if expires is not None:
|
||||
expire_time = calendar.timegm(expires) - date
|
||||
freshness_lifetime = max(0, expire_time)
|
||||
logger.debug("Freshness lifetime from expires: %i", freshness_lifetime)
|
||||
|
||||
# Determine if we are setting freshness limit in the
|
||||
# request. Note, this overrides what was in the response.
|
||||
if "max-age" in cc:
|
||||
freshness_lifetime = cc["max-age"]
|
||||
logger.debug(
|
||||
"Freshness lifetime from request max-age: %i", freshness_lifetime
|
||||
)
|
||||
|
||||
if "min-fresh" in cc:
|
||||
min_fresh = cc["min-fresh"]
|
||||
# adjust our current age by our min fresh
|
||||
current_age += min_fresh
|
||||
logger.debug("Adjusted current age from min-fresh: %i", current_age)
|
||||
|
||||
# Return entry if it is fresh enough
|
||||
if freshness_lifetime > current_age:
|
||||
logger.debug('The response is "fresh", returning cached response')
|
||||
logger.debug("%i > %i", freshness_lifetime, current_age)
|
||||
return resp
|
||||
|
||||
# we're not fresh. If we don't have an Etag, clear it out
|
||||
if "etag" not in headers:
|
||||
logger.debug('The cached response is "stale" with no etag, purging')
|
||||
self.cache.delete(cache_url)
|
||||
|
||||
# return the original handler
|
||||
return False
|
||||
|
||||
def conditional_headers(self, request):
|
||||
cache_url = self.cache_url(request.url)
|
||||
resp = self.serializer.loads(request, self.cache.get(cache_url))
|
||||
new_headers = {}
|
||||
|
||||
if resp:
|
||||
headers = CaseInsensitiveDict(resp.headers)
|
||||
|
||||
if "etag" in headers:
|
||||
new_headers["If-None-Match"] = headers["ETag"]
|
||||
|
||||
if "last-modified" in headers:
|
||||
new_headers["If-Modified-Since"] = headers["Last-Modified"]
|
||||
|
||||
return new_headers
|
||||
|
||||
def _cache_set(self, cache_url, request, response, body=None, expires_time=None):
|
||||
"""
|
||||
Store the data in the cache.
|
||||
"""
|
||||
if isinstance(self.cache, SeparateBodyBaseCache):
|
||||
# We pass in the body separately; just put a placeholder empty
|
||||
# string in the metadata.
|
||||
self.cache.set(
|
||||
cache_url,
|
||||
self.serializer.dumps(request, response, b""),
|
||||
expires=expires_time,
|
||||
)
|
||||
self.cache.set_body(cache_url, body)
|
||||
else:
|
||||
self.cache.set(
|
||||
cache_url,
|
||||
self.serializer.dumps(request, response, body),
|
||||
expires=expires_time,
|
||||
)
|
||||
|
||||
def cache_response(self, request, response, body=None, status_codes=None):
|
||||
"""
|
||||
Algorithm for caching requests.
|
||||
|
||||
This assumes a requests Response object.
|
||||
"""
|
||||
# From httplib2: Don't cache 206's since we aren't going to
|
||||
# handle byte range requests
|
||||
cacheable_status_codes = status_codes or self.cacheable_status_codes
|
||||
if response.status not in cacheable_status_codes:
|
||||
logger.debug(
|
||||
"Status code %s not in %s", response.status, cacheable_status_codes
|
||||
)
|
||||
return
|
||||
|
||||
response_headers = CaseInsensitiveDict(response.headers)
|
||||
|
||||
if "date" in response_headers:
|
||||
date = calendar.timegm(parsedate_tz(response_headers["date"]))
|
||||
else:
|
||||
date = 0
|
||||
|
||||
# If we've been given a body, our response has a Content-Length, that
|
||||
# Content-Length is valid then we can check to see if the body we've
|
||||
# been given matches the expected size, and if it doesn't we'll just
|
||||
# skip trying to cache it.
|
||||
if (
|
||||
body is not None
|
||||
and "content-length" in response_headers
|
||||
and response_headers["content-length"].isdigit()
|
||||
and int(response_headers["content-length"]) != len(body)
|
||||
):
|
||||
return
|
||||
|
||||
cc_req = self.parse_cache_control(request.headers)
|
||||
cc = self.parse_cache_control(response_headers)
|
||||
|
||||
cache_url = self.cache_url(request.url)
|
||||
logger.debug('Updating cache with response from "%s"', cache_url)
|
||||
|
||||
# Delete it from the cache if we happen to have it stored there
|
||||
no_store = False
|
||||
if "no-store" in cc:
|
||||
no_store = True
|
||||
logger.debug('Response header has "no-store"')
|
||||
if "no-store" in cc_req:
|
||||
no_store = True
|
||||
logger.debug('Request header has "no-store"')
|
||||
if no_store and self.cache.get(cache_url):
|
||||
logger.debug('Purging existing cache entry to honor "no-store"')
|
||||
self.cache.delete(cache_url)
|
||||
if no_store:
|
||||
return
|
||||
|
||||
# https://tools.ietf.org/html/rfc7234#section-4.1:
|
||||
# A Vary header field-value of "*" always fails to match.
|
||||
# Storing such a response leads to a deserialization warning
|
||||
# during cache lookup and is not allowed to ever be served,
|
||||
# so storing it can be avoided.
|
||||
if "*" in response_headers.get("vary", ""):
|
||||
logger.debug('Response header has "Vary: *"')
|
||||
return
|
||||
|
||||
# If we've been given an etag, then keep the response
|
||||
if self.cache_etags and "etag" in response_headers:
|
||||
expires_time = 0
|
||||
if response_headers.get("expires"):
|
||||
expires = parsedate_tz(response_headers["expires"])
|
||||
if expires is not None:
|
||||
expires_time = calendar.timegm(expires) - date
|
||||
|
||||
expires_time = max(expires_time, 14 * 86400)
|
||||
|
||||
logger.debug("etag object cached for {0} seconds".format(expires_time))
|
||||
logger.debug("Caching due to etag")
|
||||
self._cache_set(cache_url, request, response, body, expires_time)
|
||||
|
||||
# Add to the cache any permanent redirects. We do this before looking
|
||||
# that the Date headers.
|
||||
elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
|
||||
logger.debug("Caching permanent redirect")
|
||||
self._cache_set(cache_url, request, response, b"")
|
||||
|
||||
# Add to the cache if the response headers demand it. If there
|
||||
# is no date header then we can't do anything about expiring
|
||||
# the cache.
|
||||
elif "date" in response_headers:
|
||||
date = calendar.timegm(parsedate_tz(response_headers["date"]))
|
||||
# cache when there is a max-age > 0
|
||||
if "max-age" in cc and cc["max-age"] > 0:
|
||||
logger.debug("Caching b/c date exists and max-age > 0")
|
||||
expires_time = cc["max-age"]
|
||||
self._cache_set(
|
||||
cache_url,
|
||||
request,
|
||||
response,
|
||||
body,
|
||||
expires_time,
|
||||
)
|
||||
|
||||
# If the request can expire, it means we should cache it
|
||||
# in the meantime.
|
||||
elif "expires" in response_headers:
|
||||
if response_headers["expires"]:
|
||||
expires = parsedate_tz(response_headers["expires"])
|
||||
if expires is not None:
|
||||
expires_time = calendar.timegm(expires) - date
|
||||
else:
|
||||
expires_time = None
|
||||
|
||||
logger.debug(
|
||||
"Caching b/c of expires header. expires in {0} seconds".format(
|
||||
expires_time
|
||||
)
|
||||
)
|
||||
self._cache_set(
|
||||
cache_url,
|
||||
request,
|
||||
response,
|
||||
body,
|
||||
expires_time,
|
||||
)
|
||||
|
||||
def update_cached_response(self, request, response):
|
||||
"""On a 304 we will get a new set of headers that we want to
|
||||
update our cached value with, assuming we have one.
|
||||
|
||||
This should only ever be called when we've sent an ETag and
|
||||
gotten a 304 as the response.
|
||||
"""
|
||||
cache_url = self.cache_url(request.url)
|
||||
|
||||
cached_response = self.serializer.loads(request, self.cache.get(cache_url))
|
||||
|
||||
if not cached_response:
|
||||
# we didn't have a cached response
|
||||
return response
|
||||
|
||||
# Lets update our headers with the headers from the new request:
|
||||
# http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
|
||||
#
|
||||
# The server isn't supposed to send headers that would make
|
||||
# the cached body invalid. But... just in case, we'll be sure
|
||||
# to strip out ones we know that might be problmatic due to
|
||||
# typical assumptions.
|
||||
excluded_headers = ["content-length"]
|
||||
|
||||
cached_response.headers.update(
|
||||
dict(
|
||||
(k, v)
|
||||
for k, v in response.headers.items()
|
||||
if k.lower() not in excluded_headers
|
||||
)
|
||||
)
|
||||
|
||||
# we want a 200 b/c we have content via the cache
|
||||
cached_response.status = 200
|
||||
|
||||
# update our cache
|
||||
self._cache_set(cache_url, request, cached_response)
|
||||
|
||||
return cached_response
|
|
@ -0,0 +1,111 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from tempfile import NamedTemporaryFile
|
||||
import mmap
|
||||
|
||||
|
||||
class CallbackFileWrapper(object):
|
||||
"""
|
||||
Small wrapper around a fp object which will tee everything read into a
|
||||
buffer, and when that file is closed it will execute a callback with the
|
||||
contents of that buffer.
|
||||
|
||||
All attributes are proxied to the underlying file object.
|
||||
|
||||
This class uses members with a double underscore (__) leading prefix so as
|
||||
not to accidentally shadow an attribute.
|
||||
|
||||
The data is stored in a temporary file until it is all available. As long
|
||||
as the temporary files directory is disk-based (sometimes it's a
|
||||
memory-backed-``tmpfs`` on Linux), data will be unloaded to disk if memory
|
||||
pressure is high. For small files the disk usually won't be used at all,
|
||||
it'll all be in the filesystem memory cache, so there should be no
|
||||
performance impact.
|
||||
"""
|
||||
|
||||
def __init__(self, fp, callback):
|
||||
self.__buf = NamedTemporaryFile("rb+", delete=True)
|
||||
self.__fp = fp
|
||||
self.__callback = callback
|
||||
|
||||
def __getattr__(self, name):
|
||||
# The vaguaries of garbage collection means that self.__fp is
|
||||
# not always set. By using __getattribute__ and the private
|
||||
# name[0] allows looking up the attribute value and raising an
|
||||
# AttributeError when it doesn't exist. This stop thigns from
|
||||
# infinitely recursing calls to getattr in the case where
|
||||
# self.__fp hasn't been set.
|
||||
#
|
||||
# [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
|
||||
fp = self.__getattribute__("_CallbackFileWrapper__fp")
|
||||
return getattr(fp, name)
|
||||
|
||||
def __is_fp_closed(self):
|
||||
try:
|
||||
return self.__fp.fp is None
|
||||
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
return self.__fp.closed
|
||||
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
# We just don't cache it then.
|
||||
# TODO: Add some logging here...
|
||||
return False
|
||||
|
||||
def _close(self):
|
||||
if self.__callback:
|
||||
if self.__buf.tell() == 0:
|
||||
# Empty file:
|
||||
result = b""
|
||||
else:
|
||||
# Return the data without actually loading it into memory,
|
||||
# relying on Python's buffer API and mmap(). mmap() just gives
|
||||
# a view directly into the filesystem's memory cache, so it
|
||||
# doesn't result in duplicate memory use.
|
||||
self.__buf.seek(0, 0)
|
||||
result = memoryview(
|
||||
mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
|
||||
)
|
||||
self.__callback(result)
|
||||
|
||||
# We assign this to None here, because otherwise we can get into
|
||||
# really tricky problems where the CPython interpreter dead locks
|
||||
# because the callback is holding a reference to something which
|
||||
# has a __del__ method. Setting this to None breaks the cycle
|
||||
# and allows the garbage collector to do it's thing normally.
|
||||
self.__callback = None
|
||||
|
||||
# Closing the temporary file releases memory and frees disk space.
|
||||
# Important when caching big files.
|
||||
self.__buf.close()
|
||||
|
||||
def read(self, amt=None):
|
||||
data = self.__fp.read(amt)
|
||||
if data:
|
||||
# We may be dealing with b'', a sign that things are over:
|
||||
# it's passed e.g. after we've already closed self.__buf.
|
||||
self.__buf.write(data)
|
||||
if self.__is_fp_closed():
|
||||
self._close()
|
||||
|
||||
return data
|
||||
|
||||
def _safe_read(self, amt):
|
||||
data = self.__fp._safe_read(amt)
|
||||
if amt == 2 and data == b"\r\n":
|
||||
# urllib executes this read to toss the CRLF at the end
|
||||
# of the chunk.
|
||||
return data
|
||||
|
||||
self.__buf.write(data)
|
||||
if self.__is_fp_closed():
|
||||
self._close()
|
||||
|
||||
return data
|
|
@ -0,0 +1,139 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import calendar
|
||||
import time
|
||||
|
||||
from email.utils import formatdate, parsedate, parsedate_tz
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
|
||||
|
||||
|
||||
def expire_after(delta, date=None):
|
||||
date = date or datetime.utcnow()
|
||||
return date + delta
|
||||
|
||||
|
||||
def datetime_to_header(dt):
|
||||
return formatdate(calendar.timegm(dt.timetuple()))
|
||||
|
||||
|
||||
class BaseHeuristic(object):
|
||||
|
||||
def warning(self, response):
|
||||
"""
|
||||
Return a valid 1xx warning header value describing the cache
|
||||
adjustments.
|
||||
|
||||
The response is provided too allow warnings like 113
|
||||
http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
|
||||
to explicitly say response is over 24 hours old.
|
||||
"""
|
||||
return '110 - "Response is Stale"'
|
||||
|
||||
def update_headers(self, response):
|
||||
"""Update the response headers with any new headers.
|
||||
|
||||
NOTE: This SHOULD always include some Warning header to
|
||||
signify that the response was cached by the client, not
|
||||
by way of the provided headers.
|
||||
"""
|
||||
return {}
|
||||
|
||||
def apply(self, response):
|
||||
updated_headers = self.update_headers(response)
|
||||
|
||||
if updated_headers:
|
||||
response.headers.update(updated_headers)
|
||||
warning_header_value = self.warning(response)
|
||||
if warning_header_value is not None:
|
||||
response.headers.update({"Warning": warning_header_value})
|
||||
|
||||
return response
|
||||
|
||||
|
||||
class OneDayCache(BaseHeuristic):
|
||||
"""
|
||||
Cache the response by providing an expires 1 day in the
|
||||
future.
|
||||
"""
|
||||
|
||||
def update_headers(self, response):
|
||||
headers = {}
|
||||
|
||||
if "expires" not in response.headers:
|
||||
date = parsedate(response.headers["date"])
|
||||
expires = expire_after(timedelta(days=1), date=datetime(*date[:6]))
|
||||
headers["expires"] = datetime_to_header(expires)
|
||||
headers["cache-control"] = "public"
|
||||
return headers
|
||||
|
||||
|
||||
class ExpiresAfter(BaseHeuristic):
|
||||
"""
|
||||
Cache **all** requests for a defined time period.
|
||||
"""
|
||||
|
||||
def __init__(self, **kw):
|
||||
self.delta = timedelta(**kw)
|
||||
|
||||
def update_headers(self, response):
|
||||
expires = expire_after(self.delta)
|
||||
return {"expires": datetime_to_header(expires), "cache-control": "public"}
|
||||
|
||||
def warning(self, response):
|
||||
tmpl = "110 - Automatically cached for %s. Response might be stale"
|
||||
return tmpl % self.delta
|
||||
|
||||
|
||||
class LastModified(BaseHeuristic):
|
||||
"""
|
||||
If there is no Expires header already, fall back on Last-Modified
|
||||
using the heuristic from
|
||||
http://tools.ietf.org/html/rfc7234#section-4.2.2
|
||||
to calculate a reasonable value.
|
||||
|
||||
Firefox also does something like this per
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
|
||||
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
|
||||
Unlike mozilla we limit this to 24-hr.
|
||||
"""
|
||||
cacheable_by_default_statuses = {
|
||||
200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
|
||||
}
|
||||
|
||||
def update_headers(self, resp):
|
||||
headers = resp.headers
|
||||
|
||||
if "expires" in headers:
|
||||
return {}
|
||||
|
||||
if "cache-control" in headers and headers["cache-control"] != "public":
|
||||
return {}
|
||||
|
||||
if resp.status not in self.cacheable_by_default_statuses:
|
||||
return {}
|
||||
|
||||
if "date" not in headers or "last-modified" not in headers:
|
||||
return {}
|
||||
|
||||
date = calendar.timegm(parsedate_tz(headers["date"]))
|
||||
last_modified = parsedate(headers["last-modified"])
|
||||
if date is None or last_modified is None:
|
||||
return {}
|
||||
|
||||
now = time.time()
|
||||
current_age = max(0, now - date)
|
||||
delta = date - calendar.timegm(last_modified)
|
||||
freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
|
||||
if freshness_lifetime <= current_age:
|
||||
return {}
|
||||
|
||||
expires = date + freshness_lifetime
|
||||
return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))}
|
||||
|
||||
def warning(self, resp):
|
||||
return None
|
|
@ -0,0 +1,190 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import zlib
|
||||
|
||||
from pip._vendor import msgpack
|
||||
from pip._vendor.requests.structures import CaseInsensitiveDict
|
||||
|
||||
from .compat import HTTPResponse, pickle, text_type
|
||||
|
||||
|
||||
def _b64_decode_bytes(b):
|
||||
return base64.b64decode(b.encode("ascii"))
|
||||
|
||||
|
||||
def _b64_decode_str(s):
|
||||
return _b64_decode_bytes(s).decode("utf8")
|
||||
|
||||
|
||||
_default_body_read = object()
|
||||
|
||||
|
||||
class Serializer(object):
|
||||
def dumps(self, request, response, body=None):
|
||||
response_headers = CaseInsensitiveDict(response.headers)
|
||||
|
||||
if body is None:
|
||||
# When a body isn't passed in, we'll read the response. We
|
||||
# also update the response with a new file handler to be
|
||||
# sure it acts as though it was never read.
|
||||
body = response.read(decode_content=False)
|
||||
response._fp = io.BytesIO(body)
|
||||
|
||||
# NOTE: This is all a bit weird, but it's really important that on
|
||||
# Python 2.x these objects are unicode and not str, even when
|
||||
# they contain only ascii. The problem here is that msgpack
|
||||
# understands the difference between unicode and bytes and we
|
||||
# have it set to differentiate between them, however Python 2
|
||||
# doesn't know the difference. Forcing these to unicode will be
|
||||
# enough to have msgpack know the difference.
|
||||
data = {
|
||||
u"response": {
|
||||
u"body": body, # Empty bytestring if body is stored separately
|
||||
u"headers": dict(
|
||||
(text_type(k), text_type(v)) for k, v in response.headers.items()
|
||||
),
|
||||
u"status": response.status,
|
||||
u"version": response.version,
|
||||
u"reason": text_type(response.reason),
|
||||
u"strict": response.strict,
|
||||
u"decode_content": response.decode_content,
|
||||
}
|
||||
}
|
||||
|
||||
# Construct our vary headers
|
||||
data[u"vary"] = {}
|
||||
if u"vary" in response_headers:
|
||||
varied_headers = response_headers[u"vary"].split(",")
|
||||
for header in varied_headers:
|
||||
header = text_type(header).strip()
|
||||
header_value = request.headers.get(header, None)
|
||||
if header_value is not None:
|
||||
header_value = text_type(header_value)
|
||||
data[u"vary"][header] = header_value
|
||||
|
||||
return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
|
||||
|
||||
def loads(self, request, data, body_file=None):
|
||||
# Short circuit if we've been given an empty set of data
|
||||
if not data:
|
||||
return
|
||||
|
||||
# Determine what version of the serializer the data was serialized
|
||||
# with
|
||||
try:
|
||||
ver, data = data.split(b",", 1)
|
||||
except ValueError:
|
||||
ver = b"cc=0"
|
||||
|
||||
# Make sure that our "ver" is actually a version and isn't a false
|
||||
# positive from a , being in the data stream.
|
||||
if ver[:3] != b"cc=":
|
||||
data = ver + data
|
||||
ver = b"cc=0"
|
||||
|
||||
# Get the version number out of the cc=N
|
||||
ver = ver.split(b"=", 1)[-1].decode("ascii")
|
||||
|
||||
# Dispatch to the actual load method for the given version
|
||||
try:
|
||||
return getattr(self, "_loads_v{}".format(ver))(request, data, body_file)
|
||||
|
||||
except AttributeError:
|
||||
# This is a version we don't have a loads function for, so we'll
|
||||
# just treat it as a miss and return None
|
||||
return
|
||||
|
||||
def prepare_response(self, request, cached, body_file=None):
|
||||
"""Verify our vary headers match and construct a real urllib3
|
||||
HTTPResponse object.
|
||||
"""
|
||||
# Special case the '*' Vary value as it means we cannot actually
|
||||
# determine if the cached response is suitable for this request.
|
||||
# This case is also handled in the controller code when creating
|
||||
# a cache entry, but is left here for backwards compatibility.
|
||||
if "*" in cached.get("vary", {}):
|
||||
return
|
||||
|
||||
# Ensure that the Vary headers for the cached response match our
|
||||
# request
|
||||
for header, value in cached.get("vary", {}).items():
|
||||
if request.headers.get(header, None) != value:
|
||||
return
|
||||
|
||||
body_raw = cached["response"].pop("body")
|
||||
|
||||
headers = CaseInsensitiveDict(data=cached["response"]["headers"])
|
||||
if headers.get("transfer-encoding", "") == "chunked":
|
||||
headers.pop("transfer-encoding")
|
||||
|
||||
cached["response"]["headers"] = headers
|
||||
|
||||
try:
|
||||
if body_file is None:
|
||||
body = io.BytesIO(body_raw)
|
||||
else:
|
||||
body = body_file
|
||||
except TypeError:
|
||||
# This can happen if cachecontrol serialized to v1 format (pickle)
|
||||
# using Python 2. A Python 2 str(byte string) will be unpickled as
|
||||
# a Python 3 str (unicode string), which will cause the above to
|
||||
# fail with:
|
||||
#
|
||||
# TypeError: 'str' does not support the buffer interface
|
||||
body = io.BytesIO(body_raw.encode("utf8"))
|
||||
|
||||
return HTTPResponse(body=body, preload_content=False, **cached["response"])
|
||||
|
||||
def _loads_v0(self, request, data, body_file=None):
|
||||
# The original legacy cache data. This doesn't contain enough
|
||||
# information to construct everything we need, so we'll treat this as
|
||||
# a miss.
|
||||
return
|
||||
|
||||
def _loads_v1(self, request, data, body_file=None):
|
||||
try:
|
||||
cached = pickle.loads(data)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
return self.prepare_response(request, cached, body_file)
|
||||
|
||||
def _loads_v2(self, request, data, body_file=None):
|
||||
assert body_file is None
|
||||
try:
|
||||
cached = json.loads(zlib.decompress(data).decode("utf8"))
|
||||
except (ValueError, zlib.error):
|
||||
return
|
||||
|
||||
# We need to decode the items that we've base64 encoded
|
||||
cached["response"]["body"] = _b64_decode_bytes(cached["response"]["body"])
|
||||
cached["response"]["headers"] = dict(
|
||||
(_b64_decode_str(k), _b64_decode_str(v))
|
||||
for k, v in cached["response"]["headers"].items()
|
||||
)
|
||||
cached["response"]["reason"] = _b64_decode_str(cached["response"]["reason"])
|
||||
cached["vary"] = dict(
|
||||
(_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
|
||||
for k, v in cached["vary"].items()
|
||||
)
|
||||
|
||||
return self.prepare_response(request, cached, body_file)
|
||||
|
||||
def _loads_v3(self, request, data, body_file):
|
||||
# Due to Python 2 encoding issues, it's impossible to know for sure
|
||||
# exactly how to load v3 entries, thus we'll treat these as a miss so
|
||||
# that they get rewritten out as v4 entries.
|
||||
return
|
||||
|
||||
def _loads_v4(self, request, data, body_file=None):
|
||||
try:
|
||||
cached = msgpack.loads(data, raw=False)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
return self.prepare_response(request, cached, body_file)
|
|
@ -0,0 +1,33 @@
|
|||
# SPDX-FileCopyrightText: 2015 Eric Larson
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from .adapter import CacheControlAdapter
|
||||
from .cache import DictCache
|
||||
|
||||
|
||||
def CacheControl(
|
||||
sess,
|
||||
cache=None,
|
||||
cache_etags=True,
|
||||
serializer=None,
|
||||
heuristic=None,
|
||||
controller_class=None,
|
||||
adapter_class=None,
|
||||
cacheable_methods=None,
|
||||
):
|
||||
|
||||
cache = DictCache() if cache is None else cache
|
||||
adapter_class = adapter_class or CacheControlAdapter
|
||||
adapter = adapter_class(
|
||||
cache,
|
||||
cache_etags=cache_etags,
|
||||
serializer=serializer,
|
||||
heuristic=heuristic,
|
||||
controller_class=controller_class,
|
||||
cacheable_methods=cacheable_methods,
|
||||
)
|
||||
sess.mount("http://", adapter)
|
||||
sess.mount("https://", adapter)
|
||||
|
||||
return sess
|
Loading…
Add table
Add a link
Reference in a new issue