2023-01-12 01:04:47 +00:00
|
|
|
# SPDX-FileCopyrightText: 2015 Eric Larson
|
|
|
|
#
|
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
2023-09-17 19:22:54 +00:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2023-01-06 11:47:44 +00:00
|
|
|
import gc
|
2023-01-12 01:04:47 +00:00
|
|
|
import hashlib
|
|
|
|
import os
|
|
|
|
from textwrap import dedent
|
2023-09-17 19:22:54 +00:00
|
|
|
from typing import IO, TYPE_CHECKING, Union
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
from cachecontrol.cache import BaseCache, SeparateBodyBaseCache
|
|
|
|
from cachecontrol.controller import CacheController
|
2023-01-12 01:04:47 +00:00
|
|
|
|
2023-09-17 19:22:54 +00:00
|
|
|
if TYPE_CHECKING:
|
|
|
|
from datetime import datetime
|
2023-01-12 01:04:47 +00:00
|
|
|
|
2023-09-17 19:22:54 +00:00
|
|
|
from filelock import BaseFileLock
|
2023-01-12 01:04:47 +00:00
|
|
|
|
|
|
|
|
2023-09-17 19:22:54 +00:00
|
|
|
def _secure_open_write(filename: str, fmode: int) -> IO[bytes]:
    """Recreate ``filename`` from scratch and return it opened for binary writing.

    Any file currently at ``filename`` is removed first; the replacement is
    then created exclusively so that ``fmode`` is the mode of the new file.

    :param filename: path of the file to (re)create.
    :param fmode: permission bits handed to ``os.open`` for the new file.
    :returns: a binary, write-only file object wrapping the new descriptor.
    :raises OSError: if the exclusive create fails, e.g. because something
        recreated the path between the removal and the open.
    """
    # Clear out whatever currently occupies the path. A failure here is
    # ignored: if the file is somehow still present, the exclusive create
    # below will refuse to proceed.
    try:
        os.remove(filename)
        # NOTE(review): a full collection is forced after a successful
        # removal; the reason is not evident from this file -- confirm.
        gc.collect(2)
    except OSError:
        pass

    # Write-only, and O_CREAT | O_EXCL so that only a *new* file can ever be
    # opened here, which in turn guarantees the requested mode is applied.
    open_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL

    # Where available, refuse to follow symlinks so that nobody can point
    # the cache path at another location and have us open it insecurely.
    open_flags |= getattr(os, "O_NOFOLLOW", 0)

    # Where available (Windows), mark the file as binary.
    open_flags |= getattr(os, "O_BINARY", 0)

    # Because of O_EXCL, a race between the removal above and this call
    # surfaces as an error. Callers hold a lock file, so that should only
    # happen if someone is attempting to attack us.
    descriptor = os.open(filename, open_flags, fmode)
    try:
        return os.fdopen(descriptor, "wb")
    except BaseException:
        # Wrapping the descriptor failed: release it before propagating.
        os.close(descriptor)
        raise
|
|
|
|
|
|
|
|
|
2023-01-06 11:47:44 +00:00
|
|
|
class _FileCacheMixin:
    """Storage logic shared by both FileCache variants.

    Each entry is kept in a file under ``directory`` whose name is the
    SHA-224 hex digest of the cache key; writes are wrapped in a
    ``<path>.lock`` lock built from ``lock_class``.
    """

    def __init__(
        self,
        directory: Union[str, Path],
        forever: bool = False,
        filemode: int = 0o0600,
        dirmode: int = 0o0700,
        lock_class: type[BaseFileLock] | None = None,
    ) -> None:
        """Store the cache configuration.

        :param directory: root directory that holds the cache files.
        :param forever: when true, deletions become no-ops.
        :param filemode: mode used when creating cache files.
        :param dirmode: mode used when creating intermediate directories.
        :param lock_class: lock factory called with the lock file path;
            ``filelock.FileLock`` is used when omitted.
        :raises ImportError: if no ``lock_class`` is given and ``filelock``
            cannot be imported.
        """
        if lock_class is None:
            # filelock is only needed when the caller did not bring a lock.
            try:
                from filelock import FileLock
            except ImportError:
                notice = dedent(
                    """
                NOTE: In order to use the FileCache you must have
                filelock installed. You can install it via pip:
                  pip install cachecontrol[filecache]
                """
                )
                raise ImportError(notice)
            lock_class = FileLock

        self.directory = directory
        self.forever = forever
        self.filemode = filemode
        self.dirmode = dirmode
        self.lock_class = lock_class

    @staticmethod
    def encode(x: str) -> str:
        """Return the SHA-224 hex digest of ``x``."""
        digest = hashlib.sha224(x.encode())
        return digest.hexdigest()

    def _fn(self, name: str) -> str:
        """Map a cache key to its file path beneath ``self.directory``."""
        # NOTE: This method should not change as some may depend on it.
        #       See: https://github.com/ionrock/cachecontrol/issues/63
        digest = self.encode(name)
        # The first five digest characters each become one directory level,
        # followed by the full digest as the file name.
        return os.path.join(self.directory, *digest[:5], digest)

    def get(self, key: str) -> bytes | None:
        """Return the bytes stored for ``key``, or ``None`` if no file exists."""
        try:
            with open(self._fn(key), "rb") as stream:
                contents = stream.read()
        except FileNotFoundError:
            return None
        return contents

    def set(
        self, key: str, value: bytes, expires: int | datetime | None = None
    ) -> None:
        """Store ``value`` under ``key``.

        ``expires`` is accepted but not used by this implementation.
        """
        self._write(self._fn(key), value)

    def _write(self, path: str, data: bytes) -> None:
        """
        Safely write the data to the given path.
        """
        # Make sure the parent directory exists; a failure here is ignored.
        parent = os.path.dirname(path)
        try:
            os.makedirs(parent, self.dirmode)
        except OSError:
            pass

        # Hold the lock for the whole replace-and-write sequence.
        lock_path = path + ".lock"
        with self.lock_class(lock_path), _secure_open_write(
            path, self.filemode
        ) as out:
            out.write(data)

    def _delete(self, key: str, suffix: str) -> None:
        """Remove the file for ``key`` plus ``suffix`` unless ``forever`` is set."""
        target = self._fn(key) + suffix
        if self.forever:
            return
        try:
            os.remove(target)
        except FileNotFoundError:
            # Nothing stored under this name; nothing to do.
            pass
|
|
|
|
|
|
|
|
|
2023-01-06 11:47:44 +00:00
|
|
|
class FileCache(_FileCacheMixin, BaseCache):
    """
    Classic FileCache. The body is kept in memory, which makes this variant
    unsuitable for large downloads.
    """

    def delete(self, key: str) -> None:
        """Remove the cache file for ``key`` (delegates to ``_delete``)."""
        self._delete(key, suffix="")
|
|
|
|
|
|
|
|
|
|
|
|
class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache):
    """
    FileCache variant that keeps the body in its own ``.body`` file, which
    lowers peak memory usage.
    """

    def get_body(self, key: str) -> IO[bytes] | None:
        """Open the body file for ``key`` for binary reading.

        Returns ``None`` when the body file does not exist; otherwise the
        open file object is handed to the caller, who must close it.
        """
        body_path = self._fn(key) + ".body"
        try:
            handle = open(body_path, "rb")
        except FileNotFoundError:
            return None
        return handle

    def set_body(self, key: str, body: bytes) -> None:
        """Write ``body`` to the ``.body`` file belonging to ``key``."""
        self._write(self._fn(key) + ".body", body)

    def delete(self, key: str) -> None:
        """Remove both the main file and the ``.body`` file for ``key``."""
        for suffix in ("", ".body"):
            self._delete(key, suffix)
|
|
|
|
|
|
|
|
|
2023-09-17 19:22:54 +00:00
|
|
|
def url_to_file_path(url: str, filecache: FileCache) -> str:
    """Compute where ``filecache`` would store the entry for ``url``.

    The URL is first turned into a cache key with
    ``CacheController.cache_url`` and that key is then mapped to a path by
    the cache itself.

    Only the path is computed: the file is not guaranteed to exist!
    """
    return filecache._fn(CacheController.cache_url(url))
|