diff --git a/CHANGES.md b/CHANGES.md
index 0e2a4a63..ac8d68af 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,4 +1,15 @@
-### 3.26.1 (2023-02-08 20:50:00 UTC)
+### 3.27.0 (202x-xx-xx xx:xx:00 UTC)
+
+* Update Cachecontrol 0.12.6 (167a605) to 0.12.11 (c05ef9e)
+* Add Filelock 3.9.0 (ce3e891)
+* Remove Lockfile no longer used by Cachecontrol
+
+
+[develop changelog]
+
+
+
+### 3.26.1 (2023-02-08 20:50:00 UTC)
 
 * Change forced show updates process during startup to prevent webUI blocking
 * Change allow Python 3.11.2, 3.10.10
diff --git a/lib/cachecontrol/__init__.py b/lib/cachecontrol/__init__.py
index 6ba19d3a..f631ae6d 100644
--- a/lib/cachecontrol/__init__.py
+++ b/lib/cachecontrol/__init__.py
@@ -8,7 +8,7 @@ Make it easy to import from cachecontrol without long namespaces.
 """
 __author__ = "Eric Larson"
 __email__ = "eric@ionrock.org"
-__version__ = "0.12.6"
+__version__ = "0.12.11"
 
 from .wrapper import CacheControl
 from .adapter import CacheControlAdapter
diff --git a/lib/cachecontrol/cache.py b/lib/cachecontrol/cache.py
index 44e4309d..2a965f59 100644
--- a/lib/cachecontrol/cache.py
+++ b/lib/cachecontrol/cache.py
@@ -41,3 +41,25 @@ class DictCache(BaseCache):
         with self.lock:
             if key in self.data:
                 self.data.pop(key)
+
+
+class SeparateBodyBaseCache(BaseCache):
+    """
+    In this variant, the body is not stored mixed in with the metadata, but is
+    passed in (as a bytes-like object) in a separate call to ``set_body()``.
+
+    That is, the expected interaction pattern is::
+
+        cache.set(key, serialized_metadata)
+        cache.set_body(key, body)
+
+    Similarly, the body should be loaded separately via ``get_body()``.
+    """
+    def set_body(self, key, body):
+        raise NotImplementedError()
+
+    def get_body(self, key):
+        """
+        Return the body as a file-like object.
+        """
+        raise NotImplementedError()
diff --git a/lib/cachecontrol/caches/__init__.py b/lib/cachecontrol/caches/__init__.py
index 44becd68..37827291 100644
--- a/lib/cachecontrol/caches/__init__.py
+++ b/lib/cachecontrol/caches/__init__.py
@@ -2,5 +2,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from .file_cache import FileCache  # noqa
-from .redis_cache import RedisCache  # noqa
+from .file_cache import FileCache, SeparateBodyFileCache
+from .redis_cache import RedisCache
+
+
+__all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"]
diff --git a/lib/cachecontrol/caches/file_cache.py b/lib/cachecontrol/caches/file_cache.py
index 6cd1106f..f5cc2c8f 100644
--- a/lib/cachecontrol/caches/file_cache.py
+++ b/lib/cachecontrol/caches/file_cache.py
@@ -1,12 +1,12 @@
 # SPDX-FileCopyrightText: 2015 Eric Larson
 #
 # SPDX-License-Identifier: Apache-2.0
-
+import gc
 import hashlib
 import os
 from textwrap import dedent
 
-from ..cache import BaseCache
+from ..cache import BaseCache, SeparateBodyBaseCache
 from ..controller import CacheController
 
 try:
@@ -39,6 +39,7 @@ def _secure_open_write(filename, fmode):
     # there
     try:
         os.remove(filename)
+        gc.collect(2)
     except (IOError, OSError):
         # The file must not exist already, so we can just skip ahead to opening
         pass
@@ -57,7 +58,8 @@ def _secure_open_write(filename, fmode):
         raise
 
 
-class FileCache(BaseCache):
+class _FileCacheMixin:
+    """Shared implementation for both FileCache variants."""
 
     def __init__(
         self,
@@ -65,33 +67,23 @@ class FileCache(BaseCache):
         forever=False,
         filemode=0o0600,
         dirmode=0o0700,
-        use_dir_lock=None,
         lock_class=None,
     ):
-
-        if use_dir_lock is not None and lock_class is not None:
-            raise ValueError("Cannot use use_dir_lock and lock_class together")
-
         try:
-            from lockfile import LockFile
-            from lockfile.mkdirlockfile import MkdirLockFile
+            if lock_class is None:
+                from filelock import FileLock
+                lock_class = FileLock
         except ImportError:
             notice = dedent(
                 """
             NOTE: In order to use the FileCache you must have
-            lockfile installed. You can install it via pip:
-            pip install lockfile
+            filelock installed. You can install it via pip:
+            pip install filelock
             """
             )
             raise ImportError(notice)
 
-        else:
-            if use_dir_lock:
-                lock_class = MkdirLockFile
-
-            elif lock_class is None:
-                lock_class = LockFile
-
         self.directory = directory
         self.forever = forever
         self.filemode = filemode
@@ -120,20 +112,25 @@ class FileCache(BaseCache):
 
     def set(self, key, value, expires=None):
         name = self._fn(key)
+        self._write(name, value)
 
+    def _write(self, path, data: bytes):
+        """
+        Safely write the data to the given path.
+        """
         # Make sure the directory exists
         try:
-            os.makedirs(os.path.dirname(name), self.dirmode)
+            os.makedirs(os.path.dirname(path), self.dirmode)
         except (IOError, OSError):
             pass
 
-        with self.lock_class(name) as lock:
+        with self.lock_class(path + ".lock"):
             # Write our actual file
-            with _secure_open_write(lock.path, self.filemode) as fh:
-                fh.write(value)
+            with _secure_open_write(path, self.filemode) as fh:
+                fh.write(data)
 
-    def delete(self, key):
-        name = self._fn(key)
+    def _delete(self, key, suffix):
+        name = self._fn(key) + suffix
         if not self.forever:
             try:
                 os.remove(name)
@@ -141,6 +138,38 @@
             pass
 
 
+class FileCache(_FileCacheMixin, BaseCache):
+    """
+    Traditional FileCache: body is stored in memory, so not suitable for large
+    downloads.
+    """
+
+    def delete(self, key):
+        self._delete(key, "")
+
+
+class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache):
+    """
+    Memory-efficient FileCache: body is stored in a separate file, reducing
+    peak memory usage.
+    """
+
+    def get_body(self, key):
+        name = self._fn(key) + ".body"
+        try:
+            return open(name, "rb")
+        except FileNotFoundError:
+            return None
+
+    def set_body(self, key, body):
+        name = self._fn(key) + ".body"
+        self._write(name, body)
+
+    def delete(self, key):
+        self._delete(key, "")
+        self._delete(key, ".body")
+
+
 def url_to_file_path(url, filecache):
     """Return the file cache path based on the URL.
diff --git a/lib/cachecontrol/caches/redis_cache.py b/lib/cachecontrol/caches/redis_cache.py
index 564c30e4..7bcb38a2 100644
--- a/lib/cachecontrol/caches/redis_cache.py
+++ b/lib/cachecontrol/caches/redis_cache.py
@@ -19,9 +19,11 @@ class RedisCache(BaseCache):
     def set(self, key, value, expires=None):
         if not expires:
             self.conn.set(key, value)
-        else:
+        elif isinstance(expires, datetime):
             expires = expires - datetime.utcnow()
             self.conn.setex(key, int(expires.total_seconds()), value)
+        else:
+            self.conn.setex(key, expires, value)
 
     def delete(self, key):
         self.conn.delete(key)
diff --git a/lib/cachecontrol/controller.py b/lib/cachecontrol/controller.py
index aad14f42..48db4401 100644
--- a/lib/cachecontrol/controller.py
+++ b/lib/cachecontrol/controller.py
@@ -13,7 +13,7 @@ from email.utils import parsedate_tz
 
 from requests.structures import CaseInsensitiveDict
 
-from .cache import DictCache
+from .cache import DictCache, SeparateBodyBaseCache
 from .serialize import Serializer
 
 
@@ -27,15 +27,14 @@ PERMANENT_REDIRECT_STATUSES = (301, 308)
 
 def parse_uri(uri):
     """Parses a URI using the regex given in Appendix B of RFC 3986.
 
-        (scheme, authority, path, query, fragment) = parse_uri(uri)
+    (scheme, authority, path, query, fragment) = parse_uri(uri)
     """
     groups = URI.match(uri).groups()
     return (groups[1], groups[3], groups[4], groups[6], groups[8])
 
 
 class CacheController(object):
-    """An interface to see if request should cached or not.
-    """
+    """An interface to see if a request should be cached or not."""
 
     def __init__(
         self, cache=None, cache_etags=True, serializer=None, status_codes=None
@@ -123,6 +122,26 @@ class CacheController(object):
 
         return retval
 
+    def _load_from_cache(self, request):
+        """
+        Load a cached response, or return None if it's not available.
+        """
+        cache_url = request.url
+        cache_data = self.cache.get(cache_url)
+        if cache_data is None:
+            logger.debug("No cache entry available")
+            return None
+
+        if isinstance(self.cache, SeparateBodyBaseCache):
+            body_file = self.cache.get_body(cache_url)
+        else:
+            body_file = None
+
+        result = self.serializer.loads(request, cache_data, body_file)
+        if result is None:
+            logger.warning("Cache entry deserialization failed, entry ignored")
+        return result
+
     def cached_request(self, request):
         """
         Return a cached response if it exists in the cache, otherwise
@@ -141,16 +160,9 @@ class CacheController(object):
             logger.debug('Request header has "max_age" as 0, cache bypassed')
             return False
 
-        # Request allows serving from the cache, let's see if we find something
-        cache_data = self.cache.get(cache_url)
-        if cache_data is None:
-            logger.debug("No cache entry available")
-            return False
-
-        # Check whether it can be deserialized
-        resp = self.serializer.loads(request, cache_data)
+        # Check whether we can load the response from the cache:
+        resp = self._load_from_cache(request)
         if not resp:
-            logger.warning("Cache entry deserialization failed, entry ignored")
             return False
 
         # If we have a cached permanent redirect, return it immediately. We
@@ -164,7 +176,7 @@ class CacheController(object):
         # with cache busting headers as usual (ie no-cache).
         if int(resp.status) in PERMANENT_REDIRECT_STATUSES:
             msg = (
-                'Returning cached permanent redirect response '
+                "Returning cached permanent redirect response "
                 "(ignoring date and etag information)"
             )
             logger.debug(msg)
@@ -236,8 +248,7 @@ class CacheController(object):
         return False
 
     def conditional_headers(self, request):
-        cache_url = self.cache_url(request.url)
-        resp = self.serializer.loads(request, self.cache.get(cache_url))
+        resp = self._load_from_cache(request)
         new_headers = {}
 
         if resp:
@@ -251,6 +262,29 @@ class CacheController(object):
 
         return new_headers
 
+    def _cache_set(self, cache_url, request, response, body=None, expires_time=None):
+        """
+        Store the data in the cache.
+        """
+        if isinstance(self.cache, SeparateBodyBaseCache):
+            # We pass in the body separately; just put a placeholder empty
+            # string in the metadata.
+            self.cache.set(
+                cache_url,
+                self.serializer.dumps(request, response, b""),
+                expires=expires_time,
+            )
+            # body is None can happen when, for example, we're only updating
+            # headers, as is the case in update_cached_response().
+            if body is not None:
+                self.cache.set_body(cache_url, body)
+        else:
+            self.cache.set(
+                cache_url,
+                self.serializer.dumps(request, response, body),
+                expires=expires_time,
+            )
+
     def cache_response(self, request, response, body=None, status_codes=None):
         """
         Algorithm for caching requests.
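
[review note] A minimal sketch (not part of the patch) of the split-storage contract that the new `_cache_set()` above relies on: a `SeparateBodyBaseCache` subclass receives metadata and body through separate calls, so the serialized blob only ever carries a placeholder body. `DictSplitCache` is a hypothetical illustration, not shipped code.

    from io import BytesIO
    from cachecontrol.cache import SeparateBodyBaseCache

    class DictSplitCache(SeparateBodyBaseCache):
        def __init__(self):
            self.meta, self.bodies = {}, {}

        def get(self, key):
            return self.meta.get(key)

        def set(self, key, value, expires=None):
            self.meta[key] = value

        def set_body(self, key, body):
            self.bodies[key] = body

        def get_body(self, key):
            # the controller expects a file-like object, or None on a miss
            return BytesIO(self.bodies[key]) if key in self.bodies else None

        def delete(self, key):
            self.meta.pop(key, None)
            self.bodies.pop(key, None)
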
@@ -268,10 +302,8 @@ class CacheController(object):
 
         response_headers = CaseInsensitiveDict(response.headers)
 
-        if 'date' in response_headers:
-            date = calendar.timegm(
-                parsedate_tz(response_headers['date'])
-            )
+        if "date" in response_headers:
+            date = calendar.timegm(parsedate_tz(response_headers["date"]))
         else:
             date = 0
 
@@ -319,59 +351,61 @@ class CacheController(object):
         # If we've been given an etag, then keep the response
         if self.cache_etags and "etag" in response_headers:
             expires_time = 0
-            if response_headers.get('expires'):
-                expires = parsedate_tz(response_headers['expires'])
+            if response_headers.get("expires"):
+                expires = parsedate_tz(response_headers["expires"])
                 if expires is not None:
                     expires_time = calendar.timegm(expires) - date
 
             expires_time = max(expires_time, 14 * 86400)
 
-            logger.debug('etag object cached for {0} seconds'.format(expires_time))
+            logger.debug("etag object cached for {0} seconds".format(expires_time))
             logger.debug("Caching due to etag")
-            self.cache.set(
-                cache_url,
-                self.serializer.dumps(request, response, body),
-                expires=expires_time
-            )
+            self._cache_set(cache_url, request, response, body, expires_time)
 
         # Add to the cache any permanent redirects. We do this before looking
         # that the Date headers.
         elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
             logger.debug("Caching permanent redirect")
-            self.cache.set(cache_url, self.serializer.dumps(request, response, b''))
+            self._cache_set(cache_url, request, response, b"")
 
         # Add to the cache if the response headers demand it. If there
         # is no date header then we can't do anything about expiring
         # the cache.
         elif "date" in response_headers:
-            date = calendar.timegm(
-                parsedate_tz(response_headers['date'])
-            )
+            date = calendar.timegm(parsedate_tz(response_headers["date"]))
             # cache when there is a max-age > 0
             if "max-age" in cc and cc["max-age"] > 0:
                 logger.debug("Caching b/c date exists and max-age > 0")
-                expires_time = cc['max-age']
-                self.cache.set(
+                expires_time = cc["max-age"]
+                self._cache_set(
                     cache_url,
-                    self.serializer.dumps(request, response, body),
-                    expires=expires_time
+                    request,
+                    response,
+                    body,
+                    expires_time,
                 )
 
             # If the request can expire, it means we should cache it
             # in the meantime.
             elif "expires" in response_headers:
                 if response_headers["expires"]:
-                    expires = parsedate_tz(response_headers['expires'])
+                    expires = parsedate_tz(response_headers["expires"])
                     if expires is not None:
                         expires_time = calendar.timegm(expires) - date
                     else:
                         expires_time = None
 
-                    logger.debug('Caching b/c of expires header. expires in {0} seconds'.format(expires_time))
-                    self.cache.set(
+                    logger.debug(
+                        "Caching b/c of expires header. expires in {0} seconds".format(
+                            expires_time
+                        )
+                    )
+                    self._cache_set(
                         cache_url,
-                        self.serializer.dumps(request, response, body=body),
-                        expires=expires_time,
+                        request,
+                        response,
+                        body,
+                        expires_time,
                     )
 
     def update_cached_response(self, request, response):
@@ -382,8 +416,7 @@ class CacheController(object):
         gotten a 304 as the response.
         """
         cache_url = self.cache_url(request.url)
-
-        cached_response = self.serializer.loads(request, self.cache.get(cache_url))
+        cached_response = self._load_from_cache(request)
 
         if not cached_response:
             # we didn't have a cached response
@@ -414,8 +447,7 @@ class CacheController(object):
             cached_response._fp.seek(0)
         except (BaseException, Exception):
             pass
-        body = cached_response.read(decode_content=False)
-        self.cache.set(cache_url, self.serializer.dumps(request, cached_response, body))
+        self._cache_set(cache_url, request, cached_response)
 
         try:
             cached_response._fp.seek(0)
diff --git a/lib/cachecontrol/filewrapper.py b/lib/cachecontrol/filewrapper.py
index dd91334c..f5ed5f6f 100644
--- a/lib/cachecontrol/filewrapper.py
+++ b/lib/cachecontrol/filewrapper.py
@@ -2,7 +2,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from io import BytesIO
+from tempfile import NamedTemporaryFile
+import mmap
 
 
 class CallbackFileWrapper(object):
@@ -15,10 +16,17 @@ class CallbackFileWrapper(object):
 
     This class uses members with a double underscore (__) leading prefix so as
     not to accidentally shadow an attribute.
+
+    The data is stored in a temporary file until it is all available.  As long
+    as the temporary files directory is disk-based (sometimes it's a
+    memory-backed ``tmpfs`` on Linux), data will be unloaded to disk if memory
+    pressure is high.  For small files the disk usually won't be used at all;
+    it'll all be in the filesystem memory cache, so there should be no
+    performance impact.
     """
 
     def __init__(self, fp, callback):
-        self.__buf = BytesIO()
+        self.__buf = NamedTemporaryFile("rb+", delete=True)
         self.__fp = fp
         self.__callback = callback
 
@@ -53,7 +61,19 @@ class CallbackFileWrapper(object):
 
     def _close(self):
         if self.__callback:
-            self.__callback(self.__buf.getvalue())
+            if self.__buf.tell() == 0:
+                # Empty file:
+                result = b""
+            else:
+                # Return the data without actually loading it into memory,
+                # relying on Python's buffer API and mmap(). mmap() just gives
+                # a view directly into the filesystem's memory cache, so it
+                # doesn't result in duplicate memory use.
+                self.__buf.seek(0, 0)
+                result = memoryview(
+                    mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
+                )
+            self.__callback(result)
 
         # We assign this to None here, because otherwise we can get into
         # really tricky problems where the CPython interpreter dead locks
@@ -62,9 +82,16 @@ class CallbackFileWrapper(object):
         # and allows the garbage collector to do it's thing normally.
         self.__callback = None
 
+        # Closing the temporary file releases memory and frees disk space.
+        # Important when caching big files.
+        self.__buf.close()
+
     def read(self, amt=None):
         data = self.__fp.read(amt)
-        self.__buf.write(data)
+        if data:
+            # We may be dealing with b'', a sign that things are over:
+            # it's passed e.g. after we've already closed self.__buf.
+            self.__buf.write(data)
 
         if self.__is_fp_closed():
             self._close()
diff --git a/lib/cachecontrol/serialize.py b/lib/cachecontrol/serialize.py
index 4e49a90e..70135d39 100644
--- a/lib/cachecontrol/serialize.py
+++ b/lib/cachecontrol/serialize.py
@@ -25,10 +25,16 @@
 _default_body_read = object()
 
 
 class Serializer(object):
-
-    def dumps(self, request, response, body):
+    def dumps(self, request, response, body=None):
         response_headers = CaseInsensitiveDict(response.headers)
 
+        if body is None:
+            # When a body isn't passed in, we'll read the response. We
+            # also update the response with a new file handler to be
+            # sure it acts as though it was never read.
+            body = response.read(decode_content=False)
+            response._fp = io.BytesIO(body)
+
         # NOTE: This is all a bit weird, but it's really important that on
         # Python 2.x these objects are unicode and not str, even when
         # they contain only ascii. The problem here is that msgpack
@@ -38,7 +44,7 @@ class Serializer(object):
         # enough to have msgpack know the difference.
         data = {
             u"response": {
-                u"body": body,
+                u"body": body,  # Empty bytestring if body is stored separately
                 u"headers": dict(
                     (text_type(k), text_type(v)) for k, v in response.headers.items()
                 ),
@@ -63,7 +69,7 @@ class Serializer(object):
 
         return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
 
-    def loads(self, request, data):
+    def loads(self, request, data, body_file=None):
         # Short circuit if we've been given an empty set of data
         if not data:
             return
@@ -86,14 +92,14 @@ class Serializer(object):
 
         # Dispatch to the actual load method for the given version
         try:
-            return getattr(self, "_loads_v{}".format(ver))(request, data)
+            return getattr(self, "_loads_v{}".format(ver))(request, data, body_file)
 
         except AttributeError:
             # This is a version we don't have a loads function for, so we'll
             # just treat it as a miss and return None
             return
 
-    def prepare_response(self, request, cached):
+    def prepare_response(self, request, cached, body_file=None):
         """Verify our vary headers match and construct a real urllib3
         HTTPResponse object.
         """
@@ -119,7 +125,10 @@ class Serializer(object):
         cached["response"]["headers"] = headers
 
         try:
-            body = io.BytesIO(body_raw)
+            if body_file is None:
+                body = io.BytesIO(body_raw)
+            else:
+                body = body_file
         except TypeError:
             # This can happen if cachecontrol serialized to v1 format (pickle)
             # using Python 2. A Python 2 str(byte string) will be unpickled as
@@ -131,21 +140,22 @@ class Serializer(object):
 
         return HTTPResponse(body=body, preload_content=False, **cached["response"])
 
-    def _loads_v0(self, request, data):
+    def _loads_v0(self, request, data, body_file=None):
         # The original legacy cache data. This doesn't contain enough
         # information to construct everything we need, so we'll treat this as
         # a miss.
         return
 
-    def _loads_v1(self, request, data):
+    def _loads_v1(self, request, data, body_file=None):
         try:
             cached = pickle.loads(data)
         except ValueError:
             return
 
-        return self.prepare_response(request, cached)
+        return self.prepare_response(request, cached, body_file)
 
-    def _loads_v2(self, request, data):
+    def _loads_v2(self, request, data, body_file=None):
+        assert body_file is None
         try:
             cached = json.loads(zlib.decompress(data).decode("utf8"))
         except (ValueError, zlib.error):
@@ -163,18 +173,18 @@ class Serializer(object):
             for k, v in cached["vary"].items()
         )
 
-        return self.prepare_response(request, cached)
+        return self.prepare_response(request, cached, body_file)
 
-    def _loads_v3(self, request, data):
+    def _loads_v3(self, request, data, body_file):
         # Due to Python 2 encoding issues, it's impossible to know for sure
         # exactly how to load v3 entries, thus we'll treat these as a miss so
         # that they get rewritten out as v4 entries.
         return
 
-    def _loads_v4(self, request, data):
+    def _loads_v4(self, request, data, body_file=None):
         try:
             cached = msgpack.loads(data, raw=False)
         except ValueError:
             return
 
-        return self.prepare_response(request, cached)
+        return self.prepare_response(request, cached, body_file)
diff --git a/lib/filelock/__init__.py b/lib/filelock/__init__.py
new file mode 100644
index 00000000..36fe7e43
--- /dev/null
+++ b/lib/filelock/__init__.py
@@ -0,0 +1,52 @@
+"""
+A platform independent file lock that supports the with-statement.
+
+.. autodata:: filelock.__version__
+   :no-value:
+
+"""
+from __future__ import annotations
+
+import sys
+import warnings
+from typing import TYPE_CHECKING
+
+from ._api import AcquireReturnProxy, BaseFileLock
+from ._error import Timeout
+from ._soft import SoftFileLock
+from ._unix import UnixFileLock, has_fcntl
+from ._windows import WindowsFileLock
+from .version import version
+
+#: version of the project as a string
+__version__: str = version
+
+
+if sys.platform == "win32":  # pragma: win32 cover
+    _FileLock: type[BaseFileLock] = WindowsFileLock
+else:  # pragma: win32 no cover
+    if has_fcntl:
+        _FileLock: type[BaseFileLock] = UnixFileLock
+    else:
+        _FileLock = SoftFileLock
+        if warnings is not None:
+            warnings.warn("only soft file lock is available")
+
+#: Alias for the lock, which should be used for the current platform. On Windows, this is an alias for
+# :class:`WindowsFileLock`, on Unix for :class:`UnixFileLock` and otherwise for :class:`SoftFileLock`.
+if TYPE_CHECKING:
+    FileLock = SoftFileLock
+else:
+    FileLock = _FileLock
+
+
+__all__ = [
+    "__version__",
+    "FileLock",
+    "SoftFileLock",
+    "Timeout",
+    "UnixFileLock",
+    "WindowsFileLock",
+    "BaseFileLock",
+    "AcquireReturnProxy",
+]
diff --git a/lib/filelock/_api.py b/lib/filelock/_api.py
new file mode 100644
index 00000000..273b82e6
--- /dev/null
+++ b/lib/filelock/_api.py
@@ -0,0 +1,246 @@
+from __future__ import annotations
+
+import contextlib
+import logging
+import os
+import time
+import warnings
+from abc import ABC, abstractmethod
+from threading import Lock
+from types import TracebackType
+from typing import Any
+
+from ._error import Timeout
+
+_LOGGER = logging.getLogger("filelock")
+
+
+# This is a helper class which is returned by :meth:`BaseFileLock.acquire` and wraps the lock to make sure __enter__
+# is not called twice when entering the with statement. If we simply returned *self*, the lock would be acquired
+# again in the *__enter__* method of the BaseFileLock, but would not be released again automatically (issue #37,
+# a memory leak).
+class AcquireReturnProxy:
+    """A context aware object that will release the lock file when exiting."""
+
+    def __init__(self, lock: BaseFileLock) -> None:
+        self.lock = lock
+
+    def __enter__(self) -> BaseFileLock:
+        return self.lock
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,  # noqa: U100
+        exc_value: BaseException | None,  # noqa: U100
+        traceback: TracebackType | None,  # noqa: U100
+    ) -> None:
+        self.lock.release()
+
+
+class BaseFileLock(ABC, contextlib.ContextDecorator):
+    """Abstract base class for a file lock object."""
+
+    def __init__(self, lock_file: str | os.PathLike[Any], timeout: float = -1) -> None:
+        """
+        Create a new lock object.
+
+        :param lock_file: path to the file
+        :param timeout: default timeout when acquiring the lock, in seconds. It will be used as the fallback value
+            in the acquire method if no timeout value (``None``) is given. If you want to disable the timeout, set
+            it to a negative value. A timeout of 0 means that there is exactly one attempt to acquire the file lock.
+        """
+        # The path to the lock file.
+        self._lock_file: str = os.fspath(lock_file)
+
+        # The file descriptor for the *_lock_file* as it is returned by the os.open() function.
+        # This file lock is only NOT None if the object currently holds the lock.
+        self._lock_file_fd: int | None = None
+
+        # The default timeout value.
+        self._timeout: float = timeout
+
+        # We use this lock primarily for the lock counter.
+        self._thread_lock: Lock = Lock()
+
+        # The lock counter is used for implementing the nested locking mechanism. Whenever the lock is acquired, the
+        # counter is increased and the lock is only released, when this value is 0 again.
+        self._lock_counter: int = 0
+
+    @property
+    def lock_file(self) -> str:
+        """:return: path to the lock file"""
+        return self._lock_file
+
+    @property
+    def timeout(self) -> float:
+        """
+        :return: the default timeout value, in seconds
+
+        .. versionadded:: 2.0.0
+        """
+        return self._timeout
+
+    @timeout.setter
+    def timeout(self, value: float | str) -> None:
+        """
+        Change the default timeout value.
+
+        :param value: the new value, in seconds
+        """
+        self._timeout = float(value)
+
+    @abstractmethod
+    def _acquire(self) -> None:
+        """If the file lock could be acquired, self._lock_file_fd holds the file descriptor of the lock file."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def _release(self) -> None:
+        """Releases the lock and sets self._lock_file_fd to None."""
+        raise NotImplementedError
+
+    @property
+    def is_locked(self) -> bool:
+        """
+
+        :return: A boolean indicating if the lock is currently held.
+
+        .. versionchanged:: 2.0.0
+
+            This was previously a method and is now a property.
+        """
+        return self._lock_file_fd is not None
+
+    def acquire(
+        self,
+        timeout: float | None = None,
+        poll_interval: float = 0.05,
+        *,
+        poll_intervall: float | None = None,
+        blocking: bool = True,
+    ) -> AcquireReturnProxy:
+        """
+        Try to acquire the file lock.
+
+        :param timeout: maximum wait time for acquiring the lock; ``None`` means use the default :attr:`~timeout`,
+            and if ``timeout < 0``, there is no timeout and this method will block until the lock is acquired
+        :param poll_interval: interval of trying to acquire the lock file
+        :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead
+        :param blocking: defaults to True. If False, the function will return immediately if it cannot obtain a lock
+            on the first attempt. Otherwise this method will block until the timeout expires or the lock is acquired.
+        :raises Timeout: if it fails to acquire the lock within the timeout period
+        :return: a context object that will unlock the file when the context is exited
+
+        .. code-block:: python
+
+            # You can use this method in the context manager (recommended)
+            with lock.acquire():
+                pass
+
+            # Or use an equivalent try-finally construct:
+            lock.acquire()
+            try:
+                pass
+            finally:
+                lock.release()
+
+        .. versionchanged:: 2.0.0
+
+            This method now returns a *proxy* object instead of *self*,
+            so that it can be used in a with statement without side effects.
+
+        """
+        # Use the default timeout, if no timeout is provided.
+        if timeout is None:
+            timeout = self.timeout
+
+        if poll_intervall is not None:
+            msg = "use poll_interval instead of poll_intervall"
+            warnings.warn(msg, DeprecationWarning, stacklevel=2)
+            poll_interval = poll_intervall
+
+        # Increment the number right at the beginning. We can still undo it if something fails.
+        with self._thread_lock:
+            self._lock_counter += 1
+
+        lock_id = id(self)
+        lock_filename = self._lock_file
+        start_time = time.monotonic()
+        try:
+            while True:
+                with self._thread_lock:
+                    if not self.is_locked:
+                        _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
+                        self._acquire()
+
+                if self.is_locked:
+                    _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
+                    break
+                elif blocking is False:
+                    _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
+                    raise Timeout(self._lock_file)
+                elif 0 <= timeout < time.monotonic() - start_time:
+                    _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
+                    raise Timeout(self._lock_file)
+                else:
+                    msg = "Lock %s not acquired on %s, waiting %s seconds ..."
+                    _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
+                    time.sleep(poll_interval)
+        except BaseException:  # Something did go wrong, so decrement the counter.
+            with self._thread_lock:
+                self._lock_counter = max(0, self._lock_counter - 1)
+            raise
+        return AcquireReturnProxy(lock=self)
+
+    def release(self, force: bool = False) -> None:
+        """
+        Releases the file lock. Please note that the lock is only completely released if the lock counter is 0.
+        Also note that the lock file itself is not automatically deleted.
+
+        :param force: If true, the lock counter is ignored and the lock is released in every case.
+        """
+        with self._thread_lock:
+
+            if self.is_locked:
+                self._lock_counter -= 1
+
+                if self._lock_counter == 0 or force:
+                    lock_id, lock_filename = id(self), self._lock_file
+
+                    _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
+                    self._release()
+                    self._lock_counter = 0
+                    _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)
+
+    def __enter__(self) -> BaseFileLock:
+        """
+        Acquire the lock.
+
+        :return: the lock object
+        """
+        self.acquire()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,  # noqa: U100
+        exc_value: BaseException | None,  # noqa: U100
+        traceback: TracebackType | None,  # noqa: U100
+    ) -> None:
+        """
+        Release the lock.
+
+        :param exc_type: the exception type if raised
+        :param exc_value: the exception value if raised
+        :param traceback: the exception traceback if raised
+        """
+        self.release()
+
+    def __del__(self) -> None:
+        """Called when the lock object is deleted."""
+        self.release(force=True)
+
+
+__all__ = [
+    "BaseFileLock",
+    "AcquireReturnProxy",
+]
diff --git a/lib/filelock/_error.py b/lib/filelock/_error.py
new file mode 100644
index 00000000..b3885214
--- /dev/null
+++ b/lib/filelock/_error.py
@@ -0,0 +1,17 @@
+from __future__ import annotations
+
+
+class Timeout(TimeoutError):
+    """Raised when the lock could not be acquired in *timeout* seconds."""
+
+    def __init__(self, lock_file: str) -> None:
+        #: The path of the file lock.
+        self.lock_file = lock_file
+
+    def __str__(self) -> str:
+        return f"The file lock '{self.lock_file}' could not be acquired."
+
+
+__all__ = [
+    "Timeout",
+]
diff --git a/lib/filelock/_soft.py b/lib/filelock/_soft.py
new file mode 100644
index 00000000..cb09799a
--- /dev/null
+++ b/lib/filelock/_soft.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import os
+import sys
+from errno import EACCES, EEXIST, ENOENT
+
+from ._api import BaseFileLock
+from ._util import raise_on_exist_ro_file
+
+
+class SoftFileLock(BaseFileLock):
+    """Simply watches the existence of the lock file."""
+
+    def _acquire(self) -> None:
+        raise_on_exist_ro_file(self._lock_file)
+        # first check for exists and read-only mode as the open will mask this case as EEXIST
+        mode = (
+            os.O_WRONLY  # open for writing only
+            | os.O_CREAT
+            | os.O_EXCL  # together with above raise EEXIST if the file specified by filename exists
+            | os.O_TRUNC  # truncate the file to zero byte
+        )
+        try:
+            fd = os.open(self._lock_file, mode)
+        except OSError as exception:
+            if exception.errno == EEXIST:  # expected if cannot lock
+                pass
+            elif exception.errno == ENOENT:  # No such file or directory - parent directory is missing
+                raise
+            elif exception.errno == EACCES and sys.platform != "win32":  # pragma: win32 no cover
+                # Permission denied - parent dir is R/O
+                raise  # note windows does not allow you to make a folder r/o only files
+        else:
+            self._lock_file_fd = fd
+
+    def _release(self) -> None:
+        os.close(self._lock_file_fd)  # type: ignore # the lock file is definitely not None
+        self._lock_file_fd = None
+        try:
+            os.remove(self._lock_file)
+        except OSError:  # the file is already deleted and that's what we want
+            pass
+
+
+__all__ = [
+    "SoftFileLock",
+]
diff --git a/lib/filelock/_unix.py b/lib/filelock/_unix.py
new file mode 100644
index 00000000..03b612c9
--- /dev/null
+++ b/lib/filelock/_unix.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+import os
+import sys
+from typing import cast
+
+from ._api import BaseFileLock
+
+#: a flag to indicate if the fcntl API is available
+has_fcntl = False
+if sys.platform == "win32":  # pragma: win32 cover
+
+    class UnixFileLock(BaseFileLock):
+        """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
+
+        def _acquire(self) -> None:
+            raise NotImplementedError
+
+        def _release(self) -> None:
+            raise NotImplementedError
+
+else:  # pragma: win32 no cover
+    try:
+        import fcntl
+    except ImportError:
+        pass
+    else:
+        has_fcntl = True
+
+    class UnixFileLock(BaseFileLock):
+        """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
+
+        def _acquire(self) -> None:
+            open_mode = os.O_RDWR | os.O_CREAT | os.O_TRUNC
+            fd = os.open(self._lock_file, open_mode)
+            try:
+                fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+            except OSError:
+                os.close(fd)
+            else:
+                self._lock_file_fd = fd
+
+        def _release(self) -> None:
+            # Do not remove the lockfile:
+            #   https://github.com/tox-dev/py-filelock/issues/31
+            #   https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
+            fd = cast(int, self._lock_file_fd)
+            self._lock_file_fd = None
+            fcntl.flock(fd, fcntl.LOCK_UN)
+            os.close(fd)
+
+
+__all__ = [
+    "has_fcntl",
+    "UnixFileLock",
+]
diff --git a/lib/filelock/_util.py b/lib/filelock/_util.py
new file mode 100644
index 00000000..238b80fa
--- /dev/null
+++ b/lib/filelock/_util.py
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+import os
+import stat
+
+
+def raise_on_exist_ro_file(filename: str) -> None:
+    try:
+        file_stat = os.stat(filename)  # use stat to do exists + can write to check without race condition
+    except OSError:
+        return None  # swallow does not exist or other errors
+
+    if file_stat.st_mtime != 0:  # if os.stat returns but modification is zero that's an invalid os.stat - ignore it
+        if not (file_stat.st_mode & stat.S_IWUSR):
+            raise PermissionError(f"Permission denied: {filename!r}")
+
+
+__all__ = [
+    "raise_on_exist_ro_file",
+]
diff --git a/lib/filelock/_windows.py b/lib/filelock/_windows.py
new file mode 100644
index 00000000..60e68cb9
--- /dev/null
+++ b/lib/filelock/_windows.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+import os
+import sys
+from errno import ENOENT
+from typing import cast
+
+from ._api import BaseFileLock
+from ._util import raise_on_exist_ro_file
+
+if sys.platform == "win32":  # pragma: win32 cover
+    import msvcrt
+
+    class WindowsFileLock(BaseFileLock):
+        """Uses the :func:`msvcrt.locking` function to hard lock the lock file on windows systems."""
+
+        def _acquire(self) -> None:
+            raise_on_exist_ro_file(self._lock_file)
+            mode = (
+                os.O_RDWR  # open for read and write
+                | os.O_CREAT  # create file if not exists
+                | os.O_TRUNC  # truncate file if not empty
+            )
+            try:
+                fd = os.open(self._lock_file, mode)
+            except OSError as exception:
+                if exception.errno == ENOENT:  # No such file or directory
+                    raise
+            else:
+                try:
+                    msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)
+                except OSError:
+                    os.close(fd)
+                else:
+                    self._lock_file_fd = fd
+
+        def _release(self) -> None:
+            fd = cast(int, self._lock_file_fd)
+            self._lock_file_fd = None
+            msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
+            os.close(fd)
+
+            try:
+                os.remove(self._lock_file)
+            # Probably another instance of the application has acquired the file lock.
+            except OSError:
+                pass
+
+else:  # pragma: win32 no cover
+
+    class WindowsFileLock(BaseFileLock):
+        """Uses the :func:`msvcrt.locking` function to hard lock the lock file on windows systems."""
+
+        def _acquire(self) -> None:
+            raise NotImplementedError
+
+        def _release(self) -> None:
+            raise NotImplementedError
+
+
+__all__ = [
+    "WindowsFileLock",
+]
diff --git a/lib/filelock/py.typed b/lib/filelock/py.typed
new file mode 100644
index 00000000..e69de29b
diff --git a/lib/filelock/version.py b/lib/filelock/version.py
new file mode 100644
index 00000000..d20e218b
--- /dev/null
+++ b/lib/filelock/version.py
@@ -0,0 +1,4 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+__version__ = version = '3.9.0'
+__version_tuple__ = version_tuple = (3, 9, 0)
diff --git a/lib/lockfile/__init__.py b/lib/lockfile/__init__.py
deleted file mode 100644
index d905af96..00000000
--- a/lib/lockfile/__init__.py
+++ /dev/null
@@ -1,326 +0,0 @@
-"""
-lockfile.py - Platform-independent advisory file locks.
-
-Requires Python 2.5 unless you apply 2.4.diff
-Locking is done on a per-thread basis instead of a per-process basis.
-
-Usage:
-
->>> lock = LockFile('somefile')
->>> try:
-...     lock.acquire()
-... except AlreadyLocked:
-...     print 'somefile', 'is locked already.'
-... except LockFailed:
-...     print 'somefile', 'can\\'t be locked.'
-... else:
-...     print 'got lock'
-got lock
->>> print lock.is_locked()
-True
->>> lock.release()
-
->>> lock = LockFile('somefile')
->>> print lock.is_locked()
-False
->>> with lock:
-...    print lock.is_locked()
-True
->>> print lock.is_locked()
-False
-
->>> lock = LockFile('somefile')
->>> # It is okay to lock twice from the same thread...
->>> with lock:
-...     lock.acquire()
-...
->>> # Though no counter is kept, so you can't unlock multiple times...
->>> print lock.is_locked()
-False
-
-Exceptions:
-
-    Error - base class for other exceptions
-        LockError - base class for all locking exceptions
-            AlreadyLocked - Another thread or process already holds the lock
-            LockFailed - Lock failed for some other reason
-        UnlockError - base class for all unlocking exceptions
-            AlreadyUnlocked - File was not locked.
-            NotMyLock - File was locked but not by the current thread/process
-"""
-
-from __future__ import absolute_import
-
-import sys
-import socket
-import os
-import threading
-import time
-import urllib
-import warnings
-import functools
-
-# Work with PEP8 and non-PEP8 versions of threading module.
-if not hasattr(threading, "current_thread"):
-    threading.current_thread = threading.currentThread
-if not hasattr(threading.Thread, "get_name"):
-    threading.Thread.get_name = threading.Thread.getName
-
-__all__ = ['Error', 'LockError', 'LockTimeout', 'AlreadyLocked',
-           'LockFailed', 'UnlockError', 'NotLocked', 'NotMyLock',
-           'LinkLockFile', 'MkdirLockFile', 'SQLiteLockFile',
-           'LockBase', 'locked']
-
-class Error(Exception):
-    """
-    Base class for other exceptions.
-
-    >>> try:
-    ...   raise Error
-    ... except Exception:
-    ...   pass
-    """
-    pass
-
-class LockError(Error):
-    """
-    Base class for error arising from attempts to acquire the lock.
-
-    >>> try:
-    ...   raise LockError
-    ... except Error:
-    ...   pass
-    """
-    pass
-
-class LockTimeout(LockError):
-    """Raised when lock creation fails within a user-defined period of time.
-
-    >>> try:
-    ...   raise LockTimeout
-    ... except LockError:
-    ...   pass
-    """
-    pass
-
-class AlreadyLocked(LockError):
-    """Some other thread/process is locking the file.
-
-    >>> try:
-    ...   raise AlreadyLocked
-    ... except LockError:
-    ...   pass
-    """
-    pass
-
-class LockFailed(LockError):
-    """Lock file creation failed for some other reason.
-
-    >>> try:
-    ...   raise LockFailed
-    ... except LockError:
-    ...   pass
-    """
-    pass
-
-class UnlockError(Error):
-    """
-    Base class for errors arising from attempts to release the lock.
-
-    >>> try:
-    ...   raise UnlockError
-    ... except Error:
-    ...   pass
-    """
-    pass
-
-class NotLocked(UnlockError):
-    """Raised when an attempt is made to unlock an unlocked file.
-
-    >>> try:
-    ...   raise NotLocked
-    ... except UnlockError:
-    ...   pass
-    """
-    pass
-
-class NotMyLock(UnlockError):
-    """Raised when an attempt is made to unlock a file someone else locked.
-
-    >>> try:
-    ...   raise NotMyLock
-    ... except UnlockError:
-    ...   pass
-    """
-    pass
-
-class LockBase:
-    """Base class for platform-specific lock classes."""
-    def __init__(self, path, threaded=True, timeout=None):
-        """
-        >>> lock = LockBase('somefile')
-        >>> lock = LockBase('somefile', threaded=False)
-        """
-        self.path = path
-        self.lock_file = os.path.abspath(path) + ".lock"
-        self.hostname = socket.gethostname()
-        self.pid = os.getpid()
-        if threaded:
-            t = threading.current_thread()
-            # Thread objects in Python 2.4 and earlier do not have ident
-            # attrs.  Worm around that.
-            ident = getattr(t, "ident", hash(t))
-            self.tname = "-%x" % (ident & 0xffffffff)
-        else:
-            self.tname = ""
-        dirname = os.path.dirname(self.lock_file)
-
-        # unique name is mostly about the current process, but must
-        # also contain the path -- otherwise, two adjacent locked
-        # files conflict (one file gets locked, creating lock-file and
-        # unique file, the other one gets locked, creating lock-file
-        # and overwriting the already existing lock-file, then one
-        # gets unlocked, deleting both lock-file and unique file,
-        # finally the last lock errors out upon releasing.
-        self.unique_name = os.path.join(dirname,
-                                        "%s%s.%s%s" % (self.hostname,
-                                                       self.tname,
-                                                       self.pid,
-                                                       hash(self.path)))
-        self.timeout = timeout
-
-    def acquire(self, timeout=None):
-        """
-        Acquire the lock.
-
-        * If timeout is omitted (or None), wait forever trying to lock the
-          file.
-
-        * If timeout > 0, try to acquire the lock for that many seconds.  If
-          the lock period expires and the file is still locked, raise
-          LockTimeout.
-
-        * If timeout <= 0, raise AlreadyLocked immediately if the file is
-          already locked.
-        """
-        raise NotImplemented("implement in subclass")
-
-    def release(self):
-        """
-        Release the lock.
-
-        If the file is not locked, raise NotLocked.
-        """
-        raise NotImplemented("implement in subclass")
-
-    def is_locked(self):
-        """
-        Tell whether or not the file is locked.
-        """
-        raise NotImplemented("implement in subclass")
-
-    def i_am_locking(self):
-        """
-        Return True if this object is locking the file.
-        """
-        raise NotImplemented("implement in subclass")
-
-    def break_lock(self):
-        """
-        Remove a lock.  Useful if a locking thread failed to unlock.
-        """
-        raise NotImplemented("implement in subclass")
-
-    def __enter__(self):
-        """
-        Context manager support.
-        """
-        self.acquire()
-        return self
-
-    def __exit__(self, *_exc):
-        """
-        Context manager support.
-        """
-        self.release()
-
-    def __repr__(self):
-        return "<%s: %r -- %r>" % (self.__class__.__name__, self.unique_name,
-                                   self.path)
-
-def _fl_helper(cls, mod, *args, **kwds):
-    warnings.warn("Import from %s module instead of lockfile package" % mod,
-                  DeprecationWarning, stacklevel=2)
-    # This is a bit funky, but it's only for awhile.  The way the unit tests
-    # are constructed this function winds up as an unbound method, so it
-    # actually takes three args, not two.  We want to toss out self.
-    if not isinstance(args[0], str):
-        # We are testing, avoid the first arg
-        args = args[1:]
-    if len(args) == 1 and not kwds:
-        kwds["threaded"] = True
-    return cls(*args, **kwds)
-
-def LinkFileLock(*args, **kwds):
-    """Factory function provided for backwards compatibility.
-
-    Do not use in new code.  Instead, import LinkLockFile from the
-    lockfile.linklockfile module.
-    """
-    from . import linklockfile
-    return _fl_helper(linklockfile.LinkLockFile, "lockfile.linklockfile",
-                      *args, **kwds)
-
-def MkdirFileLock(*args, **kwds):
-    """Factory function provided for backwards compatibility.
-
-    Do not use in new code.  Instead, import MkdirLockFile from the
-    lockfile.mkdirlockfile module.
-    """
-    from . import mkdirlockfile
-    return _fl_helper(mkdirlockfile.MkdirLockFile, "lockfile.mkdirlockfile",
-                      *args, **kwds)
-
-def SQLiteFileLock(*args, **kwds):
-    """Factory function provided for backwards compatibility.
-
-    Do not use in new code.  Instead, import SQLiteLockFile from the
-    lockfile.mkdirlockfile module.
-    """
-    from . import sqlitelockfile
-    return _fl_helper(sqlitelockfile.SQLiteLockFile, "lockfile.sqlitelockfile",
-                      *args, **kwds)
-
-def locked(path, timeout=None):
-    """Decorator which enables locks for decorated function.
-
-    Arguments:
-     - path: path for lockfile.
-     - timeout (optional): Timeout for acquiring lock.
-
-     Usage:
-         @locked('/var/run/myname', timeout=0)
-         def myname(...):
-             ...
-    """
-    def decor(func):
-        @functools.wraps(func)
-        def wrapper(*args, **kwargs):
-            lock = FileLock(path, timeout=timeout)
-            lock.acquire()
-            try:
-                return func(*args, **kwargs)
-            finally:
-                lock.release()
-        return wrapper
-    return decor
-
-if hasattr(os, "link"):
-    from . import linklockfile as _llf
-    LockFile = _llf.LinkLockFile
-else:
-    from . import mkdirlockfile as _mlf
-    LockFile = _mlf.MkdirLockFile
-
-FileLock = LockFile
-
diff --git a/lib/lockfile/linklockfile.py b/lib/lockfile/linklockfile.py
deleted file mode 100644
index dd34433b..00000000
--- a/lib/lockfile/linklockfile.py
+++ /dev/null
@@ -1,76 +0,0 @@
-from __future__ import absolute_import
-
-import time
-import os
-import errno
-
-from . import (LockBase, LockFailed, NotLocked, NotMyLock, LockTimeout,
-               AlreadyLocked)
-
-
-class LinkLockFile(LockBase):
-    """
-    Lock access to a file using atomic property of link(2).
-
-    lock = LinkLockFile('somefile'[, threaded=False[, timeout=None]])
-    """
-
-    # noinspection PyTypeChecker
-    def acquire(self, timeout=None):
-        try:
-            open(self.unique_name, 'wb').close()
-        except IOError:
-            raise LockFailed('failed to create %s' % self.unique_name)
-
-        timeout = timeout is not None and timeout or self.timeout
-        end_time = time.time()
-        if timeout is not None and timeout > 0:
-            end_time += timeout
-
-        while True:
-            # Try and create a hard link to it.
-            try:
-                os.link(self.unique_name, self.lock_file)
-            except OSError as e:
-                if errno.ENOSYS == e.errno:
-                    raise LockFailed('%s' % e.strerror)
-
-                # Link creation failed.  Maybe we've double-locked?
-                nlinks = os.stat(self.unique_name).st_nlink
-                if nlinks == 2:
-                    # The original link plus the one I created == 2.  We're
-                    # good to go.
-                    return
-                else:
-                    # Otherwise the lock creation failed.
-                    if timeout is not None and time.time() > end_time:
-                        os.unlink(self.unique_name)
-                        if timeout > 0:
-                            raise LockTimeout('Timeout waiting to acquire lock for %s' % self.path)
-                        else:
-                            raise AlreadyLocked('%s is already locked' % self.path)
-
-                    time.sleep(timeout is not None and (timeout / 10) or 0.1)
-            else:
-                # Link creation succeeded.  We're good to go.
-                return
-
-    def release(self):
-        if not self.is_locked():
-            raise NotLocked('%s is not locked' % self.path)
-        elif not os.path.exists(self.unique_name):
-            raise NotMyLock('%s is locked, but not by me' % self.path)
-        os.unlink(self.unique_name)
-        os.unlink(self.lock_file)
-
-    def is_locked(self):
-        return os.path.exists(self.lock_file)
-
-    def i_am_locking(self):
-        return (self.is_locked() and
-                os.path.exists(self.unique_name) and
-                os.stat(self.unique_name).st_nlink == 2)
-
-    def break_lock(self):
-        if os.path.exists(self.lock_file):
-            os.unlink(self.lock_file)
diff --git a/lib/lockfile/mkdirlockfile.py b/lib/lockfile/mkdirlockfile.py
deleted file mode 100644
index e1f4820f..00000000
--- a/lib/lockfile/mkdirlockfile.py
+++ /dev/null
@@ -1,91 +0,0 @@
-from __future__ import absolute_import, division
-
-import time
-import os
-import sys
-import errno
-import shutil
-
-from . import (LockBase, LockFailed, NotLocked, NotMyLock, LockTimeout,
-               AlreadyLocked)
-
-class MkdirLockFile(LockBase):
-    """Lock file by creating a directory."""
-    def __init__(self, path, threaded=True, timeout=None):
-        """
-        >>> lock = MkdirLockFile('somefile')
-        >>> lock = MkdirLockFile('somefile', threaded=False)
-        """
-        LockBase.__init__(self, path, threaded, timeout)
-        # Lock file itself is a directory.  Place the unique file name into
-        # it.
-        self.unique_name = os.path.join(self.lock_file,
-                                        "%s.%s%s" % (self.hostname,
-                                                     self.tname,
-                                                     self.pid))
-
-    def acquire(self, timeout=None):
-        timeout = timeout if timeout is not None else self.timeout
-        end_time = time.time()
-        if timeout is not None and timeout > 0:
-            end_time += timeout
-
-        if timeout is None:
-            wait = 0.1
-        else:
-            wait = max(0, timeout / 10)
-
-        while True:
-            try:
-                os.mkdir(self.lock_file)
-            except OSError:
-                err = sys.exc_info()[1]
-                if err.errno == errno.EEXIST:
-                    # Already locked.
-                    if os.path.exists(self.unique_name):
-                        # Already locked by me.
-                        return
-                    if timeout is not None and time.time() > end_time:
-                        if timeout > 0:
-                            raise LockTimeout("Timeout waiting to acquire"
-                                              " lock for %s" %
-                                              self.path)
-                        else:
-                            # Someone else has the lock.
-                            raise AlreadyLocked("%s is already locked" %
-                                                self.path)
-                    time.sleep(wait)
-                else:
-                    # Couldn't create the lock for some other reason
-                    raise LockFailed("failed to create %s" % self.lock_file)
-            else:
-                open(self.unique_name, "wb").close()
-                return
-
-    def release(self):
-        if not self.is_locked():
-            raise NotLocked("%s is not locked" % self.path)
-        elif not os.path.exists(self.unique_name):
-            raise NotMyLock("%s is locked, but not by me" % self.path)
-        os.unlink(self.unique_name)
-        self.delete_directory()
-
-    def delete_directory(self):
-        # NOTE(dims): We may end up with a race condition here.  The path
-        # can be deleted between the .exists() and the .rmtree() call.
-        # So we should catch any exception if the path does not exist.
-        try:
-            shutil.rmtree(self.lock_file)
-        except Exception:
-            pass
-
-    def is_locked(self):
-        return os.path.exists(self.lock_file)
-
-    def i_am_locking(self):
-        return (self.is_locked() and
-                os.path.exists(self.unique_name))
-
-    def break_lock(self):
-        if os.path.exists(self.lock_file):
-            self.delete_directory()
diff --git a/lib/lockfile/pidlockfile.py b/lib/lockfile/pidlockfile.py
deleted file mode 100644
index e92f9ead..00000000
--- a/lib/lockfile/pidlockfile.py
+++ /dev/null
@@ -1,193 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# pidlockfile.py
-#
-# Copyright © 2008–2009 Ben Finney
-#
-# This is free software: you may copy, modify, and/or distribute this work
-# under the terms of the Python Software Foundation License, version 2 or
-# later as published by the Python Software Foundation.
-# No warranty expressed or implied. See the file LICENSE.PSF-2 for details.
-
-""" Lockfile behaviour implemented via Unix PID files.
-    """
-
-from __future__ import absolute_import
-
-import os
-import sys
-import errno
-import time
-
-from . import (LockBase, AlreadyLocked, LockFailed, NotLocked, NotMyLock,
-               LockTimeout)
-
-
-class PIDLockFile(LockBase):
-    """ Lockfile implemented as a Unix PID file.
-
-    The lock file is a normal file named by the attribute `path`.
-    A lock's PID file contains a single line of text, containing
-    the process ID (PID) of the process that acquired the lock.
-
-    >>> lock = PIDLockFile('somefile')
-    >>> lock = PIDLockFile('somefile')
-    """
-
-    def __init__(self, path, threaded=False, timeout=None):
-        # pid lockfiles don't support threaded operation, so always force
-        # False as the threaded arg.
-        LockBase.__init__(self, path, False, timeout)
-        dirname = os.path.dirname(self.lock_file)
-        basename = os.path.split(self.path)[-1]
-        self.unique_name = self.path
-
-    def read_pid(self):
-        """ Get the PID from the lock file.
-            """
-        return read_pid_from_pidfile(self.path)
-
-    def is_locked(self):
-        """ Test if the lock is currently held.
-
-            The lock is held if the PID file for this lock exists.
-
-            """
-        return os.path.exists(self.path)
-
-    def i_am_locking(self):
-        """ Test if the lock is held by the current process.
-
-        Returns ``True`` if the current process ID matches the
-        number stored in the PID file.
-        """
-        return self.is_locked() and os.getpid() == self.read_pid()
-
-    def acquire(self, timeout=None):
-        """ Acquire the lock.
-
-        Creates the PID file for this lock, or raises an error if
-        the lock could not be acquired.
-        """
-
-        timeout = timeout is not None and timeout or self.timeout
-        end_time = time.time()
-        if timeout is not None and timeout > 0:
-            end_time += timeout
-
-        while True:
-            try:
-                write_pid_to_pidfile(self.path)
-            except OSError as exc:
-                if exc.errno == errno.EEXIST:
-                    # The lock creation failed.  Maybe sleep a bit.
-                    if timeout is not None and time.time() > end_time:
-                        if timeout > 0:
-                            raise LockTimeout("Timeout waiting to acquire"
-                                              " lock for %s" %
-                                              self.path)
-                        else:
-                            raise AlreadyLocked("%s is already locked" %
-                                                self.path)
-                    time.sleep(timeout is not None and timeout/10 or 0.1)
-                else:
-                    raise LockFailed("failed to create %s" % self.path)
-            else:
-                return
-
-    def release(self):
-        """ Release the lock.
-
-            Removes the PID file to release the lock, or raises an
-            error if the current process does not hold the lock.
-
-            """
-        if not self.is_locked():
-            raise NotLocked("%s is not locked" % self.path)
-        if not self.i_am_locking():
-            raise NotMyLock("%s is locked, but not by me" % self.path)
-        remove_existing_pidfile(self.path)
-
-    def break_lock(self):
-        """ Break an existing lock.
-
-            Removes the PID file if it already exists, otherwise does
-            nothing.
-
-            """
-        remove_existing_pidfile(self.path)
-
-def read_pid_from_pidfile(pidfile_path):
-    """ Read the PID recorded in the named PID file.
-
-        Read and return the numeric PID recorded as text in the named
-        PID file.  If the PID file cannot be read, or if the content is
-        not a valid PID, return ``None``.
-
-        """
-    pid = None
-    try:
-        pidfile = open(pidfile_path, 'r')
-    except IOError:
-        pass
-    else:
-        # According to the FHS 2.3 section on PID files in /var/run:
-        #
-        #   The file must consist of the process identifier in
-        #   ASCII-encoded decimal, followed by a newline character.
-        #
-        #   Programs that read PID files should be somewhat flexible
-        #   in what they accept; i.e., they should ignore extra
-        #   whitespace, leading zeroes, absence of the trailing
-        #   newline, or additional lines in the PID file.
-
-        line = pidfile.readline().strip()
-        try:
-            pid = int(line)
-        except ValueError:
-            pass
-        pidfile.close()
-
-    return pid
-
-
-def write_pid_to_pidfile(pidfile_path):
-    """ Write the PID in the named PID file.
-
-        Get the numeric process ID (“PID”) of the current process
-        and write it to the named file as a line of text.
-
-        """
-    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
-    open_mode = 0o644
-    pidfile_fd = os.open(pidfile_path, open_flags, open_mode)
-    pidfile = os.fdopen(pidfile_fd, 'w')
-
-    # According to the FHS 2.3 section on PID files in /var/run:
-    #
-    #   The file must consist of the process identifier in
-    #   ASCII-encoded decimal, followed by a newline character.  For
-    #   example, if crond was process number 25, /var/run/crond.pid
-    #   would contain three characters: two, five, and newline.
-
-    pid = os.getpid()
-    line = "%(pid)d\n" % vars()
-    pidfile.write(line)
-    pidfile.close()
-
-
-def remove_existing_pidfile(pidfile_path):
-    """ Remove the named PID file if it exists.
-
-        Removing a PID file that doesn't already exist puts us in the
-        desired state, so we ignore the condition if the file does not
-        exist.
-
-        """
-    try:
-        os.remove(pidfile_path)
-    except OSError as exc:
-        if exc.errno == errno.ENOENT:
-            pass
-        else:
-            raise
diff --git a/lib/lockfile/sqlitelockfile.py b/lib/lockfile/sqlitelockfile.py
deleted file mode 100644
index 7dee4a85..00000000
--- a/lib/lockfile/sqlitelockfile.py
+++ /dev/null
@@ -1,155 +0,0 @@
-from __future__ import absolute_import, division
-
-import time
-import os
-
-try:
-    unicode
-except NameError:
-    unicode = str
-
-from . import LockBase, NotLocked, NotMyLock, LockTimeout, AlreadyLocked
-
-class SQLiteLockFile(LockBase):
-    "Demonstrate SQL-based locking."
-
-    testdb = None
-
-    def __init__(self, path, threaded=True, timeout=None):
-        """
-        >>> lock = SQLiteLockFile('somefile')
-        >>> lock = SQLiteLockFile('somefile', threaded=False)
-        """
-        LockBase.__init__(self, path, threaded, timeout)
-        self.lock_file = unicode(self.lock_file)
-        self.unique_name = unicode(self.unique_name)
-
-        if SQLiteLockFile.testdb is None:
-            import tempfile
-            _fd, testdb = tempfile.mkstemp()
-            os.close(_fd)
-            os.unlink(testdb)
-            del _fd, tempfile
-            SQLiteLockFile.testdb = testdb
-
-        import sqlite3
-        self.connection = sqlite3.connect(SQLiteLockFile.testdb)
-
-        c = self.connection.cursor()
-        try:
-            c.execute("create table locks"
-                      "("
-                      "   lock_file varchar(32),"
-                      "   unique_name varchar(32)"
-                      ")")
-        except sqlite3.OperationalError:
-            pass
-        else:
-            self.connection.commit()
-            import atexit
-            atexit.register(os.unlink, SQLiteLockFile.testdb)
-
-    def acquire(self, timeout=None):
-        timeout = timeout is not None and timeout or self.timeout
-        end_time = time.time()
-        if timeout is not None and timeout > 0:
-            end_time += timeout
-
-        if timeout is None:
-            wait = 0.1
-        elif timeout <= 0:
-            wait = 0
-        else:
-            wait = timeout / 10
-
-        cursor = self.connection.cursor()
-
-        while True:
-            if not self.is_locked():
-                # Not locked.  Try to lock it.
-                cursor.execute("insert into locks"
-                               "  (lock_file, unique_name)"
-                               "  values"
-                               "  (?, ?)",
-                               (self.lock_file, self.unique_name))
-                self.connection.commit()
-
-                # Check to see if we are the only lock holder.
-                cursor.execute("select * from locks"
-                               "  where unique_name = ?",
-                               (self.unique_name,))
-                rows = cursor.fetchall()
-                if len(rows) > 1:
-                    # Nope.  Someone else got there.  Remove our lock.
-                    cursor.execute("delete from locks"
-                                   "  where unique_name = ?",
-                                   (self.unique_name,))
-                    self.connection.commit()
-                else:
-                    # Yup.  We're done, so go home.
-                    return
-            else:
-                # Check to see if we are the only lock holder.
-                cursor.execute("select * from locks"
-                               "  where unique_name = ?",
-                               (self.unique_name,))
-                rows = cursor.fetchall()
-                if len(rows) == 1:
-                    # We're the locker, so go home.
-                    return
-
-            # Maybe we should wait a bit longer.
-            if timeout is not None and time.time() > end_time:
-                if timeout > 0:
-                    # No more waiting.
-                    raise LockTimeout("Timeout waiting to acquire"
-                                      " lock for %s" %
-                                      self.path)
-                else:
-                    # Someone else has the lock and we are impatient..
-                    raise AlreadyLocked("%s is already locked" % self.path)
-
-            # Well, okay.  We'll give it a bit longer.
-            time.sleep(wait)
-
-    def release(self):
-        if not self.is_locked():
-            raise NotLocked("%s is not locked" % self.path)
-        if not self.i_am_locking():
-            raise NotMyLock("%s is locked, but not by me (by %s)" %
-                            (self.unique_name, self._who_is_locking()))
-        cursor = self.connection.cursor()
-        cursor.execute("delete from locks"
-                       "  where unique_name = ?",
-                       (self.unique_name,))
-        self.connection.commit()
-
-    def _who_is_locking(self):
-        cursor = self.connection.cursor()
-        cursor.execute("select unique_name from locks"
-                       "  where lock_file = ?",
-                       (self.lock_file,))
-        return cursor.fetchone()[0]
-
-    def is_locked(self):
-        cursor = self.connection.cursor()
-        cursor.execute("select * from locks"
-                       "  where lock_file = ?",
-                       (self.lock_file,))
-        rows = cursor.fetchall()
-        return not not rows
-
-    def i_am_locking(self):
-        cursor = self.connection.cursor()
-        cursor.execute("select * from locks"
-                       "  where lock_file = ?"
-                       "    and unique_name = ?",
-                       (self.lock_file, self.unique_name))
-        return not not cursor.fetchall()
-
-    def break_lock(self):
-        cursor = self.connection.cursor()
-        cursor.execute("delete from locks"
-                       "  where lock_file = ?",
-                       (self.lock_file,))
-        self.connection.commit()
diff --git a/lib/lockfile/symlinklockfile.py b/lib/lockfile/symlinklockfile.py
deleted file mode 100644
index 57551a36..00000000
--- a/lib/lockfile/symlinklockfile.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from __future__ import absolute_import
-
-import time
-import os
-
-from . import (LockBase, LockFailed, NotLocked, NotMyLock, LockTimeout,
-               AlreadyLocked)
-
-class SymlinkLockFile(LockBase):
-    """Lock access to a file using symlink(2)."""
-
-    def __init__(self, path, threaded=True, timeout=None):
-        # super(SymlinkLockFile).__init(...)
-        LockBase.__init__(self, path, threaded, timeout)
-        # split it back!
-        self.unique_name = os.path.split(self.unique_name)[1]
-
-    def acquire(self, timeout=None):
-        # Hopefully unnecessary for symlink.
-        #try:
-        #    open(self.unique_name, "wb").close()
-        #except IOError:
-        #    raise LockFailed("failed to create %s" % self.unique_name)
-        timeout = timeout is not None and timeout or self.timeout
-        end_time = time.time()
-        if timeout is not None and timeout > 0:
-            end_time += timeout
-
-        while True:
-            # Try and create a symbolic link to it.
-            try:
-                os.symlink(self.unique_name, self.lock_file)
-            except OSError:
-                # Link creation failed.  Maybe we've double-locked?
-                if self.i_am_locking():
-                    # Linked to out unique name. Proceed.
-                    return
-                else:
-                    # Otherwise the lock creation failed.
-                    if timeout is not None and time.time() > end_time:
-                        if timeout > 0:
-                            raise LockTimeout("Timeout waiting to acquire"
-                                              " lock for %s" %
-                                              self.path)
-                        else:
-                            raise AlreadyLocked("%s is already locked" %
-                                                self.path)
-                    time.sleep(timeout/10 if timeout is not None else 0.1)
-            else:
-                # Link creation succeeded.  We're good to go.
-                return
-
-    def release(self):
-        if not self.is_locked():
-            raise NotLocked("%s is not locked" % self.path)
-        elif not self.i_am_locking():
-            raise NotMyLock("%s is locked, but not by me" % self.path)
-        os.unlink(self.lock_file)
-
-    def is_locked(self):
-        return os.path.islink(self.lock_file)
-
-    def i_am_locking(self):
-        return os.path.islink(self.lock_file) and \
-               os.readlink(self.lock_file) == self.unique_name
-
-    def break_lock(self):
-        if os.path.islink(self.lock_file):  # exists && link
-            os.unlink(self.lock_file)
diff --git a/lib/sg_helpers.py b/lib/sg_helpers.py
index cd5b4fae..67f2c771 100644
--- a/lib/sg_helpers.py
+++ b/lib/sg_helpers.py
@@ -25,7 +25,7 @@ import unicodedata
 
 from exceptions_helper import ex, ConnectionSkipException
 from json_helper import json_loads
-from lib.cachecontrol import CacheControl, caches
+from cachecontrol import CacheControl, caches
 # from lib.tmdbsimple.configuration import Configuration
 # from lib.tmdbsimple.genres import Genres
 from cfscrape import CloudflareScraper
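
[review note] A rough end-to-end sketch of how the updated stack is wired up (illustrative only, not part of the patch; the cache directory and URL are made up). CacheControl wraps a requests session, the file cache persists responses on disk, and each cache file is now guarded by a filelock-based "<name>.lock" file instead of a lockfile one:

    import requests
    from cachecontrol import CacheControl
    from cachecontrol.caches import SeparateBodyFileCache

    # SeparateBodyFileCache keeps response bodies in separate ".body" files,
    # so large downloads are never held in memory as one serialized blob.
    sess = CacheControl(requests.Session(),
                        cache=SeparateBodyFileCache('.web_cache'))
    resp = sess.get('https://example.org/feed.xml')

    # The vendored filelock primitive can also be used directly:
    from filelock import FileLock, Timeout

    try:
        with FileLock('.web_cache/example.lock', timeout=5):
            pass  # exclusive critical section
    except Timeout:
        print('another process holds the lock')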