SickGear/lib/apprise/attachment/AttachHTTP.py

# -*- coding: utf-8 -*-
#
# Copyright (C) 2019 Chris Caron <lead2gold@gmail.com>
# All rights reserved.
#
# This code is licensed under the MIT License.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files(the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions :
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import re
import os
import six
import requests
from tempfile import NamedTemporaryFile
from .AttachBase import AttachBase
from ..URLBase import PrivacyMode
from ..AppriseLocale import gettext_lazy as _


class AttachHTTP(AttachBase):
    """
    A wrapper for HTTP based attachment sources
    """

    # The default descriptive name associated with the service
    service_name = _('Web Based')

    # The default protocol
    protocol = 'http'

    # The default secure protocol
    secure_protocol = 'https'

    # The maximum number of seconds to wait for a connection to be established
    # before out-right just giving up
    connection_timeout_sec = 5.0

    # The number of bytes in memory to read from the remote source at a time
    chunk_size = 8192

    def __init__(self, headers=None, **kwargs):
        """
        Initialize HTTP Object

        headers can be a dictionary of key/value pairs that you want to
        additionally include as part of the server headers to post with

        """
        super(AttachHTTP, self).__init__(**kwargs)

        self.schema = 'https' if self.secure else 'http'

        self.fullpath = kwargs.get('fullpath')
        if not isinstance(self.fullpath, six.string_types):
            self.fullpath = '/'

        self.headers = {}
        if headers:
            # Store our extra headers
            self.headers.update(headers)

        # Where our content is written to upon a call to download.
        self._temp_file = None

        # Our Query String Dictionary; we use this to track arguments
        # specified that aren't otherwise part of this class
        self.qsd = {k: v for k, v in kwargs.get('qsd', {}).items()
                    if k not in self.template_args}

        return

    def download(self, **kwargs):
        """
        Perform retrieval of the configuration based on the specified request
        """

        # Ensure any existing content set has been invalidated
        self.invalidate()

        # prepare header
        headers = {
            'User-Agent': self.app_id,
        }

        # Apply any/all header over-rides defined
        headers.update(self.headers)

        auth = None
        if self.user:
            auth = (self.user, self.password)

        url = '%s://%s' % (self.schema, self.host)
        if isinstance(self.port, int):
            url += ':%d' % self.port

        url += self.fullpath

        self.logger.debug('HTTP POST URL: %s (cert_verify=%r)' % (
            url, self.verify_certificate,
        ))

        # Where our request object will temporarily live.
        r = None

        # Always call throttle before any remote server i/o is made
        self.throttle()

        try:
            # Make our request
            with requests.get(
                    url,
                    headers=headers,
                    auth=auth,
                    params=self.qsd,
                    verify=self.verify_certificate,
                    timeout=self.connection_timeout_sec,
                    stream=True) as r:

                # Handle Errors
                r.raise_for_status()

                # Get our file-size (if known)
                try:
                    file_size = int(r.headers.get('Content-Length', '0'))
                except (TypeError, ValueError):
                    # Handle edge case where Content-Length is a bad value
                    file_size = 0

                # Perform a little Q/A on file limitations and restrictions
                if self.max_file_size > 0 and file_size > self.max_file_size:

                    # The content retrieved is to large
                    self.logger.error(
                        'HTTP response exceeds allowable maximum file length '
                        '({}KB): {}'.format(
                            int(self.max_file_size / 1024),
                            self.url(privacy=True)))

                    # Return False (signifying a failure)
                    return False

                # Detect config format based on mime if the format isn't
                # already enforced
                self.detected_mimetype = r.headers.get('Content-Type')

                d = r.headers.get('Content-Disposition', '')
                result = re.search(
                    "filename=['\"]?(?P<name>[^'\"]+)['\"]?", d, re.I)
                if result:
                    self.detected_name = result.group('name').strip()

                # Create a temporary file to work with
                self._temp_file = NamedTemporaryFile()

                # Get our chunk size
                chunk_size = self.chunk_size

                # Track all bytes written to disk
                bytes_written = 0

                # If we get here, we can now safely write our content to disk
                for chunk in r.iter_content(chunk_size=chunk_size):
                    # filter out keep-alive chunks
                    if chunk:
                        self._temp_file.write(chunk)
                        bytes_written = self._temp_file.tell()

                        # Prevent a case where Content-Length isn't provided
                        # we don't want to fetch beyond our limits
                        if self.max_file_size > 0:
                            if bytes_written > self.max_file_size:
                                # The content retrieved is to large
                                self.logger.error(
                                    'HTTP response exceeds allowable maximum '
                                    'file length ({}KB): {}'.format(
                                        int(self.max_file_size / 1024),
                                        self.url(privacy=True)))

                                # Invalidate any variables previously set
                                self.invalidate()

                                # Return False (signifying a failure)
                                return False

                            elif bytes_written + chunk_size \
                                    > self.max_file_size:
                                # Adjust out next read to accomodate up to our
                                # limit +1. This will prevent us from readig
                                # to much into our memory buffer
                                self.max_file_size - bytes_written + 1

                # Ensure our content is flushed to disk for post-processing
                self._temp_file.flush()

            # Set our minimum requirements for a successful download() call
            self.download_path = self._temp_file.name
            if not self.detected_name:
                self.detected_name = os.path.basename(self.fullpath)

        except requests.RequestException as e:
            self.logger.error(
                'A Connection error occured retrieving HTTP '
                'configuration from %s.' % self.host)
            self.logger.debug('Socket Exception: %s' % str(e))

            # Invalidate any variables previously set
            self.invalidate()

            # Return False (signifying a failure)
            return False

        except (IOError, OSError):
            # IOError is present for backwards compatibility with Python
            # versions older then 3.3.  >= 3.3 throw OSError now.

            # Could not open and/or write the temporary file
            self.logger.error(
                'Could not write attachment to disk: {}'.format(
                    self.url(privacy=True)))

            # Invalidate any variables previously set
            self.invalidate()

            # Return False (signifying a failure)
            return False

        # Return our success
        return True

    def invalidate(self):
        """
        Close our temporary file
        """
        if self._temp_file:
            self._temp_file.close()
            self._temp_file = None

        super(AttachHTTP, self).invalidate()

    def url(self, privacy=False, *args, **kwargs):
        """
        Returns the URL built dynamically based on specified arguments.
        """

        # Define any arguments set
        args = {
            'verify': 'yes' if self.verify_certificate else 'no',
        }

        # Prepare our cache value
        if self.cache is not None:
            if isinstance(self.cache, bool) or not self.cache:
                cache = 'yes' if self.cache else 'no'
            else:
                cache = int(self.cache)

            # Set our cache value
            args['cache'] = cache

        if self._mimetype:
            # A format was enforced
            args['mime'] = self._mimetype

        if self._name:
            # A name was enforced
            args['name'] = self._name

        # Append our headers into our args
        args.update({'+{}'.format(k): v for k, v in self.headers.items()})

        # Apply any remaining entries to our URL
        args.update(self.qsd)

        # Determine Authentication
        auth = ''
        if self.user and self.password:
            auth = '{user}:{password}@'.format(
                user=self.quote(self.user, safe=''),
                password=self.pprint(
                    self.password, privacy, mode=PrivacyMode.Secret, safe=''),
            )
        elif self.user:
            auth = '{user}@'.format(
                user=self.quote(self.user, safe=''),
            )

        default_port = 443 if self.secure else 80

        return '{schema}://{auth}{hostname}{port}{fullpath}?{args}'.format(
            schema=self.secure_protocol if self.secure else self.protocol,
            auth=auth,
            hostname=self.quote(self.host, safe=''),
            port='' if self.port is None or self.port == default_port
                 else ':{}'.format(self.port),
            fullpath=self.quote(self.fullpath, safe='/'),
            args=self.urlencode(args),
        )

    @staticmethod
    def parse_url(url):
        """
        Parses the URL and returns enough arguments that can allow
        us to substantiate this object.

        """
        results = AttachBase.parse_url(url)

        if not results:
            # We're done early as we couldn't load the results
            return results

        # Add our headers that the user can potentially over-ride if they wish
        # to to our returned result set
        results['headers'] = results['qsd-']
        results['headers'].update(results['qsd+'])

        return results
Change bump to major version 3.xx to signal that this branch supports Python3+ only. Initial SickGear for Python 3. 2023-01-12 01:04:47 +00:00			`# -- coding: utf-8 --`
			`#`
			`# Copyright (C) 2019 Chris Caron <lead2gold@gmail.com>`
			`# All rights reserved.`
			`#`
			`# This code is licensed under the MIT License.`
			`#`
			`# Permission is hereby granted, free of charge, to any person obtaining a copy`
			`# of this software and associated documentation files(the "Software"), to deal`
			`# in the Software without restriction, including without limitation the rights`
			`# to use, copy, modify, merge, publish, distribute, sublicense, and / or sell`
			`# copies of the Software, and to permit persons to whom the Software is`
			`# furnished to do so, subject to the following conditions :`
			`#`
			`# The above copyright notice and this permission notice shall be included in`
			`# all copies or substantial portions of the Software.`
			`#`
			`# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE`
			`# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,`
			`# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN`
			`# THE SOFTWARE.`

			`import re`
			`import os`
			`import six`
			`import requests`
			`from tempfile import NamedTemporaryFile`
			`from .AttachBase import AttachBase`
			`from ..URLBase import PrivacyMode`
			`from ..AppriseLocale import gettext_lazy as _`


			`class AttachHTTP(AttachBase):`
			`"""`
			`A wrapper for HTTP based attachment sources`
			`"""`

			`# The default descriptive name associated with the service`
			`service_name = _('Web Based')`

			`# The default protocol`
			`protocol = 'http'`

			`# The default secure protocol`
			`secure_protocol = 'https'`

			`# The maximum number of seconds to wait for a connection to be established`
			`# before out-right just giving up`
			`connection_timeout_sec = 5.0`

			`# The number of bytes in memory to read from the remote source at a time`
			`chunk_size = 8192`

			`def __init__(self, headers=None, **kwargs):`
			`"""`
			`Initialize HTTP Object`

			`headers can be a dictionary of key/value pairs that you want to`
			`additionally include as part of the server headers to post with`

			`"""`
			`super(AttachHTTP, self).__init__(**kwargs)`

			`self.schema = 'https' if self.secure else 'http'`

			`self.fullpath = kwargs.get('fullpath')`
			`if not isinstance(self.fullpath, six.string_types):`
			`self.fullpath = '/'`

			`self.headers = {}`
			`if headers:`
			`# Store our extra headers`
			`self.headers.update(headers)`

			`# Where our content is written to upon a call to download.`
			`self._temp_file = None`

			`# Our Query String Dictionary; we use this to track arguments`
			`# specified that aren't otherwise part of this class`
			`self.qsd = {k: v for k, v in kwargs.get('qsd', {}).items()`
			`if k not in self.template_args}`

			`return`

			`def download(self, **kwargs):`
			`"""`
			`Perform retrieval of the configuration based on the specified request`
			`"""`

			`# Ensure any existing content set has been invalidated`
			`self.invalidate()`

			`# prepare header`
			`headers = {`
			`'User-Agent': self.app_id,`
			`}`

			`# Apply any/all header over-rides defined`
			`headers.update(self.headers)`

			`auth = None`
			`if self.user:`
			`auth = (self.user, self.password)`

			`url = '%s://%s' % (self.schema, self.host)`
			`if isinstance(self.port, int):`
			`url += ':%d' % self.port`

			`url += self.fullpath`

			`self.logger.debug('HTTP POST URL: %s (cert_verify=%r)' % (`
			`url, self.verify_certificate,`
			`))`

			`# Where our request object will temporarily live.`
			`r = None`

			`# Always call throttle before any remote server i/o is made`
			`self.throttle()`

			`try:`
			`# Make our request`
			`with requests.get(`
			`url,`
			`headers=headers,`
			`auth=auth,`
			`params=self.qsd,`
			`verify=self.verify_certificate,`
			`timeout=self.connection_timeout_sec,`
			`stream=True) as r:`

			`# Handle Errors`
			`r.raise_for_status()`

			`# Get our file-size (if known)`
			`try:`
			`file_size = int(r.headers.get('Content-Length', '0'))`
			`except (TypeError, ValueError):`
			`# Handle edge case where Content-Length is a bad value`
			`file_size = 0`

			`# Perform a little Q/A on file limitations and restrictions`
			`if self.max_file_size > 0 and file_size > self.max_file_size:`

			`# The content retrieved is to large`
			`self.logger.error(`
			`'HTTP response exceeds allowable maximum file length '`
			`'({}KB): {}'.format(`
			`int(self.max_file_size / 1024),`
			`self.url(privacy=True)))`

			`# Return False (signifying a failure)`
			`return False`

			`# Detect config format based on mime if the format isn't`
			`# already enforced`
			`self.detected_mimetype = r.headers.get('Content-Type')`

			`d = r.headers.get('Content-Disposition', '')`
			`result = re.search(`
			`"filename=['\"]?(?P<name>[^'\"]+)['\"]?", d, re.I)`
			`if result:`
			`self.detected_name = result.group('name').strip()`

			`# Create a temporary file to work with`
			`self._temp_file = NamedTemporaryFile()`

			`# Get our chunk size`
			`chunk_size = self.chunk_size`

			`# Track all bytes written to disk`
			`bytes_written = 0`

			`# If we get here, we can now safely write our content to disk`
			`for chunk in r.iter_content(chunk_size=chunk_size):`
			`# filter out keep-alive chunks`
			`if chunk:`
			`self._temp_file.write(chunk)`
			`bytes_written = self._temp_file.tell()`

			`# Prevent a case where Content-Length isn't provided`
			`# we don't want to fetch beyond our limits`
			`if self.max_file_size > 0:`
			`if bytes_written > self.max_file_size:`
			`# The content retrieved is to large`
			`self.logger.error(`
			`'HTTP response exceeds allowable maximum '`
			`'file length ({}KB): {}'.format(`
			`int(self.max_file_size / 1024),`
			`self.url(privacy=True)))`

			`# Invalidate any variables previously set`
			`self.invalidate()`

			`# Return False (signifying a failure)`
			`return False`

			`elif bytes_written + chunk_size \`
			`> self.max_file_size:`
			`# Adjust out next read to accomodate up to our`
			`# limit +1. This will prevent us from readig`
			`# to much into our memory buffer`
			`self.max_file_size - bytes_written + 1`

			`# Ensure our content is flushed to disk for post-processing`
			`self._temp_file.flush()`

			`# Set our minimum requirements for a successful download() call`
			`self.download_path = self._temp_file.name`
			`if not self.detected_name:`
			`self.detected_name = os.path.basename(self.fullpath)`

			`except requests.RequestException as e:`
			`self.logger.error(`
			`'A Connection error occured retrieving HTTP '`
			`'configuration from %s.' % self.host)`
			`self.logger.debug('Socket Exception: %s' % str(e))`

			`# Invalidate any variables previously set`
			`self.invalidate()`

			`# Return False (signifying a failure)`
			`return False`

			`except (IOError, OSError):`
			`# IOError is present for backwards compatibility with Python`
			`# versions older then 3.3. >= 3.3 throw OSError now.`

			`# Could not open and/or write the temporary file`
			`self.logger.error(`
			`'Could not write attachment to disk: {}'.format(`
			`self.url(privacy=True)))`

			`# Invalidate any variables previously set`
			`self.invalidate()`

			`# Return False (signifying a failure)`
			`return False`

			`# Return our success`
			`return True`

			`def invalidate(self):`
			`"""`
			`Close our temporary file`
			`"""`
			`if self._temp_file:`
			`self._temp_file.close()`
			`self._temp_file = None`

			`super(AttachHTTP, self).invalidate()`

			`def url(self, privacy=False, args, *kwargs):`
			`"""`
			`Returns the URL built dynamically based on specified arguments.`
			`"""`

			`# Define any arguments set`
			`args = {`
			`'verify': 'yes' if self.verify_certificate else 'no',`
			`}`

			`# Prepare our cache value`
			`if self.cache is not None:`
			`if isinstance(self.cache, bool) or not self.cache:`
			`cache = 'yes' if self.cache else 'no'`
			`else:`
			`cache = int(self.cache)`

			`# Set our cache value`
			`args['cache'] = cache`

			`if self._mimetype:`
			`# A format was enforced`
			`args['mime'] = self._mimetype`

			`if self._name:`
			`# A name was enforced`
			`args['name'] = self._name`

			`# Append our headers into our args`
			`args.update({'+{}'.format(k): v for k, v in self.headers.items()})`

			`# Apply any remaining entries to our URL`
			`args.update(self.qsd)`

			`# Determine Authentication`
			`auth = ''`
			`if self.user and self.password:`
			`auth = '{user}:{password}@'.format(`
			`user=self.quote(self.user, safe=''),`
			`password=self.pprint(`
			`self.password, privacy, mode=PrivacyMode.Secret, safe=''),`
			`)`
			`elif self.user:`
			`auth = '{user}@'.format(`
			`user=self.quote(self.user, safe=''),`
			`)`

			`default_port = 443 if self.secure else 80`

			`return '{schema}://{auth}{hostname}{port}{fullpath}?{args}'.format(`
			`schema=self.secure_protocol if self.secure else self.protocol,`
			`auth=auth,`
			`hostname=self.quote(self.host, safe=''),`
			`port='' if self.port is None or self.port == default_port`
			`else ':{}'.format(self.port),`
			`fullpath=self.quote(self.fullpath, safe='/'),`
			`args=self.urlencode(args),`
			`)`

			`@staticmethod`
			`def parse_url(url):`
			`"""`
			`Parses the URL and returns enough arguments that can allow`
			`us to substantiate this object.`

			`"""`
			`results = AttachBase.parse_url(url)`

			`if not results:`
			`# We're done early as we couldn't load the results`
			`return results`

			`# Add our headers that the user can potentially over-ride if they wish`
			`# to to our returned result set`
			`results['headers'] = results['qsd-']`
			`results['headers'].update(results['qsd+'])`

			`return results`