From 8fe34fb5eb8c8432b4ea1bfafc90bbe6c3771870 Mon Sep 17 00:00:00 2001 From: JackDandy Date: Mon, 26 Mar 2018 19:35:48 +0100 Subject: [PATCH] =?UTF-8?q?Update=20feedparser=20library=205.2.1=20(f1dd1b?= =?UTF-8?q?b)=20=E2=86=92=205.2.1=20(5646f4c)=20-=20Uses=20the=20faster=20?= =?UTF-8?q?cchardet=20if=20installed.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.md | 1 + lib/feedparser/__init__.py | 8 ++++- lib/feedparser/api.py | 68 +++++++++++++++++++++++++++++-------- lib/feedparser/encodings.py | 5 ++- lib/feedparser/mixin.py | 4 +-- 5 files changed, 67 insertions(+), 19 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 3ce3ecb9..8ecb35b7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,7 @@ * Update cachecontrol library 0.12.3 (db54c40) to 0.12.4 (bd94f7e) * Update chardet packages 3.0.4 (9b8c5c2) to 4.0.0 (b3d867a) * Update dateutil library 2.6.1 (2f3a160) to 2.7.2 (ff03c0f) +* Update feedparser library 5.2.1 (f1dd1bb) to 5.2.1 (5646f4c) - Uses the faster cchardet if installed [develop changelog] diff --git a/lib/feedparser/__init__.py b/lib/feedparser/__init__.py index a52b39a2..916a61c2 100644 --- a/lib/feedparser/__init__.py +++ b/lib/feedparser/__init__.py @@ -41,4 +41,10 @@ from .api import parse from .datetimes import registerDateHandler from .exceptions import * -api.USER_AGENT = USER_AGENT +# If you want feedparser to automatically resolve all relative URIs, set this +# to 1. +RESOLVE_RELATIVE_URIS = 1 + +# If you want feedparser to automatically sanitize all potentially unsafe +# HTML content, set this to 1. +SANITIZE_HTML = 1 diff --git a/lib/feedparser/api.py b/lib/feedparser/api.py index 614bd2d2..d2d97a64 100644 --- a/lib/feedparser/api.py +++ b/lib/feedparser/api.py @@ -75,17 +75,7 @@ except NameError: # of pre-installed parsers until it finds one that supports everything we need. 
PREFERRED_XML_PARSERS = ["drv_libxml2"] -# If you want feedparser to automatically resolve all relative URIs, set this -# to 1. -RESOLVE_RELATIVE_URIS = 1 - -# If you want feedparser to automatically sanitize all potentially unsafe -# HTML content, set this to 1. -SANITIZE_HTML = 1 - _XML_AVAILABLE = True -mixin.RESOLVE_RELATIVE_URIS = RESOLVE_RELATIVE_URIS -mixin.SANITIZE_HTML = SANITIZE_HTML SUPPORTED_VERSIONS = { '': 'unknown', @@ -175,17 +165,61 @@ StrictFeedParser = type(str('StrictFeedParser'), ( _StrictFeedParser, _FeedParserMixin, xml.sax.handler.ContentHandler, object ), {}) -def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, response_headers=None): +def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, referrer=None, handlers=None, request_headers=None, response_headers=None, resolve_relative_uris=None, sanitize_html=None): '''Parse a feed from a URL, file, stream, or string. - request_headers, if given, is a dict from http header name to value to add - to the request; this overrides internally generated values. + :param url_file_stream_or_string: + File-like object, URL, file path, or string. Both byte and text strings + are accepted. If necessary, encoding will be derived from the response + headers or automatically detected. + + Note that strings may trigger network I/O or filesystem access + depending on the value. Wrap an untrusted string in + a :class:`io.StringIO` or :class:`io.BytesIO` to avoid this. Do not + pass untrusted strings to this function. + + When a URL is not passed the feed location to use in relative URL + resolution should be passed in the ``Content-Location`` response header + (see ``response_headers`` below). + + :param str etag: HTTP ``ETag`` request header. + :param modified: HTTP ``Last-Modified`` request header. 
+ :type modified: :class:`str`, :class:`time.struct_time` 9-tuple, or + :class:`datetime.datetime` + :param str agent: HTTP ``User-Agent`` request header, which defaults to + the value of :data:`feedparser.USER_AGENT`. + :param referrer: HTTP ``Referer`` [sic] request header. + :param request_headers: + A mapping of HTTP header name to HTTP header value to add to the + request, overriding internally generated values. + :type request_headers: :class:`dict` mapping :class:`str` to :class:`str` + :param response_headers: + A mapping of HTTP header name to HTTP header value. Multiple values may + be joined with a comma. If an HTTP request was made, these headers + override any matching headers in the response. Otherwise this specifies + the entirety of the response headers. + :type response_headers: :class:`dict` mapping :class:`str` to :class:`str` + + :param bool resolve_relative_uris: + Should feedparser attempt to resolve relative URIs to absolute ones within + HTML content? Defaults to the value of + :data:`feedparser.RESOLVE_RELATIVE_URIS`, which is ``True``. + :param bool sanitize_html: + Should feedparser sanitize potentially unsafe HTML content? Only disable this if you know + what you are doing! Defaults to the value of + :data:`feedparser.SANITIZE_HTML`, which is ``True``. :return: A :class:`FeedParserDict`. 
''' - + if not agent or sanitize_html is None or resolve_relative_uris is None: + import feedparser if not agent: - agent = USER_AGENT + agent = feedparser.USER_AGENT + if sanitize_html is None: + sanitize_html = feedparser.SANITIZE_HTML + if resolve_relative_uris is None: + resolve_relative_uris = feedparser.RESOLVE_RELATIVE_URIS + result = FeedParserDict( bozo = False, entries = [], @@ -220,6 +254,8 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer if use_strict_parser: # initialize the SAX parser feedparser = StrictFeedParser(baseuri, baselang, 'utf-8') + feedparser.resolve_relative_uris = resolve_relative_uris + feedparser.sanitize_html = sanitize_html saxparser = xml.sax.make_parser(PREFERRED_XML_PARSERS) saxparser.setFeature(xml.sax.handler.feature_namespaces, 1) try: @@ -239,6 +275,8 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer use_strict_parser = 0 if not use_strict_parser and _SGML_AVAILABLE: feedparser = LooseFeedParser(baseuri, baselang, 'utf-8', entities) + feedparser.resolve_relative_uris = resolve_relative_uris + feedparser.sanitize_html = sanitize_html feedparser.feed(data.decode('utf-8', 'replace')) result['feed'] = feedparser.feeddata result['entries'] = feedparser.entries diff --git a/lib/feedparser/encodings.py b/lib/feedparser/encodings.py index 6bbdaf70..a5a7635d 100644 --- a/lib/feedparser/encodings.py +++ b/lib/feedparser/encodings.py @@ -34,7 +34,10 @@ import collections import re try: - import chardet + try: + import cchardet as chardet + except ImportError: + import chardet except ImportError: chardet = None lazy_chardet_encoding = None diff --git a/lib/feedparser/mixin.py b/lib/feedparser/mixin.py index 263bb0d0..5f97dc80 100644 --- a/lib/feedparser/mixin.py +++ b/lib/feedparser/mixin.py @@ -515,12 +515,12 @@ class _FeedParserMixin( is_htmlish = self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types # resolve relative URIs within embedded 
markup - if is_htmlish and RESOLVE_RELATIVE_URIS: + if is_htmlish and self.resolve_relative_uris: if element in self.can_contain_relative_uris: output = _resolveRelativeURIs(output, self.baseuri, self.encoding, self.contentparams.get('type', 'text/html')) # sanitize embedded markup - if is_htmlish and SANITIZE_HTML: + if is_htmlish and self.sanitize_html: if element in self.can_contain_dangerous_markup: output = _sanitizeHTML(output, self.encoding, self.contentparams.get('type', 'text/html'))