SickGear/lib/feedparser/sanitizer.py

# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import re

from .html import _BaseHTMLProcessor
from .urls import make_safe_absolute_uri


class _HTMLSanitizer(_BaseHTMLProcessor):
    """Whitelist-based sanitizer for HTML, inline MathML and inline SVG.

    Start tags, end tags and attributes that are not listed in one of the
    ``acceptable_*`` / ``mathml_*`` / ``svg_*`` collections below are dropped
    before being handed to the base class. Text found inside the elements
    named in ``unacceptable_elements_with_end_tag`` is dropped as well, and
    ``style`` attribute values are filtered by :meth:`sanitize_style`.
    """

    # HTML element names that are passed through.
    acceptable_elements = {
        'a',
        'abbr',
        'acronym',
        'address',
        'area',
        'article',
        'aside',
        'audio',
        'b',
        'big',
        'blockquote',
        'br',
        'button',
        'canvas',
        'caption',
        'center',
        'cite',
        'code',
        'col',
        'colgroup',
        'command',
        'datagrid',
        'datalist',
        'dd',
        'del',
        'details',
        'dfn',
        'dialog',
        'dir',
        'div',
        'dl',
        'dt',
        'em',
        'event-source',
        'fieldset',
        'figcaption',
        'figure',
        'font',
        'footer',
        'form',
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'header',
        'hr',
        'i',
        'img',
        'input',
        'ins',
        'kbd',
        'keygen',
        'label',
        'legend',
        'li',
        'm',
        'map',
        'menu',
        'meter',
        'multicol',
        'nav',
        'nextid',
        'noscript',
        'ol',
        'optgroup',
        'option',
        'output',
        'p',
        'pre',
        'progress',
        'q',
        's',
        'samp',
        'section',
        'select',
        'small',
        'sound',
        'source',
        'spacer',
        'span',
        'strike',
        'strong',
        'sub',
        'sup',
        'table',
        'tbody',
        'td',
        'textarea',
        'tfoot',
        'th',
        'thead',
        'time',
        'tr',
        'tt',
        'u',
        'ul',
        'var',
        'video',
    }

    # Attribute names that are passed through on HTML elements.
    acceptable_attributes = {
        'abbr',
        'accept',
        'accept-charset',
        'accesskey',
        'action',
        'align',
        'alt',
        'autocomplete',
        'autofocus',
        'axis',
        'background',
        'balance',
        'bgcolor',
        'bgproperties',
        'border',
        'bordercolor',
        'bordercolordark',
        'bordercolorlight',
        'bottompadding',
        'cellpadding',
        'cellspacing',
        'ch',
        'challenge',
        'char',
        'charoff',
        'charset',
        'checked',
        'choff',
        'cite',
        'class',
        'clear',
        'color',
        'cols',
        'colspan',
        'compact',
        'contenteditable',
        'controls',
        'coords',
        'data',
        'datafld',
        'datapagesize',
        'datasrc',
        'datetime',
        'default',
        'delay',
        'dir',
        'disabled',
        'draggable',
        'dynsrc',
        'enctype',
        'end',
        'face',
        'for',
        'form',
        'frame',
        'galleryimg',
        'gutter',
        'headers',
        'height',
        'hidden',
        'hidefocus',
        'high',
        'href',
        'hreflang',
        'hspace',
        'icon',
        'id',
        'inputmode',
        'ismap',
        'keytype',
        'label',
        'lang',
        'leftspacing',
        'list',
        'longdesc',
        'loop',
        'loopcount',
        'loopend',
        'loopstart',
        'low',
        'lowsrc',
        'max',
        'maxlength',
        'media',
        'method',
        'min',
        'multiple',
        'name',
        'nohref',
        'noshade',
        'nowrap',
        'open',
        'optimum',
        'pattern',
        'ping',
        'point-size',
        'poster',
        'pqg',
        'preload',
        'prompt',
        'radiogroup',
        'readonly',
        'rel',
        'repeat-max',
        'repeat-min',
        'replace',
        'required',
        'rev',
        'rightspacing',
        'rows',
        'rowspan',
        'rules',
        'scope',
        'selected',
        'shape',
        'size',
        'span',
        'src',
        'start',
        'step',
        'style',
        'summary',
        'suppress',
        'tabindex',
        'target',
        'template',
        'title',
        'toppadding',
        'type',
        'unselectable',
        'urn',
        'usemap',
        'valign',
        'value',
        'variable',
        'volume',
        'vrml',
        'vspace',
        'width',
        'wrap',
        'xml:lang',
    }

    # Elements whose text content is suppressed along with the tags
    # themselves (see unacceptablestack and handle_data()).
    unacceptable_elements_with_end_tag = {
        'applet',
        'script',
        'style',
    }

    # CSS properties that are kept in a style attribute whatever their value
    # (the value still has to pass the character gauntlet in sanitize_style).
    acceptable_css_properties = {
        'azimuth',
        'background-color',
        'border-bottom-color',
        'border-collapse',
        'border-color',
        'border-left-color',
        'border-right-color',
        'border-top-color',
        'clear',
        'color',
        'cursor',
        'direction',
        'display',
        'elevation',
        'float',
        'font',
        'font-family',
        'font-size',
        'font-style',
        'font-variant',
        'font-weight',
        'height',
        'letter-spacing',
        'line-height',
        'overflow',
        'pause',
        'pause-after',
        'pause-before',
        'pitch',
        'pitch-range',
        'richness',
        'speak',
        'speak-header',
        'speak-numeral',
        'speak-punctuation',
        'speech-rate',
        'stress',
        'text-align',
        'text-decoration',
        'text-indent',
        'unicode-bidi',
        'vertical-align',
        'voice-family',
        'volume',
        'white-space',
        'width',
    }

    # survey of common keywords found in feeds
    acceptable_css_keywords = {
        '!important',
        'aqua',
        'auto',
        'black',
        'block',
        'blue',
        'bold',
        'both',
        'bottom',
        'brown',
        'center',
        'collapse',
        'dashed',
        'dotted',
        'fuchsia',
        'gray',
        'green',
        'italic',
        'left',
        'lime',
        'maroon',
        'medium',
        'navy',
        'none',
        'normal',
        'nowrap',
        'olive',
        'pointer',
        'purple',
        'red',
        'right',
        'silver',
        'solid',
        'teal',
        'top',
        'transparent',
        'underline',
        'white',
        'yellow',
    }

    # Non-keyword tokens accepted as values of the background/border/margin/
    # padding property families in sanitize_style().
    valid_css_values = re.compile(
        r'^('
        r'#[0-9a-f]+'  # Hex values
        r'|rgb\(\d+%?,\d*%?,?\d*%?\)?'  # RGB values
        r'|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?'  # Sizes/widths
        r')$'
    )

    # Element names accepted while inside a <math> element.
    mathml_elements = {
        'annotation',
        'annotation-xml',
        'maction',
        'maligngroup',
        'malignmark',
        'math',
        'menclose',
        'merror',
        'mfenced',
        'mfrac',
        'mglyph',
        'mi',
        'mlabeledtr',
        'mlongdiv',
        'mmultiscripts',
        'mn',
        'mo',
        'mover',
        'mpadded',
        'mphantom',
        'mprescripts',
        'mroot',
        'mrow',
        'ms',
        'mscarries',
        'mscarry',
        'msgroup',
        'msline',
        'mspace',
        'msqrt',
        'msrow',
        'mstack',
        'mstyle',
        'msub',
        'msubsup',
        'msup',
        'mtable',
        'mtd',
        'mtext',
        'mtr',
        'munder',
        'munderover',
        'none',
        'semantics',
    }

    # Attribute names accepted on MathML elements.
    mathml_attributes = {
        'accent',
        'accentunder',
        'actiontype',
        'align',
        'alignmentscope',
        'altimg',
        'altimg-height',
        'altimg-valign',
        'altimg-width',
        'alttext',
        'bevelled',
        'charalign',
        'close',
        'columnalign',
        'columnlines',
        'columnspacing',
        'columnspan',
        'columnwidth',
        'crossout',
        'decimalpoint',
        'denomalign',
        'depth',
        'dir',
        'display',
        'displaystyle',
        'edge',
        'encoding',
        'equalcolumns',
        'equalrows',
        'fence',
        'fontstyle',
        'fontweight',
        'form',
        'frame',
        'framespacing',
        'groupalign',
        'height',
        'href',
        'id',
        'indentalign',
        'indentalignfirst',
        'indentalignlast',
        'indentshift',
        'indentshiftfirst',
        'indentshiftlast',
        'indenttarget',
        'infixlinebreakstyle',
        'largeop',
        'length',
        'linebreak',
        'linebreakmultchar',
        'linebreakstyle',
        'lineleading',
        'linethickness',
        'location',
        'longdivstyle',
        'lquote',
        'lspace',
        'mathbackground',
        'mathcolor',
        'mathsize',
        'mathvariant',
        'maxsize',
        'minlabelspacing',
        'minsize',
        'movablelimits',
        'notation',
        'numalign',
        'open',
        'other',
        'overflow',
        'position',
        'rowalign',
        'rowlines',
        'rowspacing',
        'rowspan',
        'rquote',
        'rspace',
        'scriptlevel',
        'scriptminsize',
        'scriptsizemultiplier',
        'selection',
        'separator',
        'separators',
        'shift',
        'side',
        'src',
        'stackalign',
        'stretchy',
        'subscriptshift',
        'superscriptshift',
        'symmetric',
        'voffset',
        'width',
        'xlink:href',
        'xlink:show',
        'xlink:type',
        'xmlns',
        'xmlns:xlink',
    }

    # svgtiny - foreignObject + linearGradient + radialGradient + stop
    svg_elements = {
        'a',
        'animate',
        'animateColor',
        'animateMotion',
        'animateTransform',
        'circle',
        'defs',
        'desc',
        'ellipse',
        'font-face',
        'font-face-name',
        'font-face-src',
        'foreignObject',
        'g',
        'glyph',
        'hkern',
        'line',
        'linearGradient',
        'marker',
        'metadata',
        'missing-glyph',
        'mpath',
        'path',
        'polygon',
        'polyline',
        'radialGradient',
        'rect',
        'set',
        'stop',
        'svg',
        'switch',
        'text',
        'title',
        'tspan',
        'use',
    }

    # svgtiny + class + opacity + offset + xmlns + xmlns:xlink
    svg_attributes = {
        'accent-height',
        'accumulate',
        'additive',
        'alphabetic',
        'arabic-form',
        'ascent',
        'attributeName',
        'attributeType',
        'baseProfile',
        'bbox',
        'begin',
        'by',
        'calcMode',
        'cap-height',
        'class',
        'color',
        'color-rendering',
        'content',
        'cx',
        'cy',
        'd',
        'descent',
        'display',
        'dur',
        'dx',
        'dy',
        'end',
        'fill',
        'fill-opacity',
        'fill-rule',
        'font-family',
        'font-size',
        'font-stretch',
        'font-style',
        'font-variant',
        'font-weight',
        'from',
        'fx',
        'fy',
        'g1',
        'g2',
        'glyph-name',
        'gradientUnits',
        'hanging',
        'height',
        'horiz-adv-x',
        'horiz-origin-x',
        'id',
        'ideographic',
        'k',
        'keyPoints',
        'keySplines',
        'keyTimes',
        'lang',
        'marker-end',
        'marker-mid',
        'marker-start',
        'markerHeight',
        'markerUnits',
        'markerWidth',
        'mathematical',
        'max',
        'min',
        'name',
        'offset',
        'opacity',
        'orient',
        'origin',
        'overline-position',
        'overline-thickness',
        'panose-1',
        'path',
        'pathLength',
        'points',
        'preserveAspectRatio',
        'r',
        'refX',
        'refY',
        'repeatCount',
        'repeatDur',
        'requiredExtensions',
        'requiredFeatures',
        'restart',
        'rotate',
        'rx',
        'ry',
        'slope',
        'stemh',
        'stemv',
        'stop-color',
        'stop-opacity',
        'strikethrough-position',
        'strikethrough-thickness',
        'stroke',
        'stroke-dasharray',
        'stroke-dashoffset',
        'stroke-linecap',
        'stroke-linejoin',
        'stroke-miterlimit',
        'stroke-opacity',
        'stroke-width',
        'systemLanguage',
        'target',
        'text-anchor',
        'to',
        'transform',
        'type',
        'u1',
        'u2',
        'underline-position',
        'underline-thickness',
        'unicode',
        'unicode-range',
        'units-per-em',
        'values',
        'version',
        'viewBox',
        'visibility',
        'width',
        'widths',
        'x',
        'x-height',
        'x1',
        'x2',
        'xlink:actuate',
        'xlink:arcrole',
        'xlink:href',
        'xlink:role',
        'xlink:show',
        'xlink:title',
        'xlink:type',
        'xml:base',
        'xml:lang',
        'xml:space',
        'xmlns',
        'xmlns:xlink',
        'y',
        'y1',
        'y2',
        'zoomAndPan',
    }

    # Lowercase -> camelCase lookup tables for SVG attribute and element
    # names. They are built per instance the first time an SVG element is
    # accepted (see _build_svg_case_maps).
    svg_attr_map = None
    svg_elem_map = None

    # CSS properties additionally accepted in style attributes while inside
    # an <svg> element.
    acceptable_svg_properties = {
        'fill',
        'fill-opacity',
        'fill-rule',
        'stroke',
        'stroke-linecap',
        'stroke-linejoin',
        'stroke-opacity',
        'stroke-width',
    }

    def __init__(self, encoding=None, _type='application/xhtml+xml'):
        """Create a sanitizer; both arguments are forwarded to the base class."""
        super(_HTMLSanitizer, self).__init__(encoding, _type)

        # Nesting depth of currently open applet/script/style elements.
        self.unacceptablestack = 0
        # Nesting depth of currently open <math> / <svg> elements.
        self.mathmlOK = 0
        self.svgOK = 0

    def reset(self):
        """Reset the base parser and clear all nesting counters."""
        super(_HTMLSanitizer, self).reset()
        self.unacceptablestack = 0
        self.mathmlOK = 0
        self.svgOK = 0

    def _build_svg_case_maps(self):
        """Replace the SVG whitelists on this instance with lowercase copies.

        Also fills ``svg_attr_map`` / ``svg_elem_map`` with a mapping from
        each lowercased name back to its original camelCase spelling, for the
        names that contain uppercase characters.
        """
        lowered = {name.lower() for name in self.svg_attributes}
        mixed_case = [name for name in self.svg_attributes if name not in lowered]
        self.svg_attributes = lowered
        self.svg_attr_map = {name.lower(): name for name in mixed_case}

        lowered = {name.lower() for name in self.svg_elements}
        mixed_case = [name for name in self.svg_elements if name not in lowered]
        self.svg_elements = lowered
        self.svg_elem_map = {name.lower(): name for name in mixed_case}

    def unknown_starttag(self, tag, attrs):
        """Filter a start tag and its attributes, then pass it to the base class.

        :param tag: element name as delivered by the parser
        :param attrs: list of ``(name, value)`` tuples; namespace declarations
            may be appended to this list in place
        """
        acceptable_attributes = self.acceptable_attributes
        keymap = {}
        if tag not in self.acceptable_elements or self.svgOK:
            if tag in self.unacceptable_elements_with_end_tag:
                self.unacceptablestack += 1

            # add implicit namespaces to html5 inline svg/mathml
            if self._type.endswith('html'):
                if not dict(attrs).get('xmlns'):
                    if tag == 'svg':
                        attrs.append(('xmlns', 'http://www.w3.org/2000/svg'))
                    if tag == 'math':
                        attrs.append(('xmlns', 'http://www.w3.org/1998/Math/MathML'))

            # not otherwise acceptable, perhaps it is MathML or SVG?
            if tag == 'math' and ('xmlns', 'http://www.w3.org/1998/Math/MathML') in attrs:
                self.mathmlOK += 1
            if tag == 'svg' and ('xmlns', 'http://www.w3.org/2000/svg') in attrs:
                self.svgOK += 1

            # chose acceptable attributes based on tag class, else bail
            if self.mathmlOK and tag in self.mathml_elements:
                acceptable_attributes = self.mathml_attributes
            elif self.svgOK and tag in self.svg_elements:
                # For most vocabularies, lowercasing is a good idea. Many
                # svg elements, however, are camel case.
                if not self.svg_attr_map:
                    self._build_svg_case_maps()
                acceptable_attributes = self.svg_attributes
                tag = self.svg_elem_map.get(tag, tag)
                keymap = self.svg_attr_map
            elif tag not in self.acceptable_elements:
                return

        # declare xlink namespace, if needed
        if self.mathmlOK or self.svgOK:
            if any(attr[0].startswith('xlink:') for attr in attrs):
                if ('xmlns:xlink', 'http://www.w3.org/1999/xlink') not in attrs:
                    attrs.append(('xmlns:xlink', 'http://www.w3.org/1999/xlink'))

        clean_attrs = []
        for key, value in self.normalize_attrs(attrs):
            if key == 'style' and 'style' in acceptable_attributes:
                clean_value = self.sanitize_style(value)
                if clean_value:
                    clean_attrs.append((key, clean_value))
            elif key in acceptable_attributes:
                # Restore the camelCase spelling of SVG attribute names.
                key = keymap.get(key, key)
                # make sure the uri uses an acceptable uri scheme
                # NOTE(review): only 'href' is checked here; other
                # URI-valued attributes (e.g. 'src', 'xlink:href') are passed
                # through unchanged - confirm this is intended.
                if key == 'href':
                    value = make_safe_absolute_uri(value)
                clean_attrs.append((key, value))
        super(_HTMLSanitizer, self).unknown_starttag(tag, clean_attrs)

    def unknown_endtag(self, tag):
        """Filter an end tag, keeping the nesting counters in step.

        :param tag: element name as delivered by the parser
        """
        if tag not in self.acceptable_elements:
            if tag in self.unacceptable_elements_with_end_tag:
                # Never let the counter go negative: handle_data() only tests
                # its truthiness, so an unmatched close tag would otherwise
                # suppress legitimate text and let the body of the *next*
                # script/style/applet element through once the counter is
                # bumped back to zero.
                if self.unacceptablestack > 0:
                    self.unacceptablestack -= 1
            if self.mathmlOK and tag in self.mathml_elements:
                if tag == 'math' and self.mathmlOK:
                    self.mathmlOK -= 1
            elif self.svgOK and tag in self.svg_elements:
                tag = self.svg_elem_map.get(tag, tag)
                if tag == 'svg' and self.svgOK:
                    self.svgOK -= 1
            else:
                return
        super(_HTMLSanitizer, self).unknown_endtag(tag)

    def handle_pi(self, text):
        """Discard processing instructions."""
        pass

    def handle_decl(self, text):
        """Discard declarations."""
        pass

    def handle_data(self, text):
        """Forward text to the base class unless inside a suppressed element."""
        if not self.unacceptablestack:
            super(_HTMLSanitizer, self).handle_data(text)

    def sanitize_style(self, style):
        """Return a filtered copy of a ``style`` attribute value.

        ``url(...)`` references are removed first. The whole value is then
        rejected (``''`` is returned) if it contains characters outside a
        conservative set or does not consist solely of ``property: value``
        declarations. Otherwise each declaration is kept only when its
        property is whitelisted, or - for the background/border/margin/padding
        families - when every token of its value is a known keyword or matches
        ``valid_css_values``.

        :param style: raw attribute value
        :return: the surviving declarations joined by single spaces
        """
        # disallow urls
        style = re.sub(r'url\s*\(\s*[^\s)]+?\s*\)\s*', ' ', style)

        # gauntlet
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        # This replaced a regexp that used re.match and was prone to
        # pathological back-tracking.
        if re.sub(r"\s*[-\w]+\s*:\s*[^:;]*;?", '', style).strip():
            return ''

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            declaration = prop + ': ' + value + ';'
            if prop.lower() in self.acceptable_css_properties:
                clean.append(declaration)
            elif prop.split('-')[0].lower() in ('background', 'border', 'margin', 'padding'):
                # Shorthand families: every token of the value must be a
                # whitelisted keyword or a recognised colour/size literal.
                if all(
                        keyword in self.acceptable_css_keywords
                        or self.valid_css_values.match(keyword)
                        for keyword in value.split()
                ):
                    clean.append(declaration)
            elif self.svgOK and prop.lower() in self.acceptable_svg_properties:
                clean.append(declaration)

        return ' '.join(clean)

    def parse_comment(self, i, report=1):
        """Parse a comment starting at index *i*, tolerating malformed ones.

        :return: the index at which parsing should resume
        """
        ret = super(_HTMLSanitizer, self).parse_comment(i, report)
        if ret >= 0:
            return ret
        # if ret == -1, this may be a malicious attempt to circumvent
        # sanitization, or a page-destroying unclosed comment
        match = re.compile(r'--[^>]*>').search(self.rawdata, i+4)
        if match:
            return match.end()
        # unclosed comment; deliberately fail to handle_data()
        return len(self.rawdata)

def _sanitize_html(html_source, encoding, _type):
    """Run *html_source* through ``_HTMLSanitizer`` and return the result.

    :param html_source: markup to sanitize
    :param encoding: forwarded to ``_HTMLSanitizer``
    :param _type: content type, forwarded to ``_HTMLSanitizer``
    :return: sanitizer output, stripped of surrounding whitespace and with
        CRLF line endings converted to LF
    """
    sanitizer = _HTMLSanitizer(encoding, _type)
    # Escape the opening of CDATA sections before parsing
    # (presumably so they reach the output as literal text - TODO confirm).
    escaped_source = html_source.replace('<![CDATA[', '&lt;![CDATA[')
    sanitizer.feed(escaped_source)
    return sanitizer.output().strip().replace('\r\n', '\n')


# Match XML entity declarations.
# Example: <!ENTITY copyright "(C)">
RE_ENTITY_PATTERN = re.compile(br'^\s*<!ENTITY([^>]*?)>', re.MULTILINE)

# Match XML DOCTYPE declarations.
# Example: <!DOCTYPE feed [ ]>
RE_DOCTYPE_PATTERN = re.compile(br'^\s*<!DOCTYPE([^>]*?)>', re.MULTILINE)

# Match safe entity declarations.
# This will allow hexadecimal character references through,
# as well as text, but not arbitrary nested entities.
# Example: cubed "&#179;"
# Example: copyright "(C)"
# Forbidden: explode1 "&explode2;&explode2;"
RE_SAFE_ENTITY_PATTERN = re.compile(br'\s+(\w+)\s+"(&#\w+;|[^&"]*)"')


def replace_doctype(data):
    """Strip the DOCTYPE from *data*, re-inserting only safe entities.

    :param data: the XML document as ``bytes``
    :return: a 3-tuple ``(version, data, safe_entities)`` where

        * ``version`` is ``'rss091n'`` when the DOCTYPE mentions "netscape"
          (case-insensitively), otherwise ``None``;
        * ``data`` is the document with every DOCTYPE declaration in its
          prolog replaced - by a minimal ``<!DOCTYPE feed [...]>`` holding
          the safe entity declarations when exactly one DOCTYPE and at
          least one safe entity were found, by nothing otherwise;
        * ``safe_entities`` maps the re-inserted entity names to their
          values, both decoded as UTF-8.
    """

    # Divide the document into two groups by finding the location
    # of the first element that doesn't begin with '<?' or '<!'.
    first_element = re.search(br'<\w', data)
    start = first_element.start() if first_element else -1
    head, data = data[:start+1], data[start+1:]

    # Save and then remove all of the ENTITY declarations.
    entity_results = RE_ENTITY_PATTERN.findall(head)
    head = RE_ENTITY_PATTERN.sub(b'', head)

    # Find the DOCTYPE declaration and check the feed type.
    doctype_results = RE_DOCTYPE_PATTERN.findall(head)
    doctype = doctype_results[0] if doctype_results else b''
    version = 'rss091n' if b'netscape' in doctype.lower() else None

    # Re-insert the safe ENTITY declarations if a DOCTYPE was found.
    replacement = b''
    if len(doctype_results) == 1 and entity_results:
        safe_entities = [
            e
            for e in entity_results
            if RE_SAFE_ENTITY_PATTERN.match(e)
        ]
        if safe_entities:
            replacement = b'<!DOCTYPE feed [\n<!ENTITY' \
                        + b'>\n<!ENTITY '.join(safe_entities) \
                        + b'>\n]>'
    # Use a callable so the replacement bytes are inserted literally. They
    # are built from the document itself, and passing them as a template
    # would make re.sub() interpret any backslash in an entity value
    # (raising re.error on e.g. "C:\dir", or expanding "\1").
    data = RE_DOCTYPE_PATTERN.sub(lambda match: replacement, head) + data

    # Precompute the safe entities for the loose parser.
    safe_entities = {
        k.decode('utf-8'): v.decode('utf-8')
        for k, v in RE_SAFE_ENTITY_PATTERN.findall(replacement)
    }
    return version, data, safe_entities
Change bump to major version 3.xx to signal that this branch supports Python3+ only. Initial SickGear for Python 3. 2023-01-12 01:04:47 +00:00			`# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>`
			`# Copyright 2002-2008 Mark Pilgrim`
			`# All rights reserved.`
			`#`
			`# This file is a part of feedparser.`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions are met:`
			`#`
			`# * Redistributions of source code must retain the above copyright notice,`
			`# this list of conditions and the following disclaimer.`
			`# * Redistributions in binary form must reproduce the above copyright notice,`
			`# this list of conditions and the following disclaimer in the documentation`
			`# and/or other materials provided with the distribution.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'`
			`# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE`
			`# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE`
			`# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE`
			`# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR`
			`# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF`
			`# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS`
			`# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN`
			`# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)`
			`# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`

			`import re`

			`from .html import _BaseHTMLProcessor`
			`from .urls import make_safe_absolute_uri`


			`class _HTMLSanitizer(_BaseHTMLProcessor):`
			`acceptable_elements = {`
			`'a',`
			`'abbr',`
			`'acronym',`
			`'address',`
			`'area',`
			`'article',`
			`'aside',`
			`'audio',`
			`'b',`
			`'big',`
			`'blockquote',`
			`'br',`
			`'button',`
			`'canvas',`
			`'caption',`
			`'center',`
			`'cite',`
			`'code',`
			`'col',`
			`'colgroup',`
			`'command',`
			`'datagrid',`
			`'datalist',`
			`'dd',`
			`'del',`
			`'details',`
			`'dfn',`
			`'dialog',`
			`'dir',`
			`'div',`
			`'dl',`
			`'dt',`
			`'em',`
			`'event-source',`
			`'fieldset',`
			`'figcaption',`
			`'figure',`
			`'font',`
			`'footer',`
			`'form',`
			`'h1',`
			`'h2',`
			`'h3',`
			`'h4',`
			`'h5',`
			`'h6',`
			`'header',`
			`'hr',`
			`'i',`
			`'img',`
			`'input',`
			`'ins',`
			`'kbd',`
			`'keygen',`
			`'label',`
			`'legend',`
			`'li',`
			`'m',`
			`'map',`
			`'menu',`
			`'meter',`
			`'multicol',`
			`'nav',`
			`'nextid',`
			`'noscript',`
			`'ol',`
			`'optgroup',`
			`'option',`
			`'output',`
			`'p',`
			`'pre',`
			`'progress',`
			`'q',`
			`'s',`
			`'samp',`
			`'section',`
			`'select',`
			`'small',`
			`'sound',`
			`'source',`
			`'spacer',`
			`'span',`
			`'strike',`
			`'strong',`
			`'sub',`
			`'sup',`
			`'table',`
			`'tbody',`
			`'td',`
			`'textarea',`
			`'tfoot',`
			`'th',`
			`'thead',`
			`'time',`
			`'tr',`
			`'tt',`
			`'u',`
			`'ul',`
			`'var',`
			`'video',`
			`}`

			`acceptable_attributes = {`
			`'abbr',`
			`'accept',`
			`'accept-charset',`
			`'accesskey',`
			`'action',`
			`'align',`
			`'alt',`
			`'autocomplete',`
			`'autofocus',`
			`'axis',`
			`'background',`
			`'balance',`
			`'bgcolor',`
			`'bgproperties',`
			`'border',`
			`'bordercolor',`
			`'bordercolordark',`
			`'bordercolorlight',`
			`'bottompadding',`
			`'cellpadding',`
			`'cellspacing',`
			`'ch',`
			`'challenge',`
			`'char',`
			`'charoff',`
			`'charset',`
			`'checked',`
			`'choff',`
			`'cite',`
			`'class',`
			`'clear',`
			`'color',`
			`'cols',`
			`'colspan',`
			`'compact',`
			`'contenteditable',`
			`'controls',`
			`'coords',`
			`'data',`
			`'datafld',`
			`'datapagesize',`
			`'datasrc',`
			`'datetime',`
			`'default',`
			`'delay',`
			`'dir',`
			`'disabled',`
			`'draggable',`
			`'dynsrc',`
			`'enctype',`
			`'end',`
			`'face',`
			`'for',`
			`'form',`
			`'frame',`
			`'galleryimg',`
			`'gutter',`
			`'headers',`
			`'height',`
			`'hidden',`
			`'hidefocus',`
			`'high',`
			`'href',`
			`'hreflang',`
			`'hspace',`
			`'icon',`
			`'id',`
			`'inputmode',`
			`'ismap',`
			`'keytype',`
			`'label',`
			`'lang',`
			`'leftspacing',`
			`'list',`
			`'longdesc',`
			`'loop',`
			`'loopcount',`
			`'loopend',`
			`'loopstart',`
			`'low',`
			`'lowsrc',`
			`'max',`
			`'maxlength',`
			`'media',`
			`'method',`
			`'min',`
			`'multiple',`
			`'name',`
			`'nohref',`
			`'noshade',`
			`'nowrap',`
			`'open',`
			`'optimum',`
			`'pattern',`
			`'ping',`
			`'point-size',`
			`'poster',`
			`'pqg',`
			`'preload',`
			`'prompt',`
			`'radiogroup',`
			`'readonly',`
			`'rel',`
			`'repeat-max',`
			`'repeat-min',`
			`'replace',`
			`'required',`
			`'rev',`
			`'rightspacing',`
			`'rows',`
			`'rowspan',`
			`'rules',`
			`'scope',`
			`'selected',`
			`'shape',`
			`'size',`
			`'span',`
			`'src',`
			`'start',`
			`'step',`
			`'style',`
			`'summary',`
			`'suppress',`
			`'tabindex',`
			`'target',`
			`'template',`
			`'title',`
			`'toppadding',`
			`'type',`
			`'unselectable',`
			`'urn',`
			`'usemap',`
			`'valign',`
			`'value',`
			`'variable',`
			`'volume',`
			`'vrml',`
			`'vspace',`
			`'width',`
			`'wrap',`
			`'xml:lang',`
			`}`

			`unacceptable_elements_with_end_tag = {`
			`'applet',`
			`'script',`
			`'style',`
			`}`

			`acceptable_css_properties = {`
			`'azimuth',`
			`'background-color',`
			`'border-bottom-color',`
			`'border-collapse',`
			`'border-color',`
			`'border-left-color',`
			`'border-right-color',`
			`'border-top-color',`
			`'clear',`
			`'color',`
			`'cursor',`
			`'direction',`
			`'display',`
			`'elevation',`
			`'float',`
			`'font',`
			`'font-family',`
			`'font-size',`
			`'font-style',`
			`'font-variant',`
			`'font-weight',`
			`'height',`
			`'letter-spacing',`
			`'line-height',`
			`'overflow',`
			`'pause',`
			`'pause-after',`
			`'pause-before',`
			`'pitch',`
			`'pitch-range',`
			`'richness',`
			`'speak',`
			`'speak-header',`
			`'speak-numeral',`
			`'speak-punctuation',`
			`'speech-rate',`
			`'stress',`
			`'text-align',`
			`'text-decoration',`
			`'text-indent',`
			`'unicode-bidi',`
			`'vertical-align',`
			`'voice-family',`
			`'volume',`
			`'white-space',`
			`'width',`
			`}`

			`# survey of common keywords found in feeds`
			`acceptable_css_keywords = {`
			`'!important',`
			`'aqua',`
			`'auto',`
			`'black',`
			`'block',`
			`'blue',`
			`'bold',`
			`'both',`
			`'bottom',`
			`'brown',`
			`'center',`
			`'collapse',`
			`'dashed',`
			`'dotted',`
			`'fuchsia',`
			`'gray',`
			`'green',`
			`'italic',`
			`'left',`
			`'lime',`
			`'maroon',`
			`'medium',`
			`'navy',`
			`'none',`
			`'normal',`
			`'nowrap',`
			`'olive',`
			`'pointer',`
			`'purple',`
			`'red',`
			`'right',`
			`'silver',`
			`'solid',`
			`'teal',`
			`'top',`
			`'transparent',`
			`'underline',`
			`'white',`
			`'yellow',`
			`}`

			`valid_css_values = re.compile(`
			`r'^('`
			`r'#[0-9a-f]+' # Hex values`
			`r'\|rgb\(\d+%?,\d%?,?\d%?\)?' # RGB values`
			`r'\|\d{0,2}\.?\d{0,2}(cm\|em\|ex\|in\|mm\|pc\|pt\|px\|%\|,\|\))?' # Sizes/widths`
			`r')$'`
			`)`

			`mathml_elements = {`
			`'annotation',`
			`'annotation-xml',`
			`'maction',`
			`'maligngroup',`
			`'malignmark',`
			`'math',`
			`'menclose',`
			`'merror',`
			`'mfenced',`
			`'mfrac',`
			`'mglyph',`
			`'mi',`
			`'mlabeledtr',`
			`'mlongdiv',`
			`'mmultiscripts',`
			`'mn',`
			`'mo',`
			`'mover',`
			`'mpadded',`
			`'mphantom',`
			`'mprescripts',`
			`'mroot',`
			`'mrow',`
			`'ms',`
			`'mscarries',`
			`'mscarry',`
			`'msgroup',`
			`'msline',`
			`'mspace',`
			`'msqrt',`
			`'msrow',`
			`'mstack',`
			`'mstyle',`
			`'msub',`
			`'msubsup',`
			`'msup',`
			`'mtable',`
			`'mtd',`
			`'mtext',`
			`'mtr',`
			`'munder',`
			`'munderover',`
			`'none',`
			`'semantics',`
			`}`

			# Attribute names kept on MathML elements (one row per initial letter).
			mathml_attributes = {
			    'accent', 'accentunder', 'actiontype', 'align', 'alignmentscope',
			    'altimg', 'altimg-height', 'altimg-valign', 'altimg-width', 'alttext',
			    'bevelled',
			    'charalign', 'close', 'columnalign', 'columnlines', 'columnspacing',
			    'columnspan', 'columnwidth', 'crossout',
			    'decimalpoint', 'denomalign', 'depth', 'dir', 'display', 'displaystyle',
			    'edge', 'encoding', 'equalcolumns', 'equalrows',
			    'fence', 'fontstyle', 'fontweight', 'form', 'frame', 'framespacing',
			    'groupalign',
			    'height', 'href',
			    'id', 'indentalign', 'indentalignfirst', 'indentalignlast',
			    'indentshift', 'indentshiftfirst', 'indentshiftlast', 'indenttarget',
			    'infixlinebreakstyle',
			    'largeop', 'length', 'linebreak', 'linebreakmultchar', 'linebreakstyle',
			    'lineleading', 'linethickness', 'location', 'longdivstyle', 'lquote',
			    'lspace',
			    'mathbackground', 'mathcolor', 'mathsize', 'mathvariant', 'maxsize',
			    'minlabelspacing', 'minsize', 'movablelimits',
			    'notation', 'numalign',
			    'open', 'other', 'overflow',
			    'position',
			    'rowalign', 'rowlines', 'rowspacing', 'rowspan', 'rquote', 'rspace',
			    'scriptlevel', 'scriptminsize', 'scriptsizemultiplier', 'selection',
			    'separator', 'separators', 'shift', 'side', 'src', 'stackalign',
			    'stretchy', 'subscriptshift', 'superscriptshift', 'symmetric',
			    'voffset',
			    'width',
			    'xlink:href', 'xlink:show', 'xlink:type', 'xmlns', 'xmlns:xlink',
			}

			# svgtiny - foreignObject + linearGradient + radialGradient + stop
			# Some names are camelCase; unknown_starttag() lower-cases this table on
			# first use and keeps a lowercase -> camelCase map in svg_elem_map.
			svg_elements = {
			    'a', 'animate', 'animateColor', 'animateMotion', 'animateTransform',
			    'circle', 'defs', 'desc', 'ellipse',
			    'font-face', 'font-face-name', 'font-face-src', 'foreignObject',
			    'g', 'glyph', 'hkern', 'line', 'linearGradient',
			    'marker', 'metadata', 'missing-glyph', 'mpath',
			    'path', 'polygon', 'polyline', 'radialGradient', 'rect',
			    'set', 'stop', 'svg', 'switch',
			    'text', 'title', 'tspan', 'use',
			}

			# svgtiny + class + opacity + offset + xmlns + xmlns:xlink
			# Some names are camelCase; unknown_starttag() lower-cases this table on
			# first use and keeps a lowercase -> camelCase map in svg_attr_map.
			svg_attributes = {
			    'accent-height', 'accumulate', 'additive', 'alphabetic', 'arabic-form',
			    'ascent', 'attributeName', 'attributeType',
			    'baseProfile', 'bbox', 'begin', 'by',
			    'calcMode', 'cap-height', 'class', 'color', 'color-rendering',
			    'content', 'cx', 'cy',
			    'd', 'descent', 'display', 'dur', 'dx', 'dy',
			    'end',
			    'fill', 'fill-opacity', 'fill-rule', 'font-family', 'font-size',
			    'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
			    'fx', 'fy',
			    'g1', 'g2', 'glyph-name', 'gradientUnits',
			    'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x',
			    'id', 'ideographic',
			    'k', 'keyPoints', 'keySplines', 'keyTimes',
			    'lang',
			    'marker-end', 'marker-mid', 'marker-start', 'markerHeight',
			    'markerUnits', 'markerWidth', 'mathematical', 'max', 'min',
			    'name',
			    'offset', 'opacity', 'orient', 'origin', 'overline-position',
			    'overline-thickness',
			    'panose-1', 'path', 'pathLength', 'points', 'preserveAspectRatio',
			    'r', 'refX', 'refY', 'repeatCount', 'repeatDur', 'requiredExtensions',
			    'requiredFeatures', 'restart', 'rotate', 'rx', 'ry',
			    'slope', 'stemh', 'stemv', 'stop-color', 'stop-opacity',
			    'strikethrough-position', 'strikethrough-thickness', 'stroke',
			    'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
			    'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
			    'stroke-width', 'systemLanguage',
			    'target', 'text-anchor', 'to', 'transform', 'type',
			    'u1', 'u2', 'underline-position', 'underline-thickness', 'unicode',
			    'unicode-range', 'units-per-em',
			    'values', 'version', 'viewBox', 'visibility',
			    'width', 'widths',
			    'x', 'x-height', 'x1', 'x2',
			    'xlink:actuate', 'xlink:arcrole', 'xlink:href', 'xlink:role',
			    'xlink:show', 'xlink:title', 'xlink:type',
			    'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink',
			    'y', 'y1', 'y2',
			    'zoomAndPan',
			}

			# Lowercase -> camelCase lookup tables for SVG names.  They stay None
			# until unknown_starttag() builds them on the first accepted SVG element.
			svg_attr_map = None
			svg_elem_map = None

			# CSS properties that sanitize_style() additionally accepts while inside
			# an <svg> subtree.
			acceptable_svg_properties = {
			    'fill', 'fill-opacity', 'fill-rule',
			    'stroke', 'stroke-linecap', 'stroke-linejoin', 'stroke-opacity',
			    'stroke-width',
			}

			def __init__(self, encoding=None, _type='application/xhtml+xml'):
			    """Initialise the base processor, then zero the sanitizer's counters.

			    ``encoding`` and ``_type`` are handed to the base class unchanged.
			    """
			    super(_HTMLSanitizer, self).__init__(encoding, _type)

			    # Nesting counters: unacceptable elements, <math> subtrees, <svg> subtrees.
			    self.unacceptablestack = self.mathmlOK = self.svgOK = 0

			def reset(self):
			    """Reset the base processor and return all nesting counters to zero."""
			    super(_HTMLSanitizer, self).reset()
			    # Same three counters that __init__ sets up.
			    self.unacceptablestack = self.mathmlOK = self.svgOK = 0

			def unknown_starttag(self, tag, attrs):
			    """Emit a start tag only if it is whitelisted, keeping only safe attributes.

			    ``tag`` is the element name and ``attrs`` a mutable list of
			    ``(name, value)`` pairs; namespace declarations may be appended to it.

			    Elements outside ``acceptable_elements`` are passed on only while a
			    namespaced ``<math>``/``<svg>`` subtree is open and only if they are in
			    the MathML/SVG element tables; otherwise the tag is dropped.  Surviving
			    attributes are filtered against the table that matches the element, and
			    ``style`` values are run through ``sanitize_style``.
			    """
			    svg_ns = 'http://www.w3.org/2000/svg'
			    mathml_ns = 'http://www.w3.org/1998/Math/MathML'
			    xlink_decl = ('xmlns:xlink', 'http://www.w3.org/1999/xlink')

			    acceptable_attributes = self.acceptable_attributes
			    keymap = {}
			    if tag not in self.acceptable_elements or self.svgOK:
			        if tag in self.unacceptable_elements_with_end_tag:
			            self.unacceptablestack += 1

			        # add implicit namespaces to html5 inline svg/mathml
			        if self._type.endswith('html') and not dict(attrs).get('xmlns'):
			            if tag == 'svg':
			                attrs.append(('xmlns', svg_ns))
			            if tag == 'math':
			                attrs.append(('xmlns', mathml_ns))

			        # not otherwise acceptable, perhaps it is MathML or SVG?
			        if tag == 'math' and ('xmlns', mathml_ns) in attrs:
			            self.mathmlOK += 1
			        if tag == 'svg' and ('xmlns', svg_ns) in attrs:
			            self.svgOK += 1

			        # choose acceptable attributes based on tag class, else bail
			        if self.mathmlOK and tag in self.mathml_elements:
			            acceptable_attributes = self.mathml_attributes
			        elif self.svgOK and tag in self.svg_elements:
			            # For most vocabularies, lowercasing is a good idea. Many
			            # svg elements, however, are camel case.
			            if not self.svg_attr_map:
			                # Build the lowered tables once per instance.  Sets keep
			                # the same container type as the class-level tables.
			                lower = {attr.lower() for attr in self.svg_attributes}
			                mix = [a for a in self.svg_attributes if a not in lower]
			                self.svg_attributes = lower
			                self.svg_attr_map = {a.lower(): a for a in mix}

			                lower = {elem.lower() for elem in self.svg_elements}
			                mix = [e for e in self.svg_elements if e not in lower]
			                self.svg_elements = lower
			                self.svg_elem_map = {e.lower(): e for e in mix}
			            acceptable_attributes = self.svg_attributes
			            tag = self.svg_elem_map.get(tag, tag)
			            keymap = self.svg_attr_map
			        elif tag not in self.acceptable_elements:
			            return

			    # declare xlink namespace, if needed
			    if self.mathmlOK or self.svgOK:
			        uses_xlink = any(name.startswith('xlink:') for name, _ in attrs)
			        if uses_xlink and xlink_decl not in attrs:
			            attrs.append(xlink_decl)

			    clean_attrs = []
			    for key, value in self.normalize_attrs(attrs):
			        if key == 'style' and 'style' in acceptable_attributes:
			            clean_value = self.sanitize_style(value)
			            if clean_value:
			                clean_attrs.append((key, clean_value))
			        elif key in acceptable_attributes:
			            key = keymap.get(key, key)
			            # Make sure link targets use an acceptable URI scheme.
			            # 'xlink:href' is whitelisted for SVG and MathML and is a link
			            # target just like 'href', so it gets the same check.
			            if key in ('href', 'xlink:href'):
			                value = make_safe_absolute_uri(value)
			            clean_attrs.append((key, value))
			    super(_HTMLSanitizer, self).unknown_starttag(tag, clean_attrs)

			def unknown_endtag(self, tag):
			    """Emit an end tag only for elements the sanitizer lets through.

			    End tags of non-HTML-whitelisted elements are forwarded only when they
			    belong to an open MathML/SVG subtree; closing ``math``/``svg`` decrements
			    the corresponding nesting counter.  Any other end tag is dropped.
			    """
			    if tag not in self.acceptable_elements:
			        # Never let the counter go negative: a stray end tag (one without a
			        # matching start tag) would otherwise leave it truthy, which makes
			        # handle_data() drop all following text, and would let the text of
			        # the next unacceptable element through once it climbs back to 0.
			        if tag in self.unacceptable_elements_with_end_tag and self.unacceptablestack > 0:
			            self.unacceptablestack -= 1
			        if self.mathmlOK and tag in self.mathml_elements:
			            if tag == 'math':
			                self.mathmlOK -= 1
			        elif self.svgOK and tag in self.svg_elements:
			            # Restore the camelCase spelling for SVG element names.
			            tag = self.svg_elem_map.get(tag, tag)
			            if tag == 'svg':
			                self.svgOK -= 1
			        else:
			            return
			    super(_HTMLSanitizer, self).unknown_endtag(tag)

			def handle_pi(self, text):
			    """Discard processing instructions; nothing is emitted for them."""

			def handle_decl(self, text):
			    """Discard declarations; nothing is emitted for them."""

			def handle_data(self, text):
			    """Forward text to the base class unless an unacceptable element is open."""
			    if self.unacceptablestack:
			        # Counter is non-zero: this text is suppressed.
			        return
			    super(_HTMLSanitizer, self).handle_data(text)

			def sanitize_style(self, style):
			    """Return ``style`` reduced to whitelisted CSS declarations.

			    ``style`` is the raw value of a ``style`` attribute.  The result is a
			    string of ``prop: value;`` declarations joined by single spaces, or
			    ``''`` when the input fails validation or nothing survives.

			    The patterns need their ``*`` quantifiers and bare ``|`` alternation:
			    without them whitespace becomes mandatory and ``\\|`` demands a literal
			    pipe, so every ordinary declaration would be rejected.
			    """
			    # disallow urls
			    style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

			    # gauntlet: the whole string may only consist of harmless tokens
			    if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
			        return ''
			    # This replaced a regexp that used re.match and was prone to
			    # pathological back-tracking: strip every well-formed declaration
			    # and refuse the style if anything is left over.
			    if re.sub(r"\s*[-\w]+\s*:\s*[^:;]*;?", '', style).strip():
			        return ''

			    shorthand_prefixes = ('background', 'border', 'margin', 'padding')
			    clean = []
			    for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
			        if not value:
			            continue
			        name = prop.lower()
			        if name in self.acceptable_css_properties:
			            clean.append(prop + ': ' + value + ';')
			        elif name.split('-')[0] in shorthand_prefixes:
			            # Shorthand families are accepted only when every
			            # whitespace-separated token is a known keyword or a simple
			            # colour/size value.
			            for keyword in value.split():
			                if (
			                    keyword not in self.acceptable_css_keywords
			                    and not self.valid_css_values.match(keyword)
			                ):
			                    break
			            else:
			                clean.append(prop + ': ' + value + ';')
			        elif self.svgOK and name in self.acceptable_svg_properties:
			            clean.append(prop + ': ' + value + ';')

			    return ' '.join(clean)

			def parse_comment(self, i, report=1):
			    """Parse the comment at offset ``i``, never reporting it as incomplete.

			    Returns the base class's result when it is non-negative; otherwise the
			    end offset of a loosely matched comment terminator, or the length of
			    ``rawdata`` when no terminator exists.
			    """
			    end = super(_HTMLSanitizer, self).parse_comment(i, report)
			    if end < 0:
			        # A negative result may be a malicious attempt to circumvent
			        # sanitization, or a page-destroying unclosed comment.
			        closer = re.compile(r'--[^>]*>').search(self.rawdata, i+4)
			        # With no closer at all, skip to the end of the buffer so the
			        # remainder is deliberately never passed to handle_data().
			        end = closer.end() if closer else len(self.rawdata)
			    return end


			def _sanitize_html(html_source, encoding, _type):
			    """Run ``html_source`` through ``_HTMLSanitizer`` and return the clean markup.

			    ``encoding`` and ``_type`` are passed straight to the sanitizer.  The
			    result is stripped of surrounding whitespace and uses ``\\n`` line ends.
			    """
			    p = _HTMLSanitizer(encoding, _type)
			    # Escape the opening of CDATA sections so the parser sees them as text
			    # rather than markup.  The replacement must be the escaped form;
			    # replacing the marker with itself would be a no-op.
			    html_source = html_source.replace('<![CDATA[', '&lt;![CDATA[')
			    p.feed(html_source)
			    data = p.output()
			    data = data.strip().replace('\r\n', '\n')
			    return data


			# Match XML entity declarations.
			# Example: <!ENTITY copyright "(C)">
			RE_ENTITY_PATTERN = re.compile(br'^\s*<!ENTITY([^>]*?)>', re.MULTILINE)

			# Match XML DOCTYPE declarations.
			# Example: <!DOCTYPE feed [ ]>
			RE_DOCTYPE_PATTERN = re.compile(br'^\s*<!DOCTYPE([^>]*?)>', re.MULTILINE)

			# Match safe entity declarations.
			# This will allow hexadecimal character references through,
			# as well as text, but not arbitrary nested entities.
			# Example: cubed "&#179;"
			# Example: copyright "(C)"
			# Forbidden: explode1 "&explode2;&explode2;"
			RE_SAFE_ENTITY_PATTERN = re.compile(br'\s+(\w+)\s+"(&#\w+;|[^&"]*)"')


			def replace_doctype(data):
			    """Strip the DOCTYPE and unsafe ENTITY declarations from an XML document.

			    :param data: the document as ``bytes``.
			    :returns: a 3-tuple ``(rss_version, stripped_data, safe_entities)``:

			        * ``rss_version`` is ``'rss091n'`` when the DOCTYPE mentions
			          "netscape" (case-insensitive), otherwise ``None``;
			        * ``stripped_data`` is the document with its DOCTYPE replaced by
			          one that declares only the safe entities (or removed entirely);
			        * ``safe_entities`` maps entity names to values, both ``str``.
			    """

			    # Divide the document into two groups by finding the location
			    # of the first element that doesn't begin with '<?' or '<!'.
			    # The head keeps that element's '<'; with no element it is empty.
			    first_element = re.search(br'<\w', data)
			    split_at = first_element.start() + 1 if first_element else 0
			    head, data = data[:split_at], data[split_at:]

			    # Save and then remove all of the ENTITY declarations.
			    entity_results = RE_ENTITY_PATTERN.findall(head)
			    head = RE_ENTITY_PATTERN.sub(b'', head)

			    # Find the DOCTYPE declaration and check the feed type.
			    doctype_results = RE_DOCTYPE_PATTERN.findall(head)
			    doctype = doctype_results[0] if doctype_results else b''
			    version = 'rss091n' if b'netscape' in doctype.lower() else None

			    # Re-insert the safe ENTITY declarations if a DOCTYPE was found.
			    replacement = b''
			    if len(doctype_results) == 1 and entity_results:
			        safe_declarations = [
			            e
			            for e in entity_results
			            if RE_SAFE_ENTITY_PATTERN.match(e)
			        ]
			        if safe_declarations:
			            # Each captured declaration starts with its own whitespace,
			            # so the first '<!ENTITY' needs no trailing space.
			            replacement = b'<!DOCTYPE feed [\n<!ENTITY' \
			                + b'>\n<!ENTITY '.join(safe_declarations) \
			                + b'>\n]>'
			    # Use a callable so the replacement is inserted literally; passed as a
			    # template, a backslash in an entity value would be treated as a regex
			    # escape and could raise re.error.
			    data = RE_DOCTYPE_PATTERN.sub(lambda match: replacement, head) + data

			    # Precompute the safe entities for the loose parser.
			    safe_entities = {
			        k.decode('utf-8'): v.decode('utf-8')
			        for k, v in RE_SAFE_ENTITY_PATTERN.findall(replacement)
			    }
			    return version, data, safe_entities