Merge pull request #482 from JackDandy/feature/UpdateBSoup

Update Beautiful Soup to 4.4.0 (r390).
2024-12-03 01:43:37 +00:00 · 2015-08-11 17:07:30 +01:00 · 2015-08-11 17:07:30 +01:00 · 8b42315bde
commit 8b42315bde
parent 8eee9d8699 b0525a0dd6
9 changed files with 292 additions and 84 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -5,6 +5,7 @@
 * Add search crawler exclusions
 * Fix saving default show list group on add new show options page
 * Remove legacy anime split home option from anime settings tab (new option located in general/interface tab)
 * Update Beautiful Soup 4.3.2 to 4.4.0 (r390)
 ### 0.10.0 (2015-08-06 11:05:00 UTC)
--- a/lib/bs4/init.py
+++ b/lib/bs4/init.py
@ -17,8 +17,8 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.3.2"
+__version__ = "4.4.0"
-__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson"
+__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson"
 __license__ = "MIT"
 __all__ = ['BeautifulSoup']
@ -77,10 +77,11 @@ class BeautifulSoup(Tag):
    ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
-    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
+    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
    def __init__(self, markup="", features=None, builder=None,
-                 parse_only=None, from_encoding=None, **kwargs):
+                 parse_only=None, from_encoding=None, exclude_encodings=None,
                 **kwargs):
        """The Soup object is initialized as the 'root tag', and the
        provided markup (which can be a string or a file-like object)
        is fed into the underlying parser."""
@ -156,8 +157,13 @@ class BeautifulSoup(Tag):
            builder = builder_class()
            if not (original_features == builder.NAME or
                    original_features in builder.ALTERNATE_NAMES):
                if builder.is_xml:
                    markup_type = "XML"
                else:
                    markup_type = "HTML"
                warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
-                    parser=builder.NAME))
+                    parser=builder.NAME,
                    markup_type=markup_type))
        self.builder = builder
        self.is_xml = builder.is_xml
@ -202,7 +208,8 @@ class BeautifulSoup(Tag):
        for (self.markup, self.original_encoding, self.declared_html_encoding,
         self.contains_replacement_characters) in (
-            self.builder.prepare_markup(markup, from_encoding)):
+             self.builder.prepare_markup(
                 markup, from_encoding, exclude_encodings=exclude_encodings)):
            self.reset()
            try:
                self._feed()
@ -215,6 +222,16 @@ class BeautifulSoup(Tag):
        self.markup = None
        self.builder.soup = None
    def __copy__(self):
        return type(self)(self.encode(), builder=self.builder)
    def __getstate__(self):
        # Frequently a tree builder can't be pickled.
        d = dict(self.__dict__)
        if 'builder' in d and not self.builder.picklable:
            del d['builder']
        return d
    def _feed(self):
        # Convert the document to Unicode.
        self.builder.reset()
@ -241,9 +258,7 @@ class BeautifulSoup(Tag):
    def new_string(self, s, subclass=NavigableString):
        """Create a new NavigableString associated with this soup."""
-        navigable = subclass(s)
+        return subclass(s)
        navigable.setup()
        return navigable
    def insert_before(self, successor):
        raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
@ -302,14 +317,49 @@ class BeautifulSoup(Tag):
    def object_was_parsed(self, o, parent=None, most_recent_element=None):
        """Add an object to the parse tree."""
        parent = parent or self.currentTag
-        most_recent_element = most_recent_element or self._most_recent_element
+        previous_element = most_recent_element or self._most_recent_element
-        o.setup(parent, most_recent_element)
+
        next_element = previous_sibling = next_sibling = None
        if isinstance(o, Tag):
            next_element = o.next_element
            next_sibling = o.next_sibling
            previous_sibling = o.previous_sibling
            if not previous_element:
                previous_element = o.previous_element
        o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
        if most_recent_element is not None:
            most_recent_element.next_element = o
        self._most_recent_element = o
        parent.contents.append(o)
        if parent.next_sibling:
            # This node is being inserted into an element that has
            # already been parsed. Deal with any dangling references.
            index = parent.contents.index(o)
            if index == 0:
                previous_element = parent
                previous_sibling = None
            else:
                previous_element = previous_sibling = parent.contents[index-1]
            if index == len(parent.contents)-1:
                next_element = parent.next_sibling
                next_sibling = None
            else:
                next_element = next_sibling = parent.contents[index+1]
            o.previous_element = previous_element
            if previous_element:
                previous_element.next_element = o
            o.next_element = next_element
            if next_element:
                next_element.previous_element = o
            o.next_sibling = next_sibling
            if next_sibling:
                next_sibling.previous_sibling = o
            o.previous_sibling = previous_sibling
            if previous_sibling:
                previous_sibling.next_sibling = o
    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
        """Pops the tag stack up to and including the most recent
        instance of the given tag. If inclusivePop is false, pops the tag
--- a/lib/bs4/builder/init.py
+++ b/lib/bs4/builder/init.py
@ -85,6 +85,7 @@ class TreeBuilder(object):
    features = []
    is_xml = False
    picklable = False
    preserve_whitespace_tags = set()
    empty_element_tags = None # A tag will be considered an empty-element
                              # tag when and only when it has no contents.
--- a/lib/bs4/builder/_html5lib.py
+++ b/lib/bs4/builder/_html5lib.py
@ -2,6 +2,7 @@ __all__ = [
    'HTML5TreeBuilder',
    ]
 from pdb import set_trace
 import warnings
 from bs4.builder import (
    PERMISSIVE,
@ -9,7 +10,10 @@ from bs4.builder import (
    HTML_5,
    HTMLTreeBuilder,
    )
-from bs4.element import NamespacedAttribute
+from bs4.element import (
    NamespacedAttribute,
    whitespace_re,
 )
 import html5lib
 from html5lib.constants import namespaces
 from bs4.element import (
@ -26,9 +30,16 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
    features = [NAME, PERMISSIVE, HTML_5, HTML]
-    def prepare_markup(self, markup, user_specified_encoding):
+    def prepare_markup(self, markup, user_specified_encoding,
                       document_declared_encoding=None, exclude_encodings=None):
        # Store the user-specified encoding for use later on.
        self.user_specified_encoding = user_specified_encoding
        # document_declared_encoding and exclude_encodings aren't used
        # at the moment because the html5lib TreeBuilder doesn't use
        # UnicodeDammit.
        if exclude_encodings:
            warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
        yield (markup, None, None, False)
    # These methods are defined by Beautiful Soup.
@ -103,7 +114,13 @@ class AttrList(object):
    def __iter__(self):
        return list(self.attrs.items()).__iter__()
    def __setitem__(self, name, value):
-        "set attr", name, value
+        # If this attribute is a multi-valued attribute for this element,
        # turn its value into a list.
        list_attr = HTML5TreeBuilder.cdata_list_attributes
        if (name in list_attr['*']
            or (self.element.name in list_attr
                and name in list_attr[self.element.name])):
            value = whitespace_re.split(value)
        self.element[name] = value
    def items(self):
        return list(self.attrs.items())
@ -180,6 +197,7 @@ class Element(html5lib.treebuilders._base.Node):
        return AttrList(self.element)
    def setAttributes(self, attributes):
        if attributes is not None and len(attributes) > 0:
            converted_attributes = []
@ -226,6 +244,9 @@ class Element(html5lib.treebuilders._base.Node):
    def reparentChildren(self, new_parent):
        """Move all of this tag's children into another tag."""
        # print "MOVE", self.element.contents
        # print "FROM", self.element
        # print "TO", new_parent.element
        element = self.element
        new_parent_element = new_parent.element
        # Determine what this tag's next_element will be once all the children
@ -244,17 +265,28 @@ class Element(html5lib.treebuilders._base.Node):
            new_parents_last_descendant_next_element = new_parent_element.next_element
        to_append = element.contents
-        append_after = new_parent.element.contents
+        append_after = new_parent_element.contents
        if len(to_append) > 0:
            # Set the first child's previous_element and previous_sibling
            # to elements within the new parent
            first_child = to_append[0]
            if new_parents_last_descendant:
                first_child.previous_element = new_parents_last_descendant
            else:
                first_child.previous_element = new_parent_element
            first_child.previous_sibling = new_parents_last_child
            if new_parents_last_descendant:
                new_parents_last_descendant.next_element = first_child
            else:
                new_parent_element.next_element = first_child
            if new_parents_last_child:
                new_parents_last_child.next_sibling = first_child
            # Fix the last child's next_element and next_sibling
            last_child = to_append[-1]
            last_child.next_element = new_parents_last_descendant_next_element
            if new_parents_last_descendant_next_element:
                new_parents_last_descendant_next_element.previous_element = last_child
            last_child.next_sibling = None
        for child in to_append:
@ -265,6 +297,10 @@ class Element(html5lib.treebuilders._base.Node):
        element.contents = []
        element.next_element = final_next_element
        # print "DONE WITH MOVE"
        # print "FROM", self.element
        # print "TO", new_parent_element
    def cloneNode(self):
        tag = self.soup.new_tag(self.element.name, self.namespace)
        node = Element(tag, self.soup, self.namespace)
--- a/lib/bs4/builder/_htmlparser.py
+++ b/lib/bs4/builder/_htmlparser.py
@ -4,10 +4,16 @@ __all__ = [
    'HTMLParserTreeBuilder',
    ]
-from HTMLParser import (
+from HTMLParser import HTMLParser
-    HTMLParser,
+
-    HTMLParseError,
+try:
-    )
+    from HTMLParser import HTMLParseError
 except ImportError, e:
    # HTMLParseError was removed in Python 3.5. Since it can never be
    # raised there, we can just define our own class as a placeholder.
    class HTMLParseError(Exception):
        pass
 import sys
 import warnings
@ -20,8 +26,10 @@ import warnings
 # strict=True works well on Python 3.2.2.
 major, minor, release = sys.version_info[:3]
 CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
 CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
 CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
 from bs4.element import (
    CData,
    Comment,
@ -119,18 +127,19 @@ class BeautifulSoupHTMLParser(HTMLParser):
 class HTMLParserTreeBuilder(HTMLTreeBuilder):
    is_xml = False
    picklable = True
    NAME = HTMLPARSER
    features = [NAME, HTML, STRICT]
    def __init__(self, *args, **kwargs):
-        if CONSTRUCTOR_TAKES_STRICT:
+        if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
            kwargs['strict'] = False
        if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
            kwargs['convert_charrefs'] = False
        self.parser_args = (args, kwargs)
    def prepare_markup(self, markup, user_specified_encoding=None,
-                       document_declared_encoding=None):
+                       document_declared_encoding=None, exclude_encodings=None):
        """
        :return: A 4-tuple (markup, original encoding, encoding
        declared within markup, whether any characters had to be
@ -141,7 +150,8 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
            return
        try_encodings = [user_specified_encoding, document_declared_encoding]
-        dammit = UnicodeDammit(markup, try_encodings, is_html=True)
+        dammit = UnicodeDammit(markup, try_encodings, is_html=True,
                               exclude_encodings=exclude_encodings)
        yield (dammit.markup, dammit.original_encoding,
               dammit.declared_html_encoding,
               dammit.contains_replacement_characters)
--- a/lib/bs4/builder/_lxml.py
+++ b/lib/bs4/builder/_lxml.py
@ -31,6 +31,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
    is_xml = True
    NAME = "lxml-xml"
    ALTERNATE_NAMES = ["xml"]
    # Well, it's permissive by XML parser standards.
    features = [NAME, LXML, XML, FAST, PERMISSIVE]
@ -77,6 +78,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
            return (None, tag)
    def prepare_markup(self, markup, user_specified_encoding=None,
                       exclude_encodings=None,
                       document_declared_encoding=None):
        """
        :yield: A series of 4-tuples.
@ -102,7 +104,8 @@ class LXMLTreeBuilderForXML(TreeBuilder):
        # the document as each one in turn.
        is_html = not self.is_xml
        try_encodings = [user_specified_encoding, document_declared_encoding]
-        detector = EncodingDetector(markup, try_encodings, is_html)
+        detector = EncodingDetector(
            markup, try_encodings, is_html, exclude_encodings)
        for encoding in detector.encodings:
            yield (detector.markup, encoding, document_declared_encoding, False)
--- a/lib/bs4/dammit.py
+++ b/lib/bs4/dammit.py
@ -3,10 +3,11 @@
 This library converts a bytestream to Unicode through any means
 necessary. It is heavily based on code from Mark Pilgrim's Universal
-Feed Parser. It works best on XML and XML, but it does not rewrite the
+Feed Parser. It works best on XML and HTML, but it does not rewrite the
 XML or HTML to reflect a new encoding; that's the tree builder's job.
 """
 from pdb import set_trace
 import codecs
 from htmlentitydefs import codepoint2name
 import re
@ -212,8 +213,11 @@ class EncodingDetector:
    5. Windows-1252.
    """
-    def __init__(self, markup, override_encodings=None, is_html=False):
+    def __init__(self, markup, override_encodings=None, is_html=False,
                 exclude_encodings=None):
        self.override_encodings = override_encodings or []
        exclude_encodings = exclude_encodings or []
        self.exclude_encodings = set([x.lower() for x in exclude_encodings])
        self.chardet_encoding = None
        self.is_html = is_html
        self.declared_encoding = None
@ -224,6 +228,8 @@ class EncodingDetector:
    def _usable(self, encoding, tried):
        if encoding is not None:
            encoding = encoding.lower()
            if encoding in self.exclude_encodings:
                return False
            if encoding not in tried:
                tried.add(encoding)
                return True
@ -266,6 +272,9 @@ class EncodingDetector:
    def strip_byte_order_mark(cls, data):
        """If a byte-order mark is present, strip it and return the encoding it implies."""
        encoding = None
        if isinstance(data, unicode):
            # Unicode data cannot have a byte-order mark.
            return data, encoding
        if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
               and (data[2:4] != '\x00\x00'):
            encoding = 'utf-16be'
@ -306,7 +315,7 @@ class EncodingDetector:
            declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
        if declared_encoding_match is not None:
            declared_encoding = declared_encoding_match.groups()[0].decode(
-                'ascii')
+                'ascii', 'replace')
        if declared_encoding:
            return declared_encoding.lower()
        return None
@ -331,13 +340,14 @@ class UnicodeDammit:
        ]
    def __init__(self, markup, override_encodings=[],
-                 smart_quotes_to=None, is_html=False):
+                 smart_quotes_to=None, is_html=False, exclude_encodings=[]):
        self.smart_quotes_to = smart_quotes_to
        self.tried_encodings = []
        self.contains_replacement_characters = False
        self.is_html = is_html
-        self.detector = EncodingDetector(markup, override_encodings, is_html)
+        self.detector = EncodingDetector(
            markup, override_encodings, is_html, exclude_encodings)
        # Short-circuit if the data is in Unicode to begin with.
        if isinstance(markup, unicode) or markup == '':
--- a/lib/bs4/diagnose.py
+++ b/lib/bs4/diagnose.py
@ -33,12 +33,21 @@ def diagnose(data):
    if 'lxml' in basic_parsers:
        basic_parsers.append(["lxml", "xml"])
        try:
            from lxml import etree
            print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
        except ImportError, e:
            print (
                "lxml is not installed or couldn't be imported.")
    if 'html5lib' in basic_parsers:
        try:
            import html5lib
            print "Found html5lib version %s" % html5lib.__version__
        except ImportError, e:
            print (
                "html5lib is not installed or couldn't be imported.")
    if hasattr(data, 'read'):
        data = data.read()
--- a/lib/bs4/element.py
+++ b/lib/bs4/element.py
@ -1,3 +1,4 @@
 from pdb import set_trace
 import collections
 import re
 import sys
@ -185,24 +186,40 @@ class PageElement(object):
            return self.HTML_FORMATTERS.get(
                name, HTMLAwareEntitySubstitution.substitute_xml)
-    def setup(self, parent=None, previous_element=None):
+    def setup(self, parent=None, previous_element=None, next_element=None,
              previous_sibling=None, next_sibling=None):
        """Sets up the initial relations between this element and
        other elements."""
        self.parent = parent
        self.previous_element = previous_element
        if previous_element is not None:
            self.previous_element.next_element = self
-        self.next_element = None
+
-        self.previous_sibling = None
+        self.next_element = next_element
-        self.next_sibling = None
+        if self.next_element:
-        if self.parent is not None and self.parent.contents:
+            self.next_element.previous_element = self
-            self.previous_sibling = self.parent.contents[-1]
+
        self.next_sibling = next_sibling
        if self.next_sibling:
            self.next_sibling.previous_sibling = self
        if (not previous_sibling
            and self.parent is not None and self.parent.contents):
            previous_sibling = self.parent.contents[-1]
        self.previous_sibling = previous_sibling
        if previous_sibling:
            self.previous_sibling.next_sibling = self
    nextSibling = _alias("next_sibling")  # BS3
    previousSibling = _alias("previous_sibling")  # BS3
    def replace_with(self, replace_with):
        if not self.parent:
            raise ValueError(
                "Cannot replace one element with another when the"
                "element to be replaced is not part of a tree.")
        if replace_with is self:
            return
        if replace_with is self.parent:
@ -216,6 +233,10 @@ class PageElement(object):
    def unwrap(self):
        my_parent = self.parent
        if not self.parent:
            raise ValueError(
                "Cannot replace an element with its contents when that"
                "element is not part of a tree.")
        my_index = self.parent.index(self)
        self.extract()
        for child in reversed(self.contents[:]):
@ -240,17 +261,20 @@ class PageElement(object):
        last_child = self._last_descendant()
        next_element = last_child.next_element
-        if self.previous_element is not None:
+        if (self.previous_element is not None and
            self.previous_element != next_element):
            self.previous_element.next_element = next_element
-        if next_element is not None:
+        if next_element is not None and next_element != self.previous_element:
            next_element.previous_element = self.previous_element
        self.previous_element = None
        last_child.next_element = None
        self.parent = None
-        if self.previous_sibling is not None:
+        if (self.previous_sibling is not None
            and self.previous_sibling != self.next_sibling):
            self.previous_sibling.next_sibling = self.next_sibling
-        if self.next_sibling is not None:
+        if (self.next_sibling is not None
            and self.next_sibling != self.previous_sibling):
            self.next_sibling.previous_sibling = self.previous_sibling
        self.previous_sibling = self.next_sibling = None
        return self
@ -478,6 +502,10 @@ class PageElement(object):
    def _find_all(self, name, attrs, text, limit, generator, **kwargs):
        "Iterates over a generator looking for things that match."
        if text is None and 'string' in kwargs:
            text = kwargs['string']
            del kwargs['string']
        if isinstance(name, SoupStrainer):
            strainer = name
        else:
@ -558,7 +586,7 @@ class PageElement(object):
    #     |                           Attribute
    #    Tag
    attribselect_re = re.compile(
-        r'^(?P<tag>[a-zA-Z0-9][-.a-zA-Z0-9:_]*)?\[(?P<attribute>\w+)(?P<operator>[=~\|\^\$\*]?)' +
+        r'^(?P<tag>[a-zA-Z0-9][-.a-zA-Z0-9:_]*)?\[(?P<attribute>[\w-]+)(?P<operator>[=~\|\^\$\*]?)' +
        r'=?"?(?P<value>[^\]"]*)"?\]$'
        )
@ -654,11 +682,17 @@ class NavigableString(unicode, PageElement):
        how to handle non-ASCII characters.
        """
        if isinstance(value, unicode):
-            return unicode.__new__(cls, value)
+            u = unicode.__new__(cls, value)
-        return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+        else:
            u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
        u.setup()
        return u
    def __copy__(self):
-        return self
+        """A copy of a NavigableString has the same contents and class
        as the original, but it is not connected to the parse tree.
        """
        return type(self)(self)
    def __getnewargs__(self):
        return (unicode(self),)
@ -759,11 +793,14 @@ class Tag(PageElement):
        self.prefix = prefix
        if attrs is None:
            attrs = {}
-        elif attrs and builder.cdata_list_attributes:
+        elif attrs:
            if builder is not None and builder.cdata_list_attributes:
                attrs = builder._replace_cdata_list_attribute_values(
                    self.name, attrs)
            else:
                attrs = dict(attrs)
        else:
            attrs = dict(attrs)
        self.attrs = attrs
        self.contents = []
        self.setup(parent, previous)
@ -778,6 +815,18 @@ class Tag(PageElement):
    parserClass = _alias("parser_class")  # BS3
    def __copy__(self):
        """A copy of a Tag is a new Tag, unconnected to the parse tree.
        Its contents are a copy of the old Tag's contents.
        """
        clone = type(self)(None, self.builder, self.name, self.namespace,
                           self.nsprefix, self.attrs)
        for attr in ('can_be_empty_element', 'hidden'):
            setattr(clone, attr, getattr(self, attr))
        for child in self.contents:
            clone.append(child.__copy__())
        return clone
    @property
    def is_empty_element(self):
        """Is this tag an empty-element tag? (aka a self-closing tag)
@ -971,14 +1020,24 @@ class Tag(PageElement):
        as defined in __eq__."""
        return not self == other
-    def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+    def __repr__(self, encoding="unicode-escape"):
        """Renders this tag as a string."""
        if PY3K:
            # "The return value must be a string object", i.e. Unicode
            return self.decode()
        else:
            # "The return value must be a string object", i.e. a bytestring.
            # By convention, the return value of __repr__ should also be
            # an ASCII string.
            return self.encode(encoding)
    def __unicode__(self):
        return self.decode()
    def __str__(self):
        if PY3K:
            return self.decode()
        else:
            return self.encode()
    if PY3K:
@ -1103,12 +1162,18 @@ class Tag(PageElement):
                       formatter="minimal"):
        """Renders the contents of this tag as a Unicode string.
        :param indent_level: Each line of the rendering will be
           indented this many spaces.
        :param eventual_encoding: The tag is destined to be
           encoded into this encoding. This method is _not_
           responsible for performing that encoding. This information
           is passed in so that it can be substituted in if the
           document contains a <META> tag that mentions the document's
           encoding.
        :param formatter: The output formatter responsible for converting
           entities to Unicode characters.
        """
        # First off, turn a string formatter into a function. This
        # will stop the lookup from happening over and over again.
@ -1137,7 +1202,17 @@ class Tag(PageElement):
    def encode_contents(
        self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
        formatter="minimal"):
-        """Renders the contents of this tag as a bytestring."""
+        """Renders the contents of this tag as a bytestring.
        :param indent_level: Each line of the rendering will be
           indented this many spaces.
        :param encoding: The bytestring will be in this encoding.
        :param formatter: The output formatter responsible for converting
           entities to Unicode characters.
        """
        contents = self.decode_contents(indent_level, encoding, formatter)
        return contents.encode(encoding)
@ -1201,7 +1276,14 @@ class Tag(PageElement):
    _selector_combinators = ['>', '+', '~']
    _select_debug = False
-    def select(self, selector, _candidate_generator=None):
+    def select_one(self, selector):
        """Perform a CSS selection operation on the current element."""
        value = self.select(selector, limit=1)
        if value:
            return value[0]
        return None
    def select(self, selector, _candidate_generator=None, limit=None):
        """Perform a CSS selection operation on the current element."""
        # Remove whitespace directly after the grouping operator ','
@ -1272,7 +1354,10 @@ class Tag(PageElement):
                            "A pseudo-class must be prefixed with a tag name.")
                    pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo)
                    found = []
-                    if pseudo_attributes is not None:
+                    if pseudo_attributes is None:
                        pseudo_type = pseudo
                        pseudo_value = None
                    else:
                        pseudo_type, pseudo_value = pseudo_attributes.groups()
                    if pseudo_type == 'nth-of-type':
                        try:
@ -1376,6 +1461,7 @@ class Tag(PageElement):
                else:
                    _use_candidate_generator = _candidate_generator
                count = 0
                for tag in current_context:
                    if self._select_debug:
                        print "    Running candidate generator on %s %s" % (
@ -1400,6 +1486,8 @@ class Tag(PageElement):
                                # don't include it in the context more than once.
                                new_context.append(candidate)
                                new_context_ids.add(id(candidate))
                                if limit and len(new_context) >= limit:
                                    break
                        elif self._select_debug:
                            print "     FAILURE %s %s" % (candidate.name, repr(candidate.attrs))