import support
import unittest, codecs
from html5lib.inputstream import HTMLInputStream
class HTMLInputStreamShortChunk(HTMLInputStream):
    """HTMLInputStream subclass whose default chunk size is overridden to 2."""

    # NOTE(review): such a small chunk size presumably makes even short test
    # inputs span several chunks, so chunk-boundary handling gets exercised;
    # confirm against HTMLInputStream's use of _defaultChunkSize.
    _defaultChunkSize = 2
class HTMLInputStreamTest(unittest.TestCase):
def test_char_ascii(self):
stream = HTMLInputStream("'", encoding='ascii')
self.assertEquals(stream.charEncoding[0], 'ascii')
self.assertEquals(stream.char(), "'")
def test_char_null(self):
stream = HTMLInputStream("\x00")
self.assertEquals(stream.char(), u'\ufffd')
def test_char_utf8(self):
stream = HTMLInputStream(u'\u2018'.encode('utf-8'), encoding='utf-8')
self.assertEquals(stream.charEncoding[0], 'utf-8')
self.assertEquals(stream.char(), u'\u2018')
def test_char_win1252(self):
stream = HTMLInputStream(u"\xa9\xf1\u2019".encode('windows-1252'))
self.assertEquals(stream.charEncoding[0], 'windows-1252')
self.assertEquals(stream.char(), u"\xa9")
self.assertEquals(stream.char(), u"\xf1")
self.assertEquals(stream.char(), u"\u2019")
def test_bom(self):
stream = HTMLInputStream(codecs.BOM_UTF8 + "'")
self.assertEquals(stream.charEncoding[0], 'utf-8')
self.assertEquals(stream.char(), "'")
def test_utf_16(self):
stream = HTMLInputStream((' '*1025).encode('utf-16'))
self.assert_(stream.charEncoding[0] in ['utf-16-le', 'utf-16-be'], stream.charEncoding)
self.assertEquals(len(stream.charsUntil(' ', True)), 1025)
def test_newlines(self):
stream = HTMLInputStreamShortChunk(codecs.BOM_UTF8 + "a\nbb\r\nccc\rddddxe")
self.assertEquals(stream.position(), (1, 0))
self.assertEquals(stream.charsUntil('c'), u"a\nbb\n")
self.assertEquals(stream.position(), (3, 0))
self.assertEquals(stream.charsUntil('x'), u"ccc\ndddd")
self.assertEquals(stream.position(), (4, 4))
self.assertEquals(stream.charsUntil('e'), u"x")
self.assertEquals(stream.position(), (4, 5))
def test_newlines2(self):
size = HTMLInputStream._defaultChunkSize
stream = HTMLInputStream("\r" * size + "\n")
self.assertEquals(stream.charsUntil('x'), "\n" * size)
def test_position(self):
stream = HTMLInputStreamShortChunk(codecs.BOM_UTF8 + "a\nbb\nccc\nddde\nf\ngh")
self.assertEquals(stream.position(), (1, 0))
self.assertEquals(stream.charsUntil('c'), u"a\nbb\n")
self.assertEquals(stream.position(), (3, 0))
stream.unget(u"\n")
self.assertEquals(stream.position(), (2, 2))
self.assertEquals(stream.charsUntil('c'), u"\n")
self.assertEquals(stream.position(), (3, 0))
stream.unget(u"\n")
self.assertEquals(stream.position(), (2, 2))
self.assertEquals(stream.char(), u"\n")
self.assertEquals(stream.position(), (3, 0))
self.assertEquals(stream.charsUntil('e'), u"ccc\nddd")
self.assertEquals(stream.position(), (4, 3))
self.assertEquals(stream.charsUntil('h'), u"e\nf\ng")
self.assertEquals(stream.position(), (6, 1))
def test_position2(self):
stream = HTMLInputStreamShortChunk("abc\nd")
self.assertEquals(stream.position(), (1, 0))
self.assertEquals(stream.char(), u"a")
self.assertEquals(stream.position(), (1, 1))
self.assertEquals(stream.char(), u"b")
self.assertEquals(stream.position(), (1, 2))
self.assertEquals(stream.char(), u"c")
self.assertEquals(stream.position(), (1, 3))
self.assertEquals(stream.char(), u"\n")
self.assertEquals(stream.position(), (2, 0))
self.assertEquals(stream.char(), u"d")
self.assertEquals(stream.position(), (2, 1))
def buildTestSuite():
    """Return the tests that unittest's default loader finds in this module."""
    loader = unittest.defaultTestLoader
    suite = loader.loadTestsFromName(__name__)
    return suite
def main():
    """Run this module's tests through unittest's command-line runner.

    When no test names are given on the command line, the suite returned by
    buildTestSuite() is what gets run. Test names passed on the command line
    still take precedence.
    """
    # Previously buildTestSuite() was called and its result thrown away, so
    # the suite was built twice and the builder had no effect. Naming it as
    # defaultTest lets unittest call it and run the suite it returns.
    unittest.main(defaultTest="buildTestSuite")
# Run the tests only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()