SickGear/lib/hachoir/field/string_field.py

"""
String field classes:
- String: Fixed length string (no prefix/no suffix) ;
- CString: String which ends with nul byte ("\0") ;
- UnixLine: Unix line of text, string which ends with "\n" ;
- PascalString8, PascalString16, PascalString32: String prefixed with
  length written in a 8, 16, 32-bit integer (use parent endian).

Constructor has optional arguments:
- strip: value can be a string or True ;
- charset: if set, convert string to unicode using this charset (in "strict"
  mode; on decoding error the string is decoded using FALLBACK_CHARSET).

Note: For PascalStringXX, prefixed value is the number of bytes and not
      of characters!
"""

from hachoir.field import FieldError, Bytes
from hachoir.core.endian import LITTLE_ENDIAN, BIG_ENDIAN
from hachoir.core.tools import alignValue, makePrintable
from hachoir.core.i18n import guessBytesCharset
from hachoir.core import config
from codecs import BOM_UTF16_LE, BOM_UTF16_BE, BOM_UTF32_LE, BOM_UTF32_BE

# Default charset used to convert byte string to Unicode
# This charset is used if no charset is specified or on conversion error
FALLBACK_CHARSET = "ISO-8859-1"


class GenericString(Bytes):
    """
    Generic string field: base class of every string field of this module.

    A field is made of an optional length prefix (Pascal formats), an
    optional BOM (charsets "UTF-16" and "UTF-32"), the string content and an
    optional terminator (formats "C" and "UnixLine").

    The charset has to be in CHARSET_8BIT or in UTF_CHARSET. When no charset
    is given, it is guessed from the content the first time it is needed.
    """

    VALID_FORMATS = ("C", "UnixLine",
                     "fixed", "Pascal8", "Pascal16", "Pascal32")

    # 8-bit charsets (one byte per character)
    CHARSET_8BIT = {
        "ASCII",          # ANSI X3.4-1968
        "MacRoman",
        "CP037",          # EBCDIC 037
        "CP874",          # Thai
        "WINDOWS-1250",   # Central Europe
        "WINDOWS-1251",   # Cyrillic
        "WINDOWS-1252",   # Latin I
        "WINDOWS-1253",   # Greek
        "WINDOWS-1254",   # Turkish
        "WINDOWS-1255",   # Hebrew
        "WINDOWS-1256",   # Arabic
        "WINDOWS-1257",   # Baltic
        "WINDOWS-1258",   # Vietnam
        "ISO-8859-1",     # Latin-1
        "ISO-8859-2",     # Latin-2
        "ISO-8859-3",     # Latin-3
        "ISO-8859-4",     # Latin-4
        "ISO-8859-5",
        "ISO-8859-6",
        "ISO-8859-7",
        "ISO-8859-8",
        "ISO-8859-9",     # Latin-5
        "ISO-8859-10",    # Latin-6
        "ISO-8859-11",    # Thai
        "ISO-8859-13",    # Latin-7
        "ISO-8859-14",    # Latin-8
        "ISO-8859-15",    # Latin-9 or ("Latin-0")
        "ISO-8859-16",    # Latin-10
    }

    # UTF-xx charset family: charset => (code unit size in bits, endian)
    # The endian is None (not applicable), an endian constant,
    # or "BOM" when the endian is given by a BOM stored before the content
    UTF_CHARSET = {
        "UTF-8": (8, None),
        "UTF-16-LE": (16, LITTLE_ENDIAN),
        "UTF-32LE": (32, LITTLE_ENDIAN),
        "UTF-16-BE": (16, BIG_ENDIAN),
        "UTF-32BE": (32, BIG_ENDIAN),
        "UTF-16": (16, "BOM"),
        "UTF-32": (32, "BOM"),
    }

    # UTF-xx BOM => charset with endian
    UTF_BOM = {
        16: {BOM_UTF16_LE: "UTF-16-LE", BOM_UTF16_BE: "UTF-16-BE"},
        32: {BOM_UTF32_LE: "UTF-32LE", BOM_UTF32_BE: "UTF-32BE"},
    }

    # Suffix format: format => code unit size in bits => endian => suffix
    # (the suffix is a byte string)
    SUFFIX_FORMAT = {
        "C": {
            8: {LITTLE_ENDIAN: b"\0", BIG_ENDIAN: b"\0"},
            16: {LITTLE_ENDIAN: b"\0\0", BIG_ENDIAN: b"\0\0"},
            32: {LITTLE_ENDIAN: b"\0\0\0\0", BIG_ENDIAN: b"\0\0\0\0"},
        },
        "UnixLine": {
            8: {LITTLE_ENDIAN: b"\n", BIG_ENDIAN: b"\n"},
            16: {LITTLE_ENDIAN: b"\n\0", BIG_ENDIAN: b"\0\n"},
            32: {LITTLE_ENDIAN: b"\n\0\0\0", BIG_ENDIAN: b"\0\0\0\n"},
        },

    }

    # Pascal format: value is the size of the length prefix in bytes
    PASCAL_FORMATS = {
        "Pascal8": 1,
        "Pascal16": 2,
        "Pascal32": 4
    }

    # Raw value: with prefix and suffix, not stripped,
    # and not converted to Unicode
    _raw_value = None

    def __init__(self, parent, name, format, description=None,
                 strip=None, charset=None, nbytes=None, truncate=None):
        """
        Compute the field size, the content offset and the content size.

        Arguments:
        - format: one of VALID_FORMATS ;
        - strip: None (keep the text as is), a string (characters to strip)
          or any other true value (strip whitespaces) ;
        - charset: None (guess it later) or a name listed in CHARSET_8BIT
          or UTF_CHARSET ;
        - nbytes: size in bytes (1..0xffff), only for the "fixed" format ;
        - truncate: if set, the text is cut at the first occurrence of
          this string.

        Raise FieldError on invalid charset, invalid size, missing string
        terminator or invalid BOM.
        """
        # NOTE(review): the length given here (1) looks like a placeholder,
        # self._size is overwritten below in every branch
        Bytes.__init__(self, parent, name, 1, description)

        # Is format valid?
        assert format in self.VALID_FORMATS

        # Store options
        self._format = format
        self._strip = strip
        self._truncate = truncate

        # Check charset and compute character size in bytes
        # (or None when it's not possible to guess character size)
        if not charset or charset in self.CHARSET_8BIT:
            self._character_size = 1   # one byte per character
        elif charset in self.UTF_CHARSET:
            self._character_size = None
        else:
            raise FieldError("Invalid charset for %s: \"%s\"" %
                             (self.path, charset))
        self._charset = charset

        # Is it a fixed string?
        if nbytes is not None:
            assert self._format == "fixed"
            # Arbitrary limits, just to catch some bugs...
            if not (1 <= nbytes <= 0xffff):
                raise FieldError("Invalid string size for %s: %s" %
                                 (self.path, nbytes))
            self._content_size = nbytes   # content length in bytes
            self._size = nbytes * 8
            self._content_offset = 0
        else:
            # Format with a suffix: Find the end of the string
            if self._format in self.SUFFIX_FORMAT:
                self._content_offset = 0

                # Choose the suffix
                suffix = self.suffix_str

                # Find the suffix
                length = self._parent.stream.searchBytesLength(
                    suffix, False, self.absolute_address)
                if length is None:
                    raise FieldError("Unable to find end of string %s (format %s)!"
                                     % (self.path, self._format))
                if 1 < len(suffix):
                    # Fix length for little endian bug with UTF-xx charset:
                    #   u"abc" -> "a\0b\0c\0\0\0" (UTF-16-LE)
                    #   search returns length=5, whereas real length is 6
                    length = alignValue(length, len(suffix))

                # Compute sizes
                self._content_size = length  # in bytes
                self._size = (length + len(suffix)) * 8

            # Format with a prefix: Read prefixed length in bytes
            else:
                assert self._format in self.PASCAL_FORMATS

                # Get the prefix size (in bytes)
                prefix_size = self.PASCAL_FORMATS[self._format]
                self._content_offset = prefix_size

                # Read the prefix and compute sizes
                value = self._parent.stream.readBits(
                    self.absolute_address, prefix_size * 8, self._parent.endian)
                self._content_size = value   # in bytes
                self._size = (prefix_size + value) * 8

        # For UTF-16 and UTF-32, choose the right charset using BOM
        if self._charset in self.UTF_CHARSET:
            # Charset requires a BOM?
            bomsize, endian = self.UTF_CHARSET[self._charset]
            if endian == "BOM":
                bom_nbytes = bomsize // 8
                if self._content_size < bom_nbytes:
                    # Without this check, the content size computed below
                    # would be negative
                    raise FieldError("String %s is too short to contain a BOM!"
                                     % self.path)

                # Read the BOM value: it is stored at the beginning of the
                # content, so after the length prefix of a Pascal string
                # (stream addresses are in bits, offsets in bytes)
                bom = self._parent.stream.readBytes(
                    self.absolute_address + self._content_offset * 8,
                    bom_nbytes)

                # Choose right charset using the BOM
                bom_endian = self.UTF_BOM[bomsize]
                if bom not in bom_endian:
                    raise FieldError("String %s has invalid BOM (%s)!"
                                     % (self.path, repr(bom)))
                self._charset = bom_endian[bom]

                # The BOM is not part of the content
                self._content_size -= bom_nbytes
                self._content_offset += bom_nbytes

        # Compute length in character if possible
        if self._character_size:
            self._length = self._content_size // self._character_size
        else:
            self._length = None

    @staticmethod
    def staticSuffixStr(format, charset, endian):
        """
        Get the terminator (byte string) of a string format.

        The terminator width depends on the charset: the code unit size of
        UTF_CHARSET is used for UTF-xx charsets, one byte for any other
        charset. Return an empty byte string for formats without suffix.
        """
        if format not in GenericString.SUFFIX_FORMAT:
            # Bytes (and not str) to be consistent with SUFFIX_FORMAT values
            return b""
        suffix = GenericString.SUFFIX_FORMAT[format]
        if charset in GenericString.UTF_CHARSET:
            suffix_size = GenericString.UTF_CHARSET[charset][0]
            suffix = suffix[suffix_size]
        else:
            suffix = suffix[8]
        return suffix[endian]

    def _getSuffixStr(self):
        """Terminator of this string, using the endian of the parent."""
        return self.staticSuffixStr(
            self._format, self._charset, self._parent.endian)
    suffix_str = property(_getSuffixStr)

    def _convertText(self, text):
        """
        Decode the byte string *text* to Unicode.

        Guess the charset if it is still unknown. On decoding error, emit a
        warning and decode the string using FALLBACK_CHARSET.
        """
        if not self._charset:
            # charset is still unknown: guess the charset
            self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET)

        # Try to convert to Unicode
        try:
            return str(text, self._charset, "strict")
        except UnicodeDecodeError as exc:
            # Keep a reference: "exc" is unbound at the end of the except block
            err = exc

        # --- Conversion error ---

        # Fix truncated UTF-16 string like 'B\0e' (3 bytes)
        # => Add missing nul byte: 'B\0e\0' (4 bytes)
        if err.reason == "truncated data" \
                and err.end == len(text) \
                and self._charset == "UTF-16-LE":
            try:
                text = str(text + b"\0", self._charset, "strict")
                self.warning(
                    "Fix truncated %s string: add missing nul byte" % self._charset)
                return text
            except UnicodeDecodeError:
                # The string is still invalid: use the fallback charset below
                pass

        # On error, use FALLBACK_CHARSET
        self.warning("Unable to convert string to Unicode: %s" % err)
        return str(text, FALLBACK_CHARSET, "strict")

    def _guessCharset(self):
        """Guess the charset from the content (without prefix and BOM)."""
        addr = self.absolute_address + self._content_offset * 8
        content = self._parent.stream.readBytes(addr, self._content_size)
        return guessBytesCharset(content, default=FALLBACK_CHARSET)

    def createValue(self, human=True):
        """
        Read the string from the data stream.

        If human is true, return the content decoded to Unicode, truncated
        and stripped according to the constructor options. Otherwise, return
        the raw bytes of the whole field (prefix and suffix included).
        """
        # Compute data address (in bits) and size (in bytes)
        if human:
            addr = self.absolute_address + self._content_offset * 8
            size = self._content_size
        else:
            addr = self.absolute_address
            size = self._size // 8
        if size == 0:
            # Empty string
            return ""

        # Read bytes in data stream
        text = self._parent.stream.readBytes(addr, size)

        # Don't transform data?
        if not human:
            return text

        # Convert text to Unicode
        text = self._convertText(text)

        # Truncate
        if self._truncate:
            pos = text.find(self._truncate)
            if 0 <= pos:
                text = text[:pos]

        # Strip string if needed
        if self._strip:
            if isinstance(self._strip, str):
                text = text.strip(self._strip)
            else:
                text = text.strip()
        assert isinstance(text, str)
        return text

    def createDisplay(self, human=True):
        """
        Create the printable representation of the string.

        The text is truncated to config.max_string_length characters
        (followed by "(...)"). If human is false, the raw value is displayed.
        """
        if not human:
            # The raw value is read only once
            if self._raw_value is None:
                self._raw_value = GenericString.createValue(self, False)
            value = makePrintable(self._raw_value, "ASCII")
        elif self._charset:
            value = makePrintable(self.value, "ISO-8859-1")
        else:
            # NOTE(review): reading self.value presumably calls createValue(),
            # which may set self._charset before the test below -- keep this
            # statement order
            value = self.value
        if config.max_string_length < len(value):
            # Truncate string if needed
            value = "%s(...)" % value[:config.max_string_length]
        if not self._charset or not human:
            return makePrintable(value, "ASCII", quote='"')
        else:
            if value:
                return '"%s"' % value.replace('"', '\\"')
            else:
                return "(empty)"

    def createRawDisplay(self):
        """Printable representation of the raw value."""
        return GenericString.createDisplay(self, human=False)

    def _getLength(self):
        """Length in characters, computed from the value when unknown."""
        if self._length is None:
            self._length = len(self.value)
        return self._length
    length = property(_getLength, doc="String length in characters")

    def _getFormat(self):
        return self._format
    format = property(_getFormat, doc="String format (eg. 'C')")

    def _getCharset(self):
        """Charset of the string, guessed from the content when unknown."""
        if not self._charset:
            self._charset = self._guessCharset()
        return self._charset
    charset = property(_getCharset, doc="String charset (eg. 'ISO-8859-1')")

    def _getContentSize(self):
        return self._content_size
    content_size = property(_getContentSize, doc="Content size in bytes")

    def _getContentOffset(self):
        return self._content_offset
    content_offset = property(_getContentOffset, doc="Content offset in bytes")

    def getFieldType(self):
        """Field type followed by the charset and the strip option."""
        info = self.charset
        if self._strip:
            if isinstance(self._strip, str):
                info += ",strip=%s" % makePrintable(
                    self._strip, "ASCII", quote="'")
            else:
                info += ",strip=True"
        return "%s<%s>" % (Bytes.getFieldType(self), info)


def stringFactory(name, format, doc):
    """
    Create a GenericString subclass bound to the string format *format*.

    The new class is called *name*, uses *doc* as docstring, and its
    constructor has the same options as GenericString without the format
    and nbytes arguments.
    """
    string_format = format

    class NewString(GenericString):
        __doc__ = doc

        def __init__(self, parent, name, description=None,
                     strip=None, charset=None, truncate=None):
            # The format is the one chosen when the class was created
            GenericString.__init__(
                self, parent, name, string_format, description,
                strip=strip, charset=charset, truncate=truncate)

    NewString.__name__ = name
    return NewString


# Nul-terminated string (terminator: "\0")
CString = stringFactory(
    name="CString",
    format="C",
    doc=r"""C string: string ending with nul byte.
See GenericString to get more information.""")

# Line of text terminated by "\n" (ASCII 0x0A)
UnixLine = stringFactory(
    name="UnixLine",
    format="UnixLine",
    doc=r"""Unix line: string ending with "\n" (ASCII code 10).
See GenericString to get more information.""")

# String whose length is stored before it in a 8-bit integer
PascalString8 = stringFactory(
    name="PascalString8",
    format="Pascal8",
    doc=r"""Pascal string: string prefixed with 8-bit integer containing its length (endian depends on parent endian).
See GenericString to get more information.""")

# String whose length is stored before it in a 16-bit integer
# (read using the parent endian)
PascalString16 = stringFactory(
    name="PascalString16",
    format="Pascal16",
    doc=r"""Pascal string: string prefixed with 16-bit integer containing its length (endian depends on parent endian).
See GenericString to get more information.""")

# String whose length is stored before it in a 32-bit integer
# (read using the parent endian)
PascalString32 = stringFactory(
    name="PascalString32",
    format="Pascal32",
    doc=r"""Pascal string: string prefixed with 32-bit integer containing its length (endian depends on parent endian).
See GenericString to get more information.""")


class String(GenericString):
    """
    String with fixed size (size in bytes).
    See GenericString to get more information.
    """

    @staticmethod
    def static_size(*args, **kw):
        # Field size in bits: the second positional argument (the size in
        # bytes, assuming args starts with the field name -- TODO confirm
        # against the caller) multiplied by 8
        return args[1] * 8

    def __init__(self, parent, name, nbytes, description=None,
                 strip=None, charset=None, truncate=None):
        GenericString.__init__(
            self, parent, name, "fixed", description,
            strip=strip, charset=charset, nbytes=nbytes, truncate=truncate)


# The class is exposed under the name "FixedString"
String.__name__ = "FixedString"
Change bump to major version 3.xx to signal that this branch supports Python3+ only. Initial SickGear for Python 3. 2023-01-12 01:04:47 +00:00			`"""`
			`String field classes:`
			`- String: Fixed length string (no prefix/no suffix) ;`
			`- CString: String which ends with nul byte ("\0") ;`
			`- UnixLine: Unix line of text, string which ends with "\n" ;`
			`- PascalString8, PascalString16, PascalString32: String prefixed with`
			`length written in a 8, 16, 32-bit integer (use parent endian).`

			`Constructor has optional arguments:`
			`- strip: value can be a string or True ;`
			`- charset: if set, convert string to unicode using this charset (in "replace"`
			`mode which replace all buggy characters with ".").`

			`Note: For PascalStringXX, prefixed value is the number of bytes and not`
			`of characters!`
			`"""`

			`from hachoir.field import FieldError, Bytes`
			`from hachoir.core.endian import LITTLE_ENDIAN, BIG_ENDIAN`
			`from hachoir.core.tools import alignValue, makePrintable`
			`from hachoir.core.i18n import guessBytesCharset`
			`from hachoir.core import config`
			`from codecs import BOM_UTF16_LE, BOM_UTF16_BE, BOM_UTF32_LE, BOM_UTF32_BE`

			`# Default charset used to convert byte string to Unicode`
			`# This charset is used if no charset is specified or on conversion error`
			`FALLBACK_CHARSET = "ISO-8859-1"`


			`class GenericString(Bytes):`
			`"""`
			`Generic string class.`

			`charset have to be in CHARSET_8BIT or in UTF_CHARSET.`
			`"""`

			`VALID_FORMATS = ("C", "UnixLine",`
			`"fixed", "Pascal8", "Pascal16", "Pascal32")`

			`# 8-bit charsets`
			`CHARSET_8BIT = set((`
			`"ASCII", # ANSI X3.4-1968`
			`"MacRoman",`
			`"CP037", # EBCDIC 037`
			`"CP874", # Thai`
			`"WINDOWS-1250", # Central Europe`
			`"WINDOWS-1251", # Cyrillic`
			`"WINDOWS-1252", # Latin I`
			`"WINDOWS-1253", # Greek`
			`"WINDOWS-1254", # Turkish`
			`"WINDOWS-1255", # Hebrew`
			`"WINDOWS-1256", # Arabic`
			`"WINDOWS-1257", # Baltic`
			`"WINDOWS-1258", # Vietnam`
			`"ISO-8859-1", # Latin-1`
			`"ISO-8859-2", # Latin-2`
			`"ISO-8859-3", # Latin-3`
			`"ISO-8859-4", # Latin-4`
			`"ISO-8859-5",`
			`"ISO-8859-6",`
			`"ISO-8859-7",`
			`"ISO-8859-8",`
			`"ISO-8859-9", # Latin-5`
			`"ISO-8859-10", # Latin-6`
			`"ISO-8859-11", # Thai`
			`"ISO-8859-13", # Latin-7`
			`"ISO-8859-14", # Latin-8`
			`"ISO-8859-15", # Latin-9 or ("Latin-0")`
			`"ISO-8859-16", # Latin-10`
			`))`

			`# UTF-xx charset familly`
			`UTF_CHARSET = {`
			`"UTF-8": (8, None),`
			`"UTF-16-LE": (16, LITTLE_ENDIAN),`
			`"UTF-32LE": (32, LITTLE_ENDIAN),`
			`"UTF-16-BE": (16, BIG_ENDIAN),`
			`"UTF-32BE": (32, BIG_ENDIAN),`
			`"UTF-16": (16, "BOM"),`
			`"UTF-32": (32, "BOM"),`
			`}`

			`# UTF-xx BOM => charset with endian`
			`UTF_BOM = {`
			`16: {BOM_UTF16_LE: "UTF-16-LE", BOM_UTF16_BE: "UTF-16-BE"},`
			`32: {BOM_UTF32_LE: "UTF-32LE", BOM_UTF32_BE: "UTF-32BE"},`
			`}`

			`# Suffix format: value is suffix (string)`
			`SUFFIX_FORMAT = {`
			`"C": {`
			`8: {LITTLE_ENDIAN: b"\0", BIG_ENDIAN: b"\0"},`
			`16: {LITTLE_ENDIAN: b"\0\0", BIG_ENDIAN: b"\0\0"},`
			`32: {LITTLE_ENDIAN: b"\0\0\0\0", BIG_ENDIAN: b"\0\0\0\0"},`
			`},`
			`"UnixLine": {`
			`8: {LITTLE_ENDIAN: b"\n", BIG_ENDIAN: b"\n"},`
			`16: {LITTLE_ENDIAN: b"\n\0", BIG_ENDIAN: b"\0\n"},`
			`32: {LITTLE_ENDIAN: b"\n\0\0\0", BIG_ENDIAN: b"\0\0\0\n"},`
			`},`

			`}`

			`# Pascal format: value is the size of the prefix in bits`
			`PASCAL_FORMATS = {`
			`"Pascal8": 1,`
			`"Pascal16": 2,`
			`"Pascal32": 4`
			`}`

			`# Raw value: with prefix and suffix, not stripped,`
			`# and not converted to Unicode`
			`_raw_value = None`

			`def __init__(self, parent, name, format, description=None,`
			`strip=None, charset=None, nbytes=None, truncate=None):`
			`Bytes.__init__(self, parent, name, 1, description)`

			`# Is format valid?`
			`assert format in self.VALID_FORMATS`

			`# Store options`
			`self._format = format`
			`self._strip = strip`
			`self._truncate = truncate`

			`# Check charset and compute character size in bytes`
			`# (or None when it's not possible to guess character size)`
			`if not charset or charset in self.CHARSET_8BIT:`
			`self._character_size = 1 # one byte per character`
			`elif charset in self.UTF_CHARSET:`
			`self._character_size = None`
			`else:`
			`raise FieldError("Invalid charset for %s: \"%s\"" %`
			`(self.path, charset))`
			`self._charset = charset`

			`# It is a fixed string?`
			`if nbytes is not None:`
			`assert self._format == "fixed"`
			`# Arbitrary limits, just to catch some bugs...`
			`if not (1 <= nbytes <= 0xffff):`
			`raise FieldError("Invalid string size for %s: %s" %`
			`(self.path, nbytes))`
			`self._content_size = nbytes # content length in bytes`
			`self._size = nbytes * 8`
			`self._content_offset = 0`
			`else:`
			`# Format with a suffix: Find the end of the string`
			`if self._format in self.SUFFIX_FORMAT:`
			`self._content_offset = 0`

			`# Choose the suffix`
			`suffix = self.suffix_str`

			`# Find the suffix`
			`length = self._parent.stream.searchBytesLength(`
			`suffix, False, self.absolute_address)`
			`if length is None:`
			`raise FieldError("Unable to find end of string %s (format %s)!"`
			`% (self.path, self._format))`
			`if 1 < len(suffix):`
			`# Fix length for little endian bug with UTF-xx charset:`
			`# u"abc" -> "a\0b\0c\0\0\0" (UTF-16-LE)`
			`# search returns length=5, whereas real lenght is 6`
			`length = alignValue(length, len(suffix))`

			`# Compute sizes`
			`self._content_size = length # in bytes`
			`self._size = (length + len(suffix)) * 8`

			`# Format with a prefix: Read prefixed length in bytes`
			`else:`
			`assert self._format in self.PASCAL_FORMATS`

			`# Get the prefix size`
			`prefix_size = self.PASCAL_FORMATS[self._format]`
			`self._content_offset = prefix_size`

			`# Read the prefix and compute sizes`
			`value = self._parent.stream.readBits(`
			`self.absolute_address, prefix_size * 8, self._parent.endian)`
			`self._content_size = value # in bytes`
			`self._size = (prefix_size + value) * 8`

			`# For UTF-16 and UTF-32, choose the right charset using BOM`
			`if self._charset in self.UTF_CHARSET:`
			`# Charset requires a BOM?`
			`bomsize, endian = self.UTF_CHARSET[self._charset]`
			`if endian == "BOM":`
			`# Read the BOM value`
			`nbytes = bomsize // 8`
			`bom = self._parent.stream.readBytes(`
			`self.absolute_address, nbytes)`

			`# Choose right charset using the BOM`
			`bom_endian = self.UTF_BOM[bomsize]`
			`if bom not in bom_endian:`
			`raise FieldError("String %s has invalid BOM (%s)!"`
			`% (self.path, repr(bom)))`
			`self._charset = bom_endian[bom]`
			`self._content_size -= nbytes`
			`self._content_offset += nbytes`

			`# Compute length in character if possible`
			`if self._character_size:`
			`self._length = self._content_size // self._character_size`
			`else:`
			`self._length = None`

			`@staticmethod`
			`def staticSuffixStr(format, charset, endian):`
			`if format not in GenericString.SUFFIX_FORMAT:`
			`return ''`
			`suffix = GenericString.SUFFIX_FORMAT[format]`
			`if charset in GenericString.UTF_CHARSET:`
			`suffix_size = GenericString.UTF_CHARSET[charset][0]`
			`suffix = suffix[suffix_size]`
			`else:`
			`suffix = suffix[8]`
			`return suffix[endian]`

			`def _getSuffixStr(self):`
			`return self.staticSuffixStr(`
			`self._format, self._charset, self._parent.endian)`
			`suffix_str = property(_getSuffixStr)`

			`def _convertText(self, text):`
			`if not self._charset:`
			`# charset is still unknown: guess the charset`
			`self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET)`

			`# Try to convert to Unicode`
			`try:`
			`return str(text, self._charset, "strict")`
			`except UnicodeDecodeError as exc:`
			`err = exc`

			`# --- Conversion error ---`

			`# Fix truncated UTF-16 string like 'B\0e' (3 bytes)`
			`# => Add missing nul byte: 'B\0e\0' (4 bytes)`
			`if err.reason == "truncated data" \`
			`and err.end == len(text) \`
			`and self._charset == "UTF-16-LE":`
			`try:`
Update hachoir 3.1.2 (f739b43) → 3.2.0 (38d759f). 2023-10-07 23:04:41 +00:00			`text = str(text + b"\0", self._charset, "strict")`
Change bump to major version 3.xx to signal that this branch supports Python3+ only. Initial SickGear for Python 3. 2023-01-12 01:04:47 +00:00			`self.warning(`
			`"Fix truncated %s string: add missing nul byte" % self._charset)`
			`return text`
			`except UnicodeDecodeError:`
			`pass`

			`# On error, use FALLBACK_CHARSET`
			`self.warning("Unable to convert string to Unicode: %s" % err)`
			`return str(text, FALLBACK_CHARSET, "strict")`

			`def _guessCharset(self):`
			`addr = self.absolute_address + self._content_offset * 8`
			`bytes = self._parent.stream.readBytes(addr, self._content_size)`
			`return guessBytesCharset(bytes, default=FALLBACK_CHARSET)`

			`def createValue(self, human=True):`
			`# Compress data address (in bits) and size (in bytes)`
			`if human:`
			`addr = self.absolute_address + self._content_offset * 8`
			`size = self._content_size`
			`else:`
			`addr = self.absolute_address`
			`size = self._size // 8`
			`if size == 0:`
			`# Empty string`
			`return ""`

			`# Read bytes in data stream`
			`text = self._parent.stream.readBytes(addr, size)`

			`# Don't transform data?`
			`if not human:`
			`return text`

			`# Convert text to Unicode`
			`text = self._convertText(text)`

			`# Truncate`
			`if self._truncate:`
			`pos = text.find(self._truncate)`
			`if 0 <= pos:`
			`text = text[:pos]`

			`# Strip string if needed`
			`if self._strip:`
			`if isinstance(self._strip, str):`
			`text = text.strip(self._strip)`
			`else:`
			`text = text.strip()`
			`assert isinstance(text, str)`
			`return text`

			`def createDisplay(self, human=True):`
			`if not human:`
			`if self._raw_value is None:`
			`self._raw_value = GenericString.createValue(self, False)`
			`value = makePrintable(self._raw_value, "ASCII")`
			`elif self._charset:`
			`value = makePrintable(self.value, "ISO-8859-1")`
			`else:`
			`value = self.value`
			`if config.max_string_length < len(value):`
			`# Truncate string if needed`
			`value = "%s(...)" % value[:config.max_string_length]`
			`if not self._charset or not human:`
			`return makePrintable(value, "ASCII", quote='"')`
			`else:`
			`if value:`
			`return '"%s"' % value.replace('"', '\\"')`
			`else:`
			`return "(empty)"`

			`def createRawDisplay(self):`
			`return GenericString.createDisplay(self, human=False)`

			`def _getLength(self):`
			`if self._length is None:`
			`self._length = len(self.value)`
			`return self._length`
			`length = property(_getLength, doc="String length in characters")`

			`def _getFormat(self):`
			`return self._format`
			`format = property(_getFormat, doc="String format (eg. 'C')")`

			`def _getCharset(self):`
			`if not self._charset:`
			`self._charset = self._guessCharset()`
			`return self._charset`
			`charset = property(_getCharset, doc="String charset (eg. 'ISO-8859-1')")`

			`def _getContentSize(self):`
			`return self._content_size`
			`content_size = property(_getContentSize, doc="Content size in bytes")`

			`def _getContentOffset(self):`
			`return self._content_offset`
			`content_offset = property(_getContentOffset, doc="Content offset in bytes")`

			`def getFieldType(self):`
			`info = self.charset`
			`if self._strip:`
			`if isinstance(self._strip, str):`
			`info += ",strip=%s" % makePrintable(`
			`self._strip, "ASCII", quote="'")`
			`else:`
			`info += ",strip=True"`
			`return "%s<%s>" % (Bytes.getFieldType(self), info)`


			`def stringFactory(name, format, doc):`
			`class NewString(GenericString):`
			`__doc__ = doc`

			`def __init__(self, parent, name, description=None,`
			`strip=None, charset=None, truncate=None):`
			`GenericString.__init__(self, parent, name, format, description,`
			`strip=strip, charset=charset, truncate=truncate)`
			`cls = NewString`
			`cls.__name__ = name`
			`return cls`


			`# String which ends with nul byte ("\0")`
			`CString = stringFactory("CString", "C",`
			`r"""C string: string ending with nul byte.`
			`See GenericString to get more information.""")`

			`# Unix line of text: string which ends with "\n" (ASCII 0x0A)`
			`UnixLine = stringFactory("UnixLine", "UnixLine",`
			`r"""Unix line: string ending with "\n" (ASCII code 10).`
			`See GenericString to get more information.""")`

			`# String prefixed with length written in a 8-bit integer`
			`PascalString8 = stringFactory("PascalString8", "Pascal8",`
			`r"""Pascal string: string prefixed with 8-bit integer containing its length (endian depends on parent endian).`
			`See GenericString to get more information.""")`

			`# String prefixed with length written in a 16-bit integer (use parent endian)`
			`PascalString16 = stringFactory("PascalString16", "Pascal16",`
			`r"""Pascal string: string prefixed with 16-bit integer containing its length (endian depends on parent endian).`
			`See GenericString to get more information.""")`

			`# String prefixed with length written in a 32-bit integer (use parent endian)`
			`PascalString32 = stringFactory("PascalString32", "Pascal32",`
			`r"""Pascal string: string prefixed with 32-bit integer containing its length (endian depends on parent endian).`
			`See GenericString to get more information.""")`


			`class String(GenericString):`
			`"""`
			`String with fixed size (size in bytes).`
			`See GenericString to get more information.`
			`"""`
			`static_size = staticmethod(lambda *args, **kw: args[1] * 8)`

			`def __init__(self, parent, name, nbytes, description=None,`
			`strip=None, charset=None, truncate=None):`
			`GenericString.__init__(self, parent, name, "fixed", description,`
			`strip=strip, charset=charset, nbytes=nbytes, truncate=truncate)`


			`String.__name__ = "FixedString"`