SickGear/lib/hachoir_parser/audio/id3.py
2015-08-20 16:48:28 +01:00

507 lines
16 KiB
Python

"""
ID3 metadata parser, supported versions: 1.O, 2.2, 2.3 and 2.4
Informations: http://www.id3.org/
Author: Victor Stinner
"""
from hachoir_core.field import (FieldSet, MatchError, ParserError,
Enum, UInt8, UInt24, UInt32,
CString, String, RawBytes,
Bit, Bits, NullBytes, NullBits)
from hachoir_core.text_handler import textHandler
from hachoir_core.tools import humanDuration
from hachoir_core.endian import NETWORK_ENDIAN
class ID3v1(FieldSet):
static_size = 128 * 8
GENRE_NAME = {
0: u"Blues",
1: u"Classic Rock",
2: u"Country",
3: u"Dance",
4: u"Disco",
5: u"Funk",
6: u"Grunge",
7: u"Hip-Hop",
8: u"Jazz",
9: u"Metal",
10: u"New Age",
11: u"Oldies",
12: u"Other",
13: u"Pop",
14: u"R&B",
15: u"Rap",
16: u"Reggae",
17: u"Rock",
18: u"Techno",
19: u"Industrial",
20: u"Alternative",
21: u"Ska",
22: u"Death Metal",
23: u"Pranks",
24: u"Soundtrack",
25: u"Euro-Techno",
26: u"Ambient",
27: u"Trip-Hop",
28: u"Vocal",
29: u"Jazz+Funk",
30: u"Fusion",
31: u"Trance",
32: u"Classical",
33: u"Instrumental",
34: u"Acid",
35: u"House",
36: u"Game",
37: u"Sound Clip",
38: u"Gospel",
39: u"Noise",
40: u"AlternRock",
41: u"Bass",
42: u"Soul",
43: u"Punk",
44: u"Space",
45: u"Meditative",
46: u"Instrumental Pop",
47: u"Instrumental Rock",
48: u"Ethnic",
49: u"Gothic",
50: u"Darkwave",
51: u"Techno-Industrial",
52: u"Electronic",
53: u"Pop-Folk",
54: u"Eurodance",
55: u"Dream",
56: u"Southern Rock",
57: u"Comedy",
58: u"Cult",
59: u"Gangsta",
60: u"Top 40",
61: u"Christian Rap",
62: u"Pop/Funk",
63: u"Jungle",
64: u"Native American",
65: u"Cabaret",
66: u"New Wave",
67: u"Psychadelic",
68: u"Rave",
69: u"Showtunes",
70: u"Trailer",
71: u"Lo-Fi",
72: u"Tribal",
73: u"Acid Punk",
74: u"Acid Jazz",
75: u"Polka",
76: u"Retro",
77: u"Musical",
78: u"Rock & Roll",
79: u"Hard Rock",
# Following are winamp extentions
80: u"Folk",
81: u"Folk-Rock",
82: u"National Folk",
83: u"Swing",
84: u"Fast Fusion",
85: u"Bebob",
86: u"Latin",
87: u"Revival",
88: u"Celtic",
89: u"Bluegrass",
90: u"Avantgarde",
91: u"Gothic Rock",
92: u"Progressive Rock",
93: u"Psychedelic Rock",
94: u"Symphonic Rock",
95: u"Slow Rock",
96: u"Big Band",
97: u"Chorus",
98: u"Easy Listening",
99: u"Acoustic",
100: u"Humour",
101: u"Speech",
102: u"Chanson",
103: u"Opera",
104: u"Chamber Music",
105: u"Sonata",
106: u"Symphony",
107: u"Booty Bass",
108: u"Primus",
109: u"Porn Groove",
110: u"Satire",
111: u"Slow Jam",
112: u"Club",
113: u"Tango",
114: u"Samba",
115: u"Folklore",
116: u"Ballad",
117: u"Power Ballad",
118: u"Rhythmic Soul",
119: u"Freestyle",
120: u"Duet",
121: u"Punk Rock",
122: u"Drum Solo",
123: u"A capella",
124: u"Euro-House",
125: u"Dance Hall",
126: u"Goa",
127: u"Drum & Bass",
128: u"Club-House",
129: u"Hardcore",
130: u"Terror",
131: u"Indie",
132: u"Britpop",
133: u"Negerpunk",
134: u"Polsk Punk",
135: u"Beat",
136: u"Christian Gangsta Rap",
137: u"Heavy Metal",
138: u"Black Metal",
139: u"Crossover",
140: u"Contemporary Christian",
141: u"Christian Rock ",
142: u"Merengue",
143: u"Salsa",
144: u"Trash Metal",
145: u"Anime",
146: u"JPop",
147: u"Synthpop"
}
def createFields(self):
yield String(self, "signature", 3, "IDv1 signature (\"TAG\")", charset="ASCII")
if self["signature"].value != "TAG":
raise MatchError("Stream doesn't look like ID3v1 (wrong signature)!")
# TODO: Charset of below strings?
yield String(self, "song", 30, "Song title", strip=" \0", charset="ISO-8859-1")
yield String(self, "author", 30, "Author", strip=" \0", charset="ISO-8859-1")
yield String(self, "album", 30, "Album title", strip=" \0", charset="ISO-8859-1")
yield String(self, "year", 4, "Year", strip=" \0", charset="ISO-8859-1")
# TODO: Write better algorithm to guess ID3v1 version
version = self.getVersion()
if version in ("v1.1", "v1.1b"):
if version == "v1.1b":
# ID3 v1.1b
yield String(self, "comment", 29, "Comment", strip=" \0", charset="ISO-8859-1")
yield UInt8(self, "track_nb", "Track number")
else:
# ID3 v1.1
yield String(self, "comment", 30, "Comment", strip=" \0", charset="ISO-8859-1")
yield Enum(UInt8(self, "genre", "Genre"), self.GENRE_NAME)
else:
# ID3 v1.0
yield String(self, "comment", 31, "Comment", strip=" \0", charset="ISO-8859-1")
def getVersion(self):
addr = self.absolute_address + 126*8
bytes = self.stream.readBytes(addr, 2)
# last byte (127) is not space?
if bytes[1] != ' ':
# byte 126 is nul?
if bytes[0] == 0x00:
return "v1.1"
else:
return "v1.1b"
else:
return "1.0"
def createDescription(self):
version = self.getVersion()
return "ID3 %s: author=%s, song=%s" % (
version, self["author"].value, self["song"].value)
def getCharset(field):
try:
key = field.value
return ID3_StringCharset.charset_name[key]
except KeyError:
raise ParserError("ID3v2: Invalid charset (%s)." % key)
class ID3_String(FieldSet):
STRIP = " \0"
def createFields(self):
yield String(self, "text", self._size/8, "Text", charset="ISO-8859-1", strip=self.STRIP)
class ID3_StringCharset(ID3_String):
STRIP = " \0"
charset_desc = {
0: "ISO-8859-1",
1: "UTF-16 with BOM",
2: "UTF-16 (big endian)",
3: "UTF-8"
}
charset_name = {
0: "ISO-8859-1",
1: "UTF-16",
2: "UTF-16-BE",
3: "UTF-8"
}
def createFields(self):
yield Enum(UInt8(self, "charset"), self.charset_desc)
size = (self.size - self.current_size)/8
if not size:
return
charset = getCharset(self["charset"])
yield String(self, "text", size, "Text", charset=charset, strip=self.STRIP)
class ID3_GEOB(ID3_StringCharset):
def createFields(self):
yield Enum(UInt8(self, "charset"), self.charset_desc)
charset = getCharset(self["charset"])
yield CString(self, "mime", "MIME type", charset=charset)
yield CString(self, "filename", "File name", charset=charset)
yield CString(self, "description", "Content description", charset=charset)
size = (self.size - self.current_size) // 8
if not size:
return
yield String(self, "text", size, "Text", charset=charset)
class ID3_Comment(ID3_StringCharset):
def createFields(self):
yield Enum(UInt8(self, "charset"), self.charset_desc)
yield String(self, "lang", 3, "Language", charset="ASCII")
charset = getCharset(self["charset"])
yield CString(self, "title", "Title", charset=charset, strip=self.STRIP)
size = (self.size - self.current_size) // 8
if not size:
return
yield String(self, "text", size, "Text", charset=charset, strip=self.STRIP)
class ID3_StringTitle(ID3_StringCharset):
def createFields(self):
yield Enum(UInt8(self, "charset"), self.charset_desc)
if self.current_size == self.size:
return
charset = getCharset(self["charset"])
yield CString(self, "title", "Title", charset=charset, strip=self.STRIP)
size = (self.size - self.current_size)/8
if not size:
return
yield String(self, "text", size, "Text", charset=charset, strip=self.STRIP)
class ID3_Private(FieldSet):
def createFields(self):
size = self._size/8
# TODO: Strings charset?
if self.stream.readBytes(self.absolute_address, 9) == "PeakValue":
yield String(self, "text", 9, "Text")
size -= 9
yield String(self, "content", size, "Content")
class ID3_TrackLength(FieldSet):
def createFields(self):
yield NullBytes(self, "zero", 1)
yield textHandler(String(self, "length", self._size/8 - 1,
"Length in ms", charset="ASCII"), self.computeLength)
def computeLength(self, field):
try:
ms = int(field.value)
return humanDuration(ms)
except:
return field.value
class ID3_Picture23(FieldSet):
pict_type_name = {
0x00: "Other",
0x01: "32x32 pixels 'file icon' (PNG only)",
0x02: "Other file icon",
0x03: "Cover (front)",
0x04: "Cover (back)",
0x05: "Leaflet page",
0x06: "Media (e.g. lable side of CD)",
0x07: "Lead artist/lead performer/soloist",
0x08: "Artist/performer",
0x09: "Conductor",
0x0A: "Band/Orchestra",
0x0B: "Composer",
0x0C: "Lyricist/text writer",
0x0D: "Recording Location",
0x0E: "During recording",
0x0F: "During performance",
0x10: "Movie/video screen capture",
0x11: "A bright coloured fish",
0x12: "Illustration",
0x13: "Band/artist logotype",
0x14: "Publisher/Studio logotype"
}
def createFields(self):
yield Enum(UInt8(self, "charset"), ID3_StringCharset.charset_desc)
charset = getCharset(self["charset"])
yield String(self, "img_fmt", 3, charset="ASCII")
yield Enum(UInt8(self, "pict_type"), self.pict_type_name)
yield CString(self, "text", "Text", charset=charset, strip=" \0")
size = (self._size - self._current_size) / 8
if size:
yield RawBytes(self, "img_data", size)
class ID3_Picture24(FieldSet):
def createFields(self):
yield Enum(UInt8(self, "charset"), ID3_StringCharset.charset_desc)
charset = getCharset(self["charset"])
yield CString(self, "mime", "MIME type", charset=charset)
yield Enum(UInt8(self, "pict_type"), ID3_Picture23.pict_type_name)
yield CString(self, "description", charset=charset)
size = (self._size - self._current_size) / 8
if size:
yield RawBytes(self, "img_data", size)
class ID3_Chunk(FieldSet):
endian = NETWORK_ENDIAN
tag22_name = {
"TT2": "Track title",
"TP1": "Artist",
"TRK": "Track number",
"COM": "Comment",
"TCM": "Composer",
"TAL": "Album",
"TYE": "Year",
"TEN": "Encoder",
"TCO": "Content type",
"PIC": "Picture"
}
tag23_name = {
"COMM": "Comment",
"GEOB": "Encapsulated object",
"PRIV": "Private",
"TPE1": "Artist",
"TCOP": "Copyright",
"TALB": "Album",
"TENC": "Encoder",
"TYER": "Year",
"TSSE": "Encoder settings",
"TCOM": "Composer",
"TRCK": "Track number",
"PCNT": "Play counter",
"TCON": "Content type",
"TLEN": "Track length",
"TIT2": "Track title",
"WXXX": "User defined URL"
}
handler = {
"COMM": ID3_Comment,
"COM": ID3_Comment,
"GEOB": ID3_GEOB,
"PIC": ID3_Picture23,
"APIC": ID3_Picture24,
"PRIV": ID3_Private,
"TXXX": ID3_StringTitle,
"WOAR": ID3_String,
"WXXX": ID3_StringTitle,
}
def __init__(self, *args):
FieldSet.__init__(self, *args)
if 3 <= self["../ver_major"].value:
self._size = (10 + self["size"].value) * 8
else:
self._size = (self["size"].value + 6) * 8
def createFields(self):
if 3 <= self["../ver_major"].value:
# ID3 v2.3 and 2.4
yield Enum(String(self, "tag", 4, "Tag", charset="ASCII", strip="\0"), ID3_Chunk.tag23_name)
if 4 <= self["../ver_major"].value:
yield ID3_Size(self, "size") # ID3 v2.4
else:
yield UInt32(self, "size") # ID3 v2.3
yield Bit(self, "tag_alter", "Tag alter preservation")
yield Bit(self, "file_alter", "Tag alter preservation")
yield Bit(self, "rd_only", "Read only?")
yield NullBits(self, "padding[]", 5)
yield Bit(self, "compressed", "Frame is compressed?")
yield Bit(self, "encrypted", "Frame is encrypted?")
yield Bit(self, "group", "Grouping identity")
yield NullBits(self, "padding[]", 5)
size = self["size"].value
is_compressed = self["compressed"].value
else:
# ID3 v2.2
yield Enum(String(self, "tag", 3, "Tag", charset="ASCII", strip="\0"), ID3_Chunk.tag22_name)
yield UInt24(self, "size")
size = self["size"].value - self.current_size/8 + 6
is_compressed = False
if size:
cls = None
if not(is_compressed):
tag = self["tag"].value
if tag in ID3_Chunk.handler:
cls = ID3_Chunk.handler[tag]
elif tag[0] == "T":
cls = ID3_StringCharset
if cls:
yield cls(self, "content", "Content", size=size*8)
else:
yield RawBytes(self, "content", size, "Raw data content")
def createDescription(self):
if self["size"].value != 0:
return "ID3 Chunk: %s" % self["tag"].display
else:
return "ID3 Chunk: (terminator)"
class ID3_Size(Bits):
static_size = 32
def __init__(self, parent, name, description=None):
Bits.__init__(self, parent, name, 32, description)
def createValue(self):
data = self.parent.stream.readBytes(self.absolute_address, 4)
# TODO: Check that bit #7 of each byte is nul: not(ord(data[i]) & 127)
return reduce(lambda x, y: x*128 + y, (ord(item) for item in data ))
class ID3v2(FieldSet):
endian = NETWORK_ENDIAN
VALID_MAJOR_VERSIONS = (2, 3, 4)
def __init__(self, parent, name, size=None):
FieldSet.__init__(self, parent, name, size=size)
if not self._size:
self._size = (self["size"].value + 10) * 8
def createDescription(self):
return "ID3 v2.%s.%s" % \
(self["ver_major"].value, self["ver_minor"].value)
def createFields(self):
# Signature + version
yield String(self, "header", 3, "Header (ID3)", charset="ASCII")
yield UInt8(self, "ver_major", "Version (major)")
yield UInt8(self, "ver_minor", "Version (minor)")
# Check format
if self["header"].value != "ID3":
raise MatchError("Signature error, should be \"ID3\".")
if self["ver_major"].value not in self.VALID_MAJOR_VERSIONS \
or self["ver_minor"].value != 0:
raise MatchError(
"Unknown ID3 metadata version (2.%u.%u)"
% (self["ver_major"].value, self["ver_minor"].value))
# Flags
yield Bit(self, "unsync", "Unsynchronisation is used?")
yield Bit(self, "ext", "Extended header is used?")
yield Bit(self, "exp", "Experimental indicator")
yield NullBits(self, "padding[]", 5)
# Size
yield ID3_Size(self, "size")
# All tags
while self.current_size < self._size:
field = ID3_Chunk(self, "field[]")
yield field
if field["size"].value == 0:
break
# Search first byte of the MPEG file
padding = self.seekBit(self._size)
if padding:
yield padding