mirror of
https://github.com/SickGear/SickGear.git
synced 2025-01-10 03:53:39 +00:00
60 lines
1.7 KiB
Python
60 lines
1.7 KiB
Python
|
from langcodes.util import data_filename
|
||
|
|
||
|
# Registry fields that parse_item() collects into a list of values (one entry
# per occurrence) instead of storing as a single string.
LIST_KEYS = {'Description', 'Prefix'}
|
||
|
|
||
|
|
||
|
def parse_file(file):
    """
    Take an open file containing the IANA subtag registry, and yield a
    dictionary of information for each subtag it describes.

    The file is read line by line. A line consisting of exactly '%%' ends
    the current record; a line beginning with a space is a continuation of
    the line before it; any other line starts a new field of the current
    record.
    """
    lines = []
    for line in file:
        line = line.rstrip('\n')
        if line == '%%':
            # Separator between records: parse what has been collected so
            # far, yield the result (if any), and start over.
            yield from parse_item(lines)
            lines.clear()
        elif line.startswith(' '):
            # Continuation line: join it onto the previous line, dropping
            # only the first leading space so one space remains between
            # the joined pieces.
            lines[-1] += line[1:]
        else:
            lines.append(line)

    # Whatever follows the last separator forms one more record.
    yield from parse_item(lines)
|
||
|
|
||
|
|
||
|
def parse_item(lines):
    """
    Given the lines that form a subtag entry (after joining wrapped lines
    back together), parse the data they contain.

    Each line is split at the first ': ' into a key and a value. Keys listed
    in LIST_KEYS map to a list of every value seen for them; every other key
    maps to a single string and must not be repeated.

    Returns a generator that yields the resulting dictionary once if it has
    a 'Subtag' or 'Tag' key, and yields nothing otherwise (for example, for
    the header of the registry).
    """
    info = {}
    for line in lines:
        # Split only on the first ': ' so values may themselves contain it.
        key, value = line.split(': ', 1)
        if key in LIST_KEYS:
            info.setdefault(key, []).append(value)
        else:
            # Single-valued keys are expected to appear once per entry.
            assert key not in info
            info[key] = value

    if 'Subtag' in info or 'Tag' in info:
        yield info
|
||
|
|
||
|
|
||
|
def parse_registry():
    """
    Yield a sequence of dictionaries, containing the info in the included
    IANA subtag registry file.

    The file is located with data_filename('language-subtag-registry.txt')
    and read as UTF-8.
    """
    with open(
        data_filename('language-subtag-registry.txt'), encoding='utf-8'
    ) as data_file:
        # 'yield from' instead of returning the inner generator, so that the
        # file is only closed once iteration has finished.
        yield from parse_file(data_file)
|