AniDB now updates its xml lists daily.

Added writeback cache for both rss feeds and name parser.
This commit is contained in:
echel0n 2014-07-14 23:55:52 -07:00
parent fefcfa0952
commit 4a061d4dd2
6 changed files with 71 additions and 85685 deletions

View file

@ -15,6 +15,8 @@
# You should have received a copy of the GNU General Public License
# along with aDBa. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement
from time import time, sleep
import aniDBfileInfo as fileInfo
import xml.etree.cElementTree as etree
@ -22,11 +24,10 @@ import os, re, string
from aniDBmaper import AniDBMaper
from aniDBtvDBmaper import TvDBMap
from aniDBerrors import *
from aniDBfileInfo import read_anidb_xml, read_tvdb_map_xml
class aniDBabstractObject(object):
def __init__(self, aniDB, load=False):
self.laoded = False
self.set_connection(aniDB)
@ -98,7 +99,7 @@ class aniDBabstractObject(object):
priority - low = 0, medium = 1, high = 2 (unconfirmed)
"""
if(self.aid):
if (self.aid):
self.aniDB.notifyadd(aid=self.aid, type=1, priority=1)
@ -157,18 +158,19 @@ class Anime(aniDBabstractObject):
self.rawData = self.aniDB.groupstatus(aid=self.aid)
self.release_groups = []
for line in self.rawData.datalines:
self.release_groups.append({"name":unicode(line["name"], "utf-8"),
"rating":line["rating"],
"range":line["episode_range"]
})
self.release_groups.append({"name": unicode(line["name"], "utf-8"),
"rating": line["rating"],
"range": line["episode_range"]
})
return self.release_groups
#TODO: refactor and use the new functions in anidbFileinfo
# TODO: refactor and use the new functions in anidbFileinfo
def _get_aid_from_xml(self, name):
if not self.allAnimeXML:
self.allAnimeXML = self._read_animetitels_xml()
self.allAnimeXML = read_anidb_xml()
regex = re.compile('( \(\d{4}\))|[%s]' % re.escape(string.punctuation)) # remove any punctuation and e.g. ' (2011)'
regex = re.compile(
'( \(\d{4}\))|[%s]' % re.escape(string.punctuation)) # remove any punctuation and e.g. ' (2011)'
#regex = re.compile('[%s]' % re.escape(string.punctuation)) # remove any punctuation and e.g. ' (2011)'
name = regex.sub('', name.lower())
lastAid = 0
@ -185,7 +187,7 @@ class Anime(aniDBabstractObject):
#TODO: refactor and use the new functions in anidbFileinfo
def _get_name_from_xml(self, aid, onlyMain=True):
if not self.allAnimeXML:
self.allAnimeXML = self._read_animetitels_xml()
self.allAnimeXML = read_anidb_xml()
for anime in self.allAnimeXML.findall("anime"):
if int(anime.get("aid", False)) == aid:
@ -196,15 +198,6 @@ class Anime(aniDBabstractObject):
return title.text
return ""
def _read_animetitels_xml(self, path=None):
if not path:
path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "animetitles.xml")
f = open(path, "r")
allAnimeXML = etree.ElementTree(file=f)
return allAnimeXML
def _builPreSequal(self):
if self.related_aid_list and self.related_aid_type:
try:
@ -220,10 +213,9 @@ class Anime(aniDBabstractObject):
self.__dict__["sequal"] = self.related_aid_list
class Episode(aniDBabstractObject):
def __init__(self, aniDB, number=None, epid=None, filePath=None, fid=None, epno=None, paramsA=None, paramsF=None, load=False, calculate=False):
def __init__(self, aniDB, number=None, epid=None, filePath=None, fid=None, epno=None, paramsA=None, paramsF=None,
load=False, calculate=False):
self.maper = AniDBMaper()
self.epid = epid
self.filePath = filePath
@ -232,7 +224,6 @@ class Episode(aniDBabstractObject):
if calculate:
(self.ed2k, self.size) = self._calculate_file_stuff(self.filePath)
if not paramsA:
self.bitCodeA = "C000F0C0"
self.paramsA = self.maper.getFileCodesA(self.bitCodeA)
@ -254,7 +245,8 @@ class Episode(aniDBabstractObject):
if self.filePath and not (self.ed2k or self.size):
(self.ed2k, self.size) = self._calculate_file_stuff(self.filePath)
self.rawData = self.aniDB.file(fid=self.fid, size=self.size, ed2k=self.ed2k, aid=self.aid, aname=None, gid=None, gname=None, epno=self.epno, fmask=self.bitCodeF, amask=self.bitCodeA)
self.rawData = self.aniDB.file(fid=self.fid, size=self.size, ed2k=self.ed2k, aid=self.aid, aname=None, gid=None,
gname=None, epno=self.epno, fmask=self.bitCodeF, amask=self.bitCodeA)
self._fill(self.rawData.datalines[0])
self._build_names()
self.laoded = True
@ -273,7 +265,7 @@ class Episode(aniDBabstractObject):
try:
self.aniDB.mylistadd(size=self.size, ed2k=self.ed2k, state=status)
except Exception, e :
except Exception, e:
self.log(u"exception msg: " + str(e))
else:
# TODO: add the name or something

View file

@ -16,12 +16,16 @@
# along with aDBa. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement
import hashlib
import os
import xml.etree.cElementTree as etree
import time
# http://www.radicand.org/blog/orz/2010/2/21/edonkey2000-hash-in-python/
import requests
def get_file_hash(filePath):
""" Returns the ed2k hash of a given file."""
if not filePath:
@ -31,8 +35,10 @@ def get_file_hash(filePath):
def gen(f):
while True:
x = f.read(9728000)
if x: yield x
else: return
if x:
yield x
else:
return
def md4_hash(data):
m = md4()
@ -44,32 +50,58 @@ def get_file_hash(filePath):
hashes = [md4_hash(data).digest() for data in a]
if len(hashes) == 1:
return hashes[0].encode("hex")
else: return md4_hash(reduce(lambda a,d: a + d, hashes, "")).hexdigest()
else:
return md4_hash(reduce(lambda a, d: a + d, hashes, "")).hexdigest()
def get_file_size(path):
    """Return the size of the file at ``path`` in bytes, as reported by os.path.getsize."""
    return os.path.getsize(path)
def get_anime_titles_xml(path):
    """Download the animetitles.xml dump and save it to ``path``.

    The HTTP status is checked before ``path`` is opened, so an error
    response (404, 5xx, ...) raises instead of overwriting an existing copy
    with the body of an error page.

    :param path: destination file; created or overwritten with the raw
        response bytes.
    :raises requests.HTTPError: if the server answers with an error status.
    """
    daily_dump = requests.get("http://raw.githubusercontent.com/ScudLee/anime-lists/master/animetitles.xml")
    # Fail before touching the file: a saved error page would not parse as
    # XML, and its fresh modification time would make the file look current.
    daily_dump.raise_for_status()
    with open(path, "wb") as f:
        f.write(daily_dump.content)
def read_anidb_xml(filePath):
def get_anime_list_xml(path):
    """Download the anime-list.xml dump and save it to ``path``.

    The HTTP status is checked before ``path`` is opened, so an error
    response (404, 5xx, ...) raises instead of overwriting an existing copy
    with the body of an error page.

    :param path: destination file; created or overwritten with the raw
        response bytes.
    :raises requests.HTTPError: if the server answers with an error status.
    """
    daily_dump = requests.get("http://raw.githubusercontent.com/ScudLee/anime-lists/master/anime-list.xml")
    # Fail before touching the file: a saved error page would not parse as
    # XML, and its fresh modification time would make the file look current.
    daily_dump.raise_for_status()
    with open(path, "wb") as f:
        f.write(daily_dump.content)
def read_anidb_xml(filePath=None):
    """Return animetitles.xml parsed into an ElementTree, refreshing it first if needed.

    :param filePath: location of the XML file; when falsy, ``animetitles.xml``
        in this module's directory is used.
    :return: whatever ``read_xml_into_etree`` returns for the file.

    The file is fetched with ``get_anime_titles_xml`` when it does not exist
    or when its modification time is more than 24 hours in the past.
    """
    if not filePath:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        filePath = os.path.join(module_dir, "animetitles.xml")

    max_age = 24 * 60 * 60  # one day, in seconds
    # "or" short-circuits, so getmtime() is never called on a missing file.
    if not os.path.isfile(filePath) or os.path.getmtime(filePath) + max_age < time.time():
        get_anime_titles_xml(filePath)

    return read_xml_into_etree(filePath)
def read_tvdb_map_xml(filePath):
def read_tvdb_map_xml(filePath=None):
    """Return anime-list.xml parsed into an ElementTree, refreshing it first if needed.

    :param filePath: location of the XML file; when falsy, ``anime-list.xml``
        in this module's directory is used.
    :return: whatever ``read_xml_into_etree`` returns for the file.

    The file is fetched with ``get_anime_list_xml`` when it does not exist
    or when its modification time is more than 24 hours in the past.
    """
    if not filePath:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        filePath = os.path.join(module_dir, "anime-list.xml")

    max_age = 24 * 60 * 60  # one day, in seconds
    # "or" short-circuits, so getmtime() is never called on a missing file.
    if not os.path.isfile(filePath) or os.path.getmtime(filePath) + max_age < time.time():
        get_anime_list_xml(filePath)

    return read_xml_into_etree(filePath)
def read_xml_into_etree(filePath):
    """Parse the XML file at ``filePath`` and return it as an ElementTree.

    :param filePath: path of the XML file to parse.
    :return: the parsed ``ElementTree``, or ``None`` when ``filePath`` is
        falsy (``None`` or an empty string).
    """
    if not filePath:
        return None

    # Binary mode hands the parser the raw bytes so it can apply the encoding
    # declared in the XML itself; the "with" block closes the handle even if
    # parsing raises.
    with open(filePath, "rb") as f:
        return etree.ElementTree(file=f)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -591,7 +591,7 @@ class NameParserCache:
name = name.encode('utf-8', 'ignore')
try :
with closing(shelve.open(self.db_name)) as npc:
with closing(shelve.open(self.db_name, writeback=True)) as npc:
npc[str(name)] = parse_result
while len(npc.items()) > self.npc_cache_size:
@ -604,7 +604,7 @@ class NameParserCache:
name = name.encode('utf-8', 'ignore')
try:
with closing(shelve.open(ek.ek(os.path.join, sickbeard.CACHE_DIR, 'name_parser_cache'))) as npc:
with closing(shelve.open(self.db_name, writeback=True)) as npc:
parse_result = npc.get(str(name), None)
except Exception as e:
logger.log(u"NameParser cache error: " + ex(e), logger.ERROR)

View file

@ -20,7 +20,7 @@ class RSSFeeds:
def clearCache(self, age=None):
try:
with closing(shelve.open(ek.ek(os.path.join, sickbeard.CACHE_DIR, self.db_name))) as fs:
with closing(shelve.open(self.db_name, writeback=True)) as fs:
fc = cache.Cache(fs)
fc.purge(age)
except Exception as e:
@ -35,7 +35,7 @@ class RSSFeeds:
url += urllib.urlencode(post_data)
try:
with closing(shelve.open(self.db_name)) as fs:
with closing(shelve.open(self.db_name, writeback=True)) as fs:
fc = cache.Cache(fs)
feed = fc.fetch(url, False, False, request_headers)
except Exception as e: