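The AniDB XML lists (animetitles.xml and anime-list.xml) are now re-downloaded automatically when the local copy is missing or more than 24 hours old.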

Enabled shelve writeback caching (writeback=True) for both the RSS feed cache and the name parser cache.
2025-01-05 17:43:37 +00:00 · 2014-07-14 23:55:52 -07:00 · 2014-07-14 23:55:52 -07:00 · 4a061d4dd2
commit 4a061d4dd2
parent fefcfa0952
6 changed files with 71 additions and 85685 deletions
--- a/lib/adba/aniDBAbstracter.py
+++ b/lib/adba/aniDBAbstracter.py
@ -15,6 +15,8 @@
 # You should have received a copy of the GNU General Public License
 # along with aDBa.  If not, see <http://www.gnu.org/licenses/>.

+from __future__ import with_statement
+
 from time import time, sleep
 import aniDBfileInfo as fileInfo
 import xml.etree.cElementTree as etree
@ -22,11 +24,10 @@ import os, re, string
 from aniDBmaper import AniDBMaper
 from aniDBtvDBmaper import TvDBMap
 from aniDBerrors import *
-
+from aniDBfileInfo import read_anidb_xml, read_tvdb_map_xml


 class aniDBabstractObject(object):
-
    def __init__(self, aniDB, load=False):
        self.laoded = False
        self.set_connection(aniDB)
@ -98,7 +99,7 @@ class aniDBabstractObject(object):
        priority - low = 0, medium = 1, high = 2 (unconfirmed)
        
        """
-        if(self.aid):
+        if (self.aid):
            self.aniDB.notifyadd(aid=self.aid, type=1, priority=1)


@ -157,18 +158,19 @@ class Anime(aniDBabstractObject):
        self.rawData = self.aniDB.groupstatus(aid=self.aid)
        self.release_groups = []
        for line in self.rawData.datalines:
-            self.release_groups.append({"name":unicode(line["name"], "utf-8"),
-                                        "rating":line["rating"],
-                                        "range":line["episode_range"]
-                                        })
+            self.release_groups.append({"name": unicode(line["name"], "utf-8"),
+                                        "rating": line["rating"],
+                                        "range": line["episode_range"]
+            })
        return self.release_groups

-    #TODO: refactor and use the new functions in anidbFileinfo
+    # TODO: refactor and use the new functions in anidbFileinfo
    def _get_aid_from_xml(self, name):
        if not self.allAnimeXML:
-            self.allAnimeXML = self._read_animetitels_xml()
+            self.allAnimeXML = read_anidb_xml()

-        regex = re.compile('( \(\d{4}\))|[%s]' % re.escape(string.punctuation)) # remove any punctuation and e.g. ' (2011)'
+        regex = re.compile(
+            '( \(\d{4}\))|[%s]' % re.escape(string.punctuation))  # remove any punctuation and e.g. ' (2011)'
        #regex = re.compile('[%s]'  % re.escape(string.punctuation)) # remove any punctuation and e.g. ' (2011)'
        name = regex.sub('', name.lower())
        lastAid = 0
@ -185,7 +187,7 @@ class Anime(aniDBabstractObject):
    #TODO: refactor and use the new functions in anidbFileinfo
    def _get_name_from_xml(self, aid, onlyMain=True):
        if not self.allAnimeXML:
-            self.allAnimeXML = self._read_animetitels_xml()
+            self.allAnimeXML = read_anidb_xml()

        for anime in self.allAnimeXML.findall("anime"):
            if int(anime.get("aid", False)) == aid:
@ -196,15 +198,6 @@ class Anime(aniDBabstractObject):
                        return title.text
        return ""

-
-    def _read_animetitels_xml(self, path=None):
-        if not path:
-            path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "animetitles.xml")
-
-        f = open(path, "r")
-        allAnimeXML = etree.ElementTree(file=f)
-        return allAnimeXML
-
    def _builPreSequal(self):
        if self.related_aid_list and self.related_aid_type:
            try:
@ -220,10 +213,9 @@ class Anime(aniDBabstractObject):
                    self.__dict__["sequal"] = self.related_aid_list


-
 class Episode(aniDBabstractObject):
-
-    def __init__(self, aniDB, number=None, epid=None, filePath=None, fid=None, epno=None, paramsA=None, paramsF=None, load=False, calculate=False):
+    def __init__(self, aniDB, number=None, epid=None, filePath=None, fid=None, epno=None, paramsA=None, paramsF=None,
+                 load=False, calculate=False):
        self.maper = AniDBMaper()
        self.epid = epid
        self.filePath = filePath
@ -232,7 +224,6 @@ class Episode(aniDBabstractObject):
        if calculate:
            (self.ed2k, self.size) = self._calculate_file_stuff(self.filePath)

-
        if not paramsA:
            self.bitCodeA = "C000F0C0"
            self.paramsA = self.maper.getFileCodesA(self.bitCodeA)
@ -254,7 +245,8 @@ class Episode(aniDBabstractObject):
        if self.filePath and not (self.ed2k or self.size):
            (self.ed2k, self.size) = self._calculate_file_stuff(self.filePath)

-        self.rawData = self.aniDB.file(fid=self.fid, size=self.size, ed2k=self.ed2k, aid=self.aid, aname=None, gid=None, gname=None, epno=self.epno, fmask=self.bitCodeF, amask=self.bitCodeA)
+        self.rawData = self.aniDB.file(fid=self.fid, size=self.size, ed2k=self.ed2k, aid=self.aid, aname=None, gid=None,
+                                       gname=None, epno=self.epno, fmask=self.bitCodeF, amask=self.bitCodeA)
        self._fill(self.rawData.datalines[0])
        self._build_names()
        self.laoded = True
@ -273,7 +265,7 @@ class Episode(aniDBabstractObject):

        try:
            self.aniDB.mylistadd(size=self.size, ed2k=self.ed2k, state=status)
-        except Exception, e :
+        except Exception, e:
            self.log(u"exception msg: " + str(e))
        else:
            # TODO: add the name or something
--- a/lib/adba/aniDBfileInfo.py
+++ b/lib/adba/aniDBfileInfo.py
@ -16,12 +16,16 @@
 # along with aDBa.  If not, see <http://www.gnu.org/licenses/>.

 from __future__ import with_statement
+
 import hashlib
 import os
 import xml.etree.cElementTree as etree
-
+import time

 # http://www.radicand.org/blog/orz/2010/2/21/edonkey2000-hash-in-python/
+import requests
+
+
 def get_file_hash(filePath):
    """ Returns the ed2k hash of a given file."""
    if not filePath:
@ -31,8 +35,10 @@ def get_file_hash(filePath):
    def gen(f):
        while True:
            x = f.read(9728000)
-            if x: yield x
-            else: return
+            if x:
+                yield x
+            else:
+                return

    def md4_hash(data):
        m = md4()
@ -44,32 +50,58 @@ def get_file_hash(filePath):
        hashes = [md4_hash(data).digest() for data in a]
        if len(hashes) == 1:
            return hashes[0].encode("hex")
-        else: return md4_hash(reduce(lambda a,d: a + d, hashes, "")).hexdigest()
-        
-        
+        else:
+            return md4_hash(reduce(lambda a, d: a + d, hashes, "")).hexdigest()
+
+
 def get_file_size(path):
    size = os.path.getsize(path)
    return size


+def get_anime_titles_xml(path):
+    daily_dump = requests.get("http://raw.githubusercontent.com/ScudLee/anime-lists/master/animetitles.xml")
+    with open(path, "wb") as f:
+        f.write(daily_dump.content)

-def read_anidb_xml(filePath):
+
+def get_anime_list_xml(path):
+    daily_dump = requests.get("http://raw.githubusercontent.com/ScudLee/anime-lists/master/anime-list.xml")
+    with open(path, "wb") as f:
+        f.write(daily_dump.content)
+
+
+def read_anidb_xml(filePath=None):
    if not filePath:
-        filePath = os.path.join(os.path.dirname(os.path.abspath( __file__ )), "animetitles.xml")
+        filePath = os.path.join(os.path.dirname(os.path.abspath(__file__)), "animetitles.xml")
+
+    if not os.path.isfile(filePath):
+        get_anime_titles_xml(filePath)
+    else:
+        mtime = os.path.getmtime(filePath)
+        if time.time() > mtime + 24 * 60 * 60:
+            get_anime_titles_xml(filePath)
+
    return read_xml_into_etree(filePath)


-def read_tvdb_map_xml(filePath):
+def read_tvdb_map_xml(filePath=None):
    if not filePath:
-        filePath = os.path.join(os.path.dirname(os.path.abspath( __file__ )), "anime-list.xml")
+        filePath = os.path.join(os.path.dirname(os.path.abspath(__file__)), "anime-list.xml")
+
+    if not os.path.isfile(filePath):
+        get_anime_list_xml(filePath)
+    else:
+        mtime = os.path.getmtime(filePath)
+        if time.time() > mtime + 24 * 60 * 60:
+            get_anime_list_xml(filePath)
+
    return read_xml_into_etree(filePath)


 def read_xml_into_etree(filePath):
-        if not filePath:
-            return None
-        
-        f = open(filePath,"r")
-        xmlASetree = etree.ElementTree(file = f)
-        return xmlASetree
-    
+    if not filePath:
+        return None
+
+    with open(filePath, "r") as f:
+        return etree.ElementTree(file=f)
--- a/lib/adba/anime-list.xml
+++ b/lib/adba/anime-list.xml
--- a/lib/adba/animetitles.xml
+++ b/lib/adba/animetitles.xml
--- a/sickbeard/name_parser/parser.py
+++ b/sickbeard/name_parser/parser.py
@ -591,7 +591,7 @@ class NameParserCache:
        name = name.encode('utf-8', 'ignore')

        try :
-            with closing(shelve.open(self.db_name)) as npc:
+            with closing(shelve.open(self.db_name, writeback=True)) as npc:
                npc[str(name)] = parse_result

                while len(npc.items()) > self.npc_cache_size:
@ -604,7 +604,7 @@ class NameParserCache:
        name = name.encode('utf-8', 'ignore')

        try:
-            with closing(shelve.open(ek.ek(os.path.join, sickbeard.CACHE_DIR, 'name_parser_cache'))) as npc:
+            with closing(shelve.open(self.db_name, writeback=True)) as npc:
                parse_result = npc.get(str(name), None)
        except Exception as e:
            logger.log(u"NameParser cache error: " + ex(e), logger.ERROR)
--- a/sickbeard/rssfeeds.py
+++ b/sickbeard/rssfeeds.py
@ -20,7 +20,7 @@ class RSSFeeds:

    def clearCache(self, age=None):
        try:
-            with closing(shelve.open(ek.ek(os.path.join, sickbeard.CACHE_DIR, self.db_name))) as fs:
+            with closing(shelve.open(self.db_name, writeback=True)) as fs:
                fc = cache.Cache(fs)
                fc.purge(age)
        except Exception as e:
@ -35,7 +35,7 @@ class RSSFeeds:
            url += urllib.urlencode(post_data)

        try:
-            with closing(shelve.open(self.db_name)) as fs:
+            with closing(shelve.open(self.db_name, writeback=True)) as fs:
                fc = cache.Cache(fs)
                feed = fc.fetch(url, False, False, request_headers)
        except Exception as e: