Mirror of https://github.com/SickGear/SickGear.git, synced 2025-01-05 17:43:37 +00:00

Merge pull request #390 from adam111316/feature/UpdateFeedparserLibs

Update feedparser library 5.1.3 to 5.2.0 (8c62940)

This commit is contained in: commit 1d72320757
2412 changed files with 766 additions and 33727 deletions
@@ -26,6 +2,8 @@
* Change don't create a backup from an initial zero byte main database file, PEP8 and code tidy up
* Fix show list view when no shows exist and "Group show lists shows into" is set to anything other than "One Show List"
* Add coverage testing and coveralls support
* Update feedparser library 5.1.3 to 5.2.0 (8c62940)
* Remove feedcache implementation and library

[develop changelog]
* Update Requests library 2.7.0 (ab1f493) to 2.7.0 (8b5e457)

@@ -1,44 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2007 Doug Hellmann.
#
#
# All Rights Reserved
#
# Permission to use, copy, modify, and distribute this software and
# its documentation for any purpose and without fee is hereby
# granted, provided that the above copyright notice appear in all
# copies and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of Doug
# Hellmann not be used in advertising or publicity pertaining to
# distribution of the software without specific, written prior
# permission.
#
# DOUG HELLMANN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
# NO EVENT SHALL DOUG HELLMANN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#

"""

"""

__module_id__ = "$Id$"

#
# Import system modules
#


#
# Import local modules
#
from cache import Cache

#
# Module
#

@@ -1,204 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2007 Doug Hellmann.
|
||||
#
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of Doug
|
||||
# Hellmann not be used in advertising or publicity pertaining to
|
||||
# distribution of the software without specific, written prior
|
||||
# permission.
|
||||
#
|
||||
# DOUG HELLMANN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
||||
# NO EVENT SHALL DOUG HELLMANN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
||||
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
"""
|
||||
|
||||
"""
|
||||
|
||||
__module_id__ = "$Id$"
|
||||
|
||||
#
|
||||
# Import system modules
|
||||
#
|
||||
from feedparser import feedparser
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
#
|
||||
# Import local modules
|
||||
#
|
||||
|
||||
|
||||
#
|
||||
# Module
|
||||
#
|
||||
|
||||
logger = logging.getLogger('feedcache.cache')
|
||||
|
||||
|
||||
class Cache:
|
||||
"""A class to wrap Mark Pilgrim's Universal Feed Parser module
|
||||
(http://www.feedparser.org) so that parameters can be used to
|
||||
cache the feed results locally instead of fetching the feed every
|
||||
time it is requested. Uses both etag and modified times for
|
||||
caching.
|
||||
"""
|
||||
|
||||
def __init__(self, storage, timeToLiveSeconds=300, userAgent='feedcache'):
|
||||
"""
|
||||
Arguments:
|
||||
|
||||
storage -- Backing store for the cache. It should follow
|
||||
the dictionary API, with URLs used as keys. It should
|
||||
persist data.
|
||||
|
||||
timeToLiveSeconds=300 -- The length of time content should
|
||||
live in the cache before an update is attempted.
|
||||
|
||||
userAgent='feedcache' -- User agent string to be used when
|
||||
fetching feed contents.
|
||||
|
||||
"""
|
||||
self.storage = storage
|
||||
self.time_to_live = timeToLiveSeconds
|
||||
self.user_agent = userAgent
|
||||
return
|
||||
|
||||
def purge(self, olderThanSeconds):
|
||||
"""Remove cached data from the storage if the data is older than the
|
||||
date given. If olderThanSeconds is None, the entire cache is purged.
|
||||
"""
|
||||
if olderThanSeconds is None:
|
||||
logger.debug('purging the entire cache')
|
||||
for key in self.storage.keys():
|
||||
del self.storage[key]
|
||||
else:
|
||||
now = time.time()
|
||||
# Iterate over the keys and load each item one at a time
|
||||
# to avoid having the entire cache loaded into memory
|
||||
# at one time.
|
||||
for url in self.storage.keys():
|
||||
(cached_time, cached_data) = self.storage[url]
|
||||
age = now - cached_time
|
||||
if age >= olderThanSeconds:
|
||||
logger.debug('removing %s with age %d', url, age)
|
||||
del self.storage[url]
|
||||
return
|
||||
|
||||
def fetch(self, url, force_update=False, offline=False, request_headers=None):
|
||||
"""Return the feed at url.
|
||||
|
||||
url - The URL of the feed.
|
||||
|
||||
force_update=False - When True, update the cache whether the
|
||||
current contents have
|
||||
exceeded their time-to-live
|
||||
or not.
|
||||
|
||||
offline=False - When True, only return data from the local
|
||||
cache and never access the remote
|
||||
URL.
|
||||
|
||||
If there is data for that feed in the cache already, check
|
||||
the expiration date before accessing the server. If the
|
||||
cached data has not expired, return it without accessing the
|
||||
server.
|
||||
|
||||
In cases where the server is accessed, check for updates
|
||||
before deciding what to return. If the server reports a
|
||||
status of 304, the previously cached content is returned.
|
||||
|
||||
The cache is only updated if the server returns a status of
|
||||
200, to avoid holding redirected data in the cache.
|
||||
"""
|
||||
logger.debug('url="%s"' % url)
|
||||
|
||||
# Convert the URL to a value we can use
|
||||
# as a key for the storage backend.
|
||||
key = url
|
||||
if isinstance(key, unicode):
|
||||
key = key.encode('utf-8')
|
||||
|
||||
modified = None
|
||||
etag = None
|
||||
now = time.time()
|
||||
|
||||
cached_time, cached_content = self.storage.get(key, (None, None))
|
||||
|
||||
# Offline mode support (no networked requests)
|
||||
# so return whatever we found in the storage.
|
||||
# If there is nothing in the storage, we'll be returning None.
|
||||
if offline:
|
||||
logger.debug('offline mode')
|
||||
return cached_content
|
||||
|
||||
# Does the storage contain a version of the data
|
||||
# which is older than the time-to-live?
|
||||
logger.debug('cache modified time: %s' % str(cached_time))
|
||||
if cached_time is not None and not force_update:
|
||||
if self.time_to_live:
|
||||
age = now - cached_time
|
||||
if age <= self.time_to_live:
|
||||
logger.debug('cache contents still valid')
|
||||
return cached_content
|
||||
else:
|
||||
logger.debug('cache contents older than TTL')
|
||||
else:
|
||||
logger.debug('no TTL value')
|
||||
|
||||
# The cache is out of date, but we have
|
||||
# something. Try to use the etag and modified_time
|
||||
# values from the cached content.
|
||||
etag = cached_content.get('etag')
|
||||
modified = cached_content.get('modified')
|
||||
logger.debug('cached etag=%s' % etag)
|
||||
logger.debug('cached modified=%s' % str(modified))
|
||||
else:
|
||||
logger.debug('nothing in the cache, or forcing update')
|
||||
|
||||
# We know we need to fetch, so go ahead and do it.
|
||||
logger.debug('fetching...')
|
||||
parsed_result = feedparser.parse(url,
|
||||
agent=self.user_agent,
|
||||
modified=modified,
|
||||
etag=etag,
|
||||
request_headers=request_headers)
|
||||
|
||||
status = parsed_result.get('status', None)
|
||||
logger.debug('HTTP status=%s' % status)
|
||||
if status == 304:
|
||||
# No new data, based on the etag or modified values.
|
||||
# We need to update the modified time in the
|
||||
# storage, though, so we know that what we have
|
||||
# stored is up to date.
|
||||
self.storage[key] = (now, cached_content)
|
||||
|
||||
# Return the data from the cache, since
|
||||
# the parsed data will be empty.
|
||||
parsed_result = cached_content
|
||||
elif status == 200:
|
||||
# There is new content, so store it unless there was an error.
|
||||
error = parsed_result.get('bozo_exception')
|
||||
if not error:
|
||||
logger.debug('Updating stored data for %s' % url)
|
||||
self.storage[key] = (now, parsed_result)
|
||||
else:
|
||||
logger.warning('Not storing data with exception: %s',
|
||||
error)
|
||||
else:
|
||||
logger.warning('Not updating cache with HTTP status %s', status)
|
||||
|
||||
return parsed_result
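
The fetch() method above reduces to a conditional GET driven by the stored etag and modified validators. A minimal standalone sketch of the same pattern follows; it assumes only the feedparser.parse() keyword arguments already used above, and the in-memory store dict and TTL handling are illustrative, not part of the removed module:

import time

import feedparser

store = {}  # url -> (timestamp, parsed_result); stands in for the Cache storage backend


def cached_fetch(url, ttl=300):
    ts, cached = store.get(url, (None, None))
    if ts is not None and time.time() - ts <= ttl:
        return cached  # still inside the time-to-live window, no network access
    # Send the stored validators so the server can reply 304 Not Modified.
    result = feedparser.parse(
        url,
        etag=cached.get('etag') if cached else None,
        modified=cached.get('modified') if cached else None,
    )
    if result.get('status') == 304:
        store[url] = (time.time(), cached)  # content unchanged; refresh the timestamp only
        return cached
    if result.get('status') == 200 and not result.get('bozo_exception'):
        store[url] = (time.time(), result)  # replace the cached copy with the new parse
    return result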
@@ -1,69 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2007 Doug Hellmann.
|
||||
#
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of Doug
|
||||
# Hellmann not be used in advertising or publicity pertaining to
|
||||
# distribution of the software without specific, written prior
|
||||
# permission.
|
||||
#
|
||||
# DOUG HELLMANN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
||||
# NO EVENT SHALL DOUG HELLMANN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
||||
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
from __future__ import with_statement
|
||||
|
||||
"""Lock wrapper for cache storage which do not permit multi-threaded access.
|
||||
|
||||
"""
|
||||
|
||||
__module_id__ = "$Id$"
|
||||
|
||||
#
|
||||
# Import system modules
|
||||
#
|
||||
import threading
|
||||
|
||||
#
|
||||
# Import local modules
|
||||
#
|
||||
|
||||
|
||||
#
|
||||
# Module
|
||||
#
|
||||
|
||||
class CacheStorageLock:
|
||||
"""Lock wrapper for cache storage which do not permit multi-threaded access.
|
||||
"""
|
||||
|
||||
def __init__(self, shelf):
|
||||
self.lock = threading.Lock()
|
||||
self.shelf = shelf
|
||||
return
|
||||
|
||||
def __getitem__(self, key):
|
||||
with self.lock:
|
||||
return self.shelf[key]
|
||||
|
||||
def get(self, key, default=None):
|
||||
with self.lock:
|
||||
try:
|
||||
return self.shelf[key]
|
||||
except KeyError:
|
||||
return default
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
with self.lock:
|
||||
self.shelf[key] = value
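
CacheStorageLock only serializes access to a single backing shelf. A short usage sketch, along the lines of the shelve-backed test later in this diff (the file path and feed URL are placeholders):

import shelve

from cache import Cache                        # the removed feedcache module shown above
from cachestoragelock import CacheStorageLock

shelf = shelve.open('/tmp/feeds.shelve')       # plain shelve objects are not safe to share across threads
try:
    storage = CacheStorageLock(shelf)          # serialize reads and writes to the shelf
    fc = Cache(storage)
    parsed = fc.fetch('http://example.com/feed.atom')  # placeholder URL
    print(parsed.feed.title)
finally:
    shelf.close()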
@@ -1,63 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2007 Doug Hellmann.
|
||||
#
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of Doug
|
||||
# Hellmann not be used in advertising or publicity pertaining to
|
||||
# distribution of the software without specific, written prior
|
||||
# permission.
|
||||
#
|
||||
# DOUG HELLMANN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
||||
# NO EVENT SHALL DOUG HELLMANN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
||||
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
"""Example use of feedcache.Cache.
|
||||
|
||||
"""
|
||||
|
||||
__module_id__ = "$Id$"
|
||||
|
||||
#
|
||||
# Import system modules
|
||||
#
|
||||
import sys
|
||||
import shelve
|
||||
|
||||
#
|
||||
# Import local modules
|
||||
#
|
||||
import cache
|
||||
|
||||
#
|
||||
# Module
|
||||
#
|
||||
|
||||
def main(urls=[]):
|
||||
print 'Saving feed data to ./.feedcache'
|
||||
storage = shelve.open('.feedcache')
|
||||
try:
|
||||
fc = cache.Cache(storage)
|
||||
for url in urls:
|
||||
parsed_data = fc.fetch(url)
|
||||
print parsed_data.feed.title
|
||||
for entry in parsed_data.entries:
|
||||
print '\t', entry.title
|
||||
finally:
|
||||
storage.close()
|
||||
return
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(sys.argv[1:])
|
||||
|
|
@@ -1,144 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2007 Doug Hellmann.
|
||||
#
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of Doug
|
||||
# Hellmann not be used in advertising or publicity pertaining to
|
||||
# distribution of the software without specific, written prior
|
||||
# permission.
|
||||
#
|
||||
# DOUG HELLMANN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
||||
# NO EVENT SHALL DOUG HELLMANN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
||||
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
"""Example use of feedcache.Cache combined with threads.
|
||||
|
||||
"""
|
||||
|
||||
__module_id__ = "$Id$"
|
||||
|
||||
#
|
||||
# Import system modules
|
||||
#
|
||||
import Queue
|
||||
import sys
|
||||
import shove
|
||||
import threading
|
||||
|
||||
#
|
||||
# Import local modules
|
||||
#
|
||||
import cache
|
||||
|
||||
#
|
||||
# Module
|
||||
#
|
||||
|
||||
MAX_THREADS=5
|
||||
OUTPUT_DIR='/tmp/feedcache_example'
|
||||
|
||||
|
||||
def main(urls=[]):
|
||||
|
||||
if not urls:
|
||||
print 'Specify the URLs to a few RSS or Atom feeds on the command line.'
|
||||
return
|
||||
|
||||
# Decide how many threads to start
|
||||
num_threads = min(len(urls), MAX_THREADS)
|
||||
|
||||
# Add the URLs to a queue
|
||||
url_queue = Queue.Queue()
|
||||
for url in urls:
|
||||
url_queue.put(url)
|
||||
|
||||
# Add poison pills to the url queue to cause
|
||||
# the worker threads to break out of their loops
|
||||
for i in range(num_threads):
|
||||
url_queue.put(None)
|
||||
|
||||
# Track the entries in the feeds being fetched
|
||||
entry_queue = Queue.Queue()
|
||||
|
||||
print 'Saving feed data to', OUTPUT_DIR
|
||||
storage = shove.Shove('file://' + OUTPUT_DIR)
|
||||
try:
|
||||
|
||||
# Start a few worker threads
|
||||
worker_threads = []
|
||||
for i in range(num_threads):
|
||||
t = threading.Thread(target=fetch_urls,
|
||||
args=(storage, url_queue, entry_queue,))
|
||||
worker_threads.append(t)
|
||||
t.setDaemon(True)
|
||||
t.start()
|
||||
|
||||
# Start a thread to print the results
|
||||
printer_thread = threading.Thread(target=print_entries, args=(entry_queue,))
|
||||
printer_thread.setDaemon(True)
|
||||
printer_thread.start()
|
||||
|
||||
# Wait for all of the URLs to be processed
|
||||
url_queue.join()
|
||||
|
||||
# Wait for the worker threads to finish
|
||||
for t in worker_threads:
|
||||
t.join()
|
||||
|
||||
# Poison the print thread and wait for it to exit
|
||||
entry_queue.put((None,None))
|
||||
entry_queue.join()
|
||||
printer_thread.join()
|
||||
|
||||
finally:
|
||||
storage.close()
|
||||
return
|
||||
|
||||
|
||||
def fetch_urls(storage, input_queue, output_queue):
|
||||
"""Thread target for fetching feed data.
|
||||
"""
|
||||
c = cache.Cache(storage)
|
||||
|
||||
while True:
|
||||
next_url = input_queue.get()
|
||||
if next_url is None: # None causes thread to exit
|
||||
input_queue.task_done()
|
||||
break
|
||||
|
||||
feed_data = c.fetch(next_url)
|
||||
for entry in feed_data.entries:
|
||||
output_queue.put( (feed_data.feed, entry) )
|
||||
input_queue.task_done()
|
||||
return
|
||||
|
||||
|
||||
def print_entries(input_queue):
|
||||
"""Thread target for printing the contents of the feeds.
|
||||
"""
|
||||
while True:
|
||||
feed, entry = input_queue.get()
|
||||
if feed is None: # None causes thread to exit
|
||||
input_queue.task_done()
|
||||
break
|
||||
|
||||
print '%s: %s' % (feed.title, entry.title)
|
||||
input_queue.task_done()
|
||||
return
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(sys.argv[1:])
|
||||
|
|
@@ -1,323 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2007 Doug Hellmann.
|
||||
#
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of Doug
|
||||
# Hellmann not be used in advertising or publicity pertaining to
|
||||
# distribution of the software without specific, written prior
|
||||
# permission.
|
||||
#
|
||||
# DOUG HELLMANN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
||||
# NO EVENT SHALL DOUG HELLMANN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
||||
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
"""Unittests for feedcache.cache
|
||||
|
||||
"""
|
||||
|
||||
__module_id__ = "$Id$"
|
||||
|
||||
import logging
|
||||
logging.basicConfig(level=logging.DEBUG,
|
||||
format='%(asctime)s %(levelname)-8s %(name)s %(message)s',
|
||||
)
|
||||
logger = logging.getLogger('feedcache.test_cache')
|
||||
|
||||
#
|
||||
# Import system modules
|
||||
#
|
||||
import copy
|
||||
import time
|
||||
import unittest
|
||||
import UserDict
|
||||
|
||||
#
|
||||
# Import local modules
|
||||
#
|
||||
import cache
|
||||
from test_server import HTTPTestBase, TestHTTPServer
|
||||
|
||||
#
|
||||
# Module
|
||||
#
|
||||
|
||||
|
||||
class CacheTestBase(HTTPTestBase):
|
||||
|
||||
CACHE_TTL = 30
|
||||
|
||||
def setUp(self):
|
||||
HTTPTestBase.setUp(self)
|
||||
|
||||
self.storage = self.getStorage()
|
||||
self.cache = cache.Cache(self.storage,
|
||||
timeToLiveSeconds=self.CACHE_TTL,
|
||||
userAgent='feedcache.test',
|
||||
)
|
||||
return
|
||||
|
||||
def getStorage(self):
|
||||
"Return a cache storage for the test."
|
||||
return {}
|
||||
|
||||
|
||||
class CacheTest(CacheTestBase):
|
||||
|
||||
CACHE_TTL = 30
|
||||
|
||||
def getServer(self):
|
||||
"These tests do not want to use the ETag or If-Modified-Since headers"
|
||||
return TestHTTPServer(applyModifiedHeaders=False)
|
||||
|
||||
def testRetrieveNotInCache(self):
|
||||
# Retrieve data not already in the cache.
|
||||
feed_data = self.cache.fetch(self.TEST_URL)
|
||||
self.failUnless(feed_data)
|
||||
self.failUnlessEqual(feed_data.feed.title, 'CacheTest test data')
|
||||
return
|
||||
|
||||
def testRetrieveIsInCache(self):
|
||||
# Retrieve data which is already in the cache,
|
||||
# and verify that the second copy is identical
|
||||
# to the first.
|
||||
|
||||
# First fetch
|
||||
feed_data = self.cache.fetch(self.TEST_URL)
|
||||
|
||||
# Second fetch
|
||||
feed_data2 = self.cache.fetch(self.TEST_URL)
|
||||
|
||||
# Since it is the in-memory storage, we should have the
|
||||
# exact same object.
|
||||
self.failUnless(feed_data is feed_data2)
|
||||
return
|
||||
|
||||
def testExpireDataInCache(self):
|
||||
# Retrieve data which is in the cache but which
|
||||
# has expired and verify that the second copy
|
||||
# is different from the first.
|
||||
|
||||
# First fetch
|
||||
feed_data = self.cache.fetch(self.TEST_URL)
|
||||
|
||||
# Change the timeout and sleep to move the clock
|
||||
self.cache.time_to_live = 0
|
||||
time.sleep(1)
|
||||
|
||||
# Second fetch
|
||||
feed_data2 = self.cache.fetch(self.TEST_URL)
|
||||
|
||||
# Since we reparsed, the cache response should be different.
|
||||
self.failIf(feed_data is feed_data2)
|
||||
return
|
||||
|
||||
def testForceUpdate(self):
|
||||
# Force cache to retrieve data which is already in the cache,
|
||||
# and verify that the new data is different.
|
||||
|
||||
# Pre-populate the storage with bad data
|
||||
self.cache.storage[self.TEST_URL] = (time.time() + 100, self.id())
|
||||
|
||||
# Fetch the data
|
||||
feed_data = self.cache.fetch(self.TEST_URL, force_update=True)
|
||||
|
||||
self.failIfEqual(feed_data, self.id())
|
||||
return
|
||||
|
||||
def testOfflineMode(self):
|
||||
# Retrieve data which is already in the cache,
|
||||
# whether it is expired or not.
|
||||
|
||||
# Pre-populate the storage with data
|
||||
self.cache.storage[self.TEST_URL] = (0, self.id())
|
||||
|
||||
# Fetch it
|
||||
feed_data = self.cache.fetch(self.TEST_URL, offline=True)
|
||||
|
||||
self.failUnlessEqual(feed_data, self.id())
|
||||
return
|
||||
|
||||
def testUnicodeURL(self):
|
||||
# Pass in a URL which is unicode
|
||||
|
||||
url = unicode(self.TEST_URL)
|
||||
feed_data = self.cache.fetch(url)
|
||||
|
||||
storage = self.cache.storage
|
||||
key = unicode(self.TEST_URL).encode('UTF-8')
|
||||
|
||||
# Verify that the storage has a key
|
||||
self.failUnless(key in storage)
|
||||
|
||||
# Now pull the data from the storage directly
|
||||
storage_timeout, storage_data = self.cache.storage.get(key)
|
||||
self.failUnlessEqual(feed_data, storage_data)
|
||||
return
|
||||
|
||||
|
||||
class SingleWriteMemoryStorage(UserDict.UserDict):
|
||||
"""Cache storage which only allows the cache value
|
||||
for a URL to be updated one time.
|
||||
"""
|
||||
|
||||
def __setitem__(self, url, data):
|
||||
if url in self.keys():
|
||||
modified, existing = self[url]
|
||||
# Allow the modified time to change,
|
||||
# but not the feed content.
|
||||
if data[1] != existing:
|
||||
raise AssertionError('Trying to update cache for %s to %s' \
|
||||
% (url, data))
|
||||
UserDict.UserDict.__setitem__(self, url, data)
|
||||
return
|
||||
|
||||
|
||||
class CacheConditionalGETTest(CacheTestBase):
|
||||
|
||||
CACHE_TTL = 0
|
||||
|
||||
def getStorage(self):
|
||||
return SingleWriteMemoryStorage()
|
||||
|
||||
def testFetchOnceForEtag(self):
|
||||
# Fetch data which has a valid ETag value, and verify
|
||||
# that while we hit the server twice the response
|
||||
# codes cause us to use the same data.
|
||||
|
||||
# First fetch populates the cache
|
||||
response1 = self.cache.fetch(self.TEST_URL)
|
||||
self.failUnlessEqual(response1.feed.title, 'CacheTest test data')
|
||||
|
||||
# Remove the modified setting from the cache so we know
|
||||
# the next time we check the etag will be used
|
||||
# to check for updates. Since we are using an in-memory
|
||||
# cache, modifying response1 updates the cache storage
|
||||
# directly.
|
||||
response1['modified'] = None
|
||||
|
||||
# This should result in a 304 status, and no data from
|
||||
# the server. That means the cache won't try to
|
||||
# update the storage, so our SingleWriteMemoryStorage
|
||||
# should not raise and we should have the same
|
||||
# response object.
|
||||
response2 = self.cache.fetch(self.TEST_URL)
|
||||
self.failUnless(response1 is response2)
|
||||
|
||||
# Should have hit the server twice
|
||||
self.failUnlessEqual(self.server.getNumRequests(), 2)
|
||||
return
|
||||
|
||||
def testFetchOnceForModifiedTime(self):
|
||||
# Fetch data which has a valid Last-Modified value, and verify
|
||||
# that while we hit the server twice the response
|
||||
# codes cause us to use the same data.
|
||||
|
||||
# First fetch populates the cache
|
||||
response1 = self.cache.fetch(self.TEST_URL)
|
||||
self.failUnlessEqual(response1.feed.title, 'CacheTest test data')
|
||||
|
||||
# Remove the etag setting from the cache so we know
|
||||
# the next time we check the modified time will be used
|
||||
# to check for updates. Since we are using an in-memory
|
||||
# cache, modifying response1 updates the cache storage
|
||||
# directly.
|
||||
response1['etag'] = None
|
||||
|
||||
# This should result in a 304 status, and no data from
|
||||
# the server. That means the cache won't try to
|
||||
# update the storage, so our SingleWriteMemoryStorage
|
||||
# should not raise and we should have the same
|
||||
# response object.
|
||||
response2 = self.cache.fetch(self.TEST_URL)
|
||||
self.failUnless(response1 is response2)
|
||||
|
||||
# Should have hit the server twice
|
||||
self.failUnlessEqual(self.server.getNumRequests(), 2)
|
||||
return
|
||||
|
||||
|
||||
class CacheRedirectHandlingTest(CacheTestBase):
|
||||
|
||||
def _test(self, response):
|
||||
# Set up the server to redirect requests,
|
||||
# then verify that the cache is not updated
|
||||
# for the original or new URL and that the
|
||||
# redirect status is fed back to us with
|
||||
# the fetched data.
|
||||
|
||||
self.server.setResponse(response, '/redirected')
|
||||
|
||||
response1 = self.cache.fetch(self.TEST_URL)
|
||||
|
||||
# The response should include the status code we set
|
||||
self.failUnlessEqual(response1.get('status'), response)
|
||||
|
||||
# The response should include the new URL, too
|
||||
self.failUnlessEqual(response1.href, self.TEST_URL + 'redirected')
|
||||
|
||||
# The response should not have been cached under either URL
|
||||
self.failIf(self.TEST_URL in self.storage)
|
||||
self.failIf(self.TEST_URL + 'redirected' in self.storage)
|
||||
return
|
||||
|
||||
def test301(self):
|
||||
self._test(301)
|
||||
|
||||
def test302(self):
|
||||
self._test(302)
|
||||
|
||||
def test303(self):
|
||||
self._test(303)
|
||||
|
||||
def test307(self):
|
||||
self._test(307)
|
||||
|
||||
|
||||
class CachePurgeTest(CacheTestBase):
|
||||
|
||||
def testPurgeAll(self):
|
||||
# Remove everything from the cache
|
||||
|
||||
self.cache.fetch(self.TEST_URL)
|
||||
self.failUnless(self.storage.keys(),
|
||||
'Have no data in the cache storage')
|
||||
|
||||
self.cache.purge(None)
|
||||
|
||||
self.failIf(self.storage.keys(),
|
||||
'Still have data in the cache storage')
|
||||
return
|
||||
|
||||
def testPurgeByAge(self):
|
||||
# Remove old content from the cache
|
||||
|
||||
self.cache.fetch(self.TEST_URL)
|
||||
self.failUnless(self.storage.keys(),
|
||||
'have no data in the cache storage')
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
remains = (time.time(), copy.deepcopy(self.storage[self.TEST_URL][1]))
|
||||
self.storage['http://this.should.remain/'] = remains
|
||||
|
||||
self.cache.purge(1)
|
||||
|
||||
self.failUnlessEqual(self.storage.keys(),
|
||||
['http://this.should.remain/'])
|
||||
return
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@@ -1,90 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2007 Doug Hellmann.
|
||||
#
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of Doug
|
||||
# Hellmann not be used in advertising or publicity pertaining to
|
||||
# distribution of the software without specific, written prior
|
||||
# permission.
|
||||
#
|
||||
# DOUG HELLMANN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
||||
# NO EVENT SHALL DOUG HELLMANN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
||||
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
"""Tests for shelflock.
|
||||
|
||||
"""
|
||||
|
||||
__module_id__ = "$Id$"
|
||||
|
||||
#
|
||||
# Import system modules
|
||||
#
|
||||
import os
|
||||
import shelve
|
||||
import tempfile
|
||||
import threading
|
||||
import unittest
|
||||
|
||||
#
|
||||
# Import local modules
|
||||
#
|
||||
from cache import Cache
|
||||
from cachestoragelock import CacheStorageLock
|
||||
from test_server import HTTPTestBase
|
||||
|
||||
#
|
||||
# Module
|
||||
#
|
||||
|
||||
class CacheShelveTest(HTTPTestBase):
|
||||
|
||||
def setUp(self):
|
||||
HTTPTestBase.setUp(self)
|
||||
handle, self.shelve_filename = tempfile.mkstemp('.shelve')
|
||||
os.close(handle) # we just want the file name, so close the open handle
|
||||
os.unlink(self.shelve_filename) # remove the empty file
|
||||
return
|
||||
|
||||
def tearDown(self):
|
||||
try:
|
||||
os.unlink(self.shelve_filename)
|
||||
except AttributeError:
|
||||
pass
|
||||
HTTPTestBase.tearDown(self)
|
||||
return
|
||||
|
||||
def test(self):
|
||||
storage = shelve.open(self.shelve_filename)
|
||||
locking_storage = CacheStorageLock(storage)
|
||||
try:
|
||||
fc = Cache(locking_storage)
|
||||
|
||||
# First fetch the data through the cache
|
||||
parsed_data = fc.fetch(self.TEST_URL)
|
||||
self.failUnlessEqual(parsed_data.feed.title, 'CacheTest test data')
|
||||
|
||||
# Now retrieve the same data directly from the shelf
|
||||
modified, shelved_data = storage[self.TEST_URL]
|
||||
|
||||
# The data should be the same
|
||||
self.failUnlessEqual(parsed_data, shelved_data)
|
||||
finally:
|
||||
storage.close()
|
||||
return
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@@ -1,241 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2007 Doug Hellmann.
|
||||
#
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of Doug
|
||||
# Hellmann not be used in advertising or publicity pertaining to
|
||||
# distribution of the software without specific, written prior
|
||||
# permission.
|
||||
#
|
||||
# DOUG HELLMANN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
||||
# NO EVENT SHALL DOUG HELLMANN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
||||
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
"""Simple HTTP server for testing the feed cache.
|
||||
|
||||
"""
|
||||
|
||||
__module_id__ = "$Id$"
|
||||
|
||||
#
|
||||
# Import system modules
|
||||
#
|
||||
import BaseHTTPServer
|
||||
import logging
|
||||
import md5
|
||||
import threading
|
||||
import time
|
||||
import unittest
|
||||
import urllib
|
||||
|
||||
#
|
||||
# Import local modules
|
||||
#
|
||||
|
||||
|
||||
#
|
||||
# Module
|
||||
#
|
||||
logger = logging.getLogger('feedcache.test_server')
|
||||
|
||||
|
||||
def make_etag(data):
|
||||
"""Given a string containing data to be returned to the client,
|
||||
compute an ETag value for the data.
|
||||
"""
|
||||
_md5 = md5.new()
|
||||
_md5.update(data)
|
||||
return _md5.hexdigest()
|
||||
|
||||
|
||||
class TestHTTPHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
||||
"HTTP request handler which serves the same feed data every time."
|
||||
|
||||
FEED_DATA = """<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us">
|
||||
<title>CacheTest test data</title>
|
||||
<link href="http://localhost/feedcache/" rel="alternate"></link>
|
||||
<link href="http://localhost/feedcache/atom/" rel="self"></link>
|
||||
<id>http://localhost/feedcache/</id>
|
||||
<updated>2006-10-14T11:00:36Z</updated>
|
||||
<entry>
|
||||
<title>single test entry</title>
|
||||
<link href="http://www.example.com/" rel="alternate"></link>
|
||||
<updated>2006-10-14T11:00:36Z</updated>
|
||||
<author>
|
||||
<name>author goes here</name>
|
||||
<email>authoremail@example.com</email>
|
||||
</author>
|
||||
<id>http://www.example.com/</id>
|
||||
<summary type="html">description goes here</summary>
|
||||
<link length="100" href="http://www.example.com/enclosure" type="text/html" rel="enclosure">
|
||||
</link>
|
||||
</entry>
|
||||
</feed>"""
|
||||
|
||||
# The data does not change, so save the ETag and modified times
|
||||
# as class attributes.
|
||||
ETAG = make_etag(FEED_DATA)
|
||||
# Calculated using email.utils.formatdate(usegmt=True)
|
||||
MODIFIED_TIME = 'Sun, 08 Apr 2012 20:16:48 GMT'
|
||||
|
||||
def do_GET(self):
|
||||
"Handle GET requests."
|
||||
logger.debug('GET %s', self.path)
|
||||
|
||||
if self.path == '/shutdown':
|
||||
# Shortcut to handle stopping the server
|
||||
logger.debug('Stopping server')
|
||||
self.server.stop()
|
||||
self.send_response(200)
|
||||
|
||||
else:
|
||||
# Record the request for tests that count them
|
||||
self.server.requests.append(self.path)
|
||||
# Process the request
|
||||
logger.debug('pre-defined response code: %d', self.server.response)
|
||||
handler_method_name = 'do_GET_%d' % self.server.response
|
||||
handler_method = getattr(self, handler_method_name)
|
||||
handler_method()
|
||||
return
|
||||
|
||||
def do_GET_3xx(self):
|
||||
"Handle redirects"
|
||||
if self.path.endswith('/redirected'):
|
||||
logger.debug('already redirected')
|
||||
# We have already redirected, so return the data.
|
||||
return self.do_GET_200()
|
||||
new_path = self.server.new_path
|
||||
logger.debug('redirecting to %s', new_path)
|
||||
self.send_response(self.server.response)
|
||||
self.send_header('Location', new_path)
|
||||
return
|
||||
|
||||
do_GET_301 = do_GET_3xx
|
||||
do_GET_302 = do_GET_3xx
|
||||
do_GET_303 = do_GET_3xx
|
||||
do_GET_307 = do_GET_3xx
|
||||
|
||||
def do_GET_200(self):
|
||||
logger.debug('Etag: %s' % self.ETAG)
|
||||
logger.debug('Last-Modified: %s' % self.MODIFIED_TIME)
|
||||
|
||||
incoming_etag = self.headers.get('If-None-Match', None)
|
||||
logger.debug('Incoming ETag: "%s"' % incoming_etag)
|
||||
|
||||
incoming_modified = self.headers.get('If-Modified-Since', None)
|
||||
logger.debug('Incoming If-Modified-Since: %s' % incoming_modified)
|
||||
|
||||
send_data = True
|
||||
|
||||
# Does the client have the same version of the data we have?
|
||||
if self.server.apply_modified_headers:
|
||||
if incoming_etag == self.ETAG:
|
||||
logger.debug('Response 304, etag')
|
||||
self.send_response(304)
|
||||
send_data = False
|
||||
|
||||
elif incoming_modified == self.MODIFIED_TIME:
|
||||
logger.debug('Response 304, modified time')
|
||||
self.send_response(304)
|
||||
send_data = False
|
||||
|
||||
# Now optionally send the data, if the client needs it
|
||||
if send_data:
|
||||
logger.debug('Response 200')
|
||||
self.send_response(200)
|
||||
|
||||
self.send_header('Content-Type', 'application/atom+xml')
|
||||
|
||||
logger.debug('Outgoing Etag: %s' % self.ETAG)
|
||||
self.send_header('ETag', self.ETAG)
|
||||
|
||||
logger.debug('Outgoing modified time: %s' % self.MODIFIED_TIME)
|
||||
self.send_header('Last-Modified', self.MODIFIED_TIME)
|
||||
|
||||
self.end_headers()
|
||||
|
||||
logger.debug('Sending data')
|
||||
self.wfile.write(self.FEED_DATA)
|
||||
return
|
||||
|
||||
|
||||
class TestHTTPServer(BaseHTTPServer.HTTPServer):
|
||||
"""HTTP Server which counts the number of requests made
|
||||
and can stop based on client instructions.
|
||||
"""
|
||||
|
||||
def __init__(self, applyModifiedHeaders=True, handler=TestHTTPHandler):
|
||||
self.apply_modified_headers = applyModifiedHeaders
|
||||
self.keep_serving = True
|
||||
self.requests = []
|
||||
self.setResponse(200)
|
||||
BaseHTTPServer.HTTPServer.__init__(self, ('', 9999), handler)
|
||||
return
|
||||
|
||||
def setResponse(self, newResponse, newPath=None):
|
||||
"""Sets the response code to use for future requests, and a new
|
||||
path to be used as a redirect target, if necessary.
|
||||
"""
|
||||
self.response = newResponse
|
||||
self.new_path = newPath
|
||||
return
|
||||
|
||||
def getNumRequests(self):
|
||||
"Return the number of requests which have been made on the server."
|
||||
return len(self.requests)
|
||||
|
||||
def stop(self):
|
||||
"Stop serving requests, after the next request."
|
||||
self.keep_serving = False
|
||||
return
|
||||
|
||||
def serve_forever(self):
|
||||
"Main loop for server"
|
||||
while self.keep_serving:
|
||||
self.handle_request()
|
||||
logger.debug('exiting')
|
||||
return
|
||||
|
||||
|
||||
class HTTPTestBase(unittest.TestCase):
|
||||
"Base class for tests that use a TestHTTPServer"
|
||||
|
||||
TEST_URL = 'http://localhost:9999/'
|
||||
|
||||
CACHE_TTL = 0
|
||||
|
||||
def setUp(self):
|
||||
self.server = self.getServer()
|
||||
self.server_thread = threading.Thread(target=self.server.serve_forever)
|
||||
# set daemon flag so the tests don't hang if cleanup fails
|
||||
self.server_thread.setDaemon(True)
|
||||
self.server_thread.start()
|
||||
return
|
||||
|
||||
def getServer(self):
|
||||
"Return a web server for the test."
|
||||
s = TestHTTPServer()
|
||||
s.setResponse(200)
|
||||
return s
|
||||
|
||||
def tearDown(self):
|
||||
# Stop the server thread
|
||||
urllib.urlretrieve(self.TEST_URL + 'shutdown')
|
||||
time.sleep(1)
|
||||
self.server.server_close()
|
||||
self.server_thread.join()
|
||||
return
|
|
@@ -1,89 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2007 Doug Hellmann.
|
||||
#
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of Doug
|
||||
# Hellmann not be used in advertising or publicity pertaining to
|
||||
# distribution of the software without specific, written prior
|
||||
# permission.
|
||||
#
|
||||
# DOUG HELLMANN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
||||
# NO EVENT SHALL DOUG HELLMANN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
||||
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
"""Tests with shove filesystem storage.
|
||||
|
||||
"""
|
||||
|
||||
__module_id__ = "$Id$"
|
||||
|
||||
#
|
||||
# Import system modules
|
||||
#
|
||||
import os
|
||||
import shove
|
||||
import tempfile
|
||||
import threading
|
||||
import unittest
|
||||
|
||||
#
|
||||
# Import local modules
|
||||
#
|
||||
from cache import Cache
|
||||
from test_server import HTTPTestBase
|
||||
|
||||
#
|
||||
# Module
|
||||
#
|
||||
|
||||
class CacheShoveTest(HTTPTestBase):
|
||||
|
||||
def setUp(self):
|
||||
HTTPTestBase.setUp(self)
|
||||
self.shove_dirname = tempfile.mkdtemp('shove')
|
||||
return
|
||||
|
||||
def tearDown(self):
|
||||
try:
|
||||
os.system('rm -rf %s' % self.shove_dirname)  # remove the temp dir created in setUp
|
||||
except AttributeError:
|
||||
pass
|
||||
HTTPTestBase.tearDown(self)
|
||||
return
|
||||
|
||||
def test(self):
|
||||
# First fetch the data through the cache
|
||||
storage = shove.Shove('file://' + self.shove_dirname)
|
||||
try:
|
||||
fc = Cache(storage)
|
||||
parsed_data = fc.fetch(self.TEST_URL)
|
||||
self.failUnlessEqual(parsed_data.feed.title, 'CacheTest test data')
|
||||
finally:
|
||||
storage.close()
|
||||
|
||||
# Now retrieve the same data directly from the shelf
|
||||
storage = shove.Shove('file://' + self.shove_dirname)
|
||||
try:
|
||||
modified, shelved_data = storage[self.TEST_URL]
|
||||
finally:
|
||||
storage.close()
|
||||
|
||||
# The data should be the same
|
||||
self.failUnlessEqual(parsed_data, shelved_data)
|
||||
return
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@@ -1,30 +0,0 @@
Metadata-Version: 1.1
Name: feedparser
Version: 5.1.3
Summary: Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds
Home-page: http://code.google.com/p/feedparser/
Author: Kurt McKee
Author-email: contactme@kurtmckee.org
License: UNKNOWN
Download-URL: http://code.google.com/p/feedparser/
Description: UNKNOWN
Keywords: atom,cdf,feed,parser,rdf,rss
Platform: POSIX
Platform: Windows
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.4
Classifier: Programming Language :: Python :: 2.5
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.0
Classifier: Programming Language :: Python :: 3.1
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Text Processing :: Markup :: XML

File diff suppressed because it is too large

@@ -1 +0,0 @@

@@ -1 +0,0 @@
feedparser

File diff suppressed because it is too large

@@ -1,859 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
__author__ = "Mark Pilgrim <http://diveintomark.org/>"
|
||||
__license__ = """
|
||||
Copyright (c) 2010-2012 Kurt McKee <contactme@kurtmckee.org>
|
||||
Copyright (c) 2004-2008 Mark Pilgrim
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE."""
|
||||
|
||||
import codecs
|
||||
import datetime
|
||||
import glob
|
||||
import operator
|
||||
import os
|
||||
import posixpath
|
||||
import pprint
|
||||
import re
|
||||
import struct
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import unittest
|
||||
import urllib
|
||||
import warnings
|
||||
import zlib
|
||||
import BaseHTTPServer
|
||||
import SimpleHTTPServer
|
||||
|
||||
import feedparser
|
||||
|
||||
if not feedparser._XML_AVAILABLE:
|
||||
sys.stderr.write('No XML parsers available, unit testing can not proceed\n')
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
# the utf_32 codec was introduced in Python 2.6; it's necessary to
|
||||
# check this as long as feedparser supports Python 2.4 and 2.5
|
||||
codecs.lookup('utf_32')
|
||||
except LookupError:
|
||||
_UTF32_AVAILABLE = False
|
||||
else:
|
||||
_UTF32_AVAILABLE = True
|
||||
|
||||
_s2bytes = feedparser._s2bytes
|
||||
_l2bytes = feedparser._l2bytes
|
||||
|
||||
#---------- custom HTTP server (used to serve test feeds) ----------
|
||||
|
||||
_PORT = 8097 # not really configurable, must match hardcoded port in tests
|
||||
_HOST = '127.0.0.1' # also not really configurable
|
||||
|
||||
class FeedParserTestRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
|
||||
headers_re = re.compile(_s2bytes(r"^Header:\s+([^:]+):(.+)$"), re.MULTILINE)
|
||||
|
||||
def send_head(self):
|
||||
"""Send custom headers defined in test case
|
||||
|
||||
Example:
|
||||
<!--
|
||||
Header: Content-type: application/atom+xml
|
||||
Header: X-Foo: bar
|
||||
-->
|
||||
"""
|
||||
# Short-circuit the HTTP status test `test_redirect_to_304()`
|
||||
if self.path == '/-/return-304.xml':
|
||||
self.send_response(304)
|
||||
self.send_header('Content-type', 'text/xml')
|
||||
self.end_headers()
|
||||
return feedparser._StringIO(u''.encode('utf-8'))
|
||||
path = self.translate_path(self.path)
|
||||
# the compression tests' filenames determine the header sent
|
||||
if self.path.startswith('/tests/compression'):
|
||||
if self.path.endswith('gz'):
|
||||
headers = {'Content-Encoding': 'gzip'}
|
||||
else:
|
||||
headers = {'Content-Encoding': 'deflate'}
|
||||
headers['Content-type'] = 'application/xml'
|
||||
else:
|
||||
headers = dict([(k.decode('utf-8'), v.decode('utf-8').strip()) for k, v in self.headers_re.findall(open(path, 'rb').read())])
|
||||
f = open(path, 'rb')
|
||||
if (self.headers.get('if-modified-since') == headers.get('Last-Modified', 'nom')) \
|
||||
or (self.headers.get('if-none-match') == headers.get('ETag', 'nomatch')):
|
||||
status = 304
|
||||
else:
|
||||
status = 200
|
||||
headers.setdefault('Status', status)
|
||||
self.send_response(int(headers['Status']))
|
||||
headers.setdefault('Content-type', self.guess_type(path))
|
||||
self.send_header("Content-type", headers['Content-type'])
|
||||
self.send_header("Content-Length", str(os.stat(f.name)[6]))
|
||||
for k, v in headers.items():
|
||||
if k not in ('Status', 'Content-type'):
|
||||
self.send_header(k, v)
|
||||
self.end_headers()
|
||||
return f
|
||||
|
||||
def log_request(self, *args):
|
||||
pass
|
||||
|
||||
class FeedParserTestServer(threading.Thread):
|
||||
"""HTTP Server that runs in a thread and handles a predetermined number of requests"""
|
||||
|
||||
def __init__(self, requests):
|
||||
threading.Thread.__init__(self)
|
||||
self.requests = requests
|
||||
self.ready = threading.Event()
|
||||
|
||||
def run(self):
|
||||
self.httpd = BaseHTTPServer.HTTPServer((_HOST, _PORT), FeedParserTestRequestHandler)
|
||||
self.ready.set()
|
||||
while self.requests:
|
||||
self.httpd.handle_request()
|
||||
self.requests -= 1
|
||||
self.ready.clear()
|
||||
|
||||
#---------- dummy test case class (test methods are added dynamically) ----------
|
||||
unicode1_re = re.compile(_s2bytes(" u'"))
|
||||
unicode2_re = re.compile(_s2bytes(' u"'))
|
||||
|
||||
# _bytes is only used in everythingIsUnicode().
|
||||
# In Python 2 it's str, and in Python 3 it's bytes.
|
||||
_bytes = type(_s2bytes(''))
|
||||
|
||||
def everythingIsUnicode(d):
|
||||
"""Takes a dictionary, recursively verifies that every value is unicode"""
|
||||
for k, v in d.iteritems():
|
||||
if isinstance(v, dict) and k != 'headers':
|
||||
if not everythingIsUnicode(v):
|
||||
return False
|
||||
elif isinstance(v, list):
|
||||
for i in v:
|
||||
if isinstance(i, dict) and not everythingIsUnicode(i):
|
||||
return False
|
||||
elif isinstance(i, _bytes):
|
||||
return False
|
||||
elif isinstance(v, _bytes):
|
||||
return False
|
||||
return True
|
||||
|
||||
def failUnlessEval(self, xmlfile, evalString, msg=None):
|
||||
"""Fail unless eval(evalString, env)"""
|
||||
env = feedparser.parse(xmlfile)
|
||||
try:
|
||||
if not eval(evalString, globals(), env):
|
||||
failure=(msg or 'not eval(%s) \nWITH env(%s)' % (evalString, pprint.pformat(env)))
|
||||
raise self.failureException, failure
|
||||
if not everythingIsUnicode(env):
|
||||
raise self.failureException, "not everything is unicode \nWITH env(%s)" % (pprint.pformat(env), )
|
||||
except SyntaxError:
|
||||
# Python 3 doesn't have the `u""` syntax, so evalString needs to be modified,
|
||||
# which will require the failure message to be updated
|
||||
evalString = re.sub(unicode1_re, _s2bytes(" '"), evalString)
|
||||
evalString = re.sub(unicode2_re, _s2bytes(' "'), evalString)
|
||||
if not eval(evalString, globals(), env):
|
||||
failure=(msg or 'not eval(%s) \nWITH env(%s)' % (evalString, pprint.pformat(env)))
|
||||
raise self.failureException, failure
|
||||
|
||||
class BaseTestCase(unittest.TestCase):
|
||||
failUnlessEval = failUnlessEval
|
||||
|
||||
class TestCase(BaseTestCase):
|
||||
pass
|
||||
|
||||
class TestTemporaryFallbackBehavior(unittest.TestCase):
|
||||
"These tests are temporarily here because of issues 310 and 328"
|
||||
def test_issue_328_fallback_behavior(self):
|
||||
warnings.filterwarnings('error')
|
||||
|
||||
d = feedparser.FeedParserDict()
|
||||
d['published'] = u'pub string'
|
||||
d['published_parsed'] = u'pub tuple'
|
||||
d['updated'] = u'upd string'
|
||||
d['updated_parsed'] = u'upd tuple'
|
||||
# Ensure that `updated` doesn't map to `published` when it exists
|
||||
self.assertTrue('published' in d)
|
||||
self.assertTrue('published_parsed' in d)
|
||||
self.assertTrue('updated' in d)
|
||||
self.assertTrue('updated_parsed' in d)
|
||||
self.assertEqual(d['published'], 'pub string')
|
||||
self.assertEqual(d['published_parsed'], 'pub tuple')
|
||||
self.assertEqual(d['updated'], 'upd string')
|
||||
self.assertEqual(d['updated_parsed'], 'upd tuple')
|
||||
|
||||
d = feedparser.FeedParserDict()
|
||||
d['published'] = u'pub string'
|
||||
d['published_parsed'] = u'pub tuple'
|
||||
# Ensure that `updated` doesn't actually exist
|
||||
self.assertTrue('updated' not in d)
|
||||
self.assertTrue('updated_parsed' not in d)
|
||||
# Ensure that accessing `updated` throws a DeprecationWarning
|
||||
try:
|
||||
d['updated']
|
||||
except DeprecationWarning:
|
||||
# Expected behavior
|
||||
pass
|
||||
else:
|
||||
# Wrong behavior
|
||||
self.assertEqual(True, False)
|
||||
try:
|
||||
d['updated_parsed']
|
||||
except DeprecationWarning:
|
||||
# Expected behavior
|
||||
pass
|
||||
else:
|
||||
# Wrong behavior
|
||||
self.assertEqual(True, False)
|
||||
# Ensure that `updated` maps to `published`
|
||||
warnings.filterwarnings('ignore')
|
||||
self.assertEqual(d['updated'], u'pub string')
|
||||
self.assertEqual(d['updated_parsed'], u'pub tuple')
|
||||
warnings.resetwarnings()
|
||||
|
||||
|
||||
class TestEverythingIsUnicode(unittest.TestCase):
|
||||
"Ensure that `everythingIsUnicode()` is working appropriately"
|
||||
def test_everything_is_unicode(self):
|
||||
self.assertTrue(everythingIsUnicode(
|
||||
{'a': u'a', 'b': [u'b', {'c': u'c'}], 'd': {'e': u'e'}}
|
||||
))
|
||||
def test_not_everything_is_unicode(self):
|
||||
self.assertFalse(everythingIsUnicode({'a': _s2bytes('a')}))
|
||||
self.assertFalse(everythingIsUnicode({'a': [_s2bytes('a')]}))
|
||||
self.assertFalse(everythingIsUnicode({'a': {'b': _s2bytes('b')}}))
|
||||
self.assertFalse(everythingIsUnicode({'a': [{'b': _s2bytes('b')}]}))
|
||||
|
||||
class TestLooseParser(BaseTestCase):
|
||||
"Test the sgmllib-based parser by manipulating feedparser " \
|
||||
"into believing no XML parsers are installed"
|
||||
def __init__(self, arg):
|
||||
unittest.TestCase.__init__(self, arg)
|
||||
self._xml_available = feedparser._XML_AVAILABLE
|
||||
def setUp(self):
|
||||
feedparser._XML_AVAILABLE = 0
|
||||
def tearDown(self):
|
||||
feedparser._XML_AVAILABLE = self._xml_available
|
||||
|
||||
class TestStrictParser(BaseTestCase):
|
||||
pass
|
||||
|
||||
class TestMicroformats(BaseTestCase):
|
||||
pass
|
||||
|
||||
class TestEncodings(BaseTestCase):
|
||||
def test_doctype_replacement(self):
|
||||
"Ensure that non-ASCII-compatible encodings don't hide " \
|
||||
"disallowed ENTITY declarations"
|
||||
doc = """<?xml version="1.0" encoding="utf-16be"?>
|
||||
<!DOCTYPE feed [
|
||||
<!ENTITY exponential1 "bogus ">
|
||||
<!ENTITY exponential2 "&exponential1;&exponential1;">
|
||||
<!ENTITY exponential3 "&exponential2;&exponential2;">
|
||||
]>
|
||||
<feed><title type="html">&exponential3;</title></feed>"""
|
||||
doc = codecs.BOM_UTF16_BE + doc.encode('utf-16be')
|
||||
result = feedparser.parse(doc)
|
||||
self.assertEqual(result['feed']['title'], u'&exponential3')
|
||||
def test_gb2312_converted_to_gb18030_in_xml_encoding(self):
|
||||
# \u55de was chosen because it exists in gb18030 but not gb2312
|
||||
feed = u'''<?xml version="1.0" encoding="gb2312"?>
|
||||
<feed><title>\u55de</title></feed>'''
|
||||
result = feedparser.parse(feed.encode('gb18030'), response_headers={
|
||||
'Content-Type': 'text/xml'
|
||||
})
|
||||
self.assertEqual(result.encoding, 'gb18030')
|
||||
|
||||
class TestFeedParserDict(unittest.TestCase):
|
||||
"Ensure that FeedParserDict returns values as expected and won't crash"
|
||||
def setUp(self):
|
||||
self.d = feedparser.FeedParserDict()
|
||||
def _check_key(self, k):
|
||||
self.assertTrue(k in self.d)
|
||||
self.assertTrue(hasattr(self.d, k))
|
||||
self.assertEqual(self.d[k], 1)
|
||||
self.assertEqual(getattr(self.d, k), 1)
|
||||
def _check_no_key(self, k):
|
||||
self.assertTrue(k not in self.d)
|
||||
self.assertTrue(not hasattr(self.d, k))
|
||||
def test_empty(self):
|
||||
keys = (
|
||||
'a','entries', 'id', 'guid', 'summary', 'subtitle', 'description',
|
||||
'category', 'enclosures', 'license', 'categories',
|
||||
)
|
||||
for k in keys:
|
||||
self._check_no_key(k)
|
||||
self.assertTrue('items' not in self.d)
|
||||
self.assertTrue(hasattr(self.d, 'items')) # dict.items() exists
|
||||
def test_neutral(self):
|
||||
self.d['a'] = 1
|
||||
self._check_key('a')
|
||||
def test_single_mapping_target_1(self):
|
||||
self.d['id'] = 1
|
||||
self._check_key('id')
|
||||
self._check_key('guid')
|
||||
def test_single_mapping_target_2(self):
|
||||
self.d['guid'] = 1
|
||||
self._check_key('id')
|
||||
self._check_key('guid')
|
||||
def test_multiple_mapping_target_1(self):
|
||||
self.d['summary'] = 1
|
||||
self._check_key('summary')
|
||||
self._check_key('description')
|
||||
def test_multiple_mapping_target_2(self):
|
||||
self.d['subtitle'] = 1
|
||||
self._check_key('subtitle')
|
||||
self._check_key('description')
|
||||
def test_multiple_mapping_mapped_key(self):
|
||||
self.d['description'] = 1
|
||||
self._check_key('summary')
|
||||
self._check_key('description')
|
||||
def test_license(self):
|
||||
self.d['links'] = []
|
||||
try:
|
||||
self.d['license']
|
||||
self.assertTrue(False)
|
||||
except KeyError:
|
||||
pass
|
||||
self.d['links'].append({'rel': 'license'})
|
||||
try:
|
||||
self.d['license']
|
||||
self.assertTrue(False)
|
||||
except KeyError:
|
||||
pass
|
||||
self.d['links'].append({'rel': 'license', 'href': 'http://dom.test/'})
|
||||
self.assertEqual(self.d['license'], 'http://dom.test/')
|
||||
def test_category(self):
|
||||
self.d['tags'] = []
|
||||
try:
|
||||
self.d['category']
|
||||
self.assertTrue(False)
|
||||
except KeyError:
|
||||
pass
|
||||
self.d['tags'] = [{}]
|
||||
try:
|
||||
self.d['category']
|
||||
self.assertTrue(False)
|
||||
except KeyError:
|
||||
pass
|
||||
self.d['tags'] = [{'term': 'cat'}]
|
||||
self.assertEqual(self.d['category'], 'cat')
|
||||
self.d['tags'].append({'term': 'dog'})
|
||||
self.assertEqual(self.d['category'], 'cat')
|
||||
|
||||
class TestOpenResource(unittest.TestCase):
|
||||
"Ensure that `_open_resource()` interprets its arguments as URIs, " \
|
||||
"file-like objects, or in-memory feeds as expected"
|
||||
def test_fileobj(self):
|
||||
r = feedparser._open_resource(sys.stdin, '', '', '', '', [], {})
|
||||
self.assertTrue(r is sys.stdin)
|
||||
def test_feed(self):
|
||||
f = feedparser.parse(u'feed://localhost:8097/tests/http/target.xml')
|
||||
self.assertEqual(f.href, u'http://localhost:8097/tests/http/target.xml')
|
||||
def test_feed_http(self):
|
||||
f = feedparser.parse(u'feed:http://localhost:8097/tests/http/target.xml')
|
||||
self.assertEqual(f.href, u'http://localhost:8097/tests/http/target.xml')
|
||||
def test_bytes(self):
|
||||
s = '<feed><item><title>text</title></item></feed>'.encode('utf-8')
|
||||
r = feedparser._open_resource(s, '', '', '', '', [], {})
|
||||
self.assertEqual(s, r.read())
|
||||
def test_string(self):
|
||||
s = '<feed><item><title>text</title></item></feed>'
|
||||
r = feedparser._open_resource(s, '', '', '', '', [], {})
|
||||
self.assertEqual(s.encode('utf-8'), r.read())
|
||||
def test_unicode_1(self):
|
||||
s = u'<feed><item><title>text</title></item></feed>'
|
||||
r = feedparser._open_resource(s, '', '', '', '', [], {})
|
||||
self.assertEqual(s.encode('utf-8'), r.read())
|
||||
def test_unicode_2(self):
|
||||
s = u'<feed><item><title>t\u00e9xt</title></item></feed>'
|
||||
r = feedparser._open_resource(s, '', '', '', '', [], {})
|
||||
self.assertEqual(s.encode('utf-8'), r.read())
|
||||
|
||||
class TestMakeSafeAbsoluteURI(unittest.TestCase):
|
||||
"Exercise the URI joining and sanitization code"
|
||||
base = u'http://d.test/d/f.ext'
|
||||
def _mktest(rel, expect, doc):
|
||||
def fn(self):
|
||||
value = feedparser._makeSafeAbsoluteURI(self.base, rel)
|
||||
self.assertEqual(value, expect)
|
||||
fn.__doc__ = doc
|
||||
return fn
|
||||
|
||||
# make the test cases; the call signature is:
|
||||
# (relative_url, expected_return_value, test_doc_string)
|
||||
test_abs = _mktest(u'https://s.test/', u'https://s.test/', 'absolute uri')
|
||||
test_rel = _mktest(u'/new', u'http://d.test/new', 'relative uri')
|
||||
test_bad = _mktest(u'x://bad.test/', u'', 'unacceptable uri protocol')
|
||||
test_mag = _mktest(u'magnet:?xt=a', u'magnet:?xt=a', 'magnet uri')
|
||||
|
||||
def test_catch_ValueError(self):
|
||||
'catch ValueError in Python 2.7 and up'
|
||||
uri = u'http://bad]test/'
|
||||
value1 = feedparser._makeSafeAbsoluteURI(uri)
|
||||
value2 = feedparser._makeSafeAbsoluteURI(self.base, uri)
|
||||
swap = feedparser.ACCEPTABLE_URI_SCHEMES
|
||||
feedparser.ACCEPTABLE_URI_SCHEMES = ()
|
||||
value3 = feedparser._makeSafeAbsoluteURI(self.base, uri)
|
||||
feedparser.ACCEPTABLE_URI_SCHEMES = swap
|
||||
# Only Python 2.7 and up throw a ValueError, otherwise uri is returned
|
||||
self.assertTrue(value1 in (uri, u''))
|
||||
self.assertTrue(value2 in (uri, u''))
|
||||
self.assertTrue(value3 in (uri, u''))
|
||||
|
||||
class TestConvertToIdn(unittest.TestCase):
|
||||
"Test IDN support (unavailable in Jython as of Jython 2.5.2)"
|
||||
# this is the greek test domain
|
||||
hostname = u'\u03c0\u03b1\u03c1\u03ac\u03b4\u03b5\u03b9\u03b3\u03bc\u03b1'
|
||||
hostname += u'.\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae'
|
||||
def test_control(self):
|
||||
r = feedparser._convert_to_idn(u'http://example.test/')
|
||||
self.assertEqual(r, u'http://example.test/')
|
||||
def test_idn(self):
|
||||
r = feedparser._convert_to_idn(u'http://%s/' % (self.hostname,))
|
||||
self.assertEqual(r, u'http://xn--hxajbheg2az3al.xn--jxalpdlp/')
|
||||
def test_port(self):
|
||||
r = feedparser._convert_to_idn(u'http://%s:8080/' % (self.hostname,))
|
||||
self.assertEqual(r, u'http://xn--hxajbheg2az3al.xn--jxalpdlp:8080/')
|
||||
|
||||
class TestCompression(unittest.TestCase):
|
||||
"Test the gzip and deflate support in the HTTP code"
|
||||
def test_gzip_good(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/compression/gzip.gz')
|
||||
self.assertEqual(f.version, 'atom10')
|
||||
def test_gzip_not_compressed(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/compression/gzip-not-compressed.gz')
|
||||
self.assertEqual(f.bozo, 1)
|
||||
self.assertTrue(isinstance(f.bozo_exception, IOError))
|
||||
self.assertEqual(f['feed']['title'], 'gzip')
|
||||
def test_gzip_struct_error(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/compression/gzip-struct-error.gz')
|
||||
self.assertEqual(f.bozo, 1)
|
||||
self.assertTrue(isinstance(f.bozo_exception, struct.error))
|
||||
def test_zlib_good(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/compression/deflate.z')
|
||||
self.assertEqual(f.version, 'atom10')
|
||||
def test_zlib_no_headers(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/compression/deflate-no-headers.z')
|
||||
self.assertEqual(f.version, 'atom10')
|
||||
def test_zlib_not_compressed(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/compression/deflate-not-compressed.z')
|
||||
self.assertEqual(f.bozo, 1)
|
||||
self.assertTrue(isinstance(f.bozo_exception, zlib.error))
|
||||
self.assertEqual(f['feed']['title'], 'deflate')
|
||||
|
||||
class TestHTTPStatus(unittest.TestCase):
|
||||
"Test HTTP redirection and other status codes"
|
||||
def test_301(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/http/http_status_301.xml')
|
||||
self.assertEqual(f.status, 301)
|
||||
self.assertEqual(f.href, 'http://localhost:8097/tests/http/target.xml')
|
||||
self.assertEqual(f.entries[0].title, 'target')
|
||||
def test_302(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/http/http_status_302.xml')
|
||||
self.assertEqual(f.status, 302)
|
||||
self.assertEqual(f.href, 'http://localhost:8097/tests/http/target.xml')
|
||||
self.assertEqual(f.entries[0].title, 'target')
|
||||
def test_303(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/http/http_status_303.xml')
|
||||
self.assertEqual(f.status, 303)
|
||||
self.assertEqual(f.href, 'http://localhost:8097/tests/http/target.xml')
|
||||
self.assertEqual(f.entries[0].title, 'target')
|
||||
def test_307(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/http/http_status_307.xml')
|
||||
self.assertEqual(f.status, 307)
|
||||
self.assertEqual(f.href, 'http://localhost:8097/tests/http/target.xml')
|
||||
self.assertEqual(f.entries[0].title, 'target')
|
||||
def test_304(self):
|
||||
# first retrieve the url
|
||||
u = 'http://localhost:8097/tests/http/http_status_304.xml'
|
||||
f = feedparser.parse(u)
|
||||
self.assertEqual(f.status, 200)
|
||||
self.assertEqual(f.entries[0].title, 'title 304')
|
||||
# extract the etag and last-modified headers
|
||||
e = [v for k, v in f.headers.items() if k.lower() == 'etag'][0]
|
||||
mh = [v for k, v in f.headers.items() if k.lower() == 'last-modified'][0]
|
||||
ms = f.updated
|
||||
mt = f.updated_parsed
|
||||
md = datetime.datetime(*mt[0:7])
|
||||
self.assertTrue(isinstance(mh, basestring))
|
||||
self.assertTrue(isinstance(ms, basestring))
|
||||
self.assertTrue(isinstance(mt, time.struct_time))
|
||||
self.assertTrue(isinstance(md, datetime.datetime))
|
||||
# test that sending back the etag results in a 304
|
||||
f = feedparser.parse(u, etag=e)
|
||||
self.assertEqual(f.status, 304)
|
||||
# test that sending back last-modified (string) results in a 304
|
||||
f = feedparser.parse(u, modified=ms)
|
||||
self.assertEqual(f.status, 304)
|
||||
# test that sending back last-modified (9-tuple) results in a 304
|
||||
f = feedparser.parse(u, modified=mt)
|
||||
self.assertEqual(f.status, 304)
|
||||
# test that sending back last-modified (datetime) results in a 304
|
||||
f = feedparser.parse(u, modified=md)
|
||||
self.assertEqual(f.status, 304)
|
||||
def test_404(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/http/http_status_404.xml')
|
||||
self.assertEqual(f.status, 404)
|
||||
def test_9001(self):
|
||||
f = feedparser.parse('http://localhost:8097/tests/http/http_status_9001.xml')
|
||||
self.assertEqual(f.bozo, 1)
|
||||
def test_redirect_to_304(self):
|
||||
# ensure that an http redirect to an http 304 doesn't
|
||||
# trigger a bozo_exception
|
||||
u = 'http://localhost:8097/tests/http/http_redirect_to_304.xml'
|
||||
f = feedparser.parse(u)
|
||||
self.assertTrue(f.bozo == 0)
|
||||
self.assertTrue(f.status == 302)
|
||||
|
||||
class TestDateParsers(unittest.TestCase):
|
||||
"Test the various date parsers; most of the test cases are constructed " \
|
||||
"dynamically based on the contents of the `date_tests` dict, below"
|
||||
def test_None(self):
|
||||
self.assertTrue(feedparser._parse_date(None) is None)
|
||||
def _check_date(self, func, dtstring, dttuple):
|
||||
try:
|
||||
tup = func(dtstring)
|
||||
except (OverflowError, ValueError):
|
||||
tup = None
|
||||
self.assertEqual(tup, dttuple)
|
||||
self.assertEqual(tup, feedparser._parse_date(dtstring))
|
||||
def test_year_10000_date(self):
|
||||
# On some systems this date string will trigger an OverflowError.
|
||||
# On Jython and x64 systems, however, it's interpreted just fine.
|
||||
try:
|
||||
date = feedparser._parse_date_rfc822(u'Sun, 31 Dec 9999 23:59:59 -9999')
|
||||
except OverflowError:
|
||||
date = None
|
||||
self.assertTrue(date in (None, (10000, 1, 5, 4, 38, 59, 2, 5, 0)))
|
||||
|
||||
date_tests = {
|
||||
feedparser._parse_date_greek: (
|
||||
(u'', None), # empty string
|
||||
(u'\u039a\u03c5\u03c1, 11 \u0399\u03bf\u03cd\u03bb 2004 12:00:00 EST', (2004, 7, 11, 17, 0, 0, 6, 193, 0)),
|
||||
),
|
||||
feedparser._parse_date_hungarian: (
|
||||
(u'', None), # empty string
|
||||
(u'2004-j\u00falius-13T9:15-05:00', (2004, 7, 13, 14, 15, 0, 1, 195, 0)),
|
||||
),
|
||||
feedparser._parse_date_iso8601: (
|
||||
(u'', None), # empty string
|
||||
(u'-0312', (2003, 12, 1, 0, 0, 0, 0, 335, 0)), # 2-digit year/month only variant
|
||||
(u'031231', (2003, 12, 31, 0, 0, 0, 2, 365, 0)), # 2-digit year/month/day only, no hyphens
|
||||
(u'03-12-31', (2003, 12, 31, 0, 0, 0, 2, 365, 0)), # 2-digit year/month/day only
|
||||
(u'-03-12', (2003, 12, 1, 0, 0, 0, 0, 335, 0)), # 2-digit year/month only
|
||||
(u'03335', (2003, 12, 1, 0, 0, 0, 0, 335, 0)), # 2-digit year/ordinal, no hyphens
|
||||
(u'2003-12-31T10:14:55.1234Z', (2003, 12, 31, 10, 14, 55, 2, 365, 0)), # fractional seconds
|
||||
# Special case for Google's extra zero in the month
|
||||
(u'2003-012-31T10:14:55+00:00', (2003, 12, 31, 10, 14, 55, 2, 365, 0)),
|
||||
),
|
||||
feedparser._parse_date_nate: (
|
||||
(u'', None), # empty string
|
||||
(u'2004-05-25 \uc624\ud6c4 11:23:17', (2004, 5, 25, 14, 23, 17, 1, 146, 0)),
|
||||
),
|
||||
feedparser._parse_date_onblog: (
|
||||
(u'', None), # empty string
|
||||
(u'2004\ub144 05\uc6d4 28\uc77c 01:31:15', (2004, 5, 27, 16, 31, 15, 3, 148, 0)),
|
||||
),
|
||||
feedparser._parse_date_perforce: (
|
||||
(u'', None), # empty string
|
||||
(u'Fri, 2006/09/15 08:19:53 EDT', (2006, 9, 15, 12, 19, 53, 4, 258, 0)),
|
||||
),
|
||||
feedparser._parse_date_rfc822: (
|
||||
(u'', None), # empty string
|
||||
(u'Thu, 01 Jan 0100 00:00:01 +0100', (99, 12, 31, 23, 0, 1, 3, 365, 0)), # ancient date
|
||||
(u'Thu, 01 Jan 04 19:48:21 GMT', (2004, 1, 1, 19, 48, 21, 3, 1, 0)), # 2-digit year
|
||||
(u'Thu, 01 Jan 2004 19:48:21 GMT', (2004, 1, 1, 19, 48, 21, 3, 1, 0)), # 4-digit year
|
||||
(u'Thu, 5 Apr 2012 10:00:00 GMT', (2012, 4, 5, 10, 0, 0, 3, 96, 0)), # 1-digit day
|
||||
(u'Wed, 19 Aug 2009 18:28:00 Etc/GMT', (2009, 8, 19, 18, 28, 0, 2, 231, 0)), # etc/gmt timezone
|
||||
(u'Wed, 19 Feb 2012 22:40:00 GMT-01:01', (2012, 2, 19, 23, 41, 0, 6, 50, 0)), # gmt+hh:mm timezone
|
||||
(u'Mon, 13 Feb, 2012 06:28:00 UTC', (2012, 2, 13, 6, 28, 0, 0, 44, 0)), # extraneous comma
|
||||
(u'Thu, 01 Jan 2004 00:00 GMT', (2004, 1, 1, 0, 0, 0, 3, 1, 0)), # no seconds
|
||||
(u'Thu, 01 Jan 2004', (2004, 1, 1, 0, 0, 0, 3, 1, 0)), # no time
|
||||
# Additional tests to handle Disney's long month names and invalid timezones
|
||||
(u'Mon, 26 January 2004 16:31:00 AT', (2004, 1, 26, 20, 31, 0, 0, 26, 0)),
|
||||
(u'Mon, 26 January 2004 16:31:00 ET', (2004, 1, 26, 21, 31, 0, 0, 26, 0)),
|
||||
(u'Mon, 26 January 2004 16:31:00 CT', (2004, 1, 26, 22, 31, 0, 0, 26, 0)),
|
||||
(u'Mon, 26 January 2004 16:31:00 MT', (2004, 1, 26, 23, 31, 0, 0, 26, 0)),
|
||||
(u'Mon, 26 January 2004 16:31:00 PT', (2004, 1, 27, 0, 31, 0, 1, 27, 0)),
|
||||
),
|
||||
feedparser._parse_date_rfc822_grubby: (
|
||||
(u'Thu Aug 30 2012 17:26:16 +0200', (2012, 8, 30, 15, 26, 16, 3, 243, 0)),
|
||||
),
|
||||
feedparser._parse_date_asctime: (
|
||||
(u'Sun Jan 4 16:29:06 2004', (2004, 1, 4, 16, 29, 6, 6, 4, 0)),
|
||||
),
|
||||
feedparser._parse_date_w3dtf: (
|
||||
(u'', None), # empty string
|
||||
(u'2003-12-31T10:14:55Z', (2003, 12, 31, 10, 14, 55, 2, 365, 0)), # UTC
|
||||
(u'2003-12-31T10:14:55-08:00', (2003, 12, 31, 18, 14, 55, 2, 365, 0)), # San Francisco timezone
|
||||
(u'2003-12-31T18:14:55+08:00', (2003, 12, 31, 10, 14, 55, 2, 365, 0)), # Tokyo timezone
|
||||
(u'2007-04-23T23:25:47.538+10:00', (2007, 4, 23, 13, 25, 47, 0, 113, 0)), # fractional seconds
|
||||
(u'2003-12-31', (2003, 12, 31, 0, 0, 0, 2, 365, 0)), # year/month/day only
|
||||
(u'20031231', (2003, 12, 31, 0, 0, 0, 2, 365, 0)), # year/month/day only, no hyphens
|
||||
(u'2003-12', (2003, 12, 1, 0, 0, 0, 0, 335, 0)), # year/month only
|
||||
(u'2003', (2003, 1, 1, 0, 0, 0, 2, 1, 0)), # year only
|
||||
# MSSQL-style dates
|
||||
(u'2004-07-08 23:56:58 -00:20', (2004, 7, 9, 0, 16, 58, 4, 191, 0)), # with timezone
|
||||
(u'2004-07-08 23:56:58', (2004, 7, 8, 23, 56, 58, 3, 190, 0)), # without timezone
|
||||
(u'2004-07-08 23:56:58.0', (2004, 7, 8, 23, 56, 58, 3, 190, 0)), # with fractional second
|
||||
# Special cases for out-of-range times
|
||||
(u'2003-12-31T25:14:55Z', (2004, 1, 1, 1, 14, 55, 3, 1, 0)), # invalid (25 hours)
|
||||
(u'2003-12-31T10:61:55Z', (2003, 12, 31, 11, 1, 55, 2, 365, 0)), # invalid (61 minutes)
|
||||
(u'2003-12-31T10:14:61Z', (2003, 12, 31, 10, 15, 1, 2, 365, 0)), # invalid (61 seconds)
|
||||
# Special cases for rollovers in leap years
|
||||
(u'2004-02-28T18:14:55-08:00', (2004, 2, 29, 2, 14, 55, 6, 60, 0)), # feb 28 in leap year
|
||||
(u'2003-02-28T18:14:55-08:00', (2003, 3, 1, 2, 14, 55, 5, 60, 0)), # feb 28 in non-leap year
|
||||
(u'2000-02-28T18:14:55-08:00', (2000, 2, 29, 2, 14, 55, 1, 60, 0)), # feb 28 in leap year on century divisible by 400
|
||||
)
|
||||
}
|
||||
|
||||
def make_date_test(f, s, t):
|
||||
return lambda self: self._check_date(f, s, t)
|
||||
|
||||
for func, items in date_tests.iteritems():
|
||||
for i, (dtstring, dttuple) in enumerate(items):
|
||||
uniqfunc = make_date_test(func, dtstring, dttuple)
|
||||
setattr(TestDateParsers, 'test_%s_%02i' % (func.__name__, i), uniqfunc)
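# Illustrative sketch -- NOT part of the original test file. It spells out what
# the loop above does for a single date_tests entry: make_date_test() builds a
# method-shaped callable and setattr() hangs it off TestDateParsers, so unittest
# discovers it like any hand-written test. The sample pair below is copied from
# the _parse_date_w3dtf entries; the attribute name is made up for the demo.
_sample_string = u'2003-12-31T10:14:55Z'
_sample_tuple = (2003, 12, 31, 10, 14, 55, 2, 365, 0)
_sample_test = make_date_test(feedparser._parse_date_w3dtf, _sample_string, _sample_tuple)
setattr(TestDateParsers, 'test__parse_date_w3dtf_sample', _sample_test)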
|
||||
|
||||
|
||||
class TestHTMLGuessing(unittest.TestCase):
|
||||
"Exercise the HTML sniffing code"
|
||||
def _mktest(text, expect, doc):
|
||||
def fn(self):
|
||||
value = bool(feedparser._FeedParserMixin.lookslikehtml(text))
|
||||
self.assertEqual(value, expect)
|
||||
fn.__doc__ = doc
|
||||
return fn
|
||||
|
||||
test_text_1 = _mktest(u'plain text', False, u'plain text')
|
||||
test_text_2 = _mktest(u'2 < 3', False, u'plain text with angle bracket')
|
||||
test_html_1 = _mktest(u'<a href="">a</a>', True, u'anchor tag')
|
||||
test_html_2 = _mktest(u'<i>i</i>', True, u'italics tag')
|
||||
test_html_3 = _mktest(u'<b>b</b>', True, u'bold tag')
|
||||
test_html_4 = _mktest(u'<code>', False, u'allowed tag, no end tag')
|
||||
test_html_5 = _mktest(u'<rss> .. </rss>', False, u'disallowed tag')
|
||||
test_entity_1 = _mktest(u'AT&T', False, u'corporation name')
|
||||
test_entity_2 = _mktest(u'©', True, u'named entity reference')
|
||||
test_entity_3 = _mktest(u'©', True, u'numeric entity reference')
|
||||
test_entity_4 = _mktest(u'©', True, u'hex numeric entity reference')
|
||||
|
||||
#---------- additional api unit tests, not backed by files
|
||||
|
||||
class TestBuildRequest(unittest.TestCase):
|
||||
"Test that HTTP request objects are created as expected"
|
||||
def test_extra_headers(self):
|
||||
"""You can pass in extra headers and they go into the request object."""
|
||||
|
||||
request = feedparser._build_urllib2_request(
|
||||
'http://example.com/feed',
|
||||
'agent-name',
|
||||
None, None, None, None,
|
||||
{'Cache-Control': 'max-age=0'})
|
||||
# nb, urllib2 folds the case of the headers
|
||||
self.assertEqual(
|
||||
request.get_header('Cache-control'), 'max-age=0')
|
||||
|
||||
|
||||
class TestLxmlBug(unittest.TestCase):
|
||||
def test_lxml_etree_bug(self):
|
||||
try:
|
||||
import lxml.etree
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
doc = u"<feed>&illformed_charref</feed>".encode('utf8')
|
||||
# Importing lxml.etree currently causes libxml2 to
|
||||
# throw SAXException instead of SAXParseException.
|
||||
feedparser.parse(feedparser._StringIO(doc))
|
||||
self.assertTrue(True)
|
||||
|
||||
#---------- parse test files and create test methods ----------
|
||||
def convert_to_utf8(data):
|
||||
"Identify data's encoding using its byte order mark" \
|
||||
"and convert it to its utf-8 equivalent"
|
||||
if data[:4] == _l2bytes([0x4c, 0x6f, 0xa7, 0x94]):
|
||||
return data.decode('cp037').encode('utf-8')
|
||||
elif data[:4] == _l2bytes([0x00, 0x00, 0xfe, 0xff]):
|
||||
if not _UTF32_AVAILABLE:
|
||||
return None
|
||||
return data.decode('utf-32be').encode('utf-8')
|
||||
elif data[:4] == _l2bytes([0xff, 0xfe, 0x00, 0x00]):
|
||||
if not _UTF32_AVAILABLE:
|
||||
return None
|
||||
return data.decode('utf-32le').encode('utf-8')
|
||||
elif data[:4] == _l2bytes([0x00, 0x00, 0x00, 0x3c]):
|
||||
if not _UTF32_AVAILABLE:
|
||||
return None
|
||||
return data.decode('utf-32be').encode('utf-8')
|
||||
elif data[:4] == _l2bytes([0x3c, 0x00, 0x00, 0x00]):
|
||||
if not _UTF32_AVAILABLE:
|
||||
return None
|
||||
return data.decode('utf-32le').encode('utf-8')
|
||||
elif data[:4] == _l2bytes([0x00, 0x3c, 0x00, 0x3f]):
|
||||
return data.decode('utf-16be').encode('utf-8')
|
||||
elif data[:4] == _l2bytes([0x3c, 0x00, 0x3f, 0x00]):
|
||||
return data.decode('utf-16le').encode('utf-8')
|
||||
elif (data[:2] == _l2bytes([0xfe, 0xff])) and (data[2:4] != _l2bytes([0x00, 0x00])):
|
||||
return data[2:].decode('utf-16be').encode('utf-8')
|
||||
elif (data[:2] == _l2bytes([0xff, 0xfe])) and (data[2:4] != _l2bytes([0x00, 0x00])):
|
||||
return data[2:].decode('utf-16le').encode('utf-8')
|
||||
elif data[:3] == _l2bytes([0xef, 0xbb, 0xbf]):
|
||||
return data[3:]
|
||||
# no byte order mark was found
|
||||
return data
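# Illustrative sketch -- NOT part of the original test file. A quick check of
# the BOM branches above: a UTF-16LE byte order mark makes convert_to_utf8()
# strip the BOM and transcode the payload to UTF-8. The sample document is
# made up for the demo.
_demo = codecs.BOM_UTF16_LE + u'<feed/>'.encode('utf-16le')
assert convert_to_utf8(_demo) == _s2bytes('<feed/>')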
|
||||
|
||||
skip_re = re.compile(_s2bytes("SkipUnless:\s*(.*?)\n"))
|
||||
desc_re = re.compile(_s2bytes("Description:\s*(.*?)\s*Expect:\s*(.*)\s*-->"))
|
||||
def getDescription(xmlfile, data):
|
||||
"""Extract test data
|
||||
|
||||
Each test case is an XML file which contains not only a test feed
|
||||
but also the description of the test and the condition that we
|
||||
would expect the parser to create when it parses the feed. Example:
|
||||
<!--
|
||||
Description: feed title
|
||||
Expect: feed['title'] == u'Example feed'
|
||||
-->
|
||||
"""
|
||||
skip_results = skip_re.search(data)
|
||||
if skip_results:
|
||||
skipUnless = skip_results.group(1).strip()
|
||||
else:
|
||||
skipUnless = '1'
|
||||
search_results = desc_re.search(data)
|
||||
if not search_results:
|
||||
raise RuntimeError, "can't parse %s" % xmlfile
|
||||
description, evalString = map(lambda s: s.strip(), list(search_results.groups()))
|
||||
description = xmlfile + ": " + unicode(description, 'utf8')
|
||||
return description, evalString, skipUnless
|
||||
|
||||
def buildTestCase(xmlfile, description, evalString):
|
||||
func = lambda self, xmlfile=xmlfile, evalString=evalString: \
|
||||
self.failUnlessEval(xmlfile, evalString)
|
||||
func.__doc__ = description
|
||||
return func
|
||||
|
||||
def runtests():
|
||||
"Read the files in the tests/ directory, dynamically add tests to the " \
|
||||
"TestCases above, spawn the HTTP server, and run the test suite"
|
||||
if sys.argv[1:]:
|
||||
allfiles = filter(lambda s: s.endswith('.xml'), reduce(operator.add, map(glob.glob, sys.argv[1:]), []))
|
||||
sys.argv = [sys.argv[0]] #+ sys.argv[2:]
|
||||
else:
|
||||
allfiles = glob.glob(os.path.join('.', 'tests', '**', '**', '*.xml'))
|
||||
wellformedfiles = glob.glob(os.path.join('.', 'tests', 'wellformed', '**', '*.xml'))
|
||||
illformedfiles = glob.glob(os.path.join('.', 'tests', 'illformed', '*.xml'))
|
||||
encodingfiles = glob.glob(os.path.join('.', 'tests', 'encoding', '*.xml'))
|
||||
entitiesfiles = glob.glob(os.path.join('.', 'tests', 'entities', '*.xml'))
|
||||
microformatfiles = glob.glob(os.path.join('.', 'tests', 'microformats', '**', '*.xml'))
|
||||
httpd = None
|
||||
# there are several compression test cases that must be accounted for
|
||||
# as well as a number of http status tests that redirect to a target
|
||||
# and a few `_open_resource`-related tests
|
||||
httpcount = 6 + 17 + 2
|
||||
httpcount += len([f for f in allfiles if 'http' in f])
|
||||
httpcount += len([f for f in wellformedfiles if 'http' in f])
|
||||
httpcount += len([f for f in illformedfiles if 'http' in f])
|
||||
httpcount += len([f for f in encodingfiles if 'http' in f])
|
||||
try:
|
||||
for c, xmlfile in enumerate(allfiles + encodingfiles + illformedfiles + entitiesfiles):
|
||||
addTo = TestCase
|
||||
if xmlfile in encodingfiles:
|
||||
addTo = TestEncodings
|
||||
elif xmlfile in entitiesfiles:
|
||||
addTo = (TestStrictParser, TestLooseParser)
|
||||
elif xmlfile in microformatfiles:
|
||||
addTo = TestMicroformats
|
||||
elif xmlfile in wellformedfiles:
|
||||
addTo = (TestStrictParser, TestLooseParser)
|
||||
data = open(xmlfile, 'rb').read()
|
||||
if 'encoding' in xmlfile:
|
||||
data = convert_to_utf8(data)
|
||||
if data is None:
|
||||
# convert_to_utf8 found a byte order mark for utf_32
|
||||
# but it's not supported in this installation of Python
|
||||
if 'http' in xmlfile:
|
||||
httpcount -= 1 + (xmlfile in wellformedfiles)
|
||||
continue
|
||||
description, evalString, skipUnless = getDescription(xmlfile, data)
|
||||
testName = 'test_%06d' % c
|
||||
ishttp = 'http' in xmlfile
|
||||
try:
|
||||
if not eval(skipUnless): raise NotImplementedError
|
||||
except (ImportError, LookupError, NotImplementedError, AttributeError):
|
||||
if ishttp:
|
||||
httpcount -= 1 + (xmlfile in wellformedfiles)
|
||||
continue
|
||||
if ishttp:
|
||||
xmlfile = 'http://%s:%s/%s' % (_HOST, _PORT, posixpath.normpath(xmlfile.replace('\\', '/')))
|
||||
testFunc = buildTestCase(xmlfile, description, evalString)
|
||||
if isinstance(addTo, tuple):
|
||||
setattr(addTo[0], testName, testFunc)
|
||||
setattr(addTo[1], testName, testFunc)
|
||||
else:
|
||||
setattr(addTo, testName, testFunc)
|
||||
if feedparser.TIDY_MARKUP and feedparser._mxtidy:
|
||||
sys.stderr.write('\nWarning: feedparser.TIDY_MARKUP invalidates tests, turning it off temporarily\n\n')
|
||||
feedparser.TIDY_MARKUP = 0
|
||||
if httpcount:
|
||||
httpd = FeedParserTestServer(httpcount)
|
||||
httpd.daemon = True
|
||||
httpd.start()
|
||||
httpd.ready.wait()
|
||||
testsuite = unittest.TestSuite()
|
||||
testloader = unittest.TestLoader()
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestCase))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestStrictParser))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestLooseParser))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestEncodings))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestDateParsers))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestHTMLGuessing))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestHTTPStatus))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestCompression))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestConvertToIdn))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestMicroformats))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestOpenResource))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestFeedParserDict))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestMakeSafeAbsoluteURI))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestEverythingIsUnicode))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestTemporaryFallbackBehavior))
|
||||
testsuite.addTest(testloader.loadTestsFromTestCase(TestLxmlBug))
|
||||
testresults = unittest.TextTestRunner(verbosity=1).run(testsuite)
|
||||
|
||||
# Return 0 if successful, 1 if there was a failure
|
||||
sys.exit(not testresults.wasSuccessful())
|
||||
finally:
|
||||
if httpd:
|
||||
if httpd.requests:
|
||||
# Should never get here unless something went horribly wrong, like the
|
||||
# user hitting Ctrl-C. Tell our HTTP server that it's done, then do
|
||||
# one more request to flush it. This rarely works; the combination of
|
||||
# threading, self-terminating HTTP servers, and unittest is really
|
||||
# quite flaky. Just what you want in a testing framework, no?
|
||||
httpd.requests = 0
|
||||
if httpd.ready:
|
||||
urllib.urlopen('http://127.0.0.1:8097/tests/wellformed/rss/aaa_wellformed.xml').read()
|
||||
httpd.join(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
runtests()
|
Binary file not shown.
|
@ -1 +0,0 @@
|
|||
<feed><title>deflate</title></feed>
|
Binary file not shown.
|
@ -1 +0,0 @@
|
|||
<feed><title>gzip</title></feed>
|
Binary file not shown.
Binary file not shown.
|
@ -1 +0,0 @@
|
|||
<feed xmlns="http://www.w3.org/2005/Atom"></feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="big5"?>
|
||||
<!--
|
||||
SkipUnless: __import__('codecs').lookup('big5')
|
||||
Description: big5
|
||||
Expect: not bozo and encoding == 'big5'
|
||||
-->
|
||||
<rss>
|
||||
</rss>
|
|
@ -1,7 +0,0 @@
|
|||
<?xml version="1.0" encoding="bogus"?>
|
||||
<!--
|
||||
Description: bogus encoding
|
||||
Expect: bozo
|
||||
-->
|
||||
<rss>
|
||||
</rss>
|
|
@ -1,13 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Description: utf-8 interpreted as iso-8859-1 and re-encoded as utf-8
|
||||
Expect: bozo and ord(entries[0]['description']) == 8230
|
||||
-->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<description>…</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
<!--
|
||||
SkipUnless: __import__('sys').version.split()[0] >= '2.2.0'
|
||||
Description: crashes
|
||||
Expect: 1
|
||||
-->
|
||||
<rss>
|
||||
<item>
|
||||
<description><![CDATA[<a href="http://www.example.com/">¤</a><a href="&"></a>]]></description>
|
||||
</item>
|
||||
</rss>
|
|
@ -1,11 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!--
|
||||
Note: text/xml defaults to us-ascii, in conflict with the XML declaration of utf-8
|
||||
Header: Content-type: text/xml
|
||||
Description: Content-type with no charset (text/xml defaults to us-ascii)
|
||||
Expect: bozo and isinstance(bozo_exception, feedparser.CharacterEncodingOverride)
|
||||
-->
|
||||
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
<title>Iñtërnâtiônàlizætiøn</title>
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/plain
|
||||
Description: text/plain + no encoding
|
||||
Expect: bozo
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/plain; charset=utf-8
|
||||
Description: text/plain + charset
|
||||
Expect: bozo and encoding == 'utf-8'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,10 +0,0 @@
|
|||
<!--
|
||||
Description: Ensure when there are invalid bytes in encoding specified by BOM, feedparser doesn't crash
|
||||
Expect: bozo
|
||||
-->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Valid UTF8: ѨInvalid UTF8: España</title>
|
||||
<description><pre class="screen"></pre></description>
|
||||
</channel>
|
||||
</rss
|
|
@ -1,13 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!--
|
||||
Description: unguessable characters
|
||||
Expect: bozo and entries[0].summary == u'\xe2\u20ac\u2122\xe2\u20ac\x9d\u0160'
|
||||
-->
|
||||
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<description><![CDATA[ ’<>â€<C3A2>© ]]></description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
Binary file not shown.
Binary file not shown.
|
@ -1,13 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Description: using win-1252 character points instead of unicode
|
||||
Expect: not bozo and entries[0]['description'] == u'don\u2019t'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<description>dont</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Description: using win-1252 character points instead of unicode
|
||||
Expect: not bozo and entries[0]['description'] == u'don\u2019t'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<description>don’t</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Description: using win-1252 character points instead of unicode
|
||||
Expect: not bozo and entries[0]['description'] == u'don’t'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<description>don&#146;t</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Description: utf-8 interpreted as iso-8859-1 and re-encoded as utf-8
|
||||
Expect: not bozo and ord(entries[0]['description']) == 8230
|
||||
-->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<description>…</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
<!--
|
||||
Description: crashes
|
||||
Expect: 1
|
||||
-->
|
||||
<rss>
|
||||
<item>
|
||||
<description><![CDATA[<img alt="©" />]]></description>
|
||||
</item>
|
||||
</rss>
|
|
@ -1,9 +0,0 @@
|
|||
<!--
|
||||
Description: crashes
|
||||
Expect: 1
|
||||
-->
|
||||
<rss>
|
||||
<item>
|
||||
<description><a href="http://example.com"><img src="http://example.com/logo.gif" alt="The image &acirc;&#128;&#156;http://example.com/logo.gif&acirc;&#128;&#65533; cannot be displayed, because it contains errors."></a><br></description>
|
||||
</item>
|
||||
</rss>
|
|
@ -1,14 +0,0 @@
|
|||
<?xml version="1.0" encoding="euc-kr"?>
|
||||
<!--
|
||||
SkipUnless: __import__('codecs').lookup('euc-kr')
|
||||
Description: euc-kr character in attribute of embedded HTML
|
||||
Expect: not bozo and entries[0]['description'] == u'<img alt="\ub144" />'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<description><img alt="³â" /></description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
<?xml version="1.0" encoding="euc-kr"?>
|
||||
<!--
|
||||
SkipUnless: __import__('codecs').lookup('euc-kr')
|
||||
Description: euc-kr encoding in item description
|
||||
Expect: not bozo and entries[0]['description'] == u'\ub144'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<description>³â</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
<?xml version="1.0" encoding="euc-kr"?>
|
||||
<!--
|
||||
SkipUnless: __import__('codecs').lookup('euc-kr')
|
||||
Description: euc-kr encoding
|
||||
Expect: not bozo and feed['title'] == u'\ub144'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>³â</title>
|
||||
</channel>
|
||||
</rss>
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: application/atom+xml;charset='us-ascii'
|
||||
Description: application/atom+xml + explicit charset
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: application/atom+xml; charset='us-ascii'
|
||||
Description: application/atom+xml + charset overrides encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: application/atom+xml
|
||||
Description: application/atom+xml + no encoding
|
||||
Expect: not bozo and encoding == 'utf-8'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: application/atom+xml
|
||||
Description: application/atom+xml + explicit encoding
|
||||
Expect: not bozo and encoding == 'iso-8859-1'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,9 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
SkipUnless: __import__('codecs').lookup('gb2312')
|
||||
Header: Content-type: application/atom+xml;charset='gb2312'
|
||||
Description: application/atom+xml + explicit charset
|
||||
Expect: not bozo and encoding == 'gb18030'
|
||||
-->
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
</feed>
|
|
@ -1,9 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!--
|
||||
SkipUnless: __import__('codecs').lookup('gb2312')
|
||||
Header: Content-type: application/atom+xml; charset='gb2312'
|
||||
Description: application/atom+xml + charset overrides encoding
|
||||
Expect: not bozo and encoding == 'gb18030'
|
||||
-->
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
</feed>
|
|
@ -1,9 +0,0 @@
|
|||
<?xml version="1.0" encoding="gb2312"?>
|
||||
<!--
|
||||
SkipUnless: __import__('codecs').lookup('gb2312')
|
||||
Header: Content-type: application/atom+xml
|
||||
Description: application/atom+xml + explicit encoding
|
||||
Expect: not bozo and encoding == 'gb18030'
|
||||
-->
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: application/rss+xml;charset= 'us-ascii'
|
||||
Description: application/rss+xml + explicit charset
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: application/rss+xml;charset= "us-ascii"
|
||||
Description: application/rss+xml + charset overrides encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: application/rss+xml
|
||||
Description: application/rss+xml + no encoding
|
||||
Expect: not bozo and encoding == 'utf-8'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: application/rss+xml
|
||||
Description: application/rss+xml + explicit encoding
|
||||
Expect: not bozo and encoding == 'iso-8859-1'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml;charset= "us-ascii"
|
||||
Description: application/xml + explicit charset
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml;charset = us-ascii
|
||||
Description: application/xml + charset overrides encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml
|
||||
Description: application/xml + no encoding
|
||||
Expect: not bozo and encoding == 'utf-8'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml-dtd; charset="us-ascii"
|
||||
Description: application/xml-dtd + explicit charset
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml-dtd; charset="us-ascii"
|
||||
Description: application/xml-dtd + charset overrides encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml-dtd
|
||||
Description: application/xml-dtd + no encoding
|
||||
Expect: not bozo and encoding == 'utf-8'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml-dtd
|
||||
Description: application/xml-dtd + explicit encoding
|
||||
Expect: not bozo and encoding == 'iso-8859-1'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml
|
||||
Description: application/xml + explicit encoding
|
||||
Expect: not bozo and encoding == 'iso-8859-1'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml-external-parsed-entity; charset="us-ascii"
|
||||
Description: application/xml-external-parsed-entity + explicit charset
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml-external-parsed-entity;charset=us-ascii
|
||||
Description: application/xml-external-parsed-entity + charset overrides encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml-external-parsed-entity
|
||||
Description: application/xml-external-parsed-entity + no encoding
|
||||
Expect: not bozo and encoding == 'utf-8'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml-external-parsed-entity
|
||||
Description: application/xml-parsed-entity + explicit encoding
|
||||
Expect: not bozo and encoding == 'iso-8859-1'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,13 +0,0 @@
|
|||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<!--
|
||||
Header: Content-type: application/atom+xml
|
||||
Description: crashes while resolving relative URIs when content contains attributes which contain (valid) non-ASCII characters
|
||||
Expect: not bozo
|
||||
-->
|
||||
<feed xmlns='http://www.w3.org/2005/Atom'>
|
||||
<entry>
|
||||
<content type='xhtml'><div xmlns='http://www.w3.org/1999/xhtml'>
|
||||
<img alt="Browser market shares at ‘ongoing’" />
|
||||
</div></content>
|
||||
</entry>
|
||||
</feed>
|
|
@ -1,13 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!--
|
||||
Header: Content-type: application/xml
|
||||
Description: application/xml with no charset (control for tests/illformed/encoding/http_i18n.xml)
|
||||
Expect: not bozo
|
||||
-->
|
||||
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
<title>Iñtërnâtiônàlizætiøn</title>
|
||||
<link rel='alternate' type='text/html' href='http://example.com/'/>
|
||||
<modified>2004-06-02T19:07:55-04:00</modified>
|
||||
<tagline>If your parser thinks this is well-formed, it's right.</tagline>
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/atom+xml;charset='us-ascii'
|
||||
Description: text/atom+xml + explicit charset
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: text/atom+xml; charset='us-ascii'
|
||||
Description: text/atom+xml + charset overrides encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/atom+xml
|
||||
Description: text/atom+xml + no encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: text/atom+xml
|
||||
Description: text/atom+xml + explicit encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/rss+xml;charset= 'us-ascii'
|
||||
Description: text/rss+xml + explicit charset
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: text/rss+xml;charset= "us-ascii"
|
||||
Description: text/rss+xml + charset overrides encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/rss+xml
|
||||
Description: text/rss+xml + no encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: text/rss+xml
|
||||
Description: text/rss+xml + explicit encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/xml;
|
||||
Description: text/xml + bogus charset
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/xml; charset:iso-8859-1
|
||||
Description: text/xml + bogus parameter
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/xml;charset= "us-ascii"
|
||||
Description: text/xml + explicit charset
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,16 +0,0 @@
|
|||
<!--
|
||||
SkipUnless: __import__('codecs').lookup('windows-1252')
|
||||
Header: Content-type: text/xml; charset=windows-1252
|
||||
Description: text/xml + explicit charset (this one is harder than the others)
|
||||
Expect: not bozo and entries[0]['description'] == u'This is a \xa3\u201ctest.\u201d'
|
||||
-->
|
||||
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<title>Foo</title>
|
||||
<link>http://purl.org/rss/2.0/?item</link>
|
||||
<description>This is a £“test.”</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: text/xml;charset = us-ascii
|
||||
Description: text/xml + charset overrides encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,17 +0,0 @@
|
|||
<?xml version='1.0' encoding='iso-8859-1'?>
|
||||
<!--
|
||||
SkipUnless: __import__('codecs').lookup('windows-1252')
|
||||
Header: Content-type: text/xml; charset=windows-1252
|
||||
Description: text/xml + charset overrides encoding (this one is harder than the others)
|
||||
Expect: not bozo and entries[0]['description'] == u'This is a \xa3\u201ctest.\u201d'
|
||||
-->
|
||||
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<item>
|
||||
<title>Foo</title>
|
||||
<link>http://purl.org/rss/2.0/?item</link>
|
||||
<description>This is a £“test.”</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/xml
|
||||
Description: text/xml + no encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<rss version="2.0">
|
||||
</rss>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/xml-external-parsed-entity; charset="us-ascii"
|
||||
Description: text/xml-external-parsed-entity + explicit charset
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: text/xml-external-parsed-entity;charset=us-ascii
|
||||
Description: text/xml-external-parsed-entity + charset overrides encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/xml-external-parsed-entity
|
||||
Description: text/xml-external-parsed-entity + no encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Header: Content-type: text/xml-external-parsed-entity
|
||||
Description: text/xml-parsed-entity + explicit encoding
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Header: Content-type: text/xml; qs=0.9
|
||||
Description: text/xml + qs value
|
||||
Expect: not bozo and encoding == 'us-ascii'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
Binary file not shown.
Binary file not shown.
|
@ -1,7 +0,0 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Description: no content-type and no encoding
|
||||
Expect: not bozo and encoding == 'utf-8'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
|
@ -1,7 +0,0 @@
|
|||
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||
<!--
|
||||
Description: no content-type + explicit encoding
|
||||
Expect: not bozo and encoding == 'iso-8859-1'
|
||||
-->
|
||||
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
|
||||
</feed>
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.