2023-04-13 07:04:58 +00:00
|
|
|
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
|
2023-01-12 01:04:47 +00:00
|
|
|
# Copyright 2002-2008 Mark Pilgrim
|
|
|
|
# All rights reserved.
|
|
|
|
#
|
|
|
|
# This file is a part of feedparser.
|
|
|
|
#
|
|
|
|
# Redistribution and use in source and binary forms, with or without
|
|
|
|
# modification, are permitted provided that the following conditions are met:
|
|
|
|
#
|
|
|
|
# * Redistributions of source code must retain the above copyright notice,
|
|
|
|
# this list of conditions and the following disclaimer.
|
|
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
# this list of conditions and the following disclaimer in the documentation
|
|
|
|
# and/or other materials provided with the distribution.
|
|
|
|
#
|
|
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
|
|
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
2023-04-13 07:04:58 +00:00
|
|
|
from __future__ import annotations
|
2023-01-12 01:04:47 +00:00
|
|
|
|
2023-04-13 07:04:58 +00:00
|
|
|
import typing
|
2023-01-12 01:04:47 +00:00
|
|
|
|
2023-04-13 07:04:58 +00:00
|
|
|
import requests
|
2023-01-12 01:04:47 +00:00
|
|
|
|
2023-04-13 07:04:58 +00:00
|
|
|
from .datetimes import _parse_date
|
2023-01-12 01:04:47 +00:00
|
|
|
|
2023-04-13 07:04:58 +00:00
|
|
|
# HTTP "Accept" header to send to servers when downloading feeds.
|
|
|
|
ACCEPT_HEADER: str = (
|
|
|
|
"application/atom+xml"
|
|
|
|
",application/rdf+xml"
|
|
|
|
",application/rss+xml"
|
|
|
|
",application/x-netcdf"
|
|
|
|
",application/xml"
|
|
|
|
";q=0.9,text/xml"
|
|
|
|
";q=0.2,*/*"
|
|
|
|
";q=0.1"
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def get(url: str, result: dict[str, typing.Any]) -> bytes:
|
|
|
|
from . import USER_AGENT
|
|
|
|
|
|
|
|
agent = USER_AGENT
|
|
|
|
|
|
|
|
try:
|
|
|
|
response = requests.get(
|
|
|
|
url,
|
|
|
|
headers={"User-Agent": agent, "Accept": ACCEPT_HEADER},
|
|
|
|
timeout=10,
|
|
|
|
)
|
|
|
|
except requests.RequestException as exception:
|
|
|
|
result["bozo"] = True
|
|
|
|
result["bozo_exception"] = exception
|
|
|
|
return b""
|
|
|
|
|
|
|
|
# Lowercase the HTTP header keys for comparisons per RFC 2616.
|
|
|
|
result["headers"] = {k.lower(): v for k, v in response.headers.items()}
|
2023-01-12 01:04:47 +00:00
|
|
|
|
|
|
|
# save HTTP headers
|
2023-04-13 07:04:58 +00:00
|
|
|
if "etag" in result["headers"]:
|
|
|
|
result["etag"] = result["headers"]["etag"]
|
|
|
|
if "last-modified" in result["headers"]:
|
|
|
|
modified = result["headers"]["last-modified"]
|
2023-01-12 01:04:47 +00:00
|
|
|
if modified:
|
2023-04-13 07:04:58 +00:00
|
|
|
result["modified"] = modified
|
|
|
|
result["modified_parsed"] = _parse_date(modified)
|
|
|
|
result["href"] = response.url
|
|
|
|
result["status"] = response.status_code
|
|
|
|
return response.content
|