# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import calendar
import re
import time

# ISO-8601 date parsing routines written by Fazal Majid.
# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601
# parser is beyond the scope of feedparser and would be a worthwhile addition
# to the Python library.
# A single regular expression cannot parse ISO 8601 date formats into groups
# as the standard is highly irregular (for instance is 030104 2003-01-04 or
# 0301-04-01), so we use templates instead.
# Please note the order in templates is significant because we need a
# greedy match.
_iso8601_tmpl = [
    "YYYY-?MM-?DD",
    "YYYY-0MM?-?DD",
    "YYYY-MM",
    "YYYY-?OOO",
    "YY-?MM-?DD",
    "YY-?OOO",
    "YYYY",
    "-YY-?MM",
    "-OOO",
    "-YY",
    "--MM-?DD",
    "--MM",
    "---DD",
    "CC",
    "",
]

_iso8601_re = [
    tmpl.replace("YYYY", r"(?P<year>\d{4})")
    .replace("YY", r"(?P<year>\d\d)")
    .replace("MM", r"(?P<month>[01]\d)")
    .replace("DD", r"(?P<day>[0123]\d)")
    .replace("OOO", r"(?P<ordinal>[0123]\d\d)")
    .replace("CC", r"(?P<century>\d\d$)")
    + r"(T?(?P<hour>\d{2}):(?P<minute>\d{2})"
    + r"(:(?P<second>\d{2}))?"
    + r"(\.(?P<fracsecond>\d+))?"
    + r"(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?"
    for tmpl in _iso8601_tmpl
]
_iso8601_matches = [re.compile(regex).match for regex in _iso8601_re]


def _parse_date_iso8601(date_string):
    """Parse a variety of ISO-8601-compatible formats like 20040105.

    The matchers in ``_iso8601_matches`` are tried in order and the first
    one that matches is used. They only anchor at the start of the string,
    so trailing characters are ignored. Date fields missing from the input
    are filled in from the current UTC date (see the comments below), a
    two-digit year is placed in the current century, and a numeric UTC
    offset is applied so that the result is expressed in UTC. Fractional
    seconds are matched but discarded.

    Returns a ``time.struct_time`` in UTC, or ``None`` when nothing in
    *date_string* is recognized or the resulting date cannot be represented.
    """
    for match_template in _iso8601_matches:
        m = match_template(date_string)
        if m:
            break
    else:
        return None
    # A zero-width match means that neither a date nor a time was found.
    if m.span() == (0, 0):
        return None

    params = m.groupdict()
    # Take a single snapshot of "now" so that every defaulted field comes
    # from the same instant.
    now = time.gmtime()

    ordinal = int(params.get("ordinal") or 0)

    year = params.get("year")
    if not year:
        year = now.tm_year
    elif len(year) == 2:
        # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993
        year = 100 * (now.tm_year // 100) + int(year)
    else:
        year = int(year)

    month = params.get("month")
    if month:
        month = int(month)
    elif ordinal:
        # Ordinal dates are simulated as "day <ordinal> of January"; the
        # normalization at the end turns that into a real month and day.
        month = 1
    else:
        month = now.tm_mon

    day = params.get("day")
    if day:
        day = int(day)
    elif ordinal:
        # see above
        day = ordinal
    elif params.get("century") or params.get("year") or params.get("month"):
        # A coarser field was given explicitly: use the first day.
        day = 1
    else:
        day = now.tm_mday

    # special case of the century - is the first year of the 21st century
    # 2000 or 2001 ? The debate goes on...
    # (the "century" group only exists in the pattern built from "CC")
    if "century" in params:
        year = (int(params["century"]) - 1) * 100 + 1

    # in ISO 8601 most fields are optional
    hour = int(params.get("hour") or 0)
    minute = int(params.get("minute") or 0)
    second = int(params.get("second") or 0)

    # ISO 8601 time zone adjustments: shift the wall-clock time to UTC.
    # The pattern only lets a non-"Z" designator start with "+" or "-".
    tz = params.get("tz")
    if tz and tz != "Z":
        offset_hours = int(params.get("tzhour") or 0)
        offset_minutes = int(params.get("tzmin") or 0)
        if tz[0] == "-":
            hour += offset_hours
            minute += offset_minutes
        else:
            hour -= offset_hours
            minute -= offset_minutes

    # calendar.timegm() normalizes out-of-range days, hours, minutes and
    # seconds, but needs a month in 1..12, so carry month overflow (the
    # pattern admits 00-19) into the year first.
    year_carry, month_index = divmod(month - 1, 12)
    year += year_carry
    month = month_index + 1

    # The fields are already in UTC, so normalize them with the UTC pair
    # timegm()/gmtime(). Round-tripping through mktime()/localtime() would
    # reinterpret them in the process's local time zone, which shifts times
    # that fall into a daylight-saving gap and reports the local DST flag.
    try:
        timestamp = calendar.timegm((year, month, day, hour, minute, second))
        return time.gmtime(timestamp)
    except (ValueError, OverflowError, OSError):
        # The date is outside the range the platform can represent.
        return None