# -*- coding: utf-8 -*-
# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com>
#
# This file is part of subliminal.
#
# subliminal is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# subliminal is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with subliminal.  If not, see <http://www.gnu.org/licenses/>.
import re
from six import string_types, text_type
from _23 import decode_str


__all__ = ['get_keywords', 'split_keyword', 'to_unicode']


def get_keywords(guess):
    """Retrieve keywords from guessed informations

    :param guess: guessed informations
    :type guess: :class:`guessit.guess.Guess`
    :return: lower case alphanumeric keywords
    :rtype: set

    """
    keywords = set()
    for k in ['releaseGroup', 'screenSize', 'videoCodec', 'format']:
        if k in guess:
            keywords = keywords | split_keyword(guess[k].lower())
    return keywords


def split_keyword(keyword):
    """Split a keyword in multiple ones on any non-alphanumeric character

    :param string keyword: keyword
    :return: keywords
    :rtype: set

    """
    split = set(re.findall(r'\w+', keyword))
    return split


def to_unicode(data):
    """Convert a basestring to unicode

    :param basestring data: data to decode
    :return: data as unicode
    :rtype: unicode

    """
    if not isinstance(data, string_types):
        raise ValueError('Basestring expected')
    if isinstance(data, text_type):
        return data
    for encoding in ('utf-8', 'latin-1'):
        try:
            return decode_str(data, encoding)
        except UnicodeDecodeError:
            pass
    return decode_str(data, 'utf-8', 'replace')