# -*- coding: utf-8 -*- # # Copyright (C) 2019 Chris Caron # All rights reserved. # # This code is licensed under the MIT License. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files(the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and / or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions : # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. import re import sys import json import contextlib import os import hashlib from itertools import chain from os.path import expanduser from functools import reduce from . 
def import_module(path, name):
    """
    Load a Python module from a source file and register it in sys.modules.

    Args:
        path: location of the file to load the module from.
        name: the key the module is registered under in sys.modules.

    Returns:
        The loaded module object, or None when the module could not be
        loaded (no loader is available for the path, or executing the file
        raised an exception).  Failures are reported through logger.debug()
        and never propagated to the caller.
    """
    spec = importlib.util.spec_from_file_location(name, path)
    try:
        module = importlib.util.module_from_spec(spec)

        # The module is registered before its body is executed
        sys.modules[name] = module
        spec.loader.exec_module(module)

    except Exception as e:
        # module isn't loadable; the failure may have happened before the
        # module was registered (spec is None when no loader matches the
        # path), so tolerate the entry being absent
        sys.modules.pop(name, None)
        module = None

        logger.debug(
            'Custom module exception raised from %s (name=%s) %s',
            path, name, str(e))

    return module


# Hash of all paths previously scanned so we don't waste effort/overhead doing
# it again
PATHS_PREVIOUSLY_SCANNED = set()

# URL Indexing Table for returns via parse_url()
# The below accepts and scans for:
#  - schema://
#  - schema://path
#  - schema://path?kwargs
#
# NOTE(review): the named groups below had lost their names (a bare `(?P`
# is not a valid construct and fails at compile time).  schema/path/kwargs
# follow the layout documented above; the path/query names used by
# VALID_QUERY_RE are presumed - confirm against the callers.
VALID_URL_RE = re.compile(
    r'^[\s]*((?P<schema>[^:\s]+):[/\\]+)?((?P<path>[^?]+)'
    r'(\?(?P<kwargs>.+))?)?[\s]*$',
)
VALID_QUERY_RE = re.compile(r'^(?P<path>.*[/\\])(?P<query>[^/\\]+)?$')

# delimiters used to separate values when content is passed in by string.
# This is useful when turning a string into a list
STRING_DELIMITERS = r'[\[\]\;,\s]+'

# Pre-Escape content since we reference it so much
ESCAPED_PATH_SEPARATOR = re.escape('\\/')
ESCAPED_WIN_PATH_SEPARATOR = re.escape('\\')
ESCAPED_NUX_PATH_SEPARATOR = re.escape('/')

# Group 1 captures the separator(s) to keep; group 2 captures the run of
# additional backslashes that follows it.
# NOTE(review): the `]` after the last `{2}` sits outside of any character
# class, so the third alternative only matches when a literal `]` follows the
# two separators - confirm whether this is intended before changing it.
TIDY_WIN_PATH_RE = re.compile(
    r'(^[%s]{2}|[^%s\s][%s]|[\s][%s]{2}])([%s]+)' % (
        ESCAPED_WIN_PATH_SEPARATOR,
        ESCAPED_WIN_PATH_SEPARATOR,
        ESCAPED_WIN_PATH_SEPARATOR,
        ESCAPED_WIN_PATH_SEPARATOR,
        ESCAPED_WIN_PATH_SEPARATOR,
    ),
)

# Group 1 captures everything ahead of any trailing whitespace/backslashes
TIDY_WIN_TRIM_RE = re.compile(
    r'^(.+[^:][^%s])[\s%s]*$' % (
        ESCAPED_WIN_PATH_SEPARATOR,
        ESCAPED_WIN_PATH_SEPARATOR,
    ),
)

# Group 1 captures a single forward slash; group 2 the extra ones after it
TIDY_NUX_PATH_RE = re.compile(
    r'([%s])([%s]+)' % (
        ESCAPED_NUX_PATH_SEPARATOR,
        ESCAPED_NUX_PATH_SEPARATOR,
    ),
)

# Group 1 captures the last character that precedes any trailing
# whitespace/forward slashes
TIDY_NUX_TRIM_RE = re.compile(
    r'([^%s])[\s%s]+$' % (
        ESCAPED_NUX_PATH_SEPARATOR,
        ESCAPED_NUX_PATH_SEPARATOR,
    ),
)

# The handling of custom arguments passed in the URL; we treat any
# argument (which would otherwise appear in the qsd area of our parse_url()
# function) differently if they start with a +, - or : value
#
# NOTE(review): the named groups in the four expressions below had lost their
# names (a bare `(?P` is not a valid construct and fails at compile time).
# `key` and `schema` are the presumed names - confirm against the callers.
NOTIFY_CUSTOM_ADD_TOKENS = re.compile(r'^( |\+)(?P<key>.*)\s*')
NOTIFY_CUSTOM_DEL_TOKENS = re.compile(r'^-(?P<key>.*)\s*')
NOTIFY_CUSTOM_COLON_TOKENS = re.compile(r'^:(?P<key>.*)\s*')

# Used for attempting to acquire the schema if the URL can't be parsed.
GET_SCHEMA_RE = re.compile(r'\s*(?P<schema>[a-z0-9]{2,9})://.*$', re.I)

# Used for validating that a provided entry is indeed a schema
# this is slightly different than the GET_SCHEMA_RE above which
# insists the schema is only valid with a :// entry.
this one # extrapolates the individual entries URL_DETAILS_RE = re.compile( r'\s*(?P[a-z0-9]{2,9})(://(?P.*))?$', re.I) # Regular expression based and expanded from: # http://www.regular-expressions.info/email.html # Extended to support colon (:) delimiter for parsing names from the URL # such as: # - 'Optional Name':user@example.com # - 'Optional Name' # # The expression also parses the general email as well such as: # - user@example.com # - label+user@example.com GET_EMAIL_RE = re.compile( r'(([\s"\']+)?(?P[^:<\'"]+)?[:<\s\'"]+)?' r'(?P((?P