From 9c5e71d67750b81bae6e7b2004a03536dea66469 Mon Sep 17 00:00:00 2001 From: Prinz23 Date: Sat, 28 Oct 2023 02:31:34 +0200 Subject: [PATCH 1/4] add exclude_dirs parameter to scantree --- lib/sg_helpers.py | 18 ++++++++++++------ sickgear/helpers.py | 8 ++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/lib/sg_helpers.py b/lib/sg_helpers.py index 89835561..8e585ffd 100644 --- a/lib/sg_helpers.py +++ b/lib/sg_helpers.py @@ -1079,6 +1079,7 @@ def save_failure(url, domain, log_failure_url, post_data, post_json): def scantree(path, # type: AnyStr exclude=None, # type: Optional[AnyStr, List[AnyStr]] + exclude_dirs=None, # type: Optional[AnyStr, List[AnyStr]] include=None, # type: Optional[AnyStr, List[AnyStr]] follow_symlinks=False, # type: bool filter_kind=None, # type: Optional[bool] @@ -1089,7 +1090,8 @@ def scantree(path, # type: AnyStr """Yield DirEntry objects for given path. Returns without yield if path fails sanity check :param path: Path to scan, sanity check is_dir and exists - :param exclude: Escaped regex string(s) to exclude + :param exclude: Escaped regex string(s) to exclude (files and directories) + :param exclude_dirs: Escaped regex string(s) to exclude (directories only) :param include: Escaped regex string(s) to include :param follow_symlinks: Follow symlinks :param filter_kind: None to yield everything, True yields directories, False yields files @@ -1097,22 +1099,26 @@ def scantree(path, # type: AnyStr :param exclude_folders_with_files: exclude folder that contain the listed file(s) """ if isinstance(path, string_types) and path and os.path.isdir(path): - rc_exc, rc_inc = [re.compile(rx % '|'.join( + rc_exc, rc_exc_dir, rc_inc = [re.compile(rx % '|'.join( [x for x in (param, ([param], [])[None is param])[not isinstance(param, list)]])) - for rx, param in ((r'(?i)^(?:(?!%s).)*$', exclude), (r'(?i)%s', include))] + for rx, param in ((r'(?i)^(?:(?!%s).)*$', exclude), (r'(?i)^(?:(?!%s).)*$', exclude_dirs), + (r'(?i)%s', include))] for entry in scandir(path): is_dir = entry.is_dir(follow_symlinks=follow_symlinks) is_file = entry.is_file(follow_symlinks=follow_symlinks) no_filter = any([None is filter_kind, filter_kind and is_dir, not filter_kind and is_file]) - if (rc_exc.search(entry.name), True)[not exclude] and (rc_inc.search(entry.name), True)[not include] \ - and (no_filter or (not filter_kind and is_dir and recurse)): + if ((rc_exc.search(entry.name), True)[not exclude] + and (rc_exc_dir.search(entry.name), True)[not exclude_dirs or not is_dir] + and (rc_inc.search(entry.name), True)[not include] + and (no_filter or (not filter_kind and is_dir and recurse))): if is_dir and exclude_folders_with_files and any(os.path.isfile(os.path.join(entry.path, e_f)) for e_f in exclude_folders_with_files): logger.debug(f'Ignoring Folder: "{entry.path}", because it contains a exclude file' f' "{", ".join(exclude_folders_with_files)}"') continue if recurse and is_dir: - for subentry in scantree(entry.path, exclude, include, follow_symlinks, filter_kind, recurse): + for subentry in scantree(entry.path, exclude, exclude_dirs, include, follow_symlinks, filter_kind, + recurse): yield subentry if no_filter: yield entry diff --git a/sickgear/helpers.py b/sickgear/helpers.py index 15e84e77..79787ba1 100644 --- a/sickgear/helpers.py +++ b/sickgear/helpers.py @@ -354,10 +354,10 @@ def list_media_files(path): if [direntry for direntry in scantree(path, include=[r'\.sickgearignore'], filter_kind=False, recurse=False)]: logger.debug('Skipping folder "%s" because it contains ".sickgearignore"' % path) else: - result = [direntry.path for direntry in scantree(path, exclude=[ - 'Extras', - 'Behind The Scenes', 'Deleted Scenes', 'Featurettes', - 'Interviews', 'Scenes', 'Shorts', 'Trailers', 'Other' + result = [direntry.path for direntry in scantree(path, exclude_dirs=[ + '^Extras$', + '^Behind The Scenes$', '^Deleted Scenes$', '^Featurettes$', + '^Interviews$', '^Scenes$', '^Shorts$', '^Trailers$', '^Other$' ], filter_kind=False, exclude_folders_with_files=['.sickgearignore']) if has_media_ext(direntry.name)] return result From 67d73fa4017f108785b53fbb80443d911ab30c6a Mon Sep 17 00:00:00 2001 From: Prinz23 Date: Sat, 28 Oct 2023 02:37:13 +0200 Subject: [PATCH 2/4] revert has_media_ext ... since these don't apply to filenames (dirs only) --- sickgear/helpers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sickgear/helpers.py b/sickgear/helpers.py index 79787ba1..81fc637b 100644 --- a/sickgear/helpers.py +++ b/sickgear/helpers.py @@ -158,9 +158,7 @@ def has_media_ext(filename): return False sep_file = filename.rpartition('.') - return (None is re.search( - r'(extras|-(?:Behind\sThe\sScenes|Deleted\sScenes|Featurettes|Interviews|Scenes|Shorts|Trailers|Other))$', - sep_file[0], re.I)) and (sep_file[2].lower() in mediaExtensions) + return (None is re.search('extras?$', sep_file[0], re.I)) and (sep_file[2].lower() in mediaExtensions) def has_image_ext(filename): From 54765a5432d9e524ddc00a30876a08100a6336d6 Mon Sep 17 00:00:00 2001 From: Prinz23 Date: Sat, 28 Oct 2023 09:49:16 +0200 Subject: [PATCH 3/4] optimize scantree --- lib/sg_helpers.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/lib/sg_helpers.py b/lib/sg_helpers.py index 8e585ffd..b412d21e 100644 --- a/lib/sg_helpers.py +++ b/lib/sg_helpers.py @@ -1084,7 +1084,11 @@ def scantree(path, # type: AnyStr follow_symlinks=False, # type: bool filter_kind=None, # type: Optional[bool] recurse=True, # type: bool - exclude_folders_with_files=None # type: Optional[List[AnyStr]] + exclude_folders_with_files=None, # type: Optional[List[AnyStr]] + internal_call=False, # type: bool + rc_exc=None, # type: List + rc_exc_dir=None, # type: List + rc_inc=None # type: List ): # type: (...) -> Generator[DirEntry, None, None] """Yield DirEntry objects for given path. Returns without yield if path fails sanity check @@ -1097,19 +1101,24 @@ def scantree(path, # type: AnyStr :param filter_kind: None to yield everything, True yields directories, False yields files :param recurse: Recursively scan the tree :param exclude_folders_with_files: exclude folder that contain the listed file(s) + :param internal_call: internal use + :param rc_exc: internal use + :param rc_exc_dir: internal use + :param rc_inc: internal use """ if isinstance(path, string_types) and path and os.path.isdir(path): - rc_exc, rc_exc_dir, rc_inc = [re.compile(rx % '|'.join( - [x for x in (param, ([param], [])[None is param])[not isinstance(param, list)]])) - for rx, param in ((r'(?i)^(?:(?!%s).)*$', exclude), (r'(?i)^(?:(?!%s).)*$', exclude_dirs), - (r'(?i)%s', include))] + if not internal_call: + rc_exc, rc_exc_dir, rc_inc = [re.compile(rx % '|'.join( + [x for x in (param, ([param], [])[None is param])[not isinstance(param, list)]])) + for rx, param in ((r'(?i)^(?:(?!%s).)*$', exclude), (r'(?i)^(?:(?!%s).)*$', exclude_dirs), + (r'(?i)%s', include))] for entry in scandir(path): is_dir = entry.is_dir(follow_symlinks=follow_symlinks) is_file = entry.is_file(follow_symlinks=follow_symlinks) no_filter = any([None is filter_kind, filter_kind and is_dir, not filter_kind and is_file]) - if ((rc_exc.search(entry.name), True)[not exclude] - and (rc_exc_dir.search(entry.name), True)[not exclude_dirs or not is_dir] - and (rc_inc.search(entry.name), True)[not include] + if ((not exclude or rc_exc.search(entry.name)) + and (not exclude_dirs or not is_dir or rc_exc_dir.search(entry.name)) + and (not include or rc_inc.search(entry.name)) and (no_filter or (not filter_kind and is_dir and recurse))): if is_dir and exclude_folders_with_files and any(os.path.isfile(os.path.join(entry.path, e_f)) for e_f in exclude_folders_with_files): @@ -1117,8 +1126,11 @@ def scantree(path, # type: AnyStr f' "{", ".join(exclude_folders_with_files)}"') continue if recurse and is_dir: - for subentry in scantree(entry.path, exclude, exclude_dirs, include, follow_symlinks, filter_kind, - recurse): + for subentry in scantree( + path=entry.path, exclude=exclude, exclude_dirs=exclude_dirs, include=include, + follow_symlinks=follow_symlinks, filter_kind=filter_kind, recurse=recurse, + exclude_folders_with_files=exclude_folders_with_files, internal_call=True, + rc_exc=rc_exc, rc_exc_dir=rc_exc_dir, rc_inc=rc_inc): yield subentry if no_filter: yield entry From 957acb9c695090b048f864197a90e4b1cba5770c Mon Sep 17 00:00:00 2001 From: Prinz23 Date: Sat, 28 Oct 2023 10:09:36 +0200 Subject: [PATCH 4/4] make sure that scantree is using key words arg --- sickgear/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sickgear/helpers.py b/sickgear/helpers.py index 81fc637b..0abbe5ae 100644 --- a/sickgear/helpers.py +++ b/sickgear/helpers.py @@ -1016,7 +1016,7 @@ def clear_cache(force=False): dirty = None del_time = SGDatetime.timestamp_near(td=datetime.timedelta(hours=12)) direntry_args = dict(follow_symlinks=False) - for direntry in scantree(sickgear.CACHE_DIR, ['images|rss|zoneinfo'], follow_symlinks=True): + for direntry in scantree(sickgear.CACHE_DIR, exclude_dirs=['images|rss|zoneinfo'], follow_symlinks=True): if direntry.is_file(**direntry_args) and (force or del_time > direntry.stat(**direntry_args).st_mtime): dirty = dirty or False if remove_file_perm(direntry.path) else True elif direntry.is_dir(**direntry_args) and direntry.name not in ['cheetah', 'sessions', 'indexers']: