2017-10-09 00:32:58 +00:00
|
|
|
# Copyright 2017 Virgil Dupras
|
2014-10-05 20:31:16 +00:00
|
|
|
#
|
2015-01-03 21:33:16 +00:00
|
|
|
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
2014-10-05 20:31:16 +00:00
|
|
|
# which should be included with this package. The terms are also available at
|
2015-01-03 21:33:16 +00:00
|
|
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
2009-06-18 18:37:53 +00:00
|
|
|
|
2017-10-09 00:32:58 +00:00
|
|
|
import os
|
2010-08-15 12:42:55 +00:00
|
|
|
from xml.etree import ElementTree as ET
|
2011-01-26 11:50:44 +00:00
|
|
|
import logging
|
2009-06-01 09:55:11 +00:00
|
|
|
|
2014-10-05 20:31:16 +00:00
|
|
|
from hscommon.jobprogress import job
|
2011-01-11 10:59:53 +00:00
|
|
|
from hscommon.path import Path
|
2011-01-11 12:36:05 +00:00
|
|
|
from hscommon.util import FileOrPath
|
2021-08-28 09:05:07 +00:00
|
|
|
from hscommon.trans import tr
|
2009-06-01 09:55:11 +00:00
|
|
|
|
2009-10-23 12:56:52 +00:00
|
|
|
from . import fs
|
|
|
|
|
2013-08-18 22:36:09 +00:00
|
|
|
# Names exported by ``from <this module> import *``.
__all__ = ["Directories", "DirectoryState", "AlreadyThereError", "InvalidPathError"]
|
|
|
|
|
2020-01-01 02:16:27 +00:00
|
|
|
|
2011-04-12 11:22:29 +00:00
|
|
|
class DirectoryState:
    """Plain-integer pseudo-enum describing how a folder should be considered.

    * DirectoryState.NORMAL (0): Scan all files normally
    * DirectoryState.REFERENCE (1): Scan files, but make sure never to delete any of them
    * DirectoryState.EXCLUDED (2): Don't scan this folder
    """

    # Kept as bare ints (not ``enum`` members): the values are compared, formatted and
    # round-tripped through ``int()`` / ``str()`` as plain integers.
    NORMAL, REFERENCE, EXCLUDED = range(3)
|
2009-06-01 09:55:11 +00:00
|
|
|
|
2020-01-01 02:16:27 +00:00
|
|
|
|
2009-06-01 09:55:11 +00:00
|
|
|
class AlreadyThereError(Exception):
    """Signals that the path being added is already part of the directory list."""
|
|
|
|
|
2020-01-01 02:16:27 +00:00
|
|
|
|
2009-06-01 09:55:11 +00:00
|
|
|
class InvalidPathError(Exception):
    """Signals that the path being added is not valid."""
|
|
|
|
|
2020-01-01 02:16:27 +00:00
|
|
|
|
2011-01-26 11:50:44 +00:00
|
|
|
class Directories:
    """Holds user folder selection.

    Manages the selection that the user makes through the folder selection dialog. It also manages
    folder states, and how recursion applies to them.

    Then, when the user starts the scan, :meth:`get_files` is called to retrieve all files (wrapped
    in :mod:`core.fs`) that have to be scanned according to the chosen folders/states.
    """

    # ---Override
    def __init__(self, exclude_list=None):
        """Create an empty selection.

        :param exclude_list: optional exclusion filter. The only members this class uses on it are
                             ``mark_count``, ``compiled`` (iterable of compiled regexes) and
                             ``is_excluded(root, name)``.
        """
        # Root folders added through add_path(), in insertion order.
        self._dirs = []
        # {path: state}
        self.states = {}
        self._exclude_list = exclude_list

    def __contains__(self, path):
        """Return True if ``path in root`` holds for at least one selected root folder."""
        return any(path in selected for selected in self._dirs)

    def __delitem__(self, key):
        """Remove the selected root folder at index (or slice) ``key``."""
        del self._dirs[key]

    def __getitem__(self, key):
        """Return the selected root folder at index (or slice) ``key``."""
        return self._dirs[key]

    def __len__(self):
        """Return the number of selected root folders."""
        return len(self._dirs)

    # ---Private
    def _default_state_for_path(self, path):
        """Return the state ``path`` has when none was explicitly set, or ``None`` for "no opinion".

        Override this in subclasses to specify the state of some special folders.

        :param Path path: folder to evaluate; only its ``name`` is looked at.
        :rtype: :class:`DirectoryState` or None
        """
        exclude_list = self._exclude_list
        # New logic with regex filters
        if exclude_list is not None and exclude_list.mark_count > 0:
            folder_name = str(path.name)
            if any(denied_path_re.match(folder_name) for denied_path_re in exclude_list.compiled):
                return DirectoryState.EXCLUDED
        # We still use the old logic to force the state on hidden dirs, even when an exclusion
        # list is in use.
        if path.name.startswith("."):
            return DirectoryState.EXCLUDED
        return None

    def _get_files(self, from_path, fileclasses, j):
        """Walk ``from_path`` and yield every file object found in non-excluded folders.

        :param from_path: root folder to walk (converted with ``str()`` for :func:`os.walk`).
        :param fileclasses: forwarded as-is to ``fs.get_file``.
        :param j: job object; ``check_if_cancelled()`` is called once per visited folder.

        Each yielded file has ``is_ref`` set to whether its folder is in the REFERENCE state.
        Folders that raise ``EnvironmentError`` or ``fs.InvalidPath`` are silently skipped.
        """
        for root, dirs, files in os.walk(str(from_path)):
            j.check_if_cancelled()
            root_path = Path(root)
            state = self.get_state(root_path)
            if state == DirectoryState.EXCLUDED and not any(p[: len(root_path)] == root_path for p in self.states):
                # Recursively get files from folders with lots of subfolder is expensive. However, there
                # might be a subfolder in this path that is not excluded. What we want to do is to skim
                # through self.states and see if we must continue, or we can stop right here to save time
                # NOTE(review): when root_path itself is a key of self.states (explicit or cached
                # by get_state), the prefix test matches root_path too, so pruning then only
                # kicks in one level deeper. Results are unaffected; confirm whether intended.
                del dirs[:]  # in-place edit: tells os.walk not to descend any further
            try:
                if state != DirectoryState.EXCLUDED:
                    exclude_list = self._exclude_list
                    if exclude_list is None or not exclude_list.mark_count:
                        # Old logic: every file is a candidate.
                        candidate_names = files
                    else:
                        # Lazy on purpose: the filter and fs.get_file stay interleaved per file.
                        candidate_names = (name for name in files if not exclude_list.is_excluded(root, name))
                    found_files = []
                    for name in candidate_names:
                        found = fs.get_file(root_path + name, fileclasses=fileclasses)
                        if found is not None:
                            found_files.append(found)
                    # In some cases, directories can be considered as files by dupeGuru, which is
                    # why we have this loop below. In fact, there is only one case: Bundle files
                    # under OS X... In other situations, this loop will do nothing.
                    for d in dirs[:]:  # iterate over a copy, entries are removed while looping
                        bundle = fs.get_file(root_path + d, fileclasses=fileclasses)
                        if bundle is not None:
                            found_files.append(bundle)
                            # A folder treated as a file must not be walked into.
                            dirs.remove(d)
                    logging.debug(
                        "Collected %d files in folder %s",
                        len(found_files),
                        str(root_path),
                    )
                    is_ref = state == DirectoryState.REFERENCE
                    for found in found_files:
                        found.is_ref = is_ref
                        yield found
            except (EnvironmentError, fs.InvalidPath):
                pass

    def _get_folders(self, from_folder, j):
        """Yield ``from_folder`` and all its subfolders, children first, skipping excluded ones.

        :param from_folder: folder object exposing ``subfolders`` and ``path``.
        :param j: job object; ``check_if_cancelled()`` is called once per visited folder.

        Each yielded folder has ``is_ref`` set to whether it is in the REFERENCE state. A folder
        raising ``EnvironmentError`` or ``fs.InvalidPath`` ends the traversal of that folder.
        """
        j.check_if_cancelled()
        try:
            for subfolder in from_folder.subfolders:
                yield from self._get_folders(subfolder, j)
            state = self.get_state(from_folder.path)
            if state != DirectoryState.EXCLUDED:
                from_folder.is_ref = state == DirectoryState.REFERENCE
                logging.debug("Yielding Folder %r state: %d", from_folder, state)
                yield from_folder
        except (EnvironmentError, fs.InvalidPath):
            pass

    # ---Public
    def add_path(self, path):
        """Adds ``path`` to self, if not already there.

        Raises :exc:`AlreadyThereError` if ``path`` is already in self. If path is a directory
        containing some of the directories already present in self, ``path`` will be added, but all
        directories under it will be removed. Can also raise :exc:`InvalidPathError` if ``path``
        does not exist.

        :param Path path: path to add
        """
        if path in self:
            raise AlreadyThereError()
        if not path.exists():
            raise InvalidPathError()
        # Drop the roots that the new path makes redundant before adding it.
        self._dirs = [p for p in self._dirs if p not in path]
        self._dirs.append(path)

    @staticmethod
    def get_subfolders(path):
        """Returns a sorted list of paths corresponding to subfolders in ``path``.

        The sort is case-insensitive on the folder name. An ``EnvironmentError`` while listing
        results in an empty list.

        :param Path path: get subfolders from there
        :rtype: list of Path
        """
        try:
            subpaths = [p for p in path.listdir() if p.isdir()]
        except EnvironmentError:
            return []
        subpaths.sort(key=lambda x: x.name.lower())
        return subpaths

    def get_files(self, fileclasses=None, j=job.nulljob):
        """Yields all files that are not excluded (this is a generator, not a list).

        Returned files also have their ``is_ref`` attr set if applicable.

        :param fileclasses: forwarded to ``fs.get_file``; defaults to ``[fs.File]``.
        :param j: job used for cancellation checks and progress reporting.
        """
        if fileclasses is None:
            fileclasses = [fs.File]
        # Building the translated progress message is pointless for the null job.
        report_progress = not isinstance(j, job.NullJob)
        file_count = 0
        for path in self._dirs:
            for file in self._get_files(path, fileclasses=fileclasses, j=j):
                file_count += 1
                if report_progress:
                    j.set_progress(-1, tr("Collected {} files to scan").format(file_count))
                yield file

    def get_folders(self, folderclass=None, j=job.nulljob):
        """Yields all folders that are not excluded (this is a generator, not a list).

        Returned folders also have their ``is_ref`` attr set if applicable.

        :param folderclass: callable building a folder object from a path; defaults to
                            ``fs.Folder``.
        :param j: job used for cancellation checks and progress reporting.
        """
        if folderclass is None:
            folderclass = fs.Folder
        # Building the translated progress message is pointless for the null job.
        report_progress = not isinstance(j, job.NullJob)
        folder_count = 0
        for path in self._dirs:
            from_folder = folderclass(path)
            for folder in self._get_folders(from_folder, j):
                folder_count += 1
                if report_progress:
                    j.set_progress(-1, tr("Collected {} folders to scan").format(folder_count))
                yield folder

    def get_state(self, path):
        """Returns the state of ``path``.

        Lookup order: an entry in ``self.states`` for ``path`` itself, then the default state
        from :meth:`_default_state_for_path` (which gets cached in ``self.states`` when it is
        not NORMAL), then the state of the longest parent path found in ``self.states``, and
        finally NORMAL.

        :rtype: :class:`DirectoryState`
        """
        # direct match? easy result.
        if path in self.states:
            return self.states[path]
        state = self._default_state_for_path(path) or DirectoryState.NORMAL
        # Save non-default states in cache, necessary for _get_files()
        if state != DirectoryState.NORMAL:
            self.states[path] = state
            return state

        # we loop through the states to find the longest matching prefix
        # if the parent has a state in cache, return that state
        longest = 0
        for candidate, candidate_state in self.states.items():
            if candidate.is_parent_of(path) and len(candidate) > longest:
                longest = len(candidate)
                state = candidate_state
        return state

    def has_any_file(self):
        """Returns whether selected folders contain any file.

        Because it stops at the first file it finds, it's much faster than get_files().

        :rtype: bool
        """
        # any() stops consuming the generator as soon as one file is produced.
        return any(True for _ in self.get_files())

    def load_from_file(self, infile):
        """Load folder selection from ``infile``.

        Loading is best-effort: an unreadable/unparsable file leaves self untouched, and
        individual entries that are incomplete, already present, invalid or carry a
        non-integer state value are skipped.

        :param file infile: path or file pointer to XML generated through :meth:`save_to_file`
        """
        try:
            root = ET.parse(infile).getroot()
        except Exception:
            # Missing or corrupt file: nothing to load.
            return
        for rdn in root.iter("root_directory"):
            attrib = rdn.attrib
            if "path" not in attrib:
                continue
            try:
                self.add_path(Path(attrib["path"]))
            except (AlreadyThereError, InvalidPathError):
                pass
        for sn in root.iter("state"):
            attrib = sn.attrib
            if not ("path" in attrib and "value" in attrib):
                continue
            try:
                state = int(attrib["value"])
            except ValueError:
                # A single malformed entry must not abort the whole load.
                logging.warning(
                    "Ignoring state with invalid value %r for path %s",
                    attrib["value"],
                    attrib["path"],
                )
                continue
            self.states[Path(attrib["path"])] = state

    def save_to_file(self, outfile):
        """Save folder selection as XML to ``outfile``.

        The document has a ``directories`` root holding one ``root_directory`` element per
        selected folder and one ``state`` element per entry of ``self.states``.

        :param file outfile: path or file pointer to XML file to save to.
        """
        # Build the whole tree before touching the output so a failure here cannot leave a
        # truncated file behind.
        root = ET.Element("directories")
        for root_path in self._dirs:
            root_path_node = ET.SubElement(root, "root_directory")
            root_path_node.set("path", str(root_path))
        for path, state in self.states.items():
            state_node = ET.SubElement(root, "state")
            state_node.set("path", str(path))
            state_node.set("value", str(state))
        tree = ET.ElementTree(root)
        with FileOrPath(outfile, "wb") as fp:
            tree.write(fp, encoding="utf-8")

    def set_state(self, path, state):
        """Set the state of folder at ``path``.

        Does nothing when ``path`` already resolves to ``state``. Otherwise, states recorded for
        paths under ``path`` are discarded so that they inherit the new state.

        :param Path path: path of the target folder
        :param state: state to set folder to
        :type state: :class:`DirectoryState`
        """
        if self.get_state(path) == state:
            return
        # Snapshot the keys: entries are deleted while looping.
        for iter_path in list(self.states):
            if path.is_parent_of(iter_path):
                del self.states[iter_path]
        self.states[path] = state
|