Code cleanups in core and other affected files

This commit is contained in:
Andrew Senetar 2021-08-21 18:02:02 -05:00
parent 1ef5f56158
commit d576a7043c
Signed by: arsenetar
GPG Key ID: C63300DCE48AB2F1
27 changed files with 281 additions and 293 deletions

View File

@ -48,31 +48,31 @@ MSG_MANY_FILES_TO_OPEN = tr(
class DestType:
Direct = 0
Relative = 1
Absolute = 2
DIRECT = 0
RELATIVE = 1
ABSOLUTE = 2
class JobType:
Scan = "job_scan"
Load = "job_load"
Move = "job_move"
Copy = "job_copy"
Delete = "job_delete"
SCAN = "job_scan"
LOAD = "job_load"
MOVE = "job_move"
COPY = "job_copy"
DELETE = "job_delete"
class AppMode:
Standard = 0
Music = 1
Picture = 2
STANDARD = 0
MUSIC = 1
PICTURE = 2
JOBID2TITLE = {
JobType.Scan: tr("Scanning for duplicates"),
JobType.Load: tr("Loading"),
JobType.Move: tr("Moving"),
JobType.Copy: tr("Copying"),
JobType.Delete: tr("Sending to Trash"),
JobType.SCAN: tr("Scanning for duplicates"),
JobType.LOAD: tr("Loading"),
JobType.MOVE: tr("Moving"),
JobType.COPY: tr("Copying"),
JobType.DELETE: tr("Sending to Trash"),
}
@ -135,7 +135,7 @@ class DupeGuru(Broadcaster):
self.appdata = desktop.special_folder_path(desktop.SpecialFolder.APPDATA, appname=self.NAME, portable=portable)
if not op.exists(self.appdata):
os.makedirs(self.appdata)
self.app_mode = AppMode.Standard
self.app_mode = AppMode.STANDARD
self.discarded_file_count = 0
self.exclude_list = ExcludeList()
self.directories = directories.Directories(self.exclude_list)
@ -148,7 +148,7 @@ class DupeGuru(Broadcaster):
"escape_filter_regexp": True,
"clean_empty_dirs": False,
"ignore_hardlink_matches": False,
"copymove_dest_type": DestType.Relative,
"copymove_dest_type": DestType.RELATIVE,
"picture_cache_type": self.PICTURE_CACHE_TYPE,
}
self.selected_dupes = []
@ -169,9 +169,9 @@ class DupeGuru(Broadcaster):
def _recreate_result_table(self):
if self.result_table is not None:
self.result_table.disconnect()
if self.app_mode == AppMode.Picture:
if self.app_mode == AppMode.PICTURE:
self.result_table = pe.result_table.ResultTable(self)
elif self.app_mode == AppMode.Music:
elif self.app_mode == AppMode.MUSIC:
self.result_table = me.result_table.ResultTable(self)
else:
self.result_table = se.result_table.ResultTable(self)
@ -184,15 +184,13 @@ class DupeGuru(Broadcaster):
return op.join(self.appdata, cache_name)
def _get_dupe_sort_key(self, dupe, get_group, key, delta):
if self.app_mode in (AppMode.Music, AppMode.Picture):
if key == "folder_path":
dupe_folder_path = getattr(dupe, "display_folder_path", dupe.folder_path)
return str(dupe_folder_path).lower()
if self.app_mode == AppMode.Picture:
if delta and key == "dimensions":
r = cmp_value(dupe, key)
ref_value = cmp_value(get_group().ref, key)
return get_delta_dimensions(r, ref_value)
if self.app_mode in (AppMode.MUSIC, AppMode.PICTURE) and key == "folder_path":
dupe_folder_path = getattr(dupe, "display_folder_path", dupe.folder_path)
return str(dupe_folder_path).lower()
if self.app_mode == AppMode.PICTURE and delta and key == "dimensions":
r = cmp_value(dupe, key)
ref_value = cmp_value(get_group().ref, key)
return get_delta_dimensions(r, ref_value)
if key == "marked":
return self.results.is_marked(dupe)
if key == "percentage":
@ -212,10 +210,9 @@ class DupeGuru(Broadcaster):
return result
def _get_group_sort_key(self, group, key):
if self.app_mode in (AppMode.Music, AppMode.Picture):
if key == "folder_path":
dupe_folder_path = getattr(group.ref, "display_folder_path", group.ref.folder_path)
return str(dupe_folder_path).lower()
if self.app_mode in (AppMode.MUSIC, AppMode.PICTURE) and key == "folder_path":
dupe_folder_path = getattr(group.ref, "display_folder_path", group.ref.folder_path)
return str(dupe_folder_path).lower()
if key == "percentage":
return group.percentage
if key == "dupe_count":
@ -294,32 +291,32 @@ class DupeGuru(Broadcaster):
self.view.show_message(msg)
def _job_completed(self, jobid):
if jobid == JobType.Scan:
if jobid == JobType.SCAN:
self._results_changed()
if not self.results.groups:
self.view.show_message(tr("No duplicates found."))
else:
self.view.show_results_window()
if jobid in {JobType.Move, JobType.Delete}:
if jobid in {JobType.MOVE, JobType.DELETE}:
self._results_changed()
if jobid == JobType.Load:
if jobid == JobType.LOAD:
self._recreate_result_table()
self._results_changed()
self.view.show_results_window()
if jobid in {JobType.Copy, JobType.Move, JobType.Delete}:
if jobid in {JobType.COPY, JobType.MOVE, JobType.DELETE}:
if self.results.problems:
self.problem_dialog.refresh()
self.view.show_problem_dialog()
else:
msg = {
JobType.Copy: tr("All marked files were copied successfully."),
JobType.Move: tr("All marked files were moved successfully."),
JobType.Delete: tr("All marked files were successfully sent to Trash."),
JobType.COPY: tr("All marked files were copied successfully."),
JobType.MOVE: tr("All marked files were moved successfully."),
JobType.DELETE: tr("All marked files were successfully sent to Trash."),
}[jobid]
self.view.show_message(msg)
def _job_error(self, jobid, err):
if jobid == JobType.Load:
if jobid == JobType.LOAD:
msg = tr("Could not load file: {}").format(err)
self.view.show_message(msg)
return False
@ -349,17 +346,17 @@ class DupeGuru(Broadcaster):
# --- Protected
def _get_fileclasses(self):
if self.app_mode == AppMode.Picture:
if self.app_mode == AppMode.PICTURE:
return [pe.photo.PLAT_SPECIFIC_PHOTO_CLASS]
elif self.app_mode == AppMode.Music:
elif self.app_mode == AppMode.MUSIC:
return [me.fs.MusicFile]
else:
return [se.fs.File]
def _prioritization_categories(self):
if self.app_mode == AppMode.Picture:
if self.app_mode == AppMode.PICTURE:
return pe.prioritize.all_categories()
elif self.app_mode == AppMode.Music:
elif self.app_mode == AppMode.MUSIC:
return me.prioritize.all_categories()
else:
return prioritize.all_categories()
@ -397,16 +394,16 @@ class DupeGuru(Broadcaster):
self.remove_duplicates(dupes)
self.ignore_list_dialog.refresh()
def apply_filter(self, filter):
def apply_filter(self, result_filter):
"""Apply a filter ``filter`` to the results so that it shows only dupe groups that match it.
:param str filter: filter to apply
"""
self.results.apply_filter(None)
if self.options["escape_filter_regexp"]:
filter = escape(filter, set("()[]\\.|+?^"))
filter = escape(filter, "*", ".")
self.results.apply_filter(filter)
result_filter = escape(result_filter, set("()[]\\.|+?^"))
result_filter = escape(result_filter, "*", ".")
self.results.apply_filter(result_filter)
self._results_changed()
def clean_empty_dirs(self, path):
@ -424,10 +421,10 @@ class DupeGuru(Broadcaster):
source_path = dupe.path
location_path = first(p for p in self.directories if dupe.path in p)
dest_path = Path(destination)
if dest_type in {DestType.Relative, DestType.Absolute}:
if dest_type in {DestType.RELATIVE, DestType.ABSOLUTE}:
# no filename, no windows drive letter
source_base = source_path.remove_drive_letter().parent()
if dest_type == DestType.Relative:
if dest_type == DestType.RELATIVE:
source_base = source_base[location_path:]
dest_path = dest_path[source_base]
if not dest_path.exists():
@ -466,7 +463,7 @@ class DupeGuru(Broadcaster):
)
if destination:
desttype = self.options["copymove_dest_type"]
jobid = JobType.Copy if copy else JobType.Move
jobid = JobType.COPY if copy else JobType.MOVE
self._start_job(jobid, do)
def delete_marked(self):
@ -482,7 +479,7 @@ class DupeGuru(Broadcaster):
self.deletion_options.direct,
]
logging.debug("Starting deletion job with args %r", args)
self._start_job(JobType.Delete, self._do_delete, args=args)
self._start_job(JobType.DELETE, self._do_delete, args=args)
def export_to_xhtml(self):
"""Export current results to XHTML.
@ -582,7 +579,7 @@ class DupeGuru(Broadcaster):
def do(j):
self.results.load_from_xml(filename, self._get_file, j)
self._start_job(JobType.Load, do)
self._start_job(JobType.LOAD, do)
def make_selected_reference(self):
"""Promote :attr:`selected_dupes` to reference position within their respective groups.
@ -786,7 +783,7 @@ class DupeGuru(Broadcaster):
for k, v in self.options.items():
if hasattr(scanner, k):
setattr(scanner, k, v)
if self.app_mode == AppMode.Picture:
if self.app_mode == AppMode.PICTURE:
scanner.cache_path = self._get_picture_cache_path()
self.results.groups = []
self._recreate_result_table()
@ -794,7 +791,7 @@ class DupeGuru(Broadcaster):
def do(j):
j.set_progress(0, tr("Collecting files to scan"))
if scanner.scan_type == ScanType.Folders:
if scanner.scan_type == ScanType.FOLDERS:
files = list(self.directories.get_folders(folderclass=se.fs.Folder, j=j))
else:
files = list(self.directories.get_files(fileclasses=self.fileclasses, j=j))
@ -804,7 +801,7 @@ class DupeGuru(Broadcaster):
self.results.groups = scanner.get_dupe_groups(files, self.ignore_list, j)
self.discarded_file_count = scanner.discarded_file_count
self._start_job(JobType.Scan, do)
self._start_job(JobType.SCAN, do)
def toggle_selected_mark_state(self):
selected = self.without_ref(self.selected_dupes)
@ -849,18 +846,18 @@ class DupeGuru(Broadcaster):
@property
def SCANNER_CLASS(self):
if self.app_mode == AppMode.Picture:
if self.app_mode == AppMode.PICTURE:
return pe.scanner.ScannerPE
elif self.app_mode == AppMode.Music:
elif self.app_mode == AppMode.MUSIC:
return me.scanner.ScannerME
else:
return se.scanner.ScannerSE
@property
def METADATA_TO_READ(self):
if self.app_mode == AppMode.Picture:
if self.app_mode == AppMode.PICTURE:
return ["size", "mtime", "dimensions", "exif_timestamp"]
elif self.app_mode == AppMode.Music:
elif self.app_mode == AppMode.MUSIC:
return [
"size",
"mtime",

View File

@ -30,9 +30,9 @@ class DirectoryState:
* DirectoryState.Excluded: Don't scan this folder
"""
Normal = 0
Reference = 1
Excluded = 2
NORMAL = 0
REFERENCE = 1
EXCLUDED = 2
class AlreadyThereError(Exception):
@ -82,50 +82,49 @@ class Directories:
# We iterate even if we only have one item here
for denied_path_re in self._exclude_list.compiled:
if denied_path_re.match(str(path.name)):
return DirectoryState.Excluded
return DirectoryState.EXCLUDED
# return # We still use the old logic to force state on hidden dirs
# Override this in subclasses to specify the state of some special folders.
if path.name.startswith("."):
return DirectoryState.Excluded
return DirectoryState.EXCLUDED
def _get_files(self, from_path, fileclasses, j):
for root, dirs, files in os.walk(str(from_path)):
j.check_if_cancelled()
rootPath = Path(root)
state = self.get_state(rootPath)
if state == DirectoryState.Excluded:
root_path = Path(root)
state = self.get_state(root_path)
if state == DirectoryState.EXCLUDED and not any(p[: len(root_path)] == root_path for p in self.states):
# Recursively get files from folders with lots of subfolder is expensive. However, there
# might be a subfolder in this path that is not excluded. What we want to do is to skim
# through self.states and see if we must continue, or we can stop right here to save time
if not any(p[: len(rootPath)] == rootPath for p in self.states):
del dirs[:]
del dirs[:]
try:
if state != DirectoryState.Excluded:
if state != DirectoryState.EXCLUDED:
# Old logic
if self._exclude_list is None or not self._exclude_list.mark_count:
found_files = [fs.get_file(rootPath + f, fileclasses=fileclasses) for f in files]
found_files = [fs.get_file(root_path + f, fileclasses=fileclasses) for f in files]
else:
found_files = []
# print(f"len of files: {len(files)} {files}")
for f in files:
if not self._exclude_list.is_excluded(root, f):
found_files.append(fs.get_file(rootPath + f, fileclasses=fileclasses))
found_files.append(fs.get_file(root_path + f, fileclasses=fileclasses))
found_files = [f for f in found_files if f is not None]
# In some cases, directories can be considered as files by dupeGuru, which is
# why we have this line below. In fact, there only one case: Bundle files under
# OS X... In other situations, this forloop will do nothing.
for d in dirs[:]:
f = fs.get_file(rootPath + d, fileclasses=fileclasses)
f = fs.get_file(root_path + d, fileclasses=fileclasses)
if f is not None:
found_files.append(f)
dirs.remove(d)
logging.debug(
"Collected %d files in folder %s",
len(found_files),
str(rootPath),
str(root_path),
)
for file in found_files:
file.is_ref = state == DirectoryState.Reference
file.is_ref = state == DirectoryState.REFERENCE
yield file
except (EnvironmentError, fs.InvalidPath):
pass
@ -137,8 +136,8 @@ class Directories:
for folder in self._get_folders(subfolder, j):
yield folder
state = self.get_state(from_folder.path)
if state != DirectoryState.Excluded:
from_folder.is_ref = state == DirectoryState.Reference
if state != DirectoryState.EXCLUDED:
from_folder.is_ref = state == DirectoryState.REFERENCE
logging.debug("Yielding Folder %r state: %d", from_folder, state)
yield from_folder
except (EnvironmentError, fs.InvalidPath):
@ -207,9 +206,9 @@ class Directories:
# direct match? easy result.
if path in self.states:
return self.states[path]
state = self._default_state_for_path(path) or DirectoryState.Normal
state = self._default_state_for_path(path) or DirectoryState.NORMAL
# Save non-default states in cache, necessary for _get_files()
if state != DirectoryState.Normal:
if state != DirectoryState.NORMAL:
self.states[path] = state
return state

View File

@ -106,14 +106,14 @@ def compare_fields(first, second, flags=()):
# We don't want to remove field directly in the list. We must work on a copy.
second = second[:]
for field1 in first:
max = 0
max_score = 0
matched_field = None
for field2 in second:
r = compare(field1, field2, flags)
if r > max:
max = r
if r > max_score:
max_score = r
matched_field = field2
results.append(max)
results.append(max_score)
if matched_field:
second.remove(matched_field)
else:

View File

@ -150,10 +150,7 @@ class ExcludeList(Markable):
# @timer
@memoize
def _do_compile(self, expr):
try:
return re.compile(expr)
except Exception as e:
raise (e)
return re.compile(expr)
# @timer
# @memoize # probably not worth memoizing this one if we memoize the above
@ -235,7 +232,7 @@ class ExcludeList(Markable):
# This exception should never be ignored
raise AlreadyThereException()
if regex in forbidden_regexes:
raise Exception("Forbidden (dangerous) expression.")
raise ValueError("Forbidden (dangerous) expression.")
iscompilable, exception, compiled = self.compile_re(regex)
if not iscompilable and not forced:

View File

@ -168,7 +168,6 @@ class File:
setattr(self, field, md5.digest())
except Exception as e:
logging.error(f"Error computing md5samples: {e}")
pass
def _read_all_info(self, attrnames=None):
"""Cache all possible info.

View File

@ -44,5 +44,4 @@ class DetailsPanel(GUIObject, DupeGuruGUIObject):
# --- Event Handlers
def dupes_selected(self):
self._refresh()
self.view.refresh()
self._view_updated()

View File

@ -11,7 +11,7 @@ from hscommon.gui.tree import Tree, Node
from ..directories import DirectoryState
from .base import DupeGuruGUIObject
STATE_ORDER = [DirectoryState.Normal, DirectoryState.Reference, DirectoryState.Excluded]
STATE_ORDER = [DirectoryState.NORMAL, DirectoryState.REFERENCE, DirectoryState.EXCLUDED]
# Lazily loads children
@ -86,9 +86,9 @@ class DirectoryTree(Tree, DupeGuruGUIObject):
else:
# All selected nodes or on second-or-more level, exclude them.
nodes = self.selected_nodes
newstate = DirectoryState.Excluded
if all(node.state == DirectoryState.Excluded for node in nodes):
newstate = DirectoryState.Normal
newstate = DirectoryState.EXCLUDED
if all(node.state == DirectoryState.EXCLUDED for node in nodes):
newstate = DirectoryState.NORMAL
for node in nodes:
node.state = newstate
@ -103,5 +103,4 @@ class DirectoryTree(Tree, DupeGuruGUIObject):
# --- Event Handlers
def directories_changed(self):
self._refresh()
self.view.refresh()
self._view_updated()

View File

@ -5,7 +5,6 @@
# which should be included with this package. The terms are also available at
# http://www.gnu.org/licenses/gpl-3.0.html
# from hscommon.trans import tr
from .exclude_list_table import ExcludeListTable
from core.exclude import has_sep
from os import sep
@ -47,10 +46,7 @@ class ExcludeListDialogCore:
return False
def add(self, regex):
try:
self.exclude_list.add(regex)
except Exception as e:
raise (e)
self.exclude_list.add(regex)
self.exclude_list.mark(regex)
self.exclude_list_table.add(regex)

View File

@ -88,9 +88,8 @@ class IgnoreList:
except KeyError:
return False
if not inner(first, second):
if not inner(second, first):
raise ValueError()
if not inner(first, second) and not inner(second, first):
raise ValueError()
def load_from_xml(self, infile):
"""Loads the ignore list from a XML created with save_to_xml.

View File

@ -17,9 +17,9 @@ class ScannerME(ScannerBase):
@staticmethod
def get_scan_options():
return [
ScanOption(ScanType.Filename, tr("Filename")),
ScanOption(ScanType.Fields, tr("Filename - Fields")),
ScanOption(ScanType.FieldsNoOrder, tr("Filename - Fields (No Order)")),
ScanOption(ScanType.Tag, tr("Tags")),
ScanOption(ScanType.Contents, tr("Contents")),
ScanOption(ScanType.FILENAME, tr("Filename")),
ScanOption(ScanType.FIELDS, tr("Filename - Fields")),
ScanOption(ScanType.FIELDSNOORDER, tr("Filename - Fields (No Order)")),
ScanOption(ScanType.TAG, tr("Tags")),
ScanOption(ScanType.CONTENTS, tr("Contents")),
]

View File

@ -193,8 +193,8 @@ class TIFF_file:
self.s2nfunc = s2n_intel if self.endian == INTEL_ENDIAN else s2n_motorola
def s2n(self, offset, length, signed=0, debug=False):
slice = self.data[offset : offset + length]
val = self.s2nfunc(slice)
data_slice = self.data[offset : offset + length]
val = self.s2nfunc(data_slice)
# Sign extension ?
if signed:
msb = 1 << (8 * length - 1)
@ -206,7 +206,7 @@ class TIFF_file:
"Slice for offset %d length %d: %r and value: %d",
offset,
length,
slice,
data_slice,
val,
)
return val
@ -236,10 +236,10 @@ class TIFF_file:
for i in range(entries):
entry = ifd + 2 + 12 * i
tag = self.s2n(entry, 2)
type = self.s2n(entry + 2, 2)
if not 1 <= type <= 10:
entry_type = self.s2n(entry + 2, 2)
if not 1 <= entry_type <= 10:
continue # not handled
typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][type - 1]
typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][entry_type - 1]
count = self.s2n(entry + 4, 4)
if count > MAX_COUNT:
logging.debug("Probably corrupt. Aborting.")
@ -247,14 +247,14 @@ class TIFF_file:
offset = entry + 8
if count * typelen > 4:
offset = self.s2n(offset, 4)
if type == 2:
if entry_type == 2:
# Special case: nul-terminated ASCII string
values = str(self.data[offset : offset + count - 1], encoding="latin-1")
else:
values = []
signed = type == 6 or type >= 8
for j in range(count):
if type in {5, 10}:
signed = entry_type == 6 or entry_type >= 8
for _ in range(count):
if entry_type in {5, 10}:
# The type is either 5 or 10
value_j = Fraction(self.s2n(offset, 4, signed), self.s2n(offset + 4, 4, signed))
else:
@ -263,7 +263,7 @@ class TIFF_file:
values.append(value_j)
offset = offset + typelen
# Now "values" is either a string or an array
a.append((tag, type, values))
a.append((tag, entry_type, values))
return a
@ -298,7 +298,7 @@ def get_fields(fp):
T = TIFF_file(data)
# There may be more than one IFD per file, but we only read the first one because others are
# most likely thumbnails.
main_IFD_offset = T.first_IFD()
main_ifd_offset = T.first_IFD()
result = {}
def add_tag_to_result(tag, values):
@ -310,8 +310,8 @@ def get_fields(fp):
return # don't overwrite data
result[stag] = values
logging.debug("IFD at offset %d", main_IFD_offset)
IFD = T.dump_IFD(main_IFD_offset)
logging.debug("IFD at offset %d", main_ifd_offset)
IFD = T.dump_IFD(main_ifd_offset)
exif_off = gps_off = 0
for tag, type, values in IFD:
if tag == 0x8769:

View File

@ -18,12 +18,12 @@ class ScannerPE(Scanner):
@staticmethod
def get_scan_options():
return [
ScanOption(ScanType.FuzzyBlock, tr("Contents")),
ScanOption(ScanType.ExifTimestamp, tr("EXIF Timestamp")),
ScanOption(ScanType.FUZZYBLOCK, tr("Contents")),
ScanOption(ScanType.EXIFTIMESTAMP, tr("EXIF Timestamp")),
]
def _getmatches(self, files, j):
if self.scan_type == ScanType.FuzzyBlock:
if self.scan_type == ScanType.FUZZYBLOCK:
return matchblock.getmatches(
files,
cache_path=self.cache_path,
@ -31,7 +31,7 @@ class ScannerPE(Scanner):
match_scaled=self.match_scaled,
j=j,
)
elif self.scan_type == ScanType.ExifTimestamp:
elif self.scan_type == ScanType.EXIFTIMESTAMP:
return matchexif.getmatches(files, self.match_scaled, j)
else:
raise Exception("Invalid scan type")
raise ValueError("Invalid scan type")

View File

@ -21,16 +21,16 @@ from . import engine
class ScanType:
Filename = 0
Fields = 1
FieldsNoOrder = 2
Tag = 3
Folders = 4
Contents = 5
FILENAME = 0
FIELDS = 1
FIELDSNOORDER = 2
TAG = 3
FOLDERS = 4
CONTENTS = 5
# PE
FuzzyBlock = 10
ExifTimestamp = 11
FUZZYBLOCK = 10
EXIFTIMESTAMP = 11
ScanOption = namedtuple("ScanOption", "scan_type label")
@ -78,15 +78,15 @@ class Scanner:
def _getmatches(self, files, j):
if self.size_threshold or self.scan_type in {
ScanType.Contents,
ScanType.Folders,
ScanType.CONTENTS,
ScanType.FOLDERS,
}:
j = j.start_subjob([2, 8])
for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
if self.size_threshold:
files = [f for f in files if f.size >= self.size_threshold]
if self.scan_type in {ScanType.Contents, ScanType.Folders}:
if self.scan_type in {ScanType.CONTENTS, ScanType.FOLDERS}:
return engine.getmatches_by_contents(files, bigsize=self.big_file_size_threshold, j=j)
else:
j = j.start_subjob([2, 8])
@ -94,13 +94,13 @@ class Scanner:
kw["match_similar_words"] = self.match_similar_words
kw["weight_words"] = self.word_weighting
kw["min_match_percentage"] = self.min_match_percentage
if self.scan_type == ScanType.FieldsNoOrder:
self.scan_type = ScanType.Fields
if self.scan_type == ScanType.FIELDSNOORDER:
self.scan_type = ScanType.FIELDS
kw["no_field_order"] = True
func = {
ScanType.Filename: lambda f: engine.getwords(rem_file_ext(f.name)),
ScanType.Fields: lambda f: engine.getfields(rem_file_ext(f.name)),
ScanType.Tag: lambda f: [
ScanType.FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)),
ScanType.FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)),
ScanType.TAG: lambda f: [
engine.getwords(str(getattr(f, attrname)))
for attrname in SCANNABLE_TAGS
if attrname in self.scanned_tags
@ -150,7 +150,7 @@ class Scanner:
# "duplicated duplicates if you will). Then, we also don't want mixed file kinds if the
# option isn't enabled, we want matches for which both files exist and, lastly, we don't
# want matches with both files as ref.
if self.scan_type == ScanType.Folders and matches:
if self.scan_type == ScanType.FOLDERS and matches:
allpath = {m.first.path for m in matches}
allpath |= {m.second.path for m in matches}
sortedpaths = sorted(allpath)
@ -171,10 +171,10 @@ class Scanner:
logging.info("Grouping matches")
groups = engine.get_groups(matches)
if self.scan_type in {
ScanType.Filename,
ScanType.Fields,
ScanType.FieldsNoOrder,
ScanType.Tag,
ScanType.FILENAME,
ScanType.FIELDS,
ScanType.FIELDSNOORDER,
ScanType.TAG,
}:
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
@ -199,7 +199,7 @@ class Scanner:
match_similar_words = False
min_match_percentage = 80
mix_file_kind = True
scan_type = ScanType.Filename
scan_type = ScanType.FILENAME
scanned_tags = {"artist", "title"}
size_threshold = 0
big_file_size_threshold = 0

View File

@ -13,7 +13,7 @@ class ScannerSE(ScannerBase):
@staticmethod
def get_scan_options():
return [
ScanOption(ScanType.Filename, tr("Filename")),
ScanOption(ScanType.Contents, tr("Contents")),
ScanOption(ScanType.Folders, tr("Folders")),
ScanOption(ScanType.FILENAME, tr("Filename")),
ScanOption(ScanType.CONTENTS, tr("Contents")),
ScanOption(ScanType.FOLDERS, tr("Folders")),
]

View File

@ -110,7 +110,7 @@ class TestCaseDupeGuru:
os.link(str(tmppath["myfile"]), str(tmppath["hardlink"]))
app = TestApp().app
app.directories.add_path(tmppath)
app.options["scan_type"] = ScanType.Contents
app.options["scan_type"] = ScanType.CONTENTS
app.options["ignore_hardlink_matches"] = True
app.start_scanning()
eq_(len(app.results.groups), 0)

View File

@ -140,20 +140,20 @@ def test_states():
d = Directories()
p = testpath["onefile"]
d.add_path(p)
eq_(DirectoryState.Normal, d.get_state(p))
d.set_state(p, DirectoryState.Reference)
eq_(DirectoryState.Reference, d.get_state(p))
eq_(DirectoryState.Reference, d.get_state(p["dir1"]))
eq_(DirectoryState.NORMAL, d.get_state(p))
d.set_state(p, DirectoryState.REFERENCE)
eq_(DirectoryState.REFERENCE, d.get_state(p))
eq_(DirectoryState.REFERENCE, d.get_state(p["dir1"]))
eq_(1, len(d.states))
eq_(p, list(d.states.keys())[0])
eq_(DirectoryState.Reference, d.states[p])
eq_(DirectoryState.REFERENCE, d.states[p])
def test_get_state_with_path_not_there():
# When the path's not there, just return DirectoryState.Normal
d = Directories()
d.add_path(testpath["onefile"])
eq_(d.get_state(testpath), DirectoryState.Normal)
eq_(d.get_state(testpath), DirectoryState.NORMAL)
def test_states_overwritten_when_larger_directory_eat_smaller_ones():
@ -162,20 +162,20 @@ def test_states_overwritten_when_larger_directory_eat_smaller_ones():
d = Directories()
p = testpath["onefile"]
d.add_path(p)
d.set_state(p, DirectoryState.Excluded)
d.set_state(p, DirectoryState.EXCLUDED)
d.add_path(testpath)
d.set_state(testpath, DirectoryState.Reference)
eq_(d.get_state(p), DirectoryState.Reference)
eq_(d.get_state(p["dir1"]), DirectoryState.Reference)
eq_(d.get_state(testpath), DirectoryState.Reference)
d.set_state(testpath, DirectoryState.REFERENCE)
eq_(d.get_state(p), DirectoryState.REFERENCE)
eq_(d.get_state(p["dir1"]), DirectoryState.REFERENCE)
eq_(d.get_state(testpath), DirectoryState.REFERENCE)
def test_get_files():
d = Directories()
p = testpath["fs"]
d.add_path(p)
d.set_state(p["dir1"], DirectoryState.Reference)
d.set_state(p["dir2"], DirectoryState.Excluded)
d.set_state(p["dir1"], DirectoryState.REFERENCE)
d.set_state(p["dir2"], DirectoryState.EXCLUDED)
files = list(d.get_files())
eq_(5, len(files))
for f in files:
@ -204,8 +204,8 @@ def test_get_folders():
d = Directories()
p = testpath["fs"]
d.add_path(p)
d.set_state(p["dir1"], DirectoryState.Reference)
d.set_state(p["dir2"], DirectoryState.Excluded)
d.set_state(p["dir1"], DirectoryState.REFERENCE)
d.set_state(p["dir2"], DirectoryState.EXCLUDED)
folders = list(d.get_folders())
eq_(len(folders), 3)
ref = [f for f in folders if f.is_ref]
@ -220,7 +220,7 @@ def test_get_files_with_inherited_exclusion():
d = Directories()
p = testpath["onefile"]
d.add_path(p)
d.set_state(p, DirectoryState.Excluded)
d.set_state(p, DirectoryState.EXCLUDED)
eq_([], list(d.get_files()))
@ -233,14 +233,14 @@ def test_save_and_load(tmpdir):
p2.mkdir()
d1.add_path(p1)
d1.add_path(p2)
d1.set_state(p1, DirectoryState.Reference)
d1.set_state(p1["dir1"], DirectoryState.Excluded)
d1.set_state(p1, DirectoryState.REFERENCE)
d1.set_state(p1["dir1"], DirectoryState.EXCLUDED)
tmpxml = str(tmpdir.join("directories_testunit.xml"))
d1.save_to_file(tmpxml)
d2.load_from_file(tmpxml)
eq_(2, len(d2))
eq_(DirectoryState.Reference, d2.get_state(p1))
eq_(DirectoryState.Excluded, d2.get_state(p1["dir1"]))
eq_(DirectoryState.REFERENCE, d2.get_state(p1))
eq_(DirectoryState.EXCLUDED, d2.get_state(p1["dir1"]))
def test_invalid_path():
@ -258,7 +258,7 @@ def test_set_state_on_invalid_path():
Path(
"foobar",
),
DirectoryState.Normal,
DirectoryState.NORMAL,
)
except LookupError:
assert False
@ -287,7 +287,7 @@ def test_unicode_save(tmpdir):
p1.mkdir()
p1["foo\xe9"].mkdir()
d.add_path(p1)
d.set_state(p1["foo\xe9"], DirectoryState.Excluded)
d.set_state(p1["foo\xe9"], DirectoryState.EXCLUDED)
tmpxml = str(tmpdir.join("directories_testunit.xml"))
try:
d.save_to_file(tmpxml)
@ -321,10 +321,10 @@ def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
hidden_dir_path = p[".foo"]
p[".foo"].mkdir()
d.add_path(p)
eq_(d.get_state(hidden_dir_path), DirectoryState.Excluded)
eq_(d.get_state(hidden_dir_path), DirectoryState.EXCLUDED)
# But it can be overriden
d.set_state(hidden_dir_path, DirectoryState.Normal)
eq_(d.get_state(hidden_dir_path), DirectoryState.Normal)
d.set_state(hidden_dir_path, DirectoryState.NORMAL)
eq_(d.get_state(hidden_dir_path), DirectoryState.NORMAL)
def test_default_path_state_override(tmpdir):
@ -332,7 +332,7 @@ def test_default_path_state_override(tmpdir):
class MyDirectories(Directories):
def _default_state_for_path(self, path):
if "foobar" in path:
return DirectoryState.Excluded
return DirectoryState.EXCLUDED
d = MyDirectories()
p1 = Path(str(tmpdir))
@ -341,12 +341,12 @@ def test_default_path_state_override(tmpdir):
p1["foobaz"].mkdir()
p1["foobaz/somefile"].open("w").close()
d.add_path(p1)
eq_(d.get_state(p1["foobaz"]), DirectoryState.Normal)
eq_(d.get_state(p1["foobar"]), DirectoryState.Excluded)
eq_(d.get_state(p1["foobaz"]), DirectoryState.NORMAL)
eq_(d.get_state(p1["foobar"]), DirectoryState.EXCLUDED)
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
# However, the default state can be changed
d.set_state(p1["foobar"], DirectoryState.Normal)
eq_(d.get_state(p1["foobar"]), DirectoryState.Normal)
d.set_state(p1["foobar"], DirectoryState.NORMAL)
eq_(d.get_state(p1["foobar"]), DirectoryState.NORMAL)
eq_(len(list(d.get_files())), 2)
@ -375,11 +375,11 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
p1["$Recycle.Bin"].mkdir()
p1["$Recycle.Bin"]["subdir"].mkdir()
self.d.add_path(p1)
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.EXCLUDED)
# By default, subdirs should be excluded too, but this can be overridden separately
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.EXCLUDED)
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.NORMAL)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
def test_exclude_refined(self, tmpdir):
regex1 = r"^\$Recycle\.Bin$"
@ -398,16 +398,16 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
self.d.add_path(p1["$Recycle.Bin"])
# Filter should set the default state to Excluded
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.EXCLUDED)
# The subdir should inherit its parent state
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.EXCLUDED)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.EXCLUDED)
# Override a child path's state
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.NORMAL)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
# Parent should keep its default state, and the other child too
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.EXCLUDED)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.EXCLUDED)
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
# only the 2 files directly under the Normal directory
@ -419,8 +419,8 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
assert "somesubdirfile.png" in files
assert "unwanted_subdirfile.gif" in files
# Overriding the parent should enable all children
self.d.set_state(p1["$Recycle.Bin"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Normal)
self.d.set_state(p1["$Recycle.Bin"], DirectoryState.NORMAL)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.NORMAL)
# all files there
files = self.get_files_and_expect_num_result(6)
assert "somefile.png" in files
@ -444,7 +444,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
assert self.d._exclude_list.error(regex3) is None
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
# Directory shouldn't change its state here, unless explicitely done by user
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
files = self.get_files_and_expect_num_result(5)
assert "unwanted_subdirfile.gif" not in files
assert "unwanted_subdarfile.png" in files
@ -454,14 +454,14 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
self.d._exclude_list.rename(regex3, regex4)
assert self.d._exclude_list.error(regex4) is None
p1["$Recycle.Bin"]["subdar"]["file_ending_with_subdir"].open("w").close()
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.EXCLUDED)
files = self.get_files_and_expect_num_result(4)
assert "file_ending_with_subdir" not in files
assert "somesubdarfile.jpeg" in files
assert "somesubdirfile.png" not in files
assert "unwanted_subdirfile.gif" not in files
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.NORMAL)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
files = self.get_files_and_expect_num_result(6)
assert "file_ending_with_subdir" not in files
@ -471,7 +471,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
regex5 = r".*subdir.*"
self.d._exclude_list.rename(regex4, regex5)
# Files containing substring should be filtered
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
# The path should not match, only the filename, the "subdir" in the directory name shouldn't matter
p1["$Recycle.Bin"]["subdir"]["file_which_shouldnt_match"].open("w").close()
files = self.get_files_and_expect_num_result(5)
@ -493,7 +493,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
assert self.d._exclude_list.error(regex6) is None
assert regex6 in self.d._exclude_list
# This still should not be affected
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
files = self.get_files_and_expect_num_result(5)
# These files are under the "/subdir" directory
assert "somesubdirfile.png" not in files
@ -518,7 +518,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
self.d._exclude_list.add(regex3)
self.d._exclude_list.mark(regex3)
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
eq_(self.d.get_state(p1["$Recycle.Bin"]["思叫物語"]), DirectoryState.Excluded)
eq_(self.d.get_state(p1["$Recycle.Bin"]["思叫物語"]), DirectoryState.EXCLUDED)
files = self.get_files_and_expect_num_result(2)
assert "過去白濁物語~]_カラー.jpg" not in files
assert "なししろ会う前" not in files
@ -527,7 +527,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
regex4 = r".*物語$"
self.d._exclude_list.rename(regex3, regex4)
assert self.d._exclude_list.error(regex4) is None
self.d.set_state(p1["$Recycle.Bin"]["思叫物語"], DirectoryState.Normal)
self.d.set_state(p1["$Recycle.Bin"]["思叫物語"], DirectoryState.NORMAL)
files = self.get_files_and_expect_num_result(5)
assert "過去白濁物語~]_カラー.jpg" in files
assert "なししろ会う前" in files
@ -546,8 +546,8 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
p1["foobar"][".hidden_dir"][".hidden_subfile.png"].open("w").close()
self.d.add_path(p1["foobar"])
# It should not inherit its parent's state originally
eq_(self.d.get_state(p1["foobar"][".hidden_dir"]), DirectoryState.Excluded)
self.d.set_state(p1["foobar"][".hidden_dir"], DirectoryState.Normal)
eq_(self.d.get_state(p1["foobar"][".hidden_dir"]), DirectoryState.EXCLUDED)
self.d.set_state(p1["foobar"][".hidden_dir"], DirectoryState.NORMAL)
# The files should still be filtered
files = self.get_files_and_expect_num_result(1)
eq_(len(self.d._exclude_list.compiled_paths), 0)

View File

@ -52,7 +52,7 @@ def test_empty(fake_fileexists):
def test_default_settings(fake_fileexists):
s = Scanner()
eq_(s.min_match_percentage, 80)
eq_(s.scan_type, ScanType.Filename)
eq_(s.scan_type, ScanType.FILENAME)
eq_(s.mix_file_kind, True)
eq_(s.word_weighting, False)
eq_(s.match_similar_words, False)
@ -119,7 +119,7 @@ def test_prioritize(fake_fileexists):
def test_content_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
s.scan_type = ScanType.CONTENTS
f = [no("foo"), no("bar"), no("bleh")]
f[0].md5 = f[0].md5partial = f[0].md5samples = "foobar"
f[1].md5 = f[1].md5partial = f[1].md5samples = "foobar"
@ -137,14 +137,14 @@ def test_content_scan_compare_sizes_first(fake_fileexists):
raise AssertionError()
s = Scanner()
s.scan_type = ScanType.Contents
s.scan_type = ScanType.CONTENTS
f = [MyFile("foo", 1), MyFile("bar", 2)]
eq_(len(s.get_dupe_groups(f)), 0)
def test_big_file_partial_hashes(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
s.scan_type = ScanType.CONTENTS
smallsize = 1
bigsize = 100 * 1024 * 1024 # 100MB
@ -173,7 +173,7 @@ def test_big_file_partial_hashes(fake_fileexists):
def test_min_match_perc_doesnt_matter_for_content_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
s.scan_type = ScanType.CONTENTS
f = [no("foo"), no("bar"), no("bleh")]
f[0].md5 = f[0].md5partial = f[0].md5samples = "foobar"
f[1].md5 = f[1].md5partial = f[1].md5samples = "foobar"
@ -190,7 +190,7 @@ def test_min_match_perc_doesnt_matter_for_content_scan(fake_fileexists):
def test_content_scan_doesnt_put_md5_in_words_at_the_end(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Contents
s.scan_type = ScanType.CONTENTS
f = [no("foo"), no("bar")]
f[0].md5 = f[0].md5partial = f[0].md5samples = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
f[1].md5 = f[1].md5partial = f[1].md5samples = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
@ -256,7 +256,7 @@ def test_similar_words(fake_fileexists):
def test_fields(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Fields
s.scan_type = ScanType.FIELDS
f = [no("The White Stripes - Little Ghost"), no("The White Stripes - Little Acorn")]
r = s.get_dupe_groups(f)
eq_(len(r), 0)
@ -264,7 +264,7 @@ def test_fields(fake_fileexists):
def test_fields_no_order(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.FieldsNoOrder
s.scan_type = ScanType.FIELDSNOORDER
f = [no("The White Stripes - Little Ghost"), no("Little Ghost - The White Stripes")]
r = s.get_dupe_groups(f)
eq_(len(r), 1)
@ -272,7 +272,7 @@ def test_fields_no_order(fake_fileexists):
def test_tag_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scan_type = ScanType.TAG
o1 = no("foo")
o2 = no("bar")
o1.artist = "The White Stripes"
@ -285,7 +285,7 @@ def test_tag_scan(fake_fileexists):
def test_tag_with_album_scan(fake_fileexists):
s = Scanner()
s.scan_type = ScanType.Tag
s.scan_type = ScanType.TAG
s.scanned_tags = set(["artist", "album", "title"])
o1 = no("foo")
o2 = no("bar")