1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-22 14:41:39 +00:00

Code cleanups in core and other affected files

This commit is contained in:
2021-08-21 18:02:02 -05:00
parent 1ef5f56158
commit d576a7043c
27 changed files with 281 additions and 293 deletions

View File

@@ -21,16 +21,16 @@ from . import engine
class ScanType:
Filename = 0
Fields = 1
FieldsNoOrder = 2
Tag = 3
Folders = 4
Contents = 5
FILENAME = 0
FIELDS = 1
FIELDSNOORDER = 2
TAG = 3
FOLDERS = 4
CONTENTS = 5
# PE
FuzzyBlock = 10
ExifTimestamp = 11
FUZZYBLOCK = 10
EXIFTIMESTAMP = 11
ScanOption = namedtuple("ScanOption", "scan_type label")
@@ -78,15 +78,15 @@ class Scanner:
def _getmatches(self, files, j):
if self.size_threshold or self.scan_type in {
ScanType.Contents,
ScanType.Folders,
ScanType.CONTENTS,
ScanType.FOLDERS,
}:
j = j.start_subjob([2, 8])
for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
if self.size_threshold:
files = [f for f in files if f.size >= self.size_threshold]
if self.scan_type in {ScanType.Contents, ScanType.Folders}:
if self.scan_type in {ScanType.CONTENTS, ScanType.FOLDERS}:
return engine.getmatches_by_contents(files, bigsize=self.big_file_size_threshold, j=j)
else:
j = j.start_subjob([2, 8])
@@ -94,13 +94,13 @@ class Scanner:
kw["match_similar_words"] = self.match_similar_words
kw["weight_words"] = self.word_weighting
kw["min_match_percentage"] = self.min_match_percentage
if self.scan_type == ScanType.FieldsNoOrder:
self.scan_type = ScanType.Fields
if self.scan_type == ScanType.FIELDSNOORDER:
self.scan_type = ScanType.FIELDS
kw["no_field_order"] = True
func = {
ScanType.Filename: lambda f: engine.getwords(rem_file_ext(f.name)),
ScanType.Fields: lambda f: engine.getfields(rem_file_ext(f.name)),
ScanType.Tag: lambda f: [
ScanType.FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)),
ScanType.FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)),
ScanType.TAG: lambda f: [
engine.getwords(str(getattr(f, attrname)))
for attrname in SCANNABLE_TAGS
if attrname in self.scanned_tags
@@ -150,7 +150,7 @@ class Scanner:
# "duplicated duplicates if you will). Then, we also don't want mixed file kinds if the
# option isn't enabled, we want matches for which both files exist and, lastly, we don't
# want matches with both files as ref.
if self.scan_type == ScanType.Folders and matches:
if self.scan_type == ScanType.FOLDERS and matches:
allpath = {m.first.path for m in matches}
allpath |= {m.second.path for m in matches}
sortedpaths = sorted(allpath)
@@ -171,10 +171,10 @@ class Scanner:
logging.info("Grouping matches")
groups = engine.get_groups(matches)
if self.scan_type in {
ScanType.Filename,
ScanType.Fields,
ScanType.FieldsNoOrder,
ScanType.Tag,
ScanType.FILENAME,
ScanType.FIELDS,
ScanType.FIELDSNOORDER,
ScanType.TAG,
}:
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
@@ -199,7 +199,7 @@ class Scanner:
match_similar_words = False
min_match_percentage = 80
mix_file_kind = True
scan_type = ScanType.Filename
scan_type = ScanType.FILENAME
scanned_tags = {"artist", "title"}
size_threshold = 0
big_file_size_threshold = 0