
Format files with black

- Format all files with black
- Update tox.ini flake8 arguments to be compatible with black's formatting (see the sketch below)
- Add black to requirements-extra.txt
- Reduce ignored flake8 rules and fix a few violations
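The exact flake8 arguments aren't visible on this page. As a rough sketch (values assumed from black's documented defaults, not read from the commit), a black-compatible [flake8] section in tox.ini typically looks like:

    [flake8]
    # black wraps lines at 88 columns by default; raise flake8's limit to match
    max-line-length = 88
    # E203 (whitespace before ':') and W503 (line break before binary operator)
    # conflict with the slice and operator formatting black produces
    extend-ignore = E203, W503

With something like this in place, flake8 keeps reporting real violations while staying quiet about styles black enforces on its own, which is what allows the ignore list to shrink.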
commit 7ba8aa3514 (parent 359d6498f7)
2019-12-31 20:16:27 -06:00
141 changed files with 5241 additions and 3648 deletions

core/pe/matchblock.py

@@ -48,14 +48,18 @@ except Exception:
     logging.warning("Had problems to determine cpu count on launch.")
     RESULTS_QUEUE_LIMIT = 8

 def get_cache(cache_path, readonly=False):
-    if cache_path.endswith('shelve'):
+    if cache_path.endswith("shelve"):
         from .cache_shelve import ShelveCache
         return ShelveCache(cache_path, readonly=readonly)
     else:
         from .cache_sqlite import SqliteCache
         return SqliteCache(cache_path, readonly=readonly)

 def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
     # The MemoryError handlers in there use logging without first caring about whether or not
     # there is enough memory left to carry on the operation because it is assumed that the
@@ -63,7 +67,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
     # time that MemoryError is raised.
     cache = get_cache(cache_path)
     cache.purge_outdated()
-    prepared = [] # only pictures for which there was no error getting blocks
+    prepared = []  # only pictures for which there was no error getting blocks
     try:
         for picture in j.iter_with_progress(pictures, tr("Analyzed %d/%d pictures")):
             if not picture.path:
@@ -77,7 +81,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
             picture.unicode_path = str(picture.path)
             logging.debug("Analyzing picture at %s", picture.unicode_path)
             if with_dimensions:
-                picture.dimensions # pre-read dimensions
+                picture.dimensions  # pre-read dimensions
             try:
                 if picture.unicode_path not in cache:
                     blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
@@ -86,32 +90,45 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
             except (IOError, ValueError) as e:
                 logging.warning(str(e))
             except MemoryError:
-                logging.warning("Ran out of memory while reading %s of size %d", picture.unicode_path, picture.size)
-                if picture.size < 10 * 1024 * 1024: # We're really running out of memory
+                logging.warning(
+                    "Ran out of memory while reading %s of size %d",
+                    picture.unicode_path,
+                    picture.size,
+                )
+                if (
+                    picture.size < 10 * 1024 * 1024
+                ):  # We're really running out of memory
                     raise
     except MemoryError:
-        logging.warning('Ran out of memory while preparing pictures')
+        logging.warning("Ran out of memory while preparing pictures")
     cache.close()
     return prepared

 def get_chunks(pictures):
-    min_chunk_count = multiprocessing.cpu_count() * 2 # have enough chunks to feed all subprocesses
+    min_chunk_count = (
+        multiprocessing.cpu_count() * 2
+    )  # have enough chunks to feed all subprocesses
     chunk_count = len(pictures) // DEFAULT_CHUNK_SIZE
     chunk_count = max(min_chunk_count, chunk_count)
     chunk_size = (len(pictures) // chunk_count) + 1
     chunk_size = max(MIN_CHUNK_SIZE, chunk_size)
     logging.info(
-        "Creating %d chunks with a chunk size of %d for %d pictures", chunk_count,
-        chunk_size, len(pictures)
+        "Creating %d chunks with a chunk size of %d for %d pictures",
+        chunk_count,
+        chunk_size,
+        len(pictures),
     )
-    chunks = [pictures[i:i+chunk_size] for i in range(0, len(pictures), chunk_size)]
+    chunks = [pictures[i : i + chunk_size] for i in range(0, len(pictures), chunk_size)]
     return chunks

 def get_match(first, second, percentage):
     if percentage < 0:
         percentage = 0
     return Match(first, second, percentage)

 def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
     # The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids
     # can be None. In this case, ref_ids has to be compared with itself
@@ -142,6 +159,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
     cache.close()
     return results

 def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
     def get_picinfo(p):
         if match_scaled:
@@ -160,11 +178,16 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
                 async_results.remove(result)
                 comparison_count += 1
         # About the NOQA below: I think there's a bug in pyflakes. To investigate...
-        progress_msg = tr("Performed %d/%d chunk matches") % (comparison_count, len(comparisons_to_do)) # NOQA
+        progress_msg = tr("Performed %d/%d chunk matches") % (
+            comparison_count,
+            len(comparisons_to_do),
+        )  # NOQA
         j.set_progress(comparison_count, progress_msg)

     j = j.start_subjob([3, 7])
-    pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
+    pictures = prepare_pictures(
+        pictures, cache_path, with_dimensions=not match_scaled, j=j
+    )
     j = j.start_subjob([9, 1], tr("Preparing for matching"))
     cache = get_cache(cache_path)
     id2picture = {}
@@ -175,7 +198,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
         except ValueError:
             pass
     cache.close()
-    pictures = [p for p in pictures if hasattr(p, 'cache_id')]
+    pictures = [p for p in pictures if hasattr(p, "cache_id")]
     pool = multiprocessing.Pool()
     async_results = []
     matches = []
@@ -203,9 +226,17 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
         # some wiggle room, log about the incident, and stop matching right here. We then process
         # the matches we have. The rest of the process doesn't allocate much and we should be
         # alright.
-        del comparisons_to_do, chunks, pictures # some wiggle room for the next statements
-        logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
-        del matches[-len(matches)//3:] # some wiggle room to ensure we don't run out of memory again.
+        del (
+            comparisons_to_do,
+            chunks,
+            pictures,
+        )  # some wiggle room for the next statements
+        logging.warning(
+            "Ran out of memory when scanning! We had %d matches.", len(matches)
+        )
+        del matches[
+            -len(matches) // 3 :
+        ]  # some wiggle room to ensure we don't run out of memory again.
     pool.close()
     result = []
     myiter = j.iter_with_progress(
@@ -220,10 +251,10 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
             if percentage == 100 and ref.md5 != other.md5:
                 percentage = 99
             if percentage >= threshold:
-                ref.dimensions # pre-read dimensions for display in results
+                ref.dimensions  # pre-read dimensions for display in results
                 other.dimensions
                 result.append(get_match(ref, other, percentage))
     return result

-multiprocessing.freeze_support()
+multiprocessing.freeze_support()
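As context for the chunking arithmetic reformatted above, the same logic works as a standalone sketch. DEFAULT_CHUNK_SIZE and MIN_CHUNK_SIZE are given assumed values here; their real definitions sit outside this diff and may differ:

    import multiprocessing

    # Assumed values for illustration; the real constants live elsewhere in the module.
    DEFAULT_CHUNK_SIZE = 1000
    MIN_CHUNK_SIZE = 100

    def get_chunks(pictures):
        # at least two chunks per CPU, so every subprocess stays fed
        min_chunk_count = multiprocessing.cpu_count() * 2
        chunk_count = max(min_chunk_count, len(pictures) // DEFAULT_CHUNK_SIZE)
        # the +1 rounds the division up, so at most chunk_count chunks are produced
        chunk_size = max(MIN_CHUNK_SIZE, (len(pictures) // chunk_count) + 1)
        return [pictures[i : i + chunk_size] for i in range(0, len(pictures), chunk_size)]

    # e.g. 2500 items on a 4-core machine: chunk_count = max(8, 2) = 8,
    # chunk_size = max(100, 2500 // 8 + 1) = 313, yielding 8 chunks of <= 313 items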