Mirror of https://github.com/arsenetar/dupeguru.git, synced 2025-09-11 17:58:17 +00:00

Commit b00f0bf4f1: match all orientations
Parent commit: 322d29a996
@@ -304,12 +304,12 @@ def getmatches_by_contents(files, bigsize=0, j=job.nulljob):
                 result.append(Match(first, second, 100))
                 continue
             # if digests are the same (and not None) then files match
-            if first.digest_partial == second.digest_partial and first.digest_partial is not None:
+            if first.digest_partial is not None and first.digest_partial == second.digest_partial:
                 if bigsize > 0 and first.size > bigsize:
-                    if first.digest_samples == second.digest_samples and first.digest_samples is not None:
+                    if first.digest_samples is not None and first.digest_samples == second.digest_samples:
                         result.append(Match(first, second, 100))
                 else:
-                    if first.digest == second.digest and first.digest is not None:
+                    if first.digest is not None and first.digest == second.digest:
                         result.append(Match(first, second, 100))
         group_count += 1
         j.add_progress(desc=PROGRESS_MESSAGE % (len(result), group_count))
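The reordering above does not change the result: "a == b and a is not None" and "a is not None and a == b" are equivalent booleans. What changes is evaluation order, so the new form can short-circuit before the second file's digest is ever touched when the first file has none. A minimal sketch of that effect, assuming digests are computed lazily (LazyFile and its counter are illustrative, not dupeguru code):

# Minimal sketch, assuming digest_partial is a lazily computed property
# (LazyFile and hash_calls are hypothetical, for illustration only).
class LazyFile:
    def __init__(self, data):
        self._data = data
        self.hash_calls = 0              # counts how often the digest gets computed

    @property
    def digest_partial(self):
        if self._data is None:
            return None                  # nothing to hash
        self.hash_calls += 1             # the expensive hashing would happen here
        return hash(self._data)

first, second = LazyFile(None), LazyFile(b"pixels")

# Old order: both digests are evaluated before the None check is reached.
_ = first.digest_partial == second.digest_partial and first.digest_partial is not None
assert second.hash_calls == 1

# New order: "is not None" fails first, so second.digest_partial is never evaluated.
second.hash_calls = 0
_ = first.digest_partial is not None and first.digest_partial == second.digest_partial
assert second.hash_calls == 0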
@@ -199,7 +199,7 @@ class File:
     # Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of
     # files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become
     # even greater when we take into account read attributes (70%!). Yeah, it's worth it.
-    __slots__ = ("path", "is_ref", "words") + tuple(INITIAL_INFO.keys())
+    __slots__ = ("path", "unicode_path", "is_ref", "words") + tuple(INITIAL_INFO.keys())

     def __init__(self, path):
         for attrname in self.INITIAL_INFO:
@@ -210,6 +210,8 @@ class File:
             self.mtime = nonone(path.stat().st_mtime, 0)
         else:
             self.path = path
+        if self.path:
+            self.unicode_path = str(self.path)

     def __repr__(self):
         return f"<{self.__class__.__name__} {str(self.path)}>"
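Because File uses __slots__, the cached unicode_path attribute has to be declared in the slots tuple before __init__ can assign it; str(path) is then computed once per file instead of on every use. A small sketch of the pattern (MiniFile is illustrative, not the real class):

# Sketch only: with __slots__, every attribute must be declared up front.
from pathlib import Path

class MiniFile:
    __slots__ = ("path", "unicode_path")

    def __init__(self, path):
        self.path = path
        if self.path:
            self.unicode_path = str(self.path)   # cached once, reused for logging and cache keys

f = MiniFile(Path("photo.jpg"))
print(f.unicode_path)       # "photo.jpg"
# f.extra = 1               # would raise AttributeError: no such slot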
@@ -15,10 +15,10 @@ from core.pe.cache import bytes_to_colors, colors_to_bytes
 class SqliteCache:
     """A class to cache picture blocks in a sqlite backend."""

-    schema_version = 1
-    schema_version_description = "Changed from string to bytes for blocks."
+    schema_version = 2
+    schema_version_description = "Added blocks for all 8 orientations."

-    create_table_query = "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB)"
+    create_table_query = "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB, blocks2 BLOB, blocks3 BLOB, blocks4 BLOB, blocks5 BLOB, blocks6 BLOB, blocks7 BLOB, blocks8 BLOB)"
     create_index_query = "CREATE INDEX IF NOT EXISTS idx_path on pictures (path)"
     drop_table_query = "DROP TABLE IF EXISTS pictures"
     drop_index_query = "DROP INDEX IF EXISTS idx_path"
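The eight BLOB columns correspond to the eight possible values of the EXIF Orientation tag, which is why the cache now stores one block list per orientation. For reference, the standard meanings of those tag values (general EXIF knowledge, not part of the diff):

# EXIF Orientation tag values 1-8 and the transform each one implies.
EXIF_ORIENTATIONS = {
    1: "normal",
    2: "mirrored horizontally",
    3: "rotated 180 degrees",
    4: "mirrored vertically",
    5: "mirrored horizontally, then rotated 270 degrees clockwise",
    6: "rotated 90 degrees clockwise",
    7: "mirrored horizontally, then rotated 90 degrees clockwise",
    8: "rotated 270 degrees clockwise",
}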
@@ -43,12 +43,12 @@ class SqliteCache:
     # Optimized
     def __getitem__(self, key):
         if isinstance(key, int):
-            sql = "select blocks from pictures where rowid = ?"
+            sql = "select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where rowid = ?"
         else:
-            sql = "select blocks from pictures where path = ?"
-        result = self.con.execute(sql, [key]).fetchone()
-        if result:
-            result = bytes_to_colors(result[0])
+            sql = "select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where path = ?"
+        blocks = self.con.execute(sql, [key]).fetchone()
+        if blocks:
+            result = [bytes_to_colors(block) for block in blocks]
             return result
         else:
             raise KeyError(key)
@@ -64,17 +64,17 @@ class SqliteCache:
         return result[0][0]

     def __setitem__(self, path_str, blocks):
-        blocks = colors_to_bytes(blocks)
+        blocks = [colors_to_bytes(block) for block in blocks]
         if op.exists(path_str):
             mtime = int(os.stat(path_str).st_mtime)
         else:
             mtime = 0
         if path_str in self:
-            sql = "update pictures set blocks = ?, mtime_ns = ? where path = ?"
+            sql = "update pictures set blocks = ?, blocks2 = ?, blocks3 = ?, blocks4 = ?, blocks5 = ?, blocks6 = ?, blocks7 = ?, blocks8 = ?, mtime_ns = ? where path = ?"
         else:
-            sql = "insert into pictures(blocks,mtime_ns,path) values(?,?,?)"
+            sql = "insert into pictures(blocks,blocks2,blocks3,blocks4,blocks5,blocks6,blocks7,blocks8,mtime_ns,path) values(?,?,?,?,?,?,?,?,?,?)"
         try:
-            self.con.execute(sql, [blocks, mtime, path_str])
+            self.con.execute(sql, blocks + [mtime, path_str])
         except sqlite.OperationalError:
             logging.warning("Picture cache could not set value for key %r", path_str)
         except sqlite.DatabaseError as e:
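Putting the read and write paths together, each cache row now carries eight encoded block lists, and the bound parameter list for the insert/update is simply the eight blobs followed by mtime and path. A self-contained sketch of that round trip, with encode()/decode() standing in for colors_to_bytes/bytes_to_colors (the real encoding is dupeguru's and is not shown here):

# Sketch of the 8-column row layout used by the new cache schema.
import sqlite3

con = sqlite3.connect(":memory:")
con.execute(
    "CREATE TABLE IF NOT EXISTS pictures(path TEXT, mtime_ns INTEGER, blocks BLOB, "
    "blocks2 BLOB, blocks3 BLOB, blocks4 BLOB, blocks5 BLOB, blocks6 BLOB, "
    "blocks7 BLOB, blocks8 BLOB)"
)

def encode(block_list):     # stand-in for colors_to_bytes
    return bytes(block_list)

def decode(blob):           # stand-in for bytes_to_colors
    return list(blob)

blocks = [[o, o, o] for o in range(1, 9)]        # one (fake) block list per orientation
params = [encode(b) for b in blocks] + [0, "photo.jpg"]
con.execute(
    "insert into pictures(blocks,blocks2,blocks3,blocks4,blocks5,blocks6,blocks7,blocks8,"
    "mtime_ns,path) values(?,?,?,?,?,?,?,?,?,?)",
    params,
)

row = con.execute(
    "select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 "
    "from pictures where path = ?",
    ["photo.jpg"],
).fetchone()
assert [decode(blob) for blob in row] == blocks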
@@ -136,9 +136,9 @@ class SqliteCache:
             raise ValueError(path)

     def get_multiple(self, rowids):
-        sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(map(str, rowids))
+        sql = "select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 from pictures where rowid in (%s)" % ",".join(map(str, rowids))
         cur = self.con.execute(sql)
-        return ((rowid, bytes_to_colors(blocks)) for rowid, blocks in cur)
+        return ((rowid, [bytes_to_colors(blocks), bytes_to_colors(blocks2), bytes_to_colors(blocks3), bytes_to_colors(blocks4), bytes_to_colors(blocks5), bytes_to_colors(blocks6), bytes_to_colors(blocks7), bytes_to_colors(blocks8)]) for rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 in cur)

     def purge_outdated(self):
         """Go through the cache and purge outdated records.
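The generator expression above unpacks nine names per row; an equivalent shape (not part of the commit, shown only as a possible simplification) slices each row instead, which stays correct as long as rowid remains the first selected column:

# Possible simplification, not in the commit: slice the row instead of
# unpacking nine names. decode stands in for bytes_to_colors.
def rows_to_block_lists(cur, decode):
    # each row is (rowid, blocks, blocks2, ..., blocks8)
    return ((row[0], [decode(blob) for blob in row[1:]]) for row in cur)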
@@ -72,13 +72,12 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
                 # entry in iPhoto library.
                 logging.warning("We have a picture with a null path here")
                 continue
-            picture.unicode_path = str(picture.path)
             logging.debug("Analyzing picture at %s", picture.unicode_path)
             if with_dimensions:
                 picture.dimensions  # pre-read dimensions
             try:
                 if picture.unicode_path not in cache:
-                    blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
+                    blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)]
                     cache[picture.unicode_path] = blocks
                 prepared.append(picture)
             except (OSError, ValueError) as e:
@@ -125,7 +124,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
     # picinfo is a dictionary {pic_id: (dimensions, is_ref)}
     cache = get_cache(dbname, readonly=True)
     limit = 100 - threshold
-    ref_pairs = list(cache.get_multiple(ref_ids))
+    ref_pairs = list(cache.get_multiple(ref_ids))  # (rowid, [b, b2, ..., b8])
     if other_ids is not None:
         other_pairs = list(cache.get_multiple(other_ids))
         comparisons_to_do = [(r, o) for r in ref_pairs for o in other_pairs]
@@ -137,15 +136,22 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
         other_dimensions, other_is_ref = picinfo[other_id]
         if ref_is_ref and other_is_ref:
             continue
-        if ref_dimensions != other_dimensions:
+        rotated_ref_dimensions = (ref_dimensions[1], ref_dimensions[0])
+        if ref_dimensions != other_dimensions and rotated_ref_dimensions != other_dimensions:
             continue
-        try:
-            diff = avgdiff(ref_blocks, other_blocks, limit, MIN_ITERATIONS)
-            percentage = 100 - diff
-        except (DifferentBlockCountError, NoBlocksError):
-            percentage = 0
-        if percentage >= threshold:
-            results.append((ref_id, other_id, percentage))
+        for orientation_ref in range(8):
+            for orientation_other in range(8):
+                try:
+                    diff = avgdiff(ref_blocks[orientation_ref], other_blocks[orientation_other], limit, MIN_ITERATIONS)
+                    percentage = 100 - diff
+                except (DifferentBlockCountError, NoBlocksError):
+                    percentage = 0
+                if percentage >= threshold:
+                    results.append((ref_id, other_id, percentage))
+                    break
+            else:
+                continue
+            break
     cache.close()
     return results
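The new nested loop leans on Python's for/else: the else branch runs only when the inner loop finished without a break, so the first matching orientation pair records one result and the outer break stops scanning the remaining 8x8 combinations. A minimal, self-contained illustration of that control flow (the numbers are made up and a trivial score replaces avgdiff):

# Minimal sketch of the nested-loop break-out idiom used above.
results = []
ref_variants = [10, 50]          # stand-ins for the 8 ref orientation block lists
other_variants = [49, 90]        # stand-ins for the 8 other orientation block lists
threshold = 95

for a in ref_variants:
    for b in other_variants:
        score = 100 - abs(a - b)         # stand-in for 100 - avgdiff(...)
        if score >= threshold:
            results.append((a, b, score))
            break                        # stop the inner loop at the first match
    else:
        continue                         # inner loop exhausted: try the next ref variant
    break                                # inner loop broke: stop the outer loop too

print(results)                           # [(50, 49, 99)]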
@@ -153,7 +159,7 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
 def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
     def get_picinfo(p):
         if match_scaled:
-            return (None, p.is_ref)
+            return ((None, None), p.is_ref)
         else:
             return (p.dimensions, p.is_ref)
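Returning (None, None) instead of None when match_scaled is on keeps the new dimension check valid: the comparison loop now builds a rotated tuple by indexing, and a bare None would not be subscriptable. A quick check of what that buys:

# With match_scaled, dimensions become a pair of Nones rather than None.
ref_dimensions = (None, None)
rotated_ref_dimensions = (ref_dimensions[1], ref_dimensions[0])   # indexing works; None[1] would raise TypeError
assert ref_dimensions == rotated_ref_dimensions == (None, None)   # so the dimension filter never skips a pair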
@@ -100,5 +100,8 @@ class Photo(fs.File):
         elif field == "exif_timestamp":
             self.exif_timestamp = self._get_exif_timestamp()

-    def get_blocks(self, block_count_per_side):
-        return self._plat_get_blocks(block_count_per_side, self._get_orientation())
+    def get_blocks(self, block_count_per_side, orientation: int = None):
+        if orientation is None:
+            return self._plat_get_blocks(block_count_per_side, self._get_orientation())
+        else:
+            return self._plat_get_blocks(block_count_per_side, orientation)
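With the optional argument, get_blocks keeps its old behaviour by default (honour the photo's own EXIF orientation) while letting prepare_pictures request blocks for every orientation explicitly. A rough sketch of the call pattern (MockPhoto and its return values are illustrative only, not the real Photo class):

# Sketch only: MockPhoto mimics the call pattern, not the real platform code.
class MockPhoto:
    def _get_orientation(self):
        return 6                         # pretend the EXIF tag says "rotate 90 CW"

    def _plat_get_blocks(self, block_count_per_side, orientation):
        return f"{block_count_per_side}x{block_count_per_side} blocks for orientation {orientation}"

    def get_blocks(self, block_count_per_side, orientation: int = None):
        if orientation is None:          # default: use the file's own EXIF orientation
            orientation = self._get_orientation()
        return self._plat_get_blocks(block_count_per_side, orientation)

p = MockPhoto()
print(p.get_blocks(15))                              # uses EXIF orientation 6
print([p.get_blocks(15, o) for o in range(1, 9)])    # what prepare_pictures now requests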