mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-28 17:31:38 +00:00

Compare commits


10 Commits

Author SHA1 Message Date
16aa6c21ff Merge pull request #1353 from gauravk268/master
Prevent negative total size in Results class when removing duplicates
2026-01-06 12:34:00 -06:00
Gaurav Kumar
c32a1246b5 fix: Prevent negative total size in Results class when removing duplicates 2026-01-04 17:07:23 +00:00
7f61330dda Merge pull request #1310 from glubsy/build/fix/setuptools_sandbox_deprecation
Remove usage of deprecated setuptools sandbox
2025-12-31 21:38:56 -06:00
eb4d791434 ci: Fix typo in default.yml workflow 2025-12-31 21:34:48 -06:00
e9948d9b3f ci: Update workflow action versions, misc cleanups in .github
- Bump action versions
- Bump minimum Python version tested to 3.8, add 3.13 and 3.14
- Cleanup extra entries in FUNDING.yml
2025-12-31 21:32:23 -06:00
glubsy
c9aa6f1b7a Remove usage of deprecated setuptools sandbox
The sandbox module has been deprecated for several Python versions and
is now removed.
2025-07-19 20:20:31 +02:00
Alexander Gee
8f197ea7e1 feat: Create longest and shortest path criteria (#1242)
* Create longest and shortest path criteria
2024-08-23 18:31:46 -05:00
3a97ba941a ci: Merge artifacts
- Merge the resulting artifacts
- Use only the .so files from build
2024-05-11 01:21:58 -07:00
e3bcf9d686 chore: Update VS Code configuration 2024-05-11 00:12:19 -07:00
a81069be61 fix: Photo matching fixes
- Correct bad query introduced in rotation matching
- Promote get_orientation from "private" on photo class
- Fix prepare_pictures to only generate the needed blocks, add check for missing blocks when rotation matching is true
- Fix cache test inputs to match schema
2024-05-11 00:11:27 -07:00
13 changed files with 66 additions and 52 deletions

.github/FUNDING.yml vendored

@@ -1,13 +1 @@
-# These are supported funding model platforms
 github: arsenetar
-patreon: # Replace with a single Patreon username
-open_collective: # Replace with a single Open Collective username
-ko_fi: # Replace with a single Ko-fi username
-tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
-community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
-liberapay: # Replace with a single Liberapay username
-issuehunt: # Replace with a single IssueHunt username
-otechie: # Replace with a single Otechie username
-lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
-custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']

.github/workflows/codeql-analysis.yml

@@ -25,11 +25,10 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
       # Initializes the CodeQL tools for scanning.
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@v1
+        uses: github/codeql-action/init@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9
         with:
           languages: ${{ matrix.language }}
       # If you wish to specify custom queries, you can do so here or in a config file.
@@ -44,7 +43,7 @@ jobs:
           make modules
       - if: matrix.language == 'python'
         name: Autobuild
-        uses: github/codeql-action/autobuild@v1
+        uses: github/codeql-action/autobuild@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9
       # Analysis
       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v1
+        uses: github/codeql-action/analyze@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9

.github/workflows/default.yml

@@ -11,9 +11,9 @@ jobs:
   pre-commit:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
       - name: Set up Python 3.12
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
         with:
           python-version: "3.12"
       - uses: pre-commit/action@v3.0.1
@@ -23,7 +23,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12"]
+        python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "3.13", "3.14"]
         include:
           - os: windows-latest
             python-version: "3.12"
@@ -31,9 +31,9 @@
             python-version: "3.12"
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
@@ -49,7 +49,17 @@
           pytest core hscommon
       - name: Upload Artifacts
         if: matrix.os == 'ubuntu-latest'
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
         with:
           name: modules ${{ matrix.python-version }}
-          path: ${{ github.workspace }}/**/*.so
+          path: build/**/*.so
+  merge-artifacts:
+    needs: [test]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Merge Artifacts
+        uses: actions/upload-artifact/merge@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+        with:
+          name: modules
+          pattern: modules*
+          delete-merged: true

.github/workflows/tx-push.yml

@@ -15,7 +15,7 @@ jobs:
   push-source:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
       - name: Get Transifex Client
         run: |
           curl -o- https://raw.githubusercontent.com/transifex/cli/master/install.sh | bash -s -- $TX_VERSION

.vscode/launch.json vendored

@@ -6,7 +6,7 @@
     "configurations": [
         {
             "name": "DupuGuru",
-            "type": "python",
+            "type": "debugpy",
             "request": "launch",
             "program": "run.py",
             "console": "integratedTerminal",

.vscode/settings.json vendored

@@ -12,5 +12,6 @@
     "[python]": {
         "editor.formatOnSave": true,
         "editor.defaultFormatter": "ms-python.black-formatter"
-    }
+    },
+    "python.testing.pytestEnabled": true
 }

build.py

@@ -10,7 +10,6 @@ from optparse import OptionParser
 import shutil
 from multiprocessing import Pool
-from setuptools import sandbox
 from hscommon import sphinxgen
 from hscommon.build import (
     add_to_pythonpath,
@@ -18,6 +17,7 @@ from hscommon.build import (
     fix_qt_resource_file,
 )
 from hscommon import loc
+import subprocess


 def parse_args():
@@ -118,7 +118,7 @@ def build_normpo():
 def build_pe_modules():
     print("Building PE Modules")
     # Leverage setup.py to build modules
-    sandbox.run_setup("setup.py", ["build_ext", "--inplace"])
+    subprocess.check_call([sys.executable, "setup.py", "build_ext", "--inplace"])


 def build_normal():
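The change swaps setuptools' removed sandbox for a plain child process running the same interpreter. A minimal sketch of the pattern, assuming a setup.py that defines the extension modules (the standalone wrapper below is illustrative, not dupeGuru's build.py):

import subprocess
import sys

def build_ext_inplace(setup_script="setup.py"):
    # check_call raises CalledProcessError on a non-zero exit, so a failed
    # compile still aborts the surrounding build run, as sandbox.run_setup did.
    subprocess.check_call([sys.executable, setup_script, "build_ext", "--inplace"])

if __name__ == "__main__":
    build_ext_inplace()

Note that the replacement call relies on sys.executable, so build.py must already import sys for this hunk to work on its own.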

core/pe/cache_sqlite.py

@@ -158,7 +158,7 @@ class SqliteCache:
         ids = ",".join(map(str, rowids))
         sql = (
             "select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 "
-            f"from pictures where rowid in {ids}"
+            f"from pictures where rowid in ({ids})"
         )
         cur = self.con.execute(sql)
         return (
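This is the "bad query introduced in rotation matching" from the commit message: without parentheses, SQLite rejects the IN clause as a syntax error. A self-contained demonstration of the failure mode (the table and data are illustrative, not dupeGuru's schema):

import sqlite3

con = sqlite3.connect(":memory:")
con.execute("create table t (x integer)")
con.executemany("insert into t (x) values (?)", [(1,), (2,), (3,)])

ids = ",".join(map(str, [1, 3]))
try:
    con.execute(f"select x from t where x in {ids}")  # "in 1,3" cannot parse
except sqlite3.OperationalError as e:
    print("unparenthesized IN fails:", e)

rows = con.execute(f"select x from t where x in ({ids})").fetchall()
print(rows)  # [(1,), (3,)]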

core/pe/matchblock.py

@@ -54,7 +54,7 @@ def get_cache(cache_path, readonly=False):
     return SqliteCache(cache_path, readonly=readonly)


-def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
+def prepare_pictures(pictures, cache_path, with_dimensions, match_rotated, j=job.nulljob):
     # The MemoryError handlers in there use logging without first caring about whether or not
     # there is enough memory left to carry on the operation because it is assumed that the
     # MemoryError happens when trying to read an image file, which is freed from memory by the
@@ -76,8 +76,14 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
             if with_dimensions:
                 picture.dimensions  # pre-read dimensions
             try:
-                if picture.unicode_path not in cache:
-                    blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)]
+                if picture.unicode_path not in cache or (
+                    match_rotated and any(block == [] for block in cache[picture.unicode_path])
+                ):
+                    if match_rotated:
+                        blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)]
+                    else:
+                        blocks = [[]] * 8
+                        blocks[max(picture.get_orientation() - 1, 0)] = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
                     cache[picture.unicode_path] = blocks
                 prepared.append(picture)
             except (OSError, ValueError) as e:
@@ -187,7 +193,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, match_rotate
         j.set_progress(comparison_count, progress_msg)

     j = j.start_subjob([3, 7])
-    pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
+    pictures = prepare_pictures(pictures, cache_path, not match_scaled, match_rotated, j=j)
     j = j.start_subjob([9, 1], tr("Preparing for matching"))
     cache = get_cache(cache_path)
     id2picture = {}
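As a reading aid: after this change each cache entry holds eight block lists, one per EXIF orientation, and a non-rotated scan fills only the slot for the picture's actual orientation, leaving the rest empty. A sketch of that layout and of the re-scan condition above (names are illustrative):

NUM_ORIENTATIONS = 8  # EXIF orientations 1..8, stored at index orientation - 1

def make_cache_entry(orientation, blocks):
    # Non-rotated scan: fill only the slot for the picture's own orientation.
    entry = [[]] * NUM_ORIENTATIONS
    entry[max(orientation - 1, 0)] = blocks  # max(..., 0) guards orientation 0
    return entry

def needs_recompute(entry, match_rotated):
    # Mirrors the diff: empty slots mean the cached entry is incomplete
    # once rotation matching is requested on a later scan.
    return match_rotated and any(block == [] for block in entry)

entry = make_cache_entry(orientation=6, blocks=[(1, 2, 3)])
print(needs_recompute(entry, match_rotated=True))   # True: 7 slots still empty
print(needs_recompute(entry, match_rotated=False))  # False: good enough as-is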

core/pe/photo.py

@@ -37,7 +37,7 @@ class Photo(fs.File):
     def _plat_get_blocks(self, block_count_per_side, orientation):
         raise NotImplementedError()

-    def _get_orientation(self):
+    def get_orientation(self):
         if not hasattr(self, "_cached_orientation"):
             try:
                 with self.path.open("rb") as fp:
@@ -95,13 +95,13 @@ class Photo(fs.File):
         fs.File._read_info(self, field)
         if field == "dimensions":
             self.dimensions = self._plat_get_dimensions()
-            if self._get_orientation() in {5, 6, 7, 8}:
+            if self.get_orientation() in {5, 6, 7, 8}:
                 self.dimensions = (self.dimensions[1], self.dimensions[0])
         elif field == "exif_timestamp":
             self.exif_timestamp = self._get_exif_timestamp()

     def get_blocks(self, block_count_per_side, orientation: int = None):
         if orientation is None:
-            return self._plat_get_blocks(block_count_per_side, self._get_orientation())
+            return self._plat_get_blocks(block_count_per_side, self.get_orientation())
         else:
             return self._plat_get_blocks(block_count_per_side, orientation)
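get_orientation is promoted to the public API because matchblock.py now calls it directly. For context, EXIF orientation is a value from 1 to 8, where 5 through 8 imply a 90-degree rotation, hence the width/height swap above. A hedged sketch of the same contract using Pillow for illustration (not necessarily how dupeGuru reads the tag):

from PIL import Image

EXIF_ORIENTATION_TAG = 0x0112  # tag 274, "Orientation"

def get_orientation(path):
    # Returns the EXIF orientation 1..8, or 0 when it cannot be read --
    # the same fallback the diff's max(orientation - 1, 0) accounts for.
    try:
        with Image.open(path) as img:
            return int(img.getexif().get(EXIF_ORIENTATION_TAG, 0))
    except OSError:
        return 0

print(get_orientation("photo.jpg") in {5, 6, 7, 8})  # rotated 90 degrees?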

core/prioritize.py

@@ -96,6 +96,8 @@ class FilenameCategory(CriterionCategory):
     DOESNT_END_WITH_NUMBER = 1
     LONGEST = 2
     SHORTEST = 3
+    LONGEST_PATH = 4
+    SHORTEST_PATH = 5

     def format_criterion_value(self, value):
         return {
@@ -103,6 +105,8 @@
             self.DOESNT_END_WITH_NUMBER: tr("Doesn't end with number"),
             self.LONGEST: tr("Longest"),
             self.SHORTEST: tr("Shortest"),
+            self.LONGEST_PATH: tr("Longest Path"),
+            self.SHORTEST_PATH: tr("Shortest Path"),
         }[value]

     def extract_value(self, dupe):
@@ -116,6 +120,10 @@
                 return 0 if ends_with_digit else 1
             else:
                 return 1 if ends_with_digit else 0
+        elif crit_value == self.LONGEST_PATH:
+            return len(str(dupe.folder_path)) * -1
+        elif crit_value == self.SHORTEST_PATH:
+            return len(str(dupe.folder_path))
         else:
             value = len(value)
             if crit_value == self.LONGEST:
@@ -130,6 +138,8 @@
             self.DOESNT_END_WITH_NUMBER,
             self.LONGEST,
             self.SHORTEST,
+            self.LONGEST_PATH,
+            self.SHORTEST_PATH,
         ]
     ]
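The new criteria follow the category's existing convention: extract_value returns a key that is sorted ascending, so "longest path" is expressed by negating the length. A toy illustration with stand-in dupe objects (the real objects are dupeGuru file instances with a folder_path attribute):

from types import SimpleNamespace

dupes = [
    SimpleNamespace(folder_path="/pics"),
    SimpleNamespace(folder_path="/pics/2024/vacation/raw"),
    SimpleNamespace(folder_path="/pics/2024"),
]

# LONGEST_PATH: negate so the deepest folder sorts first under ascending order
longest_first = sorted(dupes, key=lambda d: len(str(d.folder_path)) * -1)
# SHORTEST_PATH: plain length, shallowest folder sorts first
shortest_first = sorted(dupes, key=lambda d: len(str(d.folder_path)))

print(longest_first[0].folder_path)   # /pics/2024/vacation/raw
print(shortest_first[0].folder_path)  # /pics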

core/results.py

@@ -325,7 +325,7 @@ class Results(Markable):
             del self.__group_of_duplicate[dupe]
             self._remove_mark_flag(dupe)
             self.__total_count -= 1
-            self.__total_size -= dupe.size
+            self.__total_size = max(0, self.__total_size - dupe.size)
         if not group:
             del self.__group_of_duplicate[ref]
             self.__groups.remove(group)
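The fix from #1353 guards the aggregate stat rather than the removal logic itself: if a dupe's size ends up subtracted more often than it was added, the running total can no longer go negative. A toy reproduction of the invariant (Results internals simplified to a bare counter):

class TotalSizeSketch:
    # Simplified stand-in for Results' running total-size counter.
    def __init__(self):
        self.total_size = 0

    def add_dupe(self, size):
        self.total_size += size

    def remove_dupe(self, size):
        # As in the fix: clamp at zero instead of blindly subtracting.
        self.total_size = max(0, self.total_size - size)

stats = TotalSizeSketch()
stats.add_dupe(100)
stats.remove_dupe(60)
stats.remove_dupe(60)    # a stale second removal would otherwise yield -20
print(stats.total_size)  # 0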

core/tests/cache_test.py

@@ -59,13 +59,13 @@ class BaseTestCaseCache:
     def test_set_then_retrieve_blocks(self):
         c = self.get_cache()
-        b = [(0, 0, 0), (1, 2, 3)]
+        b = [[(0, 0, 0), (1, 2, 3)]] * 8
         c["foo"] = b
         eq_(b, c["foo"])

     def test_delitem(self):
         c = self.get_cache()
-        c["foo"] = ""
+        c["foo"] = [[]] * 8
         del c["foo"]
         assert "foo" not in c
         with raises(KeyError):
@@ -74,16 +74,16 @@ class BaseTestCaseCache:
     def test_persistance(self, tmpdir):
         DBNAME = tmpdir.join("hstest.db")
         c = self.get_cache(str(DBNAME))
-        c["foo"] = [(1, 2, 3)]
+        c["foo"] = [[(1, 2, 3)]] * 8
         del c
         c = self.get_cache(str(DBNAME))
-        eq_([(1, 2, 3)], c["foo"])
+        eq_([[(1, 2, 3)]] * 8, c["foo"])

     def test_filter(self):
         c = self.get_cache()
-        c["foo"] = ""
-        c["bar"] = ""
-        c["baz"] = ""
+        c["foo"] = [[]] * 8
+        c["bar"] = [[]] * 8
+        c["baz"] = [[]] * 8
         c.filter(lambda p: p != "bar")  # only 'bar' is removed
         eq_(2, len(c))
         assert "foo" in c
@@ -92,9 +92,9 @@ class BaseTestCaseCache:
     def test_clear(self):
         c = self.get_cache()
-        c["foo"] = ""
-        c["bar"] = ""
-        c["baz"] = ""
+        c["foo"] = [[]] * 8
+        c["bar"] = [[]] * 8
+        c["baz"] = [[]] * 8
         c.clear()
         eq_(0, len(c))
         assert "foo" not in c
@@ -104,7 +104,7 @@ class BaseTestCaseCache:
     def test_by_id(self):
         # it's possible to use the cache by referring to the files by their row_id
         c = self.get_cache()
-        b = [(0, 0, 0), (1, 2, 3)]
+        b = [[(0, 0, 0), (1, 2, 3)]] * 8
         c["foo"] = b
         foo_id = c.get_id("foo")
         eq_(c[foo_id], b)
@@ -127,10 +127,10 @@ class TestCaseSqliteCache(BaseTestCaseCache):
         fp.write("invalid sqlite content")
         fp.close()
         c = self.get_cache(dbname)  # should not raise a DatabaseError
-        c["foo"] = [(1, 2, 3)]
+        c["foo"] = [[(1, 2, 3)]] * 8
         del c
         c = self.get_cache(dbname)
-        eq_(c["foo"], [(1, 2, 3)])
+        eq_(c["foo"], [[(1, 2, 3)]] * 8)


 class TestCaseCacheSQLEscape:
@@ -152,7 +152,7 @@ class TestCaseCacheSQLEscape:
     def test_delitem(self):
         c = self.get_cache()
-        c["foo'bar"] = []
+        c["foo'bar"] = [[]] * 8
         try:
             del c["foo'bar"]
         except KeyError: