1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-28 17:31:38 +00:00

Compare commits

...

10 Commits

Author SHA1 Message Date
16aa6c21ff Merge pull request #1353 from gauravk268/master
Prevent negative total size in Results class when removing duplcates
2026-01-06 12:34:00 -06:00
Gaurav Kumar
c32a1246b5 fix: Prevent negative total size in Results class when removing duplicates 2026-01-04 17:07:23 +00:00
7f61330dda Merge pull request #1310 from glubsy/build/fix/setuptools_sandbox_deprecation
Remove usage of deprecated setuptools sandbox
2025-12-31 21:38:56 -06:00
eb4d791434 ci: Fix typo in default.yml workflow 2025-12-31 21:34:48 -06:00
e9948d9b3f ci: Update workflow action versions, misc cleanups in .github
- Bump action versions
- Bump minimum Python version teste to 3.8, add 3.13 and 3.14
- Cleanup extra entries in FUNDING.yml
2025-12-31 21:32:23 -06:00
glubsy
c9aa6f1b7a Remove usage of deprecated setuptools sandbox
The sandbox module has been deprecated for several Python versions and
is now removed.
2025-07-19 20:20:31 +02:00
Alexander Gee
8f197ea7e1 feat: Create longest and shortest path criteria (#1242)
* Create longest and shortest path criteria
2024-08-23 18:31:46 -05:00
3a97ba941a ci: Merge artifacts
- Merge the resulting artifacts
- Use only the .so files from build
2024-05-11 01:21:58 -07:00
e3bcf9d686 chore: Update VS Code configuration 2024-05-11 00:12:19 -07:00
a81069be61 fix: Photo matching fixes
- Correct bad query introduced in rotation matching
- Promote get_orientation from "private" on photo class
- Fix prepare_pictures to only generate the needed blocks, add check for missing blocks when rotation matchin is true
- Fix cache test inputs to match schema
2024-05-11 00:11:27 -07:00
13 changed files with 66 additions and 52 deletions

12
.github/FUNDING.yml vendored
View File

@@ -1,13 +1 @@
# These are supported funding model platforms
github: arsenetar
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']

View File

@@ -25,11 +25,10 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v1
uses: github/codeql-action/init@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -44,7 +43,7 @@ jobs:
make modules
- if: matrix.language == 'python'
name: Autobuild
uses: github/codeql-action/autobuild@v1
uses: github/codeql-action/autobuild@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9
# Analysis
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v1
uses: github/codeql-action/analyze@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9

View File

@@ -11,9 +11,9 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
- name: Set up Python 3.12
uses: actions/setup-python@v5
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: "3.12"
- uses: pre-commit/action@v3.0.1
@@ -23,7 +23,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12"]
python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "3.13", "3.14"]
include:
- os: windows-latest
python-version: "3.12"
@@ -31,9 +31,9 @@ jobs:
python-version: "3.12"
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
@@ -49,7 +49,17 @@ jobs:
pytest core hscommon
- name: Upload Artifacts
if: matrix.os == 'ubuntu-latest'
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: modules ${{ matrix.python-version }}
path: ${{ github.workspace }}/**/*.so
path: build/**/*.so
merge-artifacts:
needs: [test]
runs-on: ubuntu-latest
steps:
- name: Merge Artifacts
uses: actions/upload-artifact/merge@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: modules
pattern: modules*
delete-merged: true

View File

@@ -15,7 +15,7 @@ jobs:
push-source:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
- name: Get Transifex Client
run: |
curl -o- https://raw.githubusercontent.com/transifex/cli/master/install.sh | bash -s -- $TX_VERSION

2
.vscode/launch.json vendored
View File

@@ -6,7 +6,7 @@
"configurations": [
{
"name": "DupuGuru",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "run.py",
"console": "integratedTerminal",

View File

@@ -12,5 +12,6 @@
"[python]": {
"editor.formatOnSave": true,
"editor.defaultFormatter": "ms-python.black-formatter"
}
},
"python.testing.pytestEnabled": true
}

View File

@@ -10,7 +10,6 @@ from optparse import OptionParser
import shutil
from multiprocessing import Pool
from setuptools import sandbox
from hscommon import sphinxgen
from hscommon.build import (
add_to_pythonpath,
@@ -18,6 +17,7 @@ from hscommon.build import (
fix_qt_resource_file,
)
from hscommon import loc
import subprocess
def parse_args():
@@ -118,7 +118,7 @@ def build_normpo():
def build_pe_modules():
print("Building PE Modules")
# Leverage setup.py to build modules
sandbox.run_setup("setup.py", ["build_ext", "--inplace"])
subprocess.check_call([sys.executable, "setup.py", "build_ext", "--inplace"])
def build_normal():

View File

@@ -158,7 +158,7 @@ class SqliteCache:
ids = ",".join(map(str, rowids))
sql = (
"select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 "
f"from pictures where rowid in {ids}"
f"from pictures where rowid in ({ids})"
)
cur = self.con.execute(sql)
return (

View File

@@ -54,7 +54,7 @@ def get_cache(cache_path, readonly=False):
return SqliteCache(cache_path, readonly=readonly)
def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
def prepare_pictures(pictures, cache_path, with_dimensions, match_rotated, j=job.nulljob):
# The MemoryError handlers in there use logging without first caring about whether or not
# there is enough memory left to carry on the operation because it is assumed that the
# MemoryError happens when trying to read an image file, which is freed from memory by the
@@ -76,8 +76,14 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
if with_dimensions:
picture.dimensions # pre-read dimensions
try:
if picture.unicode_path not in cache:
blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)]
if picture.unicode_path not in cache or (
match_rotated and any(block == [] for block in cache[picture.unicode_path])
):
if match_rotated:
blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)]
else:
blocks = [[]] * 8
blocks[max(picture.get_orientation() - 1, 0)] = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
cache[picture.unicode_path] = blocks
prepared.append(picture)
except (OSError, ValueError) as e:
@@ -187,7 +193,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, match_rotate
j.set_progress(comparison_count, progress_msg)
j = j.start_subjob([3, 7])
pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
pictures = prepare_pictures(pictures, cache_path, not match_scaled, match_rotated, j=j)
j = j.start_subjob([9, 1], tr("Preparing for matching"))
cache = get_cache(cache_path)
id2picture = {}

View File

@@ -37,7 +37,7 @@ class Photo(fs.File):
def _plat_get_blocks(self, block_count_per_side, orientation):
raise NotImplementedError()
def _get_orientation(self):
def get_orientation(self):
if not hasattr(self, "_cached_orientation"):
try:
with self.path.open("rb") as fp:
@@ -95,13 +95,13 @@ class Photo(fs.File):
fs.File._read_info(self, field)
if field == "dimensions":
self.dimensions = self._plat_get_dimensions()
if self._get_orientation() in {5, 6, 7, 8}:
if self.get_orientation() in {5, 6, 7, 8}:
self.dimensions = (self.dimensions[1], self.dimensions[0])
elif field == "exif_timestamp":
self.exif_timestamp = self._get_exif_timestamp()
def get_blocks(self, block_count_per_side, orientation: int = None):
if orientation is None:
return self._plat_get_blocks(block_count_per_side, self._get_orientation())
return self._plat_get_blocks(block_count_per_side, self.get_orientation())
else:
return self._plat_get_blocks(block_count_per_side, orientation)

View File

@@ -96,6 +96,8 @@ class FilenameCategory(CriterionCategory):
DOESNT_END_WITH_NUMBER = 1
LONGEST = 2
SHORTEST = 3
LONGEST_PATH = 4
SHORTEST_PATH = 5
def format_criterion_value(self, value):
return {
@@ -103,6 +105,8 @@ class FilenameCategory(CriterionCategory):
self.DOESNT_END_WITH_NUMBER: tr("Doesn't end with number"),
self.LONGEST: tr("Longest"),
self.SHORTEST: tr("Shortest"),
self.LONGEST_PATH: tr("Longest Path"),
self.SHORTEST_PATH: tr("Shortest Path"),
}[value]
def extract_value(self, dupe):
@@ -116,6 +120,10 @@ class FilenameCategory(CriterionCategory):
return 0 if ends_with_digit else 1
else:
return 1 if ends_with_digit else 0
elif crit_value == self.LONGEST_PATH:
return len(str(dupe.folder_path)) * -1
elif crit_value == self.SHORTEST_PATH:
return len(str(dupe.folder_path))
else:
value = len(value)
if crit_value == self.LONGEST:
@@ -130,6 +138,8 @@ class FilenameCategory(CriterionCategory):
self.DOESNT_END_WITH_NUMBER,
self.LONGEST,
self.SHORTEST,
self.LONGEST_PATH,
self.SHORTEST_PATH,
]
]

View File

@@ -325,7 +325,7 @@ class Results(Markable):
del self.__group_of_duplicate[dupe]
self._remove_mark_flag(dupe)
self.__total_count -= 1
self.__total_size -= dupe.size
self.__total_size = max(0, self.__total_size - dupe.size)
if not group:
del self.__group_of_duplicate[ref]
self.__groups.remove(group)

View File

@@ -59,13 +59,13 @@ class BaseTestCaseCache:
def test_set_then_retrieve_blocks(self):
c = self.get_cache()
b = [(0, 0, 0), (1, 2, 3)]
b = [[(0, 0, 0), (1, 2, 3)]] * 8
c["foo"] = b
eq_(b, c["foo"])
def test_delitem(self):
c = self.get_cache()
c["foo"] = ""
c["foo"] = [[]] * 8
del c["foo"]
assert "foo" not in c
with raises(KeyError):
@@ -74,16 +74,16 @@ class BaseTestCaseCache:
def test_persistance(self, tmpdir):
DBNAME = tmpdir.join("hstest.db")
c = self.get_cache(str(DBNAME))
c["foo"] = [(1, 2, 3)]
c["foo"] = [[(1, 2, 3)]] * 8
del c
c = self.get_cache(str(DBNAME))
eq_([(1, 2, 3)], c["foo"])
eq_([[(1, 2, 3)]] * 8, c["foo"])
def test_filter(self):
c = self.get_cache()
c["foo"] = ""
c["bar"] = ""
c["baz"] = ""
c["foo"] = [[]] * 8
c["bar"] = [[]] * 8
c["baz"] = [[]] * 8
c.filter(lambda p: p != "bar") # only 'bar' is removed
eq_(2, len(c))
assert "foo" in c
@@ -92,9 +92,9 @@ class BaseTestCaseCache:
def test_clear(self):
c = self.get_cache()
c["foo"] = ""
c["bar"] = ""
c["baz"] = ""
c["foo"] = [[]] * 8
c["bar"] = [[]] * 8
c["baz"] = [[]] * 8
c.clear()
eq_(0, len(c))
assert "foo" not in c
@@ -104,7 +104,7 @@ class BaseTestCaseCache:
def test_by_id(self):
# it's possible to use the cache by referring to the files by their row_id
c = self.get_cache()
b = [(0, 0, 0), (1, 2, 3)]
b = [[(0, 0, 0), (1, 2, 3)]] * 8
c["foo"] = b
foo_id = c.get_id("foo")
eq_(c[foo_id], b)
@@ -127,10 +127,10 @@ class TestCaseSqliteCache(BaseTestCaseCache):
fp.write("invalid sqlite content")
fp.close()
c = self.get_cache(dbname) # should not raise a DatabaseError
c["foo"] = [(1, 2, 3)]
c["foo"] = [[(1, 2, 3)]] * 8
del c
c = self.get_cache(dbname)
eq_(c["foo"], [(1, 2, 3)])
eq_(c["foo"], [[(1, 2, 3)]] * 8)
class TestCaseCacheSQLEscape:
@@ -152,7 +152,7 @@ class TestCaseCacheSQLEscape:
def test_delitem(self):
c = self.get_cache()
c["foo'bar"] = []
c["foo'bar"] = [[]] * 8
try:
del c["foo'bar"]
except KeyError: