1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-03-12 19:41:38 +00:00

Compare commits

..

3 Commits

Author SHA1 Message Date
Luca Falavigna
db5e80ed7e Merge 95c49ffdef into 08154815d0 2024-02-20 10:20:41 -07:00
Luca Falavigna
95c49ffdef Merge branch 'arsenetar:master' into manpages 2024-02-12 08:31:54 +01:00
Luca Falavigna
b72b64f4fc Add manpage 2022-08-31 14:57:38 +00:00
14 changed files with 71 additions and 66 deletions

12
.github/FUNDING.yml vendored
View File

@@ -1 +1,13 @@
# These are supported funding model platforms
github: arsenetar github: arsenetar
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']

View File

@@ -25,10 +25,11 @@ jobs:
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 uses: actions/checkout@v2
# Initializes the CodeQL tools for scanning. # Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL - name: Initialize CodeQL
uses: github/codeql-action/init@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9 uses: github/codeql-action/init@v1
with: with:
languages: ${{ matrix.language }} languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file. # If you wish to specify custom queries, you can do so here or in a config file.
@@ -43,7 +44,7 @@ jobs:
make modules make modules
- if: matrix.language == 'python' - if: matrix.language == 'python'
name: Autobuild name: Autobuild
uses: github/codeql-action/autobuild@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9 uses: github/codeql-action/autobuild@v1
# Analysis # Analysis
- name: Perform CodeQL Analysis - name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@5d4e8d1aca955e8d8589aabd499c5cae939e33c7 # v4.31.9 uses: github/codeql-action/analyze@v1

View File

@@ -11,9 +11,9 @@ jobs:
pre-commit: pre-commit:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - uses: actions/checkout@v4
- name: Set up Python 3.12 - name: Set up Python 3.12
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 uses: actions/setup-python@v5
with: with:
python-version: "3.12" python-version: "3.12"
- uses: pre-commit/action@v3.0.1 - uses: pre-commit/action@v3.0.1
@@ -23,7 +23,7 @@ jobs:
strategy: strategy:
matrix: matrix:
os: [ubuntu-latest] os: [ubuntu-latest]
python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "3.13", "3.14"] python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12"]
include: include:
- os: windows-latest - os: windows-latest
python-version: "3.12" python-version: "3.12"
@@ -31,9 +31,9 @@ jobs:
python-version: "3.12" python-version: "3.12"
steps: steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 uses: actions/setup-python@v5
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
- name: Install dependencies - name: Install dependencies
@@ -49,17 +49,7 @@ jobs:
pytest core hscommon pytest core hscommon
- name: Upload Artifacts - name: Upload Artifacts
if: matrix.os == 'ubuntu-latest' if: matrix.os == 'ubuntu-latest'
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 uses: actions/upload-artifact@v4
with: with:
name: modules ${{ matrix.python-version }} name: modules ${{ matrix.python-version }}
path: build/**/*.so path: ${{ github.workspace }}/**/*.so
merge-artifacts:
needs: [test]
runs-on: ubuntu-latest
steps:
- name: Merge Artifacts
uses: actions/upload-artifact/merge@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: modules
pattern: modules*
delete-merged: true

View File

@@ -15,7 +15,7 @@ jobs:
push-source: push-source:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 - uses: actions/checkout@v4
- name: Get Transifex Client - name: Get Transifex Client
run: | run: |
curl -o- https://raw.githubusercontent.com/transifex/cli/master/install.sh | bash -s -- $TX_VERSION curl -o- https://raw.githubusercontent.com/transifex/cli/master/install.sh | bash -s -- $TX_VERSION

2
.vscode/launch.json vendored
View File

@@ -6,7 +6,7 @@
"configurations": [ "configurations": [
{ {
"name": "DupuGuru", "name": "DupuGuru",
"type": "debugpy", "type": "python",
"request": "launch", "request": "launch",
"program": "run.py", "program": "run.py",
"console": "integratedTerminal", "console": "integratedTerminal",

View File

@@ -12,6 +12,5 @@
"[python]": { "[python]": {
"editor.formatOnSave": true, "editor.formatOnSave": true,
"editor.defaultFormatter": "ms-python.black-formatter" "editor.defaultFormatter": "ms-python.black-formatter"
}, }
"python.testing.pytestEnabled": true
} }

View File

@@ -10,6 +10,7 @@ from optparse import OptionParser
import shutil import shutil
from multiprocessing import Pool from multiprocessing import Pool
from setuptools import sandbox
from hscommon import sphinxgen from hscommon import sphinxgen
from hscommon.build import ( from hscommon.build import (
add_to_pythonpath, add_to_pythonpath,
@@ -17,7 +18,6 @@ from hscommon.build import (
fix_qt_resource_file, fix_qt_resource_file,
) )
from hscommon import loc from hscommon import loc
import subprocess
def parse_args(): def parse_args():
@@ -118,7 +118,7 @@ def build_normpo():
def build_pe_modules(): def build_pe_modules():
print("Building PE Modules") print("Building PE Modules")
# Leverage setup.py to build modules # Leverage setup.py to build modules
subprocess.check_call([sys.executable, "setup.py", "build_ext", "--inplace"]) sandbox.run_setup("setup.py", ["build_ext", "--inplace"])
def build_normal(): def build_normal():

View File

@@ -158,7 +158,7 @@ class SqliteCache:
ids = ",".join(map(str, rowids)) ids = ",".join(map(str, rowids))
sql = ( sql = (
"select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 " "select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 "
f"from pictures where rowid in ({ids})" f"from pictures where rowid in {ids}"
) )
cur = self.con.execute(sql) cur = self.con.execute(sql)
return ( return (

View File

@@ -54,7 +54,7 @@ def get_cache(cache_path, readonly=False):
return SqliteCache(cache_path, readonly=readonly) return SqliteCache(cache_path, readonly=readonly)
def prepare_pictures(pictures, cache_path, with_dimensions, match_rotated, j=job.nulljob): def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
# The MemoryError handlers in there use logging without first caring about whether or not # The MemoryError handlers in there use logging without first caring about whether or not
# there is enough memory left to carry on the operation because it is assumed that the # there is enough memory left to carry on the operation because it is assumed that the
# MemoryError happens when trying to read an image file, which is freed from memory by the # MemoryError happens when trying to read an image file, which is freed from memory by the
@@ -76,14 +76,8 @@ def prepare_pictures(pictures, cache_path, with_dimensions, match_rotated, j=job
if with_dimensions: if with_dimensions:
picture.dimensions # pre-read dimensions picture.dimensions # pre-read dimensions
try: try:
if picture.unicode_path not in cache or ( if picture.unicode_path not in cache:
match_rotated and any(block == [] for block in cache[picture.unicode_path])
):
if match_rotated:
blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)] blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)]
else:
blocks = [[]] * 8
blocks[max(picture.get_orientation() - 1, 0)] = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
cache[picture.unicode_path] = blocks cache[picture.unicode_path] = blocks
prepared.append(picture) prepared.append(picture)
except (OSError, ValueError) as e: except (OSError, ValueError) as e:
@@ -193,7 +187,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, match_rotate
j.set_progress(comparison_count, progress_msg) j.set_progress(comparison_count, progress_msg)
j = j.start_subjob([3, 7]) j = j.start_subjob([3, 7])
pictures = prepare_pictures(pictures, cache_path, not match_scaled, match_rotated, j=j) pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
j = j.start_subjob([9, 1], tr("Preparing for matching")) j = j.start_subjob([9, 1], tr("Preparing for matching"))
cache = get_cache(cache_path) cache = get_cache(cache_path)
id2picture = {} id2picture = {}

View File

@@ -37,7 +37,7 @@ class Photo(fs.File):
def _plat_get_blocks(self, block_count_per_side, orientation): def _plat_get_blocks(self, block_count_per_side, orientation):
raise NotImplementedError() raise NotImplementedError()
def get_orientation(self): def _get_orientation(self):
if not hasattr(self, "_cached_orientation"): if not hasattr(self, "_cached_orientation"):
try: try:
with self.path.open("rb") as fp: with self.path.open("rb") as fp:
@@ -95,13 +95,13 @@ class Photo(fs.File):
fs.File._read_info(self, field) fs.File._read_info(self, field)
if field == "dimensions": if field == "dimensions":
self.dimensions = self._plat_get_dimensions() self.dimensions = self._plat_get_dimensions()
if self.get_orientation() in {5, 6, 7, 8}: if self._get_orientation() in {5, 6, 7, 8}:
self.dimensions = (self.dimensions[1], self.dimensions[0]) self.dimensions = (self.dimensions[1], self.dimensions[0])
elif field == "exif_timestamp": elif field == "exif_timestamp":
self.exif_timestamp = self._get_exif_timestamp() self.exif_timestamp = self._get_exif_timestamp()
def get_blocks(self, block_count_per_side, orientation: int = None): def get_blocks(self, block_count_per_side, orientation: int = None):
if orientation is None: if orientation is None:
return self._plat_get_blocks(block_count_per_side, self.get_orientation()) return self._plat_get_blocks(block_count_per_side, self._get_orientation())
else: else:
return self._plat_get_blocks(block_count_per_side, orientation) return self._plat_get_blocks(block_count_per_side, orientation)

View File

@@ -96,8 +96,6 @@ class FilenameCategory(CriterionCategory):
DOESNT_END_WITH_NUMBER = 1 DOESNT_END_WITH_NUMBER = 1
LONGEST = 2 LONGEST = 2
SHORTEST = 3 SHORTEST = 3
LONGEST_PATH = 4
SHORTEST_PATH = 5
def format_criterion_value(self, value): def format_criterion_value(self, value):
return { return {
@@ -105,8 +103,6 @@ class FilenameCategory(CriterionCategory):
self.DOESNT_END_WITH_NUMBER: tr("Doesn't end with number"), self.DOESNT_END_WITH_NUMBER: tr("Doesn't end with number"),
self.LONGEST: tr("Longest"), self.LONGEST: tr("Longest"),
self.SHORTEST: tr("Shortest"), self.SHORTEST: tr("Shortest"),
self.LONGEST_PATH: tr("Longest Path"),
self.SHORTEST_PATH: tr("Shortest Path"),
}[value] }[value]
def extract_value(self, dupe): def extract_value(self, dupe):
@@ -120,10 +116,6 @@ class FilenameCategory(CriterionCategory):
return 0 if ends_with_digit else 1 return 0 if ends_with_digit else 1
else: else:
return 1 if ends_with_digit else 0 return 1 if ends_with_digit else 0
elif crit_value == self.LONGEST_PATH:
return len(str(dupe.folder_path)) * -1
elif crit_value == self.SHORTEST_PATH:
return len(str(dupe.folder_path))
else: else:
value = len(value) value = len(value)
if crit_value == self.LONGEST: if crit_value == self.LONGEST:
@@ -138,8 +130,6 @@ class FilenameCategory(CriterionCategory):
self.DOESNT_END_WITH_NUMBER, self.DOESNT_END_WITH_NUMBER,
self.LONGEST, self.LONGEST,
self.SHORTEST, self.SHORTEST,
self.LONGEST_PATH,
self.SHORTEST_PATH,
] ]
] ]

View File

@@ -325,7 +325,7 @@ class Results(Markable):
del self.__group_of_duplicate[dupe] del self.__group_of_duplicate[dupe]
self._remove_mark_flag(dupe) self._remove_mark_flag(dupe)
self.__total_count -= 1 self.__total_count -= 1
self.__total_size = max(0, self.__total_size - dupe.size) self.__total_size -= dupe.size
if not group: if not group:
del self.__group_of_duplicate[ref] del self.__group_of_duplicate[ref]
self.__groups.remove(group) self.__groups.remove(group)

View File

@@ -59,13 +59,13 @@ class BaseTestCaseCache:
def test_set_then_retrieve_blocks(self): def test_set_then_retrieve_blocks(self):
c = self.get_cache() c = self.get_cache()
b = [[(0, 0, 0), (1, 2, 3)]] * 8 b = [(0, 0, 0), (1, 2, 3)]
c["foo"] = b c["foo"] = b
eq_(b, c["foo"]) eq_(b, c["foo"])
def test_delitem(self): def test_delitem(self):
c = self.get_cache() c = self.get_cache()
c["foo"] = [[]] * 8 c["foo"] = ""
del c["foo"] del c["foo"]
assert "foo" not in c assert "foo" not in c
with raises(KeyError): with raises(KeyError):
@@ -74,16 +74,16 @@ class BaseTestCaseCache:
def test_persistance(self, tmpdir): def test_persistance(self, tmpdir):
DBNAME = tmpdir.join("hstest.db") DBNAME = tmpdir.join("hstest.db")
c = self.get_cache(str(DBNAME)) c = self.get_cache(str(DBNAME))
c["foo"] = [[(1, 2, 3)]] * 8 c["foo"] = [(1, 2, 3)]
del c del c
c = self.get_cache(str(DBNAME)) c = self.get_cache(str(DBNAME))
eq_([[(1, 2, 3)]] * 8, c["foo"]) eq_([(1, 2, 3)], c["foo"])
def test_filter(self): def test_filter(self):
c = self.get_cache() c = self.get_cache()
c["foo"] = [[]] * 8 c["foo"] = ""
c["bar"] = [[]] * 8 c["bar"] = ""
c["baz"] = [[]] * 8 c["baz"] = ""
c.filter(lambda p: p != "bar") # only 'bar' is removed c.filter(lambda p: p != "bar") # only 'bar' is removed
eq_(2, len(c)) eq_(2, len(c))
assert "foo" in c assert "foo" in c
@@ -92,9 +92,9 @@ class BaseTestCaseCache:
def test_clear(self): def test_clear(self):
c = self.get_cache() c = self.get_cache()
c["foo"] = [[]] * 8 c["foo"] = ""
c["bar"] = [[]] * 8 c["bar"] = ""
c["baz"] = [[]] * 8 c["baz"] = ""
c.clear() c.clear()
eq_(0, len(c)) eq_(0, len(c))
assert "foo" not in c assert "foo" not in c
@@ -104,7 +104,7 @@ class BaseTestCaseCache:
def test_by_id(self): def test_by_id(self):
# it's possible to use the cache by referring to the files by their row_id # it's possible to use the cache by referring to the files by their row_id
c = self.get_cache() c = self.get_cache()
b = [[(0, 0, 0), (1, 2, 3)]] * 8 b = [(0, 0, 0), (1, 2, 3)]
c["foo"] = b c["foo"] = b
foo_id = c.get_id("foo") foo_id = c.get_id("foo")
eq_(c[foo_id], b) eq_(c[foo_id], b)
@@ -127,10 +127,10 @@ class TestCaseSqliteCache(BaseTestCaseCache):
fp.write("invalid sqlite content") fp.write("invalid sqlite content")
fp.close() fp.close()
c = self.get_cache(dbname) # should not raise a DatabaseError c = self.get_cache(dbname) # should not raise a DatabaseError
c["foo"] = [[(1, 2, 3)]] * 8 c["foo"] = [(1, 2, 3)]
del c del c
c = self.get_cache(dbname) c = self.get_cache(dbname)
eq_(c["foo"], [[(1, 2, 3)]] * 8) eq_(c["foo"], [(1, 2, 3)])
class TestCaseCacheSQLEscape: class TestCaseCacheSQLEscape:
@@ -152,7 +152,7 @@ class TestCaseCacheSQLEscape:
def test_delitem(self): def test_delitem(self):
c = self.get_cache() c = self.get_cache()
c["foo'bar"] = [[]] * 8 c["foo'bar"] = []
try: try:
del c["foo'bar"] del c["foo'bar"]
except KeyError: except KeyError:

19
pkg/debian/dupeguru.1 Normal file
View File

@@ -0,0 +1,19 @@
.TH dupeguru 1 2015-01-15 "dupeguru"
.SH NAME
dupeguru \- GUI tool to find duplicate files on a system
.SH SYNOPSIS
.B dupeguru
.SH DESCRIPTION
dupeGuru is a tool to find duplicate files on your computer.
.PP
It can scan either filenames or contents. The filename scan features a
fuzzy matching algorithm that can find duplicate filenames even when
they are not exactly the same.
.PP
dupeGuru is customizable: you can tweak its matching engine to find
exactly the kind of duplicates you want to find.
.SH COPYRIGHT
This manual page is Copyright 2007-2022 Luca Falavigna <dktrkranz@debian.org>.
Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU General Public License, Version 3 or any later
version published by the Free Software Foundation.