mirror of
https://github.com/arsenetar/dupeguru.git
synced 2024-10-31 22:05:58 +00:00
Compare commits
124 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
8f197ea7e1 | ||
3a97ba941a | |||
e3bcf9d686 | |||
a81069be61 | |||
08154815d0 | |||
a95a9db08b | |||
3d866cec9a | |||
253dfd897c | |||
6e87f53f91 | |||
95e04c4d82 | |||
e3a612a704 | |||
53d5ac06bf | |||
13dd00c798 | |||
|
9f22835f73 | ||
|
85a4557525 | ||
70d956b4f8 | |||
|
007404f46a | ||
4385b50825 | |||
4ef1d24351 | |||
03be82c0b0 | |||
|
332b814c00 | ||
|
f56bef67e1 | ||
|
8160fe4fcc | ||
9ad84ade29 | |||
18f32fda19 | |||
99ec4e0f27 | |||
|
fe0e4bef91 | ||
322d29a996 | |||
c5a71f61b8 | |||
10405ad063 | |||
a257dbf0d5 | |||
|
7a4506ece3 | ||
aade6593ac | |||
6d8b86b7eb | |||
e41c91623c | |||
46521c8af1 | |||
549eb7f153 | |||
8125e3ec97 | |||
8c5e18b980 | |||
d81759f77f | |||
c57042fdd2 | |||
057be0294a | |||
81daddd072 | |||
1e651a1603 | |||
78f4145910 | |||
46d1afb566 | |||
a5e31f15f0 | |||
0cf6c9a1a2 | |||
6db2fa2be6 | |||
2dd2a801cc | |||
83f5e80427 | |||
091cae0cc6 | |||
e30a135451 | |||
1db93fd142 | |||
48862b6414 | |||
|
c920412856 | ||
4448b999ab | |||
af1ae33598 | |||
265d10b261 | |||
|
f1153c85c0 | ||
|
1eee3fd7e4 | ||
|
1827827fdf | ||
|
db174d4e63 | ||
1f1dfa88dc | |||
916c5204cf | |||
71af825b37 | |||
97f490b8b7 | |||
d369bcddd7 | |||
360dceca7b | |||
92b27801c3 | |||
|
b9aabb8545 | ||
d5eeab4a17 | |||
7865e4aeac | |||
58863b1728 | |||
e382683f66 | |||
f7ed1c801c | |||
f587c7b5d8 | |||
40ff40bea8 | |||
7a44c72a0a | |||
66aff9f74e | |||
5451f55219 | |||
36280b01e6 | |||
18359c3ea6 | |||
0a4e61edf5 | |||
d73a85b82e | |||
81c593399e | |||
6a732a79a8 | |||
63dd4d4561 | |||
e0061d7bc1 | |||
c5818b1d1f | |||
a470a8de25 | |||
a37b5b0eeb | |||
efd500ecc1 | |||
43fcc52291 | |||
50f5db1543 | |||
a5b0ccdd02 | |||
143147cb8e | |||
ebb81d9f03 | |||
da9f8b2b9d | |||
5ed5eddde6 | |||
9f40e4e786 | |||
86bf9b39d0 | |||
c0be0aecbd | |||
c408873d20 | |||
bbcdfbf698 | |||
8cee1a9467 | |||
448d33dcb6 | |||
8d414cadac | |||
f902ee889a | |||
bc89e71935 | |||
17b83c8001 | |||
0f845ee67a | |||
d40e32a143 | |||
1bc206e62d | |||
106a0feaba | |||
984e0c4094 | |||
9321e811d7 | |||
a64fcbfb5c | |||
cff07a12d6 | |||
|
b9c7832c4a | ||
b9dfeac2f3 | |||
efc99eee96 | |||
|
ff7733bb73 | ||
4b2fbe87ea |
13
.github/FUNDING.yml
vendored
Normal file
13
.github/FUNDING.yml
vendored
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# These are supported funding model platforms
|
||||||
|
|
||||||
|
github: arsenetar
|
||||||
|
patreon: # Replace with a single Patreon username
|
||||||
|
open_collective: # Replace with a single Open Collective username
|
||||||
|
ko_fi: # Replace with a single Ko-fi username
|
||||||
|
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
|
||||||
|
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
|
||||||
|
liberapay: # Replace with a single Liberapay username
|
||||||
|
issuehunt: # Replace with a single IssueHunt username
|
||||||
|
otechie: # Replace with a single Otechie username
|
||||||
|
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
|
||||||
|
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
|
54
.github/workflows/codeql-analysis.yml
vendored
54
.github/workflows/codeql-analysis.yml
vendored
@ -2,12 +2,12 @@ name: "CodeQL"
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ master ]
|
branches: [master]
|
||||||
pull_request:
|
pull_request:
|
||||||
# The branches below must be a subset of the branches above
|
# The branches below must be a subset of the branches above
|
||||||
branches: [ master ]
|
branches: [master]
|
||||||
schedule:
|
schedule:
|
||||||
- cron: '24 20 * * 2'
|
- cron: "24 20 * * 2"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
analyze:
|
analyze:
|
||||||
@ -21,30 +21,30 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
language: [ 'cpp', 'python' ]
|
language: ["cpp", "python"]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
# Initializes the CodeQL tools for scanning.
|
# Initializes the CodeQL tools for scanning.
|
||||||
- name: Initialize CodeQL
|
- name: Initialize CodeQL
|
||||||
uses: github/codeql-action/init@v1
|
uses: github/codeql-action/init@v1
|
||||||
with:
|
with:
|
||||||
languages: ${{ matrix.language }}
|
languages: ${{ matrix.language }}
|
||||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||||
# By default, queries listed here will override any specified in a config file.
|
# By default, queries listed here will override any specified in a config file.
|
||||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||||
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
# queries: ./path/to/local/query, your-org/your-repo/queries@main
|
||||||
- if: matrix.language == 'cpp'
|
- if: matrix.language == 'cpp'
|
||||||
name: Build Cpp
|
name: Build Cpp
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install python3-pyqt5
|
sudo apt-get install python3-pyqt5
|
||||||
make modules
|
make modules
|
||||||
- if: matrix.language == 'python'
|
- if: matrix.language == 'python'
|
||||||
name: Autobuild
|
name: Autobuild
|
||||||
uses: github/codeql-action/autobuild@v1
|
uses: github/codeql-action/autobuild@v1
|
||||||
# Analysis
|
# Analysis
|
||||||
- name: Perform CodeQL Analysis
|
- name: Perform CodeQL Analysis
|
||||||
uses: github/codeql-action/analyze@v1
|
uses: github/codeql-action/analyze@v1
|
||||||
|
113
.github/workflows/default.yml
vendored
113
.github/workflows/default.yml
vendored
@ -4,81 +4,62 @@ name: Default CI/CD
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ master ]
|
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ master ]
|
branches: [master]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
lint:
|
pre-commit:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python 3.10
|
- name: Set up Python 3.12
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: '3.10'
|
python-version: "3.12"
|
||||||
- name: Install dependencies
|
- uses: pre-commit/action@v3.0.1
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
pip install -r requirements.txt -r requirements-extra.txt
|
|
||||||
- name: Lint with flake8
|
|
||||||
run: |
|
|
||||||
flake8 .
|
|
||||||
format:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v2
|
|
||||||
- name: Set up Python 3.10
|
|
||||||
uses: actions/setup-python@v2
|
|
||||||
with:
|
|
||||||
python-version: '3.10'
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip
|
|
||||||
pip install -r requirements.txt -r requirements-extra.txt
|
|
||||||
- name: Check format with black
|
|
||||||
run: |
|
|
||||||
black .
|
|
||||||
test:
|
test:
|
||||||
needs: [lint, format]
|
needs: [pre-commit]
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
os: [ubuntu-latest]
|
||||||
python-version: [3.6, 3.7, 3.8, 3.9, '3.10']
|
python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12"]
|
||||||
exclude:
|
include:
|
||||||
- os: macos-latest
|
|
||||||
python-version: 3.6
|
|
||||||
- os: macos-latest
|
|
||||||
python-version: 3.7
|
|
||||||
- os: macos-latest
|
|
||||||
python-version: 3.8
|
|
||||||
- os: windows-latest
|
- os: windows-latest
|
||||||
python-version: 3.6
|
python-version: "3.12"
|
||||||
- os: windows-latest
|
- os: macos-latest
|
||||||
python-version: 3.7
|
python-version: "3.12"
|
||||||
- os: windows-latest
|
|
||||||
python-version: 3.8
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install -r requirements.txt -r requirements-extra.txt
|
pip install setuptools
|
||||||
- name: Build python modules
|
pip install -r requirements.txt -r requirements-extra.txt
|
||||||
run: |
|
- name: Build python modules
|
||||||
python build.py --modules
|
run: |
|
||||||
- name: Run tests
|
python build.py --modules
|
||||||
run: |
|
- name: Run tests
|
||||||
pytest core hscommon
|
run: |
|
||||||
- name: Upload Artifacts
|
pytest core hscommon
|
||||||
if: matrix.os == 'ubuntu-latest'
|
- name: Upload Artifacts
|
||||||
uses: actions/upload-artifact@v3
|
if: matrix.os == 'ubuntu-latest'
|
||||||
with:
|
uses: actions/upload-artifact@v4
|
||||||
name: modules ${{ matrix.python-version }}
|
with:
|
||||||
path: ${{ github.workspace }}/**/*.so
|
name: modules ${{ matrix.python-version }}
|
||||||
|
path: build/**/*.so
|
||||||
|
merge-artifacts:
|
||||||
|
needs: [test]
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Merge Artifacts
|
||||||
|
uses: actions/upload-artifact/merge@v4
|
||||||
|
with:
|
||||||
|
name: modules
|
||||||
|
pattern: modules*
|
||||||
|
delete-merged: true
|
||||||
|
26
.github/workflows/tx-push.yml
vendored
Normal file
26
.github/workflows/tx-push.yml
vendored
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# Push translation source to Transifex
|
||||||
|
name: Transifex Sync
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
paths:
|
||||||
|
- locale/*.pot
|
||||||
|
|
||||||
|
env:
|
||||||
|
TX_VERSION: "v1.6.10"
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
push-source:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Get Transifex Client
|
||||||
|
run: |
|
||||||
|
curl -o- https://raw.githubusercontent.com/transifex/cli/master/install.sh | bash -s -- $TX_VERSION
|
||||||
|
- name: Update & Push Translation Sources
|
||||||
|
env:
|
||||||
|
TX_TOKEN: ${{ secrets.TX_TOKEN }}
|
||||||
|
run: |
|
||||||
|
./tx push -s --use-git-timestamps
|
125
.gitignore
vendored
125
.gitignore
vendored
@ -1,30 +1,111 @@
|
|||||||
.DS_Store
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__
|
__pycache__/
|
||||||
*.egg-info
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
*.so
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
*.mo
|
*.mo
|
||||||
*.waf*
|
#*.pot
|
||||||
.lock-waf*
|
|
||||||
.tox
|
|
||||||
/tags
|
|
||||||
*.eggs
|
|
||||||
|
|
||||||
build
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||||
dist
|
__pypackages__/
|
||||||
env*
|
|
||||||
/deps
|
|
||||||
cocoa/autogen
|
|
||||||
|
|
||||||
/run.py
|
# Environments
|
||||||
/cocoa/*/Info.plist
|
.env
|
||||||
/cocoa/*/build
|
.venv
|
||||||
|
env*/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# macOS
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
# Visual Studio Code
|
||||||
|
.vscode/*
|
||||||
|
!.vscode/settings.json
|
||||||
|
!.vscode/tasks.json
|
||||||
|
!.vscode/launch.json
|
||||||
|
!.vscode/extensions.json
|
||||||
|
!.vscode/*.code-snippets
|
||||||
|
|
||||||
|
# Local History for Visual Studio Code
|
||||||
|
.history/
|
||||||
|
|
||||||
|
# Built Visual Studio Code Extensions
|
||||||
|
*.vsix
|
||||||
|
|
||||||
|
# dupeGuru Specific
|
||||||
/qt/*_rc.py
|
/qt/*_rc.py
|
||||||
/help/*/conf.py
|
/help/*/conf.py
|
||||||
/help/*/changelog.rst
|
/help/*/changelog.rst
|
||||||
/transifex
|
cocoa/autogen
|
||||||
|
/cocoa/*/Info.plist
|
||||||
|
/cocoa/*/build
|
||||||
|
|
||||||
*.pyd
|
*.waf*
|
||||||
*.exe
|
.lock-waf*
|
||||||
*.spec
|
/tags
|
||||||
|
|
||||||
.vscode
|
|
||||||
|
24
.pre-commit-config.yaml
Normal file
24
.pre-commit-config.yaml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
repos:
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: v4.5.0
|
||||||
|
hooks:
|
||||||
|
- id: check-yaml
|
||||||
|
- id: check-toml
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
exclude: ".*.json"
|
||||||
|
- id: trailing-whitespace
|
||||||
|
- repo: https://github.com/psf/black
|
||||||
|
rev: 24.2.0
|
||||||
|
hooks:
|
||||||
|
- id: black
|
||||||
|
- repo: https://github.com/PyCQA/flake8
|
||||||
|
rev: 7.0.0
|
||||||
|
hooks:
|
||||||
|
- id: flake8
|
||||||
|
exclude: ^(.tox|env|build|dist|help|qt/dg_rc.py|pkg).*
|
||||||
|
- repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
|
||||||
|
rev: v9.11.0
|
||||||
|
hooks:
|
||||||
|
- id: commitlint
|
||||||
|
stages: [commit-msg]
|
||||||
|
additional_dependencies: ["@commitlint/config-conventional"]
|
@ -1 +1 @@
|
|||||||
sonar.python.version=3.6, 3.7, 3.8, 3.9, 3.10
|
sonar.python.version=3.7, 3.8, 3.9, 3.10, 3.11
|
||||||
|
26
.tx/config
26
.tx/config
@ -1,26 +1,20 @@
|
|||||||
[main]
|
[main]
|
||||||
host = https://www.transifex.com
|
host = https://www.transifex.com
|
||||||
|
|
||||||
[dupeguru-1.core]
|
[o:voltaicideas:p:dupeguru-1:r:columns]
|
||||||
file_filter = locale/<lang>/LC_MESSAGES/core.po
|
|
||||||
source_file = locale/core.pot
|
|
||||||
source_lang = en
|
|
||||||
type = PO
|
|
||||||
|
|
||||||
[dupeguru-1.columns]
|
|
||||||
file_filter = locale/<lang>/LC_MESSAGES/columns.po
|
file_filter = locale/<lang>/LC_MESSAGES/columns.po
|
||||||
source_file = locale/columns.pot
|
source_file = locale/columns.pot
|
||||||
source_lang = en
|
source_lang = en
|
||||||
type = PO
|
type = PO
|
||||||
|
|
||||||
[dupeguru-1.ui]
|
[o:voltaicideas:p:dupeguru-1:r:core]
|
||||||
|
file_filter = locale/<lang>/LC_MESSAGES/core.po
|
||||||
|
source_file = locale/core.pot
|
||||||
|
source_lang = en
|
||||||
|
type = PO
|
||||||
|
|
||||||
|
[o:voltaicideas:p:dupeguru-1:r:ui]
|
||||||
file_filter = locale/<lang>/LC_MESSAGES/ui.po
|
file_filter = locale/<lang>/LC_MESSAGES/ui.po
|
||||||
source_file = locale/ui.pot
|
source_file = locale/ui.pot
|
||||||
source_lang = en
|
source_lang = en
|
||||||
type = PO
|
type = PO
|
||||||
|
|
||||||
[dupeguru-1.qtlib]
|
|
||||||
file_filter = qtlib/locale/<lang>/LC_MESSAGES/qtlib.po
|
|
||||||
source_file = qtlib/locale/qtlib.pot
|
|
||||||
source_lang = en
|
|
||||||
type = PO
|
|
||||||
|
12
.vscode/extensions.json
vendored
Normal file
12
.vscode/extensions.json
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
// List of extensions which should be recommended for users of this workspace.
|
||||||
|
"recommendations": [
|
||||||
|
"redhat.vscode-yaml",
|
||||||
|
"ms-python.vscode-pylance",
|
||||||
|
"ms-python.python",
|
||||||
|
"ms-python.black-formatter",
|
||||||
|
],
|
||||||
|
// List of extensions recommended by VS Code that should not be recommended for
|
||||||
|
// users of this workspace.
|
||||||
|
"unwantedRecommendations": []
|
||||||
|
}
|
17
.vscode/launch.json
vendored
Normal file
17
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "DupuGuru",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "run.py",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"subProcess": true,
|
||||||
|
"justMyCode": false
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
17
.vscode/settings.json
vendored
Normal file
17
.vscode/settings.json
vendored
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
{
|
||||||
|
"cSpell.words": [
|
||||||
|
"Dupras",
|
||||||
|
"hscommon"
|
||||||
|
],
|
||||||
|
"editor.rulers": [
|
||||||
|
88,
|
||||||
|
120
|
||||||
|
],
|
||||||
|
"python.languageServer": "Pylance",
|
||||||
|
"yaml.schemaStore.enable": true,
|
||||||
|
"[python]": {
|
||||||
|
"editor.formatOnSave": true,
|
||||||
|
"editor.defaultFormatter": "ms-python.black-formatter"
|
||||||
|
},
|
||||||
|
"python.testing.pytestEnabled": true
|
||||||
|
}
|
88
CONTRIBUTING.md
Normal file
88
CONTRIBUTING.md
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
# Contributing to dupeGuru
|
||||||
|
|
||||||
|
The following is a set of guidelines and information for contributing to dupeGuru.
|
||||||
|
|
||||||
|
#### Table of Contents
|
||||||
|
|
||||||
|
[Things to Know Before Starting](#things-to-know-before-starting)
|
||||||
|
|
||||||
|
[Ways to Contribute](#ways-to-contribute)
|
||||||
|
* [Reporting Bugs](#reporting-bugs)
|
||||||
|
* [Suggesting Enhancements](#suggesting-enhancements)
|
||||||
|
* [Localization](#localization)
|
||||||
|
* [Code Contribution](#code-contribution)
|
||||||
|
* [Pull Requests](#pull-requests)
|
||||||
|
|
||||||
|
[Style Guides](#style-guides)
|
||||||
|
* [Git Commit Messages](#git-commit-messages)
|
||||||
|
* [Python Style Guide](#python-style-guide)
|
||||||
|
* [Documentation Style Guide](#documentation-style-guide)
|
||||||
|
|
||||||
|
[Additional Notes](#additional-notes)
|
||||||
|
* [Issue and Pull Request Labels](#issue-and-pull-request-labels)
|
||||||
|
|
||||||
|
## Things to Know Before Starting
|
||||||
|
**TODO**
|
||||||
|
## Ways to Contribute
|
||||||
|
### Reporting Bugs
|
||||||
|
**TODO**
|
||||||
|
### Suggesting Enhancements
|
||||||
|
**TODO**
|
||||||
|
### Localization
|
||||||
|
**TODO**
|
||||||
|
### Code Contribution
|
||||||
|
**TODO**
|
||||||
|
### Pull Requests
|
||||||
|
Please follow these steps to have your contribution considered by the maintainers:
|
||||||
|
|
||||||
|
1. Keep each pull request specific to one feature or bug.
|
||||||
|
2. Follow the [style guides](#style-guides)
|
||||||
|
3. After you submit your pull request, verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing <details><summary>What if the status checks are failing?</summary>If a status check is failing, and you believe that the failure is unrelated to your change, please leave a comment on the pull request explaining why you believe the failure is unrelated. A maintainer will re-run the status check for you. If we conclude that the failure was a false positive, then we will open an issue to track that problem with our status check suite.</details>
|
||||||
|
|
||||||
|
While the prerequisites above must be satisfied prior to having your pull request reviewed, the reviewer(s) may ask you to complete additional design work, tests, or other changes before your pull request can be ultimately accepted.
|
||||||
|
|
||||||
|
## Style Guides
|
||||||
|
### Git Commit Messages
|
||||||
|
- Use the present tense ("Add feature" not "Added feature")
|
||||||
|
- Use the imperative mood ("Move cursor to..." not "Moves cursor to...")
|
||||||
|
- Limit the first line to 72 characters or less
|
||||||
|
- Reference issues and pull requests liberally after the first line
|
||||||
|
|
||||||
|
### Python Style Guide
|
||||||
|
- All files are formatted with [Black](https://github.com/psf/black)
|
||||||
|
- Follow [PEP 8](https://peps.python.org/pep-0008/) as much as practical
|
||||||
|
- Pass [flake8](https://flake8.pycqa.org/en/latest/) linting
|
||||||
|
- Include [PEP 484](https://peps.python.org/pep-0484/) type hints (new code)
|
||||||
|
|
||||||
|
### Documentation Style Guide
|
||||||
|
**TODO**
|
||||||
|
|
||||||
|
## Additional Notes
|
||||||
|
### Issue and Pull Request Labels
|
||||||
|
This section lists and describes the various labels used with issues and pull requests. Each of the labels is listed with a search link as well.
|
||||||
|
|
||||||
|
#### Issue Type and Status
|
||||||
|
| Label name | Search | Description |
|
||||||
|
|------------|--------|-------------|
|
||||||
|
| `enhancement` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aenhancement) | Feature requests and enhancements. |
|
||||||
|
| `bug` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Abug) | Bug reports. |
|
||||||
|
| `duplicate` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aduplicate) | Issue is a duplicate of existing issue. |
|
||||||
|
| `needs-reproduction` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aneeds-reproduction) | A bug that has not been able to be reproduced. |
|
||||||
|
| `needs-information` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aneeds-information) | More information needs to be collected about these problems or feature requests (e.g. steps to reproduce). |
|
||||||
|
| `blocked` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Ablocked) | Issue blocked by other issues. |
|
||||||
|
| `beginner` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Abeginner) | Less complex issues for users who want to start contributing. |
|
||||||
|
|
||||||
|
#### Category Labels
|
||||||
|
| Label name | Search | Description |
|
||||||
|
|------------|--------|-------------|
|
||||||
|
| `3rd party` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3A%223rd%20party%22) | Related to a 3rd party dependency. |
|
||||||
|
| `crash` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Acrash) | Related to crashes (complete, or unhandled). |
|
||||||
|
| `documentation` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Adocumentation) | Related to any documentation. |
|
||||||
|
| `linux` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Alinux) | Related to running on Linux. |
|
||||||
|
| `mac` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Amac) | Related to running on macOS. |
|
||||||
|
| `performance` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aperformance) | Related to the performance. |
|
||||||
|
| `ui` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aui)| Related to the visual design. |
|
||||||
|
| `windows` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Awindows) | Related to running on Windows. |
|
||||||
|
|
||||||
|
#### Pull Request Labels
|
||||||
|
None at this time. If the volume of pull requests increases, labels may be added to help manage them.
|
1
LICENSE
1
LICENSE
@ -619,4 +619,3 @@ Program, unless a warranty or assumption of liability accompanies a
|
|||||||
copy of the Program in return for a fee.
|
copy of the Program in return for a fee.
|
||||||
|
|
||||||
END OF TERMS AND CONDITIONS
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
@ -3,4 +3,3 @@ recursive-include core *.m
|
|||||||
include run.py
|
include run.py
|
||||||
graft locale
|
graft locale
|
||||||
graft help
|
graft help
|
||||||
graft qtlib/locale
|
|
12
Makefile
12
Makefile
@ -1,10 +1,10 @@
|
|||||||
PYTHON ?= python3
|
PYTHON ?= python3
|
||||||
PYTHON_VERSION_MINOR := $(shell ${PYTHON} -c "import sys; print(sys.version_info.minor)")
|
PYTHON_VERSION_MINOR := $(shell ${PYTHON} -c "import sys; print(sys.version_info.minor)")
|
||||||
PYRCC5 ?= pyrcc5
|
PYRCC5 ?= pyrcc5
|
||||||
REQ_MINOR_VERSION = 6
|
REQ_MINOR_VERSION = 7
|
||||||
PREFIX ?= /usr/local
|
PREFIX ?= /usr/local
|
||||||
|
|
||||||
# Windows compatibility via Msys2
|
# Windows compatibility via Msys2
|
||||||
# - venv creates Scripts instead of bin
|
# - venv creates Scripts instead of bin
|
||||||
# - compile generates .pyd instead of .so
|
# - compile generates .pyd instead of .so
|
||||||
# - venv with --system-site-packages has issues on windows as well...
|
# - venv with --system-site-packages has issues on windows as well...
|
||||||
@ -12,7 +12,7 @@ PREFIX ?= /usr/local
|
|||||||
ifeq ($(shell ${PYTHON} -c "import platform; print(platform.system())"), Windows)
|
ifeq ($(shell ${PYTHON} -c "import platform; print(platform.system())"), Windows)
|
||||||
BIN = Scripts
|
BIN = Scripts
|
||||||
SO = *.pyd
|
SO = *.pyd
|
||||||
VENV_OPTIONS =
|
VENV_OPTIONS =
|
||||||
else
|
else
|
||||||
BIN = bin
|
BIN = bin
|
||||||
SO = *.so
|
SO = *.so
|
||||||
@ -35,7 +35,7 @@ endif
|
|||||||
# Our build scripts are not very "make like" yet and perform their task in a bundle. For now, we
|
# Our build scripts are not very "make like" yet and perform their task in a bundle. For now, we
|
||||||
# use one of each file to act as a representative, a target, of these groups.
|
# use one of each file to act as a representative, a target, of these groups.
|
||||||
|
|
||||||
packages = hscommon qtlib core qt
|
packages = hscommon core qt
|
||||||
localedirs = $(wildcard locale/*/LC_MESSAGES)
|
localedirs = $(wildcard locale/*/LC_MESSAGES)
|
||||||
pofiles = $(wildcard locale/*/LC_MESSAGES/*.po)
|
pofiles = $(wildcard locale/*/LC_MESSAGES/*.po)
|
||||||
mofiles = $(patsubst %.po,%.mo,$(pofiles))
|
mofiles = $(patsubst %.po,%.mo,$(pofiles))
|
||||||
@ -43,7 +43,7 @@ mofiles = $(patsubst %.po,%.mo,$(pofiles))
|
|||||||
vpath %.po $(localedirs)
|
vpath %.po $(localedirs)
|
||||||
vpath %.mo $(localedirs)
|
vpath %.mo $(localedirs)
|
||||||
|
|
||||||
all: | env i18n modules qt/dg_rc.py
|
all: | env i18n modules qt/dg_rc.py
|
||||||
@echo "Build complete! You can run dupeGuru with 'make run'"
|
@echo "Build complete! You can run dupeGuru with 'make run'"
|
||||||
|
|
||||||
run:
|
run:
|
||||||
@ -82,7 +82,7 @@ qt/dg_rc.py: qt/dg.qrc
|
|||||||
i18n: $(mofiles)
|
i18n: $(mofiles)
|
||||||
|
|
||||||
%.mo: %.po
|
%.mo: %.po
|
||||||
msgfmt -o $@ $<
|
msgfmt -o $@ $<
|
||||||
|
|
||||||
modules: | env
|
modules: | env
|
||||||
$(VENV_PYTHON) build.py --modules
|
$(VENV_PYTHON) build.py --modules
|
||||||
|
17
README.md
17
README.md
@ -1,16 +1,12 @@
|
|||||||
# dupeGuru
|
# dupeGuru
|
||||||
|
|
||||||
[dupeGuru][dupeguru] is a cross-platform (Linux, OS X, Windows) GUI tool to find duplicate files in
|
[dupeGuru][dupeguru] is a cross-platform (Linux, OS X, Windows) GUI tool to find duplicate files in
|
||||||
a system. It is written mostly in Python 3 and has the peculiarity of using
|
a system. It is written mostly in Python 3 and uses [qt](https://www.qt.io/) for the UI.
|
||||||
[multiple GUI toolkits][cross-toolkit], all using the same core Python code. On OS X, the UI layer
|
|
||||||
is written in Objective-C and uses Cocoa. On Linux, it is written in Python and uses Qt5.
|
|
||||||
|
|
||||||
The Cocoa UI of dupeGuru is hosted in a separate repo: https://github.com/arsenetar/dupeguru-cocoa
|
|
||||||
|
|
||||||
## Current status
|
## Current status
|
||||||
Still looking for additional help especially with regards to:
|
Still looking for additional help especially with regards to:
|
||||||
* OSX maintenance: reproducing bugs & cocoa version, building package with Cocoa UI.
|
* OSX maintenance: reproducing bugs, packaging verification.
|
||||||
* Linux maintenance: reproducing bugs, maintaining PPA repository, Debian package.
|
* Linux maintenance: reproducing bugs, maintaining PPA repository, Debian package, rpm package.
|
||||||
* Translations: updating missing strings, transifex project at https://www.transifex.com/voltaicideas/dupeguru-1
|
* Translations: updating missing strings, transifex project at https://www.transifex.com/voltaicideas/dupeguru-1
|
||||||
* Documentation: keeping it up-to-date.
|
* Documentation: keeping it up-to-date.
|
||||||
|
|
||||||
@ -26,7 +22,6 @@ This folder contains the source for dupeGuru. Its documentation is in `help`, bu
|
|||||||
* help: Help document, written for Sphinx.
|
* help: Help document, written for Sphinx.
|
||||||
* locale: .po files for localization.
|
* locale: .po files for localization.
|
||||||
* hscommon: A collection of helpers used across HS applications.
|
* hscommon: A collection of helpers used across HS applications.
|
||||||
* qtlib: A collection of helpers used across Qt UI codebases of HS applications.
|
|
||||||
|
|
||||||
## How to build dupeGuru from source
|
## How to build dupeGuru from source
|
||||||
|
|
||||||
@ -36,19 +31,17 @@ For windows instructions see the [Windows Instructions](Windows.md).
|
|||||||
For macos instructions (qt version) see the [macOS Instructions](macos.md).
|
For macos instructions (qt version) see the [macOS Instructions](macos.md).
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
* [Python 3.6+][python]
|
* [Python 3.7+][python]
|
||||||
* PyQt5
|
* PyQt5
|
||||||
|
|
||||||
### System Setup
|
### System Setup
|
||||||
When running in a linux based environment the following system packages or equivalents are needed to build:
|
When running in a linux based environment the following system packages or equivalents are needed to build:
|
||||||
* python3-pyqt5
|
* python3-pyqt5
|
||||||
* pyqt5-dev-tools (on some systems, see note)
|
* pyqt5-dev-tools (on some systems, see note)
|
||||||
* python3-wheel (for hsaudiotag3k)
|
|
||||||
* python3-venv (only if using a virtual environment)
|
* python3-venv (only if using a virtual environment)
|
||||||
* python3-dev
|
* python3-dev
|
||||||
* build-essential
|
* build-essential
|
||||||
|
|
||||||
|
|
||||||
Note: On some linux systems pyrcc5 is not put on the path when installing python3-pyqt5, this will cause some issues with the resource files (and icons). These systems should have a respective pyqt5-dev-tools package, which should also be installed. The presence of pyrcc5 can be checked with `which pyrcc5`. Debian based systems need the extra package, and Arch does not.
|
Note: On some linux systems pyrcc5 is not put on the path when installing python3-pyqt5, this will cause some issues with the resource files (and icons). These systems should have a respective pyqt5-dev-tools package, which should also be installed. The presence of pyrcc5 can be checked with `which pyrcc5`. Debian based systems need the extra package, and Arch does not.
|
||||||
|
|
||||||
To create packages the following are also needed:
|
To create packages the following are also needed:
|
||||||
@ -70,7 +63,7 @@ dupeGuru comes with a makefile that can be used to build and run:
|
|||||||
$ python run.py
|
$ python run.py
|
||||||
|
|
||||||
### Generating Debian/Ubuntu package
|
### Generating Debian/Ubuntu package
|
||||||
To generate packages the extra requirements in requirements-extra.txt must be installed, the
|
To generate packages the extra requirements in requirements-extra.txt must be installed, the
|
||||||
steps are as follows:
|
steps are as follows:
|
||||||
|
|
||||||
$ cd <dupeGuru directory>
|
$ cd <dupeGuru directory>
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
|
||||||
- [Python 3.6+][python]
|
- [Python 3.7+][python]
|
||||||
- [Visual Studio 2019][vs] or [Visual Studio Build Tools 2019][vsBuildTools] with the Windows 10 SDK
|
- [Visual Studio 2019][vs] or [Visual Studio Build Tools 2019][vsBuildTools] with the Windows 10 SDK
|
||||||
- [nsis][nsis] (for installer creation)
|
- [nsis][nsis] (for installer creation)
|
||||||
- [msys2][msys2] (for using makefile method)
|
- [msys2][msys2] (for using makefile method)
|
||||||
@ -16,7 +16,7 @@ After installing python it is recommended to update setuptools before compiling
|
|||||||
More details on setting up python for compiling packages on windows can be found on the [python wiki][pythonWindowsCompilers]. Take note of the required vc++ versions.
|
More details on setting up python for compiling packages on windows can be found on the [python wiki][pythonWindowsCompilers]. Take note of the required vc++ versions.
|
||||||
|
|
||||||
### With build.py (preferred)
|
### With build.py (preferred)
|
||||||
To build with a different python version 3.6 vs 3.8 or 32 bit vs 64 bit specify that version instead of -3.8 to the `py` command below. If you want to build additional versions while keeping all virtual environments setup use a different location for each virtual environment.
|
To build with a different python version 3.7 vs 3.8 or 32 bit vs 64 bit specify that version instead of -3.8 to the `py` command below. If you want to build additional versions while keeping all virtual environments setup use a different location for each virtual environment.
|
||||||
|
|
||||||
$ cd <dupeGuru directory>
|
$ cd <dupeGuru directory>
|
||||||
$ py -3.8 -m venv .\env
|
$ py -3.8 -m venv .\env
|
||||||
@ -29,7 +29,7 @@ To build with a different python version 3.6 vs 3.8 or 32 bit vs 64 bit specify
|
|||||||
It is possible to build dupeGuru with the makefile on windows using a compatible POSIX environment. The following steps have been tested using [msys2][msys2]. Before running make:
|
It is possible to build dupeGuru with the makefile on windows using a compatible POSIX environment. The following steps have been tested using [msys2][msys2]. Before running make:
|
||||||
1. Install msys2 or other POSIX environment
|
1. Install msys2 or other POSIX environment
|
||||||
2. Install PyQt5 globally via pip
|
2. Install PyQt5 globally via pip
|
||||||
3. Use the respective console; for msys2 it is `msys2 msys`
|
3. Use the respective console; for msys2 it is `msys2 msys`
|
||||||
|
|
||||||
Then the following execution of the makefile should work. Pass the correct value for PYTHON to the makefile if not on the path as python3.
|
Then the following execution of the makefile should work. Pass the correct value for PYTHON to the makefile if not on the path as python3.
|
||||||
|
|
||||||
|
26
build.py
26
build.py
@ -61,7 +61,7 @@ def parse_args():
|
|||||||
|
|
||||||
|
|
||||||
def build_one_help(language):
|
def build_one_help(language):
|
||||||
print("Generating Help in {}".format(language))
|
print(f"Generating Help in {language}")
|
||||||
current_path = Path(".").absolute()
|
current_path = Path(".").absolute()
|
||||||
changelog_path = current_path.joinpath("help", "changelog")
|
changelog_path = current_path.joinpath("help", "changelog")
|
||||||
tixurl = "https://github.com/arsenetar/dupeguru/issues/{}"
|
tixurl = "https://github.com/arsenetar/dupeguru/issues/{}"
|
||||||
@ -88,14 +88,8 @@ def build_help():
|
|||||||
p.map(build_one_help, languages)
|
p.map(build_one_help, languages)
|
||||||
|
|
||||||
|
|
||||||
def build_qt_localizations():
|
|
||||||
loc.compile_all_po(Path("qtlib", "locale"))
|
|
||||||
loc.merge_locale_dir(Path("qtlib", "locale"), "locale")
|
|
||||||
|
|
||||||
|
|
||||||
def build_localizations():
|
def build_localizations():
|
||||||
loc.compile_all_po("locale")
|
loc.compile_all_po("locale")
|
||||||
build_qt_localizations()
|
|
||||||
locale_dest = Path("build", "locale")
|
locale_dest = Path("build", "locale")
|
||||||
if locale_dest.exists():
|
if locale_dest.exists():
|
||||||
shutil.rmtree(locale_dest)
|
shutil.rmtree(locale_dest)
|
||||||
@ -109,25 +103,16 @@ def build_updatepot():
|
|||||||
print("Building columns.pot")
|
print("Building columns.pot")
|
||||||
loc.generate_pot(["core"], Path("locale", "columns.pot"), ["coltr"])
|
loc.generate_pot(["core"], Path("locale", "columns.pot"), ["coltr"])
|
||||||
print("Building ui.pot")
|
print("Building ui.pot")
|
||||||
# When we're not under OS X, we don't want to overwrite ui.pot because it contains Cocoa locs
|
loc.generate_pot(["qt"], Path("locale", "ui.pot"), ["tr"], merge=True)
|
||||||
# We want to merge the generated pot with the old pot in the most preserving way possible.
|
|
||||||
ui_packages = ["qt", Path("cocoa", "inter")]
|
|
||||||
loc.generate_pot(ui_packages, Path("locale", "ui.pot"), ["tr"], merge=True)
|
|
||||||
print("Building qtlib.pot")
|
|
||||||
loc.generate_pot(["qtlib"], Path("qtlib", "locale", "qtlib.pot"), ["tr"])
|
|
||||||
|
|
||||||
|
|
||||||
def build_mergepot():
|
def build_mergepot():
|
||||||
print("Updating .po files using .pot files")
|
print("Updating .po files using .pot files")
|
||||||
loc.merge_pots_into_pos("locale")
|
loc.merge_pots_into_pos("locale")
|
||||||
loc.merge_pots_into_pos(Path("qtlib", "locale"))
|
|
||||||
# loc.merge_pots_into_pos(Path("cocoalib", "locale"))
|
|
||||||
|
|
||||||
|
|
||||||
def build_normpo():
|
def build_normpo():
|
||||||
loc.normalize_all_pos("locale")
|
loc.normalize_all_pos("locale")
|
||||||
loc.normalize_all_pos(Path("qtlib", "locale"))
|
|
||||||
# loc.normalize_all_pos(Path("cocoalib", "locale"))
|
|
||||||
|
|
||||||
|
|
||||||
def build_pe_modules():
|
def build_pe_modules():
|
||||||
@ -144,14 +129,15 @@ def build_normal():
|
|||||||
print("Building localizations")
|
print("Building localizations")
|
||||||
build_localizations()
|
build_localizations()
|
||||||
print("Building Qt stuff")
|
print("Building Qt stuff")
|
||||||
print_and_do("pyrcc5 {0} > {1}".format(Path("qt", "dg.qrc"), Path("qt", "dg_rc.py")))
|
Path("qt", "dg_rc.py").unlink(missing_ok=True)
|
||||||
|
print_and_do("pyrcc5 {} > {}".format(Path("qt", "dg.qrc"), Path("qt", "dg_rc.py")))
|
||||||
fix_qt_resource_file(Path("qt", "dg_rc.py"))
|
fix_qt_resource_file(Path("qt", "dg_rc.py"))
|
||||||
build_help()
|
build_help()
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if sys.version_info < (3, 6):
|
if sys.version_info < (3, 7):
|
||||||
sys.exit("Python < 3.6 is unsupported.")
|
sys.exit("Python < 3.7 is unsupported.")
|
||||||
options = parse_args()
|
options = parse_args()
|
||||||
if options.clean and Path("build").exists():
|
if options.clean and Path("build").exists():
|
||||||
shutil.rmtree("build")
|
shutil.rmtree("build")
|
||||||
|
17
commitlint.config.js
Normal file
17
commitlint.config.js
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
const Configuration = {
|
||||||
|
/*
|
||||||
|
* Resolve and load @commitlint/config-conventional from node_modules.
|
||||||
|
* Referenced packages must be installed
|
||||||
|
*/
|
||||||
|
extends: ['@commitlint/config-conventional'],
|
||||||
|
/*
|
||||||
|
* Any rules defined here will override rules from @commitlint/config-conventional
|
||||||
|
*/
|
||||||
|
rules: {
|
||||||
|
'header-max-length': [2, 'always', 72],
|
||||||
|
'subject-case': [2, 'always', 'sentence-case'],
|
||||||
|
'scope-enum': [2, 'always'],
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
module.exports = Configuration;
|
@ -1,2 +1,2 @@
|
|||||||
__version__ = "4.2.0"
|
__version__ = "4.3.1"
|
||||||
__appname__ = "dupeGuru"
|
__appname__ = "dupeGuru"
|
||||||
|
81
core/app.py
81
core/app.py
@ -4,37 +4,39 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
|
import cProfile
|
||||||
|
import datetime
|
||||||
import os
|
import os
|
||||||
import os.path as op
|
import os.path as op
|
||||||
import logging
|
import logging
|
||||||
import subprocess
|
import subprocess
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from send2trash import send2trash
|
from send2trash import send2trash
|
||||||
from hscommon.jobprogress import job
|
from hscommon.jobprogress import job
|
||||||
from hscommon.notify import Broadcaster
|
from hscommon.notify import Broadcaster
|
||||||
from hscommon.path import Path
|
|
||||||
from hscommon.conflict import smart_move, smart_copy
|
from hscommon.conflict import smart_move, smart_copy
|
||||||
from hscommon.gui.progress_window import ProgressWindow
|
from hscommon.gui.progress_window import ProgressWindow
|
||||||
from hscommon.util import delete_if_empty, first, escape, nonone, allsame
|
from hscommon.util import delete_if_empty, first, escape, nonone, allsame
|
||||||
from hscommon.trans import tr
|
from hscommon.trans import tr
|
||||||
from hscommon import desktop
|
from hscommon import desktop
|
||||||
|
|
||||||
from . import se, me, pe
|
from core import se, me, pe
|
||||||
from .pe.photo import get_delta_dimensions
|
from core.pe.photo import get_delta_dimensions
|
||||||
from .util import cmp_value, fix_surrogate_encoding
|
from core.util import cmp_value, fix_surrogate_encoding
|
||||||
from . import directories, results, export, fs, prioritize
|
from core import directories, results, export, fs, prioritize
|
||||||
from .ignore import IgnoreList
|
from core.ignore import IgnoreList
|
||||||
from .exclude import ExcludeDict as ExcludeList
|
from core.exclude import ExcludeDict as ExcludeList
|
||||||
from .scanner import ScanType
|
from core.scanner import ScanType
|
||||||
from .gui.deletion_options import DeletionOptions
|
from core.gui.deletion_options import DeletionOptions
|
||||||
from .gui.details_panel import DetailsPanel
|
from core.gui.details_panel import DetailsPanel
|
||||||
from .gui.directory_tree import DirectoryTree
|
from core.gui.directory_tree import DirectoryTree
|
||||||
from .gui.ignore_list_dialog import IgnoreListDialog
|
from core.gui.ignore_list_dialog import IgnoreListDialog
|
||||||
from .gui.exclude_list_dialog import ExcludeListDialogCore
|
from core.gui.exclude_list_dialog import ExcludeListDialogCore
|
||||||
from .gui.problem_dialog import ProblemDialog
|
from core.gui.problem_dialog import ProblemDialog
|
||||||
from .gui.stats_label import StatsLabel
|
from core.gui.stats_label import StatsLabel
|
||||||
|
|
||||||
HAD_FIRST_LAUNCH_PREFERENCE = "HadFirstLaunch"
|
HAD_FIRST_LAUNCH_PREFERENCE = "HadFirstLaunch"
|
||||||
DEBUG_MODE_PREFERENCE = "DebugMode"
|
DEBUG_MODE_PREFERENCE = "DebugMode"
|
||||||
@ -124,15 +126,13 @@ class DupeGuru(Broadcaster):
|
|||||||
|
|
||||||
NAME = PROMPT_NAME = "dupeGuru"
|
NAME = PROMPT_NAME = "dupeGuru"
|
||||||
|
|
||||||
PICTURE_CACHE_TYPE = "sqlite" # set to 'shelve' for a ShelveCache
|
|
||||||
|
|
||||||
def __init__(self, view, portable=False):
|
def __init__(self, view, portable=False):
|
||||||
if view.get_default(DEBUG_MODE_PREFERENCE):
|
if view.get_default(DEBUG_MODE_PREFERENCE):
|
||||||
logging.getLogger().setLevel(logging.DEBUG)
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
logging.debug("Debug mode enabled")
|
logging.debug("Debug mode enabled")
|
||||||
Broadcaster.__init__(self)
|
Broadcaster.__init__(self)
|
||||||
self.view = view
|
self.view = view
|
||||||
self.appdata = desktop.special_folder_path(desktop.SpecialFolder.APPDATA, appname=self.NAME, portable=portable)
|
self.appdata = desktop.special_folder_path(desktop.SpecialFolder.APPDATA, portable=portable)
|
||||||
if not op.exists(self.appdata):
|
if not op.exists(self.appdata):
|
||||||
os.makedirs(self.appdata)
|
os.makedirs(self.appdata)
|
||||||
self.app_mode = AppMode.STANDARD
|
self.app_mode = AppMode.STANDARD
|
||||||
@ -151,7 +151,8 @@ class DupeGuru(Broadcaster):
|
|||||||
"clean_empty_dirs": False,
|
"clean_empty_dirs": False,
|
||||||
"ignore_hardlink_matches": False,
|
"ignore_hardlink_matches": False,
|
||||||
"copymove_dest_type": DestType.RELATIVE,
|
"copymove_dest_type": DestType.RELATIVE,
|
||||||
"picture_cache_type": self.PICTURE_CACHE_TYPE,
|
"include_exists_check": True,
|
||||||
|
"rehash_ignore_mtime": False,
|
||||||
}
|
}
|
||||||
self.selected_dupes = []
|
self.selected_dupes = []
|
||||||
self.details_panel = DetailsPanel(self)
|
self.details_panel = DetailsPanel(self)
|
||||||
@ -181,8 +182,7 @@ class DupeGuru(Broadcaster):
|
|||||||
self.view.create_results_window()
|
self.view.create_results_window()
|
||||||
|
|
||||||
def _get_picture_cache_path(self):
|
def _get_picture_cache_path(self):
|
||||||
cache_type = self.options["picture_cache_type"]
|
cache_name = "cached_pictures.db"
|
||||||
cache_name = "cached_pictures.shelve" if cache_type == "shelve" else "cached_pictures.db"
|
|
||||||
return op.join(self.appdata, cache_name)
|
return op.join(self.appdata, cache_name)
|
||||||
|
|
||||||
def _get_dupe_sort_key(self, dupe, get_group, key, delta):
|
def _get_dupe_sort_key(self, dupe, get_group, key, delta):
|
||||||
@ -248,7 +248,7 @@ class DupeGuru(Broadcaster):
|
|||||||
ref = group.ref
|
ref = group.ref
|
||||||
linkfunc = os.link if use_hardlinks else os.symlink
|
linkfunc = os.link if use_hardlinks else os.symlink
|
||||||
linkfunc(str(ref.path), str_path)
|
linkfunc(str(ref.path), str_path)
|
||||||
self.clean_empty_dirs(dupe.path.parent())
|
self.clean_empty_dirs(dupe.path.parent)
|
||||||
|
|
||||||
def _create_file(self, path):
|
def _create_file(self, path):
|
||||||
# We add fs.Folder to fileclasses in case the file we're loading contains folder paths.
|
# We add fs.Folder to fileclasses in case the file we're loading contains folder paths.
|
||||||
@ -262,7 +262,7 @@ class DupeGuru(Broadcaster):
|
|||||||
try:
|
try:
|
||||||
f._read_all_info(attrnames=self.METADATA_TO_READ)
|
f._read_all_info(attrnames=self.METADATA_TO_READ)
|
||||||
return f
|
return f
|
||||||
except EnvironmentError:
|
except OSError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _get_export_data(self):
|
def _get_export_data(self):
|
||||||
@ -415,7 +415,7 @@ class DupeGuru(Broadcaster):
|
|||||||
def clean_empty_dirs(self, path):
|
def clean_empty_dirs(self, path):
|
||||||
if self.options["clean_empty_dirs"]:
|
if self.options["clean_empty_dirs"]:
|
||||||
while delete_if_empty(path, [".DS_Store"]):
|
while delete_if_empty(path, [".DS_Store"]):
|
||||||
path = path.parent()
|
path = path.parent
|
||||||
|
|
||||||
def clear_picture_cache(self):
|
def clear_picture_cache(self):
|
||||||
try:
|
try:
|
||||||
@ -428,25 +428,25 @@ class DupeGuru(Broadcaster):
|
|||||||
|
|
||||||
def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
|
def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
|
||||||
source_path = dupe.path
|
source_path = dupe.path
|
||||||
location_path = first(p for p in self.directories if dupe.path in p)
|
location_path = first(p for p in self.directories if p in dupe.path.parents)
|
||||||
dest_path = Path(destination)
|
dest_path = Path(destination)
|
||||||
if dest_type in {DestType.RELATIVE, DestType.ABSOLUTE}:
|
if dest_type in {DestType.RELATIVE, DestType.ABSOLUTE}:
|
||||||
# no filename, no windows drive letter
|
# no filename, no windows drive letter
|
||||||
source_base = source_path.remove_drive_letter().parent()
|
source_base = source_path.relative_to(source_path.anchor).parent
|
||||||
if dest_type == DestType.RELATIVE:
|
if dest_type == DestType.RELATIVE:
|
||||||
source_base = source_base[location_path:]
|
source_base = source_base.relative_to(location_path.relative_to(location_path.anchor))
|
||||||
dest_path = dest_path[source_base]
|
dest_path = dest_path.joinpath(source_base)
|
||||||
if not dest_path.exists():
|
if not dest_path.exists():
|
||||||
dest_path.makedirs()
|
dest_path.mkdir(parents=True)
|
||||||
# Add filename to dest_path. For file move/copy, it's not required, but for folders, yes.
|
# Add filename to dest_path. For file move/copy, it's not required, but for folders, yes.
|
||||||
dest_path = dest_path[source_path.name]
|
dest_path = dest_path.joinpath(source_path.name)
|
||||||
logging.debug("Copy/Move operation from '%s' to '%s'", source_path, dest_path)
|
logging.debug("Copy/Move operation from '%s' to '%s'", source_path, dest_path)
|
||||||
# Raises an EnvironmentError if there's a problem
|
# Raises an EnvironmentError if there's a problem
|
||||||
if copy:
|
if copy:
|
||||||
smart_copy(source_path, dest_path)
|
smart_copy(source_path, dest_path)
|
||||||
else:
|
else:
|
||||||
smart_move(source_path, dest_path)
|
smart_move(source_path, dest_path)
|
||||||
self.clean_empty_dirs(source_path.parent())
|
self.clean_empty_dirs(source_path.parent)
|
||||||
|
|
||||||
def copy_or_move_marked(self, copy):
|
def copy_or_move_marked(self, copy):
|
||||||
"""Start an async move (or copy) job on marked duplicates.
|
"""Start an async move (or copy) job on marked duplicates.
|
||||||
@ -553,9 +553,15 @@ class DupeGuru(Broadcaster):
|
|||||||
# a workaround to make the damn thing work.
|
# a workaround to make the damn thing work.
|
||||||
exepath, args = match.groups()
|
exepath, args = match.groups()
|
||||||
path, exename = op.split(exepath)
|
path, exename = op.split(exepath)
|
||||||
subprocess.Popen(exename + args, shell=True, cwd=path)
|
p = subprocess.Popen(
|
||||||
|
exename + args, shell=True, cwd=path, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
|
||||||
|
)
|
||||||
|
output = p.stdout.read()
|
||||||
|
logging.info("Custom command %s %s: %s", exename, args, output)
|
||||||
else:
|
else:
|
||||||
subprocess.Popen(dupe_cmd, shell=True)
|
p = subprocess.Popen(dupe_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||||
|
output = p.stdout.read()
|
||||||
|
logging.info("Custom command %s: %s", dupe_cmd, output)
|
||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
"""Load directory selection and ignore list from files in appdata.
|
"""Load directory selection and ignore list from files in appdata.
|
||||||
@ -780,12 +786,13 @@ class DupeGuru(Broadcaster):
|
|||||||
except OSError as e:
|
except OSError as e:
|
||||||
self.view.show_message(tr("Couldn't write to file: {}").format(str(e)))
|
self.view.show_message(tr("Couldn't write to file: {}").format(str(e)))
|
||||||
|
|
||||||
def start_scanning(self):
|
def start_scanning(self, profile_scan=False):
|
||||||
"""Starts an async job to scan for duplicates.
|
"""Starts an async job to scan for duplicates.
|
||||||
|
|
||||||
Scans folders selected in :attr:`directories` and put the results in :attr:`results`
|
Scans folders selected in :attr:`directories` and put the results in :attr:`results`
|
||||||
"""
|
"""
|
||||||
scanner = self.SCANNER_CLASS()
|
scanner = self.SCANNER_CLASS()
|
||||||
|
fs.filesdb.ignore_mtime = self.options["rehash_ignore_mtime"] is True
|
||||||
if not self.directories.has_any_file():
|
if not self.directories.has_any_file():
|
||||||
self.view.show_message(tr("The selected directories contain no scannable file."))
|
self.view.show_message(tr("The selected directories contain no scannable file."))
|
||||||
return
|
return
|
||||||
@ -800,6 +807,9 @@ class DupeGuru(Broadcaster):
|
|||||||
self._results_changed()
|
self._results_changed()
|
||||||
|
|
||||||
def do(j):
|
def do(j):
|
||||||
|
if profile_scan:
|
||||||
|
pr = cProfile.Profile()
|
||||||
|
pr.enable()
|
||||||
j.set_progress(0, tr("Collecting files to scan"))
|
j.set_progress(0, tr("Collecting files to scan"))
|
||||||
if scanner.scan_type == ScanType.FOLDERS:
|
if scanner.scan_type == ScanType.FOLDERS:
|
||||||
files = list(self.directories.get_folders(folderclass=se.fs.Folder, j=j))
|
files = list(self.directories.get_folders(folderclass=se.fs.Folder, j=j))
|
||||||
@ -810,6 +820,9 @@ class DupeGuru(Broadcaster):
|
|||||||
logging.info("Scanning %d files" % len(files))
|
logging.info("Scanning %d files" % len(files))
|
||||||
self.results.groups = scanner.get_dupe_groups(files, self.ignore_list, j)
|
self.results.groups = scanner.get_dupe_groups(files, self.ignore_list, j)
|
||||||
self.discarded_file_count = scanner.discarded_file_count
|
self.discarded_file_count = scanner.discarded_file_count
|
||||||
|
if profile_scan:
|
||||||
|
pr.disable()
|
||||||
|
pr.dump_stats(op.join(self.appdata, f"{datetime.datetime.now():%Y-%m-%d_%H-%M-%S}.profile"))
|
||||||
|
|
||||||
self._start_job(JobType.SCAN, do)
|
self._start_job(JobType.SCAN, do)
|
||||||
|
|
||||||
|
@ -7,13 +7,13 @@
|
|||||||
import os
|
import os
|
||||||
from xml.etree import ElementTree as ET
|
from xml.etree import ElementTree as ET
|
||||||
import logging
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from hscommon.jobprogress import job
|
from hscommon.jobprogress import job
|
||||||
from hscommon.path import Path
|
|
||||||
from hscommon.util import FileOrPath
|
from hscommon.util import FileOrPath
|
||||||
from hscommon.trans import tr
|
from hscommon.trans import tr
|
||||||
|
|
||||||
from . import fs
|
from core import fs
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"Directories",
|
"Directories",
|
||||||
@ -63,7 +63,7 @@ class Directories:
|
|||||||
|
|
||||||
def __contains__(self, path):
|
def __contains__(self, path):
|
||||||
for p in self._dirs:
|
for p in self._dirs:
|
||||||
if path in p:
|
if path == p or p in path.parents:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -84,64 +84,64 @@ class Directories:
|
|||||||
for denied_path_re in self._exclude_list.compiled:
|
for denied_path_re in self._exclude_list.compiled:
|
||||||
if denied_path_re.match(str(path.name)):
|
if denied_path_re.match(str(path.name)):
|
||||||
return DirectoryState.EXCLUDED
|
return DirectoryState.EXCLUDED
|
||||||
# return # We still use the old logic to force state on hidden dirs
|
return DirectoryState.NORMAL
|
||||||
# Override this in subclasses to specify the state of some special folders.
|
# Override this in subclasses to specify the state of some special folders.
|
||||||
if path.name.startswith("."):
|
if path.name.startswith("."):
|
||||||
return DirectoryState.EXCLUDED
|
return DirectoryState.EXCLUDED
|
||||||
|
return DirectoryState.NORMAL
|
||||||
|
|
||||||
def _get_files(self, from_path, fileclasses, j):
|
def _get_files(self, from_path, fileclasses, j):
|
||||||
for root, dirs, files in os.walk(str(from_path)):
|
try:
|
||||||
j.check_if_cancelled()
|
with os.scandir(from_path) as iter:
|
||||||
root_path = Path(root)
|
root_path = Path(from_path)
|
||||||
state = self.get_state(root_path)
|
state = self.get_state(root_path)
|
||||||
if state == DirectoryState.EXCLUDED and not any(p[: len(root_path)] == root_path for p in self.states):
|
# if we have no un-excluded dirs under this directory skip going deeper
|
||||||
# Recursively get files from folders with lots of subfolder is expensive. However, there
|
skip_dirs = state == DirectoryState.EXCLUDED and not any(
|
||||||
# might be a subfolder in this path that is not excluded. What we want to do is to skim
|
p.parts[: len(root_path.parts)] == root_path.parts for p in self.states
|
||||||
# through self.states and see if we must continue, or we can stop right here to save time
|
)
|
||||||
del dirs[:]
|
count = 0
|
||||||
try:
|
for item in iter:
|
||||||
if state != DirectoryState.EXCLUDED:
|
j.check_if_cancelled()
|
||||||
# Old logic
|
try:
|
||||||
if self._exclude_list is None or not self._exclude_list.mark_count:
|
if item.is_dir():
|
||||||
found_files = [fs.get_file(root_path + f, fileclasses=fileclasses) for f in files]
|
if skip_dirs:
|
||||||
else:
|
continue
|
||||||
found_files = []
|
yield from self._get_files(item.path, fileclasses, j)
|
||||||
# print(f"len of files: {len(files)} {files}")
|
continue
|
||||||
for f in files:
|
elif state == DirectoryState.EXCLUDED:
|
||||||
if not self._exclude_list.is_excluded(root, f):
|
continue
|
||||||
found_files.append(fs.get_file(root_path + f, fileclasses=fileclasses))
|
# File excluding or not
|
||||||
found_files = [f for f in found_files if f is not None]
|
if (
|
||||||
# In some cases, directories can be considered as files by dupeGuru, which is
|
self._exclude_list is None
|
||||||
# why we have this line below. In fact, there only one case: Bundle files under
|
or not self._exclude_list.mark_count
|
||||||
# OS X... In other situations, this forloop will do nothing.
|
or not self._exclude_list.is_excluded(str(from_path), item.name)
|
||||||
for d in dirs[:]:
|
):
|
||||||
f = fs.get_file(root_path + d, fileclasses=fileclasses)
|
file = fs.get_file(item, fileclasses=fileclasses)
|
||||||
if f is not None:
|
if file:
|
||||||
found_files.append(f)
|
file.is_ref = state == DirectoryState.REFERENCE
|
||||||
dirs.remove(d)
|
count += 1
|
||||||
logging.debug(
|
yield file
|
||||||
"Collected %d files in folder %s",
|
except (OSError, fs.InvalidPath):
|
||||||
len(found_files),
|
pass
|
||||||
str(root_path),
|
logging.debug(
|
||||||
)
|
"Collected %d files in folder %s",
|
||||||
for file in found_files:
|
count,
|
||||||
file.is_ref = state == DirectoryState.REFERENCE
|
str(root_path),
|
||||||
yield file
|
)
|
||||||
except (EnvironmentError, fs.InvalidPath):
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _get_folders(self, from_folder, j):
|
def _get_folders(self, from_folder, j):
|
||||||
j.check_if_cancelled()
|
j.check_if_cancelled()
|
||||||
try:
|
try:
|
||||||
for subfolder in from_folder.subfolders:
|
for subfolder in from_folder.subfolders:
|
||||||
for folder in self._get_folders(subfolder, j):
|
yield from self._get_folders(subfolder, j)
|
||||||
yield folder
|
|
||||||
state = self.get_state(from_folder.path)
|
state = self.get_state(from_folder.path)
|
||||||
if state != DirectoryState.EXCLUDED:
|
if state != DirectoryState.EXCLUDED:
|
||||||
from_folder.is_ref = state == DirectoryState.REFERENCE
|
from_folder.is_ref = state == DirectoryState.REFERENCE
|
||||||
logging.debug("Yielding Folder %r state: %d", from_folder, state)
|
logging.debug("Yielding Folder %r state: %d", from_folder, state)
|
||||||
yield from_folder
|
yield from_folder
|
||||||
except (EnvironmentError, fs.InvalidPath):
|
except (OSError, fs.InvalidPath):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# ---Public
|
# ---Public
|
||||||
@ -159,7 +159,7 @@ class Directories:
|
|||||||
raise AlreadyThereError()
|
raise AlreadyThereError()
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
raise InvalidPathError()
|
raise InvalidPathError()
|
||||||
self._dirs = [p for p in self._dirs if p not in path]
|
self._dirs = [p for p in self._dirs if path not in p.parents]
|
||||||
self._dirs.append(path)
|
self._dirs.append(path)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -170,10 +170,10 @@ class Directories:
|
|||||||
:rtype: list of Path
|
:rtype: list of Path
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
subpaths = [p for p in path.listdir() if p.isdir()]
|
subpaths = [p for p in path.glob("*") if p.is_dir()]
|
||||||
subpaths.sort(key=lambda x: x.name.lower())
|
subpaths.sort(key=lambda x: x.name.lower())
|
||||||
return subpaths
|
return subpaths
|
||||||
except EnvironmentError:
|
except OSError:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def get_files(self, fileclasses=None, j=job.nulljob):
|
def get_files(self, fileclasses=None, j=job.nulljob):
|
||||||
@ -187,7 +187,7 @@ class Directories:
|
|||||||
for path in self._dirs:
|
for path in self._dirs:
|
||||||
for file in self._get_files(path, fileclasses=fileclasses, j=j):
|
for file in self._get_files(path, fileclasses=fileclasses, j=j):
|
||||||
file_count += 1
|
file_count += 1
|
||||||
if type(j) != job.NullJob:
|
if not isinstance(j, job.NullJob):
|
||||||
j.set_progress(-1, tr("Collected {} files to scan").format(file_count))
|
j.set_progress(-1, tr("Collected {} files to scan").format(file_count))
|
||||||
yield file
|
yield file
|
||||||
|
|
||||||
@ -203,7 +203,7 @@ class Directories:
|
|||||||
from_folder = folderclass(path)
|
from_folder = folderclass(path)
|
||||||
for folder in self._get_folders(from_folder, j):
|
for folder in self._get_folders(from_folder, j):
|
||||||
folder_count += 1
|
folder_count += 1
|
||||||
if type(j) != job.NullJob:
|
if not isinstance(j, job.NullJob):
|
||||||
j.set_progress(-1, tr("Collected {} folders to scan").format(folder_count))
|
j.set_progress(-1, tr("Collected {} folders to scan").format(folder_count))
|
||||||
yield folder
|
yield folder
|
||||||
|
|
||||||
@ -215,19 +215,16 @@ class Directories:
|
|||||||
# direct match? easy result.
|
# direct match? easy result.
|
||||||
if path in self.states:
|
if path in self.states:
|
||||||
return self.states[path]
|
return self.states[path]
|
||||||
state = self._default_state_for_path(path) or DirectoryState.NORMAL
|
state = self._default_state_for_path(path)
|
||||||
# Save non-default states in cache, necessary for _get_files()
|
# Save non-default states in cache, necessary for _get_files()
|
||||||
if state != DirectoryState.NORMAL:
|
if state != DirectoryState.NORMAL:
|
||||||
self.states[path] = state
|
self.states[path] = state
|
||||||
return state
|
return state
|
||||||
|
# find the longest parent path that is in states and return that state if found
|
||||||
prevlen = 0
|
# NOTE: path.parents is ordered longest to shortest
|
||||||
# we loop through the states to find the longest matching prefix
|
for parent_path in path.parents:
|
||||||
# if the parent has a state in cache, return that state
|
if parent_path in self.states:
|
||||||
for p, s in self.states.items():
|
return self.states[parent_path]
|
||||||
if p.is_parent_of(path) and len(p) > prevlen:
|
|
||||||
prevlen = len(p)
|
|
||||||
state = s
|
|
||||||
return state
|
return state
|
||||||
|
|
||||||
def has_any_file(self):
|
def has_any_file(self):
|
||||||
@ -296,6 +293,6 @@ class Directories:
|
|||||||
if self.get_state(path) == state:
|
if self.get_state(path) == state:
|
||||||
return
|
return
|
||||||
for iter_path in list(self.states.keys()):
|
for iter_path in list(self.states.keys()):
|
||||||
if path.is_parent_of(iter_path):
|
if path in iter_path.parents:
|
||||||
del self.states[iter_path]
|
del self.states[iter_path]
|
||||||
self.states[path] = state
|
self.states[path] = state
|
||||||
|
@ -166,7 +166,7 @@ def reduce_common_words(word_dict, threshold):
|
|||||||
The exception to this removal are the objects where all the words of the object are common.
|
The exception to this removal are the objects where all the words of the object are common.
|
||||||
Because if we remove them, we will miss some duplicates!
|
Because if we remove them, we will miss some duplicates!
|
||||||
"""
|
"""
|
||||||
uncommon_words = set(word for word, objects in word_dict.items() if len(objects) < threshold)
|
uncommon_words = {word for word, objects in word_dict.items() if len(objects) < threshold}
|
||||||
for word, objects in list(word_dict.items()):
|
for word, objects in list(word_dict.items()):
|
||||||
if len(objects) < threshold:
|
if len(objects) < threshold:
|
||||||
continue
|
continue
|
||||||
@ -283,7 +283,7 @@ def getmatches_by_contents(files, bigsize=0, j=job.nulljob):
|
|||||||
"""Returns a list of :class:`Match` within ``files`` if their contents is the same.
|
"""Returns a list of :class:`Match` within ``files`` if their contents is the same.
|
||||||
|
|
||||||
:param bigsize: The size in bytes over which we consider files big enough to
|
:param bigsize: The size in bytes over which we consider files big enough to
|
||||||
justify taking samples of md5. If 0, compute md5 as usual.
|
justify taking samples of the file for hashing. If 0, compute digest as usual.
|
||||||
:param j: A :ref:`job progress instance <jobs>`.
|
:param j: A :ref:`job progress instance <jobs>`.
|
||||||
"""
|
"""
|
||||||
size2files = defaultdict(set)
|
size2files = defaultdict(set)
|
||||||
@ -300,15 +300,16 @@ def getmatches_by_contents(files, bigsize=0, j=job.nulljob):
|
|||||||
if first.is_ref and second.is_ref:
|
if first.is_ref and second.is_ref:
|
||||||
continue # Don't spend time comparing two ref pics together.
|
continue # Don't spend time comparing two ref pics together.
|
||||||
if first.size == 0 and second.size == 0:
|
if first.size == 0 and second.size == 0:
|
||||||
# skip md5 for zero length files
|
# skip hashing for zero length files
|
||||||
result.append(Match(first, second, 100))
|
result.append(Match(first, second, 100))
|
||||||
continue
|
continue
|
||||||
if first.md5partial == second.md5partial:
|
# if digests are the same (and not None) then files match
|
||||||
|
if first.digest_partial is not None and first.digest_partial == second.digest_partial:
|
||||||
if bigsize > 0 and first.size > bigsize:
|
if bigsize > 0 and first.size > bigsize:
|
||||||
if first.md5samples == second.md5samples:
|
if first.digest_samples is not None and first.digest_samples == second.digest_samples:
|
||||||
result.append(Match(first, second, 100))
|
result.append(Match(first, second, 100))
|
||||||
else:
|
else:
|
||||||
if first.md5 == second.md5:
|
if first.digest is not None and first.digest == second.digest:
|
||||||
result.append(Match(first, second, 100))
|
result.append(Match(first, second, 100))
|
||||||
group_count += 1
|
group_count += 1
|
||||||
j.add_progress(desc=PROGRESS_MESSAGE % (len(result), group_count))
|
j.add_progress(desc=PROGRESS_MESSAGE % (len(result), group_count))
|
||||||
@ -409,7 +410,7 @@ class Group:
|
|||||||
|
|
||||||
You can call this after the duplicate scanning process to free a bit of memory.
|
You can call this after the duplicate scanning process to free a bit of memory.
|
||||||
"""
|
"""
|
||||||
discarded = set(m for m in self.matches if not all(obj in self.unordered for obj in [m.first, m.second]))
|
discarded = {m for m in self.matches if not all(obj in self.unordered for obj in [m.first, m.second])}
|
||||||
self.matches -= discarded
|
self.matches -= discarded
|
||||||
self.candidates = defaultdict(set)
|
self.candidates = defaultdict(set)
|
||||||
return discarded
|
return discarded
|
||||||
@ -456,7 +457,7 @@ class Group:
|
|||||||
self._matches_for_ref = None
|
self._matches_for_ref = None
|
||||||
if (len(self) > 1) and any(not getattr(item, "is_ref", False) for item in self):
|
if (len(self) > 1) and any(not getattr(item, "is_ref", False) for item in self):
|
||||||
if discard_matches:
|
if discard_matches:
|
||||||
self.matches = set(m for m in self.matches if item not in m)
|
self.matches = {m for m in self.matches if item not in m}
|
||||||
else:
|
else:
|
||||||
self._clear()
|
self._clear()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@ -529,7 +530,7 @@ def get_groups(matches):
|
|||||||
del dupe2group
|
del dupe2group
|
||||||
del matches
|
del matches
|
||||||
# should free enough memory to continue
|
# should free enough memory to continue
|
||||||
logging.warning("Memory Overflow. Groups: {0}".format(len(groups)))
|
logging.warning(f"Memory Overflow. Groups: {len(groups)}")
|
||||||
# Now that we have a group, we have to discard groups' matches and see if there're any "orphan"
|
# Now that we have a group, we have to discard groups' matches and see if there're any "orphan"
|
||||||
# matches, that is, matches that were candidate in a group but that none of their 2 files were
|
# matches, that is, matches that were candidate in a group but that none of their 2 files were
|
||||||
# accepted in the group. With these orphan groups, it's safe to build additional groups
|
# accepted in the group. With these orphan groups, it's safe to build additional groups
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
from .markable import Markable
|
from core.markable import Markable
|
||||||
from xml.etree import ElementTree as ET
|
from xml.etree import ElementTree as ET
|
||||||
|
|
||||||
# TODO: perhaps use regex module for better Unicode support? https://pypi.org/project/regex/
|
# TODO: perhaps use regex module for better Unicode support? https://pypi.org/project/regex/
|
||||||
|
302
core/fs.py
302
core/fs.py
@ -11,16 +11,28 @@
|
|||||||
# resulting needless complexity and memory usage. It's been a while since I wanted to do that fork,
|
# resulting needless complexity and memory usage. It's been a while since I wanted to do that fork,
|
||||||
# and I'm doing it now.
|
# and I'm doing it now.
|
||||||
|
|
||||||
import hashlib
|
import os
|
||||||
|
|
||||||
from math import floor
|
from math import floor
|
||||||
import logging
|
import logging
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
from sys import platform
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
from typing import Any
|
from typing import Any, AnyStr, Union, Callable
|
||||||
|
|
||||||
from hscommon.path import Path
|
from pathlib import Path
|
||||||
from hscommon.util import nonone, get_file_ext
|
from hscommon.util import nonone, get_file_ext
|
||||||
|
|
||||||
|
hasher: Callable
|
||||||
|
try:
|
||||||
|
import xxhash
|
||||||
|
|
||||||
|
hasher = xxhash.xxh128
|
||||||
|
except ImportError:
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
hasher = hashlib.md5
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"File",
|
"File",
|
||||||
"Folder",
|
"Folder",
|
||||||
@ -40,9 +52,12 @@ NOT_SET = object()
|
|||||||
# CPU.
|
# CPU.
|
||||||
CHUNK_SIZE = 1024 * 1024 # 1 MiB
|
CHUNK_SIZE = 1024 * 1024 # 1 MiB
|
||||||
|
|
||||||
# Minimum size below which partial hashes don't need to be computed
|
# Minimum size below which partial hashing is not used
|
||||||
MIN_FILE_SIZE = 3 * CHUNK_SIZE # 3MiB, because we take 3 samples
|
MIN_FILE_SIZE = 3 * CHUNK_SIZE # 3MiB, because we take 3 samples
|
||||||
|
|
||||||
|
# Partial hashing offset and size
|
||||||
|
PARTIAL_OFFSET_SIZE = (0x4000, 0x4000)
|
||||||
|
|
||||||
|
|
||||||
class FSError(Exception):
|
class FSError(Exception):
|
||||||
cls_message = "An error has occured on '{name}' in '{parent}'"
|
cls_message = "An error has occured on '{name}' in '{parent}'"
|
||||||
@ -83,75 +98,101 @@ class OperationError(FSError):
|
|||||||
|
|
||||||
|
|
||||||
class FilesDB:
|
class FilesDB:
|
||||||
|
schema_version = 1
|
||||||
|
schema_version_description = "Changed from md5 to xxhash if available."
|
||||||
|
|
||||||
create_table_query = "CREATE TABLE IF NOT EXISTS files (path TEXT PRIMARY KEY, size INTEGER, mtime_ns INTEGER, entry_dt DATETIME, md5 BLOB, md5partial BLOB)"
|
create_table_query = """CREATE TABLE IF NOT EXISTS files (path TEXT PRIMARY KEY, size INTEGER, mtime_ns INTEGER,
|
||||||
drop_table_query = "DROP TABLE files;"
|
entry_dt DATETIME, digest BLOB, digest_partial BLOB, digest_samples BLOB)"""
|
||||||
|
drop_table_query = "DROP TABLE IF EXISTS files;"
|
||||||
select_query = "SELECT {key} FROM files WHERE path=:path AND size=:size and mtime_ns=:mtime_ns"
|
select_query = "SELECT {key} FROM files WHERE path=:path AND size=:size and mtime_ns=:mtime_ns"
|
||||||
|
select_query_ignore_mtime = "SELECT {key} FROM files WHERE path=:path AND size=:size"
|
||||||
insert_query = """
|
insert_query = """
|
||||||
INSERT INTO files (path, size, mtime_ns, entry_dt, {key}) VALUES (:path, :size, :mtime_ns, datetime('now'), :value)
|
INSERT INTO files (path, size, mtime_ns, entry_dt, {key})
|
||||||
|
VALUES (:path, :size, :mtime_ns, datetime('now'), :value)
|
||||||
ON CONFLICT(path) DO UPDATE SET size=:size, mtime_ns=:mtime_ns, entry_dt=datetime('now'), {key}=:value;
|
ON CONFLICT(path) DO UPDATE SET size=:size, mtime_ns=:mtime_ns, entry_dt=datetime('now'), {key}=:value;
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
ignore_mtime = False
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.conn = None
|
self.conn = None
|
||||||
self.cur = None
|
|
||||||
self.lock = None
|
self.lock = None
|
||||||
|
|
||||||
def connect(self, path):
|
def connect(self, path: Union[AnyStr, os.PathLike]) -> None:
|
||||||
# type: (str, ) -> None
|
if platform.startswith("gnu0"):
|
||||||
|
self.conn = sqlite3.connect(path, check_same_thread=False, isolation_level=None)
|
||||||
self.conn = sqlite3.connect(path, check_same_thread=False)
|
else:
|
||||||
self.cur = self.conn.cursor()
|
self.conn = sqlite3.connect(path, check_same_thread=False)
|
||||||
self.cur.execute(self.create_table_query)
|
|
||||||
self.lock = Lock()
|
self.lock = Lock()
|
||||||
|
self._check_upgrade()
|
||||||
|
|
||||||
def clear(self):
|
def _check_upgrade(self) -> None:
|
||||||
# type: () -> None
|
with self.lock, self.conn as conn:
|
||||||
|
has_schema = conn.execute(
|
||||||
|
"SELECT NAME FROM sqlite_master WHERE type='table' AND name='schema_version'"
|
||||||
|
).fetchall()
|
||||||
|
version = None
|
||||||
|
if has_schema:
|
||||||
|
version = conn.execute("SELECT version FROM schema_version ORDER BY version DESC").fetchone()[0]
|
||||||
|
else:
|
||||||
|
conn.execute("CREATE TABLE schema_version (version int PRIMARY KEY, description TEXT)")
|
||||||
|
if version != self.schema_version:
|
||||||
|
conn.execute(self.drop_table_query)
|
||||||
|
conn.execute(
|
||||||
|
"INSERT OR REPLACE INTO schema_version VALUES (:version, :description)",
|
||||||
|
{"version": self.schema_version, "description": self.schema_version_description},
|
||||||
|
)
|
||||||
|
conn.execute(self.create_table_query)
|
||||||
|
|
||||||
with self.lock:
|
def clear(self) -> None:
|
||||||
self.cur.execute(self.drop_table_query)
|
with self.lock, self.conn as conn:
|
||||||
self.cur.execute(self.create_table_query)
|
conn.execute(self.drop_table_query)
|
||||||
|
conn.execute(self.create_table_query)
|
||||||
def get(self, path, key):
|
|
||||||
# type: (Path, str) -> bytes
|
|
||||||
|
|
||||||
|
def get(self, path: Path, key: str) -> Union[bytes, None]:
|
||||||
stat = path.stat()
|
stat = path.stat()
|
||||||
size = stat.st_size
|
size = stat.st_size
|
||||||
mtime_ns = stat.st_mtime_ns
|
mtime_ns = stat.st_mtime_ns
|
||||||
|
try:
|
||||||
|
with self.conn as conn:
|
||||||
|
if self.ignore_mtime:
|
||||||
|
cursor = conn.execute(
|
||||||
|
self.select_query_ignore_mtime.format(key=key), {"path": str(path), "size": size}
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
cursor = conn.execute(
|
||||||
|
self.select_query.format(key=key),
|
||||||
|
{"path": str(path), "size": size, "mtime_ns": mtime_ns},
|
||||||
|
)
|
||||||
|
result = cursor.fetchone()
|
||||||
|
cursor.close()
|
||||||
|
|
||||||
with self.lock:
|
if result:
|
||||||
self.cur.execute(self.select_query.format(key=key), {"path": str(path), "size": size, "mtime_ns": mtime_ns})
|
return result[0]
|
||||||
result = self.cur.fetchone()
|
except Exception as ex:
|
||||||
|
logging.warning(f"Couldn't get {key} for {path} w/{size}, {mtime_ns}: {ex}")
|
||||||
if result:
|
|
||||||
return result[0]
|
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def put(self, path, key, value):
|
def put(self, path: Path, key: str, value: Any) -> None:
|
||||||
# type: (Path, str, Any) -> None
|
|
||||||
|
|
||||||
stat = path.stat()
|
stat = path.stat()
|
||||||
size = stat.st_size
|
size = stat.st_size
|
||||||
mtime_ns = stat.st_mtime_ns
|
mtime_ns = stat.st_mtime_ns
|
||||||
|
try:
|
||||||
|
with self.lock, self.conn as conn:
|
||||||
|
conn.execute(
|
||||||
|
self.insert_query.format(key=key),
|
||||||
|
{"path": str(path), "size": size, "mtime_ns": mtime_ns, "value": value},
|
||||||
|
)
|
||||||
|
except Exception as ex:
|
||||||
|
logging.warning(f"Couldn't put {key} for {path} w/{size}, {mtime_ns}: {ex}")
|
||||||
|
|
||||||
with self.lock:
|
def commit(self) -> None:
|
||||||
self.cur.execute(
|
|
||||||
self.insert_query.format(key=key),
|
|
||||||
{"path": str(path), "size": size, "mtime_ns": mtime_ns, "value": value},
|
|
||||||
)
|
|
||||||
|
|
||||||
def commit(self):
|
|
||||||
# type: () -> None
|
|
||||||
|
|
||||||
with self.lock:
|
with self.lock:
|
||||||
self.conn.commit()
|
self.conn.commit()
|
||||||
|
|
||||||
def close(self):
|
def close(self) -> None:
|
||||||
# type: () -> None
|
|
||||||
|
|
||||||
with self.lock:
|
with self.lock:
|
||||||
self.cur.close()
|
|
||||||
self.conn.close()
|
self.conn.close()
|
||||||
|
|
||||||
|
|
||||||
@ -161,19 +202,26 @@ filesdb = FilesDB() # Singleton
|
|||||||
class File:
|
class File:
|
||||||
"""Represents a file and holds metadata to be used for scanning."""
|
"""Represents a file and holds metadata to be used for scanning."""
|
||||||
|
|
||||||
INITIAL_INFO = {"size": 0, "mtime": 0, "md5": b"", "md5partial": b"", "md5samples": b""}
|
INITIAL_INFO = {"size": 0, "mtime": 0, "digest": b"", "digest_partial": b"", "digest_samples": b""}
|
||||||
# Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of
|
# Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of
|
||||||
# files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become
|
# files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become
|
||||||
# even greater when we take into account read attributes (70%!). Yeah, it's worth it.
|
# even greater when we take into account read attributes (70%!). Yeah, it's worth it.
|
||||||
__slots__ = ("path", "is_ref", "words") + tuple(INITIAL_INFO.keys())
|
__slots__ = ("path", "unicode_path", "is_ref", "words") + tuple(INITIAL_INFO.keys())
|
||||||
|
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
self.path = path
|
|
||||||
for attrname in self.INITIAL_INFO:
|
for attrname in self.INITIAL_INFO:
|
||||||
setattr(self, attrname, NOT_SET)
|
setattr(self, attrname, NOT_SET)
|
||||||
|
if type(path) is os.DirEntry:
|
||||||
|
self.path = Path(path.path)
|
||||||
|
self.size = nonone(path.stat().st_size, 0)
|
||||||
|
self.mtime = nonone(path.stat().st_mtime, 0)
|
||||||
|
else:
|
||||||
|
self.path = path
|
||||||
|
if self.path:
|
||||||
|
self.unicode_path = str(self.path)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<{} {}>".format(self.__class__.__name__, str(self.path))
|
return f"<{self.__class__.__name__} {str(self.path)}>"
|
||||||
|
|
||||||
def __getattribute__(self, attrname):
|
def __getattribute__(self, attrname):
|
||||||
result = object.__getattribute__(self, attrname)
|
result = object.__getattribute__(self, attrname)
|
||||||
@ -187,32 +235,46 @@ class File:
|
|||||||
result = self.INITIAL_INFO[attrname]
|
result = self.INITIAL_INFO[attrname]
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _calc_md5(self):
|
def _calc_digest(self):
|
||||||
# type: () -> bytes
|
# type: () -> bytes
|
||||||
|
|
||||||
with self.path.open("rb") as fp:
|
with self.path.open("rb") as fp:
|
||||||
md5 = hashlib.md5()
|
file_hash = hasher()
|
||||||
# The goal here is to not run out of memory on really big files. However, the chunk
|
# The goal here is to not run out of memory on really big files. However, the chunk
|
||||||
# size has to be large enough so that the python loop isn't too costly in terms of
|
# size has to be large enough so that the python loop isn't too costly in terms of
|
||||||
# CPU.
|
# CPU.
|
||||||
CHUNK_SIZE = 1024 * 1024 # 1 mb
|
CHUNK_SIZE = 1024 * 1024 # 1 mb
|
||||||
filedata = fp.read(CHUNK_SIZE)
|
filedata = fp.read(CHUNK_SIZE)
|
||||||
while filedata:
|
while filedata:
|
||||||
md5.update(filedata)
|
file_hash.update(filedata)
|
||||||
filedata = fp.read(CHUNK_SIZE)
|
filedata = fp.read(CHUNK_SIZE)
|
||||||
return md5.digest()
|
return file_hash.digest()
|
||||||
|
|
||||||
def _calc_md5partial(self):
|
def _calc_digest_partial(self):
|
||||||
# type: () -> bytes
|
# type: () -> bytes
|
||||||
|
|
||||||
# This offset is where we should start reading the file to get a partial md5
|
|
||||||
# For audio file, it should be where audio data starts
|
|
||||||
offset, size = (0x4000, 0x4000)
|
|
||||||
|
|
||||||
with self.path.open("rb") as fp:
|
with self.path.open("rb") as fp:
|
||||||
fp.seek(offset)
|
fp.seek(PARTIAL_OFFSET_SIZE[0])
|
||||||
partialdata = fp.read(size)
|
partial_data = fp.read(PARTIAL_OFFSET_SIZE[1])
|
||||||
return hashlib.md5(partialdata).digest()
|
return hasher(partial_data).digest()
|
||||||
|
|
||||||
|
def _calc_digest_samples(self) -> bytes:
|
||||||
|
size = self.size
|
||||||
|
with self.path.open("rb") as fp:
|
||||||
|
# Chunk at 25% of the file
|
||||||
|
fp.seek(floor(size * 25 / 100), 0)
|
||||||
|
file_data = fp.read(CHUNK_SIZE)
|
||||||
|
file_hash = hasher(file_data)
|
||||||
|
|
||||||
|
# Chunk at 60% of the file
|
||||||
|
fp.seek(floor(size * 60 / 100), 0)
|
||||||
|
file_data = fp.read(CHUNK_SIZE)
|
||||||
|
file_hash.update(file_data)
|
||||||
|
|
||||||
|
# Last chunk of the file
|
||||||
|
fp.seek(-CHUNK_SIZE, 2)
|
||||||
|
file_data = fp.read(CHUNK_SIZE)
|
||||||
|
file_hash.update(file_data)
|
||||||
|
return file_hash.digest()
|
||||||
|
|
||||||
def _read_info(self, field):
|
def _read_info(self, field):
|
||||||
# print(f"_read_info({field}) for {self}")
|
# print(f"_read_info({field}) for {self}")
|
||||||
@ -220,48 +282,30 @@ class File:
|
|||||||
stats = self.path.stat()
|
stats = self.path.stat()
|
||||||
self.size = nonone(stats.st_size, 0)
|
self.size = nonone(stats.st_size, 0)
|
||||||
self.mtime = nonone(stats.st_mtime, 0)
|
self.mtime = nonone(stats.st_mtime, 0)
|
||||||
elif field == "md5partial":
|
elif field == "digest_partial":
|
||||||
try:
|
self.digest_partial = filesdb.get(self.path, "digest_partial")
|
||||||
self.md5partial = filesdb.get(self.path, "md5partial")
|
if self.digest_partial is None:
|
||||||
if self.md5partial is None:
|
# If file is smaller than partial requirements just use the full digest
|
||||||
self.md5partial = self._calc_md5partial()
|
if self.size < PARTIAL_OFFSET_SIZE[0] + PARTIAL_OFFSET_SIZE[1]:
|
||||||
filesdb.put(self.path, "md5partial", self.md5partial)
|
self.digest_partial = self.digest
|
||||||
except Exception as e:
|
else:
|
||||||
logging.warning("Couldn't get md5partial for %s: %s", self.path, e)
|
self.digest_partial = self._calc_digest_partial()
|
||||||
elif field == "md5":
|
filesdb.put(self.path, "digest_partial", self.digest_partial)
|
||||||
try:
|
elif field == "digest":
|
||||||
self.md5 = filesdb.get(self.path, "md5")
|
self.digest = filesdb.get(self.path, "digest")
|
||||||
if self.md5 is None:
|
if self.digest is None:
|
||||||
self.md5 = self._calc_md5()
|
self.digest = self._calc_digest()
|
||||||
filesdb.put(self.path, "md5", self.md5)
|
filesdb.put(self.path, "digest", self.digest)
|
||||||
except Exception as e:
|
elif field == "digest_samples":
|
||||||
logging.warning("Couldn't get md5 for %s: %s", self.path, e)
|
size = self.size
|
||||||
elif field == "md5samples":
|
# Might as well hash such small files entirely.
|
||||||
try:
|
if size <= MIN_FILE_SIZE:
|
||||||
with self.path.open("rb") as fp:
|
self.digest_samples = self.digest
|
||||||
size = self.size
|
return
|
||||||
# Might as well hash such small files entirely.
|
self.digest_samples = filesdb.get(self.path, "digest_samples")
|
||||||
if size <= MIN_FILE_SIZE:
|
if self.digest_samples is None:
|
||||||
setattr(self, field, self.md5)
|
self.digest_samples = self._calc_digest_samples()
|
||||||
return
|
filesdb.put(self.path, "digest_samples", self.digest_samples)
|
||||||
|
|
||||||
# Chunk at 25% of the file
|
|
||||||
fp.seek(floor(size * 25 / 100), 0)
|
|
||||||
filedata = fp.read(CHUNK_SIZE)
|
|
||||||
md5 = hashlib.md5(filedata)
|
|
||||||
|
|
||||||
# Chunk at 60% of the file
|
|
||||||
fp.seek(floor(size * 60 / 100), 0)
|
|
||||||
filedata = fp.read(CHUNK_SIZE)
|
|
||||||
md5.update(filedata)
|
|
||||||
|
|
||||||
# Last chunk of the file
|
|
||||||
fp.seek(-CHUNK_SIZE, 2)
|
|
||||||
filedata = fp.read(CHUNK_SIZE)
|
|
||||||
md5.update(filedata)
|
|
||||||
setattr(self, field, md5.digest())
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f"Error computing md5samples: {e}")
|
|
||||||
|
|
||||||
def _read_all_info(self, attrnames=None):
|
def _read_all_info(self, attrnames=None):
|
||||||
"""Cache all possible info.
|
"""Cache all possible info.
|
||||||
@ -277,17 +321,25 @@ class File:
|
|||||||
@classmethod
|
@classmethod
|
||||||
def can_handle(cls, path):
|
def can_handle(cls, path):
|
||||||
"""Returns whether this file wrapper class can handle ``path``."""
|
"""Returns whether this file wrapper class can handle ``path``."""
|
||||||
return not path.islink() and path.isfile()
|
return not path.is_symlink() and path.is_file()
|
||||||
|
|
||||||
|
def exists(self) -> bool:
|
||||||
|
"""Safely check if the underlying file exists, treat error as non-existent"""
|
||||||
|
try:
|
||||||
|
return self.path.exists()
|
||||||
|
except OSError as ex:
|
||||||
|
logging.warning(f"Checking {self.path} raised: {ex}")
|
||||||
|
return False
|
||||||
|
|
||||||
def rename(self, newname):
|
def rename(self, newname):
|
||||||
if newname == self.name:
|
if newname == self.name:
|
||||||
return
|
return
|
||||||
destpath = self.path.parent()[newname]
|
destpath = self.path.parent.joinpath(newname)
|
||||||
if destpath.exists():
|
if destpath.exists():
|
||||||
raise AlreadyExistsError(newname, self.path.parent())
|
raise AlreadyExistsError(newname, self.path.parent)
|
||||||
try:
|
try:
|
||||||
self.path.rename(destpath)
|
self.path.rename(destpath)
|
||||||
except EnvironmentError:
|
except OSError:
|
||||||
raise OperationError(self)
|
raise OperationError(self)
|
||||||
if not destpath.exists():
|
if not destpath.exists():
|
||||||
raise OperationError(self)
|
raise OperationError(self)
|
||||||
@ -308,19 +360,20 @@ class File:
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def folder_path(self):
|
def folder_path(self):
|
||||||
return self.path.parent()
|
return self.path.parent
|
||||||
|
|
||||||
|
|
||||||
class Folder(File):
|
class Folder(File):
|
||||||
"""A wrapper around a folder path.
|
"""A wrapper around a folder path.
|
||||||
|
|
||||||
It has the size/md5 info of a File, but its value is the sum of its subitems.
|
It has the size/digest info of a File, but its value is the sum of its subitems.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__slots__ = File.__slots__ + ("_subfolders",)
|
__slots__ = File.__slots__ + ("_subfolders",)
|
||||||
|
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
File.__init__(self, path)
|
File.__init__(self, path)
|
||||||
|
self.size = NOT_SET
|
||||||
self._subfolders = None
|
self._subfolders = None
|
||||||
|
|
||||||
def _all_items(self):
|
def _all_items(self):
|
||||||
@ -335,31 +388,31 @@ class Folder(File):
|
|||||||
self.size = size
|
self.size = size
|
||||||
stats = self.path.stat()
|
stats = self.path.stat()
|
||||||
self.mtime = nonone(stats.st_mtime, 0)
|
self.mtime = nonone(stats.st_mtime, 0)
|
||||||
elif field in {"md5", "md5partial", "md5samples"}:
|
elif field in {"digest", "digest_partial", "digest_samples"}:
|
||||||
# What's sensitive here is that we must make sure that subfiles'
|
# What's sensitive here is that we must make sure that subfiles'
|
||||||
# md5 are always added up in the same order, but we also want a
|
# digest are always added up in the same order, but we also want a
|
||||||
# different md5 if a file gets moved in a different subdirectory.
|
# different digest if a file gets moved in a different subdirectory.
|
||||||
|
|
||||||
def get_dir_md5_concat():
|
def get_dir_digest_concat():
|
||||||
items = self._all_items()
|
items = self._all_items()
|
||||||
items.sort(key=lambda f: f.path)
|
items.sort(key=lambda f: f.path)
|
||||||
md5s = [getattr(f, field) for f in items]
|
digests = [getattr(f, field) for f in items]
|
||||||
return b"".join(md5s)
|
return b"".join(digests)
|
||||||
|
|
||||||
md5 = hashlib.md5(get_dir_md5_concat())
|
digest = hasher(get_dir_digest_concat()).digest()
|
||||||
digest = md5.digest()
|
|
||||||
setattr(self, field, digest)
|
setattr(self, field, digest)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def subfolders(self):
|
def subfolders(self):
|
||||||
if self._subfolders is None:
|
if self._subfolders is None:
|
||||||
subfolders = [p for p in self.path.listdir() if not p.islink() and p.isdir()]
|
with os.scandir(self.path) as iter:
|
||||||
|
subfolders = [p for p in iter if not p.is_symlink() and p.is_dir()]
|
||||||
self._subfolders = [self.__class__(p) for p in subfolders]
|
self._subfolders = [self.__class__(p) for p in subfolders]
|
||||||
return self._subfolders
|
return self._subfolders
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def can_handle(cls, path):
|
def can_handle(cls, path):
|
||||||
return not path.islink() and path.isdir()
|
return not path.is_symlink() and path.is_dir()
|
||||||
|
|
||||||
|
|
||||||
def get_file(path, fileclasses=[File]):
|
def get_file(path, fileclasses=[File]):
|
||||||
@ -384,10 +437,11 @@ def get_files(path, fileclasses=[File]):
|
|||||||
assert all(issubclass(fileclass, File) for fileclass in fileclasses)
|
assert all(issubclass(fileclass, File) for fileclass in fileclasses)
|
||||||
try:
|
try:
|
||||||
result = []
|
result = []
|
||||||
for path in path.listdir():
|
with os.scandir(path) as iter:
|
||||||
file = get_file(path, fileclasses=fileclasses)
|
for item in iter:
|
||||||
if file is not None:
|
file = get_file(item, fileclasses=fileclasses)
|
||||||
result.append(file)
|
if file is not None:
|
||||||
|
result.append(file)
|
||||||
return result
|
return result
|
||||||
except EnvironmentError:
|
except OSError:
|
||||||
raise InvalidPath(path)
|
raise InvalidPath(path)
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
from hscommon.gui.base import GUIObject
|
from hscommon.gui.base import GUIObject
|
||||||
from .base import DupeGuruGUIObject
|
from core.gui.base import DupeGuruGUIObject
|
||||||
|
|
||||||
|
|
||||||
class DetailsPanel(GUIObject, DupeGuruGUIObject):
|
class DetailsPanel(GUIObject, DupeGuruGUIObject):
|
||||||
|
@ -8,8 +8,8 @@
|
|||||||
|
|
||||||
from hscommon.gui.tree import Tree, Node
|
from hscommon.gui.tree import Tree, Node
|
||||||
|
|
||||||
from ..directories import DirectoryState
|
from core.directories import DirectoryState
|
||||||
from .base import DupeGuruGUIObject
|
from core.gui.base import DupeGuruGUIObject
|
||||||
|
|
||||||
STATE_ORDER = [DirectoryState.NORMAL, DirectoryState.REFERENCE, DirectoryState.EXCLUDED]
|
STATE_ORDER = [DirectoryState.NORMAL, DirectoryState.REFERENCE, DirectoryState.EXCLUDED]
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
from .exclude_list_table import ExcludeListTable
|
from core.gui.exclude_list_table import ExcludeListTable
|
||||||
from core.exclude import has_sep
|
from core.exclude import has_sep
|
||||||
from os import sep
|
from os import sep
|
||||||
import logging
|
import logging
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
from .base import DupeGuruGUIObject
|
from core.gui.base import DupeGuruGUIObject
|
||||||
from hscommon.gui.table import GUITable, Row
|
from hscommon.gui.table import GUITable, Row
|
||||||
from hscommon.gui.column import Column, Columns
|
from hscommon.gui.column import Column, Columns
|
||||||
from hscommon.trans import trget
|
from hscommon.trans import trget
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
from hscommon.trans import tr
|
from hscommon.trans import tr
|
||||||
from .ignore_list_table import IgnoreListTable
|
from core.gui.ignore_list_table import IgnoreListTable
|
||||||
|
|
||||||
|
|
||||||
class IgnoreListDialog:
|
class IgnoreListDialog:
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
from hscommon import desktop
|
from hscommon import desktop
|
||||||
|
|
||||||
from .problem_table import ProblemTable
|
from core.gui.problem_table import ProblemTable
|
||||||
|
|
||||||
|
|
||||||
class ProblemDialog:
|
class ProblemDialog:
|
||||||
|
@ -11,7 +11,7 @@ from operator import attrgetter
|
|||||||
from hscommon.gui.table import GUITable, Row
|
from hscommon.gui.table import GUITable, Row
|
||||||
from hscommon.gui.column import Columns
|
from hscommon.gui.column import Columns
|
||||||
|
|
||||||
from .base import DupeGuruGUIObject
|
from core.gui.base import DupeGuruGUIObject
|
||||||
|
|
||||||
|
|
||||||
class DupeRow(Row):
|
class DupeRow(Row):
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
from .base import DupeGuruGUIObject
|
from core.gui.base import DupeGuruGUIObject
|
||||||
|
|
||||||
|
|
||||||
class StatsLabel(DupeGuruGUIObject):
|
class StatsLabel(DupeGuruGUIObject):
|
||||||
|
@ -1 +1 @@
|
|||||||
from . import fs, prioritize, result_table, scanner # noqa
|
from core.me import fs, prioritize, result_table, scanner # noqa
|
||||||
|
@ -97,11 +97,6 @@ class MusicFile(fs.File):
|
|||||||
"dupe_count": format_dupe_count(dupe_count),
|
"dupe_count": format_dupe_count(dupe_count),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_md5partial_offset_and_size(self):
|
|
||||||
# No longer calculating the offset and audio size, just whole file
|
|
||||||
size = self.path.stat().st_size
|
|
||||||
return (0, size)
|
|
||||||
|
|
||||||
def _read_info(self, field):
|
def _read_info(self, field):
|
||||||
fs.File._read_info(self, field)
|
fs.File._read_info(self, field)
|
||||||
if field in TAG_FIELDS:
|
if field in TAG_FIELDS:
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
from . import ( # noqa
|
from core.pe import ( # noqa
|
||||||
block,
|
block,
|
||||||
cache,
|
cache,
|
||||||
exif,
|
exif,
|
||||||
iphoto_plist,
|
|
||||||
matchblock,
|
matchblock,
|
||||||
matchexif,
|
matchexif,
|
||||||
photo,
|
photo,
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
from ._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 # NOQA
|
from core.pe._block import NoBlocksError, DifferentBlockCountError, avgdiff, getblocks2 # NOQA
|
||||||
|
|
||||||
# Converted to C
|
# Converted to C
|
||||||
# def getblock(image):
|
# def getblock(image):
|
||||||
|
13
core/pe/block.pyi
Normal file
13
core/pe/block.pyi
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from typing import Tuple, List, Union, Sequence
|
||||||
|
|
||||||
|
_block = Tuple[int, int, int]
|
||||||
|
|
||||||
|
class NoBlocksError(Exception): ... # noqa: E302, E701
|
||||||
|
class DifferentBlockCountError(Exception): ... # noqa E701
|
||||||
|
|
||||||
|
def getblock(image: object) -> Union[_block, None]: ... # noqa: E302
|
||||||
|
def getblocks2(image: object, block_count_per_side: int) -> Union[List[_block], None]: ...
|
||||||
|
def diff(first: _block, second: _block) -> int: ...
|
||||||
|
def avgdiff( # noqa: E302
|
||||||
|
first: Sequence[_block], second: Sequence[_block], limit: int = 768, min_iterations: int = 1
|
||||||
|
) -> Union[int, None]: ...
|
@ -4,24 +4,13 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
from ._cache import string_to_colors # noqa
|
from core.pe._cache import bytes_to_colors # noqa
|
||||||
|
|
||||||
|
|
||||||
def colors_to_string(colors):
|
def colors_to_bytes(colors):
|
||||||
"""Transform the 3 sized tuples 'colors' into a hex string.
|
"""Transform the 3 sized tuples 'colors' into a bytes string.
|
||||||
|
|
||||||
[(0,100,255)] --> 0064ff
|
[(0,100,255)] --> b'\x00d\xff'
|
||||||
[(1,2,3),(4,5,6)] --> 010203040506
|
[(1,2,3),(4,5,6)] --> b'\x01\x02\x03\x04\x05\x06'
|
||||||
"""
|
"""
|
||||||
return "".join("%02x%02x%02x" % (r, g, b) for r, g, b in colors)
|
return b"".join(map(bytes, colors))
|
||||||
|
|
||||||
|
|
||||||
# This function is an important bottleneck of dupeGuru PE. It has been converted to C.
|
|
||||||
# def string_to_colors(s):
|
|
||||||
# """Transform the string 's' in a list of 3 sized tuples.
|
|
||||||
# """
|
|
||||||
# result = []
|
|
||||||
# for i in xrange(0, len(s), 6):
|
|
||||||
# number = int(s[i:i+6], 16)
|
|
||||||
# result.append((number >> 16, (number >> 8) & 0xff, number & 0xff))
|
|
||||||
# return result
|
|
||||||
|
6
core/pe/cache.pyi
Normal file
6
core/pe/cache.pyi
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
from typing import Union, Tuple, List
|
||||||
|
|
||||||
|
_block = Tuple[int, int, int]
|
||||||
|
|
||||||
|
def colors_to_bytes(colors: List[_block]) -> bytes: ... # noqa: E302
|
||||||
|
def bytes_to_colors(s: bytes) -> Union[List[_block], None]: ...
|
@ -1,141 +0,0 @@
|
|||||||
# Copyright 2016 Virgil Dupras
|
|
||||||
#
|
|
||||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
|
||||||
# which should be included with this package. The terms are also available at
|
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
|
||||||
|
|
||||||
import os
|
|
||||||
import os.path as op
|
|
||||||
import shelve
|
|
||||||
import tempfile
|
|
||||||
from collections import namedtuple
|
|
||||||
|
|
||||||
from .cache import string_to_colors, colors_to_string
|
|
||||||
|
|
||||||
|
|
||||||
def wrap_path(path):
|
|
||||||
return "path:{}".format(path)
|
|
||||||
|
|
||||||
|
|
||||||
def unwrap_path(key):
|
|
||||||
return key[5:]
|
|
||||||
|
|
||||||
|
|
||||||
def wrap_id(path):
|
|
||||||
return "id:{}".format(path)
|
|
||||||
|
|
||||||
|
|
||||||
def unwrap_id(key):
|
|
||||||
return int(key[3:])
|
|
||||||
|
|
||||||
|
|
||||||
CacheRow = namedtuple("CacheRow", "id path blocks mtime")
|
|
||||||
|
|
||||||
|
|
||||||
class ShelveCache:
|
|
||||||
"""A class to cache picture blocks in a shelve backend."""
|
|
||||||
|
|
||||||
def __init__(self, db=None, readonly=False):
|
|
||||||
self.istmp = db is None
|
|
||||||
if self.istmp:
|
|
||||||
self.dtmp = tempfile.mkdtemp()
|
|
||||||
self.ftmp = db = op.join(self.dtmp, "tmpdb")
|
|
||||||
flag = "r" if readonly else "c"
|
|
||||||
self.shelve = shelve.open(db, flag)
|
|
||||||
self.maxid = self._compute_maxid()
|
|
||||||
|
|
||||||
def __contains__(self, key):
|
|
||||||
return wrap_path(key) in self.shelve
|
|
||||||
|
|
||||||
def __delitem__(self, key):
|
|
||||||
row = self.shelve[wrap_path(key)]
|
|
||||||
del self.shelve[wrap_path(key)]
|
|
||||||
del self.shelve[wrap_id(row.id)]
|
|
||||||
|
|
||||||
def __getitem__(self, key):
|
|
||||||
if isinstance(key, int):
|
|
||||||
skey = self.shelve[wrap_id(key)]
|
|
||||||
else:
|
|
||||||
skey = wrap_path(key)
|
|
||||||
return string_to_colors(self.shelve[skey].blocks)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return (unwrap_path(k) for k in self.shelve if k.startswith("path:"))
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return sum(1 for k in self.shelve if k.startswith("path:"))
|
|
||||||
|
|
||||||
def __setitem__(self, path_str, blocks):
|
|
||||||
blocks = colors_to_string(blocks)
|
|
||||||
if op.exists(path_str):
|
|
||||||
mtime = int(os.stat(path_str).st_mtime)
|
|
||||||
else:
|
|
||||||
mtime = 0
|
|
||||||
if path_str in self:
|
|
||||||
rowid = self.shelve[wrap_path(path_str)].id
|
|
||||||
else:
|
|
||||||
rowid = self._get_new_id()
|
|
||||||
row = CacheRow(rowid, path_str, blocks, mtime)
|
|
||||||
self.shelve[wrap_path(path_str)] = row
|
|
||||||
self.shelve[wrap_id(rowid)] = wrap_path(path_str)
|
|
||||||
|
|
||||||
def _compute_maxid(self):
|
|
||||||
return max((unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1)
|
|
||||||
|
|
||||||
def _get_new_id(self):
|
|
||||||
self.maxid += 1
|
|
||||||
return self.maxid
|
|
||||||
|
|
||||||
def clear(self):
|
|
||||||
self.shelve.clear()
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
if self.shelve is not None:
|
|
||||||
self.shelve.close()
|
|
||||||
if self.istmp:
|
|
||||||
os.remove(self.ftmp)
|
|
||||||
os.rmdir(self.dtmp)
|
|
||||||
self.shelve = None
|
|
||||||
|
|
||||||
def filter(self, func):
|
|
||||||
to_delete = [key for key in self if not func(key)]
|
|
||||||
for key in to_delete:
|
|
||||||
del self[key]
|
|
||||||
|
|
||||||
def get_id(self, path):
|
|
||||||
if path in self:
|
|
||||||
return self.shelve[wrap_path(path)].id
|
|
||||||
else:
|
|
||||||
raise ValueError(path)
|
|
||||||
|
|
||||||
def get_multiple(self, rowids):
|
|
||||||
for rowid in rowids:
|
|
||||||
try:
|
|
||||||
skey = self.shelve[wrap_id(rowid)]
|
|
||||||
except KeyError:
|
|
||||||
continue
|
|
||||||
yield (rowid, string_to_colors(self.shelve[skey].blocks))
|
|
||||||
|
|
||||||
def purge_outdated(self):
|
|
||||||
"""Go through the cache and purge outdated records.
|
|
||||||
|
|
||||||
A record is outdated if the picture doesn't exist or if its mtime is greater than the one in
|
|
||||||
the db.
|
|
||||||
"""
|
|
||||||
todelete = []
|
|
||||||
for path in self:
|
|
||||||
row = self.shelve[wrap_path(path)]
|
|
||||||
if row.mtime and op.exists(path):
|
|
||||||
picture_mtime = os.stat(path).st_mtime
|
|
||||||
if int(picture_mtime) <= row.mtime:
|
|
||||||
# not outdated
|
|
||||||
continue
|
|
||||||
todelete.append(path)
|
|
||||||
for path in todelete:
|
|
||||||
try:
|
|
||||||
del self[path]
|
|
||||||
except KeyError:
|
|
||||||
# I have no idea why a KeyError sometimes happen, but it does, as we can see in
|
|
||||||
# #402 and #439. I don't think it hurts to silently ignore the error, so that's
|
|
||||||
# what we do
|
|
||||||
pass
|
|
@ -9,12 +9,24 @@ import os.path as op
|
|||||||
import logging
|
import logging
|
||||||
import sqlite3 as sqlite
|
import sqlite3 as sqlite
|
||||||
|
|
||||||
from .cache import string_to_colors, colors_to_string
|
from core.pe.cache import bytes_to_colors, colors_to_bytes
|
||||||
|
|
||||||
|
|
||||||
class SqliteCache:
|
class SqliteCache:
|
||||||
"""A class to cache picture blocks in a sqlite backend."""
|
"""A class to cache picture blocks in a sqlite backend."""
|
||||||
|
|
||||||
|
schema_version = 2
|
||||||
|
schema_version_description = "Added blocks for all 8 orientations."
|
||||||
|
|
||||||
|
create_table_query = (
|
||||||
|
"CREATE TABLE IF NOT EXISTS "
|
||||||
|
"pictures(path TEXT, mtime_ns INTEGER, blocks BLOB, blocks2 BLOB, blocks3 BLOB, "
|
||||||
|
"blocks4 BLOB, blocks5 BLOB, blocks6 BLOB, blocks7 BLOB, blocks8 BLOB)"
|
||||||
|
)
|
||||||
|
create_index_query = "CREATE INDEX IF NOT EXISTS idx_path on pictures (path)"
|
||||||
|
drop_table_query = "DROP TABLE IF EXISTS pictures"
|
||||||
|
drop_index_query = "DROP INDEX IF EXISTS idx_path"
|
||||||
|
|
||||||
def __init__(self, db=":memory:", readonly=False):
|
def __init__(self, db=":memory:", readonly=False):
|
||||||
# readonly is not used in the sqlite version of the cache
|
# readonly is not used in the sqlite version of the cache
|
||||||
self.dbname = db
|
self.dbname = db
|
||||||
@ -35,12 +47,20 @@ class SqliteCache:
|
|||||||
# Optimized
|
# Optimized
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
if isinstance(key, int):
|
if isinstance(key, int):
|
||||||
sql = "select blocks from pictures where rowid = ?"
|
sql = (
|
||||||
|
"select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 "
|
||||||
|
"from pictures "
|
||||||
|
"where rowid = ?"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
sql = "select blocks from pictures where path = ?"
|
sql = (
|
||||||
result = self.con.execute(sql, [key]).fetchone()
|
"select blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 "
|
||||||
if result:
|
"from pictures "
|
||||||
result = string_to_colors(result[0])
|
"where path = ?"
|
||||||
|
)
|
||||||
|
blocks = self.con.execute(sql, [key]).fetchone()
|
||||||
|
if blocks:
|
||||||
|
result = [bytes_to_colors(block) for block in blocks]
|
||||||
return result
|
return result
|
||||||
else:
|
else:
|
||||||
raise KeyError(key)
|
raise KeyError(key)
|
||||||
@ -56,35 +76,33 @@ class SqliteCache:
|
|||||||
return result[0][0]
|
return result[0][0]
|
||||||
|
|
||||||
def __setitem__(self, path_str, blocks):
|
def __setitem__(self, path_str, blocks):
|
||||||
blocks = colors_to_string(blocks)
|
blocks = [colors_to_bytes(block) for block in blocks]
|
||||||
if op.exists(path_str):
|
if op.exists(path_str):
|
||||||
mtime = int(os.stat(path_str).st_mtime)
|
mtime = int(os.stat(path_str).st_mtime)
|
||||||
else:
|
else:
|
||||||
mtime = 0
|
mtime = 0
|
||||||
if path_str in self:
|
if path_str in self:
|
||||||
sql = "update pictures set blocks = ?, mtime = ? where path = ?"
|
sql = (
|
||||||
|
"update pictures set blocks = ?, blocks2 = ?, blocks3 = ?, blocks4 = ?, blocks5 = ?, blocks6 = ?, "
|
||||||
|
"blocks7 = ?, blocks8 = ?, mtime_ns = ?"
|
||||||
|
"where path = ?"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
sql = "insert into pictures(blocks,mtime,path) values(?,?,?)"
|
sql = (
|
||||||
|
"insert into pictures(blocks,blocks2,blocks3,blocks4,blocks5,blocks6,blocks7,blocks8,mtime_ns,path) "
|
||||||
|
"values(?,?,?,?,?,?,?,?,?,?)"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
self.con.execute(sql, [blocks, mtime, path_str])
|
self.con.execute(sql, blocks + [mtime, path_str])
|
||||||
except sqlite.OperationalError:
|
except sqlite.OperationalError:
|
||||||
logging.warning("Picture cache could not set value for key %r", path_str)
|
logging.warning("Picture cache could not set value for key %r", path_str)
|
||||||
except sqlite.DatabaseError as e:
|
except sqlite.DatabaseError as e:
|
||||||
logging.warning("DatabaseError while setting value for key %r: %s", path_str, str(e))
|
logging.warning("DatabaseError while setting value for key %r: %s", path_str, str(e))
|
||||||
|
|
||||||
def _create_con(self, second_try=False):
|
def _create_con(self, second_try=False):
|
||||||
def create_tables():
|
|
||||||
logging.debug("Creating picture cache tables.")
|
|
||||||
self.con.execute("drop table if exists pictures")
|
|
||||||
self.con.execute("drop index if exists idx_path")
|
|
||||||
self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
|
|
||||||
self.con.execute("create index idx_path on pictures (path)")
|
|
||||||
|
|
||||||
self.con = sqlite.connect(self.dbname, isolation_level=None)
|
|
||||||
try:
|
try:
|
||||||
self.con.execute("select path, mtime, blocks from pictures where 1=2")
|
self.con = sqlite.connect(self.dbname, isolation_level=None)
|
||||||
except sqlite.OperationalError: # new db
|
self._check_upgrade()
|
||||||
create_tables()
|
|
||||||
except sqlite.DatabaseError as e: # corrupted db
|
except sqlite.DatabaseError as e: # corrupted db
|
||||||
if second_try:
|
if second_try:
|
||||||
raise # Something really strange is happening
|
raise # Something really strange is happening
|
||||||
@ -93,6 +111,25 @@ class SqliteCache:
|
|||||||
os.remove(self.dbname)
|
os.remove(self.dbname)
|
||||||
self._create_con(second_try=True)
|
self._create_con(second_try=True)
|
||||||
|
|
||||||
|
def _check_upgrade(self) -> None:
|
||||||
|
with self.con as conn:
|
||||||
|
has_schema = conn.execute(
|
||||||
|
"SELECT NAME FROM sqlite_master WHERE type='table' AND name='schema_version'"
|
||||||
|
).fetchall()
|
||||||
|
version = None
|
||||||
|
if has_schema:
|
||||||
|
version = conn.execute("SELECT version FROM schema_version ORDER BY version DESC").fetchone()[0]
|
||||||
|
else:
|
||||||
|
conn.execute("CREATE TABLE schema_version (version int PRIMARY KEY, description TEXT)")
|
||||||
|
if version != self.schema_version:
|
||||||
|
conn.execute(self.drop_table_query)
|
||||||
|
conn.execute(
|
||||||
|
"INSERT OR REPLACE INTO schema_version VALUES (:version, :description)",
|
||||||
|
{"version": self.schema_version, "description": self.schema_version_description},
|
||||||
|
)
|
||||||
|
conn.execute(self.create_table_query)
|
||||||
|
conn.execute(self.create_index_query)
|
||||||
|
|
||||||
def clear(self):
|
def clear(self):
|
||||||
self.close()
|
self.close()
|
||||||
if self.dbname != ":memory:":
|
if self.dbname != ":memory:":
|
||||||
@ -118,9 +155,28 @@ class SqliteCache:
|
|||||||
raise ValueError(path)
|
raise ValueError(path)
|
||||||
|
|
||||||
def get_multiple(self, rowids):
|
def get_multiple(self, rowids):
|
||||||
sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(map(str, rowids))
|
ids = ",".join(map(str, rowids))
|
||||||
|
sql = (
|
||||||
|
"select rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 "
|
||||||
|
f"from pictures where rowid in ({ids})"
|
||||||
|
)
|
||||||
cur = self.con.execute(sql)
|
cur = self.con.execute(sql)
|
||||||
return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)
|
return (
|
||||||
|
(
|
||||||
|
rowid,
|
||||||
|
[
|
||||||
|
bytes_to_colors(blocks),
|
||||||
|
bytes_to_colors(blocks2),
|
||||||
|
bytes_to_colors(blocks3),
|
||||||
|
bytes_to_colors(blocks4),
|
||||||
|
bytes_to_colors(blocks5),
|
||||||
|
bytes_to_colors(blocks6),
|
||||||
|
bytes_to_colors(blocks7),
|
||||||
|
bytes_to_colors(blocks8),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
for rowid, blocks, blocks2, blocks3, blocks4, blocks5, blocks6, blocks7, blocks8 in cur
|
||||||
|
)
|
||||||
|
|
||||||
def purge_outdated(self):
|
def purge_outdated(self):
|
||||||
"""Go through the cache and purge outdated records.
|
"""Go through the cache and purge outdated records.
|
||||||
@ -129,12 +185,12 @@ class SqliteCache:
|
|||||||
the db.
|
the db.
|
||||||
"""
|
"""
|
||||||
todelete = []
|
todelete = []
|
||||||
sql = "select rowid, path, mtime from pictures"
|
sql = "select rowid, path, mtime_ns from pictures"
|
||||||
cur = self.con.execute(sql)
|
cur = self.con.execute(sql)
|
||||||
for rowid, path_str, mtime in cur:
|
for rowid, path_str, mtime_ns in cur:
|
||||||
if mtime and op.exists(path_str):
|
if mtime_ns and op.exists(path_str):
|
||||||
picture_mtime = os.stat(path_str).st_mtime
|
picture_mtime = os.stat(path_str).st_mtime
|
||||||
if int(picture_mtime) <= mtime:
|
if int(picture_mtime) <= mtime_ns:
|
||||||
# not outdated
|
# not outdated
|
||||||
continue
|
continue
|
||||||
todelete.append(rowid)
|
todelete.append(rowid)
|
||||||
|
@ -1,33 +0,0 @@
|
|||||||
# Created By: Virgil Dupras
|
|
||||||
# Created On: 2014-03-15
|
|
||||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
|
||||||
#
|
|
||||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
|
||||||
# which should be included with this package. The terms are also available at
|
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
|
||||||
|
|
||||||
import plistlib
|
|
||||||
|
|
||||||
|
|
||||||
class IPhotoPlistParser(plistlib._PlistParser):
|
|
||||||
"""A parser for iPhoto plists.
|
|
||||||
|
|
||||||
iPhoto plists tend to be malformed, so we have to subclass the built-in parser to be a bit more
|
|
||||||
lenient.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
plistlib._PlistParser.__init__(self, use_builtin_types=True, dict_type=dict)
|
|
||||||
# For debugging purposes, we remember the last bit of data to be analyzed so that we can
|
|
||||||
# log it in case of an exception
|
|
||||||
self.lastdata = ""
|
|
||||||
|
|
||||||
def get_data(self):
|
|
||||||
self.lastdata = plistlib._PlistParser.get_data(self)
|
|
||||||
return self.lastdata
|
|
||||||
|
|
||||||
def end_integer(self):
|
|
||||||
try:
|
|
||||||
self.add_object(int(self.get_data()))
|
|
||||||
except ValueError:
|
|
||||||
self.add_object(0)
|
|
@ -15,7 +15,8 @@ from hscommon.trans import tr
|
|||||||
from hscommon.jobprogress import job
|
from hscommon.jobprogress import job
|
||||||
|
|
||||||
from core.engine import Match
|
from core.engine import Match
|
||||||
from .block import avgdiff, DifferentBlockCountError, NoBlocksError
|
from core.pe.block import avgdiff, DifferentBlockCountError, NoBlocksError
|
||||||
|
from core.pe.cache_sqlite import SqliteCache
|
||||||
|
|
||||||
# OPTIMIZATION NOTES:
|
# OPTIMIZATION NOTES:
|
||||||
# The bottleneck of the matching phase is CPU, which is why we use multiprocessing. However, another
|
# The bottleneck of the matching phase is CPU, which is why we use multiprocessing. However, another
|
||||||
@ -27,7 +28,7 @@ from .block import avgdiff, DifferentBlockCountError, NoBlocksError
|
|||||||
# to files in other chunks. So chunkifying doesn't save us any actual comparison, but the advantage
|
# to files in other chunks. So chunkifying doesn't save us any actual comparison, but the advantage
|
||||||
# is that instead of reading blocks from disk number_of_files**2 times, we read it
|
# is that instead of reading blocks from disk number_of_files**2 times, we read it
|
||||||
# number_of_files*number_of_chunks times.
|
# number_of_files*number_of_chunks times.
|
||||||
# Determining the right chunk size is tricky, bceause if it's too big, too many blocks will be in
|
# Determining the right chunk size is tricky, because if it's too big, too many blocks will be in
|
||||||
# memory at the same time and we might end up with memory trashing, which is awfully slow. So,
|
# memory at the same time and we might end up with memory trashing, which is awfully slow. So,
|
||||||
# because our *real* bottleneck is CPU, the chunk size must simply be enough so that the CPU isn't
|
# because our *real* bottleneck is CPU, the chunk size must simply be enough so that the CPU isn't
|
||||||
# starved by Disk IOs.
|
# starved by Disk IOs.
|
||||||
@ -50,17 +51,10 @@ except Exception:
|
|||||||
|
|
||||||
|
|
||||||
def get_cache(cache_path, readonly=False):
|
def get_cache(cache_path, readonly=False):
|
||||||
if cache_path.endswith("shelve"):
|
return SqliteCache(cache_path, readonly=readonly)
|
||||||
from .cache_shelve import ShelveCache
|
|
||||||
|
|
||||||
return ShelveCache(cache_path, readonly=readonly)
|
|
||||||
else:
|
|
||||||
from .cache_sqlite import SqliteCache
|
|
||||||
|
|
||||||
return SqliteCache(cache_path, readonly=readonly)
|
|
||||||
|
|
||||||
|
|
||||||
def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
|
def prepare_pictures(pictures, cache_path, with_dimensions, match_rotated, j=job.nulljob):
|
||||||
# The MemoryError handlers in there use logging without first caring about whether or not
|
# The MemoryError handlers in there use logging without first caring about whether or not
|
||||||
# there is enough memory left to carry on the operation because it is assumed that the
|
# there is enough memory left to carry on the operation because it is assumed that the
|
||||||
# MemoryError happens when trying to read an image file, which is freed from memory by the
|
# MemoryError happens when trying to read an image file, which is freed from memory by the
|
||||||
@ -78,16 +72,21 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
|
|||||||
# entry in iPhoto library.
|
# entry in iPhoto library.
|
||||||
logging.warning("We have a picture with a null path here")
|
logging.warning("We have a picture with a null path here")
|
||||||
continue
|
continue
|
||||||
picture.unicode_path = str(picture.path)
|
|
||||||
logging.debug("Analyzing picture at %s", picture.unicode_path)
|
logging.debug("Analyzing picture at %s", picture.unicode_path)
|
||||||
if with_dimensions:
|
if with_dimensions:
|
||||||
picture.dimensions # pre-read dimensions
|
picture.dimensions # pre-read dimensions
|
||||||
try:
|
try:
|
||||||
if picture.unicode_path not in cache:
|
if picture.unicode_path not in cache or (
|
||||||
blocks = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
|
match_rotated and any(block == [] for block in cache[picture.unicode_path])
|
||||||
|
):
|
||||||
|
if match_rotated:
|
||||||
|
blocks = [picture.get_blocks(BLOCK_COUNT_PER_SIDE, orientation) for orientation in range(1, 9)]
|
||||||
|
else:
|
||||||
|
blocks = [[]] * 8
|
||||||
|
blocks[max(picture.get_orientation() - 1, 0)] = picture.get_blocks(BLOCK_COUNT_PER_SIDE)
|
||||||
cache[picture.unicode_path] = blocks
|
cache[picture.unicode_path] = blocks
|
||||||
prepared.append(picture)
|
prepared.append(picture)
|
||||||
except (IOError, ValueError) as e:
|
except (OSError, ValueError) as e:
|
||||||
logging.warning(str(e))
|
logging.warning(str(e))
|
||||||
except MemoryError:
|
except MemoryError:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
@ -125,13 +124,13 @@ def get_match(first, second, percentage):
|
|||||||
return Match(first, second, percentage)
|
return Match(first, second, percentage)
|
||||||
|
|
||||||
|
|
||||||
def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
|
def async_compare(ref_ids, other_ids, dbname, threshold, picinfo, match_rotated=False):
|
||||||
# The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids
|
# The list of ids in ref_ids have to be compared to the list of ids in other_ids. other_ids
|
||||||
# can be None. In this case, ref_ids has to be compared with itself
|
# can be None. In this case, ref_ids has to be compared with itself
|
||||||
# picinfo is a dictionary {pic_id: (dimensions, is_ref)}
|
# picinfo is a dictionary {pic_id: (dimensions, is_ref)}
|
||||||
cache = get_cache(dbname, readonly=True)
|
cache = get_cache(dbname, readonly=True)
|
||||||
limit = 100 - threshold
|
limit = 100 - threshold
|
||||||
ref_pairs = list(cache.get_multiple(ref_ids))
|
ref_pairs = list(cache.get_multiple(ref_ids)) # (rowid, [b, b2, ..., b8])
|
||||||
if other_ids is not None:
|
if other_ids is not None:
|
||||||
other_pairs = list(cache.get_multiple(other_ids))
|
other_pairs = list(cache.get_multiple(other_ids))
|
||||||
comparisons_to_do = [(r, o) for r in ref_pairs for o in other_pairs]
|
comparisons_to_do = [(r, o) for r in ref_pairs for o in other_pairs]
|
||||||
@ -144,22 +143,35 @@ def async_compare(ref_ids, other_ids, dbname, threshold, picinfo):
|
|||||||
if ref_is_ref and other_is_ref:
|
if ref_is_ref and other_is_ref:
|
||||||
continue
|
continue
|
||||||
if ref_dimensions != other_dimensions:
|
if ref_dimensions != other_dimensions:
|
||||||
continue
|
if match_rotated:
|
||||||
try:
|
rotated_ref_dimensions = (ref_dimensions[1], ref_dimensions[0])
|
||||||
diff = avgdiff(ref_blocks, other_blocks, limit, MIN_ITERATIONS)
|
if rotated_ref_dimensions != other_dimensions:
|
||||||
percentage = 100 - diff
|
continue
|
||||||
except (DifferentBlockCountError, NoBlocksError):
|
else:
|
||||||
percentage = 0
|
continue
|
||||||
if percentage >= threshold:
|
|
||||||
results.append((ref_id, other_id, percentage))
|
orientation_range = 1
|
||||||
|
if match_rotated:
|
||||||
|
orientation_range = 8
|
||||||
|
|
||||||
|
for orientation_ref in range(orientation_range):
|
||||||
|
try:
|
||||||
|
diff = avgdiff(ref_blocks[orientation_ref], other_blocks[0], limit, MIN_ITERATIONS)
|
||||||
|
percentage = 100 - diff
|
||||||
|
except (DifferentBlockCountError, NoBlocksError):
|
||||||
|
percentage = 0
|
||||||
|
if percentage >= threshold:
|
||||||
|
results.append((ref_id, other_id, percentage))
|
||||||
|
break
|
||||||
|
|
||||||
cache.close()
|
cache.close()
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljob):
|
def getmatches(pictures, cache_path, threshold, match_scaled=False, match_rotated=False, j=job.nulljob):
|
||||||
def get_picinfo(p):
|
def get_picinfo(p):
|
||||||
if match_scaled:
|
if match_scaled:
|
||||||
return (None, p.is_ref)
|
return ((None, None), p.is_ref)
|
||||||
else:
|
else:
|
||||||
return (p.dimensions, p.is_ref)
|
return (p.dimensions, p.is_ref)
|
||||||
|
|
||||||
@ -181,7 +193,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
|
|||||||
j.set_progress(comparison_count, progress_msg)
|
j.set_progress(comparison_count, progress_msg)
|
||||||
|
|
||||||
j = j.start_subjob([3, 7])
|
j = j.start_subjob([3, 7])
|
||||||
pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
|
pictures = prepare_pictures(pictures, cache_path, not match_scaled, match_rotated, j=j)
|
||||||
j = j.start_subjob([9, 1], tr("Preparing for matching"))
|
j = j.start_subjob([9, 1], tr("Preparing for matching"))
|
||||||
cache = get_cache(cache_path)
|
cache = get_cache(cache_path)
|
||||||
id2picture = {}
|
id2picture = {}
|
||||||
@ -211,7 +223,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
|
|||||||
picinfo.update({p.cache_id: get_picinfo(p) for p in other_chunk})
|
picinfo.update({p.cache_id: get_picinfo(p) for p in other_chunk})
|
||||||
else:
|
else:
|
||||||
other_ids = None
|
other_ids = None
|
||||||
args = (ref_ids, other_ids, cache_path, threshold, picinfo)
|
args = (ref_ids, other_ids, cache_path, threshold, picinfo, match_rotated)
|
||||||
async_results.append(pool.apply_async(async_compare, args))
|
async_results.append(pool.apply_async(async_compare, args))
|
||||||
collect_results()
|
collect_results()
|
||||||
collect_results(collect_all=True)
|
collect_results(collect_all=True)
|
||||||
@ -238,7 +250,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
|
|||||||
for ref_id, other_id, percentage in myiter:
|
for ref_id, other_id, percentage in myiter:
|
||||||
ref = id2picture[ref_id]
|
ref = id2picture[ref_id]
|
||||||
other = id2picture[other_id]
|
other = id2picture[other_id]
|
||||||
if percentage == 100 and ref.md5 != other.md5:
|
if percentage == 100 and ref.digest != other.digest:
|
||||||
percentage = 99
|
percentage = 99
|
||||||
if percentage >= threshold:
|
if percentage >= threshold:
|
||||||
ref.dimensions # pre-read dimensions for display in results
|
ref.dimensions # pre-read dimensions for display in results
|
||||||
|
@ -245,4 +245,4 @@ PyObject *PyInit__block(void) {
|
|||||||
PyModule_AddObject(m, "DifferentBlockCountError", DifferentBlockCountError);
|
PyModule_AddObject(m, "DifferentBlockCountError", DifferentBlockCountError);
|
||||||
|
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
@ -2,8 +2,8 @@
|
|||||||
* Created On: 2010-02-04
|
* Created On: 2010-02-04
|
||||||
* Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
* Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||||
*
|
*
|
||||||
* This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
* This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||||
* which should be included with this package. The terms are also available at
|
* which should be included with this package. The terms are also available at
|
||||||
* http://www.gnu.org/licenses/gpl-3.0.html
|
* http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
**/
|
**/
|
||||||
|
|
||||||
@ -22,7 +22,7 @@ pystring2cfstring(PyObject *pystring)
|
|||||||
UInt8 *s;
|
UInt8 *s;
|
||||||
CFIndex size;
|
CFIndex size;
|
||||||
CFStringRef result;
|
CFStringRef result;
|
||||||
|
|
||||||
if (PyUnicode_Check(pystring)) {
|
if (PyUnicode_Check(pystring)) {
|
||||||
encoded = PyUnicode_AsUTF8String(pystring);
|
encoded = PyUnicode_AsUTF8String(pystring);
|
||||||
if (encoded == NULL) {
|
if (encoded == NULL) {
|
||||||
@ -32,7 +32,7 @@ pystring2cfstring(PyObject *pystring)
|
|||||||
encoded = pystring;
|
encoded = pystring;
|
||||||
Py_INCREF(encoded);
|
Py_INCREF(encoded);
|
||||||
}
|
}
|
||||||
|
|
||||||
s = (UInt8*)PyBytes_AS_STRING(encoded);
|
s = (UInt8*)PyBytes_AS_STRING(encoded);
|
||||||
size = PyBytes_GET_SIZE(encoded);
|
size = PyBytes_GET_SIZE(encoded);
|
||||||
result = CFStringCreateWithBytes(NULL, s, size, kCFStringEncodingUTF8, FALSE);
|
result = CFStringCreateWithBytes(NULL, s, size, kCFStringEncodingUTF8, FALSE);
|
||||||
@ -50,20 +50,20 @@ static PyObject* block_osx_get_image_size(PyObject *self, PyObject *args)
|
|||||||
long width, height;
|
long width, height;
|
||||||
PyObject *pwidth, *pheight;
|
PyObject *pwidth, *pheight;
|
||||||
PyObject *result;
|
PyObject *result;
|
||||||
|
|
||||||
width = 0;
|
width = 0;
|
||||||
height = 0;
|
height = 0;
|
||||||
if (!PyArg_ParseTuple(args, "O", &path)) {
|
if (!PyArg_ParseTuple(args, "O", &path)) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
image_path = pystring2cfstring(path);
|
image_path = pystring2cfstring(path);
|
||||||
if (image_path == NULL) {
|
if (image_path == NULL) {
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
}
|
}
|
||||||
image_url = CFURLCreateWithFileSystemPath(NULL, image_path, kCFURLPOSIXPathStyle, FALSE);
|
image_url = CFURLCreateWithFileSystemPath(NULL, image_path, kCFURLPOSIXPathStyle, FALSE);
|
||||||
CFRelease(image_path);
|
CFRelease(image_path);
|
||||||
|
|
||||||
source = CGImageSourceCreateWithURL(image_url, NULL);
|
source = CGImageSourceCreateWithURL(image_url, NULL);
|
||||||
CFRelease(image_url);
|
CFRelease(image_url);
|
||||||
if (source != NULL) {
|
if (source != NULL) {
|
||||||
@ -75,7 +75,7 @@ static PyObject* block_osx_get_image_size(PyObject *self, PyObject *args)
|
|||||||
}
|
}
|
||||||
CFRelease(source);
|
CFRelease(source);
|
||||||
}
|
}
|
||||||
|
|
||||||
pwidth = PyLong_FromLong(width);
|
pwidth = PyLong_FromLong(width);
|
||||||
if (pwidth == NULL) {
|
if (pwidth == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -91,19 +91,19 @@ static PyObject* block_osx_get_image_size(PyObject *self, PyObject *args)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static CGContextRef
|
static CGContextRef
|
||||||
MyCreateBitmapContext(int width, int height)
|
MyCreateBitmapContext(int width, int height)
|
||||||
{
|
{
|
||||||
CGContextRef context = NULL;
|
CGContextRef context = NULL;
|
||||||
CGColorSpaceRef colorSpace;
|
CGColorSpaceRef colorSpace;
|
||||||
void *bitmapData;
|
void *bitmapData;
|
||||||
int bitmapByteCount;
|
int bitmapByteCount;
|
||||||
int bitmapBytesPerRow;
|
int bitmapBytesPerRow;
|
||||||
|
|
||||||
bitmapBytesPerRow = (width * 4);
|
bitmapBytesPerRow = (width * 4);
|
||||||
bitmapByteCount = (bitmapBytesPerRow * height);
|
bitmapByteCount = (bitmapBytesPerRow * height);
|
||||||
|
|
||||||
colorSpace = CGColorSpaceCreateWithName(kCGColorSpaceGenericRGB);
|
colorSpace = CGColorSpaceCreateWithName(kCGColorSpaceGenericRGB);
|
||||||
|
|
||||||
// calloc() must be used to allocate bitmapData here because the buffer has to be zeroed.
|
// calloc() must be used to allocate bitmapData here because the buffer has to be zeroed.
|
||||||
// If it's not zeroes, when images with transparency are drawn in the context, this buffer
|
// If it's not zeroes, when images with transparency are drawn in the context, this buffer
|
||||||
// will stay with undefined pixels, which means that two pictures with the same pixels will
|
// will stay with undefined pixels, which means that two pictures with the same pixels will
|
||||||
@ -113,7 +113,7 @@ MyCreateBitmapContext(int width, int height)
|
|||||||
fprintf(stderr, "Memory not allocated!");
|
fprintf(stderr, "Memory not allocated!");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
context = CGBitmapContextCreate(bitmapData, width, height, 8, bitmapBytesPerRow, colorSpace,
|
context = CGBitmapContextCreate(bitmapData, width, height, 8, bitmapBytesPerRow, colorSpace,
|
||||||
(CGBitmapInfo)kCGImageAlphaNoneSkipLast);
|
(CGBitmapInfo)kCGImageAlphaNoneSkipLast);
|
||||||
if (context== NULL) {
|
if (context== NULL) {
|
||||||
@ -128,7 +128,7 @@ MyCreateBitmapContext(int width, int height)
|
|||||||
static PyObject* getblock(unsigned char *imageData, int imageWidth, int imageHeight, int boxX, int boxY, int boxW, int boxH)
|
static PyObject* getblock(unsigned char *imageData, int imageWidth, int imageHeight, int boxX, int boxY, int boxW, int boxH)
|
||||||
{
|
{
|
||||||
int i,j, totalR, totalG, totalB;
|
int i,j, totalR, totalG, totalB;
|
||||||
|
|
||||||
totalR = totalG = totalB = 0;
|
totalR = totalG = totalB = 0;
|
||||||
for(i=boxY; i<boxY+boxH; i++) {
|
for(i=boxY; i<boxY+boxH; i++) {
|
||||||
for(j=boxX; j<boxX+boxW; j++) {
|
for(j=boxX; j<boxX+boxW; j++) {
|
||||||
@ -142,7 +142,7 @@ static PyObject* getblock(unsigned char *imageData, int imageWidth, int imageHei
|
|||||||
totalR /= pixelCount;
|
totalR /= pixelCount;
|
||||||
totalG /= pixelCount;
|
totalG /= pixelCount;
|
||||||
totalB /= pixelCount;
|
totalB /= pixelCount;
|
||||||
|
|
||||||
return inttuple(3, totalR, totalG, totalB);
|
return inttuple(3, totalR, totalG, totalB);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -155,27 +155,27 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
|||||||
CGImageRef image;
|
CGImageRef image;
|
||||||
size_t width, height, image_width, image_height;
|
size_t width, height, image_width, image_height;
|
||||||
int block_count, block_width, block_height, orientation, i;
|
int block_count, block_width, block_height, orientation, i;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "Oii", &path, &block_count, &orientation)) {
|
if (!PyArg_ParseTuple(args, "Oii", &path, &block_count, &orientation)) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PySequence_Length(path) == 0) {
|
if (PySequence_Length(path) == 0) {
|
||||||
PyErr_SetString(PyExc_ValueError, "empty path");
|
PyErr_SetString(PyExc_ValueError, "empty path");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((orientation > 8) || (orientation < 0)) {
|
if ((orientation > 8) || (orientation < 0)) {
|
||||||
orientation = 0; // simplifies checks later since we can only have values in 0-8
|
orientation = 0; // simplifies checks later since we can only have values in 0-8
|
||||||
}
|
}
|
||||||
|
|
||||||
image_path = pystring2cfstring(path);
|
image_path = pystring2cfstring(path);
|
||||||
if (image_path == NULL) {
|
if (image_path == NULL) {
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
}
|
}
|
||||||
image_url = CFURLCreateWithFileSystemPath(NULL, image_path, kCFURLPOSIXPathStyle, FALSE);
|
image_url = CFURLCreateWithFileSystemPath(NULL, image_path, kCFURLPOSIXPathStyle, FALSE);
|
||||||
CFRelease(image_path);
|
CFRelease(image_path);
|
||||||
|
|
||||||
source = CGImageSourceCreateWithURL(image_url, NULL);
|
source = CGImageSourceCreateWithURL(image_url, NULL);
|
||||||
CFRelease(image_url);
|
CFRelease(image_url);
|
||||||
if (source == NULL) {
|
if (source == NULL) {
|
||||||
@ -187,8 +187,8 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
|||||||
CFRelease(source);
|
CFRelease(source);
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
width = image_width = CGImageGetWidth(image);
|
width = image_width = CGImageGetWidth(image);
|
||||||
height = image_height = CGImageGetHeight(image);
|
height = image_height = CGImageGetHeight(image);
|
||||||
if (orientation >= 5) {
|
if (orientation >= 5) {
|
||||||
@ -196,9 +196,9 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
|||||||
width = image_height;
|
width = image_height;
|
||||||
height = image_width;
|
height = image_width;
|
||||||
}
|
}
|
||||||
|
|
||||||
CGContextRef context = MyCreateBitmapContext(width, height);
|
CGContextRef context = MyCreateBitmapContext(width, height);
|
||||||
|
|
||||||
if (orientation == 2) {
|
if (orientation == 2) {
|
||||||
// Flip X
|
// Flip X
|
||||||
CGContextTranslateCTM(context, width, 0);
|
CGContextTranslateCTM(context, width, 0);
|
||||||
@ -207,7 +207,7 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
|||||||
else if (orientation == 3) {
|
else if (orientation == 3) {
|
||||||
// Rot 180
|
// Rot 180
|
||||||
CGContextTranslateCTM(context, width, height);
|
CGContextTranslateCTM(context, width, height);
|
||||||
CGContextRotateCTM(context, RADIANS(180));
|
CGContextRotateCTM(context, RADIANS(180));
|
||||||
}
|
}
|
||||||
else if (orientation == 4) {
|
else if (orientation == 4) {
|
||||||
// Flip Y
|
// Flip Y
|
||||||
@ -242,21 +242,21 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
|||||||
CGContextDrawImage(context, myBoundingBox, image);
|
CGContextDrawImage(context, myBoundingBox, image);
|
||||||
unsigned char *bitmapData = CGBitmapContextGetData(context);
|
unsigned char *bitmapData = CGBitmapContextGetData(context);
|
||||||
CGContextRelease(context);
|
CGContextRelease(context);
|
||||||
|
|
||||||
CGImageRelease(image);
|
CGImageRelease(image);
|
||||||
CFRelease(source);
|
CFRelease(source);
|
||||||
if (bitmapData == NULL) {
|
if (bitmapData == NULL) {
|
||||||
return PyErr_NoMemory();
|
return PyErr_NoMemory();
|
||||||
}
|
}
|
||||||
|
|
||||||
block_width = max(width/block_count, 1);
|
block_width = max(width/block_count, 1);
|
||||||
block_height = max(height/block_count, 1);
|
block_height = max(height/block_count, 1);
|
||||||
|
|
||||||
result = PyList_New(block_count * block_count);
|
result = PyList_New(block_count * block_count);
|
||||||
if (result == NULL) {
|
if (result == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(i=0; i<block_count; i++) {
|
for(i=0; i<block_count; i++) {
|
||||||
int j, top;
|
int j, top;
|
||||||
top = min(i*block_height, height-block_height);
|
top = min(i*block_height, height-block_height);
|
||||||
@ -271,8 +271,8 @@ static PyObject* block_osx_getblocks(PyObject *self, PyObject *args)
|
|||||||
PyList_SET_ITEM(result, i*block_count+j, block);
|
PyList_SET_ITEM(result, i*block_count+j, block);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
free(bitmapData);
|
free(bitmapData);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -302,4 +302,4 @@ PyInit__block_osx(void)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
@ -2,94 +2,68 @@
|
|||||||
* Created On: 2010-01-30
|
* Created On: 2010-01-30
|
||||||
* Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
* Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||||
*
|
*
|
||||||
* This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
* This software is licensed under the "BSD" License as described in the
|
||||||
* which should be included with this package. The terms are also available at
|
* "LICENSE" file, which should be included with this package. The terms are
|
||||||
* http://www.hardcoded.net/licenses/bsd_license
|
* also available at http://www.hardcoded.net/licenses/bsd_license
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
/* I know that there strtol out there, but it requires a pointer to
|
static PyObject *cache_bytes_to_colors(PyObject *self, PyObject *args) {
|
||||||
* a char, which would in turn require me to buffer my chars around,
|
char *y;
|
||||||
* making the whole process slower.
|
Py_ssize_t char_count, i, color_count;
|
||||||
*/
|
PyObject *result;
|
||||||
static long
|
unsigned long r, g, b;
|
||||||
xchar_to_long(char c)
|
Py_ssize_t ci;
|
||||||
{
|
PyObject *color_tuple;
|
||||||
if ((c >= 48) && (c <= 57)) { /* 0-9 */
|
|
||||||
return c - 48;
|
|
||||||
}
|
|
||||||
else if ((c >= 65) && (c <= 70)) { /* A-F */
|
|
||||||
return c - 55;
|
|
||||||
}
|
|
||||||
else if ((c >= 97) && (c <= 102)) { /* a-f */
|
|
||||||
return c - 87;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static PyObject*
|
if (!PyArg_ParseTuple(args, "y#", &y, &char_count)) {
|
||||||
cache_string_to_colors(PyObject *self, PyObject *args)
|
return NULL;
|
||||||
{
|
}
|
||||||
char *s;
|
|
||||||
Py_ssize_t char_count, color_count, i;
|
color_count = char_count / 3;
|
||||||
PyObject *result;
|
result = PyList_New(color_count);
|
||||||
|
if (result == NULL) {
|
||||||
if (!PyArg_ParseTuple(args, "s#", &s, &char_count)) {
|
return NULL;
|
||||||
return NULL;
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < color_count; i++) {
|
||||||
|
ci = i * 3;
|
||||||
|
r = (unsigned char)y[ci];
|
||||||
|
g = (unsigned char)y[ci + 1];
|
||||||
|
b = (unsigned char)y[ci + 2];
|
||||||
|
|
||||||
|
color_tuple = inttuple(3, r, g, b);
|
||||||
|
if (color_tuple == NULL) {
|
||||||
|
Py_DECREF(result);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
PyList_SET_ITEM(result, i, color_tuple);
|
||||||
color_count = (char_count / 6);
|
}
|
||||||
result = PyList_New(color_count);
|
|
||||||
if (result == NULL) {
|
return result;
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i=0; i<color_count; i++) {
|
|
||||||
long r, g, b;
|
|
||||||
Py_ssize_t ci;
|
|
||||||
PyObject *color_tuple;
|
|
||||||
|
|
||||||
ci = i * 6;
|
|
||||||
r = (xchar_to_long(s[ci]) << 4) + xchar_to_long(s[ci+1]);
|
|
||||||
g = (xchar_to_long(s[ci+2]) << 4) + xchar_to_long(s[ci+3]);
|
|
||||||
b = (xchar_to_long(s[ci+4]) << 4) + xchar_to_long(s[ci+5]);
|
|
||||||
|
|
||||||
color_tuple = inttuple(3, r, g, b);
|
|
||||||
if (color_tuple == NULL) {
|
|
||||||
Py_DECREF(result);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
PyList_SET_ITEM(result, i, color_tuple);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyMethodDef CacheMethods[] = {
|
static PyMethodDef CacheMethods[] = {
|
||||||
{"string_to_colors", cache_string_to_colors, METH_VARARGS,
|
{"bytes_to_colors", cache_bytes_to_colors, METH_VARARGS,
|
||||||
"Transform the string 's' in a list of 3 sized tuples."},
|
"Transform the bytes 's' into a list of 3 sized tuples."},
|
||||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct PyModuleDef CacheDef = {
|
static struct PyModuleDef CacheDef = {PyModuleDef_HEAD_INIT,
|
||||||
PyModuleDef_HEAD_INIT,
|
"_cache",
|
||||||
"_cache",
|
NULL,
|
||||||
NULL,
|
-1,
|
||||||
-1,
|
CacheMethods,
|
||||||
CacheMethods,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL};
|
||||||
NULL
|
|
||||||
};
|
|
||||||
|
|
||||||
PyObject *
|
PyObject *PyInit__cache(void) {
|
||||||
PyInit__cache(void)
|
PyObject *m = PyModule_Create(&CacheDef);
|
||||||
{
|
if (m == NULL) {
|
||||||
PyObject *m = PyModule_Create(&CacheDef);
|
return NULL;
|
||||||
if (m == NULL) {
|
}
|
||||||
return NULL;
|
return m;
|
||||||
}
|
}
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
@ -2,8 +2,8 @@
|
|||||||
* Created On: 2010-02-04
|
* Created On: 2010-02-04
|
||||||
* Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
* Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
|
||||||
*
|
*
|
||||||
* This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
* This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
||||||
* which should be included with this package. The terms are also available at
|
* which should be included with this package. The terms are also available at
|
||||||
* http://www.hardcoded.net/licenses/bsd_license
|
* http://www.hardcoded.net/licenses/bsd_license
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -27,19 +27,19 @@ PyObject* inttuple(int n, ...)
|
|||||||
PyObject *pnumber;
|
PyObject *pnumber;
|
||||||
PyObject *result;
|
PyObject *result;
|
||||||
va_list numbers;
|
va_list numbers;
|
||||||
|
|
||||||
va_start(numbers, n);
|
va_start(numbers, n);
|
||||||
result = PyTuple_New(n);
|
result = PyTuple_New(n);
|
||||||
|
|
||||||
for (i=0; i<n; i++) {
|
for (i=0; i<n; i++) {
|
||||||
pnumber = PyLong_FromLong(va_arg(numbers, long));
|
pnumber = PyLong_FromUnsignedLong(va_arg(numbers, long));
|
||||||
if (pnumber == NULL) {
|
if (pnumber == NULL) {
|
||||||
Py_DECREF(result);
|
Py_DECREF(result);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
PyTuple_SET_ITEM(result, i, pnumber);
|
PyTuple_SET_ITEM(result, i, pnumber);
|
||||||
}
|
}
|
||||||
|
|
||||||
va_end(numbers);
|
va_end(numbers);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -2,8 +2,8 @@
|
|||||||
* Created On: 2010-02-04
|
* Created On: 2010-02-04
|
||||||
* Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
* Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||||
*
|
*
|
||||||
* This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
* This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||||
* which should be included with this package. The terms are also available at
|
* which should be included with this package. The terms are also available at
|
||||||
* http://www.gnu.org/licenses/gpl-3.0.html
|
* http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -17,4 +17,4 @@ int min(int a, int b);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Create a tuple out of an array of integers. */
|
/* Create a tuple out of an array of integers. */
|
||||||
PyObject* inttuple(int n, ...);
|
PyObject* inttuple(int n, ...);
|
||||||
|
@ -9,7 +9,7 @@ from hscommon.util import get_file_ext, format_size
|
|||||||
|
|
||||||
from core.util import format_timestamp, format_perc, format_dupe_count
|
from core.util import format_timestamp, format_perc, format_dupe_count
|
||||||
from core import fs
|
from core import fs
|
||||||
from . import exif
|
from core.pe import exif
|
||||||
|
|
||||||
# This global value is set by the platform-specific subclasser of the Photo base class
|
# This global value is set by the platform-specific subclasser of the Photo base class
|
||||||
PLAT_SPECIFIC_PHOTO_CLASS = None
|
PLAT_SPECIFIC_PHOTO_CLASS = None
|
||||||
@ -29,7 +29,7 @@ class Photo(fs.File):
|
|||||||
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())
|
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())
|
||||||
|
|
||||||
# These extensions are supported on all platforms
|
# These extensions are supported on all platforms
|
||||||
HANDLED_EXTS = {"png", "jpg", "jpeg", "gif", "bmp", "tiff", "tif"}
|
HANDLED_EXTS = {"png", "jpg", "jpeg", "gif", "bmp", "tiff", "tif", "webp"}
|
||||||
|
|
||||||
def _plat_get_dimensions(self):
|
def _plat_get_dimensions(self):
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
@ -37,7 +37,7 @@ class Photo(fs.File):
|
|||||||
def _plat_get_blocks(self, block_count_per_side, orientation):
|
def _plat_get_blocks(self, block_count_per_side, orientation):
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
def _get_orientation(self):
|
def get_orientation(self):
|
||||||
if not hasattr(self, "_cached_orientation"):
|
if not hasattr(self, "_cached_orientation"):
|
||||||
try:
|
try:
|
||||||
with self.path.open("rb") as fp:
|
with self.path.open("rb") as fp:
|
||||||
@ -95,10 +95,13 @@ class Photo(fs.File):
|
|||||||
fs.File._read_info(self, field)
|
fs.File._read_info(self, field)
|
||||||
if field == "dimensions":
|
if field == "dimensions":
|
||||||
self.dimensions = self._plat_get_dimensions()
|
self.dimensions = self._plat_get_dimensions()
|
||||||
if self._get_orientation() in {5, 6, 7, 8}:
|
if self.get_orientation() in {5, 6, 7, 8}:
|
||||||
self.dimensions = (self.dimensions[1], self.dimensions[0])
|
self.dimensions = (self.dimensions[1], self.dimensions[0])
|
||||||
elif field == "exif_timestamp":
|
elif field == "exif_timestamp":
|
||||||
self.exif_timestamp = self._get_exif_timestamp()
|
self.exif_timestamp = self._get_exif_timestamp()
|
||||||
|
|
||||||
def get_blocks(self, block_count_per_side):
|
def get_blocks(self, block_count_per_side, orientation: int = None):
|
||||||
return self._plat_get_blocks(block_count_per_side, self._get_orientation())
|
if orientation is None:
|
||||||
|
return self._plat_get_blocks(block_count_per_side, self.get_orientation())
|
||||||
|
else:
|
||||||
|
return self._plat_get_blocks(block_count_per_side, orientation)
|
||||||
|
@ -8,12 +8,13 @@ from hscommon.trans import tr
|
|||||||
|
|
||||||
from core.scanner import Scanner, ScanType, ScanOption
|
from core.scanner import Scanner, ScanType, ScanOption
|
||||||
|
|
||||||
from . import matchblock, matchexif
|
from core.pe import matchblock, matchexif
|
||||||
|
|
||||||
|
|
||||||
class ScannerPE(Scanner):
|
class ScannerPE(Scanner):
|
||||||
cache_path = None
|
cache_path = None
|
||||||
match_scaled = False
|
match_scaled = False
|
||||||
|
match_rotated = False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_scan_options():
|
def get_scan_options():
|
||||||
@ -29,6 +30,7 @@ class ScannerPE(Scanner):
|
|||||||
cache_path=self.cache_path,
|
cache_path=self.cache_path,
|
||||||
threshold=self.min_match_percentage,
|
threshold=self.min_match_percentage,
|
||||||
match_scaled=self.match_scaled,
|
match_scaled=self.match_scaled,
|
||||||
|
match_rotated=self.match_rotated,
|
||||||
j=j,
|
j=j,
|
||||||
)
|
)
|
||||||
elif self.scan_type == ScanType.EXIFTIMESTAMP:
|
elif self.scan_type == ScanType.EXIFTIMESTAMP:
|
||||||
|
@ -43,7 +43,7 @@ class Criterion:
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def display(self):
|
def display(self):
|
||||||
return "{} ({})".format(self.category.NAME, self.display_value)
|
return f"{self.category.NAME} ({self.display_value})"
|
||||||
|
|
||||||
|
|
||||||
class ValueListCategory(CriterionCategory):
|
class ValueListCategory(CriterionCategory):
|
||||||
@ -82,10 +82,12 @@ class FolderCategory(ValueListCategory):
|
|||||||
|
|
||||||
def sort_key(self, dupe, crit_value):
|
def sort_key(self, dupe, crit_value):
|
||||||
value = self.extract_value(dupe)
|
value = self.extract_value(dupe)
|
||||||
if value[: len(crit_value)] == crit_value:
|
# This is instead of using is_relative_to() which was added in py 3.9
|
||||||
return 0
|
try:
|
||||||
else:
|
value.relative_to(crit_value)
|
||||||
|
except ValueError:
|
||||||
return 1
|
return 1
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
class FilenameCategory(CriterionCategory):
|
class FilenameCategory(CriterionCategory):
|
||||||
@ -94,6 +96,8 @@ class FilenameCategory(CriterionCategory):
|
|||||||
DOESNT_END_WITH_NUMBER = 1
|
DOESNT_END_WITH_NUMBER = 1
|
||||||
LONGEST = 2
|
LONGEST = 2
|
||||||
SHORTEST = 3
|
SHORTEST = 3
|
||||||
|
LONGEST_PATH = 4
|
||||||
|
SHORTEST_PATH = 5
|
||||||
|
|
||||||
def format_criterion_value(self, value):
|
def format_criterion_value(self, value):
|
||||||
return {
|
return {
|
||||||
@ -101,6 +105,8 @@ class FilenameCategory(CriterionCategory):
|
|||||||
self.DOESNT_END_WITH_NUMBER: tr("Doesn't end with number"),
|
self.DOESNT_END_WITH_NUMBER: tr("Doesn't end with number"),
|
||||||
self.LONGEST: tr("Longest"),
|
self.LONGEST: tr("Longest"),
|
||||||
self.SHORTEST: tr("Shortest"),
|
self.SHORTEST: tr("Shortest"),
|
||||||
|
self.LONGEST_PATH: tr("Longest Path"),
|
||||||
|
self.SHORTEST_PATH: tr("Shortest Path"),
|
||||||
}[value]
|
}[value]
|
||||||
|
|
||||||
def extract_value(self, dupe):
|
def extract_value(self, dupe):
|
||||||
@ -114,6 +120,10 @@ class FilenameCategory(CriterionCategory):
|
|||||||
return 0 if ends_with_digit else 1
|
return 0 if ends_with_digit else 1
|
||||||
else:
|
else:
|
||||||
return 1 if ends_with_digit else 0
|
return 1 if ends_with_digit else 0
|
||||||
|
elif crit_value == self.LONGEST_PATH:
|
||||||
|
return len(str(dupe.folder_path)) * -1
|
||||||
|
elif crit_value == self.SHORTEST_PATH:
|
||||||
|
return len(str(dupe.folder_path))
|
||||||
else:
|
else:
|
||||||
value = len(value)
|
value = len(value)
|
||||||
if crit_value == self.LONGEST:
|
if crit_value == self.LONGEST:
|
||||||
@ -128,6 +138,8 @@ class FilenameCategory(CriterionCategory):
|
|||||||
self.DOESNT_END_WITH_NUMBER,
|
self.DOESNT_END_WITH_NUMBER,
|
||||||
self.LONGEST,
|
self.LONGEST,
|
||||||
self.SHORTEST,
|
self.SHORTEST,
|
||||||
|
self.LONGEST_PATH,
|
||||||
|
self.SHORTEST_PATH,
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -10,6 +10,7 @@ import logging
|
|||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import os.path as op
|
import os.path as op
|
||||||
|
from errno import EISDIR, EACCES
|
||||||
from xml.etree import ElementTree as ET
|
from xml.etree import ElementTree as ET
|
||||||
|
|
||||||
from hscommon.jobprogress.job import nulljob
|
from hscommon.jobprogress.job import nulljob
|
||||||
@ -17,8 +18,8 @@ from hscommon.conflict import get_conflicted_name
|
|||||||
from hscommon.util import flatten, nonone, FileOrPath, format_size
|
from hscommon.util import flatten, nonone, FileOrPath, format_size
|
||||||
from hscommon.trans import tr
|
from hscommon.trans import tr
|
||||||
|
|
||||||
from . import engine
|
from core import engine
|
||||||
from .markable import Markable
|
from core.markable import Markable
|
||||||
|
|
||||||
|
|
||||||
class Results(Markable):
|
class Results(Markable):
|
||||||
@ -191,7 +192,7 @@ class Results(Markable):
|
|||||||
self.__filters.append(filter_str)
|
self.__filters.append(filter_str)
|
||||||
if self.__filtered_dupes is None:
|
if self.__filtered_dupes is None:
|
||||||
self.__filtered_dupes = flatten(g[:] for g in self.groups)
|
self.__filtered_dupes = flatten(g[:] for g in self.groups)
|
||||||
self.__filtered_dupes = set(dupe for dupe in self.__filtered_dupes if filter_re.search(str(dupe.path)))
|
self.__filtered_dupes = {dupe for dupe in self.__filtered_dupes if filter_re.search(str(dupe.path))}
|
||||||
filtered_groups = set()
|
filtered_groups = set()
|
||||||
for dupe in self.__filtered_dupes:
|
for dupe in self.__filtered_dupes:
|
||||||
filtered_groups.add(self.get_group_of_duplicate(dupe))
|
filtered_groups.add(self.get_group_of_duplicate(dupe))
|
||||||
@ -301,7 +302,7 @@ class Results(Markable):
|
|||||||
try:
|
try:
|
||||||
func(dupe)
|
func(dupe)
|
||||||
to_remove.append(dupe)
|
to_remove.append(dupe)
|
||||||
except (EnvironmentError, UnicodeEncodeError) as e:
|
except (OSError, UnicodeEncodeError) as e:
|
||||||
self.problems.append((dupe, str(e)))
|
self.problems.append((dupe, str(e)))
|
||||||
if remove_from_results:
|
if remove_from_results:
|
||||||
self.remove_duplicates(to_remove)
|
self.remove_duplicates(to_remove)
|
||||||
@ -374,10 +375,10 @@ class Results(Markable):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
do_write(outfile)
|
do_write(outfile)
|
||||||
except IOError as e:
|
except OSError as e:
|
||||||
# If our IOError is because dest is already a directory, we want to handle that. 21 is
|
# If our OSError is because dest is already a directory, we want to handle that. 21 is
|
||||||
# the code we get on OS X and Linux, 13 is what we get on Windows.
|
# the code we get on OS X and Linux (EISDIR), 13 is what we get on Windows (EACCES).
|
||||||
if e.errno in {21, 13}:
|
if e.errno in (EISDIR, EACCES):
|
||||||
p = str(outfile)
|
p = str(outfile)
|
||||||
dirname, basename = op.split(p)
|
dirname, basename = op.split(p)
|
||||||
otherfiles = os.listdir(dirname)
|
otherfiles = os.listdir(dirname)
|
||||||
|
@ -13,7 +13,7 @@ from hscommon.jobprogress import job
|
|||||||
from hscommon.util import dedupe, rem_file_ext, get_file_ext
|
from hscommon.util import dedupe, rem_file_ext, get_file_ext
|
||||||
from hscommon.trans import tr
|
from hscommon.trans import tr
|
||||||
|
|
||||||
from . import engine
|
from core import engine
|
||||||
|
|
||||||
# It's quite ugly to have scan types from all editions all put in the same class, but because there's
|
# It's quite ugly to have scan types from all editions all put in the same class, but because there's
|
||||||
# there will be some nasty bugs popping up (ScanType is used in core when in should exclusively be
|
# there will be some nasty bugs popping up (ScanType is used in core when in should exclusively be
|
||||||
@ -87,8 +87,6 @@ class Scanner:
|
|||||||
}
|
}
|
||||||
):
|
):
|
||||||
j = j.start_subjob([2, 8])
|
j = j.start_subjob([2, 8])
|
||||||
for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
|
|
||||||
f.size # pre-read, makes a smoother progress if read here (especially for bundles)
|
|
||||||
if self.size_threshold:
|
if self.size_threshold:
|
||||||
files = [f for f in files if f.size >= self.size_threshold]
|
files = [f for f in files if f.size >= self.size_threshold]
|
||||||
if self.large_size_threshold:
|
if self.large_size_threshold:
|
||||||
@ -134,7 +132,7 @@ class Scanner:
|
|||||||
return False
|
return False
|
||||||
if is_same_with_digit(refname, dupename):
|
if is_same_with_digit(refname, dupename):
|
||||||
return True
|
return True
|
||||||
return len(dupe.path) > len(ref.path)
|
return len(dupe.path.parts) > len(ref.path.parts)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_scan_options():
|
def get_scan_options():
|
||||||
@ -164,15 +162,18 @@ class Scanner:
|
|||||||
toremove = set()
|
toremove = set()
|
||||||
last_parent_path = sortedpaths[0]
|
last_parent_path = sortedpaths[0]
|
||||||
for p in sortedpaths[1:]:
|
for p in sortedpaths[1:]:
|
||||||
if p in last_parent_path:
|
if last_parent_path in p.parents:
|
||||||
toremove.add(p)
|
toremove.add(p)
|
||||||
else:
|
else:
|
||||||
last_parent_path = p
|
last_parent_path = p
|
||||||
matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
|
matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
|
||||||
if not self.mix_file_kind:
|
if not self.mix_file_kind:
|
||||||
matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
|
matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
|
||||||
matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
|
if self.include_exists_check:
|
||||||
matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
|
matches = [m for m in matches if m.first.exists() and m.second.exists()]
|
||||||
|
# Contents already handles ref checks, other scan types might not catch during scan
|
||||||
|
if self.scan_type != ScanType.CONTENTS:
|
||||||
|
matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
|
||||||
if ignore_list:
|
if ignore_list:
|
||||||
matches = [m for m in matches if not ignore_list.are_ignored(str(m.first.path), str(m.second.path))]
|
matches = [m for m in matches if not ignore_list.are_ignored(str(m.first.path), str(m.second.path))]
|
||||||
logging.info("Grouping matches")
|
logging.info("Grouping matches")
|
||||||
@ -212,3 +213,4 @@ class Scanner:
|
|||||||
large_size_threshold = 0
|
large_size_threshold = 0
|
||||||
big_file_size_threshold = 0
|
big_file_size_threshold = 0
|
||||||
word_weighting = False
|
word_weighting = False
|
||||||
|
include_exists_check = True
|
||||||
|
@ -1 +1 @@
|
|||||||
from . import fs, result_table, scanner # noqa
|
from core.se import fs, result_table, scanner # noqa
|
||||||
|
@ -7,18 +7,19 @@
|
|||||||
import os
|
import os
|
||||||
import os.path as op
|
import os.path as op
|
||||||
import logging
|
import logging
|
||||||
|
import tempfile
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from hscommon.path import Path
|
from pathlib import Path
|
||||||
import hscommon.conflict
|
import hscommon.conflict
|
||||||
import hscommon.util
|
import hscommon.util
|
||||||
from hscommon.testutil import eq_, log_calls
|
from hscommon.testutil import eq_, log_calls
|
||||||
from hscommon.jobprogress.job import Job
|
from hscommon.jobprogress.job import Job
|
||||||
|
|
||||||
from .base import TestApp
|
from core.tests.base import TestApp
|
||||||
from .results_test import GetTestGroups
|
from core.tests.results_test import GetTestGroups
|
||||||
from .. import app, fs, engine
|
from core import app, fs, engine
|
||||||
from ..scanner import ScanType
|
from core.scanner import ScanType
|
||||||
|
|
||||||
|
|
||||||
def add_fake_files_to_directories(directories, files):
|
def add_fake_files_to_directories(directories, files):
|
||||||
@ -56,7 +57,7 @@ class TestCaseDupeGuru:
|
|||||||
# for this unit is pathetic. What's done is done. My approach now is to add tests for
|
# for this unit is pathetic. What's done is done. My approach now is to add tests for
|
||||||
# every change I want to make. The blowup was caused by a missing import.
|
# every change I want to make. The blowup was caused by a missing import.
|
||||||
p = Path(str(tmpdir))
|
p = Path(str(tmpdir))
|
||||||
p["foo"].open("w").close()
|
p.joinpath("foo").touch()
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
hscommon.conflict,
|
hscommon.conflict,
|
||||||
"smart_copy",
|
"smart_copy",
|
||||||
@ -68,22 +69,23 @@ class TestCaseDupeGuru:
|
|||||||
dgapp = TestApp().app
|
dgapp = TestApp().app
|
||||||
dgapp.directories.add_path(p)
|
dgapp.directories.add_path(p)
|
||||||
[f] = dgapp.directories.get_files()
|
[f] = dgapp.directories.get_files()
|
||||||
dgapp.copy_or_move(f, True, "some_destination", 0)
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
eq_(1, len(hscommon.conflict.smart_copy.calls))
|
dgapp.copy_or_move(f, True, tmp_dir, 0)
|
||||||
call = hscommon.conflict.smart_copy.calls[0]
|
eq_(1, len(hscommon.conflict.smart_copy.calls))
|
||||||
eq_(call["dest_path"], op.join("some_destination", "foo"))
|
call = hscommon.conflict.smart_copy.calls[0]
|
||||||
eq_(call["source_path"], f.path)
|
eq_(call["dest_path"], Path(tmp_dir, "foo"))
|
||||||
|
eq_(call["source_path"], f.path)
|
||||||
|
|
||||||
def test_copy_or_move_clean_empty_dirs(self, tmpdir, monkeypatch):
|
def test_copy_or_move_clean_empty_dirs(self, tmpdir, monkeypatch):
|
||||||
tmppath = Path(str(tmpdir))
|
tmppath = Path(str(tmpdir))
|
||||||
sourcepath = tmppath["source"]
|
sourcepath = tmppath.joinpath("source")
|
||||||
sourcepath.mkdir()
|
sourcepath.mkdir()
|
||||||
sourcepath["myfile"].open("w")
|
sourcepath.joinpath("myfile").touch()
|
||||||
app = TestApp().app
|
app = TestApp().app
|
||||||
app.directories.add_path(tmppath)
|
app.directories.add_path(tmppath)
|
||||||
[myfile] = app.directories.get_files()
|
[myfile] = app.directories.get_files()
|
||||||
monkeypatch.setattr(app, "clean_empty_dirs", log_calls(lambda path: None))
|
monkeypatch.setattr(app, "clean_empty_dirs", log_calls(lambda path: None))
|
||||||
app.copy_or_move(myfile, False, tmppath["dest"], 0)
|
app.copy_or_move(myfile, False, tmppath.joinpath("dest"), 0)
|
||||||
calls = app.clean_empty_dirs.calls
|
calls = app.clean_empty_dirs.calls
|
||||||
eq_(1, len(calls))
|
eq_(1, len(calls))
|
||||||
eq_(sourcepath, calls[0]["path"])
|
eq_(sourcepath, calls[0]["path"])
|
||||||
@ -95,7 +97,7 @@ class TestCaseDupeGuru:
|
|||||||
|
|
||||||
# At some point, any() was used in a wrong way that made Scan() wrongly return 1
|
# At some point, any() was used in a wrong way that made Scan() wrongly return 1
|
||||||
app = TestApp().app
|
app = TestApp().app
|
||||||
f1, f2 = [FakeFile("foo") for _ in range(2)]
|
f1, f2 = (FakeFile("foo") for _ in range(2))
|
||||||
f1.is_ref, f2.is_ref = (False, False)
|
f1.is_ref, f2.is_ref = (False, False)
|
||||||
assert not (bool(f1) and bool(f2))
|
assert not (bool(f1) and bool(f2))
|
||||||
add_fake_files_to_directories(app.directories, [f1, f2])
|
add_fake_files_to_directories(app.directories, [f1, f2])
|
||||||
@ -106,8 +108,8 @@ class TestCaseDupeGuru:
|
|||||||
# If the ignore_hardlink_matches option is set, don't match files hardlinking to the same
|
# If the ignore_hardlink_matches option is set, don't match files hardlinking to the same
|
||||||
# inode.
|
# inode.
|
||||||
tmppath = Path(str(tmpdir))
|
tmppath = Path(str(tmpdir))
|
||||||
tmppath["myfile"].open("w").write("foo")
|
tmppath.joinpath("myfile").open("wt").write("foo")
|
||||||
os.link(str(tmppath["myfile"]), str(tmppath["hardlink"]))
|
os.link(str(tmppath.joinpath("myfile")), str(tmppath.joinpath("hardlink")))
|
||||||
app = TestApp().app
|
app = TestApp().app
|
||||||
app.directories.add_path(tmppath)
|
app.directories.add_path(tmppath)
|
||||||
app.options["scan_type"] = ScanType.CONTENTS
|
app.options["scan_type"] = ScanType.CONTENTS
|
||||||
@ -153,7 +155,7 @@ class TestCaseDupeGuruCleanEmptyDirs:
|
|||||||
# delete_if_empty must be recursively called up in the path until it returns False
|
# delete_if_empty must be recursively called up in the path until it returns False
|
||||||
@log_calls
|
@log_calls
|
||||||
def mock_delete_if_empty(path, files_to_delete=[]):
|
def mock_delete_if_empty(path, files_to_delete=[]):
|
||||||
return len(path) > 1
|
return len(path.parts) > 1
|
||||||
|
|
||||||
monkeypatch.setattr(hscommon.util, "delete_if_empty", mock_delete_if_empty)
|
monkeypatch.setattr(hscommon.util, "delete_if_empty", mock_delete_if_empty)
|
||||||
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
|
# XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
|
||||||
@ -180,8 +182,8 @@ class TestCaseDupeGuruWithResults:
|
|||||||
self.rtable.refresh()
|
self.rtable.refresh()
|
||||||
tmpdir = request.getfixturevalue("tmpdir")
|
tmpdir = request.getfixturevalue("tmpdir")
|
||||||
tmppath = Path(str(tmpdir))
|
tmppath = Path(str(tmpdir))
|
||||||
tmppath["foo"].mkdir()
|
tmppath.joinpath("foo").mkdir()
|
||||||
tmppath["bar"].mkdir()
|
tmppath.joinpath("bar").mkdir()
|
||||||
self.app.directories.add_path(tmppath)
|
self.app.directories.add_path(tmppath)
|
||||||
|
|
||||||
def test_get_objects(self, do_setup):
|
def test_get_objects(self, do_setup):
|
||||||
@ -424,12 +426,9 @@ class TestCaseDupeGuruRenameSelected:
|
|||||||
def do_setup(self, request):
|
def do_setup(self, request):
|
||||||
tmpdir = request.getfixturevalue("tmpdir")
|
tmpdir = request.getfixturevalue("tmpdir")
|
||||||
p = Path(str(tmpdir))
|
p = Path(str(tmpdir))
|
||||||
fp = open(str(p["foo bar 1"]), mode="w")
|
p.joinpath("foo bar 1").touch()
|
||||||
fp.close()
|
p.joinpath("foo bar 2").touch()
|
||||||
fp = open(str(p["foo bar 2"]), mode="w")
|
p.joinpath("foo bar 3").touch()
|
||||||
fp.close()
|
|
||||||
fp = open(str(p["foo bar 3"]), mode="w")
|
|
||||||
fp.close()
|
|
||||||
files = fs.get_files(p)
|
files = fs.get_files(p)
|
||||||
for f in files:
|
for f in files:
|
||||||
f.is_ref = False
|
f.is_ref = False
|
||||||
@ -451,7 +450,7 @@ class TestCaseDupeGuruRenameSelected:
|
|||||||
g = self.groups[0]
|
g = self.groups[0]
|
||||||
self.rtable.select([1])
|
self.rtable.select([1])
|
||||||
assert app.rename_selected("renamed")
|
assert app.rename_selected("renamed")
|
||||||
names = [p.name for p in self.p.listdir()]
|
names = [p.name for p in self.p.glob("*")]
|
||||||
assert "renamed" in names
|
assert "renamed" in names
|
||||||
assert "foo bar 2" not in names
|
assert "foo bar 2" not in names
|
||||||
eq_(g.dupes[0].name, "renamed")
|
eq_(g.dupes[0].name, "renamed")
|
||||||
@ -464,7 +463,7 @@ class TestCaseDupeGuruRenameSelected:
|
|||||||
assert not app.rename_selected("renamed")
|
assert not app.rename_selected("renamed")
|
||||||
msg = logging.warning.calls[0]["msg"]
|
msg = logging.warning.calls[0]["msg"]
|
||||||
eq_("dupeGuru Warning: list index out of range", msg)
|
eq_("dupeGuru Warning: list index out of range", msg)
|
||||||
names = [p.name for p in self.p.listdir()]
|
names = [p.name for p in self.p.glob("*")]
|
||||||
assert "renamed" not in names
|
assert "renamed" not in names
|
||||||
assert "foo bar 2" in names
|
assert "foo bar 2" in names
|
||||||
eq_(g.dupes[0].name, "foo bar 2")
|
eq_(g.dupes[0].name, "foo bar 2")
|
||||||
@ -477,7 +476,7 @@ class TestCaseDupeGuruRenameSelected:
|
|||||||
assert not app.rename_selected("foo bar 1")
|
assert not app.rename_selected("foo bar 1")
|
||||||
msg = logging.warning.calls[0]["msg"]
|
msg = logging.warning.calls[0]["msg"]
|
||||||
assert msg.startswith("dupeGuru Warning: 'foo bar 1' already exists in")
|
assert msg.startswith("dupeGuru Warning: 'foo bar 1' already exists in")
|
||||||
names = [p.name for p in self.p.listdir()]
|
names = [p.name for p in self.p.glob("*")]
|
||||||
assert "foo bar 1" in names
|
assert "foo bar 1" in names
|
||||||
assert "foo bar 2" in names
|
assert "foo bar 2" in names
|
||||||
eq_(g.dupes[0].name, "foo bar 2")
|
eq_(g.dupes[0].name, "foo bar 2")
|
||||||
@ -488,9 +487,9 @@ class TestAppWithDirectoriesInTree:
|
|||||||
def do_setup(self, request):
|
def do_setup(self, request):
|
||||||
tmpdir = request.getfixturevalue("tmpdir")
|
tmpdir = request.getfixturevalue("tmpdir")
|
||||||
p = Path(str(tmpdir))
|
p = Path(str(tmpdir))
|
||||||
p["sub1"].mkdir()
|
p.joinpath("sub1").mkdir()
|
||||||
p["sub2"].mkdir()
|
p.joinpath("sub2").mkdir()
|
||||||
p["sub3"].mkdir()
|
p.joinpath("sub3").mkdir()
|
||||||
app = TestApp()
|
app = TestApp()
|
||||||
self.app = app.app
|
self.app = app.app
|
||||||
self.dtree = app.dtree
|
self.dtree = app.dtree
|
||||||
|
@ -5,17 +5,16 @@
|
|||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
from hscommon.testutil import TestApp as TestAppBase, CallLogger, eq_, with_app # noqa
|
from hscommon.testutil import TestApp as TestAppBase, CallLogger, eq_, with_app # noqa
|
||||||
from hscommon.path import Path
|
from pathlib import Path
|
||||||
from hscommon.util import get_file_ext, format_size
|
from hscommon.util import get_file_ext, format_size
|
||||||
from hscommon.gui.column import Column
|
from hscommon.gui.column import Column
|
||||||
from hscommon.jobprogress.job import nulljob, JobCancelled
|
from hscommon.jobprogress.job import nulljob, JobCancelled
|
||||||
|
|
||||||
from .. import engine
|
from core import engine, prioritize
|
||||||
from .. import prioritize
|
from core.engine import getwords
|
||||||
from ..engine import getwords
|
from core.app import DupeGuru as DupeGuruBase
|
||||||
from ..app import DupeGuru as DupeGuruBase
|
from core.gui.result_table import ResultTable as ResultTableBase
|
||||||
from ..gui.result_table import ResultTable as ResultTableBase
|
from core.gui.prioritize_dialog import PrioritizeDialog
|
||||||
from ..gui.prioritize_dialog import PrioritizeDialog
|
|
||||||
|
|
||||||
|
|
||||||
class DupeGuruView:
|
class DupeGuruView:
|
||||||
@ -86,9 +85,9 @@ class NamedObject:
|
|||||||
folder = "basepath"
|
folder = "basepath"
|
||||||
self._folder = Path(folder)
|
self._folder = Path(folder)
|
||||||
self.size = size
|
self.size = size
|
||||||
self.md5partial = name
|
self.digest_partial = name
|
||||||
self.md5 = name
|
self.digest = name
|
||||||
self.md5samples = name
|
self.digest_samples = name
|
||||||
if with_words:
|
if with_words:
|
||||||
self.words = getwords(name)
|
self.words = getwords(name)
|
||||||
self.is_ref = False
|
self.is_ref = False
|
||||||
@ -111,11 +110,11 @@ class NamedObject:
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def path(self):
|
def path(self):
|
||||||
return self._folder[self.name]
|
return self._folder.joinpath(self.name)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def folder_path(self):
|
def folder_path(self):
|
||||||
return self.path.parent()
|
return self.path.parent
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def extension(self):
|
def extension(self):
|
||||||
|
@ -9,7 +9,7 @@ from pytest import raises, skip
|
|||||||
from hscommon.testutil import eq_
|
from hscommon.testutil import eq_
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from ..pe.block import avgdiff, getblocks2, NoBlocksError, DifferentBlockCountError
|
from core.pe.block import avgdiff, getblocks2, NoBlocksError, DifferentBlockCountError
|
||||||
except ImportError:
|
except ImportError:
|
||||||
skip("Can't import the block module, probably hasn't been compiled.")
|
skip("Can't import the block module, probably hasn't been compiled.")
|
||||||
|
|
||||||
|
@ -10,41 +10,41 @@ from pytest import raises, skip
|
|||||||
from hscommon.testutil import eq_
|
from hscommon.testutil import eq_
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from ..pe.cache import colors_to_string, string_to_colors
|
from core.pe.cache import colors_to_bytes, bytes_to_colors
|
||||||
from ..pe.cache_sqlite import SqliteCache
|
from core.pe.cache_sqlite import SqliteCache
|
||||||
from ..pe.cache_shelve import ShelveCache
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
skip("Can't import the cache module, probably hasn't been compiled.")
|
skip("Can't import the cache module, probably hasn't been compiled.")
|
||||||
|
|
||||||
|
|
||||||
class TestCaseColorsToString:
|
class TestCaseColorsToString:
|
||||||
def test_no_color(self):
|
def test_no_color(self):
|
||||||
eq_("", colors_to_string([]))
|
eq_(b"", colors_to_bytes([]))
|
||||||
|
|
||||||
def test_single_color(self):
|
def test_single_color(self):
|
||||||
eq_("000000", colors_to_string([(0, 0, 0)]))
|
eq_(b"\x00\x00\x00", colors_to_bytes([(0, 0, 0)]))
|
||||||
eq_("010101", colors_to_string([(1, 1, 1)]))
|
eq_(b"\x01\x01\x01", colors_to_bytes([(1, 1, 1)]))
|
||||||
eq_("0a141e", colors_to_string([(10, 20, 30)]))
|
eq_(b"\x0a\x14\x1e", colors_to_bytes([(10, 20, 30)]))
|
||||||
|
|
||||||
def test_two_colors(self):
|
def test_two_colors(self):
|
||||||
eq_("000102030405", colors_to_string([(0, 1, 2), (3, 4, 5)]))
|
eq_(b"\x00\x01\x02\x03\x04\x05", colors_to_bytes([(0, 1, 2), (3, 4, 5)]))
|
||||||
|
|
||||||
|
|
||||||
class TestCaseStringToColors:
|
class TestCaseStringToColors:
|
||||||
def test_empty(self):
|
def test_empty(self):
|
||||||
eq_([], string_to_colors(""))
|
eq_([], bytes_to_colors(b""))
|
||||||
|
|
||||||
def test_single_color(self):
|
def test_single_color(self):
|
||||||
eq_([(0, 0, 0)], string_to_colors("000000"))
|
eq_([(0, 0, 0)], bytes_to_colors(b"\x00\x00\x00"))
|
||||||
eq_([(2, 3, 4)], string_to_colors("020304"))
|
eq_([(2, 3, 4)], bytes_to_colors(b"\x02\x03\x04"))
|
||||||
eq_([(10, 20, 30)], string_to_colors("0a141e"))
|
eq_([(10, 20, 30)], bytes_to_colors(b"\x0a\x14\x1e"))
|
||||||
|
|
||||||
def test_two_colors(self):
|
def test_two_colors(self):
|
||||||
eq_([(10, 20, 30), (40, 50, 60)], string_to_colors("0a141e28323c"))
|
eq_([(10, 20, 30), (40, 50, 60)], bytes_to_colors(b"\x0a\x14\x1e\x28\x32\x3c"))
|
||||||
|
|
||||||
def test_incomplete_color(self):
|
def test_incomplete_color(self):
|
||||||
# don't return anything if it's not a complete color
|
# don't return anything if it's not a complete color
|
||||||
eq_([], string_to_colors("102"))
|
eq_([], bytes_to_colors(b"\x01"))
|
||||||
|
eq_([(1, 2, 3)], bytes_to_colors(b"\x01\x02\x03\x04"))
|
||||||
|
|
||||||
|
|
||||||
class BaseTestCaseCache:
|
class BaseTestCaseCache:
|
||||||
@ -59,13 +59,13 @@ class BaseTestCaseCache:
|
|||||||
|
|
||||||
def test_set_then_retrieve_blocks(self):
|
def test_set_then_retrieve_blocks(self):
|
||||||
c = self.get_cache()
|
c = self.get_cache()
|
||||||
b = [(0, 0, 0), (1, 2, 3)]
|
b = [[(0, 0, 0), (1, 2, 3)]] * 8
|
||||||
c["foo"] = b
|
c["foo"] = b
|
||||||
eq_(b, c["foo"])
|
eq_(b, c["foo"])
|
||||||
|
|
||||||
def test_delitem(self):
|
def test_delitem(self):
|
||||||
c = self.get_cache()
|
c = self.get_cache()
|
||||||
c["foo"] = ""
|
c["foo"] = [[]] * 8
|
||||||
del c["foo"]
|
del c["foo"]
|
||||||
assert "foo" not in c
|
assert "foo" not in c
|
||||||
with raises(KeyError):
|
with raises(KeyError):
|
||||||
@ -74,16 +74,16 @@ class BaseTestCaseCache:
|
|||||||
def test_persistance(self, tmpdir):
|
def test_persistance(self, tmpdir):
|
||||||
DBNAME = tmpdir.join("hstest.db")
|
DBNAME = tmpdir.join("hstest.db")
|
||||||
c = self.get_cache(str(DBNAME))
|
c = self.get_cache(str(DBNAME))
|
||||||
c["foo"] = [(1, 2, 3)]
|
c["foo"] = [[(1, 2, 3)]] * 8
|
||||||
del c
|
del c
|
||||||
c = self.get_cache(str(DBNAME))
|
c = self.get_cache(str(DBNAME))
|
||||||
eq_([(1, 2, 3)], c["foo"])
|
eq_([[(1, 2, 3)]] * 8, c["foo"])
|
||||||
|
|
||||||
def test_filter(self):
|
def test_filter(self):
|
||||||
c = self.get_cache()
|
c = self.get_cache()
|
||||||
c["foo"] = ""
|
c["foo"] = [[]] * 8
|
||||||
c["bar"] = ""
|
c["bar"] = [[]] * 8
|
||||||
c["baz"] = ""
|
c["baz"] = [[]] * 8
|
||||||
c.filter(lambda p: p != "bar") # only 'bar' is removed
|
c.filter(lambda p: p != "bar") # only 'bar' is removed
|
||||||
eq_(2, len(c))
|
eq_(2, len(c))
|
||||||
assert "foo" in c
|
assert "foo" in c
|
||||||
@ -92,9 +92,9 @@ class BaseTestCaseCache:
|
|||||||
|
|
||||||
def test_clear(self):
|
def test_clear(self):
|
||||||
c = self.get_cache()
|
c = self.get_cache()
|
||||||
c["foo"] = ""
|
c["foo"] = [[]] * 8
|
||||||
c["bar"] = ""
|
c["bar"] = [[]] * 8
|
||||||
c["baz"] = ""
|
c["baz"] = [[]] * 8
|
||||||
c.clear()
|
c.clear()
|
||||||
eq_(0, len(c))
|
eq_(0, len(c))
|
||||||
assert "foo" not in c
|
assert "foo" not in c
|
||||||
@ -104,7 +104,7 @@ class BaseTestCaseCache:
|
|||||||
def test_by_id(self):
|
def test_by_id(self):
|
||||||
# it's possible to use the cache by referring to the files by their row_id
|
# it's possible to use the cache by referring to the files by their row_id
|
||||||
c = self.get_cache()
|
c = self.get_cache()
|
||||||
b = [(0, 0, 0), (1, 2, 3)]
|
b = [[(0, 0, 0), (1, 2, 3)]] * 8
|
||||||
c["foo"] = b
|
c["foo"] = b
|
||||||
foo_id = c.get_id("foo")
|
foo_id = c.get_id("foo")
|
||||||
eq_(c[foo_id], b)
|
eq_(c[foo_id], b)
|
||||||
@ -127,15 +127,10 @@ class TestCaseSqliteCache(BaseTestCaseCache):
|
|||||||
fp.write("invalid sqlite content")
|
fp.write("invalid sqlite content")
|
||||||
fp.close()
|
fp.close()
|
||||||
c = self.get_cache(dbname) # should not raise a DatabaseError
|
c = self.get_cache(dbname) # should not raise a DatabaseError
|
||||||
c["foo"] = [(1, 2, 3)]
|
c["foo"] = [[(1, 2, 3)]] * 8
|
||||||
del c
|
del c
|
||||||
c = self.get_cache(dbname)
|
c = self.get_cache(dbname)
|
||||||
eq_(c["foo"], [(1, 2, 3)])
|
eq_(c["foo"], [[(1, 2, 3)]] * 8)
|
||||||
|
|
||||||
|
|
||||||
class TestCaseShelveCache(BaseTestCaseCache):
|
|
||||||
def get_cache(self, dbname=None):
|
|
||||||
return ShelveCache(dbname)
|
|
||||||
|
|
||||||
|
|
||||||
class TestCaseCacheSQLEscape:
|
class TestCaseCacheSQLEscape:
|
||||||
@ -157,7 +152,7 @@ class TestCaseCacheSQLEscape:
|
|||||||
|
|
||||||
def test_delitem(self):
|
def test_delitem(self):
|
||||||
c = self.get_cache()
|
c = self.get_cache()
|
||||||
c["foo'bar"] = []
|
c["foo'bar"] = [[]] * 8
|
||||||
try:
|
try:
|
||||||
del c["foo'bar"]
|
del c["foo'bar"]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
@ -10,45 +10,39 @@ import tempfile
|
|||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from pytest import raises
|
from pytest import raises
|
||||||
from hscommon.path import Path
|
from pathlib import Path
|
||||||
from hscommon.testutil import eq_
|
from hscommon.testutil import eq_
|
||||||
from hscommon.plat import ISWINDOWS
|
from hscommon.plat import ISWINDOWS
|
||||||
|
|
||||||
from ..fs import File
|
from core.fs import File
|
||||||
from ..directories import (
|
from core.directories import (
|
||||||
Directories,
|
Directories,
|
||||||
DirectoryState,
|
DirectoryState,
|
||||||
AlreadyThereError,
|
AlreadyThereError,
|
||||||
InvalidPathError,
|
InvalidPathError,
|
||||||
)
|
)
|
||||||
from ..exclude import ExcludeList, ExcludeDict
|
from core.exclude import ExcludeList, ExcludeDict
|
||||||
|
|
||||||
|
|
||||||
def create_fake_fs(rootpath):
|
def create_fake_fs(rootpath):
|
||||||
# We have it as a separate function because other units are using it.
|
# We have it as a separate function because other units are using it.
|
||||||
rootpath = rootpath["fs"]
|
rootpath = rootpath.joinpath("fs")
|
||||||
rootpath.mkdir()
|
rootpath.mkdir()
|
||||||
rootpath["dir1"].mkdir()
|
rootpath.joinpath("dir1").mkdir()
|
||||||
rootpath["dir2"].mkdir()
|
rootpath.joinpath("dir2").mkdir()
|
||||||
rootpath["dir3"].mkdir()
|
rootpath.joinpath("dir3").mkdir()
|
||||||
fp = rootpath["file1.test"].open("w")
|
with rootpath.joinpath("file1.test").open("wt") as fp:
|
||||||
fp.write("1")
|
fp.write("1")
|
||||||
fp.close()
|
with rootpath.joinpath("file2.test").open("wt") as fp:
|
||||||
fp = rootpath["file2.test"].open("w")
|
fp.write("12")
|
||||||
fp.write("12")
|
with rootpath.joinpath("file3.test").open("wt") as fp:
|
||||||
fp.close()
|
fp.write("123")
|
||||||
fp = rootpath["file3.test"].open("w")
|
with rootpath.joinpath("dir1", "file1.test").open("wt") as fp:
|
||||||
fp.write("123")
|
fp.write("1")
|
||||||
fp.close()
|
with rootpath.joinpath("dir2", "file2.test").open("wt") as fp:
|
||||||
fp = rootpath["dir1"]["file1.test"].open("w")
|
fp.write("12")
|
||||||
fp.write("1")
|
with rootpath.joinpath("dir3", "file3.test").open("wt") as fp:
|
||||||
fp.close()
|
fp.write("123")
|
||||||
fp = rootpath["dir2"]["file2.test"].open("w")
|
|
||||||
fp.write("12")
|
|
||||||
fp.close()
|
|
||||||
fp = rootpath["dir3"]["file3.test"].open("w")
|
|
||||||
fp.write("123")
|
|
||||||
fp.close()
|
|
||||||
return rootpath
|
return rootpath
|
||||||
|
|
||||||
|
|
||||||
@ -60,11 +54,10 @@ def setup_module(module):
|
|||||||
# and another with a more complex structure.
|
# and another with a more complex structure.
|
||||||
testpath = Path(tempfile.mkdtemp())
|
testpath = Path(tempfile.mkdtemp())
|
||||||
module.testpath = testpath
|
module.testpath = testpath
|
||||||
rootpath = testpath["onefile"]
|
rootpath = testpath.joinpath("onefile")
|
||||||
rootpath.mkdir()
|
rootpath.mkdir()
|
||||||
fp = rootpath["test.txt"].open("w")
|
with rootpath.joinpath("test.txt").open("wt") as fp:
|
||||||
fp.write("test_data")
|
fp.write("test_data")
|
||||||
fp.close()
|
|
||||||
create_fake_fs(testpath)
|
create_fake_fs(testpath)
|
||||||
|
|
||||||
|
|
||||||
@ -80,13 +73,13 @@ def test_empty():
|
|||||||
|
|
||||||
def test_add_path():
|
def test_add_path():
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p = testpath["onefile"]
|
p = testpath.joinpath("onefile")
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
eq_(1, len(d))
|
eq_(1, len(d))
|
||||||
assert p in d
|
assert p in d
|
||||||
assert (p["foobar"]) in d
|
assert (p.joinpath("foobar")) in d
|
||||||
assert p.parent() not in d
|
assert p.parent not in d
|
||||||
p = testpath["fs"]
|
p = testpath.joinpath("fs")
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
eq_(2, len(d))
|
eq_(2, len(d))
|
||||||
assert p in d
|
assert p in d
|
||||||
@ -94,18 +87,18 @@ def test_add_path():
|
|||||||
|
|
||||||
def test_add_path_when_path_is_already_there():
|
def test_add_path_when_path_is_already_there():
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p = testpath["onefile"]
|
p = testpath.joinpath("onefile")
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
with raises(AlreadyThereError):
|
with raises(AlreadyThereError):
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
with raises(AlreadyThereError):
|
with raises(AlreadyThereError):
|
||||||
d.add_path(p["foobar"])
|
d.add_path(p.joinpath("foobar"))
|
||||||
eq_(1, len(d))
|
eq_(1, len(d))
|
||||||
|
|
||||||
|
|
||||||
def test_add_path_containing_paths_already_there():
|
def test_add_path_containing_paths_already_there():
|
||||||
d = Directories()
|
d = Directories()
|
||||||
d.add_path(testpath["onefile"])
|
d.add_path(testpath.joinpath("onefile"))
|
||||||
eq_(1, len(d))
|
eq_(1, len(d))
|
||||||
d.add_path(testpath)
|
d.add_path(testpath)
|
||||||
eq_(len(d), 1)
|
eq_(len(d), 1)
|
||||||
@ -114,7 +107,7 @@ def test_add_path_containing_paths_already_there():
|
|||||||
|
|
||||||
def test_add_path_non_latin(tmpdir):
|
def test_add_path_non_latin(tmpdir):
|
||||||
p = Path(str(tmpdir))
|
p = Path(str(tmpdir))
|
||||||
to_add = p["unicode\u201a"]
|
to_add = p.joinpath("unicode\u201a")
|
||||||
os.mkdir(str(to_add))
|
os.mkdir(str(to_add))
|
||||||
d = Directories()
|
d = Directories()
|
||||||
try:
|
try:
|
||||||
@ -125,25 +118,25 @@ def test_add_path_non_latin(tmpdir):
|
|||||||
|
|
||||||
def test_del():
|
def test_del():
|
||||||
d = Directories()
|
d = Directories()
|
||||||
d.add_path(testpath["onefile"])
|
d.add_path(testpath.joinpath("onefile"))
|
||||||
try:
|
try:
|
||||||
del d[1]
|
del d[1]
|
||||||
assert False
|
assert False
|
||||||
except IndexError:
|
except IndexError:
|
||||||
pass
|
pass
|
||||||
d.add_path(testpath["fs"])
|
d.add_path(testpath.joinpath("fs"))
|
||||||
del d[1]
|
del d[1]
|
||||||
eq_(1, len(d))
|
eq_(1, len(d))
|
||||||
|
|
||||||
|
|
||||||
def test_states():
|
def test_states():
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p = testpath["onefile"]
|
p = testpath.joinpath("onefile")
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
eq_(DirectoryState.NORMAL, d.get_state(p))
|
eq_(DirectoryState.NORMAL, d.get_state(p))
|
||||||
d.set_state(p, DirectoryState.REFERENCE)
|
d.set_state(p, DirectoryState.REFERENCE)
|
||||||
eq_(DirectoryState.REFERENCE, d.get_state(p))
|
eq_(DirectoryState.REFERENCE, d.get_state(p))
|
||||||
eq_(DirectoryState.REFERENCE, d.get_state(p["dir1"]))
|
eq_(DirectoryState.REFERENCE, d.get_state(p.joinpath("dir1")))
|
||||||
eq_(1, len(d.states))
|
eq_(1, len(d.states))
|
||||||
eq_(p, list(d.states.keys())[0])
|
eq_(p, list(d.states.keys())[0])
|
||||||
eq_(DirectoryState.REFERENCE, d.states[p])
|
eq_(DirectoryState.REFERENCE, d.states[p])
|
||||||
@ -152,7 +145,7 @@ def test_states():
|
|||||||
def test_get_state_with_path_not_there():
|
def test_get_state_with_path_not_there():
|
||||||
# When the path's not there, just return DirectoryState.Normal
|
# When the path's not there, just return DirectoryState.Normal
|
||||||
d = Directories()
|
d = Directories()
|
||||||
d.add_path(testpath["onefile"])
|
d.add_path(testpath.joinpath("onefile"))
|
||||||
eq_(d.get_state(testpath), DirectoryState.NORMAL)
|
eq_(d.get_state(testpath), DirectoryState.NORMAL)
|
||||||
|
|
||||||
|
|
||||||
@ -160,26 +153,26 @@ def test_states_overwritten_when_larger_directory_eat_smaller_ones():
|
|||||||
# ref #248
|
# ref #248
|
||||||
# When setting the state of a folder, we overwrite previously set states for subfolders.
|
# When setting the state of a folder, we overwrite previously set states for subfolders.
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p = testpath["onefile"]
|
p = testpath.joinpath("onefile")
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
d.set_state(p, DirectoryState.EXCLUDED)
|
d.set_state(p, DirectoryState.EXCLUDED)
|
||||||
d.add_path(testpath)
|
d.add_path(testpath)
|
||||||
d.set_state(testpath, DirectoryState.REFERENCE)
|
d.set_state(testpath, DirectoryState.REFERENCE)
|
||||||
eq_(d.get_state(p), DirectoryState.REFERENCE)
|
eq_(d.get_state(p), DirectoryState.REFERENCE)
|
||||||
eq_(d.get_state(p["dir1"]), DirectoryState.REFERENCE)
|
eq_(d.get_state(p.joinpath("dir1")), DirectoryState.REFERENCE)
|
||||||
eq_(d.get_state(testpath), DirectoryState.REFERENCE)
|
eq_(d.get_state(testpath), DirectoryState.REFERENCE)
|
||||||
|
|
||||||
|
|
||||||
def test_get_files():
|
def test_get_files():
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p = testpath["fs"]
|
p = testpath.joinpath("fs")
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
d.set_state(p["dir1"], DirectoryState.REFERENCE)
|
d.set_state(p.joinpath("dir1"), DirectoryState.REFERENCE)
|
||||||
d.set_state(p["dir2"], DirectoryState.EXCLUDED)
|
d.set_state(p.joinpath("dir2"), DirectoryState.EXCLUDED)
|
||||||
files = list(d.get_files())
|
files = list(d.get_files())
|
||||||
eq_(5, len(files))
|
eq_(5, len(files))
|
||||||
for f in files:
|
for f in files:
|
||||||
if f.path.parent() == p["dir1"]:
|
if f.path.parent == p.joinpath("dir1"):
|
||||||
assert f.is_ref
|
assert f.is_ref
|
||||||
else:
|
else:
|
||||||
assert not f.is_ref
|
assert not f.is_ref
|
||||||
@ -193,7 +186,7 @@ def test_get_files_with_folders():
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p = testpath["fs"]
|
p = testpath.joinpath("fs")
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
files = list(d.get_files(fileclasses=[FakeFile]))
|
files = list(d.get_files(fileclasses=[FakeFile]))
|
||||||
# We have the 3 root files and the 3 root dirs
|
# We have the 3 root files and the 3 root dirs
|
||||||
@ -202,23 +195,23 @@ def test_get_files_with_folders():
|
|||||||
|
|
||||||
def test_get_folders():
|
def test_get_folders():
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p = testpath["fs"]
|
p = testpath.joinpath("fs")
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
d.set_state(p["dir1"], DirectoryState.REFERENCE)
|
d.set_state(p.joinpath("dir1"), DirectoryState.REFERENCE)
|
||||||
d.set_state(p["dir2"], DirectoryState.EXCLUDED)
|
d.set_state(p.joinpath("dir2"), DirectoryState.EXCLUDED)
|
||||||
folders = list(d.get_folders())
|
folders = list(d.get_folders())
|
||||||
eq_(len(folders), 3)
|
eq_(len(folders), 3)
|
||||||
ref = [f for f in folders if f.is_ref]
|
ref = [f for f in folders if f.is_ref]
|
||||||
not_ref = [f for f in folders if not f.is_ref]
|
not_ref = [f for f in folders if not f.is_ref]
|
||||||
eq_(len(ref), 1)
|
eq_(len(ref), 1)
|
||||||
eq_(ref[0].path, p["dir1"])
|
eq_(ref[0].path, p.joinpath("dir1"))
|
||||||
eq_(len(not_ref), 2)
|
eq_(len(not_ref), 2)
|
||||||
eq_(ref[0].size, 1)
|
eq_(ref[0].size, 1)
|
||||||
|
|
||||||
|
|
||||||
def test_get_files_with_inherited_exclusion():
|
def test_get_files_with_inherited_exclusion():
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p = testpath["onefile"]
|
p = testpath.joinpath("onefile")
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
d.set_state(p, DirectoryState.EXCLUDED)
|
d.set_state(p, DirectoryState.EXCLUDED)
|
||||||
eq_([], list(d.get_files()))
|
eq_([], list(d.get_files()))
|
||||||
@ -234,13 +227,13 @@ def test_save_and_load(tmpdir):
|
|||||||
d1.add_path(p1)
|
d1.add_path(p1)
|
||||||
d1.add_path(p2)
|
d1.add_path(p2)
|
||||||
d1.set_state(p1, DirectoryState.REFERENCE)
|
d1.set_state(p1, DirectoryState.REFERENCE)
|
||||||
d1.set_state(p1["dir1"], DirectoryState.EXCLUDED)
|
d1.set_state(p1.joinpath("dir1"), DirectoryState.EXCLUDED)
|
||||||
tmpxml = str(tmpdir.join("directories_testunit.xml"))
|
tmpxml = str(tmpdir.join("directories_testunit.xml"))
|
||||||
d1.save_to_file(tmpxml)
|
d1.save_to_file(tmpxml)
|
||||||
d2.load_from_file(tmpxml)
|
d2.load_from_file(tmpxml)
|
||||||
eq_(2, len(d2))
|
eq_(2, len(d2))
|
||||||
eq_(DirectoryState.REFERENCE, d2.get_state(p1))
|
eq_(DirectoryState.REFERENCE, d2.get_state(p1))
|
||||||
eq_(DirectoryState.EXCLUDED, d2.get_state(p1["dir1"]))
|
eq_(DirectoryState.EXCLUDED, d2.get_state(p1.joinpath("dir1")))
|
||||||
|
|
||||||
|
|
||||||
def test_invalid_path():
|
def test_invalid_path():
|
||||||
@ -268,7 +261,7 @@ def test_load_from_file_with_invalid_path(tmpdir):
|
|||||||
# This test simulates a load from file resulting in a
|
# This test simulates a load from file resulting in a
|
||||||
# InvalidPath raise. Other directories must be loaded.
|
# InvalidPath raise. Other directories must be loaded.
|
||||||
d1 = Directories()
|
d1 = Directories()
|
||||||
d1.add_path(testpath["onefile"])
|
d1.add_path(testpath.joinpath("onefile"))
|
||||||
# Will raise InvalidPath upon loading
|
# Will raise InvalidPath upon loading
|
||||||
p = Path(str(tmpdir.join("toremove")))
|
p = Path(str(tmpdir.join("toremove")))
|
||||||
p.mkdir()
|
p.mkdir()
|
||||||
@ -283,11 +276,11 @@ def test_load_from_file_with_invalid_path(tmpdir):
|
|||||||
|
|
||||||
def test_unicode_save(tmpdir):
|
def test_unicode_save(tmpdir):
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p1 = Path(str(tmpdir))["hello\xe9"]
|
p1 = Path(str(tmpdir), "hello\xe9")
|
||||||
p1.mkdir()
|
p1.mkdir()
|
||||||
p1["foo\xe9"].mkdir()
|
p1.joinpath("foo\xe9").mkdir()
|
||||||
d.add_path(p1)
|
d.add_path(p1)
|
||||||
d.set_state(p1["foo\xe9"], DirectoryState.EXCLUDED)
|
d.set_state(p1.joinpath("foo\xe9"), DirectoryState.EXCLUDED)
|
||||||
tmpxml = str(tmpdir.join("directories_testunit.xml"))
|
tmpxml = str(tmpdir.join("directories_testunit.xml"))
|
||||||
try:
|
try:
|
||||||
d.save_to_file(tmpxml)
|
d.save_to_file(tmpxml)
|
||||||
@ -297,12 +290,12 @@ def test_unicode_save(tmpdir):
|
|||||||
|
|
||||||
def test_get_files_refreshes_its_directories():
|
def test_get_files_refreshes_its_directories():
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p = testpath["fs"]
|
p = testpath.joinpath("fs")
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
files = d.get_files()
|
files = d.get_files()
|
||||||
eq_(6, len(list(files)))
|
eq_(6, len(list(files)))
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
os.remove(str(p["dir1"]["file1.test"]))
|
os.remove(str(p.joinpath("dir1", "file1.test")))
|
||||||
files = d.get_files()
|
files = d.get_files()
|
||||||
eq_(5, len(list(files)))
|
eq_(5, len(list(files)))
|
||||||
|
|
||||||
@ -311,15 +304,15 @@ def test_get_files_does_not_choke_on_non_existing_directories(tmpdir):
|
|||||||
d = Directories()
|
d = Directories()
|
||||||
p = Path(str(tmpdir))
|
p = Path(str(tmpdir))
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
p.rmtree()
|
shutil.rmtree(str(p))
|
||||||
eq_([], list(d.get_files()))
|
eq_([], list(d.get_files()))
|
||||||
|
|
||||||
|
|
||||||
def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
|
def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p = Path(str(tmpdir))
|
p = Path(str(tmpdir))
|
||||||
hidden_dir_path = p[".foo"]
|
hidden_dir_path = p.joinpath(".foo")
|
||||||
p[".foo"].mkdir()
|
p.joinpath(".foo").mkdir()
|
||||||
d.add_path(p)
|
d.add_path(p)
|
||||||
eq_(d.get_state(hidden_dir_path), DirectoryState.EXCLUDED)
|
eq_(d.get_state(hidden_dir_path), DirectoryState.EXCLUDED)
|
||||||
# But it can be overriden
|
# But it can be overriden
|
||||||
@ -331,22 +324,23 @@ def test_default_path_state_override(tmpdir):
|
|||||||
# It's possible for a subclass to override the default state of a path
|
# It's possible for a subclass to override the default state of a path
|
||||||
class MyDirectories(Directories):
|
class MyDirectories(Directories):
|
||||||
def _default_state_for_path(self, path):
|
def _default_state_for_path(self, path):
|
||||||
if "foobar" in path:
|
if "foobar" in path.parts:
|
||||||
return DirectoryState.EXCLUDED
|
return DirectoryState.EXCLUDED
|
||||||
|
return DirectoryState.NORMAL
|
||||||
|
|
||||||
d = MyDirectories()
|
d = MyDirectories()
|
||||||
p1 = Path(str(tmpdir))
|
p1 = Path(str(tmpdir))
|
||||||
p1["foobar"].mkdir()
|
p1.joinpath("foobar").mkdir()
|
||||||
p1["foobar/somefile"].open("w").close()
|
p1.joinpath("foobar/somefile").touch()
|
||||||
p1["foobaz"].mkdir()
|
p1.joinpath("foobaz").mkdir()
|
||||||
p1["foobaz/somefile"].open("w").close()
|
p1.joinpath("foobaz/somefile").touch()
|
||||||
d.add_path(p1)
|
d.add_path(p1)
|
||||||
eq_(d.get_state(p1["foobaz"]), DirectoryState.NORMAL)
|
eq_(d.get_state(p1.joinpath("foobaz")), DirectoryState.NORMAL)
|
||||||
eq_(d.get_state(p1["foobar"]), DirectoryState.EXCLUDED)
|
eq_(d.get_state(p1.joinpath("foobar")), DirectoryState.EXCLUDED)
|
||||||
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
|
eq_(len(list(d.get_files())), 1) # only the 'foobaz' file is there
|
||||||
# However, the default state can be changed
|
# However, the default state can be changed
|
||||||
d.set_state(p1["foobar"], DirectoryState.NORMAL)
|
d.set_state(p1.joinpath("foobar"), DirectoryState.NORMAL)
|
||||||
eq_(d.get_state(p1["foobar"]), DirectoryState.NORMAL)
|
eq_(d.get_state(p1.joinpath("foobar")), DirectoryState.NORMAL)
|
||||||
eq_(len(list(d.get_files())), 2)
|
eq_(len(list(d.get_files())), 2)
|
||||||
|
|
||||||
|
|
||||||
@ -372,42 +366,42 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
|
|||||||
self.d._exclude_list.add(regex)
|
self.d._exclude_list.add(regex)
|
||||||
self.d._exclude_list.mark(regex)
|
self.d._exclude_list.mark(regex)
|
||||||
p1 = Path(str(tmpdir))
|
p1 = Path(str(tmpdir))
|
||||||
p1["$Recycle.Bin"].mkdir()
|
p1.joinpath("$Recycle.Bin").mkdir()
|
||||||
p1["$Recycle.Bin"]["subdir"].mkdir()
|
p1.joinpath("$Recycle.Bin", "subdir").mkdir()
|
||||||
self.d.add_path(p1)
|
self.d.add_path(p1)
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.EXCLUDED)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin")), DirectoryState.EXCLUDED)
|
||||||
# By default, subdirs should be excluded too, but this can be overridden separately
|
# By default, subdirs should be excluded too, but this can be overridden separately
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.EXCLUDED)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdir")), DirectoryState.EXCLUDED)
|
||||||
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.NORMAL)
|
self.d.set_state(p1.joinpath("$Recycle.Bin", "subdir"), DirectoryState.NORMAL)
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdir")), DirectoryState.NORMAL)
|
||||||
|
|
||||||
def test_exclude_refined(self, tmpdir):
|
def test_exclude_refined(self, tmpdir):
|
||||||
regex1 = r"^\$Recycle\.Bin$"
|
regex1 = r"^\$Recycle\.Bin$"
|
||||||
self.d._exclude_list.add(regex1)
|
self.d._exclude_list.add(regex1)
|
||||||
self.d._exclude_list.mark(regex1)
|
self.d._exclude_list.mark(regex1)
|
||||||
p1 = Path(str(tmpdir))
|
p1 = Path(str(tmpdir))
|
||||||
p1["$Recycle.Bin"].mkdir()
|
p1.joinpath("$Recycle.Bin").mkdir()
|
||||||
p1["$Recycle.Bin"]["somefile.png"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "somefile.png").touch()
|
||||||
p1["$Recycle.Bin"]["some_unwanted_file.jpg"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "some_unwanted_file.jpg").touch()
|
||||||
p1["$Recycle.Bin"]["subdir"].mkdir()
|
p1.joinpath("$Recycle.Bin", "subdir").mkdir()
|
||||||
p1["$Recycle.Bin"]["subdir"]["somesubdirfile.png"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "subdir", "somesubdirfile.png").touch()
|
||||||
p1["$Recycle.Bin"]["subdir"]["unwanted_subdirfile.gif"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "subdir", "unwanted_subdirfile.gif").touch()
|
||||||
p1["$Recycle.Bin"]["subdar"].mkdir()
|
p1.joinpath("$Recycle.Bin", "subdar").mkdir()
|
||||||
p1["$Recycle.Bin"]["subdar"]["somesubdarfile.jpeg"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "subdar", "somesubdarfile.jpeg").touch()
|
||||||
p1["$Recycle.Bin"]["subdar"]["unwanted_subdarfile.png"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "subdar", "unwanted_subdarfile.png").touch()
|
||||||
self.d.add_path(p1["$Recycle.Bin"])
|
self.d.add_path(p1.joinpath("$Recycle.Bin"))
|
||||||
|
|
||||||
# Filter should set the default state to Excluded
|
# Filter should set the default state to Excluded
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.EXCLUDED)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin")), DirectoryState.EXCLUDED)
|
||||||
# The subdir should inherit its parent state
|
# The subdir should inherit its parent state
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.EXCLUDED)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdir")), DirectoryState.EXCLUDED)
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.EXCLUDED)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdar")), DirectoryState.EXCLUDED)
|
||||||
# Override a child path's state
|
# Override a child path's state
|
||||||
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.NORMAL)
|
self.d.set_state(p1.joinpath("$Recycle.Bin", "subdir"), DirectoryState.NORMAL)
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdir")), DirectoryState.NORMAL)
|
||||||
# Parent should keep its default state, and the other child too
|
# Parent should keep its default state, and the other child too
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.EXCLUDED)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin")), DirectoryState.EXCLUDED)
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.EXCLUDED)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdar")), DirectoryState.EXCLUDED)
|
||||||
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
||||||
|
|
||||||
# only the 2 files directly under the Normal directory
|
# only the 2 files directly under the Normal directory
|
||||||
@ -419,8 +413,8 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
|
|||||||
assert "somesubdirfile.png" in files
|
assert "somesubdirfile.png" in files
|
||||||
assert "unwanted_subdirfile.gif" in files
|
assert "unwanted_subdirfile.gif" in files
|
||||||
# Overriding the parent should enable all children
|
# Overriding the parent should enable all children
|
||||||
self.d.set_state(p1["$Recycle.Bin"], DirectoryState.NORMAL)
|
self.d.set_state(p1.joinpath("$Recycle.Bin"), DirectoryState.NORMAL)
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.NORMAL)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdar")), DirectoryState.NORMAL)
|
||||||
# all files there
|
# all files there
|
||||||
files = self.get_files_and_expect_num_result(6)
|
files = self.get_files_and_expect_num_result(6)
|
||||||
assert "somefile.png" in files
|
assert "somefile.png" in files
|
||||||
@ -444,7 +438,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
|
|||||||
assert self.d._exclude_list.error(regex3) is None
|
assert self.d._exclude_list.error(regex3) is None
|
||||||
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
||||||
# Directory shouldn't change its state here, unless explicitely done by user
|
# Directory shouldn't change its state here, unless explicitely done by user
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdir")), DirectoryState.NORMAL)
|
||||||
files = self.get_files_and_expect_num_result(5)
|
files = self.get_files_and_expect_num_result(5)
|
||||||
assert "unwanted_subdirfile.gif" not in files
|
assert "unwanted_subdirfile.gif" not in files
|
||||||
assert "unwanted_subdarfile.png" in files
|
assert "unwanted_subdarfile.png" in files
|
||||||
@ -453,15 +447,15 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
|
|||||||
regex4 = r".*subdir$"
|
regex4 = r".*subdir$"
|
||||||
self.d._exclude_list.rename(regex3, regex4)
|
self.d._exclude_list.rename(regex3, regex4)
|
||||||
assert self.d._exclude_list.error(regex4) is None
|
assert self.d._exclude_list.error(regex4) is None
|
||||||
p1["$Recycle.Bin"]["subdar"]["file_ending_with_subdir"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "subdar", "file_ending_with_subdir").touch()
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.EXCLUDED)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdir")), DirectoryState.EXCLUDED)
|
||||||
files = self.get_files_and_expect_num_result(4)
|
files = self.get_files_and_expect_num_result(4)
|
||||||
assert "file_ending_with_subdir" not in files
|
assert "file_ending_with_subdir" not in files
|
||||||
assert "somesubdarfile.jpeg" in files
|
assert "somesubdarfile.jpeg" in files
|
||||||
assert "somesubdirfile.png" not in files
|
assert "somesubdirfile.png" not in files
|
||||||
assert "unwanted_subdirfile.gif" not in files
|
assert "unwanted_subdirfile.gif" not in files
|
||||||
self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.NORMAL)
|
self.d.set_state(p1.joinpath("$Recycle.Bin", "subdir"), DirectoryState.NORMAL)
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdir")), DirectoryState.NORMAL)
|
||||||
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
||||||
files = self.get_files_and_expect_num_result(6)
|
files = self.get_files_and_expect_num_result(6)
|
||||||
assert "file_ending_with_subdir" not in files
|
assert "file_ending_with_subdir" not in files
|
||||||
@ -471,9 +465,9 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
|
|||||||
regex5 = r".*subdir.*"
|
regex5 = r".*subdir.*"
|
||||||
self.d._exclude_list.rename(regex4, regex5)
|
self.d._exclude_list.rename(regex4, regex5)
|
||||||
# Files containing substring should be filtered
|
# Files containing substring should be filtered
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdir")), DirectoryState.NORMAL)
|
||||||
# The path should not match, only the filename, the "subdir" in the directory name shouldn't matter
|
# The path should not match, only the filename, the "subdir" in the directory name shouldn't matter
|
||||||
p1["$Recycle.Bin"]["subdir"]["file_which_shouldnt_match"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "subdir", "file_which_shouldnt_match").touch()
|
||||||
files = self.get_files_and_expect_num_result(5)
|
files = self.get_files_and_expect_num_result(5)
|
||||||
assert "somesubdirfile.png" not in files
|
assert "somesubdirfile.png" not in files
|
||||||
assert "unwanted_subdirfile.gif" not in files
|
assert "unwanted_subdirfile.gif" not in files
|
||||||
@ -493,7 +487,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
|
|||||||
assert self.d._exclude_list.error(regex6) is None
|
assert self.d._exclude_list.error(regex6) is None
|
||||||
assert regex6 in self.d._exclude_list
|
assert regex6 in self.d._exclude_list
|
||||||
# This still should not be affected
|
# This still should not be affected
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "subdir")), DirectoryState.NORMAL)
|
||||||
files = self.get_files_and_expect_num_result(5)
|
files = self.get_files_and_expect_num_result(5)
|
||||||
# These files are under the "/subdir" directory
|
# These files are under the "/subdir" directory
|
||||||
assert "somesubdirfile.png" not in files
|
assert "somesubdirfile.png" not in files
|
||||||
@ -505,20 +499,20 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
|
|||||||
|
|
||||||
def test_japanese_unicode(self, tmpdir):
|
def test_japanese_unicode(self, tmpdir):
|
||||||
p1 = Path(str(tmpdir))
|
p1 = Path(str(tmpdir))
|
||||||
p1["$Recycle.Bin"].mkdir()
|
p1.joinpath("$Recycle.Bin").mkdir()
|
||||||
p1["$Recycle.Bin"]["somerecycledfile.png"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "somerecycledfile.png").touch()
|
||||||
p1["$Recycle.Bin"]["some_unwanted_file.jpg"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "some_unwanted_file.jpg").touch()
|
||||||
p1["$Recycle.Bin"]["subdir"].mkdir()
|
p1.joinpath("$Recycle.Bin", "subdir").mkdir()
|
||||||
p1["$Recycle.Bin"]["subdir"]["過去白濁物語~]_カラー.jpg"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "subdir", "過去白濁物語~]_カラー.jpg").touch()
|
||||||
p1["$Recycle.Bin"]["思叫物語"].mkdir()
|
p1.joinpath("$Recycle.Bin", "思叫物語").mkdir()
|
||||||
p1["$Recycle.Bin"]["思叫物語"]["なししろ会う前"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "思叫物語", "なししろ会う前").touch()
|
||||||
p1["$Recycle.Bin"]["思叫物語"]["堂~ロ"].open("w").close()
|
p1.joinpath("$Recycle.Bin", "思叫物語", "堂~ロ").touch()
|
||||||
self.d.add_path(p1["$Recycle.Bin"])
|
self.d.add_path(p1.joinpath("$Recycle.Bin"))
|
||||||
regex3 = r".*物語.*"
|
regex3 = r".*物語.*"
|
||||||
self.d._exclude_list.add(regex3)
|
self.d._exclude_list.add(regex3)
|
||||||
self.d._exclude_list.mark(regex3)
|
self.d._exclude_list.mark(regex3)
|
||||||
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
# print(f"get_folders(): {[x for x in self.d.get_folders()]}")
|
||||||
eq_(self.d.get_state(p1["$Recycle.Bin"]["思叫物語"]), DirectoryState.EXCLUDED)
|
eq_(self.d.get_state(p1.joinpath("$Recycle.Bin", "思叫物語")), DirectoryState.EXCLUDED)
|
||||||
files = self.get_files_and_expect_num_result(2)
|
files = self.get_files_and_expect_num_result(2)
|
||||||
assert "過去白濁物語~]_カラー.jpg" not in files
|
assert "過去白濁物語~]_カラー.jpg" not in files
|
||||||
assert "なししろ会う前" not in files
|
assert "なししろ会う前" not in files
|
||||||
@ -527,7 +521,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
|
|||||||
regex4 = r".*物語$"
|
regex4 = r".*物語$"
|
||||||
self.d._exclude_list.rename(regex3, regex4)
|
self.d._exclude_list.rename(regex3, regex4)
|
||||||
assert self.d._exclude_list.error(regex4) is None
|
assert self.d._exclude_list.error(regex4) is None
|
||||||
self.d.set_state(p1["$Recycle.Bin"]["思叫物語"], DirectoryState.NORMAL)
|
self.d.set_state(p1.joinpath("$Recycle.Bin", "思叫物語"), DirectoryState.NORMAL)
|
||||||
files = self.get_files_and_expect_num_result(5)
|
files = self.get_files_and_expect_num_result(5)
|
||||||
assert "過去白濁物語~]_カラー.jpg" in files
|
assert "過去白濁物語~]_カラー.jpg" in files
|
||||||
assert "なししろ会う前" in files
|
assert "なししろ会う前" in files
|
||||||
@ -539,15 +533,15 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
|
|||||||
self.d._exclude_list.add(regex)
|
self.d._exclude_list.add(regex)
|
||||||
self.d._exclude_list.mark(regex)
|
self.d._exclude_list.mark(regex)
|
||||||
p1 = Path(str(tmpdir))
|
p1 = Path(str(tmpdir))
|
||||||
p1["foobar"].mkdir()
|
p1.joinpath("foobar").mkdir()
|
||||||
p1["foobar"][".hidden_file.txt"].open("w").close()
|
p1.joinpath("foobar", ".hidden_file.txt").touch()
|
||||||
p1["foobar"][".hidden_dir"].mkdir()
|
p1.joinpath("foobar", ".hidden_dir").mkdir()
|
||||||
p1["foobar"][".hidden_dir"]["foobar.jpg"].open("w").close()
|
p1.joinpath("foobar", ".hidden_dir", "foobar.jpg").touch()
|
||||||
p1["foobar"][".hidden_dir"][".hidden_subfile.png"].open("w").close()
|
p1.joinpath("foobar", ".hidden_dir", ".hidden_subfile.png").touch()
|
||||||
self.d.add_path(p1["foobar"])
|
self.d.add_path(p1.joinpath("foobar"))
|
||||||
# It should not inherit its parent's state originally
|
# It should not inherit its parent's state originally
|
||||||
eq_(self.d.get_state(p1["foobar"][".hidden_dir"]), DirectoryState.EXCLUDED)
|
eq_(self.d.get_state(p1.joinpath("foobar", ".hidden_dir")), DirectoryState.EXCLUDED)
|
||||||
self.d.set_state(p1["foobar"][".hidden_dir"], DirectoryState.NORMAL)
|
self.d.set_state(p1.joinpath("foobar", ".hidden_dir"), DirectoryState.NORMAL)
|
||||||
# The files should still be filtered
|
# The files should still be filtered
|
||||||
files = self.get_files_and_expect_num_result(1)
|
files = self.get_files_and_expect_num_result(1)
|
||||||
eq_(len(self.d._exclude_list.compiled_paths), 0)
|
eq_(len(self.d._exclude_list.compiled_paths), 0)
|
||||||
|
@ -10,9 +10,9 @@ from hscommon.jobprogress import job
|
|||||||
from hscommon.util import first
|
from hscommon.util import first
|
||||||
from hscommon.testutil import eq_, log_calls
|
from hscommon.testutil import eq_, log_calls
|
||||||
|
|
||||||
from .base import NamedObject
|
from core.tests.base import NamedObject
|
||||||
from .. import engine
|
from core import engine
|
||||||
from ..engine import (
|
from core.engine import (
|
||||||
get_match,
|
get_match,
|
||||||
getwords,
|
getwords,
|
||||||
Group,
|
Group,
|
||||||
@ -71,7 +71,10 @@ class TestCasegetwords:
|
|||||||
|
|
||||||
def test_unicode(self):
|
def test_unicode(self):
|
||||||
eq_(["e", "c", "0", "a", "o", "u", "e", "u"], getwords("é ç 0 à ö û è ¤ ù"))
|
eq_(["e", "c", "0", "a", "o", "u", "e", "u"], getwords("é ç 0 à ö û è ¤ ù"))
|
||||||
eq_(["02", "君のこころは輝いてるかい?", "国木田花丸", "solo", "ver"], getwords("02 君のこころは輝いてるかい? 国木田花丸 Solo Ver"))
|
eq_(
|
||||||
|
["02", "君のこころは輝いてるかい?", "国木田花丸", "solo", "ver"],
|
||||||
|
getwords("02 君のこころは輝いてるかい? 国木田花丸 Solo Ver"),
|
||||||
|
)
|
||||||
|
|
||||||
def test_splitter_chars(self):
|
def test_splitter_chars(self):
|
||||||
eq_(
|
eq_(
|
||||||
@ -271,9 +274,9 @@ class TestCaseBuildWordDict:
|
|||||||
class TestCaseMergeSimilarWords:
|
class TestCaseMergeSimilarWords:
|
||||||
def test_some_similar_words(self):
|
def test_some_similar_words(self):
|
||||||
d = {
|
d = {
|
||||||
"foobar": set([1]),
|
"foobar": {1},
|
||||||
"foobar1": set([2]),
|
"foobar1": {2},
|
||||||
"foobar2": set([3]),
|
"foobar2": {3},
|
||||||
}
|
}
|
||||||
merge_similar_words(d)
|
merge_similar_words(d)
|
||||||
eq_(1, len(d))
|
eq_(1, len(d))
|
||||||
@ -283,8 +286,8 @@ class TestCaseMergeSimilarWords:
|
|||||||
class TestCaseReduceCommonWords:
|
class TestCaseReduceCommonWords:
|
||||||
def test_typical(self):
|
def test_typical(self):
|
||||||
d = {
|
d = {
|
||||||
"foo": set([NamedObject("foo bar", True) for _ in range(50)]),
|
"foo": {NamedObject("foo bar", True) for _ in range(50)},
|
||||||
"bar": set([NamedObject("foo bar", True) for _ in range(49)]),
|
"bar": {NamedObject("foo bar", True) for _ in range(49)},
|
||||||
}
|
}
|
||||||
reduce_common_words(d, 50)
|
reduce_common_words(d, 50)
|
||||||
assert "foo" not in d
|
assert "foo" not in d
|
||||||
@ -293,7 +296,7 @@ class TestCaseReduceCommonWords:
|
|||||||
def test_dont_remove_objects_with_only_common_words(self):
|
def test_dont_remove_objects_with_only_common_words(self):
|
||||||
d = {
|
d = {
|
||||||
"common": set([NamedObject("common uncommon", True) for _ in range(50)] + [NamedObject("common", True)]),
|
"common": set([NamedObject("common uncommon", True) for _ in range(50)] + [NamedObject("common", True)]),
|
||||||
"uncommon": set([NamedObject("common uncommon", True)]),
|
"uncommon": {NamedObject("common uncommon", True)},
|
||||||
}
|
}
|
||||||
reduce_common_words(d, 50)
|
reduce_common_words(d, 50)
|
||||||
eq_(1, len(d["common"]))
|
eq_(1, len(d["common"]))
|
||||||
@ -302,7 +305,7 @@ class TestCaseReduceCommonWords:
|
|||||||
def test_values_still_are_set_instances(self):
|
def test_values_still_are_set_instances(self):
|
||||||
d = {
|
d = {
|
||||||
"common": set([NamedObject("common uncommon", True) for _ in range(50)] + [NamedObject("common", True)]),
|
"common": set([NamedObject("common uncommon", True) for _ in range(50)] + [NamedObject("common", True)]),
|
||||||
"uncommon": set([NamedObject("common uncommon", True)]),
|
"uncommon": {NamedObject("common uncommon", True)},
|
||||||
}
|
}
|
||||||
reduce_common_words(d, 50)
|
reduce_common_words(d, 50)
|
||||||
assert isinstance(d["common"], set)
|
assert isinstance(d["common"], set)
|
||||||
@ -312,9 +315,9 @@ class TestCaseReduceCommonWords:
|
|||||||
# If a word has been removed by the reduce, an object in a subsequent common word that
|
# If a word has been removed by the reduce, an object in a subsequent common word that
|
||||||
# contains the word that has been removed would cause a KeyError.
|
# contains the word that has been removed would cause a KeyError.
|
||||||
d = {
|
d = {
|
||||||
"foo": set([NamedObject("foo bar baz", True) for _ in range(50)]),
|
"foo": {NamedObject("foo bar baz", True) for _ in range(50)},
|
||||||
"bar": set([NamedObject("foo bar baz", True) for _ in range(50)]),
|
"bar": {NamedObject("foo bar baz", True) for _ in range(50)},
|
||||||
"baz": set([NamedObject("foo bar baz", True) for _ in range(49)]),
|
"baz": {NamedObject("foo bar baz", True) for _ in range(49)},
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
reduce_common_words(d, 50)
|
reduce_common_words(d, 50)
|
||||||
@ -328,7 +331,7 @@ class TestCaseReduceCommonWords:
|
|||||||
o.words = [["foo", "bar"], ["baz"]]
|
o.words = [["foo", "bar"], ["baz"]]
|
||||||
return o
|
return o
|
||||||
|
|
||||||
d = {"foo": set([create_it() for _ in range(50)])}
|
d = {"foo": {create_it() for _ in range(50)}}
|
||||||
try:
|
try:
|
||||||
reduce_common_words(d, 50)
|
reduce_common_words(d, 50)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
@ -343,7 +346,7 @@ class TestCaseReduceCommonWords:
|
|||||||
d = {
|
d = {
|
||||||
"foo": set([NamedObject("foo bar baz", True) for _ in range(49)] + [only_common]),
|
"foo": set([NamedObject("foo bar baz", True) for _ in range(49)] + [only_common]),
|
||||||
"bar": set([NamedObject("foo bar baz", True) for _ in range(49)] + [only_common]),
|
"bar": set([NamedObject("foo bar baz", True) for _ in range(49)] + [only_common]),
|
||||||
"baz": set([NamedObject("foo bar baz", True) for _ in range(49)]),
|
"baz": {NamedObject("foo bar baz", True) for _ in range(49)},
|
||||||
}
|
}
|
||||||
reduce_common_words(d, 50)
|
reduce_common_words(d, 50)
|
||||||
eq_(1, len(d["foo"]))
|
eq_(1, len(d["foo"]))
|
||||||
@ -530,7 +533,7 @@ class TestCaseGetMatches:
|
|||||||
|
|
||||||
|
|
||||||
class TestCaseGetMatchesByContents:
|
class TestCaseGetMatchesByContents:
|
||||||
def test_big_file_partial_hashes(self):
|
def test_big_file_partial_hashing(self):
|
||||||
smallsize = 1
|
smallsize = 1
|
||||||
bigsize = 100 * 1024 * 1024 # 100MB
|
bigsize = 100 * 1024 * 1024 # 100MB
|
||||||
f = [
|
f = [
|
||||||
@ -539,17 +542,17 @@ class TestCaseGetMatchesByContents:
|
|||||||
no("smallfoo", size=smallsize),
|
no("smallfoo", size=smallsize),
|
||||||
no("smallbar", size=smallsize),
|
no("smallbar", size=smallsize),
|
||||||
]
|
]
|
||||||
f[0].md5 = f[0].md5partial = f[0].md5samples = "foobar"
|
f[0].digest = f[0].digest_partial = f[0].digest_samples = "foobar"
|
||||||
f[1].md5 = f[1].md5partial = f[1].md5samples = "foobar"
|
f[1].digest = f[1].digest_partial = f[1].digest_samples = "foobar"
|
||||||
f[2].md5 = f[2].md5partial = "bleh"
|
f[2].digest = f[2].digest_partial = "bleh"
|
||||||
f[3].md5 = f[3].md5partial = "bleh"
|
f[3].digest = f[3].digest_partial = "bleh"
|
||||||
r = getmatches_by_contents(f, bigsize=bigsize)
|
r = getmatches_by_contents(f, bigsize=bigsize)
|
||||||
eq_(len(r), 2)
|
eq_(len(r), 2)
|
||||||
# User disabled optimization for big files, compute hashes as usual
|
# User disabled optimization for big files, compute digests as usual
|
||||||
r = getmatches_by_contents(f, bigsize=0)
|
r = getmatches_by_contents(f, bigsize=0)
|
||||||
eq_(len(r), 2)
|
eq_(len(r), 2)
|
||||||
# Other file is now slightly different, md5partial is still the same
|
# Other file is now slightly different, digest_partial is still the same
|
||||||
f[1].md5 = f[1].md5samples = "foobardiff"
|
f[1].digest = f[1].digest_samples = "foobardiff"
|
||||||
r = getmatches_by_contents(f, bigsize=bigsize)
|
r = getmatches_by_contents(f, bigsize=bigsize)
|
||||||
# Successfully filter it out
|
# Successfully filter it out
|
||||||
eq_(len(r), 1)
|
eq_(len(r), 1)
|
||||||
@ -884,7 +887,7 @@ class TestCaseGetGroups:
|
|||||||
# If, with a (A, B, C, D) set, all match with A, but C and D don't match with B and that the
|
# If, with a (A, B, C, D) set, all match with A, but C and D don't match with B and that the
|
||||||
# (A, B) match is the highest (thus resulting in an (A, B) group), still match C and D
|
# (A, B) match is the highest (thus resulting in an (A, B) group), still match C and D
|
||||||
# in a separate group instead of discarding them.
|
# in a separate group instead of discarding them.
|
||||||
A, B, C, D = [NamedObject() for _ in range(4)]
|
A, B, C, D = (NamedObject() for _ in range(4))
|
||||||
m1 = Match(A, B, 90) # This is the strongest "A" match
|
m1 = Match(A, B, 90) # This is the strongest "A" match
|
||||||
m2 = Match(A, C, 80) # Because C doesn't match with B, it won't be in the group
|
m2 = Match(A, C, 80) # Because C doesn't match with B, it won't be in the group
|
||||||
m3 = Match(A, D, 80) # Same thing for D
|
m3 = Match(A, D, 80) # Same thing for D
|
||||||
|
@ -10,8 +10,8 @@ from xml.etree import ElementTree as ET
|
|||||||
from hscommon.testutil import eq_
|
from hscommon.testutil import eq_
|
||||||
from hscommon.plat import ISWINDOWS
|
from hscommon.plat import ISWINDOWS
|
||||||
|
|
||||||
from .base import DupeGuru
|
from core.tests.base import DupeGuru
|
||||||
from ..exclude import ExcludeList, ExcludeDict, default_regexes, AlreadyThereException
|
from core.exclude import ExcludeList, ExcludeDict, default_regexes, AlreadyThereException
|
||||||
|
|
||||||
from re import error
|
from re import error
|
||||||
|
|
||||||
@ -289,8 +289,8 @@ class TestCaseListEmptyUnion(TestCaseListEmpty):
|
|||||||
compiled = [x for x in self.exclude_list.compiled]
|
compiled = [x for x in self.exclude_list.compiled]
|
||||||
assert regex not in compiled
|
assert regex not in compiled
|
||||||
# Need to escape both to get the same strings after compilation
|
# Need to escape both to get the same strings after compilation
|
||||||
compiled_escaped = set([x.encode("unicode-escape").decode() for x in compiled[0].pattern.split("|")])
|
compiled_escaped = {x.encode("unicode-escape").decode() for x in compiled[0].pattern.split("|")}
|
||||||
default_escaped = set([x.encode("unicode-escape").decode() for x in default_regexes])
|
default_escaped = {x.encode("unicode-escape").decode() for x in default_regexes}
|
||||||
assert compiled_escaped == default_escaped
|
assert compiled_escaped == default_escaped
|
||||||
eq_(len(default_regexes), len(compiled[0].pattern.split("|")))
|
eq_(len(default_regexes), len(compiled[0].pattern.split("|")))
|
||||||
|
|
||||||
@ -366,8 +366,8 @@ class TestCaseDictEmptyUnion(TestCaseDictEmpty):
|
|||||||
compiled = [x for x in self.exclude_list.compiled]
|
compiled = [x for x in self.exclude_list.compiled]
|
||||||
assert regex not in compiled
|
assert regex not in compiled
|
||||||
# Need to escape both to get the same strings after compilation
|
# Need to escape both to get the same strings after compilation
|
||||||
compiled_escaped = set([x.encode("unicode-escape").decode() for x in compiled[0].pattern.split("|")])
|
compiled_escaped = {x.encode("unicode-escape").decode() for x in compiled[0].pattern.split("|")}
|
||||||
default_escaped = set([x.encode("unicode-escape").decode() for x in default_regexes])
|
default_escaped = {x.encode("unicode-escape").decode() for x in default_regexes}
|
||||||
assert compiled_escaped == default_escaped
|
assert compiled_escaped == default_escaped
|
||||||
eq_(len(default_regexes), len(compiled[0].pattern.split("|")))
|
eq_(len(default_regexes), len(compiled[0].pattern.split("|")))
|
||||||
|
|
||||||
|
@ -6,43 +6,47 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
import hashlib
|
import typing
|
||||||
from os import urandom
|
from os import urandom
|
||||||
|
|
||||||
from hscommon.path import Path
|
from pathlib import Path
|
||||||
from hscommon.testutil import eq_
|
from hscommon.testutil import eq_
|
||||||
from core.tests.directories_test import create_fake_fs
|
from core.tests.directories_test import create_fake_fs
|
||||||
|
|
||||||
from .. import fs
|
from core import fs
|
||||||
|
|
||||||
|
hasher: typing.Callable
|
||||||
|
try:
|
||||||
|
import xxhash
|
||||||
|
|
||||||
|
hasher = xxhash.xxh128
|
||||||
|
except ImportError:
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
hasher = hashlib.md5
|
||||||
|
|
||||||
|
|
||||||
def create_fake_fs_with_random_data(rootpath):
|
def create_fake_fs_with_random_data(rootpath):
|
||||||
rootpath = rootpath["fs"]
|
rootpath = rootpath.joinpath("fs")
|
||||||
rootpath.mkdir()
|
rootpath.mkdir()
|
||||||
rootpath["dir1"].mkdir()
|
rootpath.joinpath("dir1").mkdir()
|
||||||
rootpath["dir2"].mkdir()
|
rootpath.joinpath("dir2").mkdir()
|
||||||
rootpath["dir3"].mkdir()
|
rootpath.joinpath("dir3").mkdir()
|
||||||
fp = rootpath["file1.test"].open("wb")
|
|
||||||
data1 = urandom(200 * 1024) # 200KiB
|
data1 = urandom(200 * 1024) # 200KiB
|
||||||
data2 = urandom(1024 * 1024) # 1MiB
|
data2 = urandom(1024 * 1024) # 1MiB
|
||||||
data3 = urandom(10 * 1024 * 1024) # 10MiB
|
data3 = urandom(10 * 1024 * 1024) # 10MiB
|
||||||
fp.write(data1)
|
with rootpath.joinpath("file1.test").open("wb") as fp:
|
||||||
fp.close()
|
fp.write(data1)
|
||||||
fp = rootpath["file2.test"].open("wb")
|
with rootpath.joinpath("file2.test").open("wb") as fp:
|
||||||
fp.write(data2)
|
fp.write(data2)
|
||||||
fp.close()
|
with rootpath.joinpath("file3.test").open("wb") as fp:
|
||||||
fp = rootpath["file3.test"].open("wb")
|
fp.write(data3)
|
||||||
fp.write(data3)
|
with rootpath.joinpath("dir1", "file1.test").open("wb") as fp:
|
||||||
fp.close()
|
fp.write(data1)
|
||||||
fp = rootpath["dir1"]["file1.test"].open("wb")
|
with rootpath.joinpath("dir2", "file2.test").open("wb") as fp:
|
||||||
fp.write(data1)
|
fp.write(data2)
|
||||||
fp.close()
|
with rootpath.joinpath("dir3", "file3.test").open("wb") as fp:
|
||||||
fp = rootpath["dir2"]["file2.test"].open("wb")
|
fp.write(data3)
|
||||||
fp.write(data2)
|
|
||||||
fp.close()
|
|
||||||
fp = rootpath["dir3"]["file3.test"].open("wb")
|
|
||||||
fp.write(data3)
|
|
||||||
fp.close()
|
|
||||||
return rootpath
|
return rootpath
|
||||||
|
|
||||||
|
|
||||||
@ -52,54 +56,54 @@ def test_size_aggregates_subfiles(tmpdir):
|
|||||||
eq_(b.size, 12)
|
eq_(b.size, 12)
|
||||||
|
|
||||||
|
|
||||||
def test_md5_aggregate_subfiles_sorted(tmpdir):
|
def test_digest_aggregate_subfiles_sorted(tmpdir):
|
||||||
# dir.allfiles can return child in any order. Thus, bundle.md5 must aggregate
|
# dir.allfiles can return child in any order. Thus, bundle.digest must aggregate
|
||||||
# all files' md5 it contains, but it must make sure that it does so in the
|
# all files' digests it contains, but it must make sure that it does so in the
|
||||||
# same order everytime.
|
# same order everytime.
|
||||||
p = create_fake_fs_with_random_data(Path(str(tmpdir)))
|
p = create_fake_fs_with_random_data(Path(str(tmpdir)))
|
||||||
b = fs.Folder(p)
|
b = fs.Folder(p)
|
||||||
md51 = fs.File(p["dir1"]["file1.test"]).md5
|
digest1 = fs.File(p.joinpath("dir1", "file1.test")).digest
|
||||||
md52 = fs.File(p["dir2"]["file2.test"]).md5
|
digest2 = fs.File(p.joinpath("dir2", "file2.test")).digest
|
||||||
md53 = fs.File(p["dir3"]["file3.test"]).md5
|
digest3 = fs.File(p.joinpath("dir3", "file3.test")).digest
|
||||||
md54 = fs.File(p["file1.test"]).md5
|
digest4 = fs.File(p.joinpath("file1.test")).digest
|
||||||
md55 = fs.File(p["file2.test"]).md5
|
digest5 = fs.File(p.joinpath("file2.test")).digest
|
||||||
md56 = fs.File(p["file3.test"]).md5
|
digest6 = fs.File(p.joinpath("file3.test")).digest
|
||||||
# The expected md5 is the md5 of md5s for folders and the direct md5 for files
|
# The expected digest is the hash of digests for folders and the direct digest for files
|
||||||
folder_md51 = hashlib.md5(md51).digest()
|
folder_digest1 = hasher(digest1).digest()
|
||||||
folder_md52 = hashlib.md5(md52).digest()
|
folder_digest2 = hasher(digest2).digest()
|
||||||
folder_md53 = hashlib.md5(md53).digest()
|
folder_digest3 = hasher(digest3).digest()
|
||||||
md5 = hashlib.md5(folder_md51 + folder_md52 + folder_md53 + md54 + md55 + md56)
|
digest = hasher(folder_digest1 + folder_digest2 + folder_digest3 + digest4 + digest5 + digest6).digest()
|
||||||
eq_(b.md5, md5.digest())
|
eq_(b.digest, digest)
|
||||||
|
|
||||||
|
|
||||||
def test_partial_md5_aggregate_subfile_sorted(tmpdir):
|
def test_partial_digest_aggregate_subfile_sorted(tmpdir):
|
||||||
p = create_fake_fs_with_random_data(Path(str(tmpdir)))
|
p = create_fake_fs_with_random_data(Path(str(tmpdir)))
|
||||||
b = fs.Folder(p)
|
b = fs.Folder(p)
|
||||||
md51 = fs.File(p["dir1"]["file1.test"]).md5partial
|
digest1 = fs.File(p.joinpath("dir1", "file1.test")).digest_partial
|
||||||
md52 = fs.File(p["dir2"]["file2.test"]).md5partial
|
digest2 = fs.File(p.joinpath("dir2", "file2.test")).digest_partial
|
||||||
md53 = fs.File(p["dir3"]["file3.test"]).md5partial
|
digest3 = fs.File(p.joinpath("dir3", "file3.test")).digest_partial
|
||||||
md54 = fs.File(p["file1.test"]).md5partial
|
digest4 = fs.File(p.joinpath("file1.test")).digest_partial
|
||||||
md55 = fs.File(p["file2.test"]).md5partial
|
digest5 = fs.File(p.joinpath("file2.test")).digest_partial
|
||||||
md56 = fs.File(p["file3.test"]).md5partial
|
digest6 = fs.File(p.joinpath("file3.test")).digest_partial
|
||||||
# The expected md5 is the md5 of md5s for folders and the direct md5 for files
|
# The expected digest is the hash of digests for folders and the direct digest for files
|
||||||
folder_md51 = hashlib.md5(md51).digest()
|
folder_digest1 = hasher(digest1).digest()
|
||||||
folder_md52 = hashlib.md5(md52).digest()
|
folder_digest2 = hasher(digest2).digest()
|
||||||
folder_md53 = hashlib.md5(md53).digest()
|
folder_digest3 = hasher(digest3).digest()
|
||||||
md5 = hashlib.md5(folder_md51 + folder_md52 + folder_md53 + md54 + md55 + md56)
|
digest = hasher(folder_digest1 + folder_digest2 + folder_digest3 + digest4 + digest5 + digest6).digest()
|
||||||
eq_(b.md5partial, md5.digest())
|
eq_(b.digest_partial, digest)
|
||||||
|
|
||||||
md51 = fs.File(p["dir1"]["file1.test"]).md5samples
|
digest1 = fs.File(p.joinpath("dir1", "file1.test")).digest_samples
|
||||||
md52 = fs.File(p["dir2"]["file2.test"]).md5samples
|
digest2 = fs.File(p.joinpath("dir2", "file2.test")).digest_samples
|
||||||
md53 = fs.File(p["dir3"]["file3.test"]).md5samples
|
digest3 = fs.File(p.joinpath("dir3", "file3.test")).digest_samples
|
||||||
md54 = fs.File(p["file1.test"]).md5samples
|
digest4 = fs.File(p.joinpath("file1.test")).digest_samples
|
||||||
md55 = fs.File(p["file2.test"]).md5samples
|
digest5 = fs.File(p.joinpath("file2.test")).digest_samples
|
||||||
md56 = fs.File(p["file3.test"]).md5samples
|
digest6 = fs.File(p.joinpath("file3.test")).digest_samples
|
||||||
# The expected md5 is the md5 of md5s for folders and the direct md5 for files
|
# The expected digest is the digest of digests for folders and the direct digest for files
|
||||||
folder_md51 = hashlib.md5(md51).digest()
|
folder_digest1 = hasher(digest1).digest()
|
||||||
folder_md52 = hashlib.md5(md52).digest()
|
folder_digest2 = hasher(digest2).digest()
|
||||||
folder_md53 = hashlib.md5(md53).digest()
|
folder_digest3 = hasher(digest3).digest()
|
||||||
md5 = hashlib.md5(folder_md51 + folder_md52 + folder_md53 + md54 + md55 + md56)
|
digest = hasher(folder_digest1 + folder_digest2 + folder_digest3 + digest4 + digest5 + digest6).digest()
|
||||||
eq_(b.md5samples, md5.digest())
|
eq_(b.digest_samples, digest)
|
||||||
|
|
||||||
|
|
||||||
def test_has_file_attrs(tmpdir):
|
def test_has_file_attrs(tmpdir):
|
||||||
|
@ -10,7 +10,7 @@ from xml.etree import ElementTree as ET
|
|||||||
from pytest import raises
|
from pytest import raises
|
||||||
from hscommon.testutil import eq_
|
from hscommon.testutil import eq_
|
||||||
|
|
||||||
from ..ignore import IgnoreList
|
from core.ignore import IgnoreList
|
||||||
|
|
||||||
|
|
||||||
def test_empty():
|
def test_empty():
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
from hscommon.testutil import eq_
|
from hscommon.testutil import eq_
|
||||||
|
|
||||||
from ..markable import MarkableList, Markable
|
from core.markable import MarkableList, Markable
|
||||||
|
|
||||||
|
|
||||||
def gen():
|
def gen():
|
||||||
|
@ -9,8 +9,8 @@
|
|||||||
import os.path as op
|
import os.path as op
|
||||||
from itertools import combinations
|
from itertools import combinations
|
||||||
|
|
||||||
from .base import TestApp, NamedObject, with_app, eq_
|
from core.tests.base import TestApp, NamedObject, with_app, eq_
|
||||||
from ..engine import Group, Match
|
from core.engine import Group, Match
|
||||||
|
|
||||||
no = NamedObject
|
no = NamedObject
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
from .base import TestApp, GetTestGroups
|
from core.tests.base import TestApp, GetTestGroups
|
||||||
|
|
||||||
|
|
||||||
def app_with_results():
|
def app_with_results():
|
||||||
|
@ -12,10 +12,9 @@ from xml.etree import ElementTree as ET
|
|||||||
from pytest import raises
|
from pytest import raises
|
||||||
from hscommon.testutil import eq_
|
from hscommon.testutil import eq_
|
||||||
from hscommon.util import first
|
from hscommon.util import first
|
||||||
|
from core import engine
|
||||||
from .. import engine
|
from core.tests.base import NamedObject, GetTestGroups, DupeGuru
|
||||||
from .base import NamedObject, GetTestGroups, DupeGuru
|
from core.results import Results
|
||||||
from ..results import Results
|
|
||||||
|
|
||||||
|
|
||||||
class TestCaseResultsEmpty:
|
class TestCaseResultsEmpty:
|
||||||
@ -337,7 +336,7 @@ class TestCaseResultsMarkings:
|
|||||||
def log_object(o):
|
def log_object(o):
|
||||||
log.append(o)
|
log.append(o)
|
||||||
if o is self.objects[1]:
|
if o is self.objects[1]:
|
||||||
raise EnvironmentError("foobar")
|
raise OSError("foobar")
|
||||||
|
|
||||||
log = []
|
log = []
|
||||||
self.results.mark_all()
|
self.results.mark_all()
|
||||||
@ -447,7 +446,7 @@ class TestCaseResultsXML:
|
|||||||
self.results.groups = self.groups
|
self.results.groups = self.groups
|
||||||
|
|
||||||
def get_file(self, path): # use this as a callback for load_from_xml
|
def get_file(self, path): # use this as a callback for load_from_xml
|
||||||
return [o for o in self.objects if o.path == path][0]
|
return [o for o in self.objects if str(o.path) == path][0]
|
||||||
|
|
||||||
def test_save_to_xml(self):
|
def test_save_to_xml(self):
|
||||||
self.objects[0].is_ref = True
|
self.objects[0].is_ref = True
|
||||||
@ -464,7 +463,7 @@ class TestCaseResultsXML:
|
|||||||
eq_(6, len(g1))
|
eq_(6, len(g1))
|
||||||
eq_(3, len([c for c in g1 if c.tag == "file"]))
|
eq_(3, len([c for c in g1 if c.tag == "file"]))
|
||||||
eq_(3, len([c for c in g1 if c.tag == "match"]))
|
eq_(3, len([c for c in g1 if c.tag == "match"]))
|
||||||
d1, d2, d3 = [c for c in g1 if c.tag == "file"]
|
d1, d2, d3 = (c for c in g1 if c.tag == "file")
|
||||||
eq_(op.join("basepath", "foo bar"), d1.get("path"))
|
eq_(op.join("basepath", "foo bar"), d1.get("path"))
|
||||||
eq_(op.join("basepath", "bar bleh"), d2.get("path"))
|
eq_(op.join("basepath", "bar bleh"), d2.get("path"))
|
||||||
eq_(op.join("basepath", "foo bleh"), d3.get("path"))
|
eq_(op.join("basepath", "foo bleh"), d3.get("path"))
|
||||||
@ -477,7 +476,7 @@ class TestCaseResultsXML:
|
|||||||
eq_(3, len(g2))
|
eq_(3, len(g2))
|
||||||
eq_(2, len([c for c in g2 if c.tag == "file"]))
|
eq_(2, len([c for c in g2 if c.tag == "file"]))
|
||||||
eq_(1, len([c for c in g2 if c.tag == "match"]))
|
eq_(1, len([c for c in g2 if c.tag == "match"]))
|
||||||
d1, d2 = [c for c in g2 if c.tag == "file"]
|
d1, d2 = (c for c in g2 if c.tag == "file")
|
||||||
eq_(op.join("basepath", "ibabtu"), d1.get("path"))
|
eq_(op.join("basepath", "ibabtu"), d1.get("path"))
|
||||||
eq_(op.join("basepath", "ibabtu"), d2.get("path"))
|
eq_(op.join("basepath", "ibabtu"), d2.get("path"))
|
||||||
eq_("n", d1.get("is_ref"))
|
eq_("n", d1.get("is_ref"))
|
||||||
|
@ -7,29 +7,33 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from hscommon.jobprogress import job
|
from hscommon.jobprogress import job
|
||||||
from hscommon.path import Path
|
from pathlib import Path
|
||||||
from hscommon.testutil import eq_
|
from hscommon.testutil import eq_
|
||||||
|
|
||||||
from .. import fs
|
from core import fs
|
||||||
from ..engine import getwords, Match
|
from core.engine import getwords, Match
|
||||||
from ..ignore import IgnoreList
|
from core.ignore import IgnoreList
|
||||||
from ..scanner import Scanner, ScanType
|
from core.scanner import Scanner, ScanType
|
||||||
from ..me.scanner import ScannerME
|
from core.me.scanner import ScannerME
|
||||||
|
|
||||||
|
|
||||||
|
# TODO update this to be able to inherit from fs.File
|
||||||
class NamedObject:
|
class NamedObject:
|
||||||
def __init__(self, name="foobar", size=1, path=None):
|
def __init__(self, name="foobar", size=1, path=None):
|
||||||
if path is None:
|
if path is None:
|
||||||
path = Path(name)
|
path = Path(name)
|
||||||
else:
|
else:
|
||||||
path = Path(path)[name]
|
path = Path(path, name)
|
||||||
self.name = name
|
self.name = name
|
||||||
self.size = size
|
self.size = size
|
||||||
self.path = path
|
self.path = path
|
||||||
self.words = getwords(name)
|
self.words = getwords(name)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<NamedObject %r %r>" % (self.name, self.path)
|
return "<NamedObject {!r} {!r}>".format(self.name, self.path)
|
||||||
|
|
||||||
|
def exists(self):
|
||||||
|
return self.path.exists()
|
||||||
|
|
||||||
|
|
||||||
no = NamedObject
|
no = NamedObject
|
||||||
@ -123,19 +127,19 @@ def test_content_scan(fake_fileexists):
|
|||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.CONTENTS
|
s.scan_type = ScanType.CONTENTS
|
||||||
f = [no("foo"), no("bar"), no("bleh")]
|
f = [no("foo"), no("bar"), no("bleh")]
|
||||||
f[0].md5 = f[0].md5partial = f[0].md5samples = "foobar"
|
f[0].digest = f[0].digest_partial = f[0].digest_samples = "foobar"
|
||||||
f[1].md5 = f[1].md5partial = f[1].md5samples = "foobar"
|
f[1].digest = f[1].digest_partial = f[1].digest_samples = "foobar"
|
||||||
f[2].md5 = f[2].md5partial = f[1].md5samples = "bleh"
|
f[2].digest = f[2].digest_partial = f[1].digest_samples = "bleh"
|
||||||
r = s.get_dupe_groups(f)
|
r = s.get_dupe_groups(f)
|
||||||
eq_(len(r), 1)
|
eq_(len(r), 1)
|
||||||
eq_(len(r[0]), 2)
|
eq_(len(r[0]), 2)
|
||||||
eq_(s.discarded_file_count, 0) # don't count the different md5 as discarded!
|
eq_(s.discarded_file_count, 0) # don't count the different digest as discarded!
|
||||||
|
|
||||||
|
|
||||||
def test_content_scan_compare_sizes_first(fake_fileexists):
|
def test_content_scan_compare_sizes_first(fake_fileexists):
|
||||||
class MyFile(no):
|
class MyFile(no):
|
||||||
@property
|
@property
|
||||||
def md5(self):
|
def digest(self):
|
||||||
raise AssertionError()
|
raise AssertionError()
|
||||||
|
|
||||||
s = Scanner()
|
s = Scanner()
|
||||||
@ -161,14 +165,14 @@ def test_ignore_file_size(fake_fileexists):
|
|||||||
no("largeignore1", large_size + 1),
|
no("largeignore1", large_size + 1),
|
||||||
no("largeignore2", large_size + 1),
|
no("largeignore2", large_size + 1),
|
||||||
]
|
]
|
||||||
f[0].md5 = f[0].md5partial = f[0].md5samples = "smallignore"
|
f[0].digest = f[0].digest_partial = f[0].digest_samples = "smallignore"
|
||||||
f[1].md5 = f[1].md5partial = f[1].md5samples = "smallignore"
|
f[1].digest = f[1].digest_partial = f[1].digest_samples = "smallignore"
|
||||||
f[2].md5 = f[2].md5partial = f[2].md5samples = "small"
|
f[2].digest = f[2].digest_partial = f[2].digest_samples = "small"
|
||||||
f[3].md5 = f[3].md5partial = f[3].md5samples = "small"
|
f[3].digest = f[3].digest_partial = f[3].digest_samples = "small"
|
||||||
f[4].md5 = f[4].md5partial = f[4].md5samples = "large"
|
f[4].digest = f[4].digest_partial = f[4].digest_samples = "large"
|
||||||
f[5].md5 = f[5].md5partial = f[5].md5samples = "large"
|
f[5].digest = f[5].digest_partial = f[5].digest_samples = "large"
|
||||||
f[6].md5 = f[6].md5partial = f[6].md5samples = "largeignore"
|
f[6].digest = f[6].digest_partial = f[6].digest_samples = "largeignore"
|
||||||
f[7].md5 = f[7].md5partial = f[7].md5samples = "largeignore"
|
f[7].digest = f[7].digest_partial = f[7].digest_samples = "largeignore"
|
||||||
|
|
||||||
r = s.get_dupe_groups(f)
|
r = s.get_dupe_groups(f)
|
||||||
# No ignores
|
# No ignores
|
||||||
@ -197,21 +201,21 @@ def test_big_file_partial_hashes(fake_fileexists):
|
|||||||
s.big_file_size_threshold = bigsize
|
s.big_file_size_threshold = bigsize
|
||||||
|
|
||||||
f = [no("bigfoo", bigsize), no("bigbar", bigsize), no("smallfoo", smallsize), no("smallbar", smallsize)]
|
f = [no("bigfoo", bigsize), no("bigbar", bigsize), no("smallfoo", smallsize), no("smallbar", smallsize)]
|
||||||
f[0].md5 = f[0].md5partial = f[0].md5samples = "foobar"
|
f[0].digest = f[0].digest_partial = f[0].digest_samples = "foobar"
|
||||||
f[1].md5 = f[1].md5partial = f[1].md5samples = "foobar"
|
f[1].digest = f[1].digest_partial = f[1].digest_samples = "foobar"
|
||||||
f[2].md5 = f[2].md5partial = "bleh"
|
f[2].digest = f[2].digest_partial = "bleh"
|
||||||
f[3].md5 = f[3].md5partial = "bleh"
|
f[3].digest = f[3].digest_partial = "bleh"
|
||||||
r = s.get_dupe_groups(f)
|
r = s.get_dupe_groups(f)
|
||||||
eq_(len(r), 2)
|
eq_(len(r), 2)
|
||||||
|
|
||||||
# md5partial is still the same, but the file is actually different
|
# digest_partial is still the same, but the file is actually different
|
||||||
f[1].md5 = f[1].md5samples = "difffoobar"
|
f[1].digest = f[1].digest_samples = "difffoobar"
|
||||||
# here we compare the full md5s, as the user disabled the optimization
|
# here we compare the full digests, as the user disabled the optimization
|
||||||
s.big_file_size_threshold = 0
|
s.big_file_size_threshold = 0
|
||||||
r = s.get_dupe_groups(f)
|
r = s.get_dupe_groups(f)
|
||||||
eq_(len(r), 1)
|
eq_(len(r), 1)
|
||||||
|
|
||||||
# here we should compare the md5samples, and see they are different
|
# here we should compare the digest_samples, and see they are different
|
||||||
s.big_file_size_threshold = bigsize
|
s.big_file_size_threshold = bigsize
|
||||||
r = s.get_dupe_groups(f)
|
r = s.get_dupe_groups(f)
|
||||||
eq_(len(r), 1)
|
eq_(len(r), 1)
|
||||||
@ -221,9 +225,9 @@ def test_min_match_perc_doesnt_matter_for_content_scan(fake_fileexists):
|
|||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.CONTENTS
|
s.scan_type = ScanType.CONTENTS
|
||||||
f = [no("foo"), no("bar"), no("bleh")]
|
f = [no("foo"), no("bar"), no("bleh")]
|
||||||
f[0].md5 = f[0].md5partial = f[0].md5samples = "foobar"
|
f[0].digest = f[0].digest_partial = f[0].digest_samples = "foobar"
|
||||||
f[1].md5 = f[1].md5partial = f[1].md5samples = "foobar"
|
f[1].digest = f[1].digest_partial = f[1].digest_samples = "foobar"
|
||||||
f[2].md5 = f[2].md5partial = f[2].md5samples = "bleh"
|
f[2].digest = f[2].digest_partial = f[2].digest_samples = "bleh"
|
||||||
s.min_match_percentage = 101
|
s.min_match_percentage = 101
|
||||||
r = s.get_dupe_groups(f)
|
r = s.get_dupe_groups(f)
|
||||||
eq_(len(r), 1)
|
eq_(len(r), 1)
|
||||||
@ -234,12 +238,16 @@ def test_min_match_perc_doesnt_matter_for_content_scan(fake_fileexists):
|
|||||||
eq_(len(r[0]), 2)
|
eq_(len(r[0]), 2)
|
||||||
|
|
||||||
|
|
||||||
def test_content_scan_doesnt_put_md5_in_words_at_the_end(fake_fileexists):
|
def test_content_scan_doesnt_put_digest_in_words_at_the_end(fake_fileexists):
|
||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.CONTENTS
|
s.scan_type = ScanType.CONTENTS
|
||||||
f = [no("foo"), no("bar")]
|
f = [no("foo"), no("bar")]
|
||||||
f[0].md5 = f[0].md5partial = f[0].md5samples = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
f[0].digest = f[0].digest_partial = f[0].digest_samples = (
|
||||||
f[1].md5 = f[1].md5partial = f[1].md5samples = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
||||||
|
)
|
||||||
|
f[1].digest = f[1].digest_partial = f[1].digest_samples = (
|
||||||
|
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
|
||||||
|
)
|
||||||
r = s.get_dupe_groups(f)
|
r = s.get_dupe_groups(f)
|
||||||
# FIXME looks like we are missing something here?
|
# FIXME looks like we are missing something here?
|
||||||
r[0]
|
r[0]
|
||||||
@ -332,7 +340,7 @@ def test_tag_scan(fake_fileexists):
|
|||||||
def test_tag_with_album_scan(fake_fileexists):
|
def test_tag_with_album_scan(fake_fileexists):
|
||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.TAG
|
s.scan_type = ScanType.TAG
|
||||||
s.scanned_tags = set(["artist", "album", "title"])
|
s.scanned_tags = {"artist", "album", "title"}
|
||||||
o1 = no("foo")
|
o1 = no("foo")
|
||||||
o2 = no("bar")
|
o2 = no("bar")
|
||||||
o3 = no("bleh")
|
o3 = no("bleh")
|
||||||
@ -352,7 +360,7 @@ def test_tag_with_album_scan(fake_fileexists):
|
|||||||
def test_that_dash_in_tags_dont_create_new_fields(fake_fileexists):
|
def test_that_dash_in_tags_dont_create_new_fields(fake_fileexists):
|
||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.TAG
|
s.scan_type = ScanType.TAG
|
||||||
s.scanned_tags = set(["artist", "album", "title"])
|
s.scanned_tags = {"artist", "album", "title"}
|
||||||
s.min_match_percentage = 50
|
s.min_match_percentage = 50
|
||||||
o1 = no("foo")
|
o1 = no("foo")
|
||||||
o2 = no("bar")
|
o2 = no("bar")
|
||||||
@ -369,7 +377,7 @@ def test_that_dash_in_tags_dont_create_new_fields(fake_fileexists):
|
|||||||
def test_tag_scan_with_different_scanned(fake_fileexists):
|
def test_tag_scan_with_different_scanned(fake_fileexists):
|
||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.TAG
|
s.scan_type = ScanType.TAG
|
||||||
s.scanned_tags = set(["track", "year"])
|
s.scanned_tags = {"track", "year"}
|
||||||
o1 = no("foo")
|
o1 = no("foo")
|
||||||
o2 = no("bar")
|
o2 = no("bar")
|
||||||
o1.artist = "The White Stripes"
|
o1.artist = "The White Stripes"
|
||||||
@ -387,7 +395,7 @@ def test_tag_scan_with_different_scanned(fake_fileexists):
|
|||||||
def test_tag_scan_only_scans_existing_tags(fake_fileexists):
|
def test_tag_scan_only_scans_existing_tags(fake_fileexists):
|
||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.TAG
|
s.scan_type = ScanType.TAG
|
||||||
s.scanned_tags = set(["artist", "foo"])
|
s.scanned_tags = {"artist", "foo"}
|
||||||
o1 = no("foo")
|
o1 = no("foo")
|
||||||
o2 = no("bar")
|
o2 = no("bar")
|
||||||
o1.artist = "The White Stripes"
|
o1.artist = "The White Stripes"
|
||||||
@ -401,7 +409,7 @@ def test_tag_scan_only_scans_existing_tags(fake_fileexists):
|
|||||||
def test_tag_scan_converts_to_str(fake_fileexists):
|
def test_tag_scan_converts_to_str(fake_fileexists):
|
||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.TAG
|
s.scan_type = ScanType.TAG
|
||||||
s.scanned_tags = set(["track"])
|
s.scanned_tags = {"track"}
|
||||||
o1 = no("foo")
|
o1 = no("foo")
|
||||||
o2 = no("bar")
|
o2 = no("bar")
|
||||||
o1.track = 42
|
o1.track = 42
|
||||||
@ -416,7 +424,7 @@ def test_tag_scan_converts_to_str(fake_fileexists):
|
|||||||
def test_tag_scan_non_ascii(fake_fileexists):
|
def test_tag_scan_non_ascii(fake_fileexists):
|
||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.TAG
|
s.scan_type = ScanType.TAG
|
||||||
s.scanned_tags = set(["title"])
|
s.scanned_tags = {"title"}
|
||||||
o1 = no("foo")
|
o1 = no("foo")
|
||||||
o2 = no("bar")
|
o2 = no("bar")
|
||||||
o1.title = "foobar\u00e9"
|
o1.title = "foobar\u00e9"
|
||||||
@ -568,12 +576,14 @@ def test_dont_group_files_that_dont_exist(tmpdir):
|
|||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.CONTENTS
|
s.scan_type = ScanType.CONTENTS
|
||||||
p = Path(str(tmpdir))
|
p = Path(str(tmpdir))
|
||||||
p["file1"].open("w").write("foo")
|
with p.joinpath("file1").open("w") as fp:
|
||||||
p["file2"].open("w").write("foo")
|
fp.write("foo")
|
||||||
|
with p.joinpath("file2").open("w") as fp:
|
||||||
|
fp.write("foo")
|
||||||
file1, file2 = fs.get_files(p)
|
file1, file2 = fs.get_files(p)
|
||||||
|
|
||||||
def getmatches(*args, **kw):
|
def getmatches(*args, **kw):
|
||||||
file2.path.remove()
|
file2.path.unlink()
|
||||||
return [Match(file1, file2, 100)]
|
return [Match(file1, file2, 100)]
|
||||||
|
|
||||||
s._getmatches = getmatches
|
s._getmatches = getmatches
|
||||||
@ -587,21 +597,21 @@ def test_folder_scan_exclude_subfolder_matches(fake_fileexists):
|
|||||||
s = Scanner()
|
s = Scanner()
|
||||||
s.scan_type = ScanType.FOLDERS
|
s.scan_type = ScanType.FOLDERS
|
||||||
topf1 = no("top folder 1", size=42)
|
topf1 = no("top folder 1", size=42)
|
||||||
topf1.md5 = topf1.md5partial = topf1.md5samples = b"some_md5_1"
|
topf1.digest = topf1.digest_partial = topf1.digest_samples = b"some_digest__1"
|
||||||
topf1.path = Path("/topf1")
|
topf1.path = Path("/topf1")
|
||||||
topf2 = no("top folder 2", size=42)
|
topf2 = no("top folder 2", size=42)
|
||||||
topf2.md5 = topf2.md5partial = topf2.md5samples = b"some_md5_1"
|
topf2.digest = topf2.digest_partial = topf2.digest_samples = b"some_digest__1"
|
||||||
topf2.path = Path("/topf2")
|
topf2.path = Path("/topf2")
|
||||||
subf1 = no("sub folder 1", size=41)
|
subf1 = no("sub folder 1", size=41)
|
||||||
subf1.md5 = subf1.md5partial = subf1.md5samples = b"some_md5_2"
|
subf1.digest = subf1.digest_partial = subf1.digest_samples = b"some_digest__2"
|
||||||
subf1.path = Path("/topf1/sub")
|
subf1.path = Path("/topf1/sub")
|
||||||
subf2 = no("sub folder 2", size=41)
|
subf2 = no("sub folder 2", size=41)
|
||||||
subf2.md5 = subf2.md5partial = subf2.md5samples = b"some_md5_2"
|
subf2.digest = subf2.digest_partial = subf2.digest_samples = b"some_digest__2"
|
||||||
subf2.path = Path("/topf2/sub")
|
subf2.path = Path("/topf2/sub")
|
||||||
eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2])), 1) # only top folders
|
eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2])), 1) # only top folders
|
||||||
# however, if another folder matches a subfolder, keep it in the matches
|
# however, if another folder matches a subfolder, keep it in the matches
|
||||||
otherf = no("other folder", size=41)
|
otherf = no("other folder", size=41)
|
||||||
otherf.md5 = otherf.md5partial = otherf.md5samples = b"some_md5_2"
|
otherf.digest = otherf.digest_partial = otherf.digest_samples = b"some_digest__2"
|
||||||
otherf.path = Path("/otherfolder")
|
otherf.path = Path("/otherfolder")
|
||||||
eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2, otherf])), 2)
|
eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2, otherf])), 2)
|
||||||
|
|
||||||
@ -624,9 +634,9 @@ def test_dont_count_ref_files_as_discarded(fake_fileexists):
|
|||||||
o1 = no("foo", path="p1")
|
o1 = no("foo", path="p1")
|
||||||
o2 = no("foo", path="p2")
|
o2 = no("foo", path="p2")
|
||||||
o3 = no("foo", path="p3")
|
o3 = no("foo", path="p3")
|
||||||
o1.md5 = o1.md5partial = o1.md5samples = "foobar"
|
o1.digest = o1.digest_partial = o1.digest_samples = "foobar"
|
||||||
o2.md5 = o2.md5partial = o2.md5samples = "foobar"
|
o2.digest = o2.digest_partial = o2.digest_samples = "foobar"
|
||||||
o3.md5 = o3.md5partial = o3.md5samples = "foobar"
|
o3.digest = o3.digest_partial = o3.digest_samples = "foobar"
|
||||||
o1.is_ref = True
|
o1.is_ref = True
|
||||||
o2.is_ref = True
|
o2.is_ref = True
|
||||||
eq_(len(s.get_dupe_groups([o1, o2, o3])), 1)
|
eq_(len(s.get_dupe_groups([o1, o2, o3])), 1)
|
||||||
|
37
core/util.py
37
core/util.py
@ -7,6 +7,12 @@
|
|||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import urllib.request
|
||||||
|
import urllib.error
|
||||||
|
import json
|
||||||
|
import semantic_version
|
||||||
|
import logging
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
from hscommon.util import format_time_decimal
|
from hscommon.util import format_time_decimal
|
||||||
|
|
||||||
@ -64,3 +70,34 @@ def fix_surrogate_encoding(s, encoding="utf-8"):
|
|||||||
|
|
||||||
def executable_folder():
|
def executable_folder():
|
||||||
return os.path.dirname(os.path.abspath(sys.argv[0]))
|
return os.path.dirname(os.path.abspath(sys.argv[0]))
|
||||||
|
|
||||||
|
|
||||||
|
def check_for_update(current_version: str, include_prerelease: bool = False) -> Union[None, dict]:
|
||||||
|
request = urllib.request.Request(
|
||||||
|
"https://api.github.com/repos/arsenetar/dupeguru/releases",
|
||||||
|
headers={"Accept": "application/vnd.github.v3+json"},
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(request) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
logging.warn(f"Error retriving updates. Status: {response.status}")
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
response_json = json.loads(response.read())
|
||||||
|
except json.JSONDecodeError as ex:
|
||||||
|
logging.warn(f"Error parsing updates. {ex.msg}")
|
||||||
|
return None
|
||||||
|
except urllib.error.URLError as ex:
|
||||||
|
logging.warn(f"Error retriving updates. {ex.reason}")
|
||||||
|
return None
|
||||||
|
new_version = semantic_version.Version(current_version)
|
||||||
|
new_url = None
|
||||||
|
for release in response_json:
|
||||||
|
release_version = semantic_version.Version(release["name"])
|
||||||
|
if new_version < release_version and (include_prerelease or not release_version.prerelease):
|
||||||
|
new_version = release_version
|
||||||
|
new_url = release["html_url"]
|
||||||
|
if new_url is not None:
|
||||||
|
return {"version": new_version, "url": new_url}
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
@ -1,3 +1,31 @@
|
|||||||
|
=== 4.3.1 (2022-07-08)
|
||||||
|
* Fix issue where cache db exceptions could prevent files being hashed (#1015)
|
||||||
|
* Add extra guard for non-zero length files without digests to prevent false duplicates
|
||||||
|
* Update Italian translations
|
||||||
|
|
||||||
|
=== 4.3.0 (2022-07-01)
|
||||||
|
* Redirect stdout from custom command to the log files (#1008)
|
||||||
|
* Update translations
|
||||||
|
* Fix typo in debian control file (#989)
|
||||||
|
* Add option to profile scans
|
||||||
|
* Update fs.py to optimize stat() calls
|
||||||
|
* Fix error when deleting after scan (#988)
|
||||||
|
* Update directory scanning to use os.scandir() and DirEntry objects
|
||||||
|
* Improve performance of Directories.get_state()
|
||||||
|
* Migrate from hscommon.path to pathlib
|
||||||
|
* Switch file hashing to xxhash with fallback to md5
|
||||||
|
* Add update check feature to about box
|
||||||
|
|
||||||
|
=== 4.2.1 (2022-03-25)
|
||||||
|
* Default to English on unsupported system language (#976)
|
||||||
|
* Fix image viewer zoom datatype issue (#978)
|
||||||
|
* Fix errors from window change event (#937, #980)
|
||||||
|
* Fix deprecation warning from SQLite
|
||||||
|
* Enforce minimum Windows version in installer (#983)
|
||||||
|
* Fix help path for local files
|
||||||
|
* Drop python 3.6 support
|
||||||
|
* VS Code project settings added, yaml validation for GitHub actions
|
||||||
|
|
||||||
=== 4.2.0 (2021-01-24)
|
=== 4.2.0 (2021-01-24)
|
||||||
|
|
||||||
* Add Malay and Turkish
|
* Add Malay and Turkish
|
||||||
@ -29,7 +57,7 @@
|
|||||||
|
|
||||||
=== 4.1.1 (2021-03-21)
|
=== 4.1.1 (2021-03-21)
|
||||||
|
|
||||||
* Add Japanese
|
* Add Japanese
|
||||||
* Update internationalization and translations to be up to date with current UI.
|
* Update internationalization and translations to be up to date with current UI.
|
||||||
* Minor translation and UI language updates
|
* Minor translation and UI language updates
|
||||||
* Fix language selection issues on Windows (#760)
|
* Fix language selection issues on Windows (#760)
|
||||||
@ -391,7 +419,7 @@
|
|||||||
|
|
||||||
=== 2.6.1 (2009-03-27)
|
=== 2.6.1 (2009-03-27)
|
||||||
* **Fixed** an occasional crash caused by permission issues.
|
* **Fixed** an occasional crash caused by permission issues.
|
||||||
* **Fixed** a bug where the "X discarded" notice would show a too large number of discarded
|
* **Fixed** a bug where the "X discarded" notice would show a too large number of discarded
|
||||||
duplicates.
|
duplicates.
|
||||||
|
|
||||||
=== 2.6.0 (2008-09-10)
|
=== 2.6.0 (2008-09-10)
|
||||||
@ -425,14 +453,14 @@
|
|||||||
* **Added** the "Remove empty folders" option.
|
* **Added** the "Remove empty folders" option.
|
||||||
* **Fixed** results load/save issues.
|
* **Fixed** results load/save issues.
|
||||||
* **Fixed** occasional status bar inaccuracies when the results are filtered.
|
* **Fixed** occasional status bar inaccuracies when the results are filtered.
|
||||||
|
|
||||||
|
|
||||||
=== 2.5.0 (2007-09-15)
|
=== 2.5.0 (2007-09-15)
|
||||||
|
|
||||||
* **Added** post scan filtering.
|
* **Added** post scan filtering.
|
||||||
* **Fixed** issues with the rename feature under Windows
|
* **Fixed** issues with the rename feature under Windows
|
||||||
* **Fixed** some user interface annoyances under Windows
|
* **Fixed** some user interface annoyances under Windows
|
||||||
|
|
||||||
|
|
||||||
=== 2.4.8 (2007-04-14)
|
=== 2.4.8 (2007-04-14)
|
||||||
|
|
||||||
@ -448,7 +476,7 @@
|
|||||||
|
|
||||||
* **Added** Re-orderable columns. In fact, I re-added the feature which was lost in the C# conversion in 2.4.0 (Windows).
|
* **Added** Re-orderable columns. In fact, I re-added the feature which was lost in the C# conversion in 2.4.0 (Windows).
|
||||||
* **Changed** the behavior of the scanning engine when setting the hardness to 100. It will now only match files that have their words in the same order.
|
* **Changed** the behavior of the scanning engine when setting the hardness to 100. It will now only match files that have their words in the same order.
|
||||||
* **Fixed** a bug with all the Delete/Move/Copy actions with certain kinds of files.
|
* **Fixed** a bug with all the Delete/Move/Copy actions with certain kinds of files.
|
||||||
|
|
||||||
=== 2.4.5 (2007-01-11)
|
=== 2.4.5 (2007-01-11)
|
||||||
|
|
||||||
@ -486,7 +514,7 @@
|
|||||||
|
|
||||||
=== 2.3.4 (2006-11-07)
|
=== 2.3.4 (2006-11-07)
|
||||||
|
|
||||||
* **Improved** speed and memory usage of the scanning engine, again. Does it mean there was a lot of improvements to be made? Nah...
|
* **Improved** speed and memory usage of the scanning engine, again. Does it mean there was a lot of improvements to be made? Nah...
|
||||||
|
|
||||||
=== 2.3.3 (2006-11-02)
|
=== 2.3.3 (2006-11-02)
|
||||||
|
|
||||||
@ -544,7 +572,7 @@
|
|||||||
=== 2.2.3 (2006-06-15)
|
=== 2.2.3 (2006-06-15)
|
||||||
|
|
||||||
* **Improved** duplicate scanning speed.
|
* **Improved** duplicate scanning speed.
|
||||||
* **Added** a warning that a file couldn't be renamed if a file with the same name already exists.
|
* **Added** a warning that a file couldn't be renamed if a file with the same name already exists.
|
||||||
|
|
||||||
=== 2.2.2 (2006-06-07)
|
=== 2.2.2 (2006-06-07)
|
||||||
|
|
||||||
@ -588,9 +616,9 @@
|
|||||||
|
|
||||||
=== 2.0.0 (2006-03-17)
|
=== 2.0.0 (2006-03-17)
|
||||||
|
|
||||||
* Complete rewrite.
|
* Complete rewrite.
|
||||||
* Now runs on Mac OS X.
|
* Now runs on Mac OS X.
|
||||||
|
|
||||||
=== 1.0.0 (2004-09-24)
|
=== 1.0.0 (2004-09-24)
|
||||||
|
|
||||||
* Initial release.
|
* Initial release.
|
||||||
|
@ -71,7 +71,7 @@ Häufig gestellte Fragen
|
|||||||
* Klicken Sie **Markieren --> Alle Markieren**.
|
* Klicken Sie **Markieren --> Alle Markieren**.
|
||||||
|
|
||||||
.. only:: edition_me
|
.. only:: edition_me
|
||||||
|
|
||||||
.. topic:: Ich möchte alle Stücke markieren, die mehr als 3 Sekunden von ihrer Referenz verschieden sind. Was kann ich tun?
|
.. topic:: Ich möchte alle Stücke markieren, die mehr als 3 Sekunden von ihrer Referenz verschieden sind. Was kann ich tun?
|
||||||
|
|
||||||
* Aktivieren Sie den :doc:`Nur Duplikate <results>` Modus.
|
* Aktivieren Sie den :doc:`Nur Duplikate <results>` Modus.
|
||||||
@ -83,7 +83,7 @@ Häufig gestellte Fragen
|
|||||||
* Klicken Sie auf **Entferne Ausgewählte von den Ergebnissen**.
|
* Klicken Sie auf **Entferne Ausgewählte von den Ergebnissen**.
|
||||||
|
|
||||||
.. topic:: Ich möchte meine Stücke mit der höchsten Bitrate zur Referenz machen. Was kann ich tun?
|
.. topic:: Ich möchte meine Stücke mit der höchsten Bitrate zur Referenz machen. Was kann ich tun?
|
||||||
|
|
||||||
* Aktivieren Sie den :doc:`Nur Duplikate <results>` Modus.
|
* Aktivieren Sie den :doc:`Nur Duplikate <results>` Modus.
|
||||||
* Aktivieren Sie den **Deltawerte** Modus.
|
* Aktivieren Sie den **Deltawerte** Modus.
|
||||||
* Klicken Sie auf die "Bitrate" Spalte, um nach Bitrate zu sortieren.
|
* Klicken Sie auf die "Bitrate" Spalte, um nach Bitrate zu sortieren.
|
||||||
@ -92,9 +92,9 @@ Häufig gestellte Fragen
|
|||||||
* Klicken Sie auf **Mache Ausgewählte zur Referenz**.
|
* Klicken Sie auf **Mache Ausgewählte zur Referenz**.
|
||||||
|
|
||||||
.. topic:: Ich möchte nicht das [live] und [remix] Versionen meiner Stücke als Duplikate erkannt werden. Was kann ich tun?
|
.. topic:: Ich möchte nicht, dass [live] und [remix] Versionen meiner Stücke als Duplikate erkannt werden. Was kann ich tun?
|
||||||
|
|
||||||
Ist Ihre Vergleichsschwelle niedrig genug, werden möglicherweise die live und remix Versionen in der Ergebnisliste landen. Das kann nicht verhindert werden, aber es gibt die Möglichkeit die Ergebnisse nach dem Scan zu entfernen, mittels dem Filter. Möchten Sie jedes Stück mit irgendetwas in eckigen Klammern [] im Dateinamen entfernen, so:
|
Ist Ihre Vergleichsschwelle niedrig genug, werden möglicherweise die live und remix Versionen in der Ergebnisliste landen. Das kann nicht verhindert werden, aber es gibt die Möglichkeit die Ergebnisse nach dem Scan zu entfernen, mittels dem Filter. Möchten Sie jedes Stück mit irgendetwas in eckigen Klammern [] im Dateinamen entfernen, so:
|
||||||
|
|
||||||
* **Windows**: Klicken Sie auf **Aktionen --> Filter anwenden**, geben "[*]" ein und klicken OK.
|
* **Windows**: Klicken Sie auf **Aktionen --> Filter anwenden**, geben "[*]" ein und klicken OK.
|
||||||
* **Mac OS X**: Geben Sie "[*]" in das "Filter" Feld der Werkzeugleiste ein.
|
* **Mac OS X**: Geben Sie "[*]" in das "Filter" Feld der Werkzeugleiste ein.
|
||||||
* Klicken Sie auf **Markieren --> Alle Markieren**.
|
* Klicken Sie auf **Markieren --> Alle Markieren**.
|
||||||
|
@ -16,7 +16,7 @@ Jeder Ordner kann in einem von 3 Zuständen sein:
|
|||||||
* **Referenz:** Duplikate in diesem Ordner können **nicht** gelöscht werden. Dateien dieses Ordners können sich nur in der **Referenz** Position einer Duplikatgruppe befinden. Ist mehr als eine Datei des Referenzordners in derselben Duplikatgruppe, so wird nur Eine behalten. Die Anderen werden aus der Gruppe entfernt.
|
* **Referenz:** Duplikate in diesem Ordner können **nicht** gelöscht werden. Dateien dieses Ordners können sich nur in der **Referenz** Position einer Duplikatgruppe befinden. Ist mehr als eine Datei des Referenzordners in derselben Duplikatgruppe, so wird nur Eine behalten. Die Anderen werden aus der Gruppe entfernt.
|
||||||
* **Ausgeschlossen:** Dateien in diesem Verzeichnis sind nicht im Scan eingeschlossen.
|
* **Ausgeschlossen:** Dateien in diesem Verzeichnis sind nicht im Scan eingeschlossen.
|
||||||
|
|
||||||
Der Standardzustand eines Ordners ist natürlich **Normal**. Sie können den **Referenz** Zustand für Ordner nutzen, in denen auf keinen Fall eine Datei gelöscht werden soll.
|
Der Standardzustand eines Ordners ist natürlich **Normal**. Sie können den **Referenz** Zustand für Ordner nutzen, in denen auf keinen Fall eine Datei gelöscht werden soll.
|
||||||
|
|
||||||
Wenn sie einen Zustand für ein Verzeichnis setzen, erben alle Unterordner automatisch diesen Zustand, es sei denn Sie ändern den Zustand der Unterordner explizit.
|
Wenn Sie einen Zustand für ein Verzeichnis setzen, erben alle Unterordner automatisch diesen Zustand, es sei denn Sie ändern den Zustand der Unterordner explizit.
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ Inhalte:
|
|||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
quick_start
|
quick_start
|
||||||
folders
|
folders
|
||||||
preferences
|
preferences
|
||||||
|
@ -4,9 +4,9 @@ Einstellungen
|
|||||||
.. only:: edition_se
|
.. only:: edition_se
|
||||||
|
|
||||||
**Scan Typ:** Diese Option bestimmt nach welcher Eigenschaft die Dateien in einem Duplikate Scan verglichen werden. Wenn Sie **Dateiname** auswählen, wird dupeGuru jeden Dateinamen Wort für Wort vergleichen und, abhängig von den unteren Einstellungen, feststellen ob genügend Wörter übereinstimmen, um 2 Dateien als Duplikate zu betrachten. Wenn Sie **Inhalt** wählen, werden nur Dateien mit dem exakt gleichen Inhalt zusammenpassen.
|
**Scan Typ:** Diese Option bestimmt nach welcher Eigenschaft die Dateien in einem Duplikate Scan verglichen werden. Wenn Sie **Dateiname** auswählen, wird dupeGuru jeden Dateinamen Wort für Wort vergleichen und, abhängig von den unteren Einstellungen, feststellen ob genügend Wörter übereinstimmen, um 2 Dateien als Duplikate zu betrachten. Wenn Sie **Inhalt** wählen, werden nur Dateien mit dem exakt gleichen Inhalt zusammenpassen.
|
||||||
|
|
||||||
Der **Ordner** Scan Typ ist etwas speziell. Wird er ausgewählt, scannt dupeGuru nach doppelten Ordnern anstelle von Dateien. Um festzustellen ob 2 Ordner identisch sind, werden alle Datein im Ordner gescannt und wenn die Inhalte aller Dateien der Ordner übereinstimmen, werden die Ordner als Duplikate erkannt.
|
Der **Ordner** Scan Typ ist etwas speziell. Wird er ausgewählt, scannt dupeGuru nach doppelten Ordnern anstelle von Dateien. Um festzustellen ob 2 Ordner identisch sind, werden alle Dateien im Ordner gescannt und wenn die Inhalte aller Dateien der Ordner übereinstimmen, werden die Ordner als Duplikate erkannt.
|
||||||
|
|
||||||
**Filterempfindlichkeit:** Wenn Sie den **Dateiname** Scan Typ wählen, bestimmt diese Option wie ähnlich 2 Dateinamen für dupeGuru sein müssen, um Duplikate zu sein. Ist die Empfindlichkeit zum Beispiel 80, müssen 80% der Worte der 2 Dateinamen übereinstimmen. Um den Übereinstimmungsanteil herauszufinden, zählt dupeGuru zuerst die Gesamtzahl der Wörter **beider** Dateinamen, dann werden die gleichen Wörter gezählt (jedes Wort zählt als 2) und durch die Gesamtzahl der Wörter dividiert. Ist das Resultat größer oder gleich der Filterempfindlichkeit, haben wir ein Duplikat. Zum Beispiel, "a b c d" und "c d e" haben einen Übereinstimmungsanteil von 57 (4 gleiche Wörter, insgesamt 7 Wörter).
|
**Filterempfindlichkeit:** Wenn Sie den **Dateiname** Scan Typ wählen, bestimmt diese Option wie ähnlich 2 Dateinamen für dupeGuru sein müssen, um Duplikate zu sein. Ist die Empfindlichkeit zum Beispiel 80, müssen 80% der Worte der 2 Dateinamen übereinstimmen. Um den Übereinstimmungsanteil herauszufinden, zählt dupeGuru zuerst die Gesamtzahl der Wörter **beider** Dateinamen, dann werden die gleichen Wörter gezählt (jedes Wort zählt als 2) und durch die Gesamtzahl der Wörter dividiert. Ist das Resultat größer oder gleich der Filterempfindlichkeit, haben wir ein Duplikat. Zum Beispiel, "a b c d" und "c d e" haben einen Übereinstimmungsanteil von 57 (4 gleiche Wörter, insgesamt 7 Wörter).
|
||||||
|
|
||||||
.. only:: edition_me
|
.. only:: edition_me
|
||||||
@ -33,7 +33,7 @@ Einstellungen
|
|||||||
.. only:: edition_pe
|
.. only:: edition_pe
|
||||||
|
|
||||||
**Scan Typ:** Diese option bestimmt, welcher Scan Typ bei Ihren Bildern angewendet wird. Der **Inhalte** Scan Typ vergleicht den Inhalt der Bilder auf eine ungenaue Art und Weise (so werden nicht nur exakte Duplikate gefunden, sondern auch Ähnliche). Der **EXIF Zeitstempel** Scan Typ schaut auf die EXIF Metadaten der Bilder (wenn vorhanden) und erkennt Bilder die den Selben haben. Er ist viel schneller als der Inhalte Scan. **Warnung:** Veränderte Bilder behalten oft den selben EXIF Zeitstempel, also achten Sie auf Falschpositive bei der Nutzung dieses Scans.
|
**Scan Typ:** Diese Option bestimmt, welcher Scan Typ bei Ihren Bildern angewendet wird. Der **Inhalte** Scan Typ vergleicht den Inhalt der Bilder auf eine ungenaue Art und Weise (so werden nicht nur exakte Duplikate gefunden, sondern auch Ähnliche). Der **EXIF Zeitstempel** Scan Typ schaut auf die EXIF Metadaten der Bilder (wenn vorhanden) und erkennt Bilder die den Selben haben. Er ist viel schneller als der Inhalte Scan. **Warnung:** Veränderte Bilder behalten oft den selben EXIF Zeitstempel, also achten Sie auf Falschpositive bei der Nutzung dieses Scans.
|
||||||
|
|
||||||
**Filterempfindlichkeit:** *Nur Inhalte Scan.* Je höher diese Einstellung, desto strenger ist der Filter (Mit anderen Worten, desto weniger Ergebnisse erhalten Sie). Die meisten Bilder der selben Qualität stimmen zu 100% überein, selbst wenn das Format anders ist (PNG und JPG zum Beispiel). Wie auch immer, wenn ein PNG mit einem JPG niederiger Qualität übereinstimmen soll, muss die Filterempfindlichkeit kleiner als 100 sein. Die Voreinstellung, 95, ist eine gute Wahl.
|
**Filterempfindlichkeit:** *Nur Inhalte Scan.* Je höher diese Einstellung, desto strenger ist der Filter (Mit anderen Worten, desto weniger Ergebnisse erhalten Sie). Die meisten Bilder der selben Qualität stimmen zu 100% überein, selbst wenn das Format anders ist (PNG und JPG zum Beispiel). Wie auch immer, wenn ein PNG mit einem JPG niedriger Qualität übereinstimmen soll, muss die Filterempfindlichkeit kleiner als 100 sein. Die Voreinstellung, 95, ist eine gute Wahl.
|
||||||
|
|
||||||
**Bilder unterschiedlicher Abmessung gleich:** Wird diese Box gewählt, dürfen Bilder unterschiedlicher Abmessung in einer Duplikategruppe sein..
|
**Bilder unterschiedlicher Abmessung gleich:** Wird diese Box gewählt, dürfen Bilder unterschiedlicher Abmessung in einer Duplikategruppe sein.
|
||||||
@ -57,7 +57,7 @@ Auf jeden Fall behandelt dupeGuru Namenskonflikte indem es dem Ziel-Dateinamen e
|
|||||||
**Eigener Befehl:** Diese Einstellung bestimmt den Befehl der durch "Führe eigenen Befehl aus" ausgeführt wird. Sie können jede externe Anwendung durch diese Aktion aufrufen. Dies ist zum Beispiel hilfreich, wenn Sie eine gute diff-Anwendung installiert haben.
|
**Eigener Befehl:** Diese Einstellung bestimmt den Befehl der durch "Führe eigenen Befehl aus" ausgeführt wird. Sie können jede externe Anwendung durch diese Aktion aufrufen. Dies ist zum Beispiel hilfreich, wenn Sie eine gute diff-Anwendung installiert haben.
|
||||||
|
|
||||||
Das Format des Befehls ist das Selbe wie in einer Befehlszeile, außer das 2 Platzhalter vorhanden sind: **%d** und **%r**. Diese Platzhalter werden durch den Pfad des markierten Duplikates (%d) und dem Pfad der Duplikatereferenz ersetzt (%r).
|
Das Format des Befehls ist dasselbe wie in einer Befehlszeile, außer dass 2 Platzhalter vorhanden sind: **%d** und **%r**. Diese Platzhalter werden durch den Pfad des markierten Duplikates (%d) und den Pfad der Duplikatereferenz ersetzt (%r).
|
||||||
|
|
||||||
Wenn der Pfad Ihrer ausführbaren Datei Leerzeichen enthält, so schließen sie ihn bitte mit "" Zeichen ein. Sie sollten auch Platzhalter mit den Zitatzeichen einschließen, denn es ist möglich, das die Pfade der Duplikate und Referenzen ebenfalls Leerzeichen enthalten. Hier ist ein Beispiel eines eigenen Befehls::
|
Wenn der Pfad Ihrer ausführbaren Datei Leerzeichen enthält, so schließen Sie ihn bitte mit "" Zeichen ein. Sie sollten auch Platzhalter mit den Zitatzeichen einschließen, denn es ist möglich, dass die Pfade der Duplikate und Referenzen ebenfalls Leerzeichen enthalten. Hier ist ein Beispiel eines eigenen Befehls::
|
||||||
|
|
||||||
"C:\Program Files\SuperDiffProg\SuperDiffProg.exe" "%d" "%r"
|
"C:\Program Files\SuperDiffProg\SuperDiffProg.exe" "%d" "%r"
|
||||||
|
@ -22,4 +22,4 @@ criterion is used and so on and so on. For example, if your arguments are "Size
|
|||||||
"Filename (Doesn't end with a number)", the reference file that will be picked in a group will be
|
"Filename (Doesn't end with a number)", the reference file that will be picked in a group will be
|
||||||
the biggest file, and if two or more files have the same size, the one that has a filename that
|
the biggest file, and if two or more files have the same size, the one that has a filename that
|
||||||
doesn't end with a number will be used. When all criteria result in ties, the order in which dupes
|
doesn't end with a number will be used. When all criteria result in ties, the order in which dupes
|
||||||
previously were in the group will be used.
|
previously were in the group will be used.
|
||||||
|
@ -98,4 +98,4 @@ Aktionen Menü
|
|||||||
* **Ausgewählte umbenennen:** Fragt nach einem neuen Namen und benennt die ausgewählte Datei um.
|
* **Ausgewählte umbenennen:** Fragt nach einem neuen Namen und benennt die ausgewählte Datei um.
|
||||||
|
|
||||||
.. todo:: Add Move and iPhoto/iTunes warning
|
.. todo:: Add Move and iPhoto/iTunes warning
|
||||||
.. todo:: Add "Deletion Options" section.
|
.. todo:: Add "Deletion Options" section.
|
||||||
|
@ -12,7 +12,7 @@ a community around this project.
|
|||||||
|
|
||||||
So, whatever your skills, if you're interested in contributing to dupeGuru, please do so. Normally,
|
So, whatever your skills, if you're interested in contributing to dupeGuru, please do so. Normally,
|
||||||
this documentation should be enough to get you started, but if it isn't, then **please**,
|
this documentation should be enough to get you started, but if it isn't, then **please**,
|
||||||
`let me know`_ because it's a problem that I'm committed to fix. If there's any situation where you'd
|
open a discussion at https://github.com/arsenetar/dupeguru/discussions. If there's any situation where you'd
|
||||||
wish to contribute but some doubt you're having prevent you from going forward, please contact me.
|
wish to contribute but some doubt you're having prevents you from going forward, please contact me.
|
||||||
I'd much prefer to spend the time figuring out with you whether (and how) you can contribute than
|
I'd much prefer to spend the time figuring out with you whether (and how) you can contribute than
|
||||||
taking the chance of missing that opportunity.
|
taking the chance of missing that opportunity.
|
||||||
@ -24,7 +24,7 @@ Development process
|
|||||||
* `Issue Tracker`_
|
* `Issue Tracker`_
|
||||||
* `Issue labels meaning`_
|
* `Issue labels meaning`_
|
||||||
|
|
||||||
dupeGuru's source code is on Github and thus managed in a Git repository. At all times, you should
|
dupeGuru's source code is on GitHub and thus managed in a Git repository. At all times, you should
|
||||||
be able to build from source a fresh checkout of the ``master`` branch using instructions from the
|
be able to build from source a fresh checkout of the ``master`` branch using instructions from the
|
||||||
``README.md`` file at the root of this project. If you can't, it's a bug. Please report it.
|
``README.md`` file at the root of this project. If you can't, it's a bug. Please report it.
|
||||||
|
|
||||||
@ -61,7 +61,7 @@ It's the same thing with feature requests. Description of a feature request, whe
|
|||||||
already been given to how such a feature would fit in the current design, are precious to developers
|
already been given to how such a feature would fit in the current design, are precious to developers
|
||||||
and help them figure out a clear roadmap for the project.
|
and help them figure out a clear roadmap for the project.
|
||||||
|
|
||||||
So, even if you're not a developer, you can always open a Github account and create/comment issues.
|
So, even if you're not a developer, you can always open a GitHub account and create/comment issues.
|
||||||
Your contribution will be much appreciated.
|
Your contribution will be much appreciated.
|
||||||
|
|
||||||
**Documentation**. This is a bit trickier because dupeGuru's documentation is written with a rather
|
**Documentation**. This is a bit trickier because dupeGuru's documentation is written with a rather
|
||||||
@ -82,10 +82,9 @@ agree on what should be added to the documentation.
|
|||||||
dupeGuru. For more information about how to do that, you can refer to the `translator guide`_.
|
dupeGuru. For more information about how to do that, you can refer to the `translator guide`_.
|
||||||
|
|
||||||
.. _been open source: https://www.hardcoded.net/articles/free-as-in-speech-fair-as-in-trade
|
.. _been open source: https://www.hardcoded.net/articles/free-as-in-speech-fair-as-in-trade
|
||||||
.. _let me know: mailto:hsoft@hardcoded.net
|
|
||||||
.. _Source code repository: https://github.com/arsenetar/dupeguru
|
.. _Source code repository: https://github.com/arsenetar/dupeguru
|
||||||
.. _Issue Tracker: https://github.com/hsoft/arsenetar/issues
|
.. _Issue Tracker: https://github.com/arsenetar/dupeguru/issues
|
||||||
.. _Issue labels meaning: https://github.com/hsoft/arsenetar/wiki/issue-labels
|
.. _Issue labels meaning: https://github.com/arsenetar/dupeguru/wiki/issue-labels
|
||||||
.. _Sphinx: http://sphinx-doc.org/
|
.. _Sphinx: http://sphinx-doc.org/
|
||||||
.. _reST: http://en.wikipedia.org/wiki/ReStructuredText
|
.. _reST: http://en.wikipedia.org/wiki/ReStructuredText
|
||||||
.. _translator guide: https://github.com/hsoft/arsenetar/wiki/Translator-Guide
|
.. _translator guide: https://github.com/arsenetar/dupeguru/wiki/Translator-Guide
|
||||||
|
@ -2,12 +2,12 @@ core.engine
|
|||||||
===========
|
===========
|
||||||
|
|
||||||
.. automodule:: core.engine
|
.. automodule:: core.engine
|
||||||
|
|
||||||
.. autoclass:: Match
|
.. autoclass:: Match
|
||||||
|
|
||||||
.. autoclass:: Group
|
.. autoclass:: Group
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
.. autofunction:: build_word_dict
|
.. autofunction:: build_word_dict
|
||||||
.. autofunction:: compare
|
.. autofunction:: compare
|
||||||
.. autofunction:: compare_fields
|
.. autofunction:: compare_fields
|
||||||
@ -16,7 +16,7 @@ core.engine
|
|||||||
.. autofunction:: get_groups
|
.. autofunction:: get_groups
|
||||||
.. autofunction:: merge_similar_words
|
.. autofunction:: merge_similar_words
|
||||||
.. autofunction:: reduce_common_words
|
.. autofunction:: reduce_common_words
|
||||||
|
|
||||||
.. _fields:
|
.. _fields:
|
||||||
|
|
||||||
Fields
|
Fields
|
||||||
|
@ -6,5 +6,5 @@ core.gui
|
|||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
deletion_options
|
deletion_options
|
||||||
|
@ -3,7 +3,7 @@ core
|
|||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
app
|
app
|
||||||
fs
|
fs
|
||||||
engine
|
engine
|
||||||
|
@ -4,9 +4,9 @@ hscommon.gui.base
|
|||||||
.. automodule:: hscommon.gui.base
|
.. automodule:: hscommon.gui.base
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
|
|
||||||
GUIObject
|
GUIObject
|
||||||
|
|
||||||
.. autoclass:: GUIObject
|
.. autoclass:: GUIObject
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
@ -4,22 +4,22 @@ hscommon.gui.column
|
|||||||
.. automodule:: hscommon.gui.column
|
.. automodule:: hscommon.gui.column
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
|
|
||||||
Columns
|
Columns
|
||||||
Column
|
Column
|
||||||
ColumnsView
|
ColumnsView
|
||||||
PrefAccessInterface
|
PrefAccessInterface
|
||||||
|
|
||||||
.. autoclass:: Columns
|
.. autoclass:: Columns
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
.. autoclass:: Column
|
.. autoclass:: Column
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
.. autoclass:: ColumnsView
|
.. autoclass:: ColumnsView
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
.. autoclass:: PrefAccessInterface
|
.. autoclass:: PrefAccessInterface
|
||||||
:members:
|
:members:
|
||||||
|
@ -4,15 +4,14 @@ hscommon.gui.progress_window
|
|||||||
.. automodule:: hscommon.gui.progress_window
|
.. automodule:: hscommon.gui.progress_window
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
|
|
||||||
ProgressWindow
|
ProgressWindow
|
||||||
ProgressWindowView
|
ProgressWindowView
|
||||||
|
|
||||||
.. autoclass:: ProgressWindow
|
.. autoclass:: ProgressWindow
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
.. autoclass:: ProgressWindowView
|
.. autoclass:: ProgressWindowView
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
|
@ -4,23 +4,23 @@ hscommon.gui.selectable_list
|
|||||||
.. automodule:: hscommon.gui.selectable_list
|
.. automodule:: hscommon.gui.selectable_list
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
|
|
||||||
Selectable
|
Selectable
|
||||||
SelectableList
|
SelectableList
|
||||||
GUISelectableList
|
GUISelectableList
|
||||||
GUISelectableListView
|
GUISelectableListView
|
||||||
|
|
||||||
.. autoclass:: Selectable
|
.. autoclass:: Selectable
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
.. autoclass:: SelectableList
|
.. autoclass:: SelectableList
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
.. autoclass:: GUISelectableList
|
.. autoclass:: GUISelectableList
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
.. autoclass:: GUISelectableListView
|
.. autoclass:: GUISelectableListView
|
||||||
:members:
|
:members:
|
||||||
|
@ -2,18 +2,18 @@ hscommon.gui.table
|
|||||||
==================
|
==================
|
||||||
|
|
||||||
.. automodule:: hscommon.gui.table
|
.. automodule:: hscommon.gui.table
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
|
|
||||||
Table
|
Table
|
||||||
Row
|
Row
|
||||||
GUITable
|
GUITable
|
||||||
GUITableView
|
GUITableView
|
||||||
|
|
||||||
.. autoclass:: Table
|
.. autoclass:: Table
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
.. autoclass:: Row
|
.. autoclass:: Row
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
@ -21,6 +21,6 @@ hscommon.gui.table
|
|||||||
.. autoclass:: GUITable
|
.. autoclass:: GUITable
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
.. autoclass:: GUITableView
|
.. autoclass:: GUITableView
|
||||||
:members:
|
:members:
|
||||||
|
@ -4,10 +4,10 @@ hscommon.gui.text_field
|
|||||||
.. automodule:: hscommon.gui.text_field
|
.. automodule:: hscommon.gui.text_field
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
|
|
||||||
TextField
|
TextField
|
||||||
TextFieldView
|
TextFieldView
|
||||||
|
|
||||||
.. autoclass:: TextField
|
.. autoclass:: TextField
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
@ -2,17 +2,16 @@ hscommon.gui.tree
|
|||||||
=================
|
=================
|
||||||
|
|
||||||
.. automodule:: hscommon.gui.tree
|
.. automodule:: hscommon.gui.tree
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
|
|
||||||
Tree
|
Tree
|
||||||
Node
|
Node
|
||||||
|
|
||||||
.. autoclass:: Tree
|
.. autoclass:: Tree
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
.. autoclass:: Node
|
.. autoclass:: Node
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ hscommon
|
|||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
:glob:
|
:glob:
|
||||||
|
|
||||||
build
|
build
|
||||||
conflict
|
conflict
|
||||||
desktop
|
desktop
|
||||||
@ -13,4 +13,3 @@ hscommon
|
|||||||
util
|
util
|
||||||
jobprogress/*
|
jobprogress/*
|
||||||
gui/*
|
gui/*
|
||||||
|
|
||||||
|
@ -4,14 +4,13 @@ hscommon.jobprogress.job
|
|||||||
.. automodule:: hscommon.jobprogress.job
|
.. automodule:: hscommon.jobprogress.job
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
|
|
||||||
Job
|
Job
|
||||||
NullJob
|
NullJob
|
||||||
|
|
||||||
.. autoclass:: Job
|
.. autoclass:: Job
|
||||||
:members:
|
:members:
|
||||||
:private-members:
|
:private-members:
|
||||||
|
|
||||||
.. autoclass:: NullJob
|
.. autoclass:: NullJob
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
@ -4,9 +4,8 @@ hscommon.jobprogress.performer
|
|||||||
.. automodule:: hscommon.jobprogress.performer
|
.. automodule:: hscommon.jobprogress.performer
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
|
|
||||||
ThreadedJobPerformer
|
ThreadedJobPerformer
|
||||||
|
|
||||||
.. autoclass:: ThreadedJobPerformer
|
.. autoclass:: ThreadedJobPerformer
|
||||||
:members:
|
:members:
|
||||||
|
|
||||||
|
@ -69,6 +69,6 @@ API
|
|||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
core/index
|
core/index
|
||||||
hscommon/index
|
hscommon/index
|
||||||
|
@ -30,8 +30,8 @@ that makes sure that you will **always** keep at least one member of the duplica
|
|||||||
How can I report a bug a suggest a feature?
|
How can I report a bug or suggest a feature?
|
||||||
-------------------------------------------
|
--------------------------------------------
|
||||||
|
|
||||||
dupeGuru is hosted on `Github`_ and it's also where issues are tracked. The best way to report a
|
dupeGuru is hosted on `GitHub`_ and it's also where issues are tracked. The best way to report a
|
||||||
bug or suggest a feature is to sign up on Github and `open an issue`_.
|
bug or suggest a feature is to sign up on GitHub and `open an issue`_.
|
||||||
|
|
||||||
The mark box of a file I want to delete is disabled. What must I do?
|
The mark box of a file I want to delete is disabled. What must I do?
|
||||||
--------------------------------------------------------------------
|
--------------------------------------------------------------------
|
||||||
@ -176,6 +176,5 @@ Preferences are stored elsewhere:
|
|||||||
* Linux: ``~/.config/Hardcoded Software/dupeGuru.conf``
|
* Linux: ``~/.config/Hardcoded Software/dupeGuru.conf``
|
||||||
* Mac OS X: In the built-in ``defaults`` system, as ``com.hardcoded-software.dupeguru``
|
* Mac OS X: In the built-in ``defaults`` system, as ``com.hardcoded-software.dupeguru``
|
||||||
|
|
||||||
.. _Github: https://github.com/arsenetar/dupeguru
|
.. _GitHub: https://github.com/arsenetar/dupeguru
|
||||||
.. _open an issue: https://github.com/arsenetar/dupeguru/wiki/issue-labels
|
.. _open an issue: https://github.com/arsenetar/dupeguru/wiki/issue-labels
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ Contents:
|
|||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
contribute
|
contribute
|
||||||
quick_start
|
quick_start
|
||||||
folders
|
folders
|
||||||
|
@ -14,6 +14,10 @@ Preferences
|
|||||||
If you check this box, pictures of different dimensions will be allowed in the same
|
If you check this box, pictures of different dimensions will be allowed in the same
|
||||||
duplicate group.
|
duplicate group.
|
||||||
|
|
||||||
|
**Match pictures of different rotations:**
|
||||||
|
If you check this box, pictures of different rotations will be allowed in the same
|
||||||
|
duplicate group.
|
||||||
|
|
||||||
.. _filter-hardness:
|
.. _filter-hardness:
|
||||||
|
|
||||||
**Filter Hardness:**
|
**Filter Hardness:**
|
||||||
@ -67,11 +71,11 @@ filename if the filename already exists in the destination.
|
|||||||
The format of the command is the same as what you would write in the command line, except that there
|
The format of the command is the same as what you would write in the command line, except that there
|
||||||
are 2 placeholders: **%d** and **%r**. These placeholders will be replaced by the path of the
|
are 2 placeholders: **%d** and **%r**. These placeholders will be replaced by the path of the
|
||||||
selected dupe (%d) and the path of the selected dupe's reference file (%r).
|
selected dupe (%d) and the path of the selected dupe's reference file (%r).
|
||||||
|
|
||||||
If the path to your executable contains space characters, you should enclose it in "" quotes. You
|
If the path to your executable contains space characters, you should enclose it in "" quotes. You
|
||||||
should also enclose placeholders in quotes because it's very possible that paths to dupes and refs
|
should also enclose placeholders in quotes because it's very possible that paths to dupes and refs
|
||||||
will contain spaces. Here's an example custom command::
|
will contain spaces. Here's an example custom command::
|
||||||
|
|
||||||
"C:\Program Files\SuperDiffProg\SuperDiffProg.exe" "%d" "%r"
|
"C:\Program Files\SuperDiffProg\SuperDiffProg.exe" "%d" "%r"
|
||||||
|
|
||||||
.. _inode: http://en.wikipedia.org/wiki/Inode
|
.. _inode: http://en.wikipedia.org/wiki/Inode
|
||||||
|
@ -22,4 +22,4 @@ criterion is used and so on and so on. For example, if your arguments are "Size
|
|||||||
"Filename (Doesn't end with a number)", the reference file that will be picked in a group will be
|
"Filename (Doesn't end with a number)", the reference file that will be picked in a group will be
|
||||||
the biggest file, and if two or more files have the same size, the one that has a filename that
|
the biggest file, and if two or more files have the same size, the one that has a filename that
|
||||||
doesn't end with a number will be used. When all criteria result in ties, the order in which dupes
|
doesn't end with a number will be used. When all criteria result in ties, the order in which dupes
|
||||||
previously were in the group will be used.
|
previously were in the group will be used.
|
||||||
|
@ -180,7 +180,7 @@ any of them.
|
|||||||
the file's path. If the original file is deleted or moved, the link is broken. A hardlink is a
|
the file's path. If the original file is deleted or moved, the link is broken. A hardlink is a
|
||||||
link to the file *itself*. That link is as good as a "real" file. Only when *all* hardlinks to a
|
link to the file *itself*. That link is as good as a "real" file. Only when *all* hardlinks to a
|
||||||
file are deleted is the file itself deleted.
|
file are deleted is the file itself deleted.
|
||||||
|
|
||||||
On OSX and Linux, this feature is supported fully, but under Windows, it's a bit complicated.
|
On OSX and Linux, this feature is supported fully, but under Windows, it's a bit complicated.
|
||||||
Windows XP doesn't support it, but Vista and up support it. However, for the feature to work,
|
Windows XP doesn't support it, but Vista and up support it. However, for the feature to work,
|
||||||
dupeGuru has to run with administrative privileges.
|
dupeGuru has to run with administrative privileges.
|
||||||
|
@ -51,7 +51,7 @@ Tour groupe de doublons contient au moins un fichier dit "référence" et ce fic
|
|||||||
effacé. Par contre, ce que vous pouvez faire c'est de le remplacer par un autre fichier du groupe.
|
effacé. Par contre, ce que vous pouvez faire c'est de le remplacer par un autre fichier du groupe.
|
||||||
Pour ce faire, sélectionnez un fichier du groupe et cliquez sur l'action **Transformer sélectionnés
|
Pour ce faire, sélectionnez un fichier du groupe et cliquez sur l'action **Transformer sélectionnés
|
||||||
en références**.
|
en références**.
|
||||||
|
|
||||||
Notez que si le fichier référence du groupe vient d'un dossier qui a été défini comme dossier
|
Notez que si le fichier référence du groupe vient d'un dossier qui a été défini comme dossier
|
||||||
référence, ce fichier ne peut pas être déplacé de sa position de référence du groupe.
|
référence, ce fichier ne peut pas être déplacé de sa position de référence du groupe.
|
||||||
|
|
||||||
@ -71,7 +71,7 @@ doublons. Example: Nous avons 3 fichiers, A, B et C. Nous les comparons en utili
|
|||||||
de filtre. La comparaison détermine que A est un double de B, A est un double de C, mais que B n'est
|
de filtre. La comparaison détermine que A est un double de B, A est un double de C, mais que B n'est
|
||||||
**pas** un double de C. dupeGuru a ici un problème. Il ne peut pas créer un groupe avec A, B et C.
|
**pas** un double de C. dupeGuru a ici un problème. Il ne peut pas créer un groupe avec A, B et C.
|
||||||
Il décide donc de jeter C hors du groupe. C'est de là que vient la notice '(X hors-groupe)'.
|
Il décide donc de jeter C hors du groupe. C'est de là que vient la notice '(X hors-groupe)'.
|
||||||
|
|
||||||
Cette notice veut dire que si jamais vous effacez tous les doubles contenus dans vos résultats et
|
Cette notice veut dire que si jamais vous effacez tous les doubles contenus dans vos résultats et
|
||||||
que vous faites un nouveau scan, vous pourriez avoir de nouveaux résultats.
|
que vous faites un nouveau scan, vous pourriez avoir de nouveaux résultats.
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ Sélection de dossiers
|
|||||||
|
|
||||||
La première fenêtre qui apparaît lorsque dupeGuru démarre est la fenêtre de sélection de dossiers à scanner. Elle détermine la liste des dossiers qui seront scannés lorsque vous cliquerez sur **Scan**.
|
La première fenêtre qui apparaît lorsque dupeGuru démarre est la fenêtre de sélection de dossiers à scanner. Elle détermine la liste des dossiers qui seront scannés lorsque vous cliquerez sur **Scan**.
|
||||||
|
|
||||||
Pour ajouter un dossier, cliquez sur le bouton **+**. Si vous avez ajouté des dossiers dans le passé, un menu vous permettra de rapidement choisir un de ceux-ci. Autrement, il vous sera demandé d'indiquer le dossier à ajouter.
|
Pour ajouter un dossier, cliquez sur le bouton **+**. Si vous avez ajouté des dossiers dans le passé, un menu vous permettra de rapidement choisir un de ceux-ci. Autrement, il vous sera demandé d'indiquer le dossier à ajouter.
|
||||||
|
|
||||||
Vous pouvez aussi utiliser le drag & drop pour ajouter des dossiers à la liste.
|
Vous pouvez aussi utiliser le drag & drop pour ajouter des dossiers à la liste.
|
||||||
|
|
||||||
@ -26,14 +26,14 @@ Le type d'un dossier s'applique à ses sous-dossiers, excepté si un sous-dossie
|
|||||||
|
|
||||||
Bibliothèques iPhoto et Aperture
|
Bibliothèques iPhoto et Aperture
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
dupeGuru PE supporte iPhoto et Aperture, ce qui veut dire qu'il sait comment lire le contenu de
|
dupeGuru PE supporte iPhoto et Aperture, ce qui veut dire qu'il sait comment lire le contenu de
|
||||||
ces bibliothèques et comment communiquer avec ces applications pour correctement supprimer des
|
ces bibliothèques et comment communiquer avec ces applications pour correctement supprimer des
|
||||||
photos de celles-ci. Pour utiliser cette fonctionnalité, vous devez ajouter iPhoto et/ou
|
photos de celles-ci. Pour utiliser cette fonctionnalité, vous devez ajouter iPhoto et/ou
|
||||||
Aperture avec les boutons spéciaux "Ajouter librairie iPhoto" et "Ajouter librairie Aperture",
|
Aperture avec les boutons spéciaux "Ajouter librairie iPhoto" et "Ajouter librairie Aperture",
|
||||||
qui apparaissent quand on clique sur le petit "+". Les dossiers ajoutés seront alors
|
qui apparaissent quand on clique sur le petit "+". Les dossiers ajoutés seront alors
|
||||||
correctement interprétés par dupeGuru.
|
correctement interprétés par dupeGuru.
|
||||||
|
|
||||||
Quand une photo est supprimée d'iPhoto, elle est envoyée dans la corbeille d'iPhoto.
|
Quand une photo est supprimée d'iPhoto, elle est envoyée dans la corbeille d'iPhoto.
|
||||||
|
|
||||||
Quand une photo est supprimée d'Aperture, il n'est malheureusement pas possible de l'envoyer
|
Quand une photo est supprimée d'Aperture, il n'est malheureusement pas possible de l'envoyer
|
||||||
@ -45,13 +45,13 @@ Le type d'un dossier s'applique à ses sous-dossiers, excepté si un sous-dossie
|
|||||||
|
|
||||||
Bibliothèques iTunes
|
Bibliothèques iTunes
|
||||||
^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
dupeGuru ME supporte iTunes, ce qui veut dire qu'il sait comment lire le contenu de sa
|
dupeGuru ME supporte iTunes, ce qui veut dire qu'il sait comment lire le contenu de sa
|
||||||
bibliothèque et comment communiquer avec iTunes pour correctement supprimer des chansons de sa
|
bibliothèque et comment communiquer avec iTunes pour correctement supprimer des chansons de sa
|
||||||
bibliothèque. Pour utiliser cette fonctionnalité, vous devez ajouter iTunes avec le bouton
|
bibliothèque. Pour utiliser cette fonctionnalité, vous devez ajouter iTunes avec le bouton
|
||||||
spécial "Ajouter librairie iTunes", qui apparait quand on clique sur le petit "+". Le dossier
|
spécial "Ajouter librairie iTunes", qui apparait quand on clique sur le petit "+". Le dossier
|
||||||
ajouté sera alors correctement interprété par dupeGuru.
|
ajouté sera alors correctement interprété par dupeGuru.
|
||||||
|
|
||||||
Quand une chanson est supprimée d'iTunes, elle est envoyée à la corbeille du système, comme un
|
Quand une chanson est supprimée d'iTunes, elle est envoyée à la corbeille du système, comme un
|
||||||
fichier normal. La différence ici, c'est qu'après la suppression, iTunes est correctement mis au
|
fichier normal. La différence ici, c'est qu'après la suppression, iTunes est correctement mis au
|
||||||
fait de cette suppression et retire sa référence à cette chanson de sa bibliothèque.
|
fait de cette suppression et retire sa référence à cette chanson de sa bibliothèque.
|
||||||
|
@ -21,7 +21,7 @@ Contents:
|
|||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
quick_start
|
quick_start
|
||||||
folders
|
folders
|
||||||
preferences
|
preferences
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user