Update changelog

Add vscode extension recommendation
Fix internal links in CONTRIBUTING.md
2026-01-25 16:11:39 +00:00 · 2022-03-25 23:37:46 -05:00 · 2022-03-21 22:27:16 -05:00 · 2022-03-21 22:19:58 -05:00 · 2022-03-21 22:18:22 -05:00 · 2022-03-21 22:04:45 -05:00
218 changed files with 8865 additions and 4510 deletions
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -0,0 +1,13 @@
 # These are supported funding model platforms
 github: arsenetar
 patreon: # Replace with a single Patreon username
 open_collective: # Replace with a single Open Collective username
 ko_fi: # Replace with a single Ko-fi username
 tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 liberapay: # Replace with a single Liberapay username
 issuehunt: # Replace with a single IssueHunt username
 otechie: # Replace with a single Otechie username
 lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
 custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -0,0 +1,50 @@
 name: "CodeQL"
 on:
  push:
    branches: [master]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [master]
  schedule:
    - cron: "24 20 * * 2"
 jobs:
  # Runs one CodeQL analysis per entry in the language matrix below.
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    # Minimum token permissions: read the repository and upload scan results.
    permissions:
      actions: read
      contents: read
      security-events: write
    strategy:
      # Let the other language finish even if one analysis fails.
      fail-fast: false
      matrix:
        language: ["cpp", "python"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v2
      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v1
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.
          # queries: ./path/to/local/query, your-org/your-repo/queries@main
      # The cpp run builds the project explicitly through the Makefile's
      # `modules` target instead of relying on autobuild.
      - if: matrix.language == 'cpp'
        name: Build Cpp
        run: |
          sudo apt-get update
          # -y keeps the install non-interactive regardless of the runner's apt configuration
          sudo apt-get install -y python3-pyqt5
          make modules
      - if: matrix.language == 'python'
        name: Autobuild
        uses: github/codeql-action/autobuild@v1
      # Analysis
      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v1
--- a/.github/workflows/default.yml
+++ b/.github/workflows/default.yml
@@ -0,0 +1,84 @@
 # Workflow lints and checks formatting in parallel, then runs tests on all platforms
 name: Default CI/CD
 on:
  push:
    branches: [master]
  pull_request:
    branches: [master]
 jobs:
  # Static analysis gate: the test job lists this job in its `needs`.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.10
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML keeps the string "3.10" instead of the float 3.1
          python-version: "3.10"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt -r requirements-extra.txt
      # flake8 exits non-zero when it reports violations, which fails the job
      - name: Lint with flake8
        run: |
          flake8 .
  # Formatting gate: the test job lists this job in its `needs`.
  format:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.10
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML keeps the string "3.10" instead of the float 3.1
          python-version: "3.10"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt -r requirements-extra.txt
      # --check makes black report files it would reformat and exit non-zero,
      # instead of rewriting them in the throwaway checkout and exiting 0.
      - name: Check format with black
        run: |
          black --check .
  # Builds the python modules and runs the test suite once lint and format pass.
  test:
    needs: [lint, format]
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        # All versions are quoted so they stay strings; an unquoted 3.10 would
        # be read as the float 3.1.
        python-version: ["3.7", "3.8", "3.9", "3.10"]
        # macOS and Windows are only tested on 3.10; the older versions run on
        # Ubuntu only.
        exclude:
          - os: macos-latest
            python-version: "3.7"
          - os: macos-latest
            python-version: "3.8"
          - os: macos-latest
            python-version: "3.9"
          - os: windows-latest
            python-version: "3.7"
          - os: windows-latest
            python-version: "3.8"
          - os: windows-latest
            python-version: "3.9"
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt -r requirements-extra.txt
      - name: Build python modules
        run: |
          python build.py --modules
      - name: Run tests
        run: |
          pytest core hscommon
      # Only the Ubuntu runs publish the compiled *.so modules, one artifact
      # per Python version.
      - name: Upload Artifacts
        if: matrix.os == 'ubuntu-latest'
        uses: actions/upload-artifact@v3
        with:
          name: modules ${{ matrix.python-version }}
          path: ${{ github.workspace }}/**/*.so
--- a/.gitignore
+++ b/.gitignore
@@ -1,28 +1,111 @@
-.DS_Store
+# Byte-compiled / optimized / DLL files
-__pycache__
+__pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
-*.waf*
+#*.pot
 .lock-waf*
 .tox
 /tags
-build
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-dist
+__pypackages__/
 env*
 /deps
 cocoa/autogen
-/run.py
+# Environments
-/cocoa/*/Info.plist
+.env
-/cocoa/*/build
+.venv
 env*/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # macOS
 .DS_Store
 # Visual Studio Code
 .vscode/*
 !.vscode/settings.json
 #!.vscode/tasks.json
 #!.vscode/launch.json
 !.vscode/extensions.json
 !.vscode/*.code-snippets
 # Local History for Visual Studio Code
 .history/
 # Built Visual Studio Code Extensions
 *.vsix
 # dupeGuru Specific
 /qt/*_rc.py
 /help/*/conf.py
 /help/*/changelog.rst
-/transifex
+cocoa/autogen
 /cocoa/*/Info.plist
 /cocoa/*/build
-*.pyd
+*.waf*
-*.exe
+.lock-waf*
-*.spec
+/tags
 .vscode
--- a/.sonarcloud.properties
+++ b/.sonarcloud.properties
@@ -0,0 +1 @@
 sonar.python.version=3.7, 3.8, 3.9, 3.10
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,27 +0,0 @@
 sudo: false
 language: python
 install: 
        - pip3 install -r requirements.txt -r requirements-extra.txt
 script: tox
 matrix:
        include:
                - os: "linux"
                  dist: "xenial"
                  python: "3.6"
                - os: "linux"
                  dist: "xenial"
                  python: "3.7"
                - os: "linux"
                  dist: "focal"
                  python: "3.8"
                - os: "linux"
                  dist: "focal"
                  python: "3.9"
                - os: "windows"
                  language: shell
                  python: "3.8"
                  env: "PATH=/c/python38:/c/python38/Scripts:$PATH"
                  before_install:
                          - choco install python --version=3.8.6
                          - cp /c/python38/python.exe /c/python38/python3.exe
                  script: tox -e py38
--- a/.tx/config
+++ b/.tx/config
@@ -1,19 +1,25 @@
 [main]
 host = https://www.transifex.com
-[dupeguru.core]
+[o:voltaicideas:p:dupeguru-1:r:columns]
 file_filter = locale/<lang>/LC_MESSAGES/core.po
 source_file = locale/core.pot
 source_lang = en
 type = PO
 [dupeguru.columns]
 file_filter = locale/<lang>/LC_MESSAGES/columns.po
 source_file = locale/columns.pot
 source_lang = en
 type        = PO
-[dupeguru.ui]
+[o:voltaicideas:p:dupeguru-1:r:core]
 file_filter = locale/<lang>/LC_MESSAGES/core.po
 source_file = locale/core.pot
 source_lang = en
 type        = PO
 [o:voltaicideas:p:dupeguru-1:r:qtlib]
 file_filter = qtlib/locale/<lang>/LC_MESSAGES/qtlib.po
 source_file = qtlib/locale/qtlib.pot
 source_lang = en
 type        = PO
 [o:voltaicideas:p:dupeguru-1:r:ui]
 file_filter = locale/<lang>/LC_MESSAGES/ui.po
 source_file = locale/ui.pot
 source_lang = en
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -0,0 +1,10 @@
 {
    // List of extensions which should be recommended for users of this workspace.
    "recommendations": [
        "redhat.vscode-yaml",
        "ms-python.vscode-pylance",
        "ms-python.python"
    ],
    // List of extensions recommended by VS Code that should not be recommended for users of this workspace.
    "unwantedRecommendations": []
 }
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -0,0 +1,12 @@
 {
    "python.formatting.provider": "black",
    "cSpell.words": [
        "Dupras",
        "hscommon"
    ],
    "python.languageServer": "Pylance",
    "yaml.schemaStore.enable": true,
    "yaml.schemas": {
        "https://json.schemastore.org/github-workflow.json": ".github/workflows/*.yml"
    }
 }
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -0,0 +1,88 @@
 # Contributing to dupeGuru
 The following is a set of guidelines and information for contributing to dupeGuru.
 #### Table of Contents
 [Things to Know Before Starting](#things-to-know-before-starting)
 [Ways to Contribute](#ways-to-contribute)
  * [Reporting Bugs](#reporting-bugs)
  * [Suggesting Enhancements](#suggesting-enhancements)
  * [Localization](#localization)
  * [Code Contribution](#code-contribution)
  * [Pull Requests](#pull-requests)
 [Style Guides](#style-guides)
  * [Git Commit Messages](#git-commit-messages)
  * [Python Style Guide](#python-style-guide)
  * [Documentation Style Guide](#documentation-style-guide)
 [Additional Notes](#additional-notes)
  * [Issue and Pull Request Labels](#issue-and-pull-request-labels)
 ## Things to Know Before Starting
 **TODO**
 ## Ways to Contribute
 ### Reporting Bugs
 **TODO**
 ### Suggesting Enhancements
 **TODO**
 ### Localization
 **TODO**
 ### Code Contribution
 **TODO**
 ### Pull Requests
 Please follow these steps to have your contribution considered by the maintainers:
 1. Keep each pull request specific to one feature or bug.
 2. Follow the [style guides](#style-guides).
 3. After you submit your pull request, verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing <details><summary>What if the status checks are failing?</summary>If a status check is failing, and you believe that the failure is unrelated to your change, please leave a comment on the pull request explaining why you believe the failure is unrelated. A maintainer will re-run the status check for you. If we conclude that the failure was a false positive, then we will open an issue to track that problem with our status check suite.</details>
 While the prerequisites above must be satisfied prior to having your pull request reviewed, the reviewer(s) may ask you to complete additional design work, tests, or other changes before your pull request can be ultimately accepted.
 ## Style Guides
 ### Git Commit Messages
 - Use the present tense ("Add feature" not "Added feature")
 - Use the imperative mood ("Move cursor to..." not "Moves cursor to...")
 - Limit the first line to 72 characters or less
 - Reference issues and pull requests liberally after the first line
 ### Python Style Guide
 - All files are formatted with [Black](https://github.com/psf/black)
 - Follow [PEP 8](https://peps.python.org/pep-0008/) as much as practical
 - Pass [flake8](https://flake8.pycqa.org/en/latest/) linting
 - Include [PEP 484](https://peps.python.org/pep-0484/) type hints (new code)
 ### Documentation Style Guide
 **TODO**
 ## Additional Notes
 ### Issue and Pull Request Labels
 This section lists and describes the various labels used with issues and pull requests.  Each of the labels is listed with a search link as well.
 #### Issue Type and Status
 | Label name | Search | Description |
 |------------|--------|-------------|
 | `enhancement` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aenhancement) | Feature requests and enhancements. |
 | `bug` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Abug) | Bug reports. |
 | `duplicate` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aduplicate) | Issue is a duplicate of existing issue. |
 | `needs-reproduction` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aneeds-reproduction) | A bug that has not been able to be reproduced. |
 | `needs-information` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aneeds-information) | More information needs to be collected about these problems or feature requests (e.g. steps to reproduce). |
 | `blocked` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Ablocked) | Issue blocked by other issues. |
 | `beginner` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Abeginner) | Less complex issues for users who want to start contributing. |
 #### Category Labels
 | Label name | Search | Description |
 |------------|--------|-------------|
 | `3rd party` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3A%223rd%20party%22)  | Related to a 3rd party dependency. |
 | `crash` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Acrash) | Related to crashes (complete, or unhandled). |
 | `documentation` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Adocumentation) | Related to any documentation. |
 | `linux` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Alinux) | Related to running on Linux. |
 | `mac` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Amac) | Related to running on macOS. |
 | `performance` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aperformance) | Related to the performance. |
 | `ui` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Aui)| Related to the visual design. |
 | `windows` | [search](https://github.com/arsenetar/dupeguru/issues?q=is%3Aopen+is%3Aissue+label%3Awindows) | Related to running on Windows. |
 #### Pull Request Labels
 None at this time. If the volume of pull requests increases, labels may be added to help manage them.
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -0,0 +1,6 @@
 recursive-include core *.h
 recursive-include core *.m
 include run.py
 graft locale
 graft help
 graft qtlib/locale
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 PYTHON ?= python3
 PYTHON_VERSION_MINOR := $(shell ${PYTHON} -c "import sys; print(sys.version_info.minor)")
 PYRCC5 ?= pyrcc5
-REQ_MINOR_VERSION = 6
+REQ_MINOR_VERSION = 7
 PREFIX ?= /usr/local
 # Windows compatibility via Msys2
@@ -53,7 +53,7 @@ pyc: | env
 	${VENV_PYTHON} -m compileall ${packages}
 reqs:
-ifneq ($(shell test $(PYTHON_VERSION_MINOR) -gt $(REQ_MINOR_VERSION); echo $$?),0)
+ifneq ($(shell test $(PYTHON_VERSION_MINOR) -ge $(REQ_MINOR_VERSION); echo $$?),0)
 	$(error "Python 3.${REQ_MINOR_VERSION}+ required. Aborting.")
 endif
 ifndef NO_VENV
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ For windows instructions see the [Windows Instructions](Windows.md).
 For macos instructions (qt version) see the [macOS Instructions](macos.md).
 ### Prerequisites
-* [Python 3.6+][python]
+* [Python 3.7+][python]
 * PyQt5
 ### System Setup
--- a/Windows.md
+++ b/Windows.md
@@ -2,7 +2,7 @@
 ### Prerequisites
- [Python 3.6+][python]
+- [Python 3.7+][python]
 - [Visual Studio 2019][vs] or [Visual Studio Build Tools 2019][vsBuildTools] with the Windows 10 SDK
 - [nsis][nsis] (for installer creation)
 - [msys2][msys2] (for using makefile method)
@@ -16,7 +16,7 @@ After installing python it is recommended to update setuptools before compiling
 More details on setting up python for compiling packages on windows can be found on the [python wiki][pythonWindowsCompilers]. Take note of the required vc++ versions.
 ### With build.py (preferred)
-To build with a different python version 3.6 vs 3.8 or 32 bit vs 64 bit specify that version instead of -3.8 to the `py` command below.  If you want to build additional versions while keeping all virtual environments setup use a different location for each virtual environment.
+To build with a different python version 3.7 vs 3.8 or 32 bit vs 64 bit specify that version instead of -3.8 to the `py` command below.  If you want to build additional versions while keeping all virtual environments setup use a different location for each virtual environment.
    $ cd <dupeGuru directory>
    $ py -3.8 -m venv .\env
--- a/build.py
+++ b/build.py
@@ -4,19 +4,17 @@
 # which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html
-import os
+from pathlib import Path
-import os.path as op
+import sys
 from optparse import OptionParser
 import shutil
-from pathlib import Path
+from multiprocessing import Pool
 from setuptools import setup, Extension
 from setuptools import sandbox
 from hscommon import sphinxgen
 from hscommon.build import (
    add_to_pythonpath,
    print_and_do,
    move_all,
    fix_qt_resource_file,
 )
 from hscommon import loc
@@ -31,12 +29,9 @@ def parse_args():
        dest="clean",
        help="Clean build folder before building",
    )
-    parser.add_option(
+    parser.add_option("--doc", action="store_true", dest="doc", help="Build only the help file (en)")
-        "--doc", action="store_true", dest="doc", help="Build only the help file"
+    parser.add_option("--alldoc", action="store_true", dest="all_doc", help="Build only the help file in all languages")
-    )
+    parser.add_option("--loc", action="store_true", dest="loc", help="Build only localization")
    parser.add_option(
        "--loc", action="store_true", dest="loc", help="Build only localization"
    )
    parser.add_option(
        "--updatepot",
        action="store_true",
@@ -61,26 +56,20 @@ def parse_args():
        dest="modules",
        help="Build the python modules.",
    )
    parser.add_option(
        "--importpo",
        action="store_true",
        dest="importpo",
        help="Import all PO files downloaded from transifex.",
    )
    (options, args) = parser.parse_args()
    return options
-def build_help():
+def build_one_help(language):
-    print("Generating Help")
+    print("Generating Help in {}".format(language))
-    current_path = op.abspath(".")
+    current_path = Path(".").absolute()
-    help_basepath = op.join(current_path, "help", "en")
+    changelog_path = current_path.joinpath("help", "changelog")
    help_destpath = op.join(current_path, "build", "help")
    changelog_path = op.join(current_path, "help", "changelog")
    tixurl = "https://github.com/arsenetar/dupeguru/issues/{}"
-    confrepl = {"language": "en"}
+    changelogtmpl = current_path.joinpath("help", "changelog.tmpl")
-    changelogtmpl = op.join(current_path, "help", "changelog.tmpl")
+    conftmpl = current_path.joinpath("help", "conf.tmpl")
-    conftmpl = op.join(current_path, "help", "conf.tmpl")
+    help_basepath = current_path.joinpath("help", language)
    help_destpath = current_path.joinpath("build", "help", language)
    confrepl = {"language": language}
    sphinxgen.gen(
        help_basepath,
        help_destpath,
@@ -92,103 +81,59 @@ def build_help():
    )
 def build_help():
    """Generate the help files for every language in the list below.

    Each language is handed to ``build_one_help`` in its own worker process.
    """
    help_languages = ["en", "de", "fr", "hy", "ru", "uk"]
    worker_count = len(help_languages)
    # Separate processes are used because sphinx seems to cross contaminate
    # the output when several languages are built in the same process.
    with Pool(worker_count) as pool:
        pool.map(build_one_help, help_languages)
 def build_qt_localizations():
-    loc.compile_all_po(op.join("qtlib", "locale"))
+    loc.compile_all_po(Path("qtlib", "locale"))
-    loc.merge_locale_dir(op.join("qtlib", "locale"), "locale")
+    loc.merge_locale_dir(Path("qtlib", "locale"), "locale")
 def build_localizations():
    loc.compile_all_po("locale")
    build_qt_localizations()
-    locale_dest = op.join("build", "locale")
+    locale_dest = Path("build", "locale")
-    if op.exists(locale_dest):
+    if locale_dest.exists():
        shutil.rmtree(locale_dest)
-    shutil.copytree(
+    shutil.copytree("locale", locale_dest, ignore=shutil.ignore_patterns("*.po", "*.pot"))
        "locale", locale_dest, ignore=shutil.ignore_patterns("*.po", "*.pot")
    )
 def build_updatepot():
    print("Building .pot files from source files")
    print("Building core.pot")
-    loc.generate_pot(["core"], op.join("locale", "core.pot"), ["tr"])
+    loc.generate_pot(["core"], Path("locale", "core.pot"), ["tr"])
    print("Building columns.pot")
-    loc.generate_pot(["core"], op.join("locale", "columns.pot"), ["coltr"])
+    loc.generate_pot(["core"], Path("locale", "columns.pot"), ["coltr"])
    print("Building ui.pot")
    # When we're not under OS X, we don't want to overwrite ui.pot because it contains Cocoa locs
    # We want to merge the generated pot with the old pot in the most preserving way possible.
-    ui_packages = ["qt", op.join("cocoa", "inter")]
+    ui_packages = ["qt", Path("cocoa", "inter")]
-    loc.generate_pot(ui_packages, op.join("locale", "ui.pot"), ["tr"], merge=True)
+    loc.generate_pot(ui_packages, Path("locale", "ui.pot"), ["tr"], merge=True)
    print("Building qtlib.pot")
-    loc.generate_pot(["qtlib"], op.join("qtlib", "locale", "qtlib.pot"), ["tr"])
+    loc.generate_pot(["qtlib"], Path("qtlib", "locale", "qtlib.pot"), ["tr"])
 def build_mergepot():
    print("Updating .po files using .pot files")
    loc.merge_pots_into_pos("locale")
-    loc.merge_pots_into_pos(op.join("qtlib", "locale"))
+    loc.merge_pots_into_pos(Path("qtlib", "locale"))
-    # loc.merge_pots_into_pos(op.join("cocoalib", "locale"))
+    # loc.merge_pots_into_pos(Path("cocoalib", "locale"))
 def build_normpo():
    loc.normalize_all_pos("locale")
-    loc.normalize_all_pos(op.join("qtlib", "locale"))
+    loc.normalize_all_pos(Path("qtlib", "locale"))
-    # loc.normalize_all_pos(op.join("cocoalib", "locale"))
+    # loc.normalize_all_pos(Path("cocoalib", "locale"))
 def build_importpo():
    """Import the PO files exported from transifex into the locale folders.

    Expects a ``transifex`` folder in the current working directory holding
    files named ``<resource>_<language>.po``. Each one is copied to
    ``<locale root>/<language>/LC_MESSAGES/<resource>.po``, where the locale
    root is ``qtlib/locale`` for the ``qtlib`` resource and ``locale`` for
    every other resource. Entries that do not follow the naming scheme are
    reported and skipped. All PO files are normalized once the copy is done.

    Raises:
        FileNotFoundError: if the ``transifex`` folder does not exist.
    """
    base_path = Path.cwd()
    # expect a folder named transifex with all the .po files from the exports
    translations_path = base_path.joinpath("transifex")
    # locations where the translation files go
    qtlib_path = base_path.joinpath("qtlib", "locale")
    locale_path = base_path.joinpath("locale")
    for translation in translations_path.iterdir():
        # transifex files are named resource_lang.po so split on first '_'
        resource, separator, language = translation.stem.partition("_")
        # stray entries (folders, .DS_Store, files without a language part) would
        # otherwise raise IndexError or be copied as if they were translations
        is_po_file = translation.is_file() and translation.suffix == ".po"
        if not (is_po_file and resource and separator and language):
            print("Skipping {}: not a resource_lang.po file".format(translation.name))
            continue
        # make sure qtlib resources go to dedicated folder
        output_path = qtlib_path if resource == "qtlib" else locale_path
        output_folder = output_path.joinpath(language, "LC_MESSAGES")
        # create the language folder if it is new
        output_folder.mkdir(parents=True, exist_ok=True)
        # copy the po file over
        shutil.copy(translation, output_folder.joinpath(resource + ".po"))
    # normalize files after complete
    build_normpo()
 def build_pe_modules():
    print("Building PE Modules")
-    exts = [
+    # Leverage setup.py to build modules
-        Extension(
+    sandbox.run_setup("setup.py", ["build_ext", "--inplace"])
            "_block",
            [
                op.join("core", "pe", "modules", "block.c"),
                op.join("core", "pe", "modules", "common.c"),
            ],
        ),
        Extension(
            "_cache",
            [
                op.join("core", "pe", "modules", "cache.c"),
                op.join("core", "pe", "modules", "common.c"),
            ],
        ),
    ]
    exts.append(Extension("_block_qt", [op.join("qt", "pe", "modules", "block.c")]))
    setup(
        script_args=["build_ext", "--inplace"],
        ext_modules=exts,
    )
    move_all("_block_qt*", op.join("qt", "pe"))
    move_all("_block*", op.join("core", "pe"))
    move_all("_cache*", op.join("core", "pe"))
 def build_normal():
@@ -199,21 +144,22 @@ def build_normal():
    print("Building localizations")
    build_localizations()
    print("Building Qt stuff")
-    print_and_do(
+    print_and_do("pyrcc5 {0} > {1}".format(Path("qt", "dg.qrc"), Path("qt", "dg_rc.py")))
-        "pyrcc5 {0} > {1}".format(op.join("qt", "dg.qrc"), op.join("qt", "dg_rc.py"))
+    fix_qt_resource_file(Path("qt", "dg_rc.py"))
    )
    fix_qt_resource_file(op.join("qt", "dg_rc.py"))
    build_help()
 def main():
    if sys.version_info < (3, 7):
        sys.exit("Python < 3.7 is unsupported.")
    options = parse_args()
-    if options.clean:
+    if options.clean and Path("build").exists():
        if op.exists("build"):
        shutil.rmtree("build")
-    if not op.exists("build"):
+    if not Path("build").exists():
-        os.mkdir("build")
+        Path("build").mkdir()
    if options.doc:
        build_one_help("en")
    elif options.all_doc:
        build_help()
    elif options.loc:
        build_localizations()
@@ -225,8 +171,6 @@ def main():
        build_normpo()
    elif options.modules:
        build_pe_modules()
    elif options.importpo:
        build_importpo()
    else:
        build_normal()
--- a/core/__init__.py
+++ b/core/__init__.py
@@ -1,2 +1,2 @@
-__version__ = "4.1.1"
+__version__ = "4.2.1"
 __appname__ = "dupeGuru"
--- a/core/app.py
+++ b/core/app.py
@@ -48,31 +48,31 @@ MSG_MANY_FILES_TO_OPEN = tr(
 class DestType:
-    Direct = 0
+    DIRECT = 0
-    Relative = 1
+    RELATIVE = 1
-    Absolute = 2
+    ABSOLUTE = 2
 class JobType:
-    Scan = "job_scan"
+    SCAN = "job_scan"
-    Load = "job_load"
+    LOAD = "job_load"
-    Move = "job_move"
+    MOVE = "job_move"
-    Copy = "job_copy"
+    COPY = "job_copy"
-    Delete = "job_delete"
+    DELETE = "job_delete"
 class AppMode:
-    Standard = 0
+    STANDARD = 0
-    Music = 1
+    MUSIC = 1
-    Picture = 2
+    PICTURE = 2
 JOBID2TITLE = {
-    JobType.Scan: tr("Scanning for duplicates"),
+    JobType.SCAN: tr("Scanning for duplicates"),
-    JobType.Load: tr("Loading"),
+    JobType.LOAD: tr("Loading"),
-    JobType.Move: tr("Moving"),
+    JobType.MOVE: tr("Moving"),
-    JobType.Copy: tr("Copying"),
+    JobType.COPY: tr("Copying"),
-    JobType.Delete: tr("Sending to Trash"),
+    JobType.DELETE: tr("Sending to Trash"),
 }
@@ -126,20 +126,20 @@ class DupeGuru(Broadcaster):
    PICTURE_CACHE_TYPE = "sqlite"  # set to 'shelve' for a ShelveCache
-    def __init__(self, view):
+    def __init__(self, view, portable=False):
        if view.get_default(DEBUG_MODE_PREFERENCE):
            logging.getLogger().setLevel(logging.DEBUG)
            logging.debug("Debug mode enabled")
        Broadcaster.__init__(self)
        self.view = view
-        self.appdata = desktop.special_folder_path(
+        self.appdata = desktop.special_folder_path(desktop.SpecialFolder.APPDATA, appname=self.NAME, portable=portable)
            desktop.SpecialFolder.AppData, appname=self.NAME
        )
        if not op.exists(self.appdata):
            os.makedirs(self.appdata)
-        self.app_mode = AppMode.Standard
+        self.app_mode = AppMode.STANDARD
        self.discarded_file_count = 0
        self.exclude_list = ExcludeList()
        hash_cache_file = op.join(self.appdata, "hash_cache.db")
        fs.filesdb.connect(hash_cache_file)
        self.directories = directories.Directories(self.exclude_list)
        self.results = results.Results(self)
        self.ignore_list = IgnoreList()
@@ -150,7 +150,7 @@ class DupeGuru(Broadcaster):
            "escape_filter_regexp": True,
            "clean_empty_dirs": False,
            "ignore_hardlink_matches": False,
-            "copymove_dest_type": DestType.Relative,
+            "copymove_dest_type": DestType.RELATIVE,
            "picture_cache_type": self.PICTURE_CACHE_TYPE,
        }
        self.selected_dupes = []
@@ -171,9 +171,9 @@ class DupeGuru(Broadcaster):
    def _recreate_result_table(self):
        if self.result_table is not None:
            self.result_table.disconnect()
-        if self.app_mode == AppMode.Picture:
+        if self.app_mode == AppMode.PICTURE:
            self.result_table = pe.result_table.ResultTable(self)
-        elif self.app_mode == AppMode.Music:
+        elif self.app_mode == AppMode.MUSIC:
            self.result_table = me.result_table.ResultTable(self)
        else:
            self.result_table = se.result_table.ResultTable(self)
@@ -182,20 +182,14 @@ class DupeGuru(Broadcaster):
    def _get_picture_cache_path(self):
        cache_type = self.options["picture_cache_type"]
-        cache_name = (
+        cache_name = "cached_pictures.shelve" if cache_type == "shelve" else "cached_pictures.db"
            "cached_pictures.shelve" if cache_type == "shelve" else "cached_pictures.db"
        )
        return op.join(self.appdata, cache_name)
    def _get_dupe_sort_key(self, dupe, get_group, key, delta):
-        if self.app_mode in (AppMode.Music, AppMode.Picture):
+        if self.app_mode in (AppMode.MUSIC, AppMode.PICTURE) and key == "folder_path":
-            if key == "folder_path":
+            dupe_folder_path = getattr(dupe, "display_folder_path", dupe.folder_path)
                dupe_folder_path = getattr(
                    dupe, "display_folder_path", dupe.folder_path
                )
            return str(dupe_folder_path).lower()
-        if self.app_mode == AppMode.Picture:
+        if self.app_mode == AppMode.PICTURE and delta and key == "dimensions":
            if delta and key == "dimensions":
            r = cmp_value(dupe, key)
            ref_value = cmp_value(get_group().ref, key)
            return get_delta_dimensions(r, ref_value)
@@ -218,11 +212,8 @@ class DupeGuru(Broadcaster):
        return result
    def _get_group_sort_key(self, group, key):
-        if self.app_mode in (AppMode.Music, AppMode.Picture):
+        if self.app_mode in (AppMode.MUSIC, AppMode.PICTURE) and key == "folder_path":
-            if key == "folder_path":
+            dupe_folder_path = getattr(group.ref, "display_folder_path", group.ref.folder_path)
                dupe_folder_path = getattr(
                    group.ref, "display_folder_path", group.ref.folder_path
                )
            return str(dupe_folder_path).lower()
        if key == "percentage":
            return group.percentage
@@ -235,9 +226,7 @@ class DupeGuru(Broadcaster):
    def _do_delete(self, j, link_deleted, use_hardlinks, direct_deletion):
        def op(dupe):
            j.add_progress()
-            return self._do_delete_dupe(
+            return self._do_delete_dupe(dupe, link_deleted, use_hardlinks, direct_deletion)
                dupe, link_deleted, use_hardlinks, direct_deletion
            )
        j.start_job(self.results.mark_count)
        self.results.perform_on_marked(op, True)
@@ -277,11 +266,7 @@ class DupeGuru(Broadcaster):
            return None
    def _get_export_data(self):
-        columns = [
+        columns = [col for col in self.result_table._columns.ordered_columns if col.visible and col.name != "marked"]
            col
            for col in self.result_table.columns.ordered_columns
            if col.visible and col.name != "marked"
        ]
        colnames = [col.display for col in columns]
        rows = []
        for group_id, group in enumerate(self.results.groups):
@@ -293,11 +278,7 @@ class DupeGuru(Broadcaster):
        return colnames, rows
    def _results_changed(self):
-        self.selected_dupes = [
+        self.selected_dupes = [d for d in self.selected_dupes if self.results.get_group_of_duplicate(d) is not None]
            d
            for d in self.selected_dupes
            if self.results.get_group_of_duplicate(d) is not None
        ]
        self.notify("results_changed")
    def _start_job(self, jobid, func, args=()):
@@ -312,34 +293,36 @@ class DupeGuru(Broadcaster):
            self.view.show_message(msg)
    def _job_completed(self, jobid):
-        if jobid == JobType.Scan:
+        if jobid == JobType.SCAN:
            self._results_changed()
            fs.filesdb.commit()
            if not self.results.groups:
                self.view.show_message(tr("No duplicates found."))
            else:
                self.view.show_results_window()
-        if jobid in {JobType.Move, JobType.Delete}:
+        if jobid in {JobType.MOVE, JobType.DELETE}:
            self._results_changed()
-        if jobid == JobType.Load:
+        if jobid == JobType.LOAD:
            self._recreate_result_table()
            self._results_changed()
            self.view.show_results_window()
-        if jobid in {JobType.Copy, JobType.Move, JobType.Delete}:
+        if jobid in {JobType.COPY, JobType.MOVE, JobType.DELETE}:
            if self.results.problems:
                self.problem_dialog.refresh()
                self.view.show_problem_dialog()
            else:
-                msg = {
+                if jobid == JobType.COPY:
-                    JobType.Copy: tr("All marked files were copied successfully."),
+                    msg = tr("All marked files were copied successfully.")
-                    JobType.Move: tr("All marked files were moved successfully."),
+                elif jobid == JobType.MOVE:
-                    JobType.Delete: tr(
+                    msg = tr("All marked files were moved successfully.")
-                        "All marked files were successfully sent to Trash."
+                elif jobid == JobType.DELETE and self.deletion_options.direct:
-                    ),
+                    msg = tr("All marked files were deleted successfully.")
-                }[jobid]
+                else:
                    msg = tr("All marked files were successfully sent to Trash.")
                self.view.show_message(msg)
    def _job_error(self, jobid, err):
-        if jobid == JobType.Load:
+        if jobid == JobType.LOAD:
            msg = tr("Could not load file: {}").format(err)
            self.view.show_message(msg)
            return False
@@ -369,17 +352,17 @@ class DupeGuru(Broadcaster):
    # --- Protected
    def _get_fileclasses(self):
-        if self.app_mode == AppMode.Picture:
+        if self.app_mode == AppMode.PICTURE:
            return [pe.photo.PLAT_SPECIFIC_PHOTO_CLASS]
-        elif self.app_mode == AppMode.Music:
+        elif self.app_mode == AppMode.MUSIC:
            return [me.fs.MusicFile]
        else:
            return [se.fs.File]
    def _prioritization_categories(self):
-        if self.app_mode == AppMode.Picture:
+        if self.app_mode == AppMode.PICTURE:
            return pe.prioritize.all_categories()
-        elif self.app_mode == AppMode.Music:
+        elif self.app_mode == AppMode.MUSIC:
            return me.prioritize.all_categories()
        else:
            return prioritize.all_categories()
@@ -401,35 +384,32 @@ class DupeGuru(Broadcaster):
            self.view.show_message(tr("'{}' does not exist.").format(d))
    def add_selected_to_ignore_list(self):
-        """Adds :attr:`selected_dupes` to :attr:`ignore_list`.
+        """Adds :attr:`selected_dupes` to :attr:`ignore_list`."""
        """
        dupes = self.without_ref(self.selected_dupes)
        if not dupes:
            self.view.show_message(MSG_NO_SELECTED_DUPES)
            return
-        msg = tr(
+        msg = tr("All selected %d matches are going to be ignored in all subsequent scans. Continue?")
            "All selected %d matches are going to be ignored in all subsequent scans. Continue?"
        )
        if not self.view.ask_yes_no(msg % len(dupes)):
            return
        for dupe in dupes:
            g = self.results.get_group_of_duplicate(dupe)
            for other in g:
                if other is not dupe:
-                    self.ignore_list.Ignore(str(other.path), str(dupe.path))
+                    self.ignore_list.ignore(str(other.path), str(dupe.path))
        self.remove_duplicates(dupes)
        self.ignore_list_dialog.refresh()
-    def apply_filter(self, filter):
+    def apply_filter(self, result_filter):
        """Apply a filter ``filter`` to the results so that it shows only dupe groups that match it.
        :param str filter: filter to apply
        """
        self.results.apply_filter(None)
        if self.options["escape_filter_regexp"]:
-            filter = escape(filter, set("()[]\\.|+?^"))
+            result_filter = escape(result_filter, set("()[]\\.|+?^"))
-            filter = escape(filter, "*", ".")
+            result_filter = escape(result_filter, "*", ".")
-        self.results.apply_filter(filter)
+        self.results.apply_filter(result_filter)
        self._results_changed()
    def clean_empty_dirs(self, path):
@@ -443,14 +423,17 @@ class DupeGuru(Broadcaster):
        except FileNotFoundError:
            pass  # we don't care
    def clear_hash_cache(self):
        fs.filesdb.clear()
    def copy_or_move(self, dupe, copy: bool, destination: str, dest_type: DestType):
        source_path = dupe.path
        location_path = first(p for p in self.directories if dupe.path in p)
        dest_path = Path(destination)
-        if dest_type in {DestType.Relative, DestType.Absolute}:
+        if dest_type in {DestType.RELATIVE, DestType.ABSOLUTE}:
            # no filename, no windows drive letter
            source_base = source_path.remove_drive_letter().parent()
-            if dest_type == DestType.Relative:
+            if dest_type == DestType.RELATIVE:
                source_base = source_base[location_path:]
            dest_path = dest_path[source_base]
        if not dest_path.exists():
@@ -483,16 +466,17 @@ class DupeGuru(Broadcaster):
            self.view.show_message(MSG_NO_MARKED_DUPES)
            return
        destination = self.view.select_dest_folder(
-            tr("Select a directory to copy marked files to") if copy
+            tr("Select a directory to copy marked files to")
-            else tr("Select a directory to move marked files to"))
+            if copy
            else tr("Select a directory to move marked files to")
        )
        if destination:
            desttype = self.options["copymove_dest_type"]
-            jobid = JobType.Copy if copy else JobType.Move
+            jobid = JobType.COPY if copy else JobType.MOVE
            self._start_job(jobid, do)
    def delete_marked(self):
-        """Start an async job to send marked duplicates to the trash.
+        """Start an async job to send marked duplicates to the trash."""
        """
        if not self.results.mark_count:
            self.view.show_message(MSG_NO_MARKED_DUPES)
            return
@@ -504,7 +488,7 @@ class DupeGuru(Broadcaster):
            self.deletion_options.direct,
        ]
        logging.debug("Starting deletion job with args %r", args)
-        self._start_job(JobType.Delete, self._do_delete, args=args)
+        self._start_job(JobType.DELETE, self._do_delete, args=args)
    def export_to_xhtml(self):
        """Export current results to XHTML.
@@ -523,9 +507,7 @@ class DupeGuru(Broadcaster):
        The columns and their order in the resulting CSV file is determined in the same way as in
        :meth:`export_to_xhtml`.
        """
-        dest_file = self.view.select_dest_file(
+        dest_file = self.view.select_dest_file(tr("Select a destination for your exported CSV"), "csv")
            tr("Select a destination for your exported CSV"), "csv"
        )
        if dest_file:
            colnames, rows = self._get_export_data()
            try:
@@ -542,9 +524,7 @@ class DupeGuru(Broadcaster):
        try:
            return dupe.get_display_info(group, delta)
        except Exception as e:
-            logging.warning(
+            logging.warning("Exception (type: %s) on GetDisplayInfo for %s: %s", type(e), str(dupe.path), str(e))
                "Exception (type: %s) on GetDisplayInfo for %s: %s",
                type(e), str(dupe.path), str(e))
            return empty_data()
    def invoke_custom_command(self):
@@ -556,19 +536,17 @@ class DupeGuru(Broadcaster):
        """
        cmd = self.view.get_default("CustomCommand")
        if not cmd:
-            msg = tr(
+            msg = tr("You have no custom command set up. Set it up in your preferences.")
                "You have no custom command set up. Set it up in your preferences."
            )
            self.view.show_message(msg)
            return
        if not self.selected_dupes:
            return
-        dupe = self.selected_dupes[0]
+        dupes = self.selected_dupes
-        group = self.results.get_group_of_duplicate(dupe)
+        refs = [self.results.get_group_of_duplicate(dupe).ref for dupe in dupes]
-        ref = group.ref
+        for dupe, ref in zip(dupes, refs):
-        cmd = cmd.replace("%d", str(dupe.path))
+            dupe_cmd = cmd.replace("%d", str(dupe.path))
-        cmd = cmd.replace("%r", str(ref.path))
+            dupe_cmd = dupe_cmd.replace("%r", str(ref.path))
-        match = re.match(r'"([^"]+)"(.*)', cmd)
+            match = re.match(r'"([^"]+)"(.*)', dupe_cmd)
            if match is not None:
                # This code here is because subprocess. Popen doesn't seem to accept, under Windows,
                # executable paths with spaces in it, *even* when they're enclosed in "". So this is
@@ -577,7 +555,7 @@ class DupeGuru(Broadcaster):
                path, exename = op.split(exepath)
                subprocess.Popen(exename + args, shell=True, cwd=path)
            else:
-            subprocess.Popen(cmd, shell=True)
+                subprocess.Popen(dupe_cmd, shell=True)
    def load(self):
        """Load directory selection and ignore list from files in appdata.
@@ -610,7 +588,7 @@ class DupeGuru(Broadcaster):
        def do(j):
            self.results.load_from_xml(filename, self._get_file, j)
-        self._start_job(JobType.Load, do)
+        self._start_job(JobType.LOAD, do)
    def make_selected_reference(self):
        """Promote :attr:`selected_dupes` to reference position within their respective groups.
@@ -623,8 +601,7 @@ class DupeGuru(Broadcaster):
        changed_groups = set()
        for dupe in dupes:
            g = self.results.get_group_of_duplicate(dupe)
-            if g not in changed_groups:
+            if g not in changed_groups and self.results.make_ref(dupe):
                if self.results.make_ref(dupe):
                changed_groups.add(g)
        # It's not always obvious to users what this action does, so to make it a bit clearer,
        # we change our selection to the ref of all changed groups. However, we also want to keep
@@ -634,9 +611,7 @@ class DupeGuru(Broadcaster):
        if not self.result_table.power_marker:
            if changed_groups:
                self.selected_dupes = [
-                    d
+                    d for d in self.selected_dupes if self.results.get_group_of_duplicate(d).ref is d
                    for d in self.selected_dupes
                    if self.results.get_group_of_duplicate(d).ref is d
                ]
            self.notify("results_changed")
        else:
@@ -648,20 +623,17 @@ class DupeGuru(Broadcaster):
            self.notify("results_changed_but_keep_selection")
    def mark_all(self):
-        """Set all dupes in the results as marked.
+        """Set all dupes in the results as marked."""
        """
        self.results.mark_all()
        self.notify("marking_changed")
    def mark_none(self):
-        """Set all dupes in the results as unmarked.
+        """Set all dupes in the results as unmarked."""
        """
        self.results.mark_none()
        self.notify("marking_changed")
    def mark_invert(self):
-        """Invert the marked state of all dupes in the results.
+        """Invert the marked state of all dupes in the results."""
        """
        self.results.mark_invert()
        self.notify("marking_changed")
@@ -679,18 +651,15 @@ class DupeGuru(Broadcaster):
        self.notify("marking_changed")
    def open_selected(self):
-        """Open :attr:`selected_dupes` with their associated application.
+        """Open :attr:`selected_dupes` with their associated application."""
-        """
+        if len(self.selected_dupes) > 10 and not self.view.ask_yes_no(MSG_MANY_FILES_TO_OPEN):
        if len(self.selected_dupes) > 10:
            if not self.view.ask_yes_no(MSG_MANY_FILES_TO_OPEN):
            return
        for dupe in self.selected_dupes:
            desktop.open_path(dupe.path)
    def purge_ignore_list(self):
-        """Remove files that don't exist from :attr:`ignore_list`.
+        """Remove files that don't exist from :attr:`ignore_list`."""
-        """
+        self.ignore_list.filter(lambda f, s: op.exists(f) and op.exists(s))
        self.ignore_list.Filter(lambda f, s: op.exists(f) and op.exists(s))
        self.ignore_list_dialog.refresh()
    def remove_directories(self, indexes):
@@ -719,8 +688,7 @@ class DupeGuru(Broadcaster):
        self.notify("results_changed_but_keep_selection")
    def remove_marked(self):
-        """Removed marked duplicates from the results (without touching the files themselves).
+        """Removed marked duplicates from the results (without touching the files themselves)."""
        """
        if not self.results.mark_count:
            self.view.show_message(MSG_NO_MARKED_DUPES)
            return
@@ -731,8 +699,7 @@ class DupeGuru(Broadcaster):
        self._results_changed()
    def remove_selected(self):
-        """Removed :attr:`selected_dupes` from the results (without touching the files themselves).
+        """Removed :attr:`selected_dupes` from the results (without touching the files themselves)."""
        """
        dupes = self.without_ref(self.selected_dupes)
        if not dupes:
            self.view.show_message(MSG_NO_SELECTED_DUPES)
@@ -770,10 +737,10 @@ class DupeGuru(Broadcaster):
        for group in self.results.groups:
            if group.prioritize(key_func=sort_key):
                count += 1
        if count:
            self.results.refresh_required = True
        self._results_changed()
-        msg = tr("{} duplicate groups were changed by the re-prioritization.").format(
+        msg = tr("{} duplicate groups were changed by the re-prioritization.").format(count)
            count
        )
        self.view.show_message(msg)
    def reveal_selected(self):
@@ -790,6 +757,9 @@ class DupeGuru(Broadcaster):
        self.exclude_list.save_to_xml(p)
        self.notify("save_session")
    def close(self):
        fs.filesdb.close()
    def save_as(self, filename):
        """Save results in ``filename``.
@@ -817,15 +787,13 @@ class DupeGuru(Broadcaster):
        """
        scanner = self.SCANNER_CLASS()
        if not self.directories.has_any_file():
-            self.view.show_message(
+            self.view.show_message(tr("The selected directories contain no scannable file."))
                tr("The selected directories contain no scannable file.")
            )
            return
        # Send relevant options down to the scanner instance
        for k, v in self.options.items():
            if hasattr(scanner, k):
                setattr(scanner, k, v)
-        if self.app_mode == AppMode.Picture:
+        if self.app_mode == AppMode.PICTURE:
            scanner.cache_path = self._get_picture_cache_path()
        self.results.groups = []
        self._recreate_result_table()
@@ -833,21 +801,17 @@ class DupeGuru(Broadcaster):
        def do(j):
            j.set_progress(0, tr("Collecting files to scan"))
-            if scanner.scan_type == ScanType.Folders:
+            if scanner.scan_type == ScanType.FOLDERS:
-                files = list(
+                files = list(self.directories.get_folders(folderclass=se.fs.Folder, j=j))
                    self.directories.get_folders(folderclass=se.fs.Folder, j=j)
                )
            else:
-                files = list(
+                files = list(self.directories.get_files(fileclasses=self.fileclasses, j=j))
                    self.directories.get_files(fileclasses=self.fileclasses, j=j)
                )
            if self.options["ignore_hardlink_matches"]:
                files = self._remove_hardlink_dupes(files)
            logging.info("Scanning %d files" % len(files))
            self.results.groups = scanner.get_dupe_groups(files, self.ignore_list, j)
            self.discarded_file_count = scanner.discarded_file_count
-        self._start_job(JobType.Scan, do)
+        self._start_job(JobType.SCAN, do)
    def toggle_selected_mark_state(self):
        selected = self.without_ref(self.selected_dupes)
@@ -862,13 +826,8 @@ class DupeGuru(Broadcaster):
        self.notify("marking_changed")
    def without_ref(self, dupes):
-        """Returns ``dupes`` with all reference elements removed.
+        """Returns ``dupes`` with all reference elements removed."""
-        """
+        return [dupe for dupe in dupes if self.results.get_group_of_duplicate(dupe).ref is not dupe]
        return [
            dupe
            for dupe in dupes
            if self.results.get_group_of_duplicate(dupe).ref is not dupe
        ]
    def get_default(self, key, fallback_value=None):
        result = nonone(self.view.get_default(key), fallback_value)
@@ -897,18 +856,18 @@ class DupeGuru(Broadcaster):
    @property
    def SCANNER_CLASS(self):
-        if self.app_mode == AppMode.Picture:
+        if self.app_mode == AppMode.PICTURE:
            return pe.scanner.ScannerPE
-        elif self.app_mode == AppMode.Music:
+        elif self.app_mode == AppMode.MUSIC:
            return me.scanner.ScannerME
        else:
            return se.scanner.ScannerSE
    @property
    def METADATA_TO_READ(self):
-        if self.app_mode == AppMode.Picture:
+        if self.app_mode == AppMode.PICTURE:
            return ["size", "mtime", "dimensions", "exif_timestamp"]
-        elif self.app_mode == AppMode.Music:
+        elif self.app_mode == AppMode.MUSIC:
            return [
                "size",
                "mtime",
--- a/core/directories.py
+++ b/core/directories.py
@@ -11,6 +11,7 @@ import logging
 from hscommon.jobprogress import job
 from hscommon.path import Path
 from hscommon.util import FileOrPath
 from hscommon.trans import tr
 from . import fs
@@ -30,9 +31,9 @@ class DirectoryState:
    * DirectoryState.Excluded: Don't scan this folder
    """
-    Normal = 0
+    NORMAL = 0
-    Reference = 1
+    REFERENCE = 1
-    Excluded = 2
+    EXCLUDED = 2
 class AlreadyThereError(Exception):
@@ -82,60 +83,49 @@ class Directories:
            # We iterate even if we only have one item here
            for denied_path_re in self._exclude_list.compiled:
                if denied_path_re.match(str(path.name)):
-                    return DirectoryState.Excluded
+                    return DirectoryState.EXCLUDED
            # return # We still use the old logic to force state on hidden dirs
        # Override this in subclasses to specify the state of some special folders.
        if path.name.startswith("."):
-            return DirectoryState.Excluded
+            return DirectoryState.EXCLUDED
    def _get_files(self, from_path, fileclasses, j):
        for root, dirs, files in os.walk(str(from_path)):
            j.check_if_cancelled()
-            rootPath = Path(root)
+            root_path = Path(root)
-            state = self.get_state(rootPath)
+            state = self.get_state(root_path)
-            if state == DirectoryState.Excluded:
+            if state == DirectoryState.EXCLUDED and not any(p[: len(root_path)] == root_path for p in self.states):
                # Recursively get files from folders with lots of subfolder is expensive. However, there
                # might be a subfolder in this path that is not excluded. What we want to do is to skim
                # through self.states and see if we must continue, or we can stop right here to save time
                if not any(p[: len(rootPath)] == rootPath for p in self.states):
                del dirs[:]
            try:
-                if state != DirectoryState.Excluded:
+                if state != DirectoryState.EXCLUDED:
                    # Old logic
                    if self._exclude_list is None or not self._exclude_list.mark_count:
-                        found_files = [fs.get_file(rootPath + f, fileclasses=fileclasses) for f in files]
+                        found_files = [fs.get_file(root_path + f, fileclasses=fileclasses) for f in files]
                    else:
                        found_files = []
                        # print(f"len of files: {len(files)} {files}")
                        for f in files:
-                            found = False
+                            if not self._exclude_list.is_excluded(root, f):
-                            for expr in self._exclude_list.compiled_files:
+                                found_files.append(fs.get_file(root_path + f, fileclasses=fileclasses))
                                if expr.match(f):
                                    found = True
                                    break
                            if not found:
                                for expr in self._exclude_list.compiled_paths:
                                    if expr.match(root + os.sep + f):
                                        found = True
                                        break
                            if not found:
                                found_files.append(fs.get_file(rootPath + f, fileclasses=fileclasses))
                    found_files = [f for f in found_files if f is not None]
                    # In some cases, directories can be considered as files by dupeGuru, which is
                    # why we have this line below. In fact, there only one case: Bundle files under
                    # OS X... In other situations, this forloop will do nothing.
                    for d in dirs[:]:
-                        f = fs.get_file(rootPath + d, fileclasses=fileclasses)
+                        f = fs.get_file(root_path + d, fileclasses=fileclasses)
                        if f is not None:
                            found_files.append(f)
                            dirs.remove(d)
                    logging.debug(
                        "Collected %d files in folder %s",
                        len(found_files),
-                        str(rootPath),
+                        str(root_path),
                    )
                    for file in found_files:
-                        file.is_ref = state == DirectoryState.Reference
+                        file.is_ref = state == DirectoryState.REFERENCE
                        yield file
            except (EnvironmentError, fs.InvalidPath):
                pass
@@ -147,8 +137,8 @@ class Directories:
                for folder in self._get_folders(subfolder, j):
                    yield folder
            state = self.get_state(from_folder.path)
-            if state != DirectoryState.Excluded:
+            if state != DirectoryState.EXCLUDED:
-                from_folder.is_ref = state == DirectoryState.Reference
+                from_folder.is_ref = state == DirectoryState.REFERENCE
                logging.debug("Yielding Folder %r state: %d", from_folder, state)
                yield from_folder
        except (EnvironmentError, fs.InvalidPath):
@@ -193,8 +183,12 @@ class Directories:
        """
        if fileclasses is None:
            fileclasses = [fs.File]
        file_count = 0
        for path in self._dirs:
            for file in self._get_files(path, fileclasses=fileclasses, j=j):
                file_count += 1
                if type(j) != job.NullJob:
                    j.set_progress(-1, tr("Collected {} files to scan").format(file_count))
                yield file
    def get_folders(self, folderclass=None, j=job.nulljob):
@@ -204,9 +198,13 @@ class Directories:
        """
        if folderclass is None:
            folderclass = fs.Folder
        folder_count = 0
        for path in self._dirs:
            from_folder = folderclass(path)
            for folder in self._get_folders(from_folder, j):
                folder_count += 1
                if type(j) != job.NullJob:
                    j.set_progress(-1, tr("Collected {} folders to scan").format(folder_count))
                yield folder
    def get_state(self, path):
@@ -217,9 +215,9 @@ class Directories:
        # direct match? easy result.
        if path in self.states:
            return self.states[path]
-        state = self._default_state_for_path(path) or DirectoryState.Normal
+        state = self._default_state_for_path(path) or DirectoryState.NORMAL
        # Save non-default states in cache, necessary for _get_files()
-        if state != DirectoryState.Normal:
+        if state != DirectoryState.NORMAL:
            self.states[path] = state
            return state
--- a/core/engine.py
+++ b/core/engine.py
@@ -17,17 +17,31 @@ from hscommon.util import flatten, multi_replace
 from hscommon.trans import tr
 from hscommon.jobprogress import job
-(WEIGHT_WORDS, MATCH_SIMILAR_WORDS, NO_FIELD_ORDER,) = range(3)
+(
    WEIGHT_WORDS,
    MATCH_SIMILAR_WORDS,
    NO_FIELD_ORDER,
 ) = range(3)
 JOB_REFRESH_RATE = 100
 PROGRESS_MESSAGE = tr("%d matches found from %d groups")
 def getwords(s):
    # We decompose the string so that ascii letters with accents can be part of the word.
    s = normalize("NFD", s)
    s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", " ").lower()
    # logging.debug(f"DEBUG chars for: {s}\n"
    #               f"{[c for c in s if ord(c) != 32]}\n"
    #               f"{[ord(c) for c in s if ord(c) != 32]}")
    # HACK We shouldn't ignore non-ascii characters altogether. Any Unicode char
    # above common european characters that cannot be "sanitized" (ie. stripped
    # of their accents, etc.) are preserved as is. The arbitrary limit is
    # obtained from this one: ord("\u037e") GREEK QUESTION MARK
    s = "".join(
-        c for c in s if c in string.ascii_letters + string.digits + string.whitespace
+        c
        for c in s
        if (ord(c) <= 894 and c in string.ascii_letters + string.digits + string.whitespace) or ord(c) > 894
    )
    return [_f for _f in s.split(" ") if _f]  # remove empty elements
@@ -93,20 +107,18 @@ def compare_fields(first, second, flags=()):
        # We don't want to remove field directly in the list. We must work on a copy.
        second = second[:]
        for field1 in first:
-            max = 0
+            max_score = 0
            matched_field = None
            for field2 in second:
                r = compare(field1, field2, flags)
-                if r > max:
+                if r > max_score:
-                    max = r
+                    max_score = r
                    matched_field = field2
-            results.append(max)
+            results.append(max_score)
            if matched_field:
                second.remove(matched_field)
    else:
-        results = [
+        results = [compare(field1, field2, flags) for field1, field2 in zip(first, second)]
            compare(field1, field2, flags) for field1, field2 in zip(first, second)
        ]
    return min(results) if results else 0
@@ -119,9 +131,7 @@ def build_word_dict(objects, j=job.nulljob):
    The result will be a dict with words as keys, lists of objects as values.
    """
    result = defaultdict(set)
-    for object in j.iter_with_progress(
+    for object in j.iter_with_progress(objects, "Prepared %d/%d files", JOB_REFRESH_RATE):
        objects, "Prepared %d/%d files", JOB_REFRESH_RATE
    ):
        for word in unpack_fields(object.words):
            result[word].add(object)
    return result
@@ -156,9 +166,7 @@ def reduce_common_words(word_dict, threshold):
    The exception to this removal are the objects where all the words of the object are common.
    Because if we remove them, we will miss some duplicates!
    """
-    uncommon_words = set(
+    uncommon_words = set(word for word, objects in word_dict.items() if len(objects) < threshold)
        word for word, objects in word_dict.items() if len(objects) < threshold
    )
    for word, objects in list(word_dict.items()):
        if len(objects) < threshold:
            continue
@@ -241,10 +249,11 @@ def getmatches(
        match_flags.append(MATCH_SIMILAR_WORDS)
    if no_field_order:
        match_flags.append(NO_FIELD_ORDER)
-    j.start_job(len(word_dict), tr("0 matches found"))
+    j.start_job(len(word_dict), PROGRESS_MESSAGE % (0, 0))
    compared = defaultdict(set)
    result = []
    try:
        word_count = 0
        # This whole 'popping' thing is there to avoid taking too much memory at the same time.
        while word_dict:
            items = word_dict.popitem()[1]
@@ -259,41 +268,50 @@ def getmatches(
                        result.append(m)
                        if len(result) >= LIMIT:
                            return result
-            j.add_progress(desc=tr("%d matches found") % len(result))
+            word_count += 1
            j.add_progress(desc=PROGRESS_MESSAGE % (len(result), word_count))
    except MemoryError:
        # This is the place where the memory usage is at its peak during the scan.
        # Just continue the process with an incomplete list of matches.
        del compared  # This should give us enough room to call logging.
-        logging.warning(
+        logging.warning("Memory Overflow. Matches: %d. Word dict: %d" % (len(result), len(word_dict)))
            "Memory Overflow. Matches: %d. Word dict: %d"
            % (len(result), len(word_dict))
        )
        return result
    return result
-def getmatches_by_contents(files, j=job.nulljob):
+def getmatches_by_contents(files, bigsize=0, j=job.nulljob):
    """Returns a list of :class:`Match` within ``files`` if their contents is the same.
    :param bigsize: The size in bytes over which we consider files big enough to
                    justify taking samples of md5. If 0, compute md5 as usual.
    :param j: A :ref:`job progress instance <jobs>`.
    """
    size2files = defaultdict(set)
    for f in files:
        if f.size:
        size2files[f.size].add(f)
    del files
    possible_matches = [files for files in size2files.values() if len(files) > 1]
    del size2files
    result = []
-    j.start_job(len(possible_matches), tr("0 matches found"))
+    j.start_job(len(possible_matches), PROGRESS_MESSAGE % (0, 0))
    group_count = 0
    for group in possible_matches:
        for first, second in itertools.combinations(group, 2):
            if first.is_ref and second.is_ref:
                continue  # Don't spend time comparing two ref pics together.
            if first.size == 0 and second.size == 0:
                # skip md5 for zero length files
                result.append(Match(first, second, 100))
                continue
            if first.md5partial == second.md5partial:
                if bigsize > 0 and first.size > bigsize:
                    if first.md5samples == second.md5samples:
                        result.append(Match(first, second, 100))
                else:
                    if first.md5 == second.md5:
                        result.append(Match(first, second, 100))
-        j.add_progress(desc=tr("%d matches found") % len(result))
+        group_count += 1
        j.add_progress(desc=PROGRESS_MESSAGE % (len(result), group_count))
    return result
@@ -391,18 +409,13 @@ class Group:
        You can call this after the duplicate scanning process to free a bit of memory.
        """
-        discarded = set(
+        discarded = set(m for m in self.matches if not all(obj in self.unordered for obj in [m.first, m.second]))
            m
            for m in self.matches
            if not all(obj in self.unordered for obj in [m.first, m.second])
        )
        self.matches -= discarded
        self.candidates = defaultdict(set)
        return discarded
    def get_match_of(self, item):
-        """Returns the match pair between ``item`` and :attr:`ref`.
+        """Returns the match pair between ``item`` and :attr:`ref`."""
        """
        if item is self.ref:
            return
        for m in self._get_matches_for_ref():
@@ -418,8 +431,7 @@ class Group:
        """
        # tie_breaker(ref, dupe) --> True if dupe should be ref
        # Returns True if anything changed during prioritization.
-        master_key_func = lambda x: (-x.is_ref, key_func(x))
+        new_order = sorted(self.ordered, key=lambda x: (-x.is_ref, key_func(x)))
        new_order = sorted(self.ordered, key=master_key_func)
        changed = new_order != self.ordered
        self.ordered = new_order
        if tie_breaker is None:
@@ -442,9 +454,7 @@ class Group:
            self.unordered.remove(item)
            self._percentage = None
            self._matches_for_ref = None
-            if (len(self) > 1) and any(
+            if (len(self) > 1) and any(not getattr(item, "is_ref", False) for item in self):
                not getattr(item, "is_ref", False) for item in self
            ):
                if discard_matches:
                    self.matches = set(m for m in self.matches if item not in m)
            else:
@@ -453,8 +463,7 @@ class Group:
            pass
    def switch_ref(self, with_dupe):
-        """Make the :attr:`ref` dupe of the group switch position with ``with_dupe``.
+        """Make the :attr:`ref` dupe of the group switch position with ``with_dupe``."""
        """
        if self.ref.is_ref:
            return False
        try:
@@ -473,9 +482,7 @@ class Group:
        if self._percentage is None:
            if self.dupes:
                matches = self._get_matches_for_ref()
-                self._percentage = sum(match.percentage for match in matches) // len(
+                self._percentage = sum(match.percentage for match in matches) // len(matches)
                    matches
                )
            else:
                self._percentage = 0
        return self._percentage
@@ -530,12 +537,8 @@ def get_groups(matches):
    orphan_matches = []
    for group in groups:
        orphan_matches += {
-            m
+            m for m in group.discard_matches() if not any(obj in matched_files for obj in [m.first, m.second])
            for m in group.discard_matches()
            if not any(obj in matched_files for obj in [m.first, m.second])
        }
    if groups and orphan_matches:
-        groups += get_groups(
+        groups += get_groups(orphan_matches)  # no job, as it isn't supposed to take a long time
            orphan_matches
        )  # no job, as it isn't supposed to take a long time
    return groups
--- a/core/exclude.py
+++ b/core/exclude.py
@@ -4,6 +4,7 @@
 from .markable import Markable
 from xml.etree import ElementTree as ET
 # TODO: perhaps use regex module for better Unicode support? https://pypi.org/project/regex/
 # also https://pypi.org/project/re2/
 # TODO update the Result list with newly added regexes if possible
@@ -15,7 +16,8 @@ from hscommon.util import FileOrPath
 from hscommon.plat import ISWINDOWS
 import time
-default_regexes = [r"^thumbs\.db$",  # Obsolete after WindowsXP
+default_regexes = [
    r"^thumbs\.db$",  # Obsolete after WindowsXP
    r"^desktop\.ini$",  # Windows metadata
    r"^\.DS_Store$",  # MacOS metadata
    r"^\.Trash\-.*",  # Linux trash directories
@@ -34,6 +36,7 @@ def timer(func):
        end = time.perf_counter_ns()
        print(f"DEBUG: func {func.__name__!r} took {end - start} ns.")
        return value
    return wrapper_timer
@@ -45,11 +48,13 @@ def memoize(func):
        if args not in func.cache:
            func.cache[args] = func(*args)
        return func.cache[args]
    return _memoize
 class AlreadyThereException(Exception):
    """Expression already in the list"""
    def __init__(self, arg="Expression is already in excluded list."):
        super().__init__(arg)
@@ -81,7 +86,7 @@ class ExcludeList(Markable):
            yield self.is_marked(regex), regex
    def __contains__(self, item):
-        return self.isExcluded(item)
+        return self.has_entry(item)
    def __len__(self):
        """Returns the total number of regexes regardless of mark status."""
@@ -145,10 +150,7 @@ class ExcludeList(Markable):
    # @timer
    @memoize
    def _do_compile(self, expr):
        try:
        return re.compile(expr)
        except Exception as e:
            raise(e)
    # @timer
    # @memoize  # probably not worth memoizing this one if we memoize the above
@@ -169,11 +171,11 @@ class ExcludeList(Markable):
    def build_compiled_caches(self, union=False):
        if not union:
-            self._cached_compiled_files =\
+            self._cached_compiled_files = [x for x in self._excluded_compiled if not has_sep(x.pattern)]
-                [x for x in self._excluded_compiled if not has_sep(x.pattern)]
+            self._cached_compiled_paths = [x for x in self._excluded_compiled if has_sep(x.pattern)]
-            self._cached_compiled_paths =\
+            self._dirty = False
                [x for x in self._excluded_compiled if has_sep(x.pattern)]
            return
        marked_count = [x for marked, x in self if marked]
        # If there is no item, the compiled Pattern will be '' and match everything!
        if not marked_count:
@@ -183,28 +185,25 @@ class ExcludeList(Markable):
        else:
            # HACK returned as a tuple to get a free iterator and keep interface
            # the same regardless of whether the client asked for union or not
-            self._cached_compiled_union_all =\
+            self._cached_compiled_union_all = (re.compile("|".join(marked_count)),)
                (re.compile('|'.join(marked_count)),)
            files_marked = [x for x in marked_count if not has_sep(x)]
            if not files_marked:
                self._cached_compiled_union_files = tuple()
            else:
-                self._cached_compiled_union_files =\
+                self._cached_compiled_union_files = (re.compile("|".join(files_marked)),)
                    (re.compile('|'.join(files_marked)),)
            paths_marked = [x for x in marked_count if has_sep(x)]
            if not paths_marked:
                self._cached_compiled_union_paths = tuple()
            else:
-                self._cached_compiled_union_paths =\
+                self._cached_compiled_union_paths = (re.compile("|".join(paths_marked)),)
-                    (re.compile('|'.join(paths_marked)),)
+        self._dirty = False
    @property
    def compiled(self):
        """Should be used by other classes to retrieve the up-to-date list of patterns."""
        if self._use_union:
            if self._dirty:
-                self.build_compiled_caches(True)
+                self.build_compiled_caches(self._use_union)
                self._dirty = False
            return self._cached_compiled_union_all
        return self._excluded_compiled
@@ -215,29 +214,25 @@ class ExcludeList(Markable):
        The interface should be expected to be a generator, even if it returns only
        one item (one Pattern in the union case)."""
        if self._dirty:
-            self.build_compiled_caches(True if self._use_union else False)
+            self.build_compiled_caches(self._use_union)
-            self._dirty = False
+        return self._cached_compiled_union_files if self._use_union else self._cached_compiled_files
        return self._cached_compiled_union_files if self._use_union\
            else self._cached_compiled_files
    @property
    def compiled_paths(self):
        """Returns patterns with only separators in them, for more precise filtering."""
        if self._dirty:
-            self.build_compiled_caches(True if self._use_union else False)
+            self.build_compiled_caches(self._use_union)
-            self._dirty = False
+        return self._cached_compiled_union_paths if self._use_union else self._cached_compiled_paths
        return self._cached_compiled_union_paths if self._use_union\
            else self._cached_compiled_paths
    # ---Public
    def add(self, regex, forced=False):
        """This interface should throw exceptions if there is an error during
        regex compilation"""
-        if self.isExcluded(regex):
+        if self.has_entry(regex):
            # This exception should never be ignored
            raise AlreadyThereException()
        if regex in forbidden_regexes:
-            raise Exception("Forbidden (dangerous) expression.")
+            raise ValueError("Forbidden (dangerous) expression.")
        iscompilable, exception, compiled = self.compile_re(regex)
        if not iscompilable and not forced:
@@ -256,12 +251,27 @@ class ExcludeList(Markable):
        """Returns the number of marked regexes only."""
        return len([x for marked, x in self if marked])
-    def isExcluded(self, regex):
+    def has_entry(self, regex):
        for item in self._excluded:
            if regex == item[0]:
                return True
        return False
    def is_excluded(self, dirname, filename):
        """Tell whether a file should be filtered out by the exclusion patterns.

        The bare ``filename`` is tried first against every pattern in
        ``compiled_files``; only when none of them fully matches is the joined
        ``dirname + sep + filename`` tried against ``compiled_paths``.

        :param dirname: Directory containing the file.
        :param filename: Name of the file, without its directory.
        :return bool: True if any pattern fully matches, False otherwise.
        """
        if any(pattern.fullmatch(filename) for pattern in self.compiled_files):
            return True
        # The full path is only built lazily, while iterating over path patterns.
        return any(pattern.fullmatch(dirname + sep + filename) for pattern in self.compiled_paths)
    def remove(self, regex):
        for item in self._excluded:
            if item[0] == regex:
@@ -280,13 +290,14 @@ class ExcludeList(Markable):
                was_marked = self.is_marked(regex)
                is_compilable, exception, compiled = self.compile_re(newregex)
                # We overwrite the found entry
-                self._excluded[self._excluded.index(item)] =\
+                self._excluded[self._excluded.index(item)] = [newregex, is_compilable, exception, compiled]
                    [newregex, is_compilable, exception, compiled]
                self._remove_compiled(regex)
                break
        if not found:
            return
-        if is_compilable and was_marked:
+        if is_compilable:
            self._add_compiled(newregex)
            if was_marked:
                # Not marked by default when added, add it back
                self.mark(newregex)
@@ -300,7 +311,7 @@ class ExcludeList(Markable):
            if regex not in default_regexes:
                self.unmark(regex)
        for default_regex in default_regexes:
-            if not self.isExcluded(default_regex):
+            if not self.has_entry(default_regex):
                self.add(default_regex)
            self.mark(default_regex)
@@ -326,8 +337,10 @@ class ExcludeList(Markable):
                # "forced" avoids compilation exceptions and adds anyway
                self.add(regex_string, forced=True)
            except AlreadyThereException:
-                logging.error(f"Regex \"{regex_string}\" \
+                logging.error(
-loaded from XML was already present in the list.")
+                    f'Regex "{regex_string}" \
 loaded from XML was already present in the list.'
                )
                continue
            if exclude_item.get("marked") == "y":
                marked.add(regex_string)
@@ -352,6 +365,7 @@ loaded from XML was already present in the list.")
 class ExcludeDict(ExcludeList):
    """Exclusion list holding a set of regular expressions as keys, the compiled
    Pattern, compilation error and compilable boolean as values."""
    # Implementation around a dictionary instead of a list, which requires
    # keeping the index of each string key as a sub-element and updating it
    # whenever an insert/remove is done.
@@ -399,9 +413,9 @@ class ExcludeDict(ExcludeList):
        if self._use_union:
            return
        try:
-            self._excluded_compiled.add(self._excluded[regex]["compiled"])
+            self._excluded_compiled.add(self._excluded.get(regex).get("compiled"))
        except Exception as e:
-            logging.warning(f"Exception while adding regex {regex} to compiled set: {e}")
+            logging.error(f"Exception while adding regex {regex} to compiled set: {e}")
            return
    def is_compilable(self, regex):
@@ -418,14 +432,9 @@ class ExcludeDict(ExcludeList):
        # and other indices should be pushed by one
        for value in self._excluded.values():
            value["index"] += 1
-        self._excluded[regex] = {
+        self._excluded[regex] = {"index": 0, "compilable": iscompilable, "error": exception, "compiled": compiled}
            "index": 0,
            "compilable": iscompilable,
            "error": exception,
            "compiled": compiled
        }
-    def isExcluded(self, regex):
+    def has_entry(self, regex):
        if regex in self._excluded.keys():
            return True
        return False
@@ -451,13 +460,15 @@ class ExcludeDict(ExcludeList):
        previous = self._excluded.pop(regex)
        iscompilable, error, compiled = self.compile_re(newregex)
        self._excluded[newregex] = {
-            "index": previous["index"],
+            "index": previous.get("index"),
            "compilable": iscompilable,
            "error": error,
-            "compiled": compiled
+            "compiled": compiled,
        }
        self._remove_compiled(regex)
-        if was_marked and iscompilable:
+        if iscompilable:
            self._add_compiled(newregex)
            if was_marked:
                self.mark(newregex)
    def save_to_xml(self, outfile):
@@ -492,8 +503,11 @@ def ordered_keys(_dict):
 if ISWINDOWS:
-    def has_sep(x):
+
-        return '\\' + sep in x
+    def has_sep(regexp):
        return "\\" + sep in regexp
 else:
-    def has_sep(x):
+
-        return sep in x
+    def has_sep(regexp):
        return sep in regexp
--- a/core/export.py
+++ b/core/export.py
@@ -131,15 +131,11 @@ def export_to_xhtml(colnames, rows):
            indented = "indented"
        filename = row[1]
        cells = "".join(CELL_TEMPLATE.format(value=value) for value in row[2:])
-        rendered_rows.append(
+        rendered_rows.append(ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells))
            ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells)
        )
        previous_group_id = row[0]
    rendered_rows = "".join(rendered_rows)
    # The main template can't use format because the css code uses {}
-    content = MAIN_TEMPLATE.replace("$colheaders", colheaders).replace(
+    content = MAIN_TEMPLATE.replace("$colheaders", colheaders).replace("$rows", rendered_rows)
        "$rows", rendered_rows
    )
    folder = mkdtemp()
    destpath = op.join(folder, "export.htm")
    fp = open(destpath, "wt", encoding="utf-8")
--- a/core/fs.py
+++ b/core/fs.py
@@ -12,8 +12,13 @@
 # and I'm doing it now.
 import hashlib
 from math import floor
 import logging
 import sqlite3
 from threading import Lock
 from typing import Any
 from hscommon.path import Path
 from hscommon.util import nonone, get_file_ext
 __all__ = [
@@ -30,6 +35,14 @@ __all__ = [
 NOT_SET = object()
 # The goal here is to not run out of memory on really big files. However, the chunk
 # size has to be large enough so that the python loop isn't too costly in terms of
 # CPU.
 CHUNK_SIZE = 1024 * 1024  # 1 MiB
 # Minimum size below which partial hashes don't need to be computed
 MIN_FILE_SIZE = 3 * CHUNK_SIZE  # 3MiB, because we take 3 samples
 class FSError(Exception):
    cls_message = "An error has occured on '{name}' in '{parent}'"
@@ -69,16 +82,86 @@ class OperationError(FSError):
    cls_message = "Operation on '{name}' failed."
-class File:
+class FilesDB:
-    """Represents a file and holds metadata to be used for scanning.
+
    create_table_query = "CREATE TABLE IF NOT EXISTS files (path TEXT PRIMARY KEY, size INTEGER, mtime_ns INTEGER, entry_dt DATETIME, md5 BLOB, md5partial BLOB)"
    drop_table_query = "DROP TABLE files;"
    select_query = "SELECT {key} FROM files WHERE path=:path AND size=:size and mtime_ns=:mtime_ns"
    insert_query = """
        INSERT INTO files (path, size, mtime_ns, entry_dt, {key}) VALUES (:path, :size, :mtime_ns, datetime('now'), :value)
        ON CONFLICT(path) DO UPDATE SET size=:size, mtime_ns=:mtime_ns, entry_dt=datetime('now'), {key}=:value;
    """
-    INITIAL_INFO = {
+    def __init__(self):
-        "size": 0,
+        self.conn = None
-        "mtime": 0,
+        self.cur = None
-        "md5": "",
+        self.lock = None
-        "md5partial": "",
+
-    }
+    def connect(self, path):
        # type: (str, ) -> None
        self.conn = sqlite3.connect(path, check_same_thread=False)
        self.cur = self.conn.cursor()
        self.cur.execute(self.create_table_query)
        self.lock = Lock()
    def clear(self):
        # type: () -> None
        """Discard every cached row by dropping the ``files`` table and recreating it.

        Both statements run while holding ``self.lock``. No commit is issued here.
        """
        with self.lock:
            self.cur.execute(self.drop_table_query)
            self.cur.execute(self.create_table_query)
    def get(self, path, key):
        # type: (Path, str) -> bytes
        """Look up the cached ``key`` column for ``path``.

        A row only counts as a hit when its stored size and mtime (in ns) equal the
        values currently reported by ``path.stat()``.

        :param path: File to look up; ``str(path)`` is the table's primary key.
        :param key: Column name to read. It is interpolated into the SQL text with
                    ``str.format``, so it must be a trusted identifier.
        :return: The stored value, or None when no matching row exists.
        """
        file_stat = path.stat()
        params = {
            "path": str(path),
            "size": file_stat.st_size,
            "mtime_ns": file_stat.st_mtime_ns,
        }
        with self.lock:
            self.cur.execute(self.select_query.format(key=key), params)
            row = self.cur.fetchone()
        return row[0] if row else None
    def put(self, path, key, value):
        # type: (Path, str, Any) -> None
        """Store ``value`` in the ``key`` column of the row for ``path``.

        The row is inserted, or updated in place if ``path`` is already present, and
        is stamped with the size and mtime (in ns) currently reported by
        ``path.stat()``. No commit is issued here.

        :param path: File the value belongs to; ``str(path)`` is the primary key.
        :param key: Column name to write. It is interpolated into the SQL text with
                    ``str.format``, so it must be a trusted identifier.
        :param value: Value bound to the ``key`` column.
        """
        file_stat = path.stat()
        params = {
            "path": str(path),
            "size": file_stat.st_size,
            "mtime_ns": file_stat.st_mtime_ns,
            "value": value,
        }
        with self.lock:
            self.cur.execute(self.insert_query.format(key=key), params)
    def commit(self):
        # type: () -> None
        """Commit the pending transaction on the connection while holding ``self.lock``."""
        with self.lock:
            self.conn.commit()
    def close(self):
        # type: () -> None
        """Close the cursor, then the connection, while holding ``self.lock``.

        No commit is issued here, and ``self.conn``/``self.cur`` are not reset.
        """
        with self.lock:
            self.cur.close()
            self.conn.close()
 filesdb = FilesDB()  # Singleton
 class File:
    """Represents a file and holds metadata to be used for scanning."""
    INITIAL_INFO = {"size": 0, "mtime": 0, "md5": b"", "md5partial": b"", "md5samples": b""}
    # Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of
    # files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become
    # even greater when we take into account read attributes (70%!). Yeah, it's worth it.
@@ -98,38 +181,16 @@ class File:
            try:
                self._read_info(attrname)
            except Exception as e:
-                logging.warning(
+                logging.warning("An error '%s' was raised while decoding '%s'", e, repr(self.path))
                    "An error '%s' was raised while decoding '%s'", e, repr(self.path)
                )
            result = object.__getattribute__(self, attrname)
            if result is NOT_SET:
                result = self.INITIAL_INFO[attrname]
        return result
-    # This offset is where we should start reading the file to get a partial md5
+    def _calc_md5(self):
-    # For audio file, it should be where audio data starts
+        # type: () -> bytes
    def _get_md5partial_offset_and_size(self):
        return (0x4000, 0x4000)  # 16Kb
-    def _read_info(self, field):
+        with self.path.open("rb") as fp:
        if field in ("size", "mtime"):
            stats = self.path.stat()
            self.size = nonone(stats.st_size, 0)
            self.mtime = nonone(stats.st_mtime, 0)
        elif field == "md5partial":
            try:
                fp = self.path.open("rb")
                offset, size = self._get_md5partial_offset_and_size()
                fp.seek(offset)
                partialdata = fp.read(size)
                md5 = hashlib.md5(partialdata)
                self.md5partial = md5.digest()
                fp.close()
            except Exception:
                pass
        elif field == "md5":
            try:
                fp = self.path.open("rb")
            md5 = hashlib.md5()
            # The goal here is to not run out of memory on really big files. However, the chunk
            # size has to be large enough so that the python loop isn't too costly in terms of
@@ -139,10 +200,68 @@ class File:
            while filedata:
                md5.update(filedata)
                filedata = fp.read(CHUNK_SIZE)
-                self.md5 = md5.digest()
+            return md5.digest()
-                fp.close()
+
-            except Exception:
+    def _calc_md5partial(self):
-                pass
+        # type: () -> bytes
        # This offset is where we should start reading the file to get a partial md5
        # For audio file, it should be where audio data starts
        offset, size = (0x4000, 0x4000)
        with self.path.open("rb") as fp:
            fp.seek(offset)
            partialdata = fp.read(size)
            return hashlib.md5(partialdata).digest()
    def _read_info(self, field):
        """Populate the lazily-read attribute named ``field`` on this file.

        * ``size`` / ``mtime``: both are read from a single ``stat()`` call. Errors
          from ``stat()`` propagate to the caller.
        * ``md5`` / ``md5partial``: looked up in the ``filesdb`` cache first; on a
          miss the digest is computed and stored back in the cache.
        * ``md5samples``: hashes three ``CHUNK_SIZE`` samples (at 25%, at 60% and the
          last chunk of the file). Files no larger than ``MIN_FILE_SIZE`` reuse the
          full ``md5`` instead.

        Hashing failures are logged rather than raised. When one happens the
        attribute is left untouched: it is never overwritten with ``None``, so a
        failed read cannot replace the unset marker with a non-bytes value.

        :param field: Name of the attribute to read.
        """
        # print(f"_read_info({field}) for {self}")
        if field in ("size", "mtime"):
            stats = self.path.stat()
            self.size = nonone(stats.st_size, 0)
            self.mtime = nonone(stats.st_mtime, 0)
        elif field == "md5partial":
            try:
                # Keep the lookup in a local: assigning a cache miss (None) directly to
                # the attribute would leave it at None if the computation below fails.
                cached = filesdb.get(self.path, "md5partial")
                if cached is not None:
                    self.md5partial = cached
                else:
                    self.md5partial = self._calc_md5partial()
                    filesdb.put(self.path, "md5partial", self.md5partial)
            except Exception as e:
                logging.warning("Couldn't get md5partial for %s: %s", self.path, e)
        elif field == "md5":
            try:
                # Same pattern as md5partial: only assign once a real digest is known.
                cached = filesdb.get(self.path, "md5")
                if cached is not None:
                    self.md5 = cached
                else:
                    self.md5 = self._calc_md5()
                    filesdb.put(self.path, "md5", self.md5)
            except Exception as e:
                logging.warning("Couldn't get md5 for %s: %s", self.path, e)
        elif field == "md5samples":
            try:
                with self.path.open("rb") as fp:
                    size = self.size
                    # Might as well hash such small files entirely.
                    if size <= MIN_FILE_SIZE:
                        self.md5samples = self.md5
                        return

                    # Chunk at 25% of the file
                    fp.seek(floor(size * 25 / 100), 0)
                    filedata = fp.read(CHUNK_SIZE)
                    md5 = hashlib.md5(filedata)

                    # Chunk at 60% of the file
                    fp.seek(floor(size * 60 / 100), 0)
                    filedata = fp.read(CHUNK_SIZE)
                    md5.update(filedata)

                    # Last chunk of the file (whence=2 seeks relative to the end)
                    fp.seek(-CHUNK_SIZE, 2)
                    filedata = fp.read(CHUNK_SIZE)
                    md5.update(filedata)
                    self.md5samples = md5.digest()
            except Exception as e:
                logging.error(f"Error computing md5samples: {e}")
    def _read_all_info(self, attrnames=None):
        """Cache all possible info.
@@ -157,8 +276,7 @@ class File:
    # --- Public
    @classmethod
    def can_handle(cls, path):
-        """Returns whether this file wrapper class can handle ``path``.
+        """Returns whether this file wrapper class can handle ``path``."""
        """
        return not path.islink() and path.isfile()
    def rename(self, newname):
@@ -176,8 +294,7 @@ class File:
        self.path = destpath
    def get_display_info(self, group, delta):
-        """Returns a display-ready dict of dupe's data.
+        """Returns a display-ready dict of dupe's data."""
        """
        raise NotImplementedError()
    # --- Properties
@@ -197,7 +314,7 @@ class File:
 class Folder(File):
    """A wrapper around a folder path.
-    It has the size/md5 info of a File, but it's value are the sum of its subitems.
+    It has the size/md5 info of a File, but its value is the sum of its subitems.
    """
    __slots__ = File.__slots__ + ("_subfolders",)
@@ -212,15 +329,17 @@ class Folder(File):
        return folders + files
    def _read_info(self, field):
        # print(f"_read_info({field}) for Folder {self}")
        if field in {"size", "mtime"}:
            size = sum((f.size for f in self._all_items()), 0)
            self.size = size
            stats = self.path.stat()
            self.mtime = nonone(stats.st_mtime, 0)
-        elif field in {"md5", "md5partial"}:
+        elif field in {"md5", "md5partial", "md5samples"}:
            # What's sensitive here is that we must make sure that subfiles'
            # md5 are always added up in the same order, but we also want a
            # different md5 if a file gets moved in a different subdirectory.
            def get_dir_md5_concat():
                items = self._all_items()
                items.sort(key=lambda f: f.path)
@@ -234,9 +353,7 @@ class Folder(File):
    @property
    def subfolders(self):
        if self._subfolders is None:
-            subfolders = [
+            subfolders = [p for p in self.path.listdir() if not p.islink() and p.isdir()]
                p for p in self.path.listdir() if not p.islink() and p.isdir()
            ]
            self._subfolders = [self.__class__(p) for p in subfolders]
        return self._subfolders
--- a/core/gui/base.py
+++ b/core/gui/base.py
@@ -15,16 +15,21 @@ class DupeGuruGUIObject(Listener):
        self.app = app
    def directories_changed(self):
        """No-op default, meant to be overridden.

        NOTE(review): presumably invoked when the set of directories changes; confirm
        against the code that dispatches this call.
        """
        # Implemented in child classes
        pass
    def dupes_selected(self):
        """No-op default, meant to be overridden.

        NOTE(review): presumably invoked when the selected dupes change; confirm
        against the code that dispatches this call.
        """
        # Implemented in child classes
        pass
    def marking_changed(self):
        """No-op default, meant to be overridden.

        NOTE(review): presumably invoked when the marked state of results changes;
        confirm against the code that dispatches this call.
        """
        # Implemented in child classes
        pass
    def results_changed(self):
        """No-op default, meant to be overridden.

        NOTE(review): presumably invoked when the result set changes; confirm against
        the code that dispatches this call.
        """
        # Implemented in child classes
        pass
    def results_changed_but_keep_selection(self):
        """No-op default, meant to be overridden.

        NOTE(review): presumably the variant of ``results_changed`` where the current
        selection should be preserved; confirm against the code that dispatches this call.
        """
        # Implemented in child classes
        pass
--- a/core/gui/deletion_options.py
+++ b/core/gui/deletion_options.py
@@ -29,8 +29,7 @@ class DeletionOptionsView:
    """
    def update_msg(self, msg: str):
-        """Update the dialog's prompt with ``str``.
+        """Update the dialog's prompt with ``str``."""
        """
    def show(self):
        """Show the dialog in a modal fashion.
@@ -39,8 +38,7 @@ class DeletionOptionsView:
        """
    def set_hardlink_option_enabled(self, is_enabled: bool):
-        """Enable or disable the widget controlling :attr:`DeletionOptions.use_hardlinks`.
+        """Enable or disable the widget controlling :attr:`DeletionOptions.use_hardlinks`."""
        """
 class DeletionOptions(GUIObject):
@@ -75,8 +73,7 @@ class DeletionOptions(GUIObject):
        return self.view.show()
    def supports_links(self):
-        """Returns whether our platform supports symlinks.
+        """Returns whether our platform supports symlinks."""
        """
        # When on a platform that doesn't implement it, calling os.symlink() (with the wrong number
        # of arguments) raises NotImplementedError, which allows us to gracefully check for the
        # feature.
--- a/core/gui/details_panel.py
+++ b/core/gui/details_panel.py
@@ -32,9 +32,7 @@ class DetailsPanel(GUIObject, DupeGuruGUIObject):
        # we don't want the two sides of the table to display the stats for the same file
        ref = group.ref if group is not None and group.ref is not dupe else None
        data2 = self.app.get_display_info(ref, group, False)
-        columns = self.app.result_table.COLUMNS[
+        columns = self.app.result_table.COLUMNS[1:]  # first column is the 'marked' column
            1:
        ]  # first column is the 'marked' column
        self._table = [(c.display, data1[c.name], data2[c.name]) for c in columns]
    # --- Public
@@ -46,5 +44,4 @@ class DetailsPanel(GUIObject, DupeGuruGUIObject):
    # --- Event Handlers
    def dupes_selected(self):
-        self._refresh()
+        self._view_updated()
        self.view.refresh()
--- a/core/gui/directory_tree.py
+++ b/core/gui/directory_tree.py
@@ -11,7 +11,7 @@ from hscommon.gui.tree import Tree, Node
 from ..directories import DirectoryState
 from .base import DupeGuruGUIObject
-STATE_ORDER = [DirectoryState.Normal, DirectoryState.Reference, DirectoryState.Excluded]
+STATE_ORDER = [DirectoryState.NORMAL, DirectoryState.REFERENCE, DirectoryState.EXCLUDED]
 # Lazily loads children
@@ -36,9 +36,7 @@ class DirectoryNode(Node):
        self._loaded = True
    def update_all_states(self):
-        self._state = STATE_ORDER.index(
+        self._state = STATE_ORDER.index(self._tree.app.directories.get_state(self._directory_path))
            self._tree.app.directories.get_state(self._directory_path)
        )
        for node in self:
            node.update_all_states()
@@ -88,9 +86,9 @@ class DirectoryTree(Tree, DupeGuruGUIObject):
        else:
            # All selected nodes or on second-or-more level, exclude them.
            nodes = self.selected_nodes
-            newstate = DirectoryState.Excluded
+            newstate = DirectoryState.EXCLUDED
-            if all(node.state == DirectoryState.Excluded for node in nodes):
+            if all(node.state == DirectoryState.EXCLUDED for node in nodes):
-                newstate = DirectoryState.Normal
+                newstate = DirectoryState.NORMAL
            for node in nodes:
                node.state = newstate
@@ -105,5 +103,4 @@ class DirectoryTree(Tree, DupeGuruGUIObject):
    # --- Event Handlers
    def directories_changed(self):
-        self._refresh()
+        self._view_updated()
        self.view.refresh()
--- a/core/gui/exclude_list_dialog.py
+++ b/core/gui/exclude_list_dialog.py
@@ -5,8 +5,9 @@
 # which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html
 # from hscommon.trans import tr
 from .exclude_list_table import ExcludeListTable
 from core.exclude import has_sep
 from os import sep
 import logging
@@ -30,9 +31,10 @@ class ExcludeListDialogCore:
        self.refresh()
    def rename_selected(self, newregex):
-        """Renames the selected regex to ``newregex``.
+        """Rename the selected regex to ``newregex``.
-        If there's more than one selected row, the first one is used.
+        If there is more than one selected row, the first one is used.
        :param str newregex: The regex to rename the row's regex to.
        :return bool: true if success, false if error.
        """
        try:
            r = self.exclude_list_table.selected_rows[0]
@@ -44,25 +46,42 @@ class ExcludeListDialogCore:
        return False
    def add(self, regex):
        try:
        self.exclude_list.add(regex)
        except Exception as e:
            raise(e)
        self.exclude_list.mark(regex)
        self.exclude_list_table.add(regex)
    def test_string(self, test_string):
-        """Sets property on row to highlight if its regex matches test_string supplied."""
+        """Set the highlight property on each row when its regex matches the
+        test_string supplied. Return True if any row matched."""
        matched = False
        for row in self.exclude_list_table.rows:
            compiled_regex = self.exclude_list.get_compiled(row.regex)
-            if compiled_regex and compiled_regex.match(test_string):
+
-                matched = True
+            if self.is_match(test_string, compiled_regex):
                row.highlight = True
                matched = True
            else:
                row.highlight = False
        return matched
    def is_match(self, test_string, compiled_regex):
        # This method is like an inverted version of ExcludeList.is_excluded()
        if not compiled_regex:
            return False
        matched = False
        # Test only the filename portion of the path
        if not has_sep(compiled_regex.pattern) and sep in test_string:
            filename = test_string.rsplit(sep, 1)[1]
            if compiled_regex.fullmatch(filename):
                matched = True
            return matched
        # Test the entire path + filename
        if compiled_regex.fullmatch(test_string):
            matched = True
        return matched
    def reset_rows_highlight(self):
        for row in self.exclude_list_table.rows:
            row.highlight = False
--- a/core/gui/exclude_list_table.py
+++ b/core/gui/exclude_list_table.py
@@ -6,19 +6,17 @@ from .base import DupeGuruGUIObject
 from hscommon.gui.table import GUITable, Row
 from hscommon.gui.column import Column, Columns
 from hscommon.trans import trget
 tr = trget("ui")
 class ExcludeListTable(GUITable, DupeGuruGUIObject):
-    COLUMNS = [
+    COLUMNS = [Column("marked", ""), Column("regex", tr("Regular Expressions"))]
-        Column("marked", ""),
-        Column("regex", tr("Regular Expressions"))
-    ]
    def __init__(self, exclude_list_dialog, app):
        GUITable.__init__(self)
        DupeGuruGUIObject.__init__(self, app)
-        self.columns = Columns(self)
+        self._columns = Columns(self)
        self.dialog = exclude_list_dialog
    def rename_selected(self, newname):
@@ -36,7 +34,7 @@ class ExcludeListTable(GUITable, DupeGuruGUIObject):
        return ExcludeListRow(self, self.dialog.exclude_list.is_marked(regex), regex), 0
    def _do_delete(self):
-        self.dalog.exclude_list.remove(self.selected_row.regex)
+        self.dialog.exclude_list.remove(self.selected_row.regex)
    # --- Override
    def add(self, regex):
--- a/core/gui/ignore_list_dialog.py
+++ b/core/gui/ignore_list_dialog.py
@@ -22,11 +22,9 @@ class IgnoreListDialog:
    def clear(self):
        if not self.ignore_list:
            return
-        msg = tr(
+        msg = tr("Do you really want to remove all %d items from the ignore list?") % len(self.ignore_list)
-            "Do you really want to remove all %d items from the ignore list?"
-        ) % len(self.ignore_list)
        if self.app.view.ask_yes_no(msg):
-            self.ignore_list.Clear()
+            self.ignore_list.clear()
            self.refresh()
    def refresh(self):
--- a/core/gui/ignore_list_table.py
+++ b/core/gui/ignore_list_table.py
@@ -22,7 +22,7 @@ class IgnoreListTable(GUITable):
    def __init__(self, ignore_list_dialog):
        GUITable.__init__(self)
-        self.columns = Columns(self)
+        self._columns = Columns(self)
        self.view = None
        self.dialog = ignore_list_dialog
--- a/core/gui/problem_table.py
+++ b/core/gui/problem_table.py
@@ -21,7 +21,7 @@ class ProblemTable(GUITable):
    def __init__(self, problem_dialog):
        GUITable.__init__(self)
-        self.columns = Columns(self)
+        self._columns = Columns(self)
        self.dialog = problem_dialog
    # --- Override
--- a/core/gui/result_table.py
+++ b/core/gui/result_table.py
@@ -41,11 +41,11 @@ class DupeRow(Row):
            # table.DELTA_COLUMNS are always "delta"
            self._delta_columns = self.table.DELTA_COLUMNS.copy()
            dupe_info = self.data
+            if self._group.ref is None:
+                return False
            ref_info = self._group.ref.get_display_info(group=self._group, delta=False)
            for key, value in dupe_info.items():
-                if (key not in self._delta_columns) and (
+                if (key not in self._delta_columns) and (ref_info[key].lower() != value.lower()):
-                    ref_info[key].lower() != value.lower()
-                ):
                    self._delta_columns.add(key)
        return column_name in self._delta_columns
@@ -82,7 +82,7 @@ class ResultTable(GUITable, DupeGuruGUIObject):
    def __init__(self, app):
        GUITable.__init__(self)
        DupeGuruGUIObject.__init__(self, app)
-        self.columns = Columns(self, prefaccess=app, savename="ResultTable")
+        self._columns = Columns(self, prefaccess=app, savename="ResultTable")
        self._power_marker = False
        self._delta_values = False
        self._sort_descriptors = ("name", True)
@@ -190,4 +190,4 @@ class ResultTable(GUITable, DupeGuruGUIObject):
        self.view.refresh()
    def save_session(self):
-        self.columns.save_columns()
+        self._columns.save_columns()
--- a/core/ignore.py
+++ b/core/ignore.py
@@ -20,8 +20,7 @@ class IgnoreList:
    # ---Override
    def __init__(self):
-        self._ignored = {}
+        self.clear()
-        self._count = 0
    def __iter__(self):
        for first, seconds in self._ignored.items():
@@ -32,7 +31,7 @@ class IgnoreList:
        return self._count
    # ---Public
-    def AreIgnored(self, first, second):
+    def are_ignored(self, first, second):
        def do_check(first, second):
            try:
                matches = self._ignored[first]
@@ -42,23 +41,23 @@ class IgnoreList:
        return do_check(first, second) or do_check(second, first)
-    def Clear(self):
+    def clear(self):
        self._ignored = {}
        self._count = 0
-    def Filter(self, func):
+    def filter(self, func):
        """Applies a filter on all ignored items, and remove all matches where func(first,second)
        doesn't return True.
        """
        filtered = IgnoreList()
        for first, second in self:
            if func(first, second):
-                filtered.Ignore(first, second)
+                filtered.ignore(first, second)
        self._ignored = filtered._ignored
        self._count = filtered._count
-    def Ignore(self, first, second):
+    def ignore(self, first, second):
-        if self.AreIgnored(first, second):
+        if self.are_ignored(first, second):
            return
        try:
            matches = self._ignored[first]
@@ -88,8 +87,7 @@ class IgnoreList:
            except KeyError:
                return False
-        if not inner(first, second):
+        if not inner(first, second) and not inner(second, first):
            if not inner(second, first):
            raise ValueError()
    def load_from_xml(self, infile):
@@ -110,7 +108,7 @@ class IgnoreList:
            for sfn in subfile_elems:
                subfile_path = sfn.get("path")
                if subfile_path:
-                    self.Ignore(file_path, subfile_path)
+                    self.ignore(file_path, subfile_path)
    def save_to_xml(self, outfile):
        """Create a XML file that can be used by load_from_xml.
--- a/core/markable.py
+++ b/core/markable.py
@@ -17,9 +17,11 @@ class Markable:
    # in self.__marked, and is not affected by __inverted. Thus, self.mark while __inverted
    # is True will launch _DidUnmark.
    def _did_mark(self, o):
+        # Implemented in child classes
        pass
    def _did_unmark(self, o):
+        # Implemented in child classes
        pass
    def _get_markable_count(self):
--- a/core/me/fs.py
+++ b/core/me/fs.py
@@ -6,7 +6,7 @@
 # which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html
-from hsaudiotag import auto
+import mutagen
 from hscommon.util import get_file_ext, format_size, format_time
 from core.util import format_timestamp, format_perc, format_words, format_dupe_count
@@ -26,6 +26,9 @@ TAG_FIELDS = {
    "comment",
 }
+# This is a temporary workaround for migration from hsaudiotag for the can_handle method
+SUPPORTED_EXTS = {"mp3", "wma", "m4a", "m4p", "ogg", "flac", "aif", "aiff", "aifc"}
 class MusicFile(fs.File):
    INITIAL_INFO = fs.File.INITIAL_INFO.copy()
@@ -50,7 +53,7 @@ class MusicFile(fs.File):
    def can_handle(cls, path):
        if not fs.File.can_handle(path):
            return False
-        return get_file_ext(path.name) in auto.EXT2CLASS
+        return get_file_ext(path.name) in SUPPORTED_EXTS
    def get_display_info(self, group, delta):
        size = self.size
@@ -95,21 +98,23 @@ class MusicFile(fs.File):
        }
    def _get_md5partial_offset_and_size(self):
-        f = auto.File(str(self.path))
+        # No longer calculating the offset and audio size, just whole file
-        return (f.audio_offset, f.audio_size)
+        size = self.path.stat().st_size
+        return (0, size)
    def _read_info(self, field):
        fs.File._read_info(self, field)
        if field in TAG_FIELDS:
-            f = auto.File(str(self.path))
+            # The various conversions here are to make this look like the previous implementation
-            self.audiosize = f.audio_size
+            file = mutagen.File(str(self.path), easy=True)
-            self.bitrate = f.bitrate
+            self.audiosize = self.path.stat().st_size
-            self.duration = f.duration
+            self.bitrate = file.info.bitrate / 1000
-            self.samplerate = f.sample_rate
+            self.duration = file.info.length
-            self.artist = f.artist
+            self.samplerate = file.info.sample_rate
-            self.album = f.album
+            self.artist = ", ".join(file.tags.get("artist") or [])
-            self.title = f.title
+            self.album = ", ".join(file.tags.get("album") or [])
-            self.genre = f.genre
+            self.title = ", ".join(file.tags.get("title") or [])
-            self.comment = f.comment
+            self.genre = ", ".join(file.tags.get("genre") or [])
-            self.year = f.year
+            self.comment = ", ".join(file.tags.get("comment") or [""])
-            self.track = f.track
+            self.year = ", ".join(file.tags.get("date") or [])
+            self.track = (file.tags.get("tracknumber") or [""])[0]
--- a/core/me/scanner.py
+++ b/core/me/scanner.py
@@ -17,9 +17,9 @@ class ScannerME(ScannerBase):
    @staticmethod
    def get_scan_options():
        return [
-            ScanOption(ScanType.Filename, tr("Filename")),
+            ScanOption(ScanType.FILENAME, tr("Filename")),
-            ScanOption(ScanType.Fields, tr("Filename - Fields")),
+            ScanOption(ScanType.FIELDS, tr("Filename - Fields")),
-            ScanOption(ScanType.FieldsNoOrder, tr("Filename - Fields (No Order)")),
+            ScanOption(ScanType.FIELDSNOORDER, tr("Filename - Fields (No Order)")),
-            ScanOption(ScanType.Tag, tr("Tags")),
+            ScanOption(ScanType.TAG, tr("Tags")),
-            ScanOption(ScanType.Contents, tr("Contents")),
+            ScanOption(ScanType.CONTENTS, tr("Contents")),
        ]
--- a/core/pe/cache_shelve.py
+++ b/core/pe/cache_shelve.py
@@ -33,8 +33,7 @@ CacheRow = namedtuple("CacheRow", "id path blocks mtime")
 class ShelveCache:
-    """A class to cache picture blocks in a shelve backend.
+    """A class to cache picture blocks in a shelve backend."""
-    """
    def __init__(self, db=None, readonly=False):
        self.istmp = db is None
@@ -81,9 +80,7 @@ class ShelveCache:
        self.shelve[wrap_id(rowid)] = wrap_path(path_str)
    def _compute_maxid(self):
-        return max(
+        return max((unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1)
-            (unwrap_id(k) for k in self.shelve if k.startswith("id:")), default=1
-        )
    def _get_new_id(self):
        self.maxid += 1
--- a/core/pe/cache_sqlite.py
+++ b/core/pe/cache_sqlite.py
@@ -13,8 +13,7 @@ from .cache import string_to_colors, colors_to_string
 class SqliteCache:
-    """A class to cache picture blocks in a sqlite backend.
+    """A class to cache picture blocks in a sqlite backend."""
-    """
    def __init__(self, db=":memory:", readonly=False):
        # readonly is not used in the sqlite version of the cache
@@ -71,18 +70,14 @@ class SqliteCache:
        except sqlite.OperationalError:
            logging.warning("Picture cache could not set value for key %r", path_str)
        except sqlite.DatabaseError as e:
-            logging.warning(
+            logging.warning("DatabaseError while setting value for key %r: %s", path_str, str(e))
-                "DatabaseError while setting value for key %r: %s", path_str, str(e)
-            )
    def _create_con(self, second_try=False):
        def create_tables():
            logging.debug("Creating picture cache tables.")
            self.con.execute("drop table if exists pictures")
            self.con.execute("drop index if exists idx_path")
-            self.con.execute(
+            self.con.execute("create table pictures(path TEXT, mtime INTEGER, blocks TEXT)")
-                "create table pictures(path TEXT, mtime INTEGER, blocks TEXT)"
-            )
            self.con.execute("create index idx_path on pictures (path)")
        self.con = sqlite.connect(self.dbname, isolation_level=None)
@@ -93,9 +88,7 @@ class SqliteCache:
        except sqlite.DatabaseError as e:  # corrupted db
            if second_try:
                raise  # Something really strange is happening
-            logging.warning(
+            logging.warning("Could not create picture cache because of an error: %s", str(e))
-                "Could not create picture cache because of an error: %s", str(e)
-            )
            self.con.close()
            os.remove(self.dbname)
            self._create_con(second_try=True)
@@ -125,9 +118,7 @@ class SqliteCache:
            raise ValueError(path)
    def get_multiple(self, rowids):
-        sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(
+        sql = "select rowid, blocks from pictures where rowid in (%s)" % ",".join(map(str, rowids))
-            map(str, rowids)
-        )
        cur = self.con.execute(sql)
        return ((rowid, string_to_colors(blocks)) for rowid, blocks in cur)
@@ -148,7 +139,5 @@ class SqliteCache:
                    continue
            todelete.append(rowid)
        if todelete:
-            sql = "delete from pictures where rowid in (%s)" % ",".join(
+            sql = "delete from pictures where rowid in (%s)" % ",".join(map(str, todelete))
-                map(str, todelete)
-            )
            self.con.execute(sql)
--- a/core/pe/exif.py
+++ b/core/pe/exif.py
@@ -193,8 +193,8 @@ class TIFF_file:
        self.s2nfunc = s2n_intel if self.endian == INTEL_ENDIAN else s2n_motorola
    def s2n(self, offset, length, signed=0, debug=False):
-        slice = self.data[offset : offset + length]
+        data_slice = self.data[offset : offset + length]
-        val = self.s2nfunc(slice)
+        val = self.s2nfunc(data_slice)
        # Sign extension ?
        if signed:
            msb = 1 << (8 * length - 1)
@@ -206,7 +206,7 @@ class TIFF_file:
                "Slice for offset %d length %d: %r and value: %d",
                offset,
                length,
-                slice,
+                data_slice,
                val,
            )
        return val
@@ -236,10 +236,10 @@ class TIFF_file:
        for i in range(entries):
            entry = ifd + 2 + 12 * i
            tag = self.s2n(entry, 2)
-            type = self.s2n(entry + 2, 2)
+            entry_type = self.s2n(entry + 2, 2)
-            if not 1 <= type <= 10:
+            if not 1 <= entry_type <= 10:
                continue  # not handled
-            typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][type - 1]
+            typelen = [1, 1, 2, 4, 8, 1, 1, 2, 4, 8][entry_type - 1]
            count = self.s2n(entry + 4, 4)
            if count > MAX_COUNT:
                logging.debug("Probably corrupt. Aborting.")
@@ -247,25 +247,23 @@ class TIFF_file:
            offset = entry + 8
            if count * typelen > 4:
                offset = self.s2n(offset, 4)
-            if type == 2:
+            if entry_type == 2:
                # Special case: nul-terminated ASCII string
                values = str(self.data[offset : offset + count - 1], encoding="latin-1")
            else:
                values = []
-                signed = type == 6 or type >= 8
+                signed = entry_type == 6 or entry_type >= 8
-                for j in range(count):
+                for _ in range(count):
-                    if type in {5, 10}:
+                    if entry_type in {5, 10}:
                        # The type is either 5 or 10
-                        value_j = Fraction(
+                        value_j = Fraction(self.s2n(offset, 4, signed), self.s2n(offset + 4, 4, signed))
-                            self.s2n(offset, 4, signed), self.s2n(offset + 4, 4, signed)
-                        )
                    else:
                        # Not a fraction
                        value_j = self.s2n(offset, typelen, signed)
                    values.append(value_j)
                    offset = offset + typelen
            # Now "values" is either a string or an array
-            a.append((tag, type, values))
+            a.append((tag, entry_type, values))
        return a
@@ -296,13 +294,11 @@ def get_fields(fp):
    logging.debug("Exif header length: %d bytes", length)
    data = fp.read(length - 8)
    data_format = data[0]
-    logging.debug(
+    logging.debug("%s format", {INTEL_ENDIAN: "Intel", MOTOROLA_ENDIAN: "Motorola"}[data_format])
-        "%s format", {INTEL_ENDIAN: "Intel", MOTOROLA_ENDIAN: "Motorola"}[data_format]
-    )
    T = TIFF_file(data)
    # There may be more than one IFD per file, but we only read the first one because others are
    # most likely thumbnails.
-    main_IFD_offset = T.first_IFD()
+    main_ifd_offset = T.first_IFD()
    result = {}
    def add_tag_to_result(tag, values):
@@ -314,8 +310,8 @@ def get_fields(fp):
            return  # don't overwrite data
        result[stag] = values
-    logging.debug("IFD at offset %d", main_IFD_offset)
+    logging.debug("IFD at offset %d", main_ifd_offset)
-    IFD = T.dump_IFD(main_IFD_offset)
+    IFD = T.dump_IFD(main_ifd_offset)
    exif_off = gps_off = 0
    for tag, type, values in IFD:
        if tag == 0x8769:
--- a/core/pe/matchblock.py
+++ b/core/pe/matchblock.py
@@ -95,9 +95,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
                    picture.unicode_path,
                    picture.size,
                )
-                if (
+                if picture.size < 10 * 1024 * 1024:  # We're really running out of memory
-                    picture.size < 10 * 1024 * 1024
-                ):  # We're really running out of memory
                    raise
    except MemoryError:
        logging.warning("Ran out of memory while preparing pictures")
@@ -106,9 +104,7 @@ def prepare_pictures(pictures, cache_path, with_dimensions, j=job.nulljob):
 def get_chunks(pictures):
-    min_chunk_count = (
+    min_chunk_count = multiprocessing.cpu_count() * 2  # have enough chunks to feed all subprocesses
-        multiprocessing.cpu_count() * 2
-    )  # have enough chunks to feed all subprocesses
    chunk_count = len(pictures) // DEFAULT_CHUNK_SIZE
    chunk_count = max(min_chunk_count, chunk_count)
    chunk_size = (len(pictures) // chunk_count) + 1
@@ -185,9 +181,7 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
        j.set_progress(comparison_count, progress_msg)
    j = j.start_subjob([3, 7])
-    pictures = prepare_pictures(
+    pictures = prepare_pictures(pictures, cache_path, with_dimensions=not match_scaled, j=j)
-        pictures, cache_path, with_dimensions=not match_scaled, j=j
-    )
    j = j.start_subjob([9, 1], tr("Preparing for matching"))
    cache = get_cache(cache_path)
    id2picture = {}
@@ -231,12 +225,8 @@ def getmatches(pictures, cache_path, threshold, match_scaled=False, j=job.nulljo
            chunks,
            pictures,
        )  # some wiggle room for the next statements
-        logging.warning(
+        logging.warning("Ran out of memory when scanning! We had %d matches.", len(matches))
-            "Ran out of memory when scanning! We had %d matches.", len(matches)
+        del matches[-len(matches) // 3 :]  # some wiggle room to ensure we don't run out of memory again.
-        )
-        del matches[
-            -len(matches) // 3 :
-        ]  # some wiggle room to ensure we don't run out of memory again.
    pool.close()
    result = []
    myiter = j.iter_with_progress(
--- a/core/pe/modules/block.c
+++ b/core/pe/modules/block.c
@@ -2,9 +2,9 @@
 * Created On: 2010-01-30
 * Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
 *
- * This software is licensed under the "BSD" License as described in the "LICENSE" file, 
+ * This software is licensed under the "BSD" License as described in the
- * which should be included with this package. The terms are also available at 
+ * "LICENSE" file, which should be included with this package. The terms are
- * http://www.hardcoded.net/licenses/bsd_license
+ * also available at http://www.hardcoded.net/licenses/bsd_license
 */
 #include "common.h"
@@ -17,8 +17,7 @@ static PyObject *DifferentBlockCountError;
 /* Returns a 3 sized tuple containing the mean color of 'image'.
 * image: a PIL image or crop.
 */
-static PyObject* getblock(PyObject *image)
+static PyObject *getblock(PyObject *image) {
-{
  int i, totr, totg, totb;
  Py_ssize_t pixel_count;
  PyObject *ppixels;
@@ -65,8 +64,7 @@ static PyObject* getblock(PyObject *image)
 /* Returns the difference between the first block and the second.
 * It returns an absolute sum of the 3 differences (RGB).
 */
-static int diff(PyObject *first, PyObject *second)
+static int diff(PyObject *first, PyObject *second) {
-{
  int r1, g1, b1, r2, b2, g2;
  PyObject *pr, *pg, *pb;
  pr = PySequence_ITEM(first, 0);
@@ -101,8 +99,7 @@ If it is 10, for example, 100 blocks will be returns (10 width, 10 height). The
 necessarely cover square areas. The area covered by each block will be proportional to the image\n\
 itself.\n");
-static PyObject* block_getblocks2(PyObject *self, PyObject *args)
+static PyObject *block_getblocks2(PyObject *self, PyObject *args) {
-{
  int block_count_per_side, width, height, block_width, block_height, ih;
  PyObject *image;
  PyObject *pimage_size, *pwidth, *pheight;
@@ -128,7 +125,7 @@ static PyObject* block_getblocks2(PyObject *self, PyObject *args)
  block_width = max(width / block_count_per_side, 1);
  block_height = max(height / block_count_per_side, 1);
-    result = PyList_New(block_count_per_side * block_count_per_side);
+  result = PyList_New((Py_ssize_t)block_count_per_side * block_count_per_side);
  if (result == NULL) {
    return NULL;
  }
@@ -174,14 +171,14 @@ PyDoc_STRVAR(block_avgdiff_doc,
 If the result surpasses limit, limit + 1 is returned, except if less than min_iterations\n\
 iterations have been made in the blocks.\n");
-static PyObject* block_avgdiff(PyObject *self, PyObject *args)
+static PyObject *block_avgdiff(PyObject *self, PyObject *args) {
-{
  PyObject *first, *second;
  int limit, min_iterations;
  Py_ssize_t count;
  int sum, i, result;
-    if (!PyArg_ParseTuple(args, "OOii", &first, &second, &limit, &min_iterations)) {
+  if (!PyArg_ParseTuple(args, "OOii", &first, &second, &limit,
+                        &min_iterations)) {
    return NULL;
  }
@@ -206,7 +203,8 @@ static PyObject* block_avgdiff(PyObject *self, PyObject *args)
    sum += diff(item1, item2);
    Py_DECREF(item1);
    Py_DECREF(item2);
-        if ((sum > limit*iteration_count) && (iteration_count >= min_iterations)) {
+    if ((sum > limit * iteration_count) &&
+        (iteration_count >= min_iterations)) {
      return PyLong_FromLong(limit + 1);
    }
  }
@@ -224,8 +222,7 @@ static PyMethodDef BlockMethods[] = {
    {NULL, NULL, 0, NULL} /* Sentinel */
 };
-static struct PyModuleDef BlockDef = {
+static struct PyModuleDef BlockDef = {PyModuleDef_HEAD_INIT,
-    PyModuleDef_HEAD_INIT,
                                      "_block",
                                      NULL,
                                      -1,
@@ -233,12 +230,9 @@ static struct PyModuleDef BlockDef = {
                                      NULL,
                                      NULL,
                                      NULL,
-    NULL
+                                      NULL};
-};
-PyObject *
+PyObject *PyInit__block(void) {
-PyInit__block(void)
-{
  PyObject *m = PyModule_Create(&BlockDef);
  if (m == NULL) {
    return NULL;
@@ -246,7 +240,8 @@ PyInit__block(void)
  NoBlocksError = PyErr_NewException("_block.NoBlocksError", NULL, NULL);
  PyModule_AddObject(m, "NoBlocksError", NoBlocksError);
-    DifferentBlockCountError = PyErr_NewException("_block.DifferentBlockCountError", NULL, NULL);
+  DifferentBlockCountError =
+      PyErr_NewException("_block.DifferentBlockCountError", NULL, NULL);
  PyModule_AddObject(m, "DifferentBlockCountError", DifferentBlockCountError);
  return m;
--- a/core/pe/modules/block_osx.m
+++ b/core/pe/modules/block_osx.m
@@ -10,6 +10,8 @@
 #include "common.h"
 #import <Foundation/Foundation.h>
 #import <CoreGraphics/CoreGraphics.h>
 #import <ImageIO/ImageIO.h>
 #define RADIANS( degrees ) ( degrees * M_PI / 180 )
--- a/core/pe/scanner.py
+++ b/core/pe/scanner.py
@@ -18,12 +18,12 @@ class ScannerPE(Scanner):
    @staticmethod
    def get_scan_options():
        return [
-            ScanOption(ScanType.FuzzyBlock, tr("Contents")),
+            ScanOption(ScanType.FUZZYBLOCK, tr("Contents")),
-            ScanOption(ScanType.ExifTimestamp, tr("EXIF Timestamp")),
+            ScanOption(ScanType.EXIFTIMESTAMP, tr("EXIF Timestamp")),
        ]
    def _getmatches(self, files, j):
-        if self.scan_type == ScanType.FuzzyBlock:
+        if self.scan_type == ScanType.FUZZYBLOCK:
            return matchblock.getmatches(
                files,
                cache_path=self.cache_path,
@@ -31,7 +31,7 @@ class ScannerPE(Scanner):
                match_scaled=self.match_scaled,
                j=j,
            )
-        elif self.scan_type == ScanType.ExifTimestamp:
+        elif self.scan_type == ScanType.EXIFTIMESTAMP:
            return matchexif.getmatches(files, self.match_scaled, j)
        else:
-            raise Exception("Invalid scan type")
+            raise ValueError("Invalid scan type")
--- a/core/results.py
+++ b/core/results.py
@@ -52,6 +52,7 @@ class Results(Markable):
        self.app = app
        self.problems = []  # (dupe, error_msg)
        self.is_modified = False
+        self.refresh_required = False
    def _did_mark(self, dupe):
        self.__marked_size += dupe.size
@@ -94,8 +95,9 @@ class Results(Markable):
    # ---Private
    def __get_dupe_list(self):
-        if self.__dupes is None:
+        if self.__dupes is None or self.refresh_required:
            self.__dupes = flatten(group.dupes for group in self.groups)
+            self.refresh_required = False
            if None in self.__dupes:
                # This is debug logging to try to figure out #44
                logging.warning(
@@ -104,9 +106,7 @@ class Results(Markable):
                    self.groups,
                )
            if self.__filtered_dupes:
-                self.__dupes = [
+                self.__dupes = [dupe for dupe in self.__dupes if dupe in self.__filtered_dupes]
-                    dupe for dupe in self.__dupes if dupe in self.__filtered_dupes
-                ]
            sd = self.__dupes_sort_descriptor
            if sd:
                self.sort_dupes(sd[0], sd[1], sd[2])
@@ -125,18 +125,10 @@ class Results(Markable):
            total_count = self.__total_count
            total_size = self.__total_size
        else:
-            mark_count = len(
+            mark_count = len([dupe for dupe in self.__filtered_dupes if self.is_marked(dupe)])
-                [dupe for dupe in self.__filtered_dupes if self.is_marked(dupe)]
+            marked_size = sum(dupe.size for dupe in self.__filtered_dupes if self.is_marked(dupe))
-            )
+            total_count = len([dupe for dupe in self.__filtered_dupes if self.is_markable(dupe)])
-            marked_size = sum(
+            total_size = sum(dupe.size for dupe in self.__filtered_dupes if self.is_markable(dupe))
-                dupe.size for dupe in self.__filtered_dupes if self.is_marked(dupe)
-            )
-            total_count = len(
-                [dupe for dupe in self.__filtered_dupes if self.is_markable(dupe)]
-            )
-            total_size = sum(
-                dupe.size for dupe in self.__filtered_dupes if self.is_markable(dupe)
-            )
        if self.mark_inverted:
            marked_size = self.__total_size - marked_size
        result = tr("%d / %d (%s / %s) duplicates marked.") % (
@@ -199,11 +191,7 @@ class Results(Markable):
            self.__filters.append(filter_str)
            if self.__filtered_dupes is None:
                self.__filtered_dupes = flatten(g[:] for g in self.groups)
-            self.__filtered_dupes = set(
+            self.__filtered_dupes = set(dupe for dupe in self.__filtered_dupes if filter_re.search(str(dupe.path)))
-                dupe
-                for dupe in self.__filtered_dupes
-                if filter_re.search(str(dupe.path))
-            )
            filtered_groups = set()
            for dupe in self.__filtered_dupes:
                filtered_groups.add(self.get_group_of_duplicate(dupe))
@@ -215,8 +203,7 @@ class Results(Markable):
        self.__dupes = None
    def get_group_of_duplicate(self, dupe):
-        """Returns :class:`~core.engine.Group` in which ``dupe`` belongs.
+        """Returns :class:`~core.engine.Group` in which ``dupe`` belongs."""
-        """
        try:
            return self.__group_of_duplicate[dupe]
        except (TypeError, KeyError):
@@ -282,8 +269,7 @@ class Results(Markable):
        self.is_modified = False
    def make_ref(self, dupe):
-        """Make ``dupe`` take the :attr:`~core.engine.Group.ref` position of its group.
+        """Make ``dupe`` take the :attr:`~core.engine.Group.ref` position of its group."""
-        """
        g = self.get_group_of_duplicate(dupe)
        r = g.ref
        if not g.switch_ref(dupe):
@@ -410,10 +396,10 @@ class Results(Markable):
        """
        if not self.__dupes:
            self.__get_dupe_list()
-        keyfunc = lambda d: self.app._get_dupe_sort_key(
+        self.__dupes.sort(
-            d, lambda: self.get_group_of_duplicate(d), key, delta
+            key=lambda d: self.app._get_dupe_sort_key(d, lambda: self.get_group_of_duplicate(d), key, delta),
+            reverse=not asc,
        )
-        self.__dupes.sort(key=keyfunc, reverse=not asc)
        self.__dupes_sort_descriptor = (key, asc, delta)
    def sort_groups(self, key, asc=True):
@@ -424,8 +410,7 @@ class Results(Markable):
        :param str key: key attribute name to sort with.
        :param bool asc: If false, sorting is reversed.
        """
-        keyfunc = lambda g: self.app._get_group_sort_key(g, key)
+        self.groups.sort(key=lambda g: self.app._get_group_sort_key(g, key), reverse=not asc)
-        self.groups.sort(key=keyfunc, reverse=not asc)
        self.__groups_sort_descriptor = (key, asc)
    # ---Properties
--- a/core/scanner.py
+++ b/core/scanner.py
@@ -21,16 +21,16 @@ from . import engine
 class ScanType:
-    Filename = 0
+    FILENAME = 0
-    Fields = 1
+    FIELDS = 1
-    FieldsNoOrder = 2
+    FIELDSNOORDER = 2
-    Tag = 3
+    TAG = 3
-    Folders = 4
+    FOLDERS = 4
-    Contents = 5
+    CONTENTS = 5
    # PE
-    FuzzyBlock = 10
+    FUZZYBLOCK = 10
-    ExifTimestamp = 11
+    EXIFTIMESTAMP = 11
 ScanOption = namedtuple("ScanOption", "scan_type label")
@@ -77,30 +77,37 @@ class Scanner:
        self.discarded_file_count = 0
    def _getmatches(self, files, j):
-        if self.size_threshold or self.scan_type in {
+        if (
-            ScanType.Contents,
+            self.size_threshold
-            ScanType.Folders,
+            or self.large_size_threshold
-        }:
+            or self.scan_type
            in {
                ScanType.CONTENTS,
                ScanType.FOLDERS,
            }
        ):
            j = j.start_subjob([2, 8])
            for f in j.iter_with_progress(files, tr("Read size of %d/%d files")):
                f.size  # pre-read, makes a smoother progress if read here (especially for bundles)
            if self.size_threshold:
                files = [f for f in files if f.size >= self.size_threshold]
-        if self.scan_type in {ScanType.Contents, ScanType.Folders}:
+            if self.large_size_threshold:
-            return engine.getmatches_by_contents(files, j=j)
+                files = [f for f in files if f.size <= self.large_size_threshold]
        if self.scan_type in {ScanType.CONTENTS, ScanType.FOLDERS}:
            return engine.getmatches_by_contents(files, bigsize=self.big_file_size_threshold, j=j)
        else:
            j = j.start_subjob([2, 8])
            kw = {}
            kw["match_similar_words"] = self.match_similar_words
            kw["weight_words"] = self.word_weighting
            kw["min_match_percentage"] = self.min_match_percentage
-            if self.scan_type == ScanType.FieldsNoOrder:
+            if self.scan_type == ScanType.FIELDSNOORDER:
-                self.scan_type = ScanType.Fields
+                self.scan_type = ScanType.FIELDS
                kw["no_field_order"] = True
            func = {
-                ScanType.Filename: lambda f: engine.getwords(rem_file_ext(f.name)),
+                ScanType.FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)),
-                ScanType.Fields: lambda f: engine.getfields(rem_file_ext(f.name)),
+                ScanType.FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)),
-                ScanType.Tag: lambda f: [
+                ScanType.TAG: lambda f: [
                    engine.getwords(str(getattr(f, attrname)))
                    for attrname in SCANNABLE_TAGS
                    if attrname in self.scanned_tags
@@ -150,7 +157,7 @@ class Scanner:
        # "duplicated duplicates if you will). Then, we also don't want mixed file kinds if the
        # option isn't enabled, we want matches for which both files exist and, lastly, we don't
        # want matches with both files as ref.
-        if self.scan_type == ScanType.Folders and matches:
+        if self.scan_type == ScanType.FOLDERS and matches:
            allpath = {m.first.path for m in matches}
            allpath |= {m.second.path for m in matches}
            sortedpaths = sorted(allpath)
@@ -161,38 +168,22 @@ class Scanner:
                    toremove.add(p)
                else:
                    last_parent_path = p
-            matches = [
+            matches = [m for m in matches if m.first.path not in toremove or m.second.path not in toremove]
                m
                for m in matches
                if m.first.path not in toremove or m.second.path not in toremove
            ]
        if not self.mix_file_kind:
-            matches = [
+            matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
-                m
+        matches = [m for m in matches if m.first.path.exists() and m.second.path.exists()]
                for m in matches
                if get_file_ext(m.first.name) == get_file_ext(m.second.name)
            ]
        matches = [
            m for m in matches if m.first.path.exists() and m.second.path.exists()
        ]
        matches = [m for m in matches if not (m.first.is_ref and m.second.is_ref)]
        if ignore_list:
-            matches = [
+            matches = [m for m in matches if not ignore_list.are_ignored(str(m.first.path), str(m.second.path))]
                m
                for m in matches
                if not ignore_list.AreIgnored(str(m.first.path), str(m.second.path))
            ]
        logging.info("Grouping matches")
        groups = engine.get_groups(matches)
        if self.scan_type in {
-            ScanType.Filename,
+            ScanType.FILENAME,
-            ScanType.Fields,
+            ScanType.FIELDS,
-            ScanType.FieldsNoOrder,
+            ScanType.FIELDSNOORDER,
-            ScanType.Tag,
+            ScanType.TAG,
        }:
-            matched_files = dedupe(
+            matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
                [m.first for m in matches] + [m.second for m in matches]
            )
            self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
        else:
            # Ticket #195
@@ -215,7 +206,9 @@ class Scanner:
    match_similar_words = False
    min_match_percentage = 80
    mix_file_kind = True
-    scan_type = ScanType.Filename
+    scan_type = ScanType.FILENAME
    scanned_tags = {"artist", "title"}
    size_threshold = 0
    large_size_threshold = 0
    big_file_size_threshold = 0
    word_weighting = False
--- a/core/se/scanner.py
+++ b/core/se/scanner.py
@@ -13,7 +13,7 @@ class ScannerSE(ScannerBase):
    @staticmethod
    def get_scan_options():
        return [
-            ScanOption(ScanType.Filename, tr("Filename")),
+            ScanOption(ScanType.FILENAME, tr("Filename")),
-            ScanOption(ScanType.Contents, tr("Contents")),
+            ScanOption(ScanType.CONTENTS, tr("Contents")),
-            ScanOption(ScanType.Folders, tr("Folders")),
+            ScanOption(ScanType.FOLDERS, tr("Folders")),
        ]
--- a/core/tests/app_test.py
+++ b/core/tests/app_test.py
@@ -23,15 +23,13 @@ from ..scanner import ScanType
 def add_fake_files_to_directories(directories, files):
    directories.get_files = lambda j=None: iter(files)
-    directories._dirs.append("this is just so Scan() doesnt return 3")
+    directories._dirs.append("this is just so Scan() doesn't return 3")
 class TestCaseDupeGuru:
    def test_apply_filter_calls_results_apply_filter(self, monkeypatch):
        dgapp = TestApp().app
-        monkeypatch.setattr(
+        monkeypatch.setattr(dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter))
            dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter)
        )
        dgapp.apply_filter("foo")
        eq_(2, len(dgapp.results.apply_filter.calls))
        call = dgapp.results.apply_filter.calls[0]
@@ -41,15 +39,11 @@ class TestCaseDupeGuru:
    def test_apply_filter_escapes_regexp(self, monkeypatch):
        dgapp = TestApp().app
-        monkeypatch.setattr(
+        monkeypatch.setattr(dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter))
            dgapp.results, "apply_filter", log_calls(dgapp.results.apply_filter)
        )
        dgapp.apply_filter("()[]\\.|+?^abc")
        call = dgapp.results.apply_filter.calls[1]
        eq_("\\(\\)\\[\\]\\\\\\.\\|\\+\\?\\^abc", call["filter_str"])
-        dgapp.apply_filter(
+        dgapp.apply_filter("(*)")  # In "simple mode", we want the * to behave as a wildcard
            "(*)"
        )  # In "simple mode", we want the * to behave as a wilcard
        call = dgapp.results.apply_filter.calls[3]
        eq_(r"\(.*\)", call["filter_str"])
        dgapp.options["escape_filter_regexp"] = False
@@ -70,9 +64,7 @@ class TestCaseDupeGuru:
        )
        # XXX This monkeypatch is temporary. will be fixed in a better monkeypatcher.
        monkeypatch.setattr(app, "smart_copy", hscommon.conflict.smart_copy)
-        monkeypatch.setattr(
+        monkeypatch.setattr(os, "makedirs", lambda path: None)  # We don't want the test to create that fake directory
            os, "makedirs", lambda path: None
        )  # We don't want the test to create that fake directory
        dgapp = TestApp().app
        dgapp.directories.add_path(p)
        [f] = dgapp.directories.get_files()
@@ -96,14 +88,14 @@ class TestCaseDupeGuru:
        eq_(1, len(calls))
        eq_(sourcepath, calls[0]["path"])
-    def test_Scan_with_objects_evaluating_to_false(self):
+    def test_scan_with_objects_evaluating_to_false(self):
        class FakeFile(fs.File):
            def __bool__(self):
                return False
        # At some point, any() was used in a wrong way that made Scan() wrongly return 1
        app = TestApp().app
-        f1, f2 = [FakeFile("foo") for i in range(2)]
+        f1, f2 = [FakeFile("foo") for _ in range(2)]
        f1.is_ref, f2.is_ref = (False, False)
        assert not (bool(f1) and bool(f2))
        add_fake_files_to_directories(app.directories, [f1, f2])
@@ -118,7 +110,7 @@ class TestCaseDupeGuru:
        os.link(str(tmppath["myfile"]), str(tmppath["hardlink"]))
        app = TestApp().app
        app.directories.add_path(tmppath)
-        app.options["scan_type"] = ScanType.Contents
+        app.options["scan_type"] = ScanType.CONTENTS
        app.options["ignore_hardlink_matches"] = True
        app.start_scanning()
        eq_(len(app.results.groups), 0)
@@ -132,7 +124,7 @@ class TestCaseDupeGuru:
        assert not dgapp.result_table.rename_selected("foo")  # no crash
-class TestCaseDupeGuru_clean_empty_dirs:
+class TestCaseDupeGuruCleanEmptyDirs:
    @pytest.fixture
    def do_setup(self, request):
        monkeypatch = request.getfixturevalue("monkeypatch")
@@ -192,7 +184,7 @@ class TestCaseDupeGuruWithResults:
        tmppath["bar"].mkdir()
        self.app.directories.add_path(tmppath)
-    def test_GetObjects(self, do_setup):
+    def test_get_objects(self, do_setup):
        objects = self.objects
        groups = self.groups
        r = self.rtable[0]
@@ -205,7 +197,7 @@ class TestCaseDupeGuruWithResults:
        assert r._group is groups[1]
        assert r._dupe is objects[4]
-    def test_GetObjects_after_sort(self, do_setup):
+    def test_get_objects_after_sort(self, do_setup):
        objects = self.objects
        groups = self.groups[:]  # we need an un-sorted reference
        self.rtable.sort("name", False)
@@ -220,7 +212,7 @@ class TestCaseDupeGuruWithResults:
        # The first 2 dupes have been removed. The 3rd one is a ref. it stays there, in first pos.
        eq_(self.rtable.selected_indexes, [1])  # no exception
-    def test_selectResultNodePaths(self, do_setup):
+    def test_select_result_node_paths(self, do_setup):
        app = self.app
        objects = self.objects
        self.rtable.select([1, 2])
@@ -228,7 +220,7 @@ class TestCaseDupeGuruWithResults:
        assert app.selected_dupes[0] is objects[1]
        assert app.selected_dupes[1] is objects[2]
-    def test_selectResultNodePaths_with_ref(self, do_setup):
+    def test_select_result_node_paths_with_ref(self, do_setup):
        app = self.app
        objects = self.objects
        self.rtable.select([1, 2, 3])
@@ -237,7 +229,7 @@ class TestCaseDupeGuruWithResults:
        assert app.selected_dupes[1] is objects[2]
        assert app.selected_dupes[2] is self.groups[1].ref
-    def test_selectResultNodePaths_after_sort(self, do_setup):
+    def test_select_result_node_paths_after_sort(self, do_setup):
        app = self.app
        objects = self.objects
        groups = self.groups[:]  # To keep the old order in memory
@@ -264,7 +256,7 @@ class TestCaseDupeGuruWithResults:
        app.remove_selected()
        eq_(self.rtable.selected_indexes, [])  # no exception
-    def test_selectPowerMarkerRows_after_sort(self, do_setup):
+    def test_select_powermarker_rows_after_sort(self, do_setup):
        app = self.app
        objects = self.objects
        self.rtable.power_marker = True
@@ -303,7 +295,7 @@ class TestCaseDupeGuruWithResults:
        app.toggle_selected_mark_state()
        eq_(app.results.mark_count, 0)
-    def test_refreshDetailsWithSelected(self, do_setup):
+    def test_refresh_details_with_selected(self, do_setup):
        self.rtable.select([1, 4])
        eq_(self.dpanel.row(0), ("Filename", "bar bleh", "foo bar"))
        self.dpanel.view.check_gui_calls(["refresh"])
@@ -311,7 +303,7 @@ class TestCaseDupeGuruWithResults:
        eq_(self.dpanel.row(0), ("Filename", "---", "---"))
        self.dpanel.view.check_gui_calls(["refresh"])
-    def test_makeSelectedReference(self, do_setup):
+    def test_make_selected_reference(self, do_setup):
        app = self.app
        objects = self.objects
        groups = self.groups
@@ -320,9 +312,7 @@ class TestCaseDupeGuruWithResults:
        assert groups[0].ref is objects[1]
        assert groups[1].ref is objects[4]
-    def test_makeSelectedReference_by_selecting_two_dupes_in_the_same_group(
+    def test_make_selected_reference_by_selecting_two_dupes_in_the_same_group(self, do_setup):
        self, do_setup
    ):
        app = self.app
        objects = self.objects
        groups = self.groups
@@ -332,7 +322,7 @@ class TestCaseDupeGuruWithResults:
        assert groups[0].ref is objects[1]
        assert groups[1].ref is objects[4]
-    def test_removeSelected(self, do_setup):
+    def test_remove_selected(self, do_setup):
        app = self.app
        self.rtable.select([1, 4])
        app.remove_selected()
@@ -340,7 +330,7 @@ class TestCaseDupeGuruWithResults:
        app.remove_selected()
        eq_(len(app.results.dupes), 0)
-    def test_addDirectory_simple(self, do_setup):
+    def test_add_directory_simple(self, do_setup):
        # There's already a directory in self.app, so adding another one makes 2 of them
        app = self.app
        # any other path that isn't a parent or child of the already added path
@@ -348,7 +338,7 @@ class TestCaseDupeGuruWithResults:
        app.add_directory(otherpath)
        eq_(len(app.directories), 2)
-    def test_addDirectory_already_there(self, do_setup):
+    def test_add_directory_already_there(self, do_setup):
        app = self.app
        otherpath = Path(op.dirname(__file__))
        app.add_directory(otherpath)
@@ -356,7 +346,7 @@ class TestCaseDupeGuruWithResults:
        eq_(len(app.view.messages), 1)
        assert "already" in app.view.messages[0]
-    def test_addDirectory_does_not_exist(self, do_setup):
+    def test_add_directory_does_not_exist(self, do_setup):
        app = self.app
        app.add_directory("/does_not_exist")
        eq_(len(app.view.messages), 1)
@@ -372,30 +362,30 @@ class TestCaseDupeGuruWithResults:
        # BOTH the ref and the other dupe should have been added
        eq_(len(app.ignore_list), 3)
-    def test_purgeIgnoreList(self, do_setup, tmpdir):
+    def test_purge_ignorelist(self, do_setup, tmpdir):
        app = self.app
        p1 = str(tmpdir.join("file1"))
        p2 = str(tmpdir.join("file2"))
        open(p1, "w").close()
        open(p2, "w").close()
        dne = "/does_not_exist"
-        app.ignore_list.Ignore(dne, p1)
+        app.ignore_list.ignore(dne, p1)
-        app.ignore_list.Ignore(p2, dne)
+        app.ignore_list.ignore(p2, dne)
-        app.ignore_list.Ignore(p1, p2)
+        app.ignore_list.ignore(p1, p2)
        app.purge_ignore_list()
        eq_(1, len(app.ignore_list))
-        assert app.ignore_list.AreIgnored(p1, p2)
+        assert app.ignore_list.are_ignored(p1, p2)
-        assert not app.ignore_list.AreIgnored(dne, p1)
+        assert not app.ignore_list.are_ignored(dne, p1)
    def test_only_unicode_is_added_to_ignore_list(self, do_setup):
-        def FakeIgnore(first, second):
+        def fake_ignore(first, second):
            if not isinstance(first, str):
                self.fail()
            if not isinstance(second, str):
                self.fail()
        app = self.app
-        app.ignore_list.Ignore = FakeIgnore
+        app.ignore_list.ignore = fake_ignore
        self.rtable.select([4])
        app.add_selected_to_ignore_list()
@@ -404,9 +394,7 @@ class TestCaseDupeGuruWithResults:
        # results table.
        app = self.app
        app.JOB = Job(1, lambda *args, **kw: False)  # Cancels the task
-        add_fake_files_to_directories(
+        add_fake_files_to_directories(app.directories, self.objects)  # We want the scan to at least start
            app.directories, self.objects
        )  # We want the scan to at least start
        app.start_scanning()  # will be cancelled immediately
        eq_(len(app.result_table), 0)
@@ -431,7 +419,7 @@ class TestCaseDupeGuruWithResults:
        # don't crash
-class TestCaseDupeGuru_renameSelected:
+class TestCaseDupeGuruRenameSelected:
    @pytest.fixture
    def do_setup(self, request):
        tmpdir = request.getfixturevalue("tmpdir")
@@ -514,7 +502,6 @@ class TestAppWithDirectoriesInTree:
        # refreshed.
        node = self.dtree[0]
        eq_(len(node), 3)  # a len() call is required for subnodes to be loaded
        subnode = node[0]
        node.state = 1  # the state property is a state index
        node = self.dtree[0]
        eq_(len(node), 3)
--- a/core/tests/base.py
+++ b/core/tests/base.py
@@ -88,6 +88,7 @@ class NamedObject:
        self.size = size
        self.md5partial = name
        self.md5 = name
        self.md5samples = name
        if with_words:
            self.words = getwords(name)
        self.is_ref = False
@@ -139,9 +140,7 @@ def GetTestGroups():
    matches = engine.getmatches(objects)  # we should have 5 matches
    groups = engine.get_groups(matches)  # We should have 2 groups
    for g in groups:
-        g.prioritize(
+        g.prioritize(lambda x: objects.index(x))  # We want the dupes to be in the same order as the list is
            lambda x: objects.index(x)
        )  # We want the dupes to be in the same order as the list is
    groups.sort(key=len, reverse=True)  # We want the group with 3 members to be first.
    return (objects, matches, groups)
@@ -152,8 +151,8 @@ class TestApp(TestAppBase):
    def __init__(self):
        def link_gui(gui):
            gui.view = self.make_logger()
-            if hasattr(gui, "columns"):  # tables
+            if hasattr(gui, "_columns"):  # tables
-                gui.columns.view = self.make_logger()
+                gui._columns.view = self.make_logger()
            return gui
        TestAppBase.__init__(self)
--- a/core/tests/block_test.py
+++ b/core/tests/block_test.py
@@ -14,9 +14,7 @@ except ImportError:
    skip("Can't import the block module, probably hasn't been compiled.")
-def my_avgdiff(
+def my_avgdiff(first, second, limit=768, min_iter=3):  # this is so I don't have to re-write every call
    first, second, limit=768, min_iter=3
 ):  # this is so I don't have to re-write every call
    return avgdiff(first, second, limit, min_iter)
@@ -75,99 +73,6 @@ class TestCasegetblock:
        eq_((meanred, meangreen, meanblue), b)
 # class TCdiff(unittest.TestCase):
 #     def test_diff(self):
 #         b1 = (10, 20, 30)
 #         b2 = (1, 2, 3)
 #         eq_(9 + 18 + 27, diff(b1, b2))
 #
 #     def test_diff_negative(self):
 #         b1 = (10, 20, 30)
 #         b2 = (1, 2, 3)
 #         eq_(9 + 18 + 27, diff(b2, b1))
 #
 #     def test_diff_mixed_positive_and_negative(self):
 #         b1 = (1, 5, 10)
 #         b2 = (10, 1, 15)
 #         eq_(9 + 4 + 5, diff(b1, b2))
 #
 # class TCgetblocks(unittest.TestCase):
 #     def test_empty_image(self):
 #         im = empty()
 #         blocks = getblocks(im, 1)
 #         eq_(0, len(blocks))
 #
 #     def test_one_block_image(self):
 #         im = four_pixels()
 #         blocks = getblocks2(im, 1)
 #         eq_(1, len(blocks))
 #         block = blocks[0]
 #         meanred = (0xff + 0x80) // 4
 #         meangreen = (0x80 + 0x40) // 4
 #         meanblue = (0xff + 0x80) // 4
 #         eq_((meanred, meangreen, meanblue), block)
 #
 #     def test_not_enough_height_to_fit_a_block(self):
 #         im = FakeImage((2, 1), [BLACK, BLACK])
 #         blocks = getblocks(im, 2)
 #         eq_(0, len(blocks))
 #
 #     def xtest_dont_include_leftovers(self):
 #         # this test is disabled because getblocks is not used and getblock in cdeffed
 #         pixels = [
 #             RED,(0, 0x80, 0xff), BLACK,
 #             (0x80, 0, 0),(0, 0x40, 0x80), BLACK,
 #             BLACK, BLACK, BLACK
 #         ]
 #         im = FakeImage((3, 3), pixels)
 #         blocks = getblocks(im, 2)
 #         block = blocks[0]
 #         #Because the block is smaller than the image, only blocksize must be considered.
 #         meanred = (0xff + 0x80) // 4
 #         meangreen = (0x80 + 0x40) // 4
 #         meanblue = (0xff + 0x80) // 4
 #         eq_((meanred, meangreen, meanblue), block)
 #
 #     def xtest_two_blocks(self):
 #         # this test is disabled because getblocks is not used and getblock in cdeffed
 #         pixels = [BLACK for i in xrange(4 * 2)]
 #         pixels[0] = RED
 #         pixels[1] = (0, 0x80, 0xff)
 #         pixels[4] = (0x80, 0, 0)
 #         pixels[5] = (0, 0x40, 0x80)
 #         im = FakeImage((4, 2), pixels)
 #         blocks = getblocks(im, 2)
 #         eq_(2, len(blocks))
 #         block = blocks[0]
 #         #Because the block is smaller than the image, only blocksize must be considered.
 #         meanred = (0xff + 0x80) // 4
 #         meangreen = (0x80 + 0x40) // 4
 #         meanblue = (0xff + 0x80) // 4
 #         eq_((meanred, meangreen, meanblue), block)
 #         eq_(BLACK, blocks[1])
 #
 #     def test_four_blocks(self):
 #         pixels = [BLACK for i in xrange(4 * 4)]
 #         pixels[0] = RED
 #         pixels[1] = (0, 0x80, 0xff)
 #         pixels[4] = (0x80, 0, 0)
 #         pixels[5] = (0, 0x40, 0x80)
 #         im = FakeImage((4, 4), pixels)
 #         blocks = getblocks2(im, 2)
 #         eq_(4, len(blocks))
 #         block = blocks[0]
 #         #Because the block is smaller than the image, only blocksize must be considered.
 #         meanred = (0xff + 0x80) // 4
 #         meangreen = (0x80 + 0x40) // 4
 #         meanblue = (0xff + 0x80) // 4
 #         eq_((meanred, meangreen, meanblue), block)
 #         eq_(BLACK, blocks[1])
 #         eq_(BLACK, blocks[2])
 #         eq_(BLACK, blocks[3])
 #
 class TestCasegetblocks2:
    def test_empty_image(self):
        im = empty()
@@ -272,8 +177,8 @@ class TestCaseavgdiff:
    def test_return_at_least_1_at_the_slightest_difference(self):
        ref = (0, 0, 0)
        b1 = (1, 0, 0)
-        blocks1 = [ref for i in range(250)]
+        blocks1 = [ref for _ in range(250)]
-        blocks2 = [ref for i in range(250)]
+        blocks2 = [ref for _ in range(250)]
        blocks2[0] = b1
        eq_(1, my_avgdiff(blocks1, blocks2))
@@ -282,41 +187,3 @@ class TestCaseavgdiff:
        blocks1 = [ref, ref]
        blocks2 = [ref, ref]
        eq_(0, my_avgdiff(blocks1, blocks2))
 # class TCmaxdiff(unittest.TestCase):
 #     def test_empty(self):
 #         self.assertRaises(NoBlocksError, maxdiff,[],[])
 #
 #     def test_two_blocks(self):
 #         b1 = (5, 10, 15)
 #         b2 = (255, 250, 245)
 #         b3 = (0, 0, 0)
 #         b4 = (255, 0, 255)
 #         blocks1 = [b1, b2]
 #         blocks2 = [b3, b4]
 #         expected1 = 5 + 10 + 15
 #         expected2 = 0 + 250 + 10
 #         expected = max(expected1, expected2)
 #         eq_(expected, maxdiff(blocks1, blocks2))
 #
 #     def test_blocks_not_the_same_size(self):
 #         b = (0, 0, 0)
 #         self.assertRaises(DifferentBlockCountError, maxdiff,[b, b],[b])
 #
 #     def test_first_arg_is_empty_but_not_second(self):
 #         #Don't return 0 (as when the 2 lists are empty), raise!
 #         b = (0, 0, 0)
 #         self.assertRaises(DifferentBlockCountError, maxdiff,[],[b])
 #
 #     def test_limit(self):
 #         b1 = (5, 10, 15)
 #         b2 = (255, 250, 245)
 #         b3 = (0, 0, 0)
 #         b4 = (255, 0, 255)
 #         blocks1 = [b1, b2]
 #         blocks2 = [b3, b4]
 #         expected1 = 5 + 10 + 15
 #         expected2 = 0 + 250 + 10
 #         eq_(expected1, maxdiff(blocks1, blocks2, expected1 - 1))
 #
--- a/core/tests/cache_test.py
+++ b/core/tests/cache_test.py
@@ -17,7 +17,7 @@ except ImportError:
    skip("Can't import the cache module, probably hasn't been compiled.")
-class TestCasecolors_to_string:
+class TestCaseColorsToString:
    def test_no_color(self):
        eq_("", colors_to_string([]))
@@ -30,7 +30,7 @@ class TestCasecolors_to_string:
        eq_("000102030405", colors_to_string([(0, 1, 2), (3, 4, 5)]))
-class TestCasestring_to_colors:
+class TestCaseStringToColors:
    def test_empty(self):
        eq_([], string_to_colors(""))
--- a/core/tests/directories_test.py
+++ b/core/tests/directories_test.py
@@ -92,7 +92,7 @@ def test_add_path():
    assert p in d
-def test_AddPath_when_path_is_already_there():
+def test_add_path_when_path_is_already_there():
    d = Directories()
    p = testpath["onefile"]
    d.add_path(p)
@@ -112,7 +112,7 @@ def test_add_path_containing_paths_already_there():
    eq_(d[0], testpath)
-def test_AddPath_non_latin(tmpdir):
+def test_add_path_non_latin(tmpdir):
    p = Path(str(tmpdir))
    to_add = p["unicode\u201a"]
    os.mkdir(str(to_add))
@@ -140,20 +140,20 @@ def test_states():
    d = Directories()
    p = testpath["onefile"]
    d.add_path(p)
-    eq_(DirectoryState.Normal, d.get_state(p))
+    eq_(DirectoryState.NORMAL, d.get_state(p))
-    d.set_state(p, DirectoryState.Reference)
+    d.set_state(p, DirectoryState.REFERENCE)
-    eq_(DirectoryState.Reference, d.get_state(p))
+    eq_(DirectoryState.REFERENCE, d.get_state(p))
-    eq_(DirectoryState.Reference, d.get_state(p["dir1"]))
+    eq_(DirectoryState.REFERENCE, d.get_state(p["dir1"]))
    eq_(1, len(d.states))
    eq_(p, list(d.states.keys())[0])
-    eq_(DirectoryState.Reference, d.states[p])
+    eq_(DirectoryState.REFERENCE, d.states[p])
 def test_get_state_with_path_not_there():
    # When the path's not there, just return DirectoryState.NORMAL
    d = Directories()
    d.add_path(testpath["onefile"])
-    eq_(d.get_state(testpath), DirectoryState.Normal)
+    eq_(d.get_state(testpath), DirectoryState.NORMAL)
 def test_states_overwritten_when_larger_directory_eat_smaller_ones():
@@ -162,20 +162,20 @@ def test_states_overwritten_when_larger_directory_eat_smaller_ones():
    d = Directories()
    p = testpath["onefile"]
    d.add_path(p)
-    d.set_state(p, DirectoryState.Excluded)
+    d.set_state(p, DirectoryState.EXCLUDED)
    d.add_path(testpath)
-    d.set_state(testpath, DirectoryState.Reference)
+    d.set_state(testpath, DirectoryState.REFERENCE)
-    eq_(d.get_state(p), DirectoryState.Reference)
+    eq_(d.get_state(p), DirectoryState.REFERENCE)
-    eq_(d.get_state(p["dir1"]), DirectoryState.Reference)
+    eq_(d.get_state(p["dir1"]), DirectoryState.REFERENCE)
-    eq_(d.get_state(testpath), DirectoryState.Reference)
+    eq_(d.get_state(testpath), DirectoryState.REFERENCE)
 def test_get_files():
    d = Directories()
    p = testpath["fs"]
    d.add_path(p)
-    d.set_state(p["dir1"], DirectoryState.Reference)
+    d.set_state(p["dir1"], DirectoryState.REFERENCE)
-    d.set_state(p["dir2"], DirectoryState.Excluded)
+    d.set_state(p["dir2"], DirectoryState.EXCLUDED)
    files = list(d.get_files())
    eq_(5, len(files))
    for f in files:
@@ -204,8 +204,8 @@ def test_get_folders():
    d = Directories()
    p = testpath["fs"]
    d.add_path(p)
-    d.set_state(p["dir1"], DirectoryState.Reference)
+    d.set_state(p["dir1"], DirectoryState.REFERENCE)
-    d.set_state(p["dir2"], DirectoryState.Excluded)
+    d.set_state(p["dir2"], DirectoryState.EXCLUDED)
    folders = list(d.get_folders())
    eq_(len(folders), 3)
    ref = [f for f in folders if f.is_ref]
@@ -220,7 +220,7 @@ def test_get_files_with_inherited_exclusion():
    d = Directories()
    p = testpath["onefile"]
    d.add_path(p)
-    d.set_state(p, DirectoryState.Excluded)
+    d.set_state(p, DirectoryState.EXCLUDED)
    eq_([], list(d.get_files()))
@@ -233,14 +233,14 @@ def test_save_and_load(tmpdir):
    p2.mkdir()
    d1.add_path(p1)
    d1.add_path(p2)
-    d1.set_state(p1, DirectoryState.Reference)
+    d1.set_state(p1, DirectoryState.REFERENCE)
-    d1.set_state(p1["dir1"], DirectoryState.Excluded)
+    d1.set_state(p1["dir1"], DirectoryState.EXCLUDED)
    tmpxml = str(tmpdir.join("directories_testunit.xml"))
    d1.save_to_file(tmpxml)
    d2.load_from_file(tmpxml)
    eq_(2, len(d2))
-    eq_(DirectoryState.Reference, d2.get_state(p1))
+    eq_(DirectoryState.REFERENCE, d2.get_state(p1))
-    eq_(DirectoryState.Excluded, d2.get_state(p1["dir1"]))
+    eq_(DirectoryState.EXCLUDED, d2.get_state(p1["dir1"]))
 def test_invalid_path():
@@ -254,7 +254,12 @@ def test_invalid_path():
 def test_set_state_on_invalid_path():
    d = Directories()
    try:
-        d.set_state(Path("foobar",), DirectoryState.Normal)
+        d.set_state(
            Path(
                "foobar",
            ),
            DirectoryState.NORMAL,
        )
    except LookupError:
        assert False
@@ -282,7 +287,7 @@ def test_unicode_save(tmpdir):
    p1.mkdir()
    p1["foo\xe9"].mkdir()
    d.add_path(p1)
-    d.set_state(p1["foo\xe9"], DirectoryState.Excluded)
+    d.set_state(p1["foo\xe9"], DirectoryState.EXCLUDED)
    tmpxml = str(tmpdir.join("directories_testunit.xml"))
    try:
        d.save_to_file(tmpxml)
@@ -316,10 +321,10 @@ def test_get_state_returns_excluded_by_default_for_hidden_directories(tmpdir):
    hidden_dir_path = p[".foo"]
    p[".foo"].mkdir()
    d.add_path(p)
-    eq_(d.get_state(hidden_dir_path), DirectoryState.Excluded)
+    eq_(d.get_state(hidden_dir_path), DirectoryState.EXCLUDED)
    # But it can be overridden
-    d.set_state(hidden_dir_path, DirectoryState.Normal)
+    d.set_state(hidden_dir_path, DirectoryState.NORMAL)
-    eq_(d.get_state(hidden_dir_path), DirectoryState.Normal)
+    eq_(d.get_state(hidden_dir_path), DirectoryState.NORMAL)
 def test_default_path_state_override(tmpdir):
@@ -327,7 +332,7 @@ def test_default_path_state_override(tmpdir):
    class MyDirectories(Directories):
        def _default_state_for_path(self, path):
            if "foobar" in path:
-                return DirectoryState.Excluded
+                return DirectoryState.EXCLUDED
    d = MyDirectories()
    p1 = Path(str(tmpdir))
@@ -336,24 +341,26 @@ def test_default_path_state_override(tmpdir):
    p1["foobaz"].mkdir()
    p1["foobaz/somefile"].open("w").close()
    d.add_path(p1)
-    eq_(d.get_state(p1["foobaz"]), DirectoryState.Normal)
+    eq_(d.get_state(p1["foobaz"]), DirectoryState.NORMAL)
-    eq_(d.get_state(p1["foobar"]), DirectoryState.Excluded)
+    eq_(d.get_state(p1["foobar"]), DirectoryState.EXCLUDED)
    eq_(len(list(d.get_files())), 1)  # only the 'foobaz' file is there
    # However, the default state can be changed
-    d.set_state(p1["foobar"], DirectoryState.Normal)
+    d.set_state(p1["foobar"], DirectoryState.NORMAL)
-    eq_(d.get_state(p1["foobar"]), DirectoryState.Normal)
+    eq_(d.get_state(p1["foobar"]), DirectoryState.NORMAL)
    eq_(len(list(d.get_files())), 2)
-class TestExcludeList():
+class TestExcludeList:
    def setup_method(self, method):
        self.d = Directories(exclude_list=ExcludeList(union_regex=False))
    def get_files_and_expect_num_result(self, num_result):
        """Calls get_files(), get the filenames only, print for debugging.
        num_result is how many files are expected as a result."""
-        print(f"EXCLUDED REGEX: paths {self.d._exclude_list.compiled_paths} \
+        print(
-files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled}")
+            f"EXCLUDED REGEX: paths {self.d._exclude_list.compiled_paths} \
 files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled}"
        )
        files = list(self.d.get_files())
        files = [file.name for file in files]
        print(f"FINAL FILES {files}")
@@ -368,11 +375,11 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
        p1["$Recycle.Bin"].mkdir()
        p1["$Recycle.Bin"]["subdir"].mkdir()
        self.d.add_path(p1)
-        eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.EXCLUDED)
-        # By default, subdirs should be excluded too, but this can be overriden separately
+        # By default, subdirs should be excluded too, but this can be overridden separately
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.EXCLUDED)
-        self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
+        self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.NORMAL)
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
    def test_exclude_refined(self, tmpdir):
        regex1 = r"^\$Recycle\.Bin$"
@@ -391,16 +398,16 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
        self.d.add_path(p1["$Recycle.Bin"])
        # Filter should set the default state to Excluded
-        eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.EXCLUDED)
        # The subdir should inherit its parent state
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.EXCLUDED)
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.EXCLUDED)
        # Override a child path's state
-        self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
+        self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.NORMAL)
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
        # Parent should keep its default state, and the other child too
-        eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.Excluded)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]), DirectoryState.EXCLUDED)
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Excluded)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.EXCLUDED)
        # print(f"get_folders(): {[x for x in self.d.get_folders()]}")
        # only the 2 files directly under the Normal directory
@@ -412,8 +419,8 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
        assert "somesubdirfile.png" in files
        assert "unwanted_subdirfile.gif" in files
        # Overriding the parent should enable all children
-        self.d.set_state(p1["$Recycle.Bin"], DirectoryState.Normal)
+        self.d.set_state(p1["$Recycle.Bin"], DirectoryState.NORMAL)
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.Normal)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdar"]), DirectoryState.NORMAL)
        # all files there
        files = self.get_files_and_expect_num_result(6)
        assert "somefile.png" in files
@@ -437,7 +444,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
        assert self.d._exclude_list.error(regex3) is None
        # print(f"get_folders(): {[x for x in self.d.get_folders()]}")
        # Directory shouldn't change its state here, unless explicitely done by user
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
        files = self.get_files_and_expect_num_result(5)
        assert "unwanted_subdirfile.gif" not in files
        assert "unwanted_subdarfile.png" in files
@@ -447,14 +454,14 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
        self.d._exclude_list.rename(regex3, regex4)
        assert self.d._exclude_list.error(regex4) is None
        p1["$Recycle.Bin"]["subdar"]["file_ending_with_subdir"].open("w").close()
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Excluded)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.EXCLUDED)
        files = self.get_files_and_expect_num_result(4)
        assert "file_ending_with_subdir" not in files
        assert "somesubdarfile.jpeg" in files
        assert "somesubdirfile.png" not in files
        assert "unwanted_subdirfile.gif" not in files
-        self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.Normal)
+        self.d.set_state(p1["$Recycle.Bin"]["subdir"], DirectoryState.NORMAL)
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
        # print(f"get_folders(): {[x for x in self.d.get_folders()]}")
        files = self.get_files_and_expect_num_result(6)
        assert "file_ending_with_subdir" not in files
@@ -464,7 +471,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
        regex5 = r".*subdir.*"
        self.d._exclude_list.rename(regex4, regex5)
        # Files containing substring should be filtered
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.Normal)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
        # The path should not match, only the filename, the "subdir" in the directory name shouldn't matter
        p1["$Recycle.Bin"]["subdir"]["file_which_shouldnt_match"].open("w").close()
        files = self.get_files_and_expect_num_result(5)
@@ -473,6 +480,29 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
        assert "file_ending_with_subdir" not in files
        assert "file_which_shouldnt_match" in files
        # This should match the directory only
        regex6 = r".*/.*subdir.*/.*"
        if ISWINDOWS:
            regex6 = r".*\\.*subdir.*\\.*"
        assert os.sep in regex6
        self.d._exclude_list.rename(regex5, regex6)
        self.d._exclude_list.remove(regex1)
        eq_(len(self.d._exclude_list.compiled), 1)
        assert regex1 not in self.d._exclude_list
        assert regex5 not in self.d._exclude_list
        assert self.d._exclude_list.error(regex6) is None
        assert regex6 in self.d._exclude_list
        # This still should not be affected
        eq_(self.d.get_state(p1["$Recycle.Bin"]["subdir"]), DirectoryState.NORMAL)
        files = self.get_files_and_expect_num_result(5)
        # These files are under the "/subdir" directory
        assert "somesubdirfile.png" not in files
        assert "unwanted_subdirfile.gif" not in files
        # This file under "subdar" directory should not be filtered out
        assert "file_ending_with_subdir" in files
        # This file is in a directory that should be filtered out
        assert "file_which_shouldnt_match" not in files
    def test_japanese_unicode(self, tmpdir):
        p1 = Path(str(tmpdir))
        p1["$Recycle.Bin"].mkdir()
@@ -488,7 +518,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
        self.d._exclude_list.add(regex3)
        self.d._exclude_list.mark(regex3)
        # print(f"get_folders(): {[x for x in self.d.get_folders()]}")
-        eq_(self.d.get_state(p1["$Recycle.Bin"]["思叫物語"]), DirectoryState.Excluded)
+        eq_(self.d.get_state(p1["$Recycle.Bin"]["思叫物語"]), DirectoryState.EXCLUDED)
        files = self.get_files_and_expect_num_result(2)
        assert "過去白濁物語～]_カラー.jpg" not in files
        assert "なししろ会う前" not in files
@@ -497,7 +527,7 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
        regex4 = r".*物語$"
        self.d._exclude_list.rename(regex3, regex4)
        assert self.d._exclude_list.error(regex4) is None
-        self.d.set_state(p1["$Recycle.Bin"]["思叫物語"], DirectoryState.Normal)
+        self.d.set_state(p1["$Recycle.Bin"]["思叫物語"], DirectoryState.NORMAL)
        files = self.get_files_and_expect_num_result(5)
        assert "過去白濁物語～]_カラー.jpg" in files
        assert "なししろ会う前" in files
@@ -516,8 +546,8 @@ files: {self.d._exclude_list.compiled_files} all: {self.d._exclude_list.compiled
        p1["foobar"][".hidden_dir"][".hidden_subfile.png"].open("w").close()
        self.d.add_path(p1["foobar"])
        # It should not inherit its parent's state originally
-        eq_(self.d.get_state(p1["foobar"][".hidden_dir"]), DirectoryState.Excluded)
+        eq_(self.d.get_state(p1["foobar"][".hidden_dir"]), DirectoryState.EXCLUDED)
-        self.d.set_state(p1["foobar"][".hidden_dir"], DirectoryState.Normal)
+        self.d.set_state(p1["foobar"][".hidden_dir"], DirectoryState.NORMAL)
        # The files should still be filtered
        files = self.get_files_and_expect_num_result(1)
        eq_(len(self.d._exclude_list.compiled_paths), 0)
--- a/core/tests/engine_test.py
+++ b/core/tests/engine_test.py
@@ -69,6 +69,10 @@ class TestCasegetwords:
        eq_(["a", "b", "c", "d"], getwords("a b c d"))
        eq_(["a", "b", "c", "d"], getwords(" a  b  c d "))
    def test_unicode(self):
        eq_(["e", "c", "0", "a", "o", "u", "e", "u"], getwords("é ç 0 à ö û è ¤ ù"))
        eq_(["02", "君のこころは輝いてるかい？", "国木田花丸", "solo", "ver"], getwords("02 君のこころは輝いてるかい？ 国木田花丸 Solo Ver"))
    def test_splitter_chars(self):
        eq_(
            [chr(i) for i in range(ord("a"), ord("z") + 1)],
@@ -85,7 +89,7 @@ class TestCasegetwords:
        eq_(["foo", "bar"], getwords("FOO BAR"))
    def test_decompose_unicode(self):
-        eq_(getwords("foo\xe9bar"), ["fooebar"])
+        eq_(["fooebar"], getwords("foo\xe9bar"))
 class TestCasegetfields:
@@ -99,10 +103,9 @@ class TestCasegetfields:
        expected = [["a", "bc", "def"]]
        actual = getfields(" - a bc def")
        eq_(expected, actual)
        expected = [["bc", "def"]]
-class TestCaseunpack_fields:
+class TestCaseUnpackFields:
    def test_with_fields(self):
        expected = ["a", "b", "c", "d", "e", "f"]
        actual = unpack_fields([["a"], ["b", "c"], ["d", "e", "f"]])
@@ -173,9 +176,7 @@ class TestCaseWordCompareWithFields:
    def test_simple(self):
        eq_(
            67,
-            compare_fields(
+            compare_fields([["a", "b"], ["c", "d", "e"]], [["a", "b"], ["c", "d", "f"]]),
-                [["a", "b"], ["c", "d", "e"]], [["a", "b"], ["c", "d", "f"]]
-            ),
        )
    def test_empty(self):
@@ -216,24 +217,24 @@ class TestCaseWordCompareWithFields:
        eq_([["c", "d", "f"], ["a", "b"]], second)
-class TestCasebuild_word_dict:
+class TestCaseBuildWordDict:
    def test_with_standard_words(self):
-        itemList = [NamedObject("foo bar", True)]
+        item_list = [NamedObject("foo bar", True)]
-        itemList.append(NamedObject("bar baz", True))
+        item_list.append(NamedObject("bar baz", True))
-        itemList.append(NamedObject("baz bleh foo", True))
+        item_list.append(NamedObject("baz bleh foo", True))
-        d = build_word_dict(itemList)
+        d = build_word_dict(item_list)
        eq_(4, len(d))
        eq_(2, len(d["foo"]))
-        assert itemList[0] in d["foo"]
+        assert item_list[0] in d["foo"]
-        assert itemList[2] in d["foo"]
+        assert item_list[2] in d["foo"]
        eq_(2, len(d["bar"]))
-        assert itemList[0] in d["bar"]
+        assert item_list[0] in d["bar"]
-        assert itemList[1] in d["bar"]
+        assert item_list[1] in d["bar"]
        eq_(2, len(d["baz"]))
-        assert itemList[1] in d["baz"]
+        assert item_list[1] in d["baz"]
-        assert itemList[2] in d["baz"]
+        assert item_list[2] in d["baz"]
        eq_(1, len(d["bleh"]))
-        assert itemList[2] in d["bleh"]
+        assert item_list[2] in d["bleh"]
    def test_unpack_fields(self):
        o = NamedObject("")
@@ -261,15 +262,13 @@ class TestCasebuild_word_dict:
        j = job.Job(1, do_progress)
        self.log = []
        s = "foo bar"
-        build_word_dict(
+        build_word_dict([NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j)
-            [NamedObject(s, True), NamedObject(s, True), NamedObject(s, True)], j
-        )
        # We don't have intermediate log because iter_with_progress is called with every > 1
        eq_(0, self.log[0])
        eq_(100, self.log[1])
-class TestCasemerge_similar_words:
+class TestCaseMergeSimilarWords:
    def test_some_similar_words(self):
        d = {
            "foobar": set([1]),
@@ -281,11 +280,11 @@ class TestCasemerge_similar_words:
        eq_(3, len(d["foobar"]))
-class TestCasereduce_common_words:
+class TestCaseReduceCommonWords:
    def test_typical(self):
        d = {
-            "foo": set([NamedObject("foo bar", True) for i in range(50)]),
+            "foo": set([NamedObject("foo bar", True) for _ in range(50)]),
-            "bar": set([NamedObject("foo bar", True) for i in range(49)]),
+            "bar": set([NamedObject("foo bar", True) for _ in range(49)]),
        }
        reduce_common_words(d, 50)
        assert "foo" not in d
@@ -293,10 +292,7 @@ class TestCasereduce_common_words:
    def test_dont_remove_objects_with_only_common_words(self):
        d = {
-            "common": set(
+            "common": set([NamedObject("common uncommon", True) for _ in range(50)] + [NamedObject("common", True)]),
-                [NamedObject("common uncommon", True) for i in range(50)]
-                + [NamedObject("common", True)]
-            ),
            "uncommon": set([NamedObject("common uncommon", True)]),
        }
        reduce_common_words(d, 50)
@@ -305,23 +301,20 @@ class TestCasereduce_common_words:
    def test_values_still_are_set_instances(self):
        d = {
-            "common": set(
+            "common": set([NamedObject("common uncommon", True) for _ in range(50)] + [NamedObject("common", True)]),
-                [NamedObject("common uncommon", True) for i in range(50)]
-                + [NamedObject("common", True)]
-            ),
            "uncommon": set([NamedObject("common uncommon", True)]),
        }
        reduce_common_words(d, 50)
        assert isinstance(d["common"], set)
        assert isinstance(d["uncommon"], set)
-    def test_dont_raise_KeyError_when_a_word_has_been_removed(self):
+    def test_dont_raise_keyerror_when_a_word_has_been_removed(self):
        # If a word has been removed by the reduce, an object in a subsequent common word that
        # contains the word that has been removed would cause a KeyError.
        d = {
-            "foo": set([NamedObject("foo bar baz", True) for i in range(50)]),
+            "foo": set([NamedObject("foo bar baz", True) for _ in range(50)]),
-            "bar": set([NamedObject("foo bar baz", True) for i in range(50)]),
+            "bar": set([NamedObject("foo bar baz", True) for _ in range(50)]),
-            "baz": set([NamedObject("foo bar baz", True) for i in range(49)]),
+            "baz": set([NamedObject("foo bar baz", True) for _ in range(49)]),
        }
        try:
            reduce_common_words(d, 50)
@@ -335,7 +328,7 @@ class TestCasereduce_common_words:
            o.words = [["foo", "bar"], ["baz"]]
            return o
-        d = {"foo": set([create_it() for i in range(50)])}
+        d = {"foo": set([create_it() for _ in range(50)])}
        try:
            reduce_common_words(d, 50)
        except TypeError:
@@ -348,13 +341,9 @@ class TestCasereduce_common_words:
        # would not stay in 'bar' because 'foo' is not a common word anymore.
        only_common = NamedObject("foo bar", True)
        d = {
-            "foo": set(
+            "foo": set([NamedObject("foo bar baz", True) for _ in range(49)] + [only_common]),
-                [NamedObject("foo bar baz", True) for i in range(49)] + [only_common]
+            "bar": set([NamedObject("foo bar baz", True) for _ in range(49)] + [only_common]),
-            ),
+            "baz": set([NamedObject("foo bar baz", True) for _ in range(49)]),
-            "bar": set(
-                [NamedObject("foo bar baz", True) for i in range(49)] + [only_common]
-            ),
-            "baz": set([NamedObject("foo bar baz", True) for i in range(49)]),
        }
        reduce_common_words(d, 50)
        eq_(1, len(d["foo"]))
@@ -362,7 +351,7 @@ class TestCasereduce_common_words:
        eq_(49, len(d["baz"]))
-class TestCaseget_match:
+class TestCaseGetMatch:
    def test_simple(self):
        o1 = NamedObject("foo bar", True)
        o2 = NamedObject("bar bleh", True)
@@ -382,9 +371,7 @@ class TestCaseget_match:
        assert object() not in m
    def test_word_weight(self):
-        m = get_match(
+        m = get_match(NamedObject("foo bar", True), NamedObject("bar bleh", True), (WEIGHT_WORDS,))
-            NamedObject("foo bar", True), NamedObject("bar bleh", True), (WEIGHT_WORDS,)
-        )
        eq_(m.percentage, int((6.0 / 13.0) * 100))
@@ -393,12 +380,12 @@ class TestCaseGetMatches:
        eq_(getmatches([]), [])
    def test_simple(self):
-        itemList = [
+        item_list = [
            NamedObject("foo bar"),
            NamedObject("bar bleh"),
            NamedObject("a b c foo"),
        ]
-        r = getmatches(itemList)
+        r = getmatches(item_list)
        eq_(2, len(r))
        m = first(m for m in r if m.percentage == 50)  # "foo bar" and "bar bleh"
        assert_match(m, "foo bar", "bar bleh")
@@ -406,40 +393,40 @@ class TestCaseGetMatches:
        assert_match(m, "foo bar", "a b c foo")
    def test_null_and_unrelated_objects(self):
-        itemList = [
+        item_list = [
            NamedObject("foo bar"),
            NamedObject("bar bleh"),
            NamedObject(""),
            NamedObject("unrelated object"),
        ]
-        r = getmatches(itemList)
+        r = getmatches(item_list)
        eq_(len(r), 1)
        m = r[0]
        eq_(m.percentage, 50)
        assert_match(m, "foo bar", "bar bleh")
    def test_twice_the_same_word(self):
-        itemList = [NamedObject("foo foo bar"), NamedObject("bar bleh")]
+        item_list = [NamedObject("foo foo bar"), NamedObject("bar bleh")]
-        r = getmatches(itemList)
+        r = getmatches(item_list)
        eq_(1, len(r))
    def test_twice_the_same_word_when_preworded(self):
-        itemList = [NamedObject("foo foo bar", True), NamedObject("bar bleh", True)]
+        item_list = [NamedObject("foo foo bar", True), NamedObject("bar bleh", True)]
-        r = getmatches(itemList)
+        r = getmatches(item_list)
        eq_(1, len(r))
    def test_two_words_match(self):
-        itemList = [NamedObject("foo bar"), NamedObject("foo bar bleh")]
+        item_list = [NamedObject("foo bar"), NamedObject("foo bar bleh")]
-        r = getmatches(itemList)
+        r = getmatches(item_list)
        eq_(1, len(r))
    def test_match_files_with_only_common_words(self):
        # If a word occurs more than 50 times, it is excluded from the matching process
        # The problem with the common_word_threshold is that the files containing only common
        # words will never be matched together. We *should* match them.
-        # This test assumes that the common word threashold const is 50
+        # This test assumes that the common word threshold const is 50
-        itemList = [NamedObject("foo") for i in range(50)]
+        item_list = [NamedObject("foo") for _ in range(50)]
-        r = getmatches(itemList)
+        r = getmatches(item_list)
        eq_(1225, len(r))
    def test_use_words_already_there_if_there(self):
@@ -462,28 +449,28 @@ class TestCaseGetMatches:
        eq_(100, self.log[-1])
    def test_weight_words(self):
-        itemList = [NamedObject("foo bar"), NamedObject("bar bleh")]
+        item_list = [NamedObject("foo bar"), NamedObject("bar bleh")]
-        m = getmatches(itemList, weight_words=True)[0]
+        m = getmatches(item_list, weight_words=True)[0]
        eq_(int((6.0 / 13.0) * 100), m.percentage)
    def test_similar_word(self):
-        itemList = [NamedObject("foobar"), NamedObject("foobars")]
+        item_list = [NamedObject("foobar"), NamedObject("foobars")]
-        eq_(len(getmatches(itemList, match_similar_words=True)), 1)
+        eq_(len(getmatches(item_list, match_similar_words=True)), 1)
-        eq_(getmatches(itemList, match_similar_words=True)[0].percentage, 100)
+        eq_(getmatches(item_list, match_similar_words=True)[0].percentage, 100)
-        itemList = [NamedObject("foobar"), NamedObject("foo")]
+        item_list = [NamedObject("foobar"), NamedObject("foo")]
-        eq_(len(getmatches(itemList, match_similar_words=True)), 0)  # too far
+        eq_(len(getmatches(item_list, match_similar_words=True)), 0)  # too far
-        itemList = [NamedObject("bizkit"), NamedObject("bizket")]
+        item_list = [NamedObject("bizkit"), NamedObject("bizket")]
-        eq_(len(getmatches(itemList, match_similar_words=True)), 1)
+        eq_(len(getmatches(item_list, match_similar_words=True)), 1)
-        itemList = [NamedObject("foobar"), NamedObject("foosbar")]
+        item_list = [NamedObject("foobar"), NamedObject("foosbar")]
-        eq_(len(getmatches(itemList, match_similar_words=True)), 1)
+        eq_(len(getmatches(item_list, match_similar_words=True)), 1)
    def test_single_object_with_similar_words(self):
-        itemList = [NamedObject("foo foos")]
+        item_list = [NamedObject("foo foos")]
-        eq_(len(getmatches(itemList, match_similar_words=True)), 0)
+        eq_(len(getmatches(item_list, match_similar_words=True)), 0)
    def test_double_words_get_counted_only_once(self):
-        itemList = [NamedObject("foo bar foo bleh"), NamedObject("foo bar bleh bar")]
+        item_list = [NamedObject("foo bar foo bleh"), NamedObject("foo bar bleh bar")]
-        m = getmatches(itemList)[0]
+        m = getmatches(item_list)[0]
        eq_(75, m.percentage)
    def test_with_fields(self):
@@ -503,13 +490,13 @@ class TestCaseGetMatches:
        eq_(m.percentage, 50)
    def test_only_match_similar_when_the_option_is_set(self):
-        itemList = [NamedObject("foobar"), NamedObject("foobars")]
+        item_list = [NamedObject("foobar"), NamedObject("foobars")]
-        eq_(len(getmatches(itemList, match_similar_words=False)), 0)
+        eq_(len(getmatches(item_list, match_similar_words=False)), 0)
    def test_dont_recurse_do_match(self):
        # with nosetests, the stack is increased. The number has to be high enough not to be failing falsely
        sys.setrecursionlimit(200)
-        files = [NamedObject("foo bar") for i in range(201)]
+        files = [NamedObject("foo bar") for _ in range(201)]
        try:
            getmatches(files)
        except RuntimeError:
@@ -518,38 +505,60 @@ class TestCaseGetMatches:
            sys.setrecursionlimit(1000)
    def test_min_match_percentage(self):
-        itemList = [
+        item_list = [
            NamedObject("foo bar"),
            NamedObject("bar bleh"),
            NamedObject("a b c foo"),
        ]
-        r = getmatches(itemList, min_match_percentage=50)
+        r = getmatches(item_list, min_match_percentage=50)
        eq_(1, len(r))  # Only "foo bar" / "bar bleh" should match
-    def test_MemoryError(self, monkeypatch):
+    def test_memory_error(self, monkeypatch):
        @log_calls
        def mocked_match(first, second, flags):
            if len(mocked_match.calls) > 42:
                raise MemoryError()
            return Match(first, second, 0)
-        objects = [NamedObject() for i in range(10)]  # results in 45 matches
+        objects = [NamedObject() for _ in range(10)]  # results in 45 matches
        monkeypatch.setattr(engine, "get_match", mocked_match)
        try:
            r = getmatches(objects)
        except MemoryError:
-            self.fail("MemorryError must be handled")
+            self.fail("MemoryError must be handled")
        eq_(42, len(r))
 class TestCaseGetMatchesByContents:
-    def test_dont_compare_empty_files(self):
+    def test_big_file_partial_hashes(self):
-        o1, o2 = no(size=0), no(size=0)
+        smallsize = 1
-        assert not getmatches_by_contents([o1, o2])
+        bigsize = 100 * 1024 * 1024  # 100MB
        f = [
            no("bigfoo", size=bigsize),
            no("bigbar", size=bigsize),
            no("smallfoo", size=smallsize),
            no("smallbar", size=smallsize),
        ]
        f[0].md5 = f[0].md5partial = f[0].md5samples = "foobar"
        f[1].md5 = f[1].md5partial = f[1].md5samples = "foobar"
        f[2].md5 = f[2].md5partial = "bleh"
        f[3].md5 = f[3].md5partial = "bleh"
        r = getmatches_by_contents(f, bigsize=bigsize)
        eq_(len(r), 2)
        # User disabled optimization for big files, compute hashes as usual
        r = getmatches_by_contents(f, bigsize=0)
        eq_(len(r), 2)
        # Other file is now slightly different, md5partial is still the same
        f[1].md5 = f[1].md5samples = "foobardiff"
        r = getmatches_by_contents(f, bigsize=bigsize)
        # Successfully filter it out
        eq_(len(r), 1)
        r = getmatches_by_contents(f, bigsize=0)
        eq_(len(r), 1)
 class TestCaseGroup:
-    def test_empy(self):
+    def test_empty(self):
        g = Group()
        eq_(None, g.ref)
        eq_([], g.dupes)
@@ -723,8 +732,7 @@ class TestCaseGroup:
        # if the ref has the same key as one or more of the dupe, run the tie_breaker func among them
        g = get_test_group()
        o1, o2, o3 = g.ordered
-        tie_breaker = lambda ref, dupe: dupe is o3
+        g.prioritize(lambda x: 0, lambda ref, dupe: dupe is o3)
-        g.prioritize(lambda x: 0, tie_breaker)
        assert g.ref is o3
    def test_prioritize_with_tie_breaker_runs_on_all_dupes(self):
@@ -735,8 +743,7 @@ class TestCaseGroup:
        o1.foo = 1
        o2.foo = 2
        o3.foo = 3
-        tie_breaker = lambda ref, dupe: dupe.foo > ref.foo
+        g.prioritize(lambda x: 0, lambda ref, dupe: dupe.foo > ref.foo)
-        g.prioritize(lambda x: 0, tie_breaker)
        assert g.ref is o3
    def test_prioritize_with_tie_breaker_runs_only_on_tie_dupes(self):
@@ -749,9 +756,7 @@ class TestCaseGroup:
        o1.bar = 1
        o2.bar = 2
        o3.bar = 3
-        key_func = lambda x: -x.foo
+        g.prioritize(lambda x: -x.foo, lambda ref, dupe: dupe.bar > ref.bar)
-        tie_breaker = lambda ref, dupe: dupe.bar > ref.bar
-        g.prioritize(key_func, tie_breaker)
        assert g.ref is o2
    def test_prioritize_with_ref_dupe(self):
@@ -792,14 +797,14 @@ class TestCaseGroup:
        eq_(0, len(g.candidates))
-class TestCaseget_groups:
+class TestCaseGetGroups:
    def test_empty(self):
        r = get_groups([])
        eq_([], r)
    def test_simple(self):
-        itemList = [NamedObject("foo bar"), NamedObject("bar bleh")]
+        item_list = [NamedObject("foo bar"), NamedObject("bar bleh")]
-        matches = getmatches(itemList)
+        matches = getmatches(item_list)
        m = matches[0]
        r = get_groups(matches)
        eq_(1, len(r))
@@ -809,15 +814,15 @@ class TestCaseget_groups:
    def test_group_with_multiple_matches(self):
        # This results in 3 matches
-        itemList = [NamedObject("foo"), NamedObject("foo"), NamedObject("foo")]
+        item_list = [NamedObject("foo"), NamedObject("foo"), NamedObject("foo")]
-        matches = getmatches(itemList)
+        matches = getmatches(item_list)
        r = get_groups(matches)
        eq_(1, len(r))
        g = r[0]
        eq_(3, len(g))
    def test_must_choose_a_group(self):
-        itemList = [
+        item_list = [
            NamedObject("a b"),
            NamedObject("a b"),
            NamedObject("b c"),
@@ -826,13 +831,13 @@ class TestCaseget_groups:
        ]
        # There will be 2 groups here: group "a b" and group "c d"
        # "b c" can go either of them, but not both.
-        matches = getmatches(itemList)
+        matches = getmatches(item_list)
        r = get_groups(matches)
        eq_(2, len(r))
        eq_(5, len(r[0]) + len(r[1]))
    def test_should_all_go_in_the_same_group(self):
-        itemList = [
+        item_list = [
            NamedObject("a b"),
            NamedObject("a b"),
            NamedObject("a b"),
@@ -840,7 +845,7 @@ class TestCaseget_groups:
        ]
        # There will be 2 groups here: group "a b" and group "c d"
        # "b c" can fit in both, but it must be in only one of them
-        matches = getmatches(itemList)
+        matches = getmatches(item_list)
        r = get_groups(matches)
        eq_(1, len(r))
@@ -859,8 +864,8 @@ class TestCaseget_groups:
        assert o3 in g
    def test_four_sized_group(self):
-        itemList = [NamedObject("foobar") for i in range(4)]
+        item_list = [NamedObject("foobar") for _ in range(4)]
-        m = getmatches(itemList)
+        m = getmatches(item_list)
        r = get_groups(m)
        eq_(1, len(r))
        eq_(4, len(r[0]))
@@ -883,9 +888,7 @@ class TestCaseget_groups:
        m1 = Match(A, B, 90)  # This is the strongest "A" match
        m2 = Match(A, C, 80)  # Because C doesn't match with B, it won't be in the group
        m3 = Match(A, D, 80)  # Same thing for D
-        m4 = Match(
+        m4 = Match(C, D, 70)  # However, because C and D match, they should have their own group.
-            C, D, 70
-        )  # However, because C and D match, they should have their own group.
        groups = get_groups([m1, m2, m3, m4])
        eq_(len(groups), 2)
        g1, g2 = groups
--- a/core/tests/exclude_test.py
+++ b/core/tests/exclude_test.py
@@ -5,11 +5,8 @@
 # http://www.gnu.org/licenses/gpl-3.0.html
 import io
 # import os.path as op
 from xml.etree import ElementTree as ET
 # from pytest import raises
 from hscommon.testutil import eq_
 from hscommon.plat import ISWINDOWS
@@ -104,7 +101,7 @@ class TestCaseListEmpty:
        regex1 = r"one"
        regex2 = r"two"
        self.exclude_list.add(regex1)
-        assert(regex1 in self.exclude_list)
+        assert regex1 in self.exclude_list
        self.exclude_list.add(regex2)
        self.exclude_list.mark(regex1)
        self.exclude_list.mark(regex2)
@@ -113,7 +110,7 @@ class TestCaseListEmpty:
        compiled_files = [x for x in self.exclude_list.compiled_files]
        eq_(len(compiled_files), 2)
        self.exclude_list.remove(regex2)
-        assert(regex2 not in self.exclude_list)
+        assert regex2 not in self.exclude_list
        eq_(len(self.exclude_list), 1)
    def test_add_duplicate(self):
@@ -143,11 +140,7 @@ class TestCaseListEmpty:
    def test_force_add_not_compilable(self):
        """Used when loading from XML for example"""
        regex = r"one))"
-        try:
        self.exclude_list.add(regex, forced=True)
-        except Exception as e:
-            # Should not get an exception here unless it's a duplicate regex
-            raise e
        marked = self.exclude_list.mark(regex)
        eq_(marked, False)  # can't be marked since not compilable
        eq_(len(self.exclude_list), 1)
@@ -188,6 +181,28 @@ class TestCaseListEmpty:
        self.exclude_list.rename(regex_renamed_compilable, regex_compilable)
        eq_(self.exclude_list.is_marked(regex_compilable), True)
    def test_rename_regex_file_to_path(self):
        """Renaming a regex that contains a path separator to one without it moves the
        compiled pattern from ``compiled_paths`` to ``compiled_files``."""
        path_regex = r".*\\one.*" if ISWINDOWS else r".*/one.*"
        file_regex = r".*one.*"
        def patterns_of(compiled_iterable):
            # Reduce compiled pattern objects to their source strings for membership checks.
            return [compiled.pattern for compiled in compiled_iterable]
        self.exclude_list.add(path_regex)
        self.exclude_list.mark(path_regex)
        all_patterns = patterns_of(self.exclude_list._excluded_compiled)
        file_patterns = patterns_of(self.exclude_list.compiled_files)
        path_patterns = patterns_of(self.exclude_list.compiled_paths)
        # Before the rename the regex is only known as a path pattern.
        assert path_regex in all_patterns
        assert path_regex not in file_patterns
        assert path_regex in path_patterns
        self.exclude_list.rename(path_regex, file_regex)
        all_patterns = patterns_of(self.exclude_list._excluded_compiled)
        file_patterns = patterns_of(self.exclude_list.compiled_files)
        path_patterns = patterns_of(self.exclude_list.compiled_paths)
        # After the rename the old regex is gone and the new one is a file pattern only.
        assert path_regex not in all_patterns
        assert file_regex in all_patterns
        assert file_regex in file_patterns
        assert file_regex not in path_patterns
    def test_restore_default(self):
        """Only unmark previously added regexes and mark the pre-defined ones"""
        regex = r"one"
@@ -209,25 +224,162 @@ class TestCaseListEmpty:
                    found = True
            if not found:
                raise (Exception(f"Default RE {re} not found in compiled list."))
            continue
        eq_(len(default_regexes), len(self.exclude_list.compiled))
 class TestCaseListEmptyUnion(TestCaseListEmpty):
    """Same but with union regex"""
    def setup_method(self, method):
        # Fresh app with a union-mode ExcludeList for each test; the list is read back from the app.
        self.app = DupeGuru()
        self.app.exclude_list = ExcludeList(union_regex=True)
        self.exclude_list = self.app.exclude_list
    def test_add_mark_and_remove_regex(self):
        """Two marked regexes end up as a single compiled pattern joined with "|"."""
        regex1 = r"one"
        regex2 = r"two"
        self.exclude_list.add(regex1)
        assert regex1 in self.exclude_list
        self.exclude_list.add(regex2)
        self.exclude_list.mark(regex1)
        self.exclude_list.mark(regex2)
        eq_(len(self.exclude_list), 2)
        eq_(len(self.exclude_list.compiled), 1)
        compiled_files = list(self.exclude_list.compiled_files)
        eq_(len(compiled_files), 1)  # Two patterns joined together into one
        assert "|" in compiled_files[0].pattern
        self.exclude_list.remove(regex2)
        assert regex2 not in self.exclude_list
        eq_(len(self.exclude_list), 1)
    def test_rename_regex_file_to_path(self):
        """Renaming a path regex to a separator-free one moves it from compiled_paths to compiled_files."""
        regex = r".*/one.*"
        if ISWINDOWS:
            regex = r".*\\one.*"
        regex2 = r".*one.*"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        eq_(len(list(self.exclude_list)), 1)
        compiled_re = [x.pattern for x in self.exclude_list.compiled]
        files_re = [x.pattern for x in self.exclude_list.compiled_files]
        paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
        assert regex in compiled_re
        assert regex not in files_re
        assert regex in paths_re
        self.exclude_list.rename(regex, regex2)
        eq_(len(list(self.exclude_list)), 1)
        compiled_re = [x.pattern for x in self.exclude_list.compiled]
        files_re = [x.pattern for x in self.exclude_list.compiled_files]
        paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
        assert regex not in compiled_re
        assert regex2 in compiled_re
        assert regex2 in files_re
        assert regex2 not in paths_re
    def test_restore_default(self):
        """Only unmark previously added regexes and mark the pre-defined ones"""
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        self.exclude_list.restore_defaults()
        eq_(len(default_regexes), self.exclude_list.marked_count)
        # added regex shouldn't be marked
        eq_(self.exclude_list.is_marked(regex), False)
        # added regex shouldn't be in compiled list either.
        # ``compiled`` holds pattern objects, so the membership check has to run against the
        # pattern strings of the union; comparing a str to the objects could never fail.
        compiled = list(self.exclude_list.compiled)
        union_parts = compiled[0].pattern.split("|")
        assert regex not in union_parts
        # Need to escape both to get the same strings after compilation
        compiled_escaped = {x.encode("unicode-escape").decode() for x in union_parts}
        default_escaped = {x.encode("unicode-escape").decode() for x in default_regexes}
        assert compiled_escaped == default_escaped
        eq_(len(default_regexes), len(union_parts))
 class TestCaseDictEmpty(TestCaseListEmpty):
    """Same, but with dictionary implementation"""
    def setup_method(self, method):
        # Install an ExcludeDict (union_regex=False) on a fresh app, then read it back from the
        # app so the tests inherited from TestCaseListEmpty operate on the dictionary version.
        self.app = DupeGuru()
        self.app.exclude_list = ExcludeDict(union_regex=False)
        self.exclude_list = self.app.exclude_list
 class TestCaseDictEmptyUnion(TestCaseDictEmpty):
    """Same, but with union regex"""
    def setup_method(self, method):
        # Fresh app with a union-mode ExcludeDict for each test; the dict is read back from the app.
        self.app = DupeGuru()
        self.app.exclude_list = ExcludeDict(union_regex=True)
        self.exclude_list = self.app.exclude_list
    def test_add_mark_and_remove_regex(self):
        """Two marked regexes end up as a single compiled pattern."""
        regex1 = r"one"
        regex2 = r"two"
        self.exclude_list.add(regex1)
        assert regex1 in self.exclude_list
        self.exclude_list.add(regex2)
        self.exclude_list.mark(regex1)
        self.exclude_list.mark(regex2)
        eq_(len(self.exclude_list), 2)
        eq_(len(self.exclude_list.compiled), 1)
        compiled_files = list(self.exclude_list.compiled_files)
        # two patterns joined into one
        eq_(len(compiled_files), 1)
        self.exclude_list.remove(regex2)
        assert regex2 not in self.exclude_list
        eq_(len(self.exclude_list), 1)
    def test_rename_regex_file_to_path(self):
        """Renaming a path regex to a separator-free one moves it from compiled_paths to compiled_files."""
        regex = r".*/one.*"
        if ISWINDOWS:
            regex = r".*\\one.*"
        regex2 = r".*one.*"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        # Iterating the dict version yields (marked, regex) pairs.
        marked_re = [x for marked, x in self.exclude_list if marked]
        eq_(len(marked_re), 1)
        compiled_re = [x.pattern for x in self.exclude_list.compiled]
        files_re = [x.pattern for x in self.exclude_list.compiled_files]
        paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
        assert regex in compiled_re
        assert regex not in files_re
        assert regex in paths_re
        self.exclude_list.rename(regex, regex2)
        compiled_re = [x.pattern for x in self.exclude_list.compiled]
        files_re = [x.pattern for x in self.exclude_list.compiled_files]
        paths_re = [x.pattern for x in self.exclude_list.compiled_paths]
        assert regex not in compiled_re
        assert regex2 in compiled_re
        assert regex2 in files_re
        assert regex2 not in paths_re
    def test_restore_default(self):
        """Only unmark previously added regexes and mark the pre-defined ones"""
        regex = r"one"
        self.exclude_list.add(regex)
        self.exclude_list.mark(regex)
        self.exclude_list.restore_defaults()
        eq_(len(default_regexes), self.exclude_list.marked_count)
        # added regex shouldn't be marked
        eq_(self.exclude_list.is_marked(regex), False)
        # added regex shouldn't be in compiled list either.
        # ``compiled`` holds pattern objects, so the membership check has to run against the
        # pattern strings of the union; comparing a str to the objects could never fail.
        compiled = list(self.exclude_list.compiled)
        union_parts = compiled[0].pattern.split("|")
        assert regex not in union_parts
        # Need to escape both to get the same strings after compilation
        compiled_escaped = {x.encode("unicode-escape").decode() for x in union_parts}
        default_escaped = {x.encode("unicode-escape").decode() for x in default_regexes}
        assert compiled_escaped == default_escaped
        eq_(len(default_regexes), len(union_parts))
 def split_union(pattern_object):
    """Return the "|"-separated pieces of ``pattern_object.pattern`` as a list of strings."""
    union_source = pattern_object.pattern
    return union_source.split("|")
-class TestCaseCompiledList():
+class TestCaseCompiledList:
    """Test consistency between union or and separate versions."""
    def setup_method(self, method):
        self.e_separate = ExcludeList(union_regex=False)
        self.e_separate.restore_defaults()
@@ -275,6 +427,7 @@ class TestCaseCompiledList():
 class TestCaseCompiledDict(TestCaseCompiledList):
    """Test the dictionary version"""
    def setup_method(self, method):
        self.e_separate = ExcludeDict(union_regex=False)
        self.e_separate.restore_defaults()
--- a/core/tests/fs_test.py
+++ b/core/tests/fs_test.py
@@ -7,6 +7,7 @@
 # http://www.gnu.org/licenses/gpl-3.0.html
 import hashlib
 from os import urandom
 from hscommon.path import Path
 from hscommon.testutil import eq_
@@ -15,6 +16,36 @@ from core.tests.directories_test import create_fake_fs
 from .. import fs
 def create_fake_fs_with_random_data(rootpath):
    """Create ``rootpath["fs"]`` filled with random-content files and return that directory.
    The tree holds ``file1.test`` (200KiB), ``file2.test`` (1MiB) and ``file3.test`` (10MiB) at
    the top level, plus ``dirN/fileN.test`` containing the same bytes as the matching top-level file.
    """
    rootpath = rootpath["fs"]
    rootpath.mkdir()
    # (sub-directory, file name, random payload)
    layout = [
        ("dir1", "file1.test", urandom(200 * 1024)),  # 200KiB
        ("dir2", "file2.test", urandom(1024 * 1024)),  # 1MiB
        ("dir3", "file3.test", urandom(10 * 1024 * 1024)),  # 10MiB
    ]
    for dirname, _, _ in layout:
        rootpath[dirname].mkdir()
    # Context managers close each handle even if a write fails part-way.
    # NOTE(review): assumes the object returned by open() supports ``with``, like a builtin file.
    for _, filename, payload in layout:
        with rootpath[filename].open("wb") as fp:
            fp.write(payload)
    for dirname, filename, payload in layout:
        with rootpath[dirname][filename].open("wb") as fp:
            fp.write(payload)
    return rootpath
 def test_size_aggregates_subfiles(tmpdir):
    p = create_fake_fs(Path(str(tmpdir)))
    b = fs.Folder(p)
@@ -25,7 +56,7 @@ def test_md5_aggregate_subfiles_sorted(tmpdir):
    # dir.allfiles can return child in any order. Thus, bundle.md5 must aggregate
    # all files' md5 it contains, but it must make sure that it does so in the
    # same order everytime.
-    p = create_fake_fs(Path(str(tmpdir)))
+    p = create_fake_fs_with_random_data(Path(str(tmpdir)))
    b = fs.Folder(p)
    md51 = fs.File(p["dir1"]["file1.test"]).md5
    md52 = fs.File(p["dir2"]["file2.test"]).md5
@@ -41,6 +72,36 @@ def test_md5_aggregate_subfiles_sorted(tmpdir):
    eq_(b.md5, md5.digest())
 def test_partial_md5_aggregate_subfile_sorted(tmpdir):
    """Check Folder.md5partial and Folder.md5samples against a hand-built aggregate digest."""
    p = create_fake_fs_with_random_data(Path(str(tmpdir)))
    folder = fs.Folder(p)
    for digest_attr in ("md5partial", "md5samples"):
        in_subdirs = [
            getattr(fs.File(p["dir1"]["file1.test"]), digest_attr),
            getattr(fs.File(p["dir2"]["file2.test"]), digest_attr),
            getattr(fs.File(p["dir3"]["file3.test"]), digest_attr),
        ]
        at_top_level = [
            getattr(fs.File(p["file1.test"]), digest_attr),
            getattr(fs.File(p["file2.test"]), digest_attr),
            getattr(fs.File(p["file3.test"]), digest_attr),
        ]
        # The expected md5 is the md5 of md5s for folders and the direct md5 for files
        folder_digests = [hashlib.md5(digest).digest() for digest in in_subdirs]
        expected = hashlib.md5(b"".join(folder_digests + at_top_level))
        eq_(getattr(folder, digest_attr), expected.digest())
 def test_has_file_attrs(tmpdir):
    # a Folder must behave like a file, so it must have mtime attributes
    b = fs.Folder(Path(str(tmpdir)))
--- a/core/tests/ignore_test.py
+++ b/core/tests/ignore_test.py
@@ -16,54 +16,54 @@ from ..ignore import IgnoreList
 def test_empty():
    il = IgnoreList()
    eq_(0, len(il))
-    assert not il.AreIgnored("foo", "bar")
+    assert not il.are_ignored("foo", "bar")
 def test_simple():
    il = IgnoreList()
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
-    assert il.AreIgnored("foo", "bar")
+    assert il.are_ignored("foo", "bar")
-    assert il.AreIgnored("bar", "foo")
+    assert il.are_ignored("bar", "foo")
-    assert not il.AreIgnored("foo", "bleh")
+    assert not il.are_ignored("foo", "bleh")
-    assert not il.AreIgnored("bleh", "bar")
+    assert not il.are_ignored("bleh", "bar")
    eq_(1, len(il))
 def test_multiple():
    il = IgnoreList()
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
-    il.Ignore("foo", "bleh")
+    il.ignore("foo", "bleh")
-    il.Ignore("bleh", "bar")
+    il.ignore("bleh", "bar")
-    il.Ignore("aybabtu", "bleh")
+    il.ignore("aybabtu", "bleh")
-    assert il.AreIgnored("foo", "bar")
+    assert il.are_ignored("foo", "bar")
-    assert il.AreIgnored("bar", "foo")
+    assert il.are_ignored("bar", "foo")
-    assert il.AreIgnored("foo", "bleh")
+    assert il.are_ignored("foo", "bleh")
-    assert il.AreIgnored("bleh", "bar")
+    assert il.are_ignored("bleh", "bar")
-    assert not il.AreIgnored("aybabtu", "bar")
+    assert not il.are_ignored("aybabtu", "bar")
    eq_(4, len(il))
 def test_clear():
    il = IgnoreList()
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
-    il.Clear()
+    il.clear()
-    assert not il.AreIgnored("foo", "bar")
+    assert not il.are_ignored("foo", "bar")
-    assert not il.AreIgnored("bar", "foo")
+    assert not il.are_ignored("bar", "foo")
    eq_(0, len(il))
 def test_add_same_twice():
    il = IgnoreList()
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
-    il.Ignore("bar", "foo")
+    il.ignore("bar", "foo")
    eq_(1, len(il))
 def test_save_to_xml():
    il = IgnoreList()
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
-    il.Ignore("foo", "bleh")
+    il.ignore("foo", "bleh")
-    il.Ignore("bleh", "bar")
+    il.ignore("bleh", "bar")
    f = io.BytesIO()
    il.save_to_xml(f)
    f.seek(0)
@@ -73,50 +73,46 @@ def test_save_to_xml():
    eq_(len(root), 2)
    eq_(len([c for c in root if c.tag == "file"]), 2)
    f1, f2 = root[:]
-    subchildren = [c for c in f1 if c.tag == "file"] + [
+    subchildren = [c for c in f1 if c.tag == "file"] + [c for c in f2 if c.tag == "file"]
        c for c in f2 if c.tag == "file"
    ]
    eq_(len(subchildren), 3)
-def test_SaveThenLoad():
+def test_save_then_load():
    il = IgnoreList()
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
-    il.Ignore("foo", "bleh")
+    il.ignore("foo", "bleh")
-    il.Ignore("bleh", "bar")
+    il.ignore("bleh", "bar")
-    il.Ignore("\u00e9", "bar")
+    il.ignore("\u00e9", "bar")
    f = io.BytesIO()
    il.save_to_xml(f)
    f.seek(0)
    il = IgnoreList()
    il.load_from_xml(f)
    eq_(4, len(il))
-    assert il.AreIgnored("\u00e9", "bar")
+    assert il.are_ignored("\u00e9", "bar")
-def test_LoadXML_with_empty_file_tags():
+def test_load_xml_with_empty_file_tags():
    f = io.BytesIO()
-    f.write(
+    f.write(b'<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>')
        b'<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>'
    )
    f.seek(0)
    il = IgnoreList()
    il.load_from_xml(f)
    eq_(0, len(il))
-def test_AreIgnore_works_when_a_child_is_a_key_somewhere_else():
+def test_are_ignore_works_when_a_child_is_a_key_somewhere_else():
    il = IgnoreList()
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
-    il.Ignore("bar", "baz")
+    il.ignore("bar", "baz")
-    assert il.AreIgnored("bar", "foo")
+    assert il.are_ignored("bar", "foo")
 def test_no_dupes_when_a_child_is_a_key_somewhere_else():
    il = IgnoreList()
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
-    il.Ignore("bar", "baz")
+    il.ignore("bar", "baz")
-    il.Ignore("bar", "foo")
+    il.ignore("bar", "foo")
    eq_(2, len(il))
@@ -125,7 +121,7 @@ def test_iterate():
    il = IgnoreList()
    expected = [("foo", "bar"), ("bar", "baz"), ("foo", "baz")]
    for i in expected:
-        il.Ignore(i[0], i[1])
+        il.ignore(i[0], i[1])
    for i in il:
        expected.remove(i)  # No exception should be raised
    assert not expected  # expected should be empty
@@ -133,18 +129,18 @@ def test_iterate():
 def test_filter():
    il = IgnoreList()
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
-    il.Ignore("bar", "baz")
+    il.ignore("bar", "baz")
-    il.Ignore("foo", "baz")
+    il.ignore("foo", "baz")
-    il.Filter(lambda f, s: f == "bar")
+    il.filter(lambda f, s: f == "bar")
    eq_(1, len(il))
-    assert not il.AreIgnored("foo", "bar")
+    assert not il.are_ignored("foo", "bar")
-    assert il.AreIgnored("bar", "baz")
+    assert il.are_ignored("bar", "baz")
 def test_save_with_non_ascii_items():
    il = IgnoreList()
-    il.Ignore("\xac", "\xbf")
+    il.ignore("\xac", "\xbf")
    f = io.BytesIO()
    try:
        il.save_to_xml(f)
@@ -155,29 +151,29 @@ def test_save_with_non_ascii_items():
 def test_len():
    il = IgnoreList()
    eq_(0, len(il))
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
    eq_(1, len(il))
 def test_nonzero():
    il = IgnoreList()
    assert not il
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
    assert il
 def test_remove():
    il = IgnoreList()
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
-    il.Ignore("foo", "baz")
+    il.ignore("foo", "baz")
    il.remove("bar", "foo")
    eq_(len(il), 1)
-    assert not il.AreIgnored("foo", "bar")
+    assert not il.are_ignored("foo", "bar")
 def test_remove_non_existant():
    il = IgnoreList()
-    il.Ignore("foo", "bar")
+    il.ignore("foo", "bar")
-    il.Ignore("foo", "baz")
+    il.ignore("foo", "baz")
    with raises(ValueError):
        il.remove("foo", "bleh")
--- a/core/tests/results_test.py
+++ b/core/tests/results_test.py
@@ -117,9 +117,7 @@ class TestCaseResultsWithSomeGroups:
        assert d is g.ref
    def test_sort_groups(self):
-        self.results.make_ref(
+        self.results.make_ref(self.objects[1])  # We want to make the 1024 sized object to go ref.
            self.objects[1]
        )  # We want to make the 1024 sized object to go ref.
        g1, g2 = self.groups
        self.results.sort_groups("size")
        assert self.results.groups[0] is g2
@@ -129,9 +127,7 @@ class TestCaseResultsWithSomeGroups:
        assert self.results.groups[1] is g2
    def test_set_groups_when_sorted(self):
-        self.results.make_ref(
+        self.results.make_ref(self.objects[1])  # We want to make the 1024 sized object to go ref.
            self.objects[1]
        )  # We want to make the 1024 sized object to go ref.
        self.results.sort_groups("size")
        objects, matches, groups = GetTestGroups()
        g1, g2 = groups
@@ -406,7 +402,7 @@ class TestCaseResultsMarkings:
        self.results.make_ref(d)
        eq_("0 / 3 (0.00 B / 3.00 B) duplicates marked.", self.results.stat_line)
-    def test_SaveXML(self):
+    def test_save_xml(self):
        self.results.mark(self.objects[1])
        self.results.mark_invert()
        f = io.BytesIO()
@@ -423,7 +419,7 @@ class TestCaseResultsMarkings:
        eq_("n", d1.get("marked"))
        eq_("y", d2.get("marked"))
-    def test_LoadXML(self):
+    def test_load_xml(self):
        def get_file(path):
            return [f for f in self.objects if str(f.path) == path][0]
@@ -489,7 +485,7 @@ class TestCaseResultsXML:
        eq_("ibabtu", d1.get("words"))
        eq_("ibabtu", d2.get("words"))
-    def test_LoadXML(self):
+    def test_load_xml(self):
        def get_file(path):
            return [f for f in self.objects if str(f.path) == path][0]
@@ -521,7 +517,7 @@ class TestCaseResultsXML:
        eq_(["ibabtu"], g2[0].words)
        eq_(["ibabtu"], g2[1].words)
-    def test_LoadXML_with_filename(self, tmpdir):
+    def test_load_xml_with_filename(self, tmpdir):
        def get_file(path):
            return [f for f in self.objects if str(f.path) == path][0]
@@ -533,7 +529,7 @@ class TestCaseResultsXML:
        r.load_from_xml(filename, get_file)
        eq_(2, len(r.groups))
-    def test_LoadXML_with_some_files_that_dont_exist_anymore(self):
+    def test_load_xml_with_some_files_that_dont_exist_anymore(self):
        def get_file(path):
            if path.endswith("ibabtu 2"):
                return None
@@ -549,7 +545,7 @@ class TestCaseResultsXML:
        eq_(1, len(r.groups))
        eq_(3, len(r.groups[0]))
-    def test_LoadXML_missing_attributes_and_bogus_elements(self):
+    def test_load_xml_missing_attributes_and_bogus_elements(self):
        def get_file(path):
            return [f for f in self.objects if str(f.path) == path][0]
@@ -601,9 +597,7 @@ class TestCaseResultsXML:
        matches = engine.getmatches(objects)  # we should have 5 matches
        groups = engine.get_groups(matches)  # We should have 2 groups
        for g in groups:
-            g.prioritize(
+            g.prioritize(lambda x: objects.index(x))  # We want the dupes to be in the same order as the list is
                lambda x: objects.index(x)
            )  # We want the dupes to be in the same order as the list is
        app = DupeGuru()
        results = Results(app)
        results.groups = groups
@@ -807,9 +801,7 @@ class TestCaseResultsFilter:
        # Now the stats should display *2* markable dupes (instead of 1)
        expected = "0 / 2 (0.00 B / 2.00 B) duplicates marked. filter: foo"
        eq_(expected, self.results.stat_line)
-        self.results.apply_filter(
+        self.results.apply_filter(None)  # Now let's make sure our unfiltered results aren't fucked up
            None
        )  # Now let's make sure our unfiltered results aren't fucked up
        expected = "0 / 3 (0.00 B / 3.00 B) duplicates marked."
        eq_(expected, self.results.stat_line)
--- a/core/tests/scanner_test.py
+++ b/core/tests/scanner_test.py
@@ -52,10 +52,13 @@ def test_empty(fake_fileexists):
 def test_default_settings(fake_fileexists):
    s = Scanner()
    eq_(s.min_match_percentage, 80)
-    eq_(s.scan_type, ScanType.Filename)
+    eq_(s.scan_type, ScanType.FILENAME)
    eq_(s.mix_file_kind, True)
    eq_(s.word_weighting, False)
    eq_(s.match_similar_words, False)
    eq_(s.size_threshold, 0)
    eq_(s.large_size_threshold, 0)
    eq_(s.big_file_size_threshold, 0)
 def test_simple_with_default_settings(fake_fileexists):
@@ -97,7 +100,7 @@ def test_trim_all_ref_groups(fake_fileexists):
    eq_(s.discarded_file_count, 0)
-def test_priorize(fake_fileexists):
+def test_prioritize(fake_fileexists):
    s = Scanner()
    f = [
        no("foo", path="p1"),
@@ -118,11 +121,11 @@ def test_priorize(fake_fileexists):
 def test_content_scan(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.Contents
+    s.scan_type = ScanType.CONTENTS
    f = [no("foo"), no("bar"), no("bleh")]
-    f[0].md5 = f[0].md5partial = "foobar"
+    f[0].md5 = f[0].md5partial = f[0].md5samples = "foobar"
-    f[1].md5 = f[1].md5partial = "foobar"
+    f[1].md5 = f[1].md5partial = f[1].md5samples = "foobar"
-    f[2].md5 = f[2].md5partial = "bleh"
+    f[2].md5 = f[2].md5partial = f[1].md5samples = "bleh"
    r = s.get_dupe_groups(f)
    eq_(len(r), 1)
    eq_(len(r[0]), 2)
@@ -132,22 +135,95 @@ def test_content_scan(fake_fileexists):
 def test_content_scan_compare_sizes_first(fake_fileexists):
    class MyFile(no):
        @property
-        def md5(file):
+        def md5(self):
            raise AssertionError()
    s = Scanner()
-    s.scan_type = ScanType.Contents
+    s.scan_type = ScanType.CONTENTS
    f = [MyFile("foo", 1), MyFile("bar", 2)]
    eq_(len(s.get_dupe_groups(f)), 0)
 def test_ignore_file_size(fake_fileexists):
    """Content scan group counts under each combination of size_threshold and large_size_threshold."""
    scanner = Scanner()
    scanner.scan_type = ScanType.CONTENTS
    small_size = 10  # lower bound used below; files one unit smaller carry the "smallignore" label
    scanner.size_threshold = 0
    large_size = 100 * 1024 * 1024  # 100MB
    scanner.large_size_threshold = 0
    # (label, size): each entry yields two files named label1/label2 sharing the label as digest.
    sized_labels = [
        ("smallignore", small_size - 1),
        ("small", small_size),
        ("large", large_size),
        ("largeignore", large_size + 1),
    ]
    files = [no(label + str(number), size) for label, size in sized_labels for number in (1, 2)]
    for position, fobj in enumerate(files):
        digest = sized_labels[position // 2][0]
        fobj.md5 = fobj.md5partial = fobj.md5samples = digest
    # (size_threshold, large_size_threshold, expected number of groups)
    scenarios = [
        (0, 0, 4),  # No ignores
        (small_size, 0, 3),  # Ignore smaller
        (0, large_size, 3),  # Ignore larger
        (small_size, large_size, 2),  # Ignore both
    ]
    for lower_bound, upper_bound, expected_groups in scenarios:
        scanner.size_threshold = lower_bound
        scanner.large_size_threshold = upper_bound
        eq_(len(scanner.get_dupe_groups(files)), expected_groups)
 def test_big_file_partial_hashes(fake_fileexists):
    """Content scan results with big_file_size_threshold enabled, disabled and re-enabled."""
    scanner = Scanner()
    scanner.scan_type = ScanType.CONTENTS
    smallsize = 1
    bigsize = 100 * 1024 * 1024  # 100MB
    scanner.big_file_size_threshold = bigsize
    bigfoo, bigbar = no("bigfoo", bigsize), no("bigbar", bigsize)
    smallfoo, smallbar = no("smallfoo", smallsize), no("smallbar", smallsize)
    files = [bigfoo, bigbar, smallfoo, smallbar]
    for big in (bigfoo, bigbar):
        big.md5 = big.md5partial = big.md5samples = "foobar"
    # The small files get no md5samples value in this test.
    for small in (smallfoo, smallbar):
        small.md5 = small.md5partial = "bleh"
    eq_(len(scanner.get_dupe_groups(files)), 2)
    # md5partial is still the same, but the file is actually different
    bigbar.md5 = bigbar.md5samples = "difffoobar"
    # here we compare the full md5s, as the user disabled the optimization
    scanner.big_file_size_threshold = 0
    eq_(len(scanner.get_dupe_groups(files)), 1)
    # here we should compare the md5samples, and see they are different
    scanner.big_file_size_threshold = bigsize
    eq_(len(scanner.get_dupe_groups(files)), 1)
 def test_min_match_perc_doesnt_matter_for_content_scan(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.Contents
+    s.scan_type = ScanType.CONTENTS
    f = [no("foo"), no("bar"), no("bleh")]
-    f[0].md5 = f[0].md5partial = "foobar"
+    f[0].md5 = f[0].md5partial = f[0].md5samples = "foobar"
-    f[1].md5 = f[1].md5partial = "foobar"
+    f[1].md5 = f[1].md5partial = f[1].md5samples = "foobar"
-    f[2].md5 = f[2].md5partial = "bleh"
+    f[2].md5 = f[2].md5partial = f[2].md5samples = "bleh"
    s.min_match_percentage = 101
    r = s.get_dupe_groups(f)
    eq_(len(r), 1)
@@ -160,15 +236,12 @@ def test_min_match_perc_doesnt_matter_for_content_scan(fake_fileexists):
 def test_content_scan_doesnt_put_md5_in_words_at_the_end(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.Contents
+    s.scan_type = ScanType.CONTENTS
    f = [no("foo"), no("bar")]
-    f[0].md5 = f[
+    f[0].md5 = f[0].md5partial = f[0].md5samples = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-        0
+    f[1].md5 = f[1].md5partial = f[1].md5samples = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
    ].md5partial = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
    f[1].md5 = f[
        1
    ].md5partial = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
    r = s.get_dupe_groups(f)
    # FIXME looks like we are missing something here?
    r[0]
@@ -229,7 +302,7 @@ def test_similar_words(fake_fileexists):
 def test_fields(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.Fields
+    s.scan_type = ScanType.FIELDS
    f = [no("The White Stripes - Little Ghost"), no("The White Stripes - Little Acorn")]
    r = s.get_dupe_groups(f)
    eq_(len(r), 0)
@@ -237,7 +310,7 @@ def test_fields(fake_fileexists):
 def test_fields_no_order(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.FieldsNoOrder
+    s.scan_type = ScanType.FIELDSNOORDER
    f = [no("The White Stripes - Little Ghost"), no("Little Ghost - The White Stripes")]
    r = s.get_dupe_groups(f)
    eq_(len(r), 1)
@@ -245,7 +318,7 @@ def test_fields_no_order(fake_fileexists):
 def test_tag_scan(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.Tag
+    s.scan_type = ScanType.TAG
    o1 = no("foo")
    o2 = no("bar")
    o1.artist = "The White Stripes"
@@ -258,7 +331,7 @@ def test_tag_scan(fake_fileexists):
 def test_tag_with_album_scan(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.Tag
+    s.scan_type = ScanType.TAG
    s.scanned_tags = set(["artist", "album", "title"])
    o1 = no("foo")
    o2 = no("bar")
@@ -278,7 +351,7 @@ def test_tag_with_album_scan(fake_fileexists):
 def test_that_dash_in_tags_dont_create_new_fields(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.Tag
+    s.scan_type = ScanType.TAG
    s.scanned_tags = set(["artist", "album", "title"])
    s.min_match_percentage = 50
    o1 = no("foo")
@@ -295,7 +368,7 @@ def test_that_dash_in_tags_dont_create_new_fields(fake_fileexists):
 def test_tag_scan_with_different_scanned(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.Tag
+    s.scan_type = ScanType.TAG
    s.scanned_tags = set(["track", "year"])
    o1 = no("foo")
    o2 = no("bar")
@@ -313,7 +386,7 @@ def test_tag_scan_with_different_scanned(fake_fileexists):
 def test_tag_scan_only_scans_existing_tags(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.Tag
+    s.scan_type = ScanType.TAG
    s.scanned_tags = set(["artist", "foo"])
    o1 = no("foo")
    o2 = no("bar")
@@ -327,7 +400,7 @@ def test_tag_scan_only_scans_existing_tags(fake_fileexists):
 def test_tag_scan_converts_to_str(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.Tag
+    s.scan_type = ScanType.TAG
    s.scanned_tags = set(["track"])
    o1 = no("foo")
    o2 = no("bar")
@@ -342,7 +415,7 @@ def test_tag_scan_converts_to_str(fake_fileexists):
 def test_tag_scan_non_ascii(fake_fileexists):
    s = Scanner()
-    s.scan_type = ScanType.Tag
+    s.scan_type = ScanType.TAG
    s.scanned_tags = set(["title"])
    o1 = no("foo")
    o2 = no("bar")
@@ -364,8 +437,8 @@ def test_ignore_list(fake_fileexists):
    f2.path = Path("dir2/foobar")
    f3.path = Path("dir3/foobar")
    ignore_list = IgnoreList()
-    ignore_list.Ignore(str(f1.path), str(f2.path))
+    ignore_list.ignore(str(f1.path), str(f2.path))
-    ignore_list.Ignore(str(f1.path), str(f3.path))
+    ignore_list.ignore(str(f1.path), str(f3.path))
    r = s.get_dupe_groups([f1, f2, f3], ignore_list=ignore_list)
    eq_(len(r), 1)
    g = r[0]
@@ -388,8 +461,8 @@ def test_ignore_list_checks_for_unicode(fake_fileexists):
    f2.path = Path("foo2\u00e9")
    f3.path = Path("foo3\u00e9")
    ignore_list = IgnoreList()
-    ignore_list.Ignore(str(f1.path), str(f2.path))
+    ignore_list.ignore(str(f1.path), str(f2.path))
-    ignore_list.Ignore(str(f1.path), str(f3.path))
+    ignore_list.ignore(str(f1.path), str(f3.path))
    r = s.get_dupe_groups([f1, f2, f3], ignore_list=ignore_list)
    eq_(len(r), 1)
    g = r[0]
@@ -493,7 +566,7 @@ def test_dont_group_files_that_dont_exist(tmpdir):
    # In this test, we have to delete one of the files between the get_matches() part and the
    # get_groups() part.
    s = Scanner()
-    s.scan_type = ScanType.Contents
+    s.scan_type = ScanType.CONTENTS
    p = Path(str(tmpdir))
    p["file1"].open("w").write("foo")
    p["file2"].open("w").write("foo")
@@ -512,23 +585,23 @@ def test_folder_scan_exclude_subfolder_matches(fake_fileexists):
    # when doing a Folders scan type, don't include matches for folders whose parent folder already
    # match.
    s = Scanner()
-    s.scan_type = ScanType.Folders
+    s.scan_type = ScanType.FOLDERS
    topf1 = no("top folder 1", size=42)
-    topf1.md5 = topf1.md5partial = b"some_md5_1"
+    topf1.md5 = topf1.md5partial = topf1.md5samples = b"some_md5_1"
    topf1.path = Path("/topf1")
    topf2 = no("top folder 2", size=42)
-    topf2.md5 = topf2.md5partial = b"some_md5_1"
+    topf2.md5 = topf2.md5partial = topf2.md5samples = b"some_md5_1"
    topf2.path = Path("/topf2")
    subf1 = no("sub folder 1", size=41)
-    subf1.md5 = subf1.md5partial = b"some_md5_2"
+    subf1.md5 = subf1.md5partial = subf1.md5samples = b"some_md5_2"
    subf1.path = Path("/topf1/sub")
    subf2 = no("sub folder 2", size=41)
-    subf2.md5 = subf2.md5partial = b"some_md5_2"
+    subf2.md5 = subf2.md5partial = subf2.md5samples = b"some_md5_2"
    subf2.path = Path("/topf2/sub")
    eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2])), 1)  # only top folders
    # however, if another folder matches a subfolder, keep it in the matches
    otherf = no("other folder", size=41)
-    otherf.md5 = otherf.md5partial = b"some_md5_2"
+    otherf.md5 = otherf.md5partial = otherf.md5samples = b"some_md5_2"
    otherf.path = Path("/otherfolder")
    eq_(len(s.get_dupe_groups([topf1, topf2, subf1, subf2, otherf])), 2)
@@ -547,21 +620,21 @@ def test_dont_count_ref_files_as_discarded(fake_fileexists):
    # However, this causes problems in "discarded" counting and we make sure here that we don't
    # report discarded matches in exact duplicate scans.
    s = Scanner()
-    s.scan_type = ScanType.Contents
+    s.scan_type = ScanType.CONTENTS
    o1 = no("foo", path="p1")
    o2 = no("foo", path="p2")
    o3 = no("foo", path="p3")
-    o1.md5 = o1.md5partial = "foobar"
+    o1.md5 = o1.md5partial = o1.md5samples = "foobar"
-    o2.md5 = o2.md5partial = "foobar"
+    o2.md5 = o2.md5partial = o2.md5samples = "foobar"
-    o3.md5 = o3.md5partial = "foobar"
+    o3.md5 = o3.md5partial = o3.md5samples = "foobar"
    o1.is_ref = True
    o2.is_ref = True
    eq_(len(s.get_dupe_groups([o1, o2, o3])), 1)
    eq_(s.discarded_file_count, 0)
-def test_priorize_me(fake_fileexists):
+def test_prioritize_me(fake_fileexists):
-    # in ScannerME, bitrate goes first (right after is_ref) in priorization
+    # in ScannerME, bitrate goes first (right after is_ref) in prioritization
    s = ScannerME()
    o1, o2 = no("foo", path="p1"), no("foo", path="p2")
    o1.bitrate = 1
--- a/core/util.py
+++ b/core/util.py
@@ -5,6 +5,8 @@
 # http://www.gnu.org/licenses/gpl-3.0.html
 import time
 import sys
 import os
 from hscommon.util import format_time_decimal
@@ -58,3 +60,7 @@ def fix_surrogate_encoding(s, encoding="utf-8"):
        return s.encode(encoding, "replace").decode(encoding)
    else:
        return s
 def executable_folder():
    return os.path.dirname(os.path.abspath(sys.argv[0]))
--- a/help/changelog
+++ b/help/changelog
@@ -1,3 +1,42 @@
 === 4.2.1 (2022-03-25)
 * Default to English on unsupported system language (#976)
 * Fix image viewer zoom datatype issue (#978)
 * Fix errors from window change event (#937, #980)
 * Fix deprecation warning from SQLite
 * Enforce minimum Windows version in installer (#983)
 * Fix help path for local files
 * Drop python 3.6 support
 * VS Code project settings added, yaml validation for GitHub actions
 === 4.2.0 (2022-01-24)
 * Add Malay and Turkish
 * Add dark style for windows (#900)
 * Add caching md5 file hashes (#942)
 * Add feature to partially hash large files, with user adjustable preference (#908)
 * Add portable mode (store settings next to executable)
 * Add file association for .dupeguru files on windows
 * Add ability to pass .dupeguru file to load on startup (#902)
 * Add ability to reveal in explorer/finder (#895)
 * Switch audio tag processing from hsaudiotag to mutagen (#440)
 * Add ability to use Qt dialogs instead of native OS dialogs for some file selection operations
 * Add OS and Python details to error dialog to assist in troubleshooting
 * Add preference to ignore large files with threshold (#430)
 * Fix error on close from DetailsPanel (#857, #873)
 * Change reference background color (#894, #898)
 * Remove stripping of unicode characters when matching names (#879)
 * Fix exception when deleting in delta view (#863, #905)
 * Fix dupes only view not updating after re-prioritize results (#757, #910, #911)
 * Fix ability to drag'n'drop file/folder with certain characters in name (#897)
 * Fix window position opening partially offscreen (#653)
 * Fix TypeError in photo mode (#551)
 * Change message for when files are deleted directly (#904)
 * Add more feedback during scan (#700)
 * Add Python version check to build.py (#589)
 * General code cleanups
 * Improvements to using standardized build tooling
 * Moved CI/CD to github actions, added codeql, SonarCloud
 === 4.1.1 (2021-03-21)
 * Add Japanese 
--- a/help/de/faq.rst
+++ b/help/de/faq.rst
@@ -1,7 +1,7 @@
 Häufig gestellte Fragen
 ==========================
-.. topic:: What is |appname|?
+.. topic:: Was ist dupeGuru?
    .. only:: edition_se
@@ -25,7 +25,7 @@ Häufig gestellte Fragen
 .. topic:: Was sind die Demo-Einschränkungen von dupeGuru?
-    Keine, |appname| ist `Fairware <http://open.hardcoded.net/about/>`_.
+    Keine, dupeGuru ist `Fairware <http://open.hardcoded.net/about/>`_.
 .. topic:: Die Markierungsbox einer Datei, die ich löschen möchte, ist deaktiviert. Was muss ich tun?
--- a/help/de/index.rst
+++ b/help/de/index.rst
@@ -1,21 +1,13 @@
-|appname| Hilfe
+dupeGuru Hilfe
 ===============
 .. only:: edition_se
-    Dieses Dokument ist auch auf `Englisch <http://www.hardcoded.net/dupeguru/help/en/>`__ und `Französisch <http://www.hardcoded.net/dupeguru/help/fr/>`__ verfügbar.
+    Dieses Dokument ist auch auf `Englisch <http://dupeguru.voltaicideas.net/help/en/>`__ und `Französisch <http://dupeguru.voltaicideas.net/help/fr/>`__ verfügbar.
 .. only:: edition_me
    Dieses Dokument ist auch auf `Englisch <http://www.hardcoded.net/dupeguru/help/en/>`__ und `Französisch <http://www.hardcoded.net/dupeguru_me/help/fr/>`__ verfügbar.
 .. only:: edition_pe
    Dieses Dokument ist auch auf `Englisch <http://www.hardcoded.net/dupeguru/help/en/>`__ und `Französisch <http://www.hardcoded.net/dupeguru_pe/help/fr/>`__ verfügbar.
 .. only:: edition_se or edition_me
-    |appname| ist ein Tool zum Auffinden von Duplikaten auf Ihrem Computer. Es kann entweder Dateinamen oder Inhalte scannen. Der Dateiname-Scan stellt einen lockeren Suchalgorithmus zur Verfügung, der sogar Duplikate findet, die nicht den exakten selben Namen haben.
+    dupeGuru ist ein Tool zum Auffinden von Duplikaten auf Ihrem Computer. Es kann entweder Dateinamen oder Inhalte scannen. Der Dateiname-Scan stellt einen lockeren Suchalgorithmus zur Verfügung, der sogar Duplikate findet, die nicht den exakten selben Namen haben.
 .. only:: edition_pe
@@ -23,7 +15,7 @@
 Obwohl dupeGuru auch leicht ohne Dokumentation genutzt werden kann, ist es sinnvoll die Hilfe zu lesen. Wenn Sie nach einer Führung für den ersten Duplikatscan suchen, werfen Sie einen Blick auf die :doc:`Schnellstart <quick_start>` Sektion
-Es ist eine gute Idee |appname| aktuell zu halten. Sie können die neueste Version auf der `homepage`_ finden.
+Es ist eine gute Idee dupeGuru aktuell zu halten. Sie können die neueste Version auf der `Homepage <http://dupeguru.voltaicideas.net>`__ finden.
 Inhalte:
--- a/help/en/contribute.rst
+++ b/help/en/contribute.rst
@@ -12,7 +12,7 @@ a community around this project.
 So, whatever your skills, if you're interested in contributing to dupeGuru, please do so. Normally,
 this documentation should be enough to get you started, but if it isn't, then **please**,
-`let me know`_ because it's a problem that I'm committed to fix. If there's any situation where you'd
+open a discussion at https://github.com/arsenetar/dupeguru/discussions.  If there's any situation where you'd
 wish to contribute but some doubt you're having prevents you from going forward, please contact me.
 I'd much prefer to spend the time figuring out with you whether (and how) you can contribute than
 taking the chance of missing that opportunity.
@@ -82,10 +82,9 @@ agree on what should be added to the documentation.
 dupeGuru. For more information about how to do that, you can refer to the `translator guide`_.
 .. _been open source: https://www.hardcoded.net/articles/free-as-in-speech-fair-as-in-trade
-.. _let me know: mailto:hsoft@hardcoded.net
+.. _Source code repository: https://github.com/arsenetar/dupeguru
-.. _Source code repository: https://github.com/hsoft/dupeguru
+.. _Issue Tracker: https://github.com/arsenetar/dupeguru/issues
-.. _Issue Tracker: https://github.com/hsoft/dupeguru/issues
+.. _Issue labels meaning: https://github.com/arsenetar/dupeguru/wiki/issue-labels
 .. _Issue labels meaning: https://github.com/hsoft/dupeguru/wiki/issue-labels
 .. _Sphinx: http://sphinx-doc.org/
 .. _reST: http://en.wikipedia.org/wiki/ReStructuredText
-.. _translator guide: https://github.com/hsoft/dupeguru/wiki/Translator-Guide
+.. _translator guide: https://github.com/arsenetar/dupeguru/wiki/Translator-Guide
--- a/help/en/developer/hscommon/jobprogress/qt.rst
+++ b/help/en/developer/hscommon/jobprogress/qt.rst
@@ -1,12 +0,0 @@
 hscommon.jobprogress.qt
 =======================
 .. automodule:: hscommon.jobprogress.qt
    .. autosummary::
        Progress
    .. autoclass:: Progress
        :members:
--- a/help/en/faq.rst
+++ b/help/en/faq.rst
@@ -151,8 +151,6 @@ delete files" option that is offered to you when you activate Send to Trash. Thi
 files to the Trash, but delete them immediately. In some cases, for example on network storage
 (NAS), this has been known to work when normal deletion didn't.
 If this fail, `HS forums`_ might be of some help.
 Why is Picture mode's contents scan so slow?
 --------------------------------------------
@@ -178,7 +176,6 @@ Preferences are stored elsewhere:
 * Linux: ``~/.config/Hardcoded Software/dupeGuru.conf``
 * Mac OS X: In the built-in ``defaults`` system, as ``com.hardcoded-software.dupeguru``
-.. _HS forums: https://forum.hardcoded.net/
+.. _Github: https://github.com/arsenetar/dupeguru
-.. _Github: https://github.com/hsoft/dupeguru
+.. _open an issue: https://github.com/arsenetar/dupeguru/wiki/issue-labels
 .. _open an issue: https://github.com/hsoft/dupeguru/wiki/issue-labels
--- a/help/en/index.rst
+++ b/help/en/index.rst
@@ -3,11 +3,11 @@ dupeGuru help
 This help document is also available in these languages:
-* `French <http://www.hardcoded.net/dupeguru/help/fr>`__
+* `French <http://dupeguru.voltaicideas.net/help/fr>`__
-* `German <http://www.hardcoded.net/dupeguru/help/de>`__
+* `German <http://dupeguru.voltaicideas.net/help/de>`__
-* `Armenian <http://www.hardcoded.net/dupeguru/help/hy>`__
+* `Armenian <http://dupeguru.voltaicideas.net/help/hy>`__
-* `Russian <http://www.hardcoded.net/dupeguru/help/ru>`__
+* `Russian <http://dupeguru.voltaicideas.net/help/ru>`__
-* `Ukrainian <http://www.hardcoded.net/dupeguru/help/uk>`__
+* `Ukrainian <http://dupeguru.voltaicideas.net/help/uk>`__
 dupeGuru is a tool to find duplicate files on your computer. It has three
 modes, Standard, Music and Picture, with each mode having its own scan types
@@ -42,4 +42,4 @@ Indices and tables
 * :ref:`genindex`
 * :ref:`search`
-.. _homepage: https://www.hardcoded.net/dupeguru
+.. _homepage: https://dupeguru.voltaicideas.net/
--- a/help/fr/faq.rst
+++ b/help/fr/faq.rst
@@ -3,7 +3,7 @@ Foire aux questions
 .. contents::
-Qu'est-ce que |appname|?
+Qu'est-ce que dupeGuru?
 ------------------------
 .. only:: edition_se
--- a/help/fr/index.rst
+++ b/help/fr/index.rst
@@ -1,21 +1,13 @@
-Aide |appname|
+Aide dupeGuru
 ===============
 .. only:: edition_se
-    Ce document est aussi disponible en `anglais <http://www.hardcoded.net/dupeguru/help/en/>`__, en `allemand <http://www.hardcoded.net/dupeguru/help/de/>`__ et en `arménien <http://www.hardcoded.net/dupeguru/help/hy/>`__.
+    Ce document est aussi disponible en `anglais <http://dupeguru.voltaicideas.net/help/en/>`__, en `allemand <http://dupeguru.voltaicideas.net/help/de/>`__ et en `arménien <http://dupeguru.voltaicideas.net/help/hy/>`__.
 .. only:: edition_me
    Ce document est aussi disponible en `anglais <http://www.hardcoded.net/dupeguru_me/help/en/>`__, en `allemand <http://www.hardcoded.net/dupeguru_me/help/de/>`__ et en `arménien <http://www.hardcoded.net/dupeguru_me/help/hy/>`__.
 .. only:: edition_pe
    Ce document est aussi disponible en `anglais <http://www.hardcoded.net/dupeguru_pe/help/en/>`__, en `allemand <http://www.hardcoded.net/dupeguru_pe/help/de/>`__ et en `arménien <http://www.hardcoded.net/dupeguru_pe/help/hy/>`__.
 .. only:: edition_se or edition_me
-    |appname| est un outil pour trouver des doublons parmi vos fichiers. Il peut comparer soit les noms de fichiers, soit le contenu. Le comparateur de nom de fichier peut trouver des doublons même si les noms ne sont pas exactement pareils.
+    dupeGuru est un outil pour trouver des doublons parmi vos fichiers. Il peut comparer soit les noms de fichiers, soit le contenu. Le comparateur de nom de fichier peut trouver des doublons même si les noms ne sont pas exactement pareils.
 .. only:: edition_pe
@@ -23,7 +15,7 @@ Aide |appname|
 Bien que dupeGuru puisse être utilisé sans lire l'aide, une telle lecture vous permettra de bien comprendre comment l'application fonctionne. Pour un guide rapide pour une première utilisation, référez vous à la section :doc:`Démarrage Rapide <quick_start>`.
-C'est toujours une bonne idée de garder |appname| à jour. Vous pouvez télécharger la dernière version sur sa `page web`_.
+C'est toujours une bonne idée de garder dupeGuru à jour. Vous pouvez télécharger la dernière version sur sa `page web <http://dupeguru.voltaicideas.net>`__.
 Contents:
--- a/help/hy/faq.rst
+++ b/help/hy/faq.rst
@@ -1,7 +1,7 @@
 Հաճախ Տրվող Հարցեր
 ==========================
-.. topic:: Ի՞նչ է |appname|-ը:
+.. topic:: Ի՞նչ է dupeGuru-ն:
    .. only:: edition_se
--- a/help/hy/index.rst
+++ b/help/hy/index.rst
@@ -1,21 +1,13 @@
-|appname| help
+dupeGuru help
 ===============
 .. only:: edition_se
-    Այս փաստաթուղթը հասանելի է նաև՝ `Ֆրանսերեն <http://www.hardcoded.net/dupeguru/help/fr/>`__ և `Գերմաներեն <http://www.hardcoded.net/dupeguru/help/de/>`__.
+    Այս փաստաթուղթը հասանելի է նաև՝ `Ֆրանսերեն <http://dupeguru.voltaicideas.net/help/fr/>`__ և `Գերմաներեն <http://dupeguru.voltaicideas.net/help/de/>`__.
 .. only:: edition_me
    Այս փաստաթուղթը հասանելի է նաև՝ `Ֆրանսերեն <http://www.hardcoded.net/dupeguru_me/help/fr/>`__ և `Գերմաներեն <http://www.hardcoded.net/dupeguru_me/help/de/>`__.
 .. only:: edition_pe
    Այս փաստաթուղթը հասանելի է նաև՝ `Ֆրանսերեն <http://www.hardcoded.net/dupeguru_pe/help/fr/>`__ և `Գերմաներեն <http://www.hardcoded.net/dupeguru_pe/help/de/>`__.
 .. only:: edition_se or edition_me
-    |appname| ծրագիր է՝ գտնելու կրկնօրինակ ունեցող ֆայլեր Ձեր համակարգչում: Այն կարող է անգամ ստուգել ֆայլի անունները կան բովանդակությունը: Ֆայլի անվան ստուգման հնարավորությունները ոչ ճշգրիտ համընկման ալգորիթմով, որը կարող է գտնել ֆայլի անվան կրկնօրինակներ, անգամ եթե դրանք նույնը չեն:
+    dupeGuru ծրագիր է՝ գտնելու կրկնօրինակ ունեցող ֆայլեր Ձեր համակարգչում: Այն կարող է անգամ ստուգել ֆայլի անունները կան բովանդակությունը: Ֆայլի անվան ստուգման հնարավորությունները ոչ ճշգրիտ համընկման ալգորիթմով, որը կարող է գտնել ֆայլի անվան կրկնօրինակներ, անգամ եթե դրանք նույնը չեն:
 .. only:: edition_pe
@@ -23,7 +15,7 @@
 Չնայած dupeGuru-ն կարող է հեշտությամբ օգտագործվել առանց օգնության, այնուհանդերձ եթե կարդաք այս ֆայլը, այն մեծապես կօգնի Ձեզ ընկալելու ծրագրի աշխատանքը: Եթե Դուք նայում եք ձեռնարկը կրկնօրինակների առաջին ստուգման համար, ապա կարող եք ընտրել :doc:`Արագ Սկիզբ <quick_start>` հատվածը:
-Շատ լավ միտք է պահելու |appname| թարմացված: Կարող եք բեռնել վեբ կայքի համապատասխան էջից `homepage`_:
+Շատ լավ միտք է պահելու dupeGuru թարմացված: Կարող եք բեռնել վեբ կայքի համապատասխան էջից http://dupeguru.voltaicideas.net:
 Պարունակությունը.
--- a/help/ru/faq.rst
+++ b/help/ru/faq.rst
@@ -1,7 +1,7 @@
 Часто задаваемые вопросы
 ==========================
-.. topic:: Что такое |appname|?
+.. topic:: Что такое dupeGuru?
    .. only:: edition_se
--- a/help/ru/index.rst
+++ b/help/ru/index.rst
@@ -1,21 +1,11 @@
-|appname| help
+dupeGuru help
 ===============
-.. only:: edition_se
+Этот документ также доступен на `французском <http://dupeguru.voltaicideas.net/help/fr/>`__, `немецком <http://dupeguru.voltaicideas.net/help/de/>`__ и `армянском <http://dupeguru.voltaicideas.net/help/hy/>`__.
    Этот документ также доступна на `французском <http://www.hardcoded.net/dupeguru/help/fr/>`__, `немецком <http://www.hardcoded.net/dupeguru/help/de/>`__ и `армянский <http://www.hardcoded.net/dupeguru/help/hy/>`__.
 .. only:: edition_me
    Этот документ также доступна на `французском <http://www.hardcoded.net/dupeguru_me/help/fr/>`__, `немецкий <http://www.hardcoded.net/dupeguru_me/help/de/>`__ и `армянский <http://www.hardcoded.net/dupeguru_me/help/hy/>`__.
 .. only:: edition_pe
    Этот документ также доступна на `французском <http://www.hardcoded.net/dupeguru_pe/help/fr/>`__, `немецкий <http://www.hardcoded.net/dupeguru_pe/help/de/>`__ и `армянский <http://www.hardcoded.net/dupeguru_pe/help/hy/>`__.
 .. only:: edition_se or edition_me
-    |appname| есть инструмент для поиска дубликатов файлов на вашем компьютере. Он может сканировать либо имен файлов или содержимого.Имя файла функций сканирования нечеткого соответствия алгоритма, который позволяет найти одинаковые имена файлов, даже если они не совсем то же самое.
+    dupeGuru есть инструмент для поиска дубликатов файлов на вашем компьютере. Он может сканировать либо имен файлов или содержимого.Имя файла функций сканирования нечеткого соответствия алгоритма, который позволяет найти одинаковые имена файлов, даже если они не совсем то же самое.
 .. only:: edition_pe
@@ -23,7 +13,7 @@
 Хотя dupeGuru может быть легко использована без документации, чтение этого файла поможет вам освоить его. Если вы ищете руководство для вашей первой дублировать сканирования, вы можете взглянуть на раздел :doc:`Быстрый <quick_start>` Начало.
-Это хорошая идея, чтобы сохранить |appname| обновлен. Вы можете скачать последнюю версию на своей `homepage`_.
+Это хорошая идея, чтобы сохранить dupeGuru обновлен. Вы можете скачать последнюю версию на своей http://dupeguru.voltaicideas.net.
 Содержание:
 .. toctree::
--- a/help/uk/faq.rst
+++ b/help/uk/faq.rst
@@ -1,7 +1,7 @@
 Часті питання
 ==========================
-.. topic:: Що таке |appname|?
+.. topic:: Що таке dupeGuru?
    .. only:: edition_se
--- a/help/uk/index.rst
+++ b/help/uk/index.rst
@@ -1,21 +1,13 @@
-|appname| help
+dupeGuru help
 ===============
 .. only:: edition_se
-    Цей документ також доступна на `французькому <http://www.hardcoded.net/dupeguru/help/fr/>`__, `німецький <http://www.hardcoded.net/dupeguru/help/de/>`__ і `Вірменський <http://www.hardcoded.net/dupeguru/help/hy/>`__.
+    Цей документ також доступний `французькою <http://dupeguru.voltaicideas.net/help/fr/>`__, `німецькою <http://dupeguru.voltaicideas.net/help/de/>`__ і `вірменською <http://dupeguru.voltaicideas.net/help/hy/>`__.
 .. only:: edition_me
    Цей документ також доступна на `французькому  <http://www.hardcoded.net/dupeguru_me/help/fr/>`__, `німецький <http://www.hardcoded.net/dupeguru_me/help/de/>`__ і `Вірменський <http://www.hardcoded.net/dupeguru_me/help/hy/>`__.
 .. only:: edition_pe
    Цей документ також доступна на `французькому <http://www.hardcoded.net/dupeguru_pe/help/fr/>`__, `німецький <http://www.hardcoded.net/dupeguru_pe/help/de/>`__ і `Вірменський <http://www.hardcoded.net/dupeguru_pe/help/hy/>`__.
 .. only:: edition_se or edition_me
-    |appname| це інструмент для пошуку дублікатів файлів на вашому комп'ютері. Він може сканувати або імен файлів або вмісту. Файл функцій сканування нечіткого відповідності алгоритму, який дозволяє знайти однакові імена файлів, навіть якщо вони не зовсім те ж саме.
+    dupeGuru це інструмент для пошуку дублікатів файлів на вашому комп'ютері. Він може сканувати або імен файлів або вмісту. Файл функцій сканування нечіткого відповідності алгоритму, який дозволяє знайти однакові імена файлів, навіть якщо вони не зовсім те ж саме.
 .. only:: edition_pe
@@ -23,7 +15,7 @@
 Хоча dupeGuru може бути легко використана без документації, читання цього файлу допоможе вам освоїти його. Якщо ви шукаєте керівництво для вашої першої дублювати сканування, ви можете поглянути на: :doc:`Quick Start <quick_start>` 
-Це гарна ідея, щоб зберегти |appname| оновлено. Ви можете завантажити останню версію на своєму `homepage`_.
+Це гарна ідея, щоб зберегти dupeGuru оновлено. Ви можете завантажити останню версію на своєму http://dupeguru.voltaicideas.net.
 Contents:
--- a/hscommon/build.py
+++ b/hscommon/build.py
@@ -30,8 +30,7 @@ from .util import ensure_folder, delete_files_with_pattern
 def print_and_do(cmd):
-    """Prints ``cmd`` and executes it in the shell.
+    """Prints ``cmd`` and executes it in the shell."""
    """
    print(cmd)
    p = Popen(cmd, shell=True)
    return p.wait()
@@ -91,16 +90,14 @@ def copy_all(pattern, dst):
 def ensure_empty_folder(path):
-    """Make sure that the path exists and that it's an empty folder.
+    """Make sure that the path exists and that it's an empty folder."""
    """
    if op.exists(path):
        shutil.rmtree(path)
    os.mkdir(path)
 def filereplace(filename, outfilename=None, **kwargs):
-    """Reads `filename`, replaces all {variables} in kwargs, and writes the result to `outfilename`.
+    """Reads `filename`, replaces all {variables} in kwargs, and writes the result to `outfilename`."""
    """
    if outfilename is None:
        outfilename = filename
    fp = open(filename, "rt", encoding="utf-8")
@@ -152,9 +149,7 @@ def package_cocoa_app_in_dmg(app_path, destfolder, args):
    # a valid signature.
    if args.sign_identity:
        sign_identity = "Developer ID Application: {}".format(args.sign_identity)
-        result = print_and_do(
+        result = print_and_do('codesign --force --deep --sign "{}" "{}"'.format(sign_identity, app_path))
            'codesign --force --deep --sign "{}" "{}"'.format(sign_identity, app_path)
        )
        if result != 0:
            print("ERROR: Signing failed. Aborting packaging.")
            return
@@ -182,10 +177,7 @@ def build_dmg(app_path, destfolder):
    )
    print("Building %s" % dmgname)
    # UDBZ = bzip compression. UDZO (zip compression) was used before, but it compresses much less.
-    print_and_do(
+    print_and_do('hdiutil create "%s" -format UDBZ -nocrossdev -srcdir "%s"' % (op.join(destfolder, dmgname), dmgpath))
        'hdiutil create "%s" -format UDBZ -nocrossdev -srcdir "%s"'
        % (op.join(destfolder, dmgname), dmgpath)
    )
    print("Build Complete")
@@ -207,8 +199,7 @@ sysconfig.get_config_h_filename = lambda: op.join(op.dirname(__file__), 'pyconfi
 def add_to_pythonpath(path):
-    """Adds ``path`` to both ``PYTHONPATH`` env and ``sys.path``.
+    """Adds ``path`` to both ``PYTHONPATH`` env and ``sys.path``."""
    """
    abspath = op.abspath(path)
    pythonpath = os.environ.get("PYTHONPATH", "")
    pathsep = ";" if ISWINDOWS else ":"
@@ -231,9 +222,7 @@ def copy_packages(packages_names, dest, create_links=False, extra_ignores=None):
        create_links = False
    if not extra_ignores:
        extra_ignores = []
-    ignore = shutil.ignore_patterns(
+    ignore = shutil.ignore_patterns(".hg*", "tests", "testdata", "modules", "docs", "locale", *extra_ignores)
        ".hg*", "tests", "testdata", "modules", "docs", "locale", *extra_ignores
    )
    for package_name in packages_names:
        if op.exists(package_name):
            source_path = package_name
@@ -347,7 +336,6 @@ def read_changelog_file(filename):
    with open(filename, "rt", encoding="utf-8") as fp:
        contents = fp.read()
    splitted = re_changelog_header.split(contents)[1:]  # the first item is empty
    # splitted = [version1, date1, desc1, version2, date2, ...]
    result = []
    for version, date_str, description in iter_by_three(iter(splitted)):
        date = datetime.strptime(date_str, "%Y-%m-%d").date()
@@ -410,8 +398,8 @@ def create_osx_app_structure(
    # `resources`: A list of paths of files or folders going in the "Resources" folder.
    # `frameworks`: Same as above for "Frameworks".
    # `symlink_resources`: If True, will symlink resources into the structure instead of copying them.
-    app = OSXAppStructure(dest, infoplist)
+    app = OSXAppStructure(dest)
-    app.create()
+    app.create(infoplist)
    app.copy_executable(executable)
    app.copy_resources(*resources, use_symlinks=symlink_resources)
    app.copy_frameworks(*frameworks)
@@ -444,11 +432,10 @@ class OSXFrameworkStructure:
    def create_symlinks(self):
        # Only call this after create() and copy_executable()
        rel = lambda path: op.relpath(path, self.dest)
        os.symlink("A", op.join(self.dest, "Versions", "Current"))
-        os.symlink(rel(self.executablepath), op.join(self.dest, self.executablename))
+        os.symlink(op.relpath(self.executablepath, self.dest), op.join(self.dest, self.executablename))
-        os.symlink(rel(self.headers), op.join(self.dest, "Headers"))
+        os.symlink(op.relpath(self.headers, self.dest), op.join(self.dest, "Headers"))
-        os.symlink(rel(self.resources), op.join(self.dest, "Resources"))
+        os.symlink(op.relpath(self.resources, self.dest), op.join(self.dest, "Resources"))
    def copy_executable(self, executable):
        copy(executable, self.executablepath)
@@ -481,9 +468,7 @@ def copy_embeddable_python_dylib(dst):
 def collect_stdlib_dependencies(script, dest_folder, extra_deps=None):
    sysprefix = sys.prefix  # could be a virtualenv
    basesysprefix = sys.base_prefix  # seems to be path to non-virtual sys
-    real_lib_prefix = sysconfig.get_config_var(
+    real_lib_prefix = sysconfig.get_config_var("LIBDEST")  # leaving this in case it is neede
        "LIBDEST"
    )  # leaving this in case it is neede
    def is_stdlib_path(path):
        # A module path is only a stdlib path if it's in either sys.prefix or
@@ -493,11 +478,7 @@ def collect_stdlib_dependencies(script, dest_folder, extra_deps=None):
            return False
        if "site-package" in path:
            return False
-        if not (
+        if not (path.startswith(sysprefix) or path.startswith(basesysprefix) or path.startswith(real_lib_prefix)):
            path.startswith(sysprefix)
            or path.startswith(basesysprefix)
            or path.startswith(real_lib_prefix)
        ):
            return False
        return True
@@ -511,9 +492,7 @@ def collect_stdlib_dependencies(script, dest_folder, extra_deps=None):
            relpath = op.relpath(p, real_lib_prefix)
        elif p.startswith(sysprefix):
            relpath = op.relpath(p, sysprefix)
-            assert relpath.startswith(
+            assert relpath.startswith("lib/python3.")  # we want to get rid of that lib/python3.x part
                "lib/python3."
            )  # we want to get rid of that lib/python3.x part
            relpath = relpath[len("lib/python3.X/") :]
        elif p.startswith(basesysprefix):
            relpath = op.relpath(p, basesysprefix)
@@ -521,9 +500,7 @@ def collect_stdlib_dependencies(script, dest_folder, extra_deps=None):
            relpath = relpath[len("lib/python3.X/") :]
        else:
            raise AssertionError()
-        if relpath.startswith(
+        if relpath.startswith("lib-dynload"):  # We copy .so files in lib-dynload directly in our dest
            "lib-dynload"
        ):  # We copy .so files in lib-dynload directly in our dest
            relpath = relpath[len("lib-dynload/") :]
        if relpath.startswith("encodings") or relpath.startswith("distutils"):
            # We force their inclusion later.
@@ -562,9 +539,7 @@ def fix_qt_resource_file(path):
        fp.write(b"\n".join(lines))
-def build_cocoa_ext(
+def build_cocoa_ext(extname, dest, source_files, extra_frameworks=(), extra_includes=()):
    extname, dest, source_files, extra_frameworks=(), extra_includes=()
 ):
    extra_link_args = ["-framework", "CoreFoundation", "-framework", "Foundation"]
    for extra in extra_frameworks:
        extra_link_args += ["-framework", extra]
--- a/hscommon/build_ext.py
+++ b/hscommon/build_ext.py
@@ -11,9 +11,7 @@ from setuptools import setup, Extension
 def get_parser():
    parser = argparse.ArgumentParser(description="Build an arbitrary Python extension.")
-    parser.add_argument(
+    parser.add_argument("source_files", nargs="+", help="List of source files to compile")
        "source_files", nargs="+", help="List of source files to compile"
    )
    parser.add_argument("name", nargs=1, help="Name of the resulting extension")
    return parser
@@ -23,7 +21,8 @@ def main():
    print("Building {}...".format(args.name[0]))
    ext = Extension(args.name[0], args.source_files)
    setup(
-        script_args=["build_ext", "--inplace"], ext_modules=[ext],
+        script_args=["build_ext", "--inplace"],
        ext_modules=[ext],
    )
--- a/hscommon/conflict.py
+++ b/hscommon/conflict.py
@@ -48,15 +48,13 @@ def get_unconflicted_name(name):
 def is_conflicted(name):
-    """Returns whether ``name`` is prepended with a bracketed number.
+    """Returns whether ``name`` is prepended with a bracketed number."""
    """
    return re_conflict.match(name) is not None
@pathify
 def _smart_move_or_copy(operation, source_path: Path, dest_path: Path):
-    """Use move() or copy() to move and copy file with the conflict management.
+    """Use move() or copy() to move and copy file with the conflict management."""
    """
    if dest_path.isdir() and not source_path.isdir():
        dest_path = dest_path[source_path.name]
    if dest_path.exists():
@@ -68,14 +66,12 @@ def _smart_move_or_copy(operation, source_path: Path, dest_path: Path):
 def smart_move(source_path, dest_path):
-    """Same as :func:`smart_copy`, but it moves files instead.
+    """Same as :func:`smart_copy`, but it moves files instead."""
    """
    _smart_move_or_copy(shutil.move, source_path, dest_path)
 def smart_copy(source_path, dest_path):
-    """Copies ``source_path`` to ``dest_path``, recursively and with conflict resolution.
+    """Copies ``source_path`` to ``dest_path``, recursively and with conflict resolution."""
    """
    try:
        _smart_move_or_copy(shutil.copy, source_path, dest_path)
    except IOError as e:
--- a/hscommon/debug.py
+++ b/hscommon/debug.py
@@ -13,8 +13,8 @@ import traceback
 # Taken from http://bzimmer.ziclix.com/2008/12/17/python-thread-dumps/
 def stacktraces():
    code = []
-    for threadId, stack in sys._current_frames().items():
+    for thread_id, stack in sys._current_frames().items():
-        code.append("\n# ThreadID: %s" % threadId)
+        code.append("\n# ThreadID: %s" % thread_id)
        for filename, lineno, name, line in traceback.extract_stack(stack):
            code.append('File: "%s", line %d, in %s' % (filename, lineno, name))
            if line:
--- a/hscommon/desktop.py
+++ b/hscommon/desktop.py
@@ -11,29 +11,26 @@ import logging
 class SpecialFolder:
-    AppData = 1
+    APPDATA = 1
-    Cache = 2
+    CACHE = 2
 def open_url(url):
-    """Open ``url`` with the default browser.
+    """Open ``url`` with the default browser."""
    """
    _open_url(url)
 def open_path(path):
-    """Open ``path`` with its associated application.
+    """Open ``path`` with its associated application."""
    """
    _open_path(str(path))
 def reveal_path(path):
-    """Open the folder containing ``path`` with the default file browser.
+    """Open the folder containing ``path`` with the default file browser."""
    """
    _reveal_path(str(path))
-def special_folder_path(special_folder, appname=None):
+def special_folder_path(special_folder, appname=None, portable=False):
    """Returns the path of ``special_folder``.
    ``special_folder`` is a SpecialFolder.* const. The result is the special folder for the current
@@ -41,7 +38,7 @@ def special_folder_path(special_folder, appname=None):
    You can override the application name with ``appname``. This argument is ignored under Qt.
    """
-    return _special_folder_path(special_folder, appname)
+    return _special_folder_path(special_folder, appname, portable=portable)
 try:
@@ -57,8 +54,8 @@ try:
    _open_path = proxy.openPath_
    _reveal_path = proxy.revealPath_
-    def _special_folder_path(special_folder, appname=None):
+    def _special_folder_path(special_folder, appname=None, portable=False):
-        if special_folder == SpecialFolder.Cache:
+        if special_folder == SpecialFolder.CACHE:
            base = proxy.getCachePath()
        else:
            base = proxy.getAppdataPath()
@@ -66,11 +63,14 @@ try:
            appname = proxy.bundleInfo_("CFBundleName")
        return op.join(base, appname)
 except ImportError:
    try:
        from PyQt5.QtCore import QUrl, QStandardPaths
        from PyQt5.QtGui import QDesktopServices
        from qtlib.util import get_appdata
        from core.util import executable_folder
        from hscommon.plat import ISWINDOWS, ISOSX
        import subprocess
        def _open_url(url):
            QDesktopServices.openUrl(QUrl(url))
@@ -80,14 +80,22 @@ except ImportError:
            QDesktopServices.openUrl(url)
        def _reveal_path(path):
            """Show ``path`` in the platform's file browser.

            On Windows and macOS the absolute path is handed to ``explorer`` / ``open -R``;
            on any other platform the containing folder is opened with ``_open_path``.
            """
            if ISWINDOWS:
                explorer_cmd = ["explorer", "/select,", op.abspath(path)]
                subprocess.run(explorer_cmd)
                return
            if ISOSX:
                finder_cmd = ["open", "-R", op.abspath(path)]
                subprocess.run(finder_cmd)
                return
            # No "select this file" command is used here: just open the parent folder.
            parent_folder = op.dirname(str(path))
            _open_path(parent_folder)
-        def _special_folder_path(special_folder, appname=None):
+        def _special_folder_path(special_folder, appname=None, portable=False):
-            if special_folder == SpecialFolder.Cache:
+            if special_folder == SpecialFolder.CACHE:
-                qtfolder = QStandardPaths.CacheLocation
+                if ISWINDOWS and portable:
                    folder = op.join(executable_folder(), "cache")
                else:
-                qtfolder = QStandardPaths.DataLocation
+                    folder = QStandardPaths.standardLocations(QStandardPaths.CacheLocation)[0]
-            return QStandardPaths.standardLocations(qtfolder)[0]
+            else:
                folder = get_appdata(portable)
            return folder
    except ImportError:
        # We're either running tests, and these functions don't matter much or we're in a really
@@ -95,10 +103,12 @@ except ImportError:
        logging.warning("Can't setup desktop functions!")
        def _open_path(path):
            # Dummy for tests
            pass
        def _reveal_path(path):
            # Dummy for tests
            pass
-        def _special_folder_path(special_folder, appname=None):
+        def _special_folder_path(special_folder, appname=None, portable=False):
            return "/tmp"
--- a/hscommon/geometry.py
+++ b/hscommon/geometry.py
@@ -149,8 +149,7 @@ class Rect:
        return l1, l2, l3, l4
    def scaled_rect(self, dx, dy):
-        """Returns a rect that has the same borders at self, but grown/shrunk by dx/dy on each side.
+        """Returns a rect that has the same borders at self, but grown/shrunk by dx/dy on each side."""
        """
        x, y, w, h = self
        x -= dx
        y -= dy
@@ -159,8 +158,7 @@ class Rect:
        return Rect(x, y, w, h)
    def united(self, other):
-        """Returns the bounding rectangle of this rectangle and `other`.
+        """Returns the bounding rectangle of this rectangle and `other`."""
        """
        # ul=upper left lr=lower right
        ulcorner1, lrcorner1 = self.corners()
        ulcorner2, lrcorner2 = other.corners()
--- a/hscommon/gui/column.py
+++ b/hscommon/gui/column.py
@@ -80,8 +80,7 @@ class PrefAccessInterface:
        """
    def set_default(self, key, value):
-        """Set the value ``value`` for ``key`` in the currently running app's preference store.
+        """Set the value ``value`` for ``key`` in the currently running app's preference store."""
        """
 class Columns(GUIObject):
@@ -140,33 +139,27 @@ class Columns(GUIObject):
    # --- Public
    def column_by_index(self, index):
-        """Return the :class:`Column` having the :attr:`~Column.logical_index` ``index``.
+        """Return the :class:`Column` having the :attr:`~Column.logical_index` ``index``."""
        """
        return self.column_list[index]
    def column_by_name(self, name):
-        """Return the :class:`Column` having the :attr:`~Column.name` ``name``.
+        """Return the :class:`Column` having the :attr:`~Column.name` ``name``."""
        """
        return self.coldata[name]
    def columns_count(self):
-        """Returns the number of columns in our set.
+        """Returns the number of columns in our set."""
        """
        return len(self.column_list)
    def column_display(self, colname):
-        """Returns display name for column named ``colname``, or ``''`` if there's none.
+        """Returns display name for column named ``colname``, or ``''`` if there's none."""
        """
        return self._get_colname_attr(colname, "display", "")
    def column_is_visible(self, colname):
-        """Returns visibility for column named ``colname``, or ``True`` if there's none.
+        """Returns visibility for column named ``colname``, or ``True`` if there's none."""
        """
        return self._get_colname_attr(colname, "visible", True)
    def column_width(self, colname):
-        """Returns width for column named ``colname``, or ``0`` if there's none.
+        """Returns width for column named ``colname``, or ``0`` if there's none."""
        """
        return self._get_colname_attr(colname, "width", 0)
    def columns_to_right(self, colname):
@@ -177,11 +170,7 @@ class Columns(GUIObject):
        """
        column = self.coldata[colname]
        index = column.ordered_index
-        return [
+        return [col.name for col in self.column_list if (col.visible and col.ordered_index > index)]
            col.name
            for col in self.column_list
            if (col.visible and col.ordered_index > index)
        ]
    def menu_items(self):
        """Returns a list of items convenient for quick visibility menu generation.
@@ -207,8 +196,7 @@ class Columns(GUIObject):
        self.set_column_order(colnames)
    def reset_to_defaults(self):
-        """Reset all columns' width and visibility to their default values.
+        """Reset all columns' width and visibility to their default values."""
        """
        self.set_column_order([col.name for col in self.column_list])
        for col in self._optional_columns():
            col.visible = col.default_visible
@@ -216,13 +204,11 @@ class Columns(GUIObject):
        self.view.restore_columns()
    def resize_column(self, colname, newwidth):
-        """Set column ``colname``'s width to ``newwidth``.
+        """Set column ``colname``'s width to ``newwidth``."""
        """
        self._set_colname_attr(colname, "width", newwidth)
    def restore_columns(self):
-        """Restore's column persistent attributes from the last :meth:`save_columns`.
+        """Restore's column persistent attributes from the last :meth:`save_columns`."""
        """
        if not (self.prefaccess and self.savename and self.coldata):
            if (not self.savename) and (self.coldata):
                # This is a table that will not have its coldata saved/restored. we should
@@ -241,8 +227,7 @@ class Columns(GUIObject):
        self.view.restore_columns()
    def save_columns(self):
-        """Save column attributes in persistent storage for restoration in :meth:`restore_columns`.
+        """Save column attributes in persistent storage for restoration in :meth:`restore_columns`."""
        """
        if not (self.prefaccess and self.savename and self.coldata):
            return
        for col in self.column_list:
@@ -263,15 +248,13 @@ class Columns(GUIObject):
            col.ordered_index = i
    def set_column_visible(self, colname, visible):
-        """Set the visibility of column ``colname``.
+        """Set the visibility of column ``colname``."""
        """
        self.table.save_edits()  # the table on the GUI side will stop editing when the columns change
        self._set_colname_attr(colname, "visible", visible)
        self.view.set_column_visible(colname, visible)
    def set_default_width(self, colname, width):
-        """Set the default width or column ``colname``.
+        """Set the default width or column ``colname``."""
        """
        self._set_colname_attr(colname, "default_width", width)
    def toggle_menu_item(self, index):
@@ -289,14 +272,10 @@ class Columns(GUIObject):
    # --- Properties
    @property
    def ordered_columns(self):
-        """List of :class:`Column` in visible order.
+        """List of :class:`Column` in visible order."""
-        """
+        return [col for col in sorted(self.column_list, key=lambda col: col.ordered_index)]
        return [
            col for col in sorted(self.column_list, key=lambda col: col.ordered_index)
        ]
    @property
    def colnames(self):
-        """List of column names in visible order.
+        """List of column names in visible order."""
        """
        return [col.name for col in self.ordered_columns]
--- a/hscommon/gui/progress_window.py
+++ b/hscommon/gui/progress_window.py
@@ -21,12 +21,10 @@ class ProgressWindowView:
    """
    def show(self):
-        """Show the dialog.
+        """Show the dialog."""
        """
    def close(self):
-        """Close the dialog.
+        """Close the dialog."""
        """
    def set_progress(self, progress):
        """Set the progress of the progress bar to ``progress``.
@@ -76,8 +74,7 @@ class ProgressWindow(GUIObject, ThreadedJobPerformer):
        self.jobid = None
    def cancel(self):
-        """Call for a user-initiated job cancellation.
+        """Call for a user-initiated job cancellation."""
        """
        # The UI is sometimes a bit buggy and calls cancel() on self.view.close(). We just want to
        # make sure that this doesn't lead us to think that the user actually cancelled the task, so
        # we verify that the job is still running.
--- a/hscommon/gui/selectable_list.py
+++ b/hscommon/gui/selectable_list.py
@@ -27,9 +27,7 @@ class Selectable(Sequence):
            self._selected_indexes = []
        if not self._selected_indexes:
            return
-        self._selected_indexes = [
+        self._selected_indexes = [index for index in self._selected_indexes if index < len(self)]
            index for index in self._selected_indexes if index < len(self)
        ]
        if not self._selected_indexes:
            self._selected_indexes = [len(self) - 1]
--- a/hscommon/gui/table.py
+++ b/hscommon/gui/table.py
@@ -97,8 +97,7 @@ class Table(MutableSequence, Selectable):
            self._rows.pop(0)
        if self._footer is not None:
            self._rows.pop()
-        key = lambda row: row.sort_key_for_column(column_name)
+        self._rows.sort(key=lambda row: row.sort_key_for_column(column_name), reverse=desc)
        self._rows.sort(key=key, reverse=desc)
        if self._header is not None:
            self._rows.insert(0, self._header)
        if self._footer is not None:
@@ -277,8 +276,7 @@ class GUITable(Table, GUIObject):
        raise NotImplementedError()
    def _do_delete(self):
-        """(Virtual) Delete the selected rows.
+        """(Virtual) Delete the selected rows."""
        """
        pass
    def _fill(self):
--- a/hscommon/gui/text_field.py
+++ b/hscommon/gui/text_field.py
@@ -71,8 +71,7 @@ class TextField(GUIObject):
    # --- Public
    def refresh(self):
-        """Triggers a view :meth:`~TextFieldView.refresh`.
+        """Triggers a view :meth:`~TextFieldView.refresh`."""
        """
        self.view.refresh()
    @property
--- a/hscommon/gui/tree.py
+++ b/hscommon/gui/tree.py
@@ -55,8 +55,7 @@ class Node(MutableSequence):
    # --- Public
    def clear(self):
-        """Clears the node of all its children.
+        """Clears the node of all its children."""
        """
        del self[:]
    def find(self, predicate, include_self=True):
@@ -103,14 +102,12 @@ class Node(MutableSequence):
    @property
    def children_count(self):
-        """Same as ``len(self)``.
+        """Same as ``len(self)``."""
        """
        return len(self)
    @property
    def name(self):
-        """Name for the node, supplied on init.
+        """Name for the node, supplied on init."""
        """
        return self._name
    @property
--- a/hscommon/jobprogress/job.py
+++ b/hscommon/jobprogress/job.py
@@ -56,8 +56,7 @@ class Job:
    # ---Private
    def _subjob_callback(self, progress, desc=""):
-        """This is the callback passed to children jobs.
+        """This is the callback passed to children jobs."""
        """
        self.set_progress(progress, desc)
        return True  # if JobCancelled has to be raised, it will be at the highest level
@@ -140,31 +139,34 @@ class Job:
        self._progress = progress
        if self._progress > self._currmax:
            self._progress = self._currmax
        if self._progress < 0:
            self._progress = 0
        self._do_update(desc)
 class NullJob:
    """Job stand-in whose methods accept any arguments and perform no work."""

    def __init__(self, *args, **kwargs):
        """Accept and discard every argument; a null job keeps no state."""

    def add_progress(self, *args, **kwargs):
        """Do nothing; a null job tracks no progress."""

    def check_if_cancelled(self):
        """Do nothing; this method never raises."""

    def iter_with_progress(self, sequence, *args, **kwargs):
        """Return a plain iterator over ``sequence``; the extra arguments are unused."""
        plain_iterator = iter(sequence)
        return plain_iterator

    def start_job(self, *args, **kwargs):
        """Do nothing; a null job has nothing to start."""

    def start_subjob(self, *args, **kwargs):
        """Return a new :class:`NullJob`; the arguments are unused."""
        child = NullJob()
        return child

    def set_progress(self, *args, **kwargs):
        """Do nothing; a null job tracks no progress."""
--- a/hscommon/jobprogress/qt.py
+++ b/hscommon/jobprogress/qt.py
@@ -1,52 +0,0 @@
 # Created By: Virgil Dupras
 # Created On: 2009-09-14
 # Copyright 2011 Hardcoded Software (http://www.hardcoded.net)
 #
 # This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
 # which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html
 from PyQt5.QtCore import pyqtSignal, Qt, QTimer
 from PyQt5.QtWidgets import QProgressDialog
 from . import performer
 class Progress(QProgressDialog, performer.ThreadedJobPerformer):
    """Modal progress dialog that runs a job through ``run_threaded`` and polls its state.

    A ``QTimer`` calls :meth:`updateProgress`, which copies the job's last reported
    progress and description into the dialog, relays a click on "Cancel" to the job,
    and emits :attr:`finished` once the job has ended without being cancelled.
    """

    # Signal with one string argument; emitted with the job id when the job ends
    # without having been cancelled (see updateProgress).
    finished = pyqtSignal(["QString"])
    def __init__(self, parent):
        """Create the dialog (empty label, "Cancel" button, range 0-100) as a child of ``parent``."""
        flags = Qt.CustomizeWindowHint | Qt.WindowTitleHint | Qt.WindowSystemMenuHint
        QProgressDialog.__init__(self, "", "Cancel", 0, 100, parent, flags)
        self.setModal(True)
        # Auto reset/close are turned off; the dialog is closed explicitly in updateProgress.
        self.setAutoReset(False)
        self.setAutoClose(False)
        self._timer = QTimer()
        self._jobid = ""
        self._timer.timeout.connect(self.updateProgress)
    def updateProgress(self):
        """Timer slot: sync the dialog with the job state, or close it when the job is over."""
        # the values might change before setValue happens
        # (so both are copied into locals first and only the copies are used below;
        # NOTE(review): presumably they are written by the worker thread - confirm in performer)
        last_progress = self.last_progress
        last_desc = self.last_desc
        if not self._job_running or last_progress is None:
            # Job is over (or has no progress value): stop polling and close the dialog.
            self._timer.stop()
            self.close()
            if not self.job_cancelled:
                self.finished.emit(self._jobid)
            return
        if self.wasCanceled():
            # Relay the user's click on "Cancel" to the job through the job_cancelled flag.
            self.job_cancelled = True
            return
        if last_desc:
            self.setLabelText(last_desc)
        self.setValue(last_progress)
    def run(self, jobid, title, target, args=()):
        """Start ``target`` via ``run_threaded`` and show the dialog while it runs.

        ``jobid`` is the value later emitted through :attr:`finished`; ``title`` becomes
        the window title; ``target`` and ``args`` are passed on to ``run_threaded``.
        """
        self._jobid = jobid
        self.reset()
        self.setLabelText("")
        self.run_threaded(target, args)
        self.setWindowTitle(title)
        self.show()
        # Poll the job state through updateProgress; 500 is the timer interval in milliseconds.
        self._timer.start(500)
--- a/hscommon/loc.py
+++ b/hscommon/loc.py
@@ -21,6 +21,8 @@ PO2COCOA = {
 COCOA2PO = {v: k for k, v in PO2COCOA.items()}
 STRING_EXT = ".strings"
 def get_langs(folder):
    """Return the names of the entries of ``folder`` that are directories, in listing order."""
    lang_names = []
    for entry in os.listdir(folder):
        # Only sub-directories count; plain files are left out.
        if op.isdir(op.join(folder, entry)):
            lang_names.append(entry)
    return lang_names
@@ -152,11 +154,9 @@ def strings2pot(target, dest):
 def allstrings2pot(lprojpath, dest, excludes=None):
-    allstrings = files_with_ext(lprojpath, ".strings")
+    allstrings = files_with_ext(lprojpath, STRING_EXT)
    if excludes:
-        allstrings = [
+        allstrings = [p for p in allstrings if op.splitext(op.basename(p))[0] not in excludes]
            p for p in allstrings if op.splitext(op.basename(p))[0] not in excludes
        ]
    for strings_path in allstrings:
        strings2pot(strings_path, dest)
@@ -195,11 +195,7 @@ def generate_cocoa_strings_from_code(code_folder, dest_folder):
    # genstrings produces utf-16 files with comments. After having generated the files, we convert
    # them to utf-8 and remove the comments.
    ensure_empty_folder(dest_folder)
-    print_and_do(
+    print_and_do('genstrings -o "{}" `find "{}" -name *.m | xargs`'.format(dest_folder, code_folder))
        'genstrings -o "{}" `find "{}" -name *.m | xargs`'.format(
            dest_folder, code_folder
        )
    )
    for stringsfile in os.listdir(dest_folder):
        stringspath = op.join(dest_folder, stringsfile)
        with open(stringspath, "rt", encoding="utf-16") as fp:
@@ -214,11 +210,9 @@ def generate_cocoa_strings_from_code(code_folder, dest_folder):
 def generate_cocoa_strings_from_xib(xib_folder):
-    xibs = [
+    xibs = [op.join(xib_folder, fn) for fn in os.listdir(xib_folder) if fn.endswith(".xib")]
        op.join(xib_folder, fn) for fn in os.listdir(xib_folder) if fn.endswith(".xib")
    ]
    for xib in xibs:
-        dest = xib.replace(".xib", ".strings")
+        dest = xib.replace(".xib", STRING_EXT)
        print_and_do("ibtool {} --generate-strings-file {}".format(xib, dest))
        print_and_do("iconv -f utf-16 -t utf-8 {0} | tee {0}".format(dest))
@@ -234,10 +228,6 @@ def localize_stringsfile(stringsfile, dest_root_folder):
 def localize_all_stringsfiles(src_folder, dest_root_folder):
-    stringsfiles = [
+    stringsfiles = [op.join(src_folder, fn) for fn in os.listdir(src_folder) if fn.endswith(STRING_EXT)]
        op.join(src_folder, fn)
        for fn in os.listdir(src_folder)
        if fn.endswith(".strings")
    ]
    for path in stringsfiles:
        localize_stringsfile(path, dest_root_folder)
--- a/hscommon/notify.py
+++ b/hscommon/notify.py
@@ -16,8 +16,7 @@ from collections import defaultdict
 class Broadcaster:
-    """Broadcasts messages that are received by all listeners.
+    """Broadcasts messages that are received by all listeners."""
    """
    def __init__(self):
        self.listeners = set()
@@ -39,8 +38,7 @@ class Broadcaster:
 class Listener:
-    """A listener is initialized with the broadcaster it's going to listen to. Initially, it is not connected.
+    """A listener is initialized with the broadcaster it's going to listen to. Initially, it is not connected."""
    """
    def __init__(self, broadcaster):
        self.broadcaster = broadcaster
@@ -57,13 +55,11 @@ class Listener:
            self._bound_notifications[message].append(func)
    def connect(self):
-        """Connects the listener to its broadcaster.
+        """Connects the listener to its broadcaster."""
        """
        self.broadcaster.add_listener(self)
    def disconnect(self):
-        """Disconnects the listener from its broadcaster.
+        """Disconnects the listener from its broadcaster."""
        """
        self.broadcaster.remove_listener(self)
    def dispatch(self, msg):
--- a/hscommon/path.py
+++ b/hscommon/path.py
@@ -85,9 +85,7 @@ class Path(tuple):
    def __getitem__(self, key):
        if isinstance(key, slice):
            if isinstance(key.start, Path):
-                equal_elems = list(
+                equal_elems = list(takewhile(lambda pair: pair[0] == pair[1], zip(self, key.start)))
                    takewhile(lambda pair: pair[0] == pair[1], zip(self, key.start))
                )
                key = slice(len(equal_elems), key.stop, key.step)
            if isinstance(key.stop, Path):
                equal_elems = list(
@@ -226,9 +224,7 @@ def pathify(f):
    Calling ``foo('/bar', 0)`` will convert ``'/bar'`` to ``Path('/bar')``.
    """
    sig = signature(f)
-    pindexes = {
+    pindexes = {i for i, p in enumerate(sig.parameters.values()) if p.annotation is Path}
        i for i, p in enumerate(sig.parameters.values()) if p.annotation is Path
    }
    pkeys = {k: v for k, v in sig.parameters.items() if v.annotation is Path}
    def path_or_none(p):
@@ -236,9 +232,7 @@ def pathify(f):
    @wraps(f)
    def wrapped(*args, **kwargs):
-        args = tuple(
+        args = tuple((path_or_none(a) if i in pindexes else a) for i, a in enumerate(args))
            (path_or_none(a) if i in pindexes else a) for i, a in enumerate(args)
        )
        kwargs = {k: (path_or_none(v) if k in pkeys else v) for k, v in kwargs.items()}
        return f(*args, **kwargs)
@@ -246,8 +240,7 @@ def pathify(f):
 def log_io_error(func):
-    """ Catches OSError, IOError and WindowsError and log them
+    """Catches OSError, IOError and WindowsError and log them"""
    """
    @wraps(func)
    def wrapper(path, *args, **kwargs):
--- a/hscommon/pygettext.py
+++ b/hscommon/pygettext.py
@@ -110,22 +110,14 @@ def _visit_pyfiles(list, dirname, names):
    # get extension for python source files
    if "_py_ext" not in globals():
        global _py_ext
-        _py_ext = [
+        _py_ext = [triple[0] for triple in imp.get_suffixes() if triple[2] == imp.PY_SOURCE][0]
            triple[0] for triple in imp.get_suffixes() if triple[2] == imp.PY_SOURCE
        ][0]
    # don't recurse into CVS directories
    if "CVS" in names:
        names.remove("CVS")
    # add all *.py files to list
-    list.extend(
+    list.extend([os.path.join(dirname, file) for file in names if os.path.splitext(file)[1] == _py_ext])
        [
            os.path.join(dirname, file)
            for file in names
            if os.path.splitext(file)[1] == _py_ext
        ]
    )
 def _get_modpkg_path(dotted_name, pathlist=None):
@@ -175,10 +167,10 @@ def getFilesForName(name):
        # check for glob chars
        if containsAny(name, "*?[]"):
            files = glob.glob(name)
-            list = []
+            file_list = []
            for file in files:
-                list.extend(getFilesForName(file))
+                file_list.extend(getFilesForName(file))
-            return list
+            return file_list
        # try to find module or package
        name = _get_modpkg_path(name)
@@ -187,9 +179,9 @@ def getFilesForName(name):
    if os.path.isdir(name):
        # find all python files in directory
-        list = []
+        file_list = []
-        os.walk(name, _visit_pyfiles, list)
+        os.walk(name, _visit_pyfiles, file_list)
-        return list
+        return file_list
    elif os.path.exists(name):
        # a single file
        return [name]
@@ -406,8 +398,7 @@ def main(source_files, outpath, keywords=None):
                    eater(*_token)
            except tokenize.TokenError as e:
                print(
-                    "%s: %s, line %d, column %d"
+                    "%s: %s, line %d, column %d" % (e.args[0], filename, e.args[1][0], e.args[1][1]),
                    % (e.args[0], filename, e.args[1][0], e.args[1][1]),
                    file=sys.stderr,
                )
        finally:
--- a/hscommon/sphinxgen.py
+++ b/hscommon/sphinxgen.py
@@ -4,13 +4,11 @@
 # which should be included with this package. The terms are also available at
 # http://www.gnu.org/licenses/gpl-3.0.html
-import os.path as op
+from pathlib import Path
 import re
 from distutils.version import LooseVersion
 from pkg_resources import load_entry_point, get_distribution
 from .build import read_changelog_file, filereplace
 from sphinx.cmd.build import build_main as sphinx_build
 CHANGELOG_FORMAT = """
 {version} ({date})
@@ -24,9 +22,7 @@ def tixgen(tixurl):
    """This is a filter *generator*. tixurl is a url pattern for the tix with a {0} placeholder
    for the tix #
    """
-    urlpattern = tixurl.format(
+    urlpattern = tixurl.format("\\1")  # will be replaced buy the content of the first group in re
        "\\1"
    )  # will be replaced buy the content of the first group in re
    R = re.compile(r"#(\d+)")
    repl = "`#\\1 <{}>`__".format(urlpattern)
    return lambda text: R.sub(repl, text)
@@ -52,9 +48,9 @@ def gen(
    if confrepl is None:
        confrepl = {}
    if confpath is None:
-        confpath = op.join(basepath, "conf.tmpl")
+        confpath = Path(basepath, "conf.tmpl")
    if changelogtmpl is None:
-        changelogtmpl = op.join(basepath, "changelog.tmpl")
+        changelogtmpl = Path(basepath, "changelog.tmpl")
    changelog = read_changelog_file(changelogpath)
    tix = tixgen(tixurl)
    rendered_logs = []
@@ -63,36 +59,16 @@ def gen(
        # The format of the changelog descriptions is in markdown, but since we only use bulleted lists
        # and links, it's not worth depending on the markdown package. A simple regexp suffices.
        description = re.sub(r"\[(.*?)\]\((.*?)\)", "`\\1 <\\2>`__", description)
-        rendered = CHANGELOG_FORMAT.format(
+        rendered = CHANGELOG_FORMAT.format(version=log["version"], date=log["date_str"], description=description)
            version=log["version"], date=log["date_str"], description=description
        )
        rendered_logs.append(rendered)
    confrepl["version"] = changelog[0]["version"]
-    changelog_out = op.join(basepath, "changelog.rst")
+    changelog_out = Path(basepath, "changelog.rst")
    filereplace(changelogtmpl, changelog_out, changelog="\n".join(rendered_logs))
-    if op.exists(confpath):
+    if Path(confpath).exists():
-        conf_out = op.join(basepath, "conf.py")
+        conf_out = Path(basepath, "conf.py")
        filereplace(confpath, conf_out, **confrepl)
    if LooseVersion(get_distribution("sphinx").version) >= LooseVersion("1.7.0"):
        from sphinx.cmd.build import build_main as sphinx_build
    # Call the sphinx_build function, which is the same as doing sphinx-build from cli
    try:
-            sphinx_build([basepath, destpath])
+        sphinx_build([str(basepath), str(destpath)])
    except SystemExit:
-            print(
+        print("Sphinx called sys.exit(), but we're cancelling it because we don't actually want to exit")
                "Sphinx called sys.exit(), but we're cancelling it because we don't actually want to exit"
            )
    else:
        # We used to call sphinx-build with print_and_do(), but the problem was that the virtualenv
        # of the calling python wasn't correctly considered and caused problems with documentation
        # relying on autodoc (which tries to import the module to auto-document, but fail because of
        # missing dependencies which are in the virtualenv). Here, we do exactly what is done when
        # calling the command from bash.
        cmd = load_entry_point("Sphinx", "console_scripts", "sphinx-build")
        try:
            cmd(["sphinx-build", basepath, destpath])
        except SystemExit:
            print(
                "Sphinx called sys.exit(), but we're cancelling it because we don't actually want to exit"
            )
--- a/hscommon/sqlite.py
+++ b/hscommon/sqlite.py
@@ -45,7 +45,7 @@ class _ActualThread(threading.Thread):
        self._lock = threading.Lock()
        self._run = True
        self.lastrowid = -1
-        self.setDaemon(True)
+        self.daemon = True
        self.start()
    def _query(self, query):
--- a/hscommon/tests/conflict_test.py
+++ b/hscommon/tests/conflict_test.py
@@ -19,7 +19,7 @@ from ..path import Path
 from ..testutil import eq_
-class TestCase_GetConflictedName:
+class TestCaseGetConflictedName:
    def test_simple(self):
        name = get_conflicted_name(["bar"], "bar")
        eq_("[000] bar", name)
@@ -46,7 +46,7 @@ class TestCase_GetConflictedName:
        eq_("[000] bar", name)
-class TestCase_GetUnconflictedName:
+class TestCaseGetUnconflictedName:
    def test_main(self):
        eq_("foobar", get_unconflicted_name("[000] foobar"))
        eq_("foobar", get_unconflicted_name("[9999] foobar"))
@@ -56,7 +56,7 @@ class TestCase_GetUnconflictedName:
        eq_("foo [000] bar", get_unconflicted_name("foo [000] bar"))
-class TestCase_IsConflicted:
+class TestCaseIsConflicted:
    def test_main(self):
        assert is_conflicted("[000] foobar")
        assert is_conflicted("[9999] foobar")
@@ -66,7 +66,7 @@ class TestCase_IsConflicted:
        assert not is_conflicted("foo [000] bar")
-class TestCase_move_copy:
+class TestCaseMoveCopy:
    @pytest.fixture
    def do_setup(self, request):
        tmpdir = request.getfixturevalue("tmpdir")
@@ -80,9 +80,7 @@ class TestCase_move_copy:
        assert self.path["baz"].exists()
        assert not self.path["foo"].exists()
-    def test_copy_no_conflict(
+    def test_copy_no_conflict(self, do_setup):  # No need to duplicate the rest of the tests... Let's just test on move
        self, do_setup
    ):  # No need to duplicate the rest of the tests... Let's just test on move
        smart_copy(self.path + "foo", self.path + "baz")
        assert self.path["baz"].exists()
        assert self.path["foo"].exists()
--- a/hscommon/tests/notify_test.py
+++ b/hscommon/tests/notify_test.py
@@ -128,9 +128,7 @@ def test_repeater_with_repeated_notifications():
    r.connect()
    listener.connect()
    b.notify("hello")
-    b.notify(
+    b.notify("foo")  # if the repeater repeated this notif, we'd get a crash on HelloListener
        "foo"
    )  # if the repeater repeated this notif, we'd get a crash on HelloListener
    eq_(r.hello_count, 1)
    eq_(listener.hello_count, 1)
    eq_(r.foo_count, 1)
--- a/hscommon/tests/path_test.py
+++ b/hscommon/tests/path_test.py
@@ -51,7 +51,7 @@ def test_init_with_tuple_and_list(force_ossep):
 def test_init_with_invalid_value(force_ossep):
    try:
-        path = Path(42)  # noqa: F841
+        Path(42)
        assert False
    except TypeError:
        pass
@@ -87,8 +87,7 @@ def test_filename(force_ossep):
 def test_deal_with_empty_components(force_ossep):
-    """Keep ONLY a leading space, which means we want a leading slash.
+    """Keep ONLY a leading space, which means we want a leading slash."""
    """
    eq_("foo//bar", str(Path(("foo", "", "bar"))))
    eq_("/foo/bar", str(Path(("", "foo", "bar"))))
    eq_("foo/bar", str(Path("foo/bar/")))
@@ -143,8 +142,6 @@ def test_path_slice(force_ossep):
    eq_((), foobar[:foobar])
    abcd = Path("a/b/c/d")
    a = Path("a")
    b = Path("b")  # noqa: #F841
    c = Path("c")  # noqa: #F841
    d = Path("d")
    z = Path("z")
    eq_("b/c", abcd[a:d])
@@ -154,8 +151,7 @@ def test_path_slice(force_ossep):
 def test_add_with_root_path(force_ossep):
-    """if I perform /a/b/c + /d/e/f, I want /a/b/c/d/e/f, not /a/b/c//d/e/f
+    """if I perform /a/b/c + /d/e/f, I want /a/b/c/d/e/f, not /a/b/c//d/e/f"""
    """
    eq_("/foo/bar", str(Path("/foo") + Path("/bar")))
@@ -166,8 +162,7 @@ def test_create_with_tuple_that_have_slash_inside(force_ossep, monkeypatch):
 def test_auto_decode_os_sep(force_ossep, monkeypatch):
-    """Path should decode any either / or os.sep, but always encode in os.sep.
+    """Path should decode any either / or os.sep, but always encode in os.sep."""
    """
    eq_(("foo\\bar", "bleh"), Path("foo\\bar/bleh"))
    monkeypatch.setattr(os, "sep", "\\")
    eq_(("foo", "bar/bleh"), Path("foo\\bar/bleh"))
@@ -219,7 +214,7 @@ def test_str_repr_of_mix_between_non_ascii_str_and_unicode(force_ossep):
    eq_("foo\u00e9/bar".encode(sys.getfilesystemencoding()), p.tobytes())
-def test_Path_of_a_Path_returns_self(force_ossep):
+def test_path_of_a_path_returns_self(force_ossep):
    # if Path() is called with a path as value, just return value.
    p = Path("foo/bar")
    assert Path(p) is p
--- a/hscommon/tests/selectable_list_test.py
+++ b/hscommon/tests/selectable_list_test.py
@@ -44,9 +44,7 @@ def test_guicalls():
    # A GUISelectableList appropriately calls its view.
    sl = GUISelectableList(["foo", "bar"])
    sl.view = CallLogger()
-    sl.view.check_gui_calls(
+    sl.view.check_gui_calls(["refresh"])  # Upon setting the view, we get a call to refresh()
        ["refresh"]
    )  # Upon setting the view, we get a call to refresh()
    sl[1] = "baz"
    sl.view.check_gui_calls(["refresh"])
    sl.append("foo")
--- a/hscommon/tests/sqlite_test.py
+++ b/hscommon/tests/sqlite_test.py
@@ -91,7 +91,7 @@ def test_make_sure_theres_no_messup_between_queries():
    threads = []
    for i in range(1, 101):
        t = threading.Thread(target=run, args=(i,))
-        t.start
+        t.start()
        threads.append(t)
    while threads:
        time.sleep(0.1)
--- a/hscommon/tests/table_test.py
+++ b/hscommon/tests/table_test.py
@@ -19,6 +19,7 @@ class TestRow(Row):
        self._index = index
    def load(self):
        # Does nothing for test
        pass
    def save(self):
@@ -75,14 +76,17 @@ def test_allow_edit_when_attr_is_property_with_fset():
    class TestRow(Row):
        @property
        def foo(self):
            # property only for existence checks
            pass
        @property
        def bar(self):
            # property only for existence checks
            pass
        @bar.setter
        def bar(self, value):
            # setter only for existence checks
            pass
    row = TestRow(Table())
@@ -97,10 +101,12 @@ def test_can_edit_prop_has_priority_over_fset_checks():
    class TestRow(Row):
        @property
        def bar(self):
            # property only for existence checks
            pass
        @bar.setter
        def bar(self, value):
            # setter only for existence checks
            pass
        can_edit_bar = False
--- a/hscommon/tests/tree_test.py
+++ b/hscommon/tests/tree_test.py
@@ -105,9 +105,7 @@ def test_findall_dont_include_self():
    # When calling findall with include_self=False, the node itself is never evaluated.
    t = tree_with_some_nodes()
    del t._name  # so that if the predicate is called on `t`, we crash
-    r = t.findall(
+    r = t.findall(lambda n: not n.name.startswith("sub"), include_self=False)  # no crash
        lambda n: not n.name.startswith("sub"), include_self=False
    )  # no crash
    eq_(set(r), set([t[0], t[1], t[2]]))
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Andrew Senetar	c408873d20	Update changelog	2022-03-25 23:37:46 -05:00
Andrew Senetar	bbcdfbf698	Add vscode extension recommendation	2022-03-21 22:27:16 -05:00
Andrew Senetar	8cee1a9467	Fix internal links in CONTRIBUTING.md	2022-03-21 22:19:58 -05:00
Andrew Senetar	448d33dcb6	Add workflow yml validation settings - Add yml validation to project for vscode - Allow .vscode/settings.json - Apply formatting to workflow files	2022-03-21 22:18:22 -05:00
Andrew Senetar	8d414cadac	Add initial partial CONTRIBUTING.md - Adopt a CONTRIBUTING.md format similar to that used by atom/atom. - Add label section as replacement to wiki - Add style guide section - Setup basic document structure TODO: - Migrate some existing wiki information here where applicable. - Migrate some existing help information here. - Finish up remaining sections.	2022-03-21 22:04:45 -05:00
Andrew Senetar	f902ee889a	Add configuration for isort to pyproject.toml	2022-03-21 00:25:36 -05:00
Andrew Senetar	bc89e71935	Update .gitignore - Pull from github/gitignore to cover some things better - Organize remaining items - Remove a few no longer relevant items	2022-03-20 23:25:01 -05:00
Andrew Senetar	17b83c8001	Move polib to setup_requires instead of install_requires	2022-03-20 22:48:03 -05:00
Andrew Senetar	0f845ee67a	Update min python version in Makefile	2022-03-20 01:23:01 -05:00
Andrew Senetar	d40e32a143	Update transifex config & pull latest updates - Update transifex configuration to new format - Pull translation updates	2022-03-19 20:21:14 -05:00
Andrew Senetar	1bc206e62d	Bump version to 4.2.1	2022-03-19 19:02:41 -05:00
Andrew Senetar	106a0feaba	Add sponsor information	2022-03-19 17:46:12 -05:00
Andrew Senetar	984e0c4094	Fix help path for local files and some help doc updates	2022-03-19 17:43:11 -05:00
Andrew Senetar	9321e811d7	Enforce minimum Windows version ref #983	2022-03-19 17:01:54 -05:00
Andrew Senetar	a64fcbfb5c	Fix deprecation warning from sqlite	2022-03-19 17:01:53 -05:00
Andrew Senetar	cff07a12d6	Black formatter changes	2022-03-19 17:01:53 -05:00
Alfonso Montero	b9c7832c4a	Apply @arsenetar 's proposed change to fix for errors on window change event. Solves #937 . (#980 )	2022-03-15 20:47:48 -05:00
Andrew Senetar	b9dfeac2f3	Drop Python 3.6 Support	2022-03-15 05:10:41 -05:00
Andrew Senetar	efc99eee96	Merge pull request #978 from glubsy/fix_zoom_scrollbar Fix image viewer scrollbar zoom	2022-03-14 20:43:40 -05:00
glubsy	ff7733bb73	Fix image viewer When zooming in or out, the value computed might be a float instead of an int, which is what the QScrollBar expect for its setValue method. Simply casting to int should be enough here.	2022-03-12 22:36:17 +01:00
Andrew Senetar	4b2fbe87ea	Default to English on unsupported system language Fix #976 - Add check for supported language to system locale detection - Fall-back to English when not a supported locale	2022-03-12 04:36:13 -06:00
Andrew Senetar	9e4b41feb5	Fix BASE_PATH for frozen macOS app	2022-03-09 06:50:41 -06:00
Andrew Senetar	cbfa8720f1	Update imports for objc module	2022-03-09 05:01:12 -06:00
Andrew Senetar	a02c5e5b9b	Add built modules as artifacts	2022-03-04 01:14:01 -06:00
Andrew Senetar	35e6ffd6af	Fix macOS packaging issue	2022-02-09 22:33:41 -06:00
Andrew Senetar	e957f840da	Fix python version check in makefile, close #971	2022-02-09 21:59:35 -06:00
Andrew Senetar	85e22089bd	Black formatting changes	2022-02-09 21:49:51 -06:00
Andrew Senetar	b7d68b4458	Update debian control template depends	2022-02-09 21:45:45 -06:00
Andrew Senetar	8f440603ee	Add Python 3.10 to tox.ini	2022-01-25 10:39:52 -06:00
Andrew Senetar	5d8e559ca3	Fix issue introduced in fix for #900	2022-01-25 10:39:08 -06:00
Andrew Senetar	2c11eecf97	Update version and changelog to 4.2.0	2022-01-24 22:28:40 -06:00
Andrew Senetar	02803f738b	Update translation files including Malay	2022-01-24 21:05:33 -06:00
Andrew Senetar	db27e6a645	Add Malay to language selection	2022-01-24 21:02:57 -06:00
Andrew Senetar	c9c35cc60d	Add translation source file for dark style change.	2022-01-24 19:33:42 -06:00
Andrew Senetar	880205dbc8	Fix python 3.10 in default action	2022-01-24 19:30:42 -06:00
Andrew Senetar	6456e64328	Update python versions for CI/CD - Update python versions for Default action - Set python versions for sonarcloud	2022-01-24 19:27:29 -06:00
Andrew Senetar	f6a0c0cc6d	Add initial dark style for use in Windows - Other platforms can achieve this with the OS theme so not enabled for them at this time. - Adds preference in display options to use dark style, default is false.	2022-01-24 19:14:30 -06:00
Andrew Senetar	eb57d269fc	Update translation source files	2021-11-23 21:11:30 -06:00
Andrew Senetar	34f41dc522	Merge pull request #942 from Dobatymo/hash-cache Implement hash cache for md5 hash based on sqlite	2021-11-23 21:08:22 -06:00
Dobatymo	77460045c4	clean up abstraction	2021-10-29 15:24:47 +08:00
Dobatymo	9753afba74	change FilesDB to singleton class move hash calculation back in to Files class clear cache now clears hash cache in addition to picture cache	2021-10-29 15:12:40 +08:00
Dobatymo	1ea108fc2b	changed cache filename	2021-10-29 15:12:40 +08:00
Dobatymo	2f02a6010d	implement hash cache for md5 hash based on sqlite	2021-10-29 15:12:40 +08:00
Andrew Senetar	b80489fd66	Update translation source files	2021-09-15 20:15:09 -05:00
Andrew Senetar	1d60e124ee	Update invoke_custom_command to run for all selected items	2021-09-02 20:48:25 -05:00
Andrew Senetar	e22d7d2fc9	Remove filtering of 0 size files in engine Files size is already able to be filtered at a higher level, some users may decide to see zero length files. Fix #321.	2021-08-28 18:16:22 -05:00
Andrew Senetar	0a0694e095	Expand fix for #630 to fix #551	2021-08-28 17:29:25 -05:00
Andrew Senetar	3da9d5d869	Update documentation files, add multi-language doc build - Update links in documentation, and some errors - Remove non-existent page - Update build to build all languages with --alldoc flag - Fix one minor debugging change introduced in package.py	2021-08-28 17:07:18 -05:00
Andrew Senetar	78fb052d77	Add more progress details to getmatches, ref #700	2021-08-28 04:58:22 -05:00
Andrew Senetar	9805cba10d	Use different message for direct delete success, close #904	2021-08-28 04:27:34 -05:00
Andrew Senetar	4c3dfe2f1f	Provide more feedback during scans - Add output for number of collected files / folders - Update to allow indeterminate progress bar - Remove unused hscommon\jobprogress\qt.py	2021-08-28 04:05:07 -05:00
Andrew Senetar	b0baa5bfd6	Add windows position handling at open, fix #653 - Move offscreen windows back on screen - Restore maximized state without impacting resored size - Fullscreen comes back on primary screen, needs further work to support restore on other screens	2021-08-27 23:26:19 -05:00
Andrew Senetar	22996ee914	Merge pull request #935 from chchia/master resize preference dialog file size box	2021-08-27 21:57:03 -05:00
chchia	31ec9c667f	resize preference dialog file size box	2021-08-28 10:28:06 +08:00
Andrew Senetar	3045361243	Add preference to ignore large files, close #430	2021-08-27 05:35:54 -05:00
Andrew Senetar	809116c764	Fix CodeQL Alerts - Cast int to Py_ssize_t for multiplication	2021-08-26 03:43:31 -05:00
Andrew Senetar	83f401595d	Minor Updates - Cleanup extension modules in setup.py to use correct namespaces - Update build.py to leverage setup.py for modules - Roll mutagen required version back to 1.44.0 to support more distros - Change build.py and sphinxgen.py to use pathlib - Remove hsaudiotag from package list for debian and arch	2021-08-26 03:29:24 -05:00
Andrew Senetar	814d145366	Updates to setup files - Include additional non-python files in MANIFEST.in (package_data in setup.cfg was not including the files) - Update requirements in setup.cfg	2021-08-25 04:10:38 -05:00
Andrew Senetar	efb76c7686	Add OS and Python Information to error dialog	2021-08-25 02:05:18 -05:00
Andrew Senetar	47dbe805bb	More cleanup and fixed a flake8 build issue	2021-08-25 01:11:24 -05:00
Andrew Senetar	f11fccc889	More cleanups - Cleanup columns.py and tables - Other misc cleanups - Remove text_field.py from qtlib as it is not used - Remove unused variables from image_viewer method	2021-08-25 00:46:33 -05:00
Andrew Senetar	2e13c4ccb5	Update internationalization files	2021-08-24 03:54:54 -05:00
Andrew Senetar	da72ffd1fd	Add ability to use non-native dialog for directories - Add preference for native dialogs - Add non-native directory selection to allow selecting multiple folders fixes #874 when using non-native.	2021-08-24 03:52:43 -05:00
Andrew Senetar	2c9437bef4	Fix #897	2021-08-24 03:13:03 -05:00
Andrew Senetar	f9085386a6	First pass code cleanup in qt/qtlib	2021-08-24 00:12:23 -05:00
Andrew Senetar	d576a7043c	Code cleanups in core and other affected files	2021-08-21 18:02:02 -05:00
Andrew Senetar	1ef5f56158	Code cleanups in hscommon & external effects	2021-08-21 16:56:27 -05:00
Andrew Senetar	f9316de244	Code cleanups in hscommon\tests	2021-08-21 16:25:33 -05:00
Andrew Senetar	0189c29f47	Misc cleanups in core/tests	2021-08-21 03:52:09 -05:00
Andrew Senetar	b4fa1d68f0	Add check for python version to build.py, close #589	2021-08-20 23:49:20 -05:00
Andrew Senetar	16df882481	Update requirements.txt for previous change	2021-08-19 00:17:46 -05:00
Andrew Senetar	58c04ff9ad	Switch from hsaudiotag to mutagen, close #440 - This opens up the ability to support more tags and audio information - Also makes progress on #333	2021-08-19 00:14:26 -05:00
Andrew Senetar	6b8f85e39a	Reveal in Explorer / Finder, close #895	2021-08-18 20:51:45 -05:00
Andrew Senetar	2fff1a3436	Add ablity to load results at start, closes #902 - Add ablility to load .dupguru file at start by passing as first argument - Add file association to .dupeguru file in windows at install	2021-08-18 19:24:14 -05:00
Andrew Senetar	a685524dd5	Add files for more standardized build tools - Prior investigation into linux packaging (not using pyinstaller) suggested having setuptools files could make packaging easier and automatable - Add setup.cfg and setup.py as initial starting point - Add MANIFEST.in (at least temporarily) Currently with the python build module this almost works for main application. It does not include all the extra data files right now.	2021-08-18 04:12:38 -05:00
Andrew Senetar	74918e2c56	Attempt to fix apt-get failure	2021-08-18 03:07:47 -05:00
Andrew Senetar	18895d983b	Fix syntax error in codeql-analysis.yml	2021-08-18 03:04:44 -05:00
Andrew Senetar	fe720208ea	Add minimum custom build for codeql cpp	2021-08-18 02:49:20 -05:00
Andrew Senetar	091d9e9239	Create codeql-analysis.yml Test out codeql	2021-08-18 02:33:40 -05:00
Andrew Senetar	5a4958cff9	Update translation .pot files	2021-08-17 21:18:47 -05:00
Andrew Senetar	be10b462fc	Add portable mode If settings.ini is present next to the executable, will run in portable mode. This results in settings, data, and cache all being in same folder as dupeGuru.	2021-08-17 21:12:32 -05:00
Andrew Senetar	d62b13bcdb	Removing travis - All CI is now covered by Github Actions - Remove .travis.yml - Remove tox-travis in requirements-extra.txt	2021-08-17 18:16:20 -05:00
Andrew Senetar	06eca11f0b	Remove format check from lint job	2021-08-17 00:52:14 -05:00
Andrew Senetar	2879f18e0d	Run linting and formatting check in parallel before test	2021-08-17 00:50:41 -05:00
Andrew Senetar	3ee21771f9	Fix workflow file format	2021-08-17 00:33:54 -05:00
Andrew Senetar	c0ba6fb57a	Test out github actions Add a workflow to test	2021-08-17 00:31:15 -05:00
Andrew Senetar	bc942b8263	Add black format check to tox runs	2021-08-15 04:10:46 -05:00
Andrew Senetar	ffe6b7047c	Format all files with black correcting line length	2021-08-15 04:10:18 -05:00
Andrew Senetar	9446f37fad	Remove flake8 E731 Errors Note: black formatting is now applying correctly as well.	2021-08-15 03:53:43 -05:00
Andrew Senetar	af19660c18	Update flake8 and black configuration - Update black to now use 120 lines - Update flake8 to use recommended settings for black integration	2021-08-15 03:32:31 -05:00
Andrew Senetar	99ad297906	Change preferences to use spinboxes where applicable - Change LineEdit to Spinbox for minimum file size 0-1,000,000KB - Change LineEdit to Spinbox for big file size 0-1,000,000MB	2021-08-15 02:11:42 -05:00
Andrew Senetar	e11f996dfc	Merge pull request #908 from glubsy/hash_sample_optimization Hash sample optimization	2021-08-13 23:41:17 -05:00
glubsy	e95306e58f	Fix flake 8	2021-08-14 02:52:00 +02:00
glubsy	891a875990	Cache constant expression Perhaps the python byte code is already optimized, but just in case it is not, keep pre-compute the constant expression.	2021-08-13 21:33:21 +02:00
glubsy	545a5a75fb	Fix for older python versions The "walrus" operator is only available in python 3.8 and later. Fall back to more traditional notation.	2021-08-13 20:56:33 +02:00
glubsy	7b764f183e	Avoid partially hashing small files Computing 3 hash samples for files less than 3MiB (3 * CHUNK_SIZE) is not efficient since spans of later samples would overlap a previous one. Therefore we can simply return the hash of the entire small file instead.	2021-08-13 20:47:01 +02:00
Andrew Senetar	fdc8a17d26	Update .travis.yml - Windows test uses 3.9.6 now - Intentation changes	2021-08-07 19:35:57 -05:00
Andrew Senetar	cb3bbbec6e	Upgrade Requirement Minimums - Upgrade requirements to specify more current minimums - Remove compatability code from sphinxgen for old versions - Upgrade pyinstaller to a minimum version that works with latest macOS	2021-08-07 19:28:41 -05:00
Andrew Senetar	c51a82a2ce	Fix Issues from Translation Update - Add Qtlib to transifex config - Pull latest qtlib translations - Fix flake8 error - Remove code for manual translation import, use transifex-client instead	2021-08-06 22:21:35 -05:00
Andrew Senetar	0cd8f5e948	Update translation pot files	2021-08-06 21:41:52 -05:00
Andrew Senetar	9c09607c08	Add Turkish & Updates from Transifex - Pull updates from Transifex - Add Turkish - Sort language lists in code - Remove old locale conversion code as it appears to work correctly on windows without different conversions.	2021-08-06 21:41:52 -05:00
Andrew Senetar	3bd342770c	Update configurations - Enable Unicode for NSIS Installer - Update transifex config to new project	2021-08-06 21:41:52 -05:00
Andrew Senetar	14b456dcf9	Merge pull request #927 from glubsy/fix_directories_tests Fix Directories regex test	2021-08-06 20:08:27 -05:00
glubsy	3dccb686e2	Fix Directories regex test The entire path to the file would match unless another path separator is added.	2021-08-06 17:18:23 +02:00
Andrew Senetar	0db66baace	Merge pull request #907 from glubsy/missing_renamed_regex Missing renamed regex	2021-08-03 22:26:08 -05:00
Andrew Senetar	e3828ae2ca	Merge pull request #911 from glubsy/fix_757_fix_regression Fix infinite recursion	2021-06-22 22:44:12 -05:00
glubsy	23c59787e5	Fix infinite recursion Force the Results to update its internal __dupes list whenever at least one group has re-prioritized and changed its dupes/ref.	2021-06-23 05:36:10 +02:00
Andrew Senetar	2f8d603251	Merge pull request #910 from glubsy/757_fix Fix refs appearing in dupes-only view	2021-06-22 21:54:49 -05:00
glubsy	a51f263632	Fix refs appearing in dupes-only view * Some refs appeared in the dupes-only view after a re-prioritization was done a second time. * It seems the core.Results.__dupes list was not properly updated whenever core.app.Dupeguru.reprioritize_groups() -> core.Results.sort_dupes() was called. When a re-prioritization is done, some refs became dupe, and some dupes became ref in their place. So we need to update the new state of the internal list of dupes kept by the Results object, instead of relying on the outdated cached one. * Fix #757.	2021-06-22 22:57:57 +02:00
glubsy	718ca5b313	Remove unused import	2021-06-22 02:41:33 +02:00
glubsy	277bc3fbb8	Add unit tests for hash sample optimization * Instead of keeping md5 samples separate, merge them as one hash computed from the various selected chunks we picked. * We don't need to keep a boolean to see whether or not the user chose to optimize; we can simply compare the value of the threshold, since 0 means no optimization currently active.	2021-06-21 22:44:05 +02:00
glubsy	e07dfd5955	Add partial hashes optimization for big files * Big files above the user selected threshold can be partially hashed in 3 places. * If the user is willing to take the risk, we consider files with identical md5samples as being identical.	2021-06-21 19:03:21 +02:00
Andrew Senetar	4641bd6ec9	Merge pull request #905 from glubsy/fix_863 Fix exception when deleting while in delta view	2021-06-19 20:29:47 -05:00
glubsy	a6f83ad3d7	Fix missing regexp after rename * Doing a full match should be safer to avoid partial results which would result in overly aggressive filtering. * Add new tests to test suite to cover this issue. * Fixes #903.	2021-06-19 02:00:25 +02:00
glubsy	ab8750eedb	Fix partial regex match yielding false positive	2021-06-17 03:49:59 +02:00
glubsy	22033211d6	Fix exception when deleting while in delta view	2021-05-31 23:49:21 +02:00
Andrew Senetar	0b46ca2222	Merge pull request #879 from glubsy/fix_unicode Fix stripping (japanese) unicode characters	2021-05-25 19:11:19 -05:00
Andrew Senetar	72e0f76242	Merge pull request #898 from AlttiRi/master Change reference background color #894	2021-05-25 19:10:31 -05:00
[Alt'tiRi]	65c1d463f8	Change reference background color #894	2021-05-22 02:52:41 +03:00
Andrew Senetar	e6c791ab0a	Merge pull request #884 from samusz/master Small typo	2021-05-09 23:32:32 -05:00
Sacha Muszlak	78f5088101	Merge pull request #1 from samusz/samusz-patch-1 typo correction	2021-05-07 09:41:47 +02:00
Sacha Muszlak	095df5eb95	typo correction	2021-05-07 09:40:08 +02:00
glubsy	f1ae478433	Fix including character at the border	2021-04-29 05:29:35 +02:00
glubsy	c4dcfd3d4b	Fix stripping (japanese) unicode characters * Accents are getting removed from Unicode characters to generate similar "words". * Non-latin characters which cannot be processed that way (eg. japanese, greek, russian, etc.) should not be filtered out at all otherwise files are erroneously skipped or detected as dupes if only some characters make it passed the filter. * Starting from an arbitrary unicode codepoint (converted to decimal), above which we know it is pointless to try any sort of processing, we leave the characters as is. * Fix #878.	2021-04-29 05:15:34 +02:00
Andrew Senetar	0840104edf	Merge pull request #873 from glubsy/fix_857 Fix 857	2021-04-20 20:05:05 -05:00
glubsy	6b4b436251	Fix crash on shutdown * Fixes "'DetailsPanel' object has no attribute '_table'" error on shutdown if the Results table is updated (item removed) while the Details Dialog is shown as a floating window. * It seems that QApplication.quit() triggers some sort of refresh on the floating QDockWidget, which in turn makes calls to the underlying model that is possibly being destroyed, ie. there might be a race condition here. * Closing or hiding the QDockWidget before the cal to quit() is a workaround. Similarly, this is already done in the quitTriggered() method anyway. * This fixes #857.	2021-04-16 17:54:49 +02:00
glubsy	d18b8c10ec	Remove redundant assignment The "app" field is already set in the parent class.	2021-04-15 18:03:00 +02:00
`@@ -1,2 +1,2 @@`
	`__version__ = "4.1.1"`	`__version__ = "4.2.1"`
	`__appname__ = "dupeGuru"`	`__appname__ = "dupeGuru"`