Converted to py3k. There are probably still some bugs. So far, I've managed to run dupeGuru SE under PyObjC and Qt.

Virgil Dupras 2010-08-11 16:39:06 +02:00
parent fb79daad6a
commit 854d194f88
57 changed files with 394 additions and 338 deletions
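
Note: most of the mechanical edits below match what CPython's 2to3 fixers emit — print statements become print() calls, filter(None, xs) becomes a [_f for _f in xs if _f] comprehension, iteritems() becomes items(), and iterator-returning map()/keys() get wrapped in list(). Whether this pass was actually produced by 2to3 is an assumption; a typical invocation (package paths illustrative) would be:

2to3 -w -n core core_se qt cocoa   # -w: rewrite in place, -n: skip .bak backups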

View File

@ -20,10 +20,10 @@ from hscommon.build import add_to_pythonpath, print_and_do, build_all_qt_ui, cop
def build_cocoa(edition, dev, help_destpath):
if not dev:
print "Building help index"
print("Building help index")
os.system('open -a /Developer/Applications/Utilities/Help\\ Indexer.app {0}'.format(help_destpath))
print "Building dg_cocoa.plugin"
print("Building dg_cocoa.plugin")
if op.exists('build'):
shutil.rmtree('build')
os.mkdir('build')
@ -54,7 +54,7 @@ def build_cocoa(edition, dev, help_destpath):
pthpath = op.join(pluginpath, 'Contents/Resources/dev.pth')
open(pthpath, 'w').write(op.abspath('.'))
os.chdir(cocoa_project_path)
print "Building the XCode project"
print("Building the XCode project")
args = []
if dev:
args.append('-configuration dev')
@ -68,10 +68,10 @@ def build_qt(edition, dev):
build_all_qt_ui(op.join('qtlib', 'ui'))
build_all_qt_ui(op.join('qt', 'base'))
build_all_qt_ui(op.join('qt', edition))
print_and_do("pyrcc4 {0} > {1}".format(op.join('qt', 'base', 'dg.qrc'), op.join('qt', 'base', 'dg_rc.py')))
print_and_do("pyrcc4 -py3 {0} > {1}".format(op.join('qt', 'base', 'dg.qrc'), op.join('qt', 'base', 'dg_rc.py')))
if edition == 'pe':
os.chdir(op.join('qt', edition))
os.system('python gen.py')
os.system('python3 gen.py')
os.chdir(op.join('..', '..'))
def main():
@ -79,11 +79,11 @@ def main():
edition = conf['edition']
ui = conf['ui']
dev = conf['dev']
print "Building dupeGuru {0} with UI {1}".format(edition.upper(), ui)
print("Building dupeGuru {0} with UI {1}".format(edition.upper(), ui))
if dev:
print "Building in Dev mode"
print("Building in Dev mode")
add_to_pythonpath('.')
print "Generating Help"
print("Generating Help")
windows = sys.platform == 'win32'
profile = 'win_en' if windows else 'osx_en'
help_dir = 'help_{0}'.format(edition)
@ -91,10 +91,10 @@ def main():
help_basepath = op.abspath(help_dir)
help_destpath = op.abspath(op.join(help_dir, dest_dir))
helpgen.gen(help_basepath, help_destpath, profile=profile)
print "Building dupeGuru"
print("Building dupeGuru")
if edition == 'pe':
os.chdir('core_pe')
os.system('python gen.py')
os.system('python3 gen.py')
os.chdir('..')
if ui == 'cocoa':
build_cocoa(edition, dev, help_destpath)
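
The build script changes are the print-statement → print() conversion plus explicit calls into the Python 3 toolchain (python3, and pyrcc4's -py3 switch to generate a Python 3 resource module). A minimal sketch of why the parentheses matter:

# Python 2 statement form -- a SyntaxError when run under Python 3:
#     print "Building dupeGuru"
# Function form, valid in Python 3 (and, with one argument, in Python 2 too):
print("Building dupeGuru")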

View File

@ -23,10 +23,10 @@ class PyDupeGuru(PyDupeGuruBase):
#---Information
def getSelectedDupePath(self):
return unicode(self.py.selected_dupe_path())
return str(self.py.selected_dupe_path())
def getSelectedDupeRefPath(self):
return unicode(self.py.selected_dupe_ref_path())
return str(self.py.selected_dupe_ref_path())
#---Properties
def setMatchScaled_(self,match_scaled):
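
These Cocoa bridge methods stop coercing paths with unicode() because Python 3's str is the unicode type. A sketch of the equivalence (the literal is illustrative; the app passes Path objects):

# Python 2 had two text types, so paths were coerced with unicode(path).
# In Python 3, str *is* the unicode type and the name `unicode` is gone:
text = str('/Users/me/photo.jpg')
isinstance(text, str)   # True; no separate unicode class exists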

View File

@ -11,8 +11,10 @@ from core.app_cocoa_inter import PyDupeGuruBase, PyDetailsPanel
from core_se.app_cocoa import DupeGuru
# Fix py2app imports: py2app chokes on relative imports and other stuff
from core_se import fs, data
from lxml import etree, _elementpath
import hsutil.conflict
import core.engine, core.fs, core.app
import core_se.fs, core_se.data
import lxml.etree, lxml._elementpath
import gzip
class PyDupeGuru(PyDupeGuruBase):

View File

@ -18,7 +18,7 @@ def main(edition, ui, dev):
if ui not in ('cocoa', 'qt'):
ui = 'cocoa' if sys.platform == 'darwin' else 'qt'
build_type = 'Dev' if dev else 'Release'
print "Configuring dupeGuru {0} for UI {1} ({2})".format(edition.upper(), ui, build_type)
print("Configuring dupeGuru {0} for UI {1} ({2})".format(edition.upper(), ui, build_type))
conf = {
'edition': edition,
'ui': ui,

View File

@ -6,7 +6,7 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
from __future__ import unicode_literals
import os
import os.path as op
@ -76,7 +76,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
def _do_delete_dupe(self, dupe):
if not io.exists(dupe.path):
return
send2trash(unicode(dupe.path)) # Raises OSError when there's a problem
send2trash(str(dupe.path)) # Raises OSError when there's a problem
self.clean_empty_dirs(dupe.path[:-1])
def _do_load(self, j):
@ -100,7 +100,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
try:
return self.data.GetDisplayInfo(dupe, group, delta)
except Exception as e:
logging.warning("Exception on GetDisplayInfo for %s: %s", unicode(dupe.path), unicode(e))
logging.warning("Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e))
return ['---'] * len(self.data.COLUMNS)
def _get_file(self, str_path):
@ -149,7 +149,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
g = self.results.get_group_of_duplicate(dupe)
for other in g:
if other is not dupe:
self.scanner.ignore_list.Ignore(unicode(other.path), unicode(dupe.path))
self.scanner.ignore_list.Ignore(str(other.path), str(dupe.path))
self.remove_duplicates(dupes)
def apply_filter(self, filter):
@ -208,7 +208,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
def export_to_xhtml(self, column_ids):
column_ids = [colid for colid in column_ids if colid.isdigit()]
column_ids = map(int, column_ids)
column_ids = list(map(int, column_ids))
column_ids.sort()
colnames = [col['display'] for i, col in enumerate(self.data.COLUMNS) if i in column_ids]
rows = []
@ -232,8 +232,8 @@ class DupeGuru(RegistrableApplication, Broadcaster):
dupe = self.selected_dupes[0]
group = self.results.get_group_of_duplicate(dupe)
ref = group.ref
cmd = cmd.replace('%d', unicode(dupe.path))
cmd = cmd.replace('%r', unicode(ref.path))
cmd = cmd.replace('%d', str(dupe.path))
cmd = cmd.replace('%r', str(ref.path))
match = re.match(r'"([^"]+)"(.*)', cmd)
if match is not None:
# This code here is because subprocess.Popen doesn't seem to accept, under Windows,
@ -313,7 +313,7 @@ class DupeGuru(RegistrableApplication, Broadcaster):
d.rename(newname)
return True
except (IndexError, fs.FSError) as e:
logging.warning("dupeGuru Warning: %s" % unicode(e))
logging.warning("dupeGuru Warning: %s" % str(e))
return False
def reveal_selected(self):
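
The list(map(...)) wrapper above is the recurring Python 3 fix for map() and filter() now returning lazy iterators instead of lists. A sketch of the failure it avoids, mirroring export_to_xhtml:

column_ids = ['2', '0', '7']
ids = map(int, column_ids)   # Python 3: a lazy map object, not a list
# ids.sort()                 # AttributeError: 'map' object has no attribute 'sort'
ids = list(ids)              # materialize first
ids.sort()                   # [0, 2, 7]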

View File

@ -49,11 +49,11 @@ class DupeGuru(app.DupeGuru):
#--- Override
@staticmethod
def _open_path(path):
NSWorkspace.sharedWorkspace().openFile_(unicode(path))
NSWorkspace.sharedWorkspace().openFile_(str(path))
@staticmethod
def _reveal_path(path):
NSWorkspace.sharedWorkspace().selectFile_inFileViewerRootedAtPath_(unicode(path), '')
NSWorkspace.sharedWorkspace().selectFile_inFileViewerRootedAtPath_(str(path), '')
def _start_job(self, jobid, func):
try:

View File

@ -11,7 +11,7 @@ from hsutil.str import format_time, FT_DECIMAL, format_size
import time
def format_path(p):
return unicode(p[:-1])
return str(p[:-1])
def format_timestamp(t, delta):
if delta:
@ -38,4 +38,4 @@ def format_dupe_count(c):
return str(c) if c else '---'
def cmp_value(value):
return value.lower() if isinstance(value, basestring) else value
return value.lower() if isinstance(value, str) else value
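
basestring, the common ancestor of Python 2's str and unicode, no longer exists; plain str is the only text type. A hedged sketch of the check:

value = 'Foo'
key = value.lower() if isinstance(value, str) else value
# isinstance(value, basestring)   # NameError: basestring was removed in Python 3
# code that must also accept raw bytes would test isinstance(value, (str, bytes))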

View File

@ -151,11 +151,11 @@ class Directories(object):
root = etree.Element('directories')
for root_path in self:
root_path_node = etree.SubElement(root, 'root_directory')
root_path_node.set('path', unicode(root_path))
for path, state in self.states.iteritems():
root_path_node.set('path', str(root_path))
for path, state in self.states.items():
state_node = etree.SubElement(root, 'state')
state_node.set('path', unicode(path))
state_node.set('value', unicode(state))
state_node.set('path', str(path))
state_node.set('value', str(state))
tree = etree.ElementTree(root)
tree.write(fp, encoding='utf-8')
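
dict.iteritems() is gone in Python 3; items() now returns a lightweight view, so the rename costs nothing here. A sketch (mapping contents hypothetical):

states = {'/photos': 1, '/music': 2}
for path, state in states.items():   # py3: items() is a lazy view, like py2's iteritems()
    print(path, state)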

View File

@ -6,7 +6,7 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
from __future__ import division
import difflib
import itertools
import logging
@ -25,15 +25,15 @@ NO_FIELD_ORDER) = range(3)
JOB_REFRESH_RATE = 100
def getwords(s):
if isinstance(s, unicode):
if isinstance(s, str):
s = normalize('NFD', s)
s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", ' ').lower()
s = ''.join(c for c in s if c in string.ascii_letters + string.digits + string.whitespace)
return filter(None, s.split(' ')) # filter() is to remove empty elements
return [_f for _f in s.split(' ') if _f] # remove empty elements
def getfields(s):
fields = [getwords(field) for field in s.split(' - ')]
return filter(None, fields)
return [_f for _f in fields if _f]
def unpack_fields(fields):
result = []
@ -118,7 +118,7 @@ def build_word_dict(objects, j=job.nulljob):
def merge_similar_words(word_dict):
"""Take all keys in word_dict that are similar, and merge them together.
"""
keys = word_dict.keys()
keys = list(word_dict.keys())
keys.sort(key=len)# we want the shortest word to stay
while keys:
key = keys.pop(0)
@ -138,7 +138,7 @@ def reduce_common_words(word_dict, threshold):
Because if we remove them, we will miss some duplicates!
"""
uncommon_words = set(word for word, objects in word_dict.items() if len(objects) < threshold)
for word, objects in word_dict.items():
for word, objects in list(word_dict.items()):
if len(objects) < threshold:
continue
reduced = set()
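
Two related fixes in engine.py: keys() no longer returns a sortable list, and iterating a live view while the loop body mutates the dict raises RuntimeError in Python 3 — hence the list(...) snapshots. A sketch of the sharper second case:

word_dict = {'a': {1}, 'the': {1, 2, 3}}
threshold = 2
for word, objects in list(word_dict.items()):   # snapshot before mutating
    if len(objects) >= threshold:
        del word_dict[word]   # over a live .items() view this raises RuntimeError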

View File

@ -13,7 +13,7 @@ from tempfile import mkdtemp
# Yes, this is a very low-tech solution, but at least it doesn't have all these annoying dependency
# and resource problems.
MAIN_TEMPLATE = u"""
MAIN_TEMPLATE = """
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
<html xmlns="http://www.w3.org/1999/xhtml">
@ -104,33 +104,33 @@ $rows
</html>
"""
COLHEADERS_TEMPLATE = u"<th>{name}</th>"
COLHEADERS_TEMPLATE = "<th>{name}</th>"
ROW_TEMPLATE = u"""
ROW_TEMPLATE = """
<tr>
<td class="{indented}">{filename}</td>{cells}
</tr>
"""
CELL_TEMPLATE = u"""<td>{value}</td>"""
CELL_TEMPLATE = """<td>{value}</td>"""
def export_to_xhtml(colnames, rows):
# a row is a list of values with the first value being a flag indicating if the row should be indented
if rows:
assert len(rows[0]) == len(colnames) + 1 # + 1 is for the "indented" flag
colheaders = u''.join(COLHEADERS_TEMPLATE.format(name=name) for name in colnames)
colheaders = ''.join(COLHEADERS_TEMPLATE.format(name=name) for name in colnames)
rendered_rows = []
for row in rows:
# [2:] is to remove the indented flag + filename
indented = u'indented' if row[0] else u''
indented = 'indented' if row[0] else ''
filename = row[1]
cells = u''.join(CELL_TEMPLATE.format(value=value) for value in row[2:])
cells = ''.join(CELL_TEMPLATE.format(value=value) for value in row[2:])
rendered_rows.append(ROW_TEMPLATE.format(indented=indented, filename=filename, cells=cells))
rendered_rows = u''.join(rendered_rows)
rendered_rows = ''.join(rendered_rows)
# The main template can't use format because the css code uses {}
content = MAIN_TEMPLATE.replace('$colheaders', colheaders).replace('$rows', rendered_rows)
folder = mkdtemp()
destpath = op.join(folder, u'export.htm')
destpath = op.join(folder, 'export.htm')
fp = open(destpath, 'w')
fp.write(content.encode('utf-8'))
fp.close()
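
This hunk looks like one of the leftover bugs the commit message warns about: fp is opened in text mode ('w') but receives content.encode('utf-8'), which raises TypeError under Python 3. A sketch of the likely fix, assuming the surrounding names:

with open(destpath, 'w', encoding='utf-8') as fp:
    fp.write(content)   # pass text; let the file object do the encoding
# or, keeping the bytes: open(destpath, 'wb') and write content.encode('utf-8')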

View File

@ -12,7 +12,7 @@
# resulting needless complexity and memory usage. It's been a while since I wanted to do that fork,
# and I'm doing it now.
from __future__ import unicode_literals
import hashlib
import logging
@ -25,13 +25,13 @@ class FSError(Exception):
cls_message = "An error has occured on '{name}' in '{parent}'"
def __init__(self, fsobject, parent=None):
message = self.cls_message
if isinstance(fsobject, basestring):
if isinstance(fsobject, str):
name = fsobject
elif isinstance(fsobject, File):
name = fsobject.name
else:
name = ''
parentname = unicode(parent) if parent is not None else ''
parentname = str(parent) if parent is not None else ''
Exception.__init__(self, message.format(name=name, parent=parentname))
@ -119,7 +119,7 @@ class File(object):
If `attrnames` is not None, caches only attrnames.
"""
if attrnames is None:
attrnames = self.INITIAL_INFO.keys()
attrnames = list(self.INITIAL_INFO.keys())
for attrname in attrnames:
if attrname not in self.__dict__:
self._read_info(attrname)

View File

@ -32,7 +32,7 @@ class DetailsPanel(GUIObject):
ref = group.ref if group is not None and group.ref is not dupe else None
l2 = self.app._get_display_info(ref, group, False)
names = [c['display'] for c in self.app.data.COLUMNS]
self._table = zip(names, l1, l2)
self._table = list(zip(names, l1, l2))
#--- Public
def row_count(self):

View File

@ -62,7 +62,7 @@ class DirectoryTree(GUIObject, Tree):
def _refresh(self):
self.clear()
for path in self.app.directories:
self.append(DirectoryNode(self.app, path, unicode(path)))
self.append(DirectoryNode(self.app, path, str(path)))
def add_directory(self, path):
self.app.add_directory(path)

View File

@ -39,5 +39,5 @@ class ProblemRow(Row):
Row.__init__(self, table)
self.dupe = dupe
self.msg = msg
self.path = unicode(dupe.path)
self.path = str(dupe.path)

View File

@ -63,7 +63,7 @@ class ResultTree(GUIObject, Tree):
def _select_nodes(self, nodes):
Tree._select_nodes(self, nodes)
self.app._select_dupes(map(attrgetter('_dupe'), nodes))
self.app._select_dupes(list(map(attrgetter('_dupe'), nodes)))
#--- Private
def _refresh(self):

View File

@ -22,7 +22,7 @@ class IgnoreList(object):
self._count = 0
def __iter__(self):
for first,seconds in self._ignored.iteritems():
for first,seconds in self._ignored.items():
for second in seconds:
yield (first,second)

View File

@ -147,7 +147,7 @@ class Results(Markable):
self.__filters.append(filter_str)
if self.__filtered_dupes is None:
self.__filtered_dupes = flatten(g[:] for g in self.groups)
self.__filtered_dupes = set(dupe for dupe in self.__filtered_dupes if filter_re.search(unicode(dupe.path)))
self.__filtered_dupes = set(dupe for dupe in self.__filtered_dupes if filter_re.search(str(dupe.path)))
filtered_groups = set()
for dupe in self.__filtered_dupes:
filtered_groups.add(self.get_group_of_duplicate(dupe))
@ -241,7 +241,7 @@ class Results(Markable):
func(dupe)
to_remove.append(dupe)
except EnvironmentError as e:
self.problems.append((dupe, unicode(e)))
self.problems.append((dupe, str(e)))
if remove_from_results:
self.remove_duplicates(to_remove)
self.mark_none()
@ -285,7 +285,7 @@ class Results(Markable):
words = ()
file_elem = etree.SubElement(group_elem, 'file')
try:
file_elem.set('path', unicode(d.path))
file_elem.set('path', str(d.path))
file_elem.set('words', ','.join(words))
except ValueError: # If there's an invalid character, just skip the file
file_elem.set('path', '')
@ -293,9 +293,9 @@ class Results(Markable):
file_elem.set('marked', ('y' if self.is_marked(d) else 'n'))
for match in g.matches:
match_elem = etree.SubElement(group_elem, 'match')
match_elem.set('first', unicode(dupe2index[match.first]))
match_elem.set('second', unicode(dupe2index[match.second]))
match_elem.set('percentage', unicode(int(match.percentage)))
match_elem.set('first', str(dupe2index[match.first]))
match_elem.set('second', str(dupe2index[match.second]))
match_elem.set('percentage', str(int(match.percentage)))
tree = etree.ElementTree(root)
with FileOrPath(outfile, 'wb') as fp:
tree.write(fp, encoding='utf-8')
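
The unicode() → str() swaps in save_to_xml also keep lxml happy, since it rejects non-string attribute values. A quick sketch:

from lxml import etree
el = etree.Element('match')
el.set('percentage', str(42))   # el.set('percentage', 42) raises TypeError
etree.tostring(el)              # b'<match percentage="42"/>'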

View File

@ -1,109 +1,113 @@
# Created By: Virgil Dupras
# Created On: 2006/03/03
# Copyright 2010 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "HS" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
import logging
from hscommon import job
from hsutil import io
from hsutil.misc import dedupe
from hsutil.str import get_file_ext, rem_file_ext
from . import engine
from .ignore import IgnoreList
(SCAN_TYPE_FILENAME,
SCAN_TYPE_FIELDS,
SCAN_TYPE_FIELDS_NO_ORDER,
SCAN_TYPE_TAG,
UNUSED, # Must not be removed. These constants must stay in sync with the scan_type values stored in the prefs.
SCAN_TYPE_CONTENT,
SCAN_TYPE_CONTENT_AUDIO) = range(7)
SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year']
class Scanner(object):
def __init__(self):
self.ignore_list = IgnoreList()
self.discarded_file_count = 0
def _getmatches(self, files, j):
if self.size_threshold:
j = j.start_subjob([2, 8])
for f in j.iter_with_progress(files, 'Read size of %d/%d files'):
f.size # pre-read; reading it here makes progress reporting smoother (especially for bundles)
files = [f for f in files if f.size >= self.size_threshold]
if self.scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO):
sizeattr = 'size' if self.scan_type == SCAN_TYPE_CONTENT else 'audiosize'
return engine.getmatches_by_contents(files, sizeattr, partial=self.scan_type==SCAN_TYPE_CONTENT_AUDIO, j=j)
else:
j = j.start_subjob([2, 8])
kw = {}
kw['match_similar_words'] = self.match_similar_words
kw['weight_words'] = self.word_weighting
kw['min_match_percentage'] = self.min_match_percentage
if self.scan_type == SCAN_TYPE_FIELDS_NO_ORDER:
self.scan_type = SCAN_TYPE_FIELDS
kw['no_field_order'] = True
func = {
SCAN_TYPE_FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)),
SCAN_TYPE_FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)),
SCAN_TYPE_TAG: lambda f: [engine.getwords(unicode(getattr(f, attrname))) for attrname in SCANNABLE_TAGS if attrname in self.scanned_tags],
}[self.scan_type]
for f in j.iter_with_progress(files, 'Read metadata of %d/%d files'):
f.words = func(f)
return engine.getmatches(files, j=j, **kw)
@staticmethod
def _key_func(dupe):
return (not dupe.is_ref, -dupe.size)
@staticmethod
def _tie_breaker(ref, dupe):
refname = rem_file_ext(ref.name).lower()
dupename = rem_file_ext(dupe.name).lower()
if 'copy' in refname and 'copy' not in dupename:
return True
if refname.startswith(dupename) and (refname[len(dupename):].strip().isdigit()):
return True
return len(dupe.path) > len(ref.path)
def GetDupeGroups(self, files, j=job.nulljob):
j = j.start_subjob([8, 2])
for f in [f for f in files if not hasattr(f, 'is_ref')]:
f.is_ref = False
logging.info('Getting matches')
matches = self._getmatches(files, j)
logging.info('Found %d matches' % len(matches))
j.set_progress(100, 'Removing false matches')
if not self.mix_file_kind:
matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
matches = [m for m in matches if io.exists(m.first.path) and io.exists(m.second.path)]
if self.ignore_list:
j = j.start_subjob(2)
iter_matches = j.iter_with_progress(matches, 'Processed %d/%d matches against the ignore list')
matches = [m for m in iter_matches
if not self.ignore_list.AreIgnored(unicode(m.first.path), unicode(m.second.path))]
logging.info('Grouping matches')
groups = engine.get_groups(matches, j)
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
groups = [g for g in groups if any(not f.is_ref for f in g)]
logging.info('Created %d groups' % len(groups))
j.set_progress(100, 'Doing group prioritization')
for g in groups:
g.prioritize(self._key_func, self._tie_breaker)
return groups
match_similar_words = False
min_match_percentage = 80
mix_file_kind = True
scan_type = SCAN_TYPE_FILENAME
scanned_tags = set(['artist', 'title'])
size_threshold = 0
word_weighting = False
# Created By: Virgil Dupras
# Created On: 2006/03/03
# Copyright 2010 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "HS" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
import logging
from hscommon import job
from hsutil import io
from hsutil.misc import dedupe
from hsutil.str import get_file_ext, rem_file_ext
from . import engine
from .ignore import IgnoreList
(SCAN_TYPE_FILENAME,
SCAN_TYPE_FIELDS,
SCAN_TYPE_FIELDS_NO_ORDER,
SCAN_TYPE_TAG,
UNUSED, # Must not be removed. These constants must stay in sync with the scan_type values stored in the prefs.
SCAN_TYPE_CONTENT,
SCAN_TYPE_CONTENT_AUDIO) = range(7)
SCANNABLE_TAGS = ['track', 'artist', 'album', 'title', 'genre', 'year']
class Scanner(object):
def __init__(self):
self.ignore_list = IgnoreList()
self.discarded_file_count = 0
def _getmatches(self, files, j):
if self.size_threshold:
j = j.start_subjob([2, 8])
for f in j.iter_with_progress(files, 'Read size of %d/%d files'):
f.size # pre-read; reading it here makes progress reporting smoother (especially for bundles)
files = [f for f in files if f.size >= self.size_threshold]
if self.scan_type in (SCAN_TYPE_CONTENT, SCAN_TYPE_CONTENT_AUDIO):
sizeattr = 'size' if self.scan_type == SCAN_TYPE_CONTENT else 'audiosize'
return engine.getmatches_by_contents(files, sizeattr, partial=self.scan_type==SCAN_TYPE_CONTENT_AUDIO, j=j)
else:
j = j.start_subjob([2, 8])
kw = {}
kw['match_similar_words'] = self.match_similar_words
kw['weight_words'] = self.word_weighting
kw['min_match_percentage'] = self.min_match_percentage
if self.scan_type == SCAN_TYPE_FIELDS_NO_ORDER:
self.scan_type = SCAN_TYPE_FIELDS
kw['no_field_order'] = True
func = {
SCAN_TYPE_FILENAME: lambda f: engine.getwords(rem_file_ext(f.name)),
SCAN_TYPE_FIELDS: lambda f: engine.getfields(rem_file_ext(f.name)),
SCAN_TYPE_TAG: lambda f: [engine.getwords(str(getattr(f, attrname))) for attrname in SCANNABLE_TAGS if attrname in self.scanned_tags],
}[self.scan_type]
for f in j.iter_with_progress(files, 'Read metadata of %d/%d files'):
f.words = func(f)
return engine.getmatches(files, j=j, **kw)
@staticmethod
def _key_func(dupe):
return (not dupe.is_ref, -dupe.size)
@staticmethod
def _tie_breaker(ref, dupe):
refname = rem_file_ext(ref.name).lower()
dupename = rem_file_ext(dupe.name).lower()
if 'copy' in dupename:
return False
if 'copy' in refname:
return True
if dupename.startswith(refname) and (dupename[len(refname):].strip().isdigit()):
return False
if refname.startswith(dupename) and (refname[len(dupename):].strip().isdigit()):
return True
return len(dupe.path) > len(ref.path)
def GetDupeGroups(self, files, j=job.nulljob):
j = j.start_subjob([8, 2])
for f in [f for f in files if not hasattr(f, 'is_ref')]:
f.is_ref = False
logging.info('Getting matches')
matches = self._getmatches(files, j)
logging.info('Found %d matches' % len(matches))
j.set_progress(100, 'Removing false matches')
if not self.mix_file_kind:
matches = [m for m in matches if get_file_ext(m.first.name) == get_file_ext(m.second.name)]
matches = [m for m in matches if io.exists(m.first.path) and io.exists(m.second.path)]
if self.ignore_list:
j = j.start_subjob(2)
iter_matches = j.iter_with_progress(matches, 'Processed %d/%d matches against the ignore list')
matches = [m for m in iter_matches
if not self.ignore_list.AreIgnored(str(m.first.path), str(m.second.path))]
logging.info('Grouping matches')
groups = engine.get_groups(matches, j)
matched_files = dedupe([m.first for m in matches] + [m.second for m in matches])
self.discarded_file_count = len(matched_files) - sum(len(g) for g in groups)
groups = [g for g in groups if any(not f.is_ref for f in g)]
logging.info('Created %d groups' % len(groups))
j.set_progress(100, 'Doing group prioritization')
for g in groups:
g.prioritize(self._key_func, self._tie_breaker)
return groups
match_similar_words = False
min_match_percentage = 80
mix_file_kind = True
scan_type = SCAN_TYPE_FILENAME
scanned_tags = set(['artist', 'title'])
size_threshold = 0
word_weighting = False
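
One hunk in scanner.py goes beyond the py3 conversion: _tie_breaker gains symmetric checks so that a name containing 'copy', or a name that is another name plus a trailing number, can never win the reference spot. A standalone paraphrase for illustration — the parameter names and the True-promotes-the-dupe convention are assumptions read off the diff:

def tie_breaker(refname, dupename, ref_path_len, dupe_path_len):
    # True means the dupe should take over as the group's reference file.
    if 'copy' in dupename:   # new: a 'copy' never becomes the reference
        return False
    if 'copy' in refname:
        return True
    if dupename.startswith(refname) and dupename[len(refname):].strip().isdigit():
        return False         # new: 'foo 2' loses to 'foo' in either direction
    if refname.startswith(dupename) and refname[len(dupename):].strip().isdigit():
        return True
    return dupe_path_len > ref_path_len   # unchanged fallback

assert tie_breaker('foo copy', 'foo', 2, 2) is True   # 'foo' becomes the reference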

View File

@ -109,7 +109,7 @@ class TCDupeGuru(TestCase):
def test_Scan_with_objects_evaluating_to_false(self):
class FakeFile(fs.File):
def __nonzero__(self):
def __bool__(self):
return False
@ -200,11 +200,11 @@ class TCDupeGuruWithResults(TestCase):
if expected is not None:
expected = set(expected)
not_called = expected - calls
assert not not_called, u"These calls haven't been made: {0}".format(not_called)
assert not not_called, "These calls haven't been made: {0}".format(not_called)
if not_expected is not None:
not_expected = set(not_expected)
called = not_expected & calls
assert not called, u"These calls shouldn't have been made: {0}".format(called)
assert not called, "These calls shouldn't have been made: {0}".format(called)
gui.clear_calls()
def clear_gui_calls(self):
@ -409,9 +409,9 @@ class TCDupeGuruWithResults(TestCase):
def test_only_unicode_is_added_to_ignore_list(self):
def FakeIgnore(first,second):
if not isinstance(first,unicode):
if not isinstance(first,str):
self.fail()
if not isinstance(second,unicode):
if not isinstance(second,str):
self.fail()
app = self.app
@ -423,11 +423,11 @@ class TCDupeGuruWithResults(TestCase):
class TCDupeGuru_renameSelected(TestCase):
def setUp(self):
p = self.tmppath()
fp = open(unicode(p + 'foo bar 1'),mode='w')
fp = open(str(p + 'foo bar 1'),mode='w')
fp.close()
fp = open(unicode(p + 'foo bar 2'),mode='w')
fp = open(str(p + 'foo bar 2'),mode='w')
fp.close()
fp = open(unicode(p + 'foo bar 3'),mode='w')
fp = open(str(p + 'foo bar 3'),mode='w')
fp.close()
files = fs.get_files(p)
matches = engine.getmatches(files)
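
The test's falsy file object is renamed from __nonzero__ to __bool__, the protocol name Python 3 actually consults. A sketch:

class FakeFile:
    def __bool__(self):   # Python 3 looks up __bool__; __nonzero__ is silently ignored
        return False

assert not FakeFile()     # truth-testing now goes through __bool__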

View File

@ -82,8 +82,8 @@ class TCDirectories(TestCase):
def test_AddPath_non_latin(self):
p = Path(self.tmpdir())
to_add = p + u'unicode\u201a'
os.mkdir(unicode(to_add))
to_add = p + 'unicode\u201a'
os.mkdir(str(to_add))
d = Directories()
try:
d.add_path(to_add)
@ -111,7 +111,7 @@ class TCDirectories(TestCase):
self.assertEqual(STATE_REFERENCE,d.get_state(p))
self.assertEqual(STATE_REFERENCE,d.get_state(p + 'dir1'))
self.assertEqual(1,len(d.states))
self.assertEqual(p,d.states.keys()[0])
self.assertEqual(p,list(d.states.keys())[0])
self.assertEqual(STATE_REFERENCE,d.states[p])
def test_get_state_with_path_not_there(self):
@ -213,11 +213,11 @@ class TCDirectories(TestCase):
def test_unicode_save(self):
d = Directories()
p1 = self.tmppath() + u'hello\xe9'
p1 = self.tmppath() + 'hello\xe9'
io.mkdir(p1)
io.mkdir(p1 + u'foo\xe9')
io.mkdir(p1 + 'foo\xe9')
d.add_path(p1)
d.set_state(p1 + u'foo\xe9', STATE_EXCLUDED)
d.set_state(p1 + 'foo\xe9', STATE_EXCLUDED)
tmpxml = op.join(self.tmpdir(), 'directories_testunit.xml')
try:
d.save_to_file(tmpxml)

View File

@ -62,12 +62,12 @@ class TCgetwords(TestCase):
def test_splitter_chars(self):
self.assertEqual(
[chr(i) for i in xrange(ord('a'),ord('z')+1)],
[chr(i) for i in range(ord('a'),ord('z')+1)],
getwords("a-b_c&d+e(f)g;h\\i[j]k{l}m:n.o,p<q>r/s?t~u!v@w#x$y*z")
)
def test_joiner_chars(self):
self.assertEqual(["aec"], getwords(u"a'e\u0301c"))
self.assertEqual(["aec"], getwords("a'e\u0301c"))
def test_empty(self):
self.assertEqual([], getwords(''))
@ -76,7 +76,7 @@ class TCgetwords(TestCase):
self.assertEqual(['foo', 'bar'], getwords('FOO BAR'))
def test_decompose_unicode(self):
self.assertEqual(getwords(u'foo\xe9bar'), ['fooebar'])
self.assertEqual(getwords('foo\xe9bar'), ['fooebar'])
class TCgetfields(TestCase):
@ -768,7 +768,7 @@ class TCget_groups(TestCase):
self.assert_(o3 in g)
def test_four_sized_group(self):
l = [NamedObject("foobar") for i in xrange(4)]
l = [NamedObject("foobar") for i in range(4)]
m = getmatches(l)
r = get_groups(m)
self.assertEqual(1,len(r))

View File

@ -6,7 +6,7 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
import cStringIO
import io
from lxml import etree
from hsutil.testutil import eq_
@ -59,7 +59,7 @@ def test_save_to_xml():
il.Ignore('foo','bar')
il.Ignore('foo','bleh')
il.Ignore('bleh','bar')
f = cStringIO.StringIO()
f = io.BytesIO()
il.save_to_xml(f)
f.seek(0)
doc = etree.parse(f)
@ -76,19 +76,19 @@ def test_SaveThenLoad():
il.Ignore('foo', 'bar')
il.Ignore('foo', 'bleh')
il.Ignore('bleh', 'bar')
il.Ignore(u'\u00e9', 'bar')
f = cStringIO.StringIO()
il.Ignore('\u00e9', 'bar')
f = io.BytesIO()
il.save_to_xml(f)
f.seek(0)
f.seek(0)
il = IgnoreList()
il.load_from_xml(f)
eq_(4,len(il))
assert il.AreIgnored(u'\u00e9','bar')
assert il.AreIgnored('\u00e9','bar')
def test_LoadXML_with_empty_file_tags():
f = cStringIO.StringIO()
f.write('<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>')
f = io.BytesIO()
f.write(b'<?xml version="1.0" encoding="utf-8"?><ignore_list><file><file/></file></ignore_list>')
f.seek(0)
il = IgnoreList()
il.load_from_xml(f)
@ -130,12 +130,12 @@ def test_filter():
def test_save_with_non_ascii_items():
il = IgnoreList()
il.Ignore(u'\xac', u'\xbf')
f = cStringIO.StringIO()
il.Ignore('\xac', '\xbf')
f = io.BytesIO()
try:
il.save_to_xml(f)
except Exception as e:
raise AssertionError(unicode(e))
raise AssertionError(str(e))
def test_len():
il = IgnoreList()
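
cStringIO is gone in Python 3, and since lxml reads and writes bytes, the tests switch to io.BytesIO plus bytes literals. A sketch:

import io
f = io.BytesIO()                                  # replaces cStringIO.StringIO()
f.write(b'<?xml version="1.0"?><ignore_list/>')   # note the b'' prefix
f.seek(0)
f.read()                                          # b'<?xml version="1.0"?><ignore_list/>'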

View File

@ -7,7 +7,7 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/hs_license
import StringIO
import io
import os.path as op
from lxml import etree
@ -25,7 +25,7 @@ class NamedObject(engine_test.NamedObject):
path = property(lambda x:Path('basepath') + x.name)
is_ref = False
def __nonzero__(self):
def __bool__(self):
return False #Make sure that operations are made correctly when the bool value of files is false.
# Returns a group set that looks like that:
@ -63,7 +63,7 @@ class TCResultsEmpty(TestCase):
self.assert_(self.results.get_group_of_duplicate('foo') is None)
def test_save_to_xml(self):
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
doc = etree.parse(f)
@ -324,7 +324,7 @@ class TCResultsMarkings(TestCase):
def test_SaveXML(self):
self.results.mark(self.objects[1])
self.results.mark_invert()
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
doc = etree.parse(f)
@ -345,7 +345,7 @@ class TCResultsMarkings(TestCase):
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
self.results.mark(self.objects[1])
self.results.mark_invert()
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
r = Results(data)
@ -369,7 +369,7 @@ class TCResultsXML(TestCase):
def test_save_to_xml(self):
self.objects[0].is_ref = True
self.objects[0].words = [['foo','bar']]
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
doc = etree.parse(f)
@ -408,7 +408,7 @@ class TCResultsXML(TestCase):
self.objects[0].is_ref = True
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
r = Results(data)
@ -451,7 +451,7 @@ class TCResultsXML(TestCase):
return [f for f in self.objects if str(f.path) == path][0]
self.objects[4].name = 'ibabtu 2' #we can't have 2 files with the same path
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
r = Results(data)
@ -490,7 +490,7 @@ class TCResultsXML(TestCase):
match_node.set('percentage', 'baz')
group_node = etree.SubElement(root, 'foobar') #invalid group
group_node = etree.SubElement(root, 'group') #empty group
f = StringIO.StringIO()
f = io.BytesIO()
tree = etree.ElementTree(root)
tree.write(f, encoding='utf-8')
f.seek(0)
@ -501,30 +501,30 @@ class TCResultsXML(TestCase):
def test_xml_non_ascii(self):
def get_file(path):
if path == op.join('basepath',u'\xe9foo bar'):
if path == op.join('basepath','\xe9foo bar'):
return objects[0]
if path == op.join('basepath',u'bar bleh'):
if path == op.join('basepath','bar bleh'):
return objects[1]
objects = [NamedObject(u"\xe9foo bar",True),NamedObject("bar bleh",True)]
objects = [NamedObject("\xe9foo bar",True),NamedObject("bar bleh",True)]
matches = engine.getmatches(objects) #we should have 5 matches
groups = engine.get_groups(matches) #We should have 2 groups
for g in groups:
g.prioritize(lambda x:objects.index(x)) #We want the dupes to be in the same order as the list is
results = Results(data)
results.groups = groups
f = StringIO.StringIO()
f = io.BytesIO()
results.save_to_xml(f)
f.seek(0)
r = Results(data)
r.load_from_xml(f,get_file)
g = r.groups[0]
self.assertEqual(u"\xe9foo bar",g[0].name)
self.assertEqual("\xe9foo bar",g[0].name)
self.assertEqual(['efoo','bar'],g[0].words)
def test_load_invalid_xml(self):
f = StringIO.StringIO()
f.write('<this is invalid')
f = io.BytesIO()
f.write(b'<this is invalid')
f.seek(0)
r = Results(data)
r.load_from_xml(f,None)
@ -546,7 +546,7 @@ class TCResultsXML(TestCase):
fake_matches.add(engine.Match(d1, d3, 43))
fake_matches.add(engine.Match(d2, d3, 46))
group.matches = fake_matches
f = StringIO.StringIO()
f = io.BytesIO()
results = self.results
results.save_to_xml(f)
f.seek(0)
@ -564,7 +564,7 @@ class TCResultsXML(TestCase):
def test_save_and_load(self):
# previously, when reloading matches, they wouldn't be reloaded as namedtuples
f = StringIO.StringIO()
f = io.BytesIO()
self.results.save_to_xml(f)
f.seek(0)
self.results.load_from_xml(f, self.get_file)
@ -572,13 +572,13 @@ class TCResultsXML(TestCase):
def test_apply_filter_works_on_paths(self):
# apply_filter() searches on the whole path, not just on the filename.
self.results.apply_filter(u'basepath')
self.results.apply_filter('basepath')
eq_(len(self.results.groups), 2)
def test_save_xml_with_invalid_characters(self):
# Don't crash when saving files that have invalid xml characters in their path
self.objects[0].name = u'foo\x19'
self.results.save_to_xml(StringIO.StringIO()) # don't crash
self.objects[0].name = 'foo\x19'
self.results.save_to_xml(io.BytesIO()) # don't crash
class TCResultsFilter(TestCase):

View File

@ -25,6 +25,9 @@ class NamedObject(object):
self.path = Path('')
self.words = getwords(name)
def __repr__(self):
return '<NamedObject %r>' % self.name
no = NamedObject
@ -297,8 +300,8 @@ class ScannerTestFakeFiles(TestCase):
s.scanned_tags = set(['title'])
o1 = no('foo')
o2 = no('bar')
o1.title = u'foobar\u00e9'
o2.title = u'foobar\u00e9'
o1.title = 'foobar\u00e9'
o2.title = 'foobar\u00e9'
try:
r = s.GetDupeGroups([o1, o2])
except UnicodeEncodeError:
@ -362,11 +365,11 @@ class ScannerTestFakeFiles(TestCase):
f1 = no('foobar')
f2 = no('foobar')
f3 = no('foobar')
f1.path = Path(u'foo1\u00e9')
f2.path = Path(u'foo2\u00e9')
f3.path = Path(u'foo3\u00e9')
s.ignore_list.Ignore(unicode(f1.path),unicode(f2.path))
s.ignore_list.Ignore(unicode(f1.path),unicode(f3.path))
f1.path = Path('foo1\u00e9')
f2.path = Path('foo2\u00e9')
f3.path = Path('foo3\u00e9')
s.ignore_list.Ignore(str(f1.path),str(f2.path))
s.ignore_list.Ignore(str(f1.path),str(f3.path))
r = s.GetDupeGroups([f1,f2,f3])
eq_(len(r), 1)
g = r[0]
@ -379,7 +382,7 @@ class ScannerTestFakeFiles(TestCase):
# A very wrong way to use any() was added at some point, causing the resulting group list
# to be empty.
class FalseNamedObject(NamedObject):
def __nonzero__(self):
def __bool__(self):
return False

View File

@ -41,7 +41,7 @@ class DupeGuruME(DupeGuruBase):
try:
track.delete(timeout=0)
except CommandError as e:
logging.warning('Error while trying to remove a track from iTunes: %s' % unicode(e))
logging.warning('Error while trying to remove a track from iTunes: %s' % str(e))
self._start_job(JOB_REMOVE_DEAD_TRACKS, do)

View File

@ -42,12 +42,12 @@ class Mp3File(MusicFile):
HANDLED_EXTS = set(['mp3'])
def _read_info(self, field):
if field == 'md5partial':
fileinfo = mpeg.Mpeg(unicode(self.path))
fileinfo = mpeg.Mpeg(str(self.path))
self._md5partial_offset = fileinfo.audio_offset
self._md5partial_size = fileinfo.audio_size
MusicFile._read_info(self, field)
if field in TAG_FIELDS:
fileinfo = mpeg.Mpeg(unicode(self.path))
fileinfo = mpeg.Mpeg(str(self.path))
self.audiosize = fileinfo.audio_size
self.bitrate = fileinfo.bitrate
self.duration = fileinfo.duration
@ -70,12 +70,12 @@ class WmaFile(MusicFile):
HANDLED_EXTS = set(['wma'])
def _read_info(self, field):
if field == 'md5partial':
dec = wma.WMADecoder(unicode(self.path))
dec = wma.WMADecoder(str(self.path))
self._md5partial_offset = dec.audio_offset
self._md5partial_size = dec.audio_size
MusicFile._read_info(self, field)
if field in TAG_FIELDS:
dec = wma.WMADecoder(unicode(self.path))
dec = wma.WMADecoder(str(self.path))
self.audiosize = dec.audio_size
self.bitrate = dec.bitrate
self.duration = dec.duration
@ -92,13 +92,13 @@ class Mp4File(MusicFile):
HANDLED_EXTS = set(['m4a', 'm4p'])
def _read_info(self, field):
if field == 'md5partial':
dec = mp4.File(unicode(self.path))
dec = mp4.File(str(self.path))
self._md5partial_offset = dec.audio_offset
self._md5partial_size = dec.audio_size