2009-08-05 08:59:46 +00:00
|
|
|
# Created By: Virgil Dupras
|
|
|
|
# Created On: 2006/11/11
|
2010-01-01 20:11:34 +00:00
|
|
|
# Copyright 2010 Hardcoded Software (http://www.hardcoded.net)
|
2009-08-05 08:59:46 +00:00
|
|
|
#
|
2010-09-30 10:17:41 +00:00
|
|
|
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
|
2009-08-05 08:59:46 +00:00
|
|
|
# which should be included with this package. The terms are also available at
|
2010-09-30 10:17:41 +00:00
|
|
|
# http://www.hardcoded.net/licenses/bsd_license
|
2009-08-05 08:59:46 +00:00
|
|
|
|
2009-06-01 09:55:11 +00:00
|
|
|
import os
|
|
|
|
import os.path as op
|
|
|
|
import logging
|
2010-04-12 15:43:24 +00:00
|
|
|
import subprocess
|
2010-04-13 08:02:09 +00:00
|
|
|
import re
|
2009-06-01 09:55:11 +00:00
|
|
|
|
2010-04-07 07:11:36 +00:00
|
|
|
from send2trash import send2trash
|
2010-09-29 14:49:50 +00:00
|
|
|
from hscommon.reg import RegistrableApplication
|
2010-07-13 06:08:18 +00:00
|
|
|
from hscommon.notify import Broadcaster
|
2009-10-23 12:56:52 +00:00
|
|
|
from hsutil import io, files
|
2009-06-01 09:55:11 +00:00
|
|
|
from hsutil.path import Path
|
|
|
|
from hsutil.misc import flatten, first
|
|
|
|
from hsutil.str import escape
|
|
|
|
|
2009-10-23 12:56:52 +00:00
|
|
|
from . import directories, results, scanner, export, fs
|
2009-06-01 09:55:11 +00:00
|
|
|
|
|
|
|
# Identifiers of the asynchronous jobs; they are handed to ``_start_job`` and come back
# through ``_job_completed``.
JOB_SCAN, JOB_LOAD, JOB_MOVE, JOB_COPY, JOB_DELETE = (
    'job_scan',
    'job_load',
    'job_move',
    'job_copy',
    'job_delete',
)
|
|
|
|
|
|
|
|
class NoScannableFileError(Exception):
    """Raised by ``start_scanning`` when ``directories.has_any_file()`` is false."""
|
|
|
|
|
2010-02-05 20:09:04 +00:00
|
|
|
class DupeGuru(RegistrableApplication, Broadcaster):
|
2010-09-29 14:49:50 +00:00
|
|
|
def __init__(self, data_module, appdata):
    """Initialise the application core.

    data_module -- stored as ``self.data`` and handed to ``results.Results``.
    appdata -- path of the application data folder; it is created when it does not exist.
    """
    RegistrableApplication.__init__(self, appid=1)
    Broadcaster.__init__(self)
    self.appdata = appdata
    if not op.exists(appdata):
        # The folder is used by the save/load methods, so make sure it is there.
        os.makedirs(appdata)
    self.data = data_module
    self.directories = directories.Directories()
    self.results = results.Results(data_module)
    self.scanner = scanner.Scanner()
    self.options = dict(
        escape_filter_regexp=True,
        clean_empty_dirs=False,
        ignore_hardlink_matches=False,
    )
    self.selected_dupes = []
|
2009-06-01 09:55:11 +00:00
|
|
|
|
2010-09-25 13:37:18 +00:00
|
|
|
def _do_delete(self, j, replace_with_hardlinks):
    """Job body used by ``delete_marked``: run ``_do_delete_dupe`` on every marked dupe.

    j -- job object; receives ``start_job(mark_count)`` and one ``add_progress()`` per dupe.
    replace_with_hardlinks -- forwarded unchanged to ``_do_delete_dupe``.
    """
    def delete_one(dupe):
        j.add_progress()
        return self._do_delete_dupe(dupe, replace_with_hardlinks)

    j.start_job(self.results.mark_count)
    # NOTE(review): the ``True`` flag presumably asks Results to drop the processed dupes;
    # confirm against ``Results.perform_on_marked``.
    self.results.perform_on_marked(delete_one, True)
|
2009-06-01 09:55:11 +00:00
|
|
|
|
2010-09-25 13:37:18 +00:00
|
|
|
def _do_delete_dupe(self, dupe, replace_with_hardlinks):
    """Send `dupe` to the trash, optionally leaving a hardlink to its group's ref behind.

    Nothing happens when the dupe's path no longer exists. When `replace_with_hardlinks`
    is true, a hardlink to the ref file of the dupe's group is created at the dupe's old
    path. The dupe's parent folder is then passed to ``clean_empty_dirs``.
    """
    if io.exists(dupe.path):
        send2trash(str(dupe.path)) # Raises OSError when there's a problem
        if replace_with_hardlinks:
            ref = self.results.get_group_of_duplicate(dupe).ref
            os.link(str(ref.path), str(dupe.path))
        self.clean_empty_dirs(dupe.path[:-1])
|
|
|
|
|
|
|
|
def _do_load(self, j):
    """Job body used by ``load``: restore directories, then results, from the appdata folder.

    j -- job object, forwarded to ``results.load_from_xml``.
    """
    directories_file = op.join(self.appdata, 'last_directories.xml')
    self.directories.load_from_file(directories_file)
    self.notify('directories_changed')
    results_file = op.join(self.appdata, 'last_results.xml')
    self.results.load_from_xml(results_file, self._get_file, j)
|
|
|
|
|
2009-09-02 10:21:11 +00:00
|
|
|
def _get_display_info(self, dupe, group, delta=False):
    """Return the display values of `dupe`, one entry per column of ``self.data.COLUMNS``.

    A row of ``'---'`` placeholders is returned when `dupe` or `group` is None, or when
    ``self.data.GetDisplayInfo`` raises (the error is logged as a warning).
    """
    def blank_row():
        return ['---'] * len(self.data.COLUMNS)

    if dupe is None or group is None:
        return blank_row()
    try:
        info = self.data.GetDisplayInfo(dupe, group, delta)
    except Exception as e:
        logging.warning("Exception on GetDisplayInfo for %s: %s", str(dupe.path), str(e))
        return blank_row()
    return info
|
|
|
|
|
2009-06-01 09:55:11 +00:00
|
|
|
def _get_file(self, str_path):
    """Return a file object for `str_path` with its metadata read, or None.

    None is returned when ``fs.get_file`` yields nothing for the path, or when reading
    the attributes named by ``self.data.METADATA_TO_READ`` raises EnvironmentError.
    """
    location = Path(str_path)
    candidate = fs.get_file(location, self.directories.fileclasses)
    if candidate is not None:
        try:
            candidate._read_all_info(attrnames=self.data.METADATA_TO_READ)
        except EnvironmentError:
            candidate = None
    return candidate
|
2009-06-01 09:55:11 +00:00
|
|
|
|
2010-02-11 16:52:18 +00:00
|
|
|
def _job_completed(self, jobid):
    """Broadcast the notifications that follow the completion of job `jobid`.

    Must be called by subclasses when they detect that an async job is completed.
    A scan notifies 'results_changed'; load, move and delete notify 'results_changed'
    followed by 'problems_changed'. Any other job id notifies nothing.
    """
    if jobid in (JOB_SCAN, JOB_LOAD, JOB_MOVE, JOB_DELETE):
        self.notify('results_changed')
        if jobid != JOB_SCAN:
            self.notify('problems_changed')
|
2010-02-11 16:52:18 +00:00
|
|
|
|
2010-02-06 11:36:43 +00:00
|
|
|
@staticmethod
def _open_path(path):
    """Hook called by ``open_selected`` with the path of the first selected dupe.

    This base version only raises NotImplementedError; presumably each front-end
    overrides it (confirm in subclasses).
    """
    raise NotImplementedError()
|
|
|
|
|
2010-02-06 14:31:35 +00:00
|
|
|
@staticmethod
def _reveal_path(path):
    """Hook called by ``reveal_selected`` with the path of the first selected dupe.

    This base version only raises NotImplementedError; presumably each front-end
    overrides it (confirm in subclasses).
    """
    raise NotImplementedError()
|
|
|
|
|
2010-09-25 10:28:34 +00:00
|
|
|
@staticmethod
def _remove_hardlink_dupes(files):
    """Return `files` (same order) keeping only the first file seen for each inode.

    files -- iterable of objects exposing a ``path`` accepted by ``io.stat``.

    A file is identified by the pair (st_dev, st_ino): an inode number is only unique
    within one device, so comparing st_ino alone would wrongly drop unrelated files
    living on different volumes. A zero st_ino does not identify a file at all (platforms
    without inode support report 0 for everything), so such files are always kept.
    """
    seen_ids = set()
    result = []
    for file in files:
        # assumes io.stat returns an os.stat-like result carrying st_dev -- TODO confirm
        info = io.stat(file.path)
        if info.st_ino:
            file_id = (info.st_dev, info.st_ino)
            if file_id in seen_ids:
                # Another hardlink to a file we already kept.
                continue
            seen_ids.add(file_id)
        result.append(file)
    return result
|
|
|
|
|
2010-02-05 20:09:04 +00:00
|
|
|
def _select_dupes(self, dupes):
    """Store `dupes` as the current selection and notify 'dupes_selected'.

    Nothing is stored or notified when `dupes` equals the current selection.
    """
    if dupes != self.selected_dupes:
        self.selected_dupes = dupes
        self.notify('dupes_selected')
|
|
|
|
|
2010-09-25 13:37:18 +00:00
|
|
|
def _start_job(self, jobid, func, *args):
    """Hook through which every job of this class is launched.

    The callable is meant to be invoked as ``func(j, *args)``. This base version only
    raises NotImplementedError.
    """
    raise NotImplementedError()
|
|
|
|
|
2009-06-07 07:11:52 +00:00
|
|
|
def add_directory(self, d):
    """Add directory `d` to ``self.directories`` and notify 'directories_changed'.

    Returns 0 on success, 1 on ``directories.AlreadyThereError`` and 2 on
    ``directories.InvalidPathError``.
    """
    status = 0
    try:
        self.directories.add_path(Path(d))
        self.notify('directories_changed')
    except directories.AlreadyThereError:
        status = 1
    except directories.InvalidPathError:
        status = 2
    return status
|
|
|
|
|
2010-02-06 11:12:20 +00:00
|
|
|
def add_selected_to_ignore_list(self):
    """Ignore each selected non-ref dupe against its group mates, then remove those dupes.

    For every selected dupe that is not a ref, one ignore-list entry is recorded per
    other member of its group. The same dupes are then passed to ``remove_duplicates``.
    """
    dupes = self.without_ref(self.selected_dupes)
    for dupe in dupes:
        group = self.results.get_group_of_duplicate(dupe)
        for other in (member for member in group if member is not dupe):
            self.scanner.ignore_list.Ignore(str(other.path), str(dupe.path))
    self.remove_duplicates(dupes)
|
|
|
|
|
2009-06-07 07:14:47 +00:00
|
|
|
def apply_filter(self, filter):
    """Apply `filter` to the results and notify 'results_changed'.

    ``None`` is applied to the results first, then the new filter. When the
    'escape_filter_regexp' option is on, the characters ``()[]\\.|+?^`` are escaped and
    ``*`` is escaped with ``.`` before the filter is applied.
    """
    results = self.results
    results.apply_filter(None)
    if self.options['escape_filter_regexp']:
        filter = escape(escape(filter, '()[]\\.|+?^'), '*', '.')
    results.apply_filter(filter)
    self.notify('results_changed')
|
2009-06-01 09:55:11 +00:00
|
|
|
|
|
|
|
def clean_empty_dirs(self, path):
    """Delete `path` and then its successive parents for as long as each one is empty.

    Does nothing unless the 'clean_empty_dirs' option is on. ``'.DS_Store'`` is passed to
    ``files.delete_if_empty`` as the list of files to disregard.
    """
    if not self.options['clean_empty_dirs']:
        return
    while files.delete_if_empty(path, ['.DS_Store']):
        # That level is gone; try its parent next.
        path = path[:-1]
|
|
|
|
|
2009-06-07 07:15:56 +00:00
|
|
|
def copy_or_move(self, dupe, copy, destination, dest_type):
    """Copy or move `dupe` to `destination`.

    copy: True = Copy  False = Move
    destination: string.
    dest_type: 0 = right in destination.
               1 = relative re-creation.
               2 = absolute re-creation.

    The destination folder is created when missing. After a move, the source folder is
    handed to ``clean_empty_dirs``.
    """
    source_path = dupe.path
    location_path = first(p for p in self.directories if dupe.path in p)
    dest_path = Path(destination)
    if dest_type == 1:
        # Re-create the path relative to the directory the dupe was found under.
        dest_path = dest_path + source_path[location_path:-1]
    elif dest_type == 2:
        # Remove drive letter and filename
        dest_path = dest_path + source_path[1:-1]
    if not io.exists(dest_path):
        io.makedirs(dest_path)
    # Raises an EnvironmentError if there's a problem
    if not copy:
        files.move(source_path, dest_path)
        self.clean_empty_dirs(source_path[:-1])
    else:
        files.copy(source_path, dest_path)
|
2009-06-01 09:55:11 +00:00
|
|
|
|
|
|
|
def copy_or_move_marked(self, copy, destination, recreate_path):
    """Start a job that copies (or moves) every marked dupe to `destination`.

    copy -- True starts a JOB_COPY job, False a JOB_MOVE job.
    recreate_path -- forwarded to ``copy_or_move`` as its ``dest_type`` argument.
    """
    def run(j):
        def transfer(dupe):
            j.add_progress()
            self.copy_or_move(dupe, copy, destination, recreate_path)

        j.start_job(self.results.mark_count)
        # The second argument is False for a copy and True for a move.
        self.results.perform_on_marked(transfer, not copy)

    self._start_job(JOB_COPY if copy else JOB_MOVE, run)
|
|
|
|
|
2010-09-25 13:37:18 +00:00
|
|
|
def delete_marked(self, replace_with_hardlinks=False):
    """Start the JOB_DELETE job; `replace_with_hardlinks` is forwarded to ``_do_delete``."""
    job_body = self._do_delete
    self._start_job(JOB_DELETE, job_body, replace_with_hardlinks)
|
2009-06-01 09:55:11 +00:00
|
|
|
|
2009-09-01 14:05:00 +00:00
|
|
|
def export_to_xhtml(self, column_ids):
    """Export the current results through ``export.export_to_xhtml`` and return its result.

    column_ids -- iterable of strings; only those that are decimal indexes of an existing
    entry of ``self.data.COLUMNS`` are exported. Anything else (non-numeric ids,
    duplicates, indexes past the last column) is ignored, so the header and every row
    always have matching cells.

    Each row starts with a boolean telling whether the dupe is *not* its group's ref,
    followed by the display value of each exported column, in ascending column order.
    """
    # isdecimal() rather than isdigit(): the latter also accepts characters such as '²'
    # that int() refuses with a ValueError.
    requested = {int(colid) for colid in column_ids if colid.isdecimal()}
    columns = [(i, col) for i, col in enumerate(self.data.COLUMNS) if i in requested]
    column_ids = [i for i, _ in columns]
    colnames = [col['display'] for _, col in columns]
    rows = []
    for group in self.results.groups:
        for dupe in group:
            data = self._get_display_info(dupe, group)
            row = [dupe is not group.ref]
            row.extend(data[colid] for colid in column_ids)
            rows.append(row)
    return export.export_to_xhtml(colnames, rows)
|
|
|
|
|
2010-04-12 15:43:24 +00:00
|
|
|
def invoke_command(self, cmd):
    """Calls command `cmd` with %d and %r placeholders replaced.

    Using the current selection, %d is replaced with the currently selected dupe and %r is
    replaced with that dupe's ref file. If there's no selection, the command is not invoked.
    If the dupe is a ref, %d and %r will be the same.

    The command is started through the shell and not waited for; nothing is returned.
    """
    if not self.selected_dupes:
        return
    dupe = self.selected_dupes[0]
    group = self.results.get_group_of_duplicate(dupe)
    ref = group.ref
    substitutions = {'%d': str(dupe.path), '%r': str(ref.path)}
    # Both placeholders are expanded in a single pass, so a "%r" (or "%d") that happens to
    # be part of an inserted path is left alone instead of being expanded a second time.
    # A function is used as replacement so that backslashes in paths are taken literally.
    cmd = re.sub('%[dr]', lambda m: substitutions[m.group(0)], cmd)
    # NOTE(review): paths are inserted verbatim and the result goes through the shell; the
    # configured command is expected to quote its placeholders itself.
    match = re.match(r'"([^"]+)"(.*)', cmd)
    if match is not None:
        # This code here is because subprocess. Popen doesn't seem to accept, under Windows,
        # executable paths with spaces in it, *even* when they're enclosed in "". So this is
        # a workaround to make the damn thing work.
        exepath, args = match.groups()
        path, exename = op.split(exepath)
        subprocess.Popen(exename + args, shell=True, cwd=path)
    else:
        subprocess.Popen(cmd, shell=True)
|
2010-04-12 15:43:24 +00:00
|
|
|
|
2009-06-01 09:55:11 +00:00
|
|
|
def load(self):
    """Start the JOB_LOAD job running ``_do_load``, then load the ignore list."""
    job_body = self._do_load
    self._start_job(JOB_LOAD, job_body)
    self.load_ignore_list()
|
2009-06-01 09:55:11 +00:00
|
|
|
|
2010-08-13 11:06:18 +00:00
|
|
|
def load_from(self, filename):
    """Start a JOB_LOAD job that loads the results from the XML file `filename`."""
    def load_results(j):
        self.results.load_from_xml(filename, self._get_file, j)

    self._start_job(JOB_LOAD, load_results)
|
|
|
|
|
2009-06-07 07:16:58 +00:00
|
|
|
def load_ignore_list(self):
    """Load the scanner's ignore list from 'ignore_list.xml' in the appdata folder."""
    ignore_list_file = op.join(self.appdata, 'ignore_list.xml')
    self.scanner.ignore_list.load_from_xml(ignore_list_file)
|
|
|
|
|
2010-02-06 11:27:11 +00:00
|
|
|
def make_selected_reference(self):
    """Make the selected dupes the ref of their group, at most one change per group.

    Selected refs are skipped. When several selected dupes share a group, only the first
    one becomes the ref. 'results_changed_but_keep_selection' is notified at the end.
    """
    candidates = self.without_ref(self.selected_dupes)
    changed_groups = set()
    for dupe in candidates:
        group = self.results.get_group_of_duplicate(dupe)
        if group in changed_groups:
            continue
        self.results.make_ref(dupe)
        changed_groups.add(group)
    self.notify('results_changed_but_keep_selection')
|
2009-06-01 09:55:11 +00:00
|
|
|
|
2010-02-11 17:47:45 +00:00
|
|
|
def mark_all(self):
    """Call ``results.mark_all()`` and notify 'marking_changed'."""
    results = self.results
    results.mark_all()
    self.notify('marking_changed')
|
2010-02-11 17:47:45 +00:00
|
|
|
|
|
|
|
def mark_none(self):
    """Call ``results.mark_none()`` and notify 'marking_changed'."""
    results = self.results
    results.mark_none()
    self.notify('marking_changed')
|
2010-02-11 17:47:45 +00:00
|
|
|
|
|
|
|
def mark_invert(self):
    """Call ``results.mark_invert()`` and notify 'marking_changed'."""
    results = self.results
    results.mark_invert()
    self.notify('marking_changed')
|
2010-02-11 17:47:45 +00:00
|
|
|
|
2010-02-12 11:30:00 +00:00
|
|
|
def mark_dupe(self, dupe, marked):
    """Mark `dupe` when `marked` is true, unmark it otherwise; notify 'marking_changed'."""
    change_mark = self.results.mark if marked else self.results.unmark
    change_mark(dupe)
    self.notify('marking_changed')
|
|
|
|
|
2010-02-06 11:36:43 +00:00
|
|
|
def open_selected(self):
    """Pass the path of the first selected dupe to ``_open_path``; no-op without selection."""
    if not self.selected_dupes:
        return
    first_selected = self.selected_dupes[0]
    self._open_path(first_selected.path)
|
|
|
|
|
2010-02-12 11:43:50 +00:00
|
|
|
def purge_ignore_list(self):
    """Filter the ignore list with a predicate that is true when both paths exist on disk."""
    def both_exist(first_path, second_path):
        return op.exists(first_path) and op.exists(second_path)

    self.scanner.ignore_list.Filter(both_exist)
|
|
|
|
|
2010-02-07 14:26:50 +00:00
|
|
|
def remove_directory(self, index):
    """Delete the directory at position `index` and notify 'directories_changed'.

    An IndexError (for instance an out-of-range `index`) is silently ignored and nothing
    is notified in that case.
    """
    try:
        del self.directories[index]
        self.notify('directories_changed')
    except IndexError:
        # Nothing at that position: leave the list untouched.
        pass
|
|
|
|
|
2010-02-06 11:12:20 +00:00
|
|
|
def remove_duplicates(self, duplicates):
    """Remove the non-ref members of `duplicates` from the results.

    Notifies 'results_changed_but_keep_selection' afterwards.
    """
    removable = self.without_ref(duplicates)
    self.results.remove_duplicates(removable)
    self.notify('results_changed_but_keep_selection')
|
2010-02-06 11:12:20 +00:00
|
|
|
|
2010-02-12 12:39:50 +00:00
|
|
|
def remove_marked(self):
    """Run a do-nothing operation on the marked dupes, then notify 'results_changed'.

    NOTE(review): the ``True`` flag presumably makes Results drop the marked dupes;
    confirm against ``Results.perform_on_marked``.
    """
    def do_nothing(dupe):
        return None

    self.results.perform_on_marked(do_nothing, True)
    self.notify('results_changed')
|
|
|
|
|
2010-02-06 11:44:21 +00:00
|
|
|
def remove_selected(self):
    """Pass the current selection to ``remove_duplicates``."""
    selection = self.selected_dupes
    self.remove_duplicates(selection)
|
|
|
|
|
2010-02-12 11:43:50 +00:00
|
|
|
def rename_selected(self, newname):
    """Rename the first selected dupe to `newname`.

    Returns True on success. Returns False, after logging a warning, when there is no
    selection (IndexError) or the rename raises ``fs.FSError``.
    """
    try:
        self.selected_dupes[0].rename(newname)
    except (IndexError, fs.FSError) as e:
        logging.warning("dupeGuru Warning: %s" % str(e))
        return False
    return True
|
|
|
|
|
2010-02-06 14:31:35 +00:00
|
|
|
def reveal_selected(self):
    """Pass the path of the first selected dupe to ``_reveal_path``; no-op without selection."""
    if not self.selected_dupes:
        return
    first_selected = self.selected_dupes[0]
    self._reveal_path(first_selected.path)
|
|
|
|
|
2009-06-07 07:18:59 +00:00
|
|
|
def save(self):
    """Save directories, and results when modified, into the appdata folder.

    The folder is created first when it does not exist. 'last_results.xml' is only
    written when ``results.is_modified`` is true.
    """
    appdata = self.appdata
    if not op.exists(appdata):
        os.makedirs(appdata)
    self.directories.save_to_file(op.join(appdata, 'last_directories.xml'))
    results = self.results
    if results.is_modified:
        results.save_to_xml(op.join(appdata, 'last_results.xml'))
|
2009-06-01 09:55:11 +00:00
|
|
|
|
2010-08-13 11:06:18 +00:00
|
|
|
def save_as(self, filename):
    """Save the results to the XML file `filename`, keeping them flagged as modified."""
    results = self.results
    results.save_to_xml(filename)
    # Saving here does not mean we no longer want to save to appdata on quit, so the
    # modified flag is (re)set after the save.
    results.is_modified = True
|
|
|
|
|
2009-06-07 07:17:56 +00:00
|
|
|
def save_ignore_list(self):
    """Save the scanner's ignore list to 'ignore_list.xml' in the appdata folder.

    The folder is created first when it does not exist.
    """
    appdata = self.appdata
    if not op.exists(appdata):
        os.makedirs(appdata)
    ignore_list_file = op.join(appdata, 'ignore_list.xml')
    self.scanner.ignore_list.save_to_xml(ignore_list_file)
|
|
|
|
|
|
|
|
def start_scanning(self):
    """Reset the result groups and start the JOB_SCAN job.

    Raises NoScannableFileError, before anything is reset or started, when
    ``directories.has_any_file()`` is false.
    """
    def scan(j):
        j.set_progress(0, 'Collecting files to scan')
        to_scan = list(self.directories.get_files())
        if self.options['ignore_hardlink_matches']:
            to_scan = self._remove_hardlink_dupes(to_scan)
        logging.info('Scanning %d files' % len(to_scan))
        self.results.groups = self.scanner.GetDupeGroups(to_scan, j)

    if not self.directories.has_any_file():
        raise NoScannableFileError()
    self.results.groups = []
    self._start_job(JOB_SCAN, scan)
|
|
|
|
|
2010-02-11 16:52:18 +00:00
|
|
|
def toggle_selected_mark_state(self):
    """Toggle the mark of every selected dupe, then notify 'marking_changed' once."""
    for selected in self.selected_dupes:
        self.results.mark_toggle(selected)
    self.notify('marking_changed')
|
2010-02-11 16:52:18 +00:00
|
|
|
|
2010-02-06 11:12:20 +00:00
|
|
|
def without_ref(self, dupes):
    """Return a new list holding the members of `dupes` that are not their group's ref."""
    non_refs = []
    for dupe in dupes:
        group = self.results.get_group_of_duplicate(dupe)
        if group.ref is not dupe:
            non_refs.append(dupe)
    return non_refs
|
|
|
|
|
2009-06-01 09:55:11 +00:00
|
|
|
#--- Properties
|
|
|
|
@property
def stat_line(self):
    """The results' stat line, with ' (N discarded)' appended when N is non-zero.

    N is ``self.scanner.discarded_file_count``.
    """
    line = self.results.stat_line
    discarded = self.scanner.discarded_file_count
    if discarded:
        line = '%s (%d discarded)' % (line, discarded)
    return line
|
|
|
|
|