1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2026-01-22 14:41:39 +00:00

Added the EXIF Timestamp scan type in dgpe.

--HG--
rename : core_pe/matchbase.py => core_pe/matchblock.py
This commit is contained in:
Virgil Dupras
2011-04-21 17:17:19 +02:00
parent a0e2b11663
commit 275c6be108
18 changed files with 690 additions and 121 deletions

View File

@@ -8,8 +8,6 @@
# Heavily based on http://topo.math.u-psud.fr/~bousch/exifdump.py by Thierry Bousch (Public Domain)
import os
import sys
import logging
EXIF_TAGS = {
@@ -260,7 +258,6 @@ def read_exif_header(fp):
try:
index = large_data.index(b'Exif')
data = large_data[index-6:index+6]
print('hello!', data)
# large_data omits the first 12 bytes, and the index is at the middle of the header, so we
# must seek index + 18
fp.seek(index+18)
@@ -324,25 +321,3 @@ def get_fields(fp):
for tag, type, values in IFD:
add_tag_to_result(tag, values)
return result
def main():
    """Print EXIF information for files named on the command line.

    When no command-line arguments are given, every entry returned by
    ``os.listdir('.')`` is examined instead. For each file the name is
    printed, followed by the ``DateTime`` field when ``get_fields`` reports
    one, or the repr of all returned fields otherwise. A file that raises
    ``IOError`` or ``ValueError`` while being opened or parsed is reported and
    skipped. The process then exits with status 0.
    """
    # logging.getLogger().setLevel(logging.DEBUG)
    if len(sys.argv) < 2:
        filenames = os.listdir('.')
    else:
        filenames = sys.argv[1:]
    for filename in filenames:
        print(filename+':')
        try:
            # The context manager guarantees the handle is closed even when
            # get_fields() raises; a bare open() would leak one descriptor
            # per file processed.
            with open(filename, 'rb') as fp:
                fields = get_fields(fp)
            if 'DateTime' in fields:
                print(fields['DateTime'])
            else:
                print(repr(fields))
        except (IOError, ValueError):
            print(' Cannot open file')
    # NOTE(review): the flattened source does not show where this call sits;
    # it is assumed to run once after every file was handled -- confirm.
    sys.exit(0)


if __name__ == '__main__':
    main()

34
core_pe/matchexif.py Normal file
View File

@@ -0,0 +1,34 @@
# Created By: Virgil Dupras
# Created On: 2011-04-20
# Copyright 2011 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
import logging
from collections import defaultdict
from itertools import combinations
from hscommon import io
from hscommon.trans import tr
from core.engine import Match
from . import exif
def getmatches(files, j):
    """Match pictures whose EXIF ``DateTimeOriginal`` values are identical.

    ``files`` is iterated through ``j.iter_with_progress`` so progress is
    reported while reading. Each picture is opened from ``picture.path`` and
    its EXIF fields are read with ``exif.get_fields``. A picture for which
    any step fails (including a missing ``DateTimeOriginal`` key) is logged
    with a warning and left out.

    Returns a list holding one ``Match(p1, p2, 100)`` for every pair of
    pictures that share the same timestamp.
    """
    null_timestamp = '0000:00:00 00:00:00'
    pics_by_timestamp = defaultdict(set)
    for pic in j.iter_with_progress(files, tr("Read EXIF of %d/%d pictures")):
        try:
            with io.open(pic.path, 'rb') as stream:
                taken_at = exif.get_fields(stream)['DateTimeOriginal']
                pics_by_timestamp[taken_at].add(pic)
        except Exception:
            logging.warning("Couldn't read EXIF of picture: %s", pic.path)
    # An all-zero timestamp is a placeholder; pictures sharing it are very
    # likely false matches, so that group is discarded.
    pics_by_timestamp.pop(null_timestamp, None)
    result = []
    for group in pics_by_timestamp.values():
        for first, second in combinations(group, 2):
            result.append(Match(first, second, 100))
    return result

View File

@@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
# Created By: Virgil Dupras
# Created On: 2009-10-18
# Copyright 2011 Hardcoded Software (http://www.hardcoded.net)
@@ -7,9 +6,9 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
from core.scanner import Scanner
from core.scanner import Scanner, ScanType
from . import matchbase
from . import matchblock, matchexif
from .cache import Cache
class ScannerPE(Scanner):
@@ -18,7 +17,12 @@ class ScannerPE(Scanner):
threshold = 75
def _getmatches(self, files, j):
    """Return the matches for ``files`` using the matcher chosen by ``self.scan_type``.

    ``ScanType.FuzzyBlock`` delegates to ``matchblock.getmatches`` and passes
    along ``self.cache_path``, ``self.threshold`` and ``self.match_scaled``.
    ``ScanType.ExifTimestamp`` delegates to ``matchexif.getmatches``.
    ``j`` is forwarded unchanged to whichever matcher is selected.

    Raises:
        Exception: if ``self.scan_type`` is neither of the two types above.
    """
    # The former unconditional return through ``matchbase`` is gone: it made
    # the scan-type dispatch below unreachable.
    if self.scan_type == ScanType.FuzzyBlock:
        return matchblock.getmatches(files, self.cache_path, self.threshold, self.match_scaled, j)
    elif self.scan_type == ScanType.ExifTimestamp:
        return matchexif.getmatches(files, j)
    else:
        raise Exception("Invalid scan type")
def clear_picture_cache(self):
cache = Cache(self.cache_path)