Add debugging data to iPhoto plist parsing

Fixes #233.
This commit is contained in:
Virgil Dupras 2014-03-15 13:59:15 -04:00
parent a29e007475
commit 95c6a7d41f
2 changed files with 33 additions and 2 deletions

View File

@ -6,9 +6,9 @@
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
import plistlib
import logging
import re
import io
from appscript import app, its, k, CommandError, ApplicationNotFoundError
@ -23,6 +23,7 @@ from core.app import JobType
from core_pe import _block_osx
from core_pe.photo import Photo as PhotoBase
from core_pe.app import DupeGuru as DupeGuruBase
from core_pe.iphoto_plist import IPhotoPlistParser
from .app import PyDupeGuruBase
tr = trget('ui')
@ -89,7 +90,12 @@ def get_iphoto_or_aperture_pictures(plistpath: Path, photo_class):
s, count = re.subn(r'&(?![a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+;)', '', s)
if count:
logging.warning("%d invalid XML entities replacement made", count)
plist = plistlib.readPlistFromBytes(s.encode('utf-8'))
parser = IPhotoPlistParser()
try:
plist = parser.parse(io.BytesIO(s.encode('utf-8')))
except Exception:
logging.warning("iPhoto plist parsing choked on data: %r", parser.lastdata)
raise
result = []
for key, photo_data in plist['Master Image List'].items():
if photo_data['MediaType'] != 'Image':

25
core_pe/iphoto_plist.py Normal file
View File

@ -0,0 +1,25 @@
# Created By: Virgil Dupras
# Created On: 2014-03-15
# Copyright 2014 Hardcoded Software (http://www.hardcoded.net)
#
# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license
import plistlib
class IPhotoPlistParser(plistlib.PlistParser):
    """Plist parser that remembers the most recent data it read.

    iPhoto plists tend to be malformed. This subclass leaves the parsing itself to the built-in
    parser; it only keeps the latest value returned by ``getData()`` in ``lastdata`` so that the
    caller can log it when parsing raises an exception.
    """

    def __init__(self):
        super().__init__()
        # Debugging aid: holds the latest result of getData(). It stays an empty string until
        # getData() has been called at least once.
        self.lastdata = ''

    def getData(self):
        """Return the base parser's data after storing a reference to it in ``lastdata``."""
        data = super().getData()
        self.lastdata = data
        return data