From b17ca66f7314d36c0ca39463485ee82d35ef320c Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Mon, 1 Mar 2010 12:20:21 +0100 Subject: [PATCH] Fixed crashes when reading invalid iPhoto AlbumData file. This time, I used lxml's "recover" feature to filter out crap in the XML, so it should cover most cases of invalid stuff in iPhoto data files. --- core_pe/app_cocoa.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/core_pe/app_cocoa.py b/core_pe/app_cocoa.py index f30c2d4f..625acf32 100644 --- a/core_pe/app_cocoa.py +++ b/core_pe/app_cocoa.py @@ -11,6 +11,7 @@ import logging import plistlib import re +from lxml import etree from appscript import app, k, CommandError from hsutil import io @@ -68,15 +69,10 @@ def get_iphoto_database_path(): def get_iphoto_pictures(plistpath): if not io.exists(plistpath): return [] - s = io.open(plistpath).read() - # There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading - s = s.replace('\x10', '') - # It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find - # any & char that is not a &-based entity (&amp;, &quot;, etc.). based on TextMate's XML - # bundle's regexp - s, count = re.subn(r'&(?![a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+;)', '', s) - if count: - logging.warning("%d invalid XML entities replacement made", count) + # We make the xml go through lxml so that it can fix broken xml which iPhoto sometimes produces. + parser = etree.XMLParser(recover=True) + root = etree.parse(io.open(plistpath), parser=parser).getroot() + s = etree.tostring(root) plist = plistlib.readPlistFromString(s) result = [] for photo_data in plist['Master Image List'].values():