Fixed crashes when reading an invalid iPhoto AlbumData file. This time, I used lxml's "recover" feature to filter out crap in the XML, so it should cover most cases of invalid content in iPhoto data files.

This commit is contained in:
Virgil Dupras 2010-03-01 12:20:21 +01:00
parent 93bc609026
commit b17ca66f73
1 changed file with 5 additions and 9 deletions

View File

@@ -11,6 +11,7 @@ import logging
import plistlib import plistlib
import re import re
from lxml import etree
from appscript import app, k, CommandError from appscript import app, k, CommandError
from hsutil import io from hsutil import io
@@ -68,15 +69,10 @@ def get_iphoto_database_path():
def get_iphoto_pictures(plistpath): def get_iphoto_pictures(plistpath):
if not io.exists(plistpath): if not io.exists(plistpath):
return [] return []
s = io.open(plistpath).read() # We make the xml go through lxml so that it can fix broken xml which iPhoto sometimes produces.
# There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading parser = etree.XMLParser(recover=True)
s = s.replace('\x10', '') root = etree.parse(io.open(plistpath), parser=parser).getroot()
# It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find s = etree.tostring(root)
# any & char that is not a &-based entity (&amp;, &quot;, etc.). based on TextMate's XML
# bundle's regexp
s, count = re.subn(r'&(?![a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+;)', '', s)
if count:
logging.warning("%d invalid XML entities replacement made", count)
plist = plistlib.readPlistFromString(s) plist = plistlib.readPlistFromString(s)
result = [] result = []
for photo_data in plist['Master Image List'].values(): for photo_data in plist['Master Image List'].values():