Fixed crashes when reading an invalid iPhoto AlbumData file. This time, I used lxml's "recover" feature to filter out garbage in the XML, so it should cover most cases of invalid content in iPhoto data files.

2025-09-11 17:58:17 +00:00 · 2010-03-01 12:20:21 +01:00 · 2010-03-01 12:20:21 +01:00 · b17ca66f73
commit b17ca66f73
parent 93bc609026
1 changed files with 5 additions and 9 deletions
--- a/core_pe/app_cocoa.py
+++ b/core_pe/app_cocoa.py
@@ -11,6 +11,7 @@ import logging
 import plistlib
 import re
+from lxml import etree
 from appscript import app, k, CommandError
 from hsutil import io
@@ -68,15 +69,10 @@ def get_iphoto_database_path():
 def get_iphoto_pictures(plistpath):
    if not io.exists(plistpath):
        return []
-    s = io.open(plistpath).read()
+    # We make the xml go through lxml so that it can fix broken xml which iPhoto sometimes produces.
-    # There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading
+    parser = etree.XMLParser(recover=True)
-    s = s.replace('\x10', '')
+    root = etree.parse(io.open(plistpath), parser=parser).getroot()
-    # It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find
+    s = etree.tostring(root)
-    # any & char that is not a &-based entity (&amp;, &quot;, etc.). based on TextMate's XML
-    # bundle's regexp
-    s, count = re.subn(r'&(?![a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+;)', '', s)
-    if count:
-        logging.warning("%d invalid XML entities replacement made", count)
    plist = plistlib.readPlistFromString(s)
    result = []
    for photo_data in plist['Master Image List'].values():