mirror of
https://github.com/arsenetar/dupeguru.git
synced 2024-11-16 12:19:03 +00:00
Fixed crashes when reading an invalid iPhoto AlbumData file. This time, I used lxml's "recover" feature to filter out crap in the XML, so it should cover most cases of invalid stuff in iPhoto data files.
This commit is contained in:
parent
93bc609026
commit
b17ca66f73
@ -11,6 +11,7 @@ import logging
|
|||||||
import plistlib
|
import plistlib
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
from appscript import app, k, CommandError
|
from appscript import app, k, CommandError
|
||||||
|
|
||||||
from hsutil import io
|
from hsutil import io
|
||||||
@ -68,15 +69,10 @@ def get_iphoto_database_path():
|
|||||||
def get_iphoto_pictures(plistpath):
|
def get_iphoto_pictures(plistpath):
|
||||||
if not io.exists(plistpath):
|
if not io.exists(plistpath):
|
||||||
return []
|
return []
|
||||||
s = io.open(plistpath).read()
|
# We make the xml go through lxml so that it can fix broken xml which iPhoto sometimes produces.
|
||||||
# There was a case where a guy had 0x10 chars in his plist, causing expat errors on loading
|
parser = etree.XMLParser(recover=True)
|
||||||
s = s.replace('\x10', '')
|
root = etree.parse(io.open(plistpath), parser=parser).getroot()
|
||||||
# It seems that iPhoto sometimes doesn't properly escape & chars. The regexp below is to find
|
s = etree.tostring(root)
|
||||||
# any & char that is not a &-based entity (&amp;, &quot;, etc.). based on TextMate's XML
|
|
||||||
# bundle's regexp
|
|
||||||
s, count = re.subn(r'&(?![a-zA-Z0-9_-]+|#[0-9]+|#x[0-9a-fA-F]+;)', '', s)
|
|
||||||
if count:
|
|
||||||
logging.warning("%d invalid XML entities replacement made", count)
|
|
||||||
plist = plistlib.readPlistFromString(s)
|
plist = plistlib.readPlistFromString(s)
|
||||||
result = []
|
result = []
|
||||||
for photo_data in plist['Master Image List'].values():
|
for photo_data in plist['Master Image List'].values():
|
||||||
|
Loading…
Reference in New Issue
Block a user