1
0
mirror of https://github.com/arsenetar/dupeguru.git synced 2024-11-10 17:59:02 +00:00

Made core.fs.File slotted to save a lot of memory usage.

This commit is contained in:
Virgil Dupras 2012-05-29 17:39:54 -04:00
parent 65d2581f74
commit 1171705921
3 changed files with 33 additions and 22 deletions

View File

@ -15,7 +15,9 @@ import hashlib
import logging import logging
from hscommon import io from hscommon import io
from hscommon.util import nonone, flatten, get_file_ext from hscommon.util import nonone, get_file_ext
NOT_SET = object()
class FSError(Exception): class FSError(Exception):
cls_message = "An error has occured on '{name}' in '{parent}'" cls_message = "An error has occured on '{name}' in '{parent}'"
@ -55,29 +57,35 @@ class File:
'md5': '', 'md5': '',
'md5partial': '', 'md5partial': '',
} }
# Slots for File save quite a bit of memory. In a memory test run against a lot of
# files, memory usage dropped by 35% with "unread" files (no _read_info() call), and the gains
# become even greater once read attributes are taken into account (70%!). Yeah, it's worth it.
__slots__ = ('path', 'is_ref', 'words') + tuple(INITIAL_INFO.keys())
def __init__(self, path): def __init__(self, path):
self.path = path self.path = path
#This offset is where we should start reading the file to get a partial md5 for attrname in self.INITIAL_INFO:
#For audio file, it should be where audio data starts setattr(self, attrname, NOT_SET)
self._md5partial_offset = 0x4000 #16Kb
self._md5partial_size = 0x4000 #16Kb
def __repr__(self): def __repr__(self):
return "<{} {}>".format(self.__class__.__name__, str(self.path)) return "<{} {}>".format(self.__class__.__name__, str(self.path))
def __getattr__(self, attrname): def __getattribute__(self, attrname):
# Only called when attr is not there result = object.__getattribute__(self, attrname)
if attrname in self.INITIAL_INFO: if result is NOT_SET:
try: try:
self._read_info(attrname) self._read_info(attrname)
except Exception as e: except Exception as e:
logging.warning("An error '%s' was raised while decoding '%s'", e, repr(self.path)) logging.warning("An error '%s' was raised while decoding '%s'", e, repr(self.path))
try: result = object.__getattribute__(self, attrname)
return self.__dict__[attrname] if result is NOT_SET:
except KeyError: result = self.INITIAL_INFO[attrname]
return self.INITIAL_INFO[attrname] return result
raise AttributeError()
# This offset is where we should start reading the file to get a partial md5.
# For audio files, it should be where the audio data starts.
def _get_md5partial_offset_and_size(self):
return (0x4000, 0x4000) #16Kb
def _read_info(self, field): def _read_info(self, field):
if field in ('size', 'mtime'): if field in ('size', 'mtime'):
@ -87,8 +95,7 @@ class File:
elif field == 'md5partial': elif field == 'md5partial':
try: try:
fp = io.open(self.path, 'rb') fp = io.open(self.path, 'rb')
offset = self._md5partial_offset offset, size = self._get_md5partial_offset_and_size()
size = self._md5partial_size
fp.seek(offset) fp.seek(offset)
partialdata = fp.read(size) partialdata = fp.read(size)
md5 = hashlib.md5(partialdata) md5 = hashlib.md5(partialdata)
@ -116,10 +123,9 @@ class File:
If `attrnames` is not None, caches only attrnames. If `attrnames` is not None, caches only attrnames.
""" """
if attrnames is None: if attrnames is None:
attrnames = list(self.INITIAL_INFO.keys()) attrnames = self.INITIAL_INFO.keys()
for attrname in attrnames: for attrname in attrnames:
if attrname not in self.__dict__: getattr(self, attrname)
self._read_info(attrname)
#--- Public #--- Public
@classmethod @classmethod
@ -159,6 +165,8 @@ class Folder(File):
It has the size/md5 info of a File, but its values are the sum of its subitems. It has the size/md5 info of a File, but its values are the sum of its subitems.
""" """
__slots__ = File.__slots__ + ('_subfolders', )
def __init__(self, path): def __init__(self, path):
File.__init__(self, path) File.__init__(self, path)
self._subfolders = None self._subfolders = None

View File

@ -28,6 +28,7 @@ class MusicFile(fs.File):
'year' : '', 'year' : '',
'track' : 0, 'track' : 0,
}) })
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())
@classmethod @classmethod
def can_handle(cls, path): def can_handle(cls, path):
@ -35,11 +36,11 @@ class MusicFile(fs.File):
return False return False
return get_file_ext(path[-1]) in auto.EXT2CLASS return get_file_ext(path[-1]) in auto.EXT2CLASS
def _get_md5partial_offset_and_size(self):
f = auto.File(str(self.path))
return (f.audio_offset, f.audio_size)
def _read_info(self, field): def _read_info(self, field):
if field == 'md5partial':
f = auto.File(str(self.path))
self._md5partial_offset = f.audio_offset
self._md5partial_size = f.audio_size
fs.File._read_info(self, field) fs.File._read_info(self, field)
if field in TAG_FIELDS: if field in TAG_FIELDS:
f = auto.File(str(self.path)) f = auto.File(str(self.path))

View File

@ -16,6 +16,8 @@ class Photo(fs.File):
INITIAL_INFO.update({ INITIAL_INFO.update({
'dimensions': (0,0), 'dimensions': (0,0),
}) })
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())
# These extensions are supported on all platforms # These extensions are supported on all platforms
HANDLED_EXTS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'} HANDLED_EXTS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'}