From 1171705921c660f1bcee1d011d169b8ab60b58f0 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Tue, 29 May 2012 17:39:54 -0400 Subject: [PATCH] Made core.fs.File slotted to save a lot of memory usage. --- core/fs.py | 44 ++++++++++++++++++++++++++------------------ core_me/fs.py | 9 +++++---- core_pe/photo.py | 2 ++ 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/core/fs.py b/core/fs.py index 92c22736..93aed241 100644 --- a/core/fs.py +++ b/core/fs.py @@ -15,7 +15,9 @@ import hashlib import logging from hscommon import io -from hscommon.util import nonone, flatten, get_file_ext +from hscommon.util import nonone, get_file_ext + +NOT_SET = object() class FSError(Exception): cls_message = "An error has occured on '{name}' in '{parent}'" @@ -55,29 +57,35 @@ class File: 'md5': '', 'md5partial': '', } + # Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of + # files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become + # even greater when we take into account read attributes (70%!). Yeah, it's worth it. + __slots__ = ('path', 'is_ref', 'words') + tuple(INITIAL_INFO.keys()) def __init__(self, path): self.path = path - #This offset is where we should start reading the file to get a partial md5 - #For audio file, it should be where audio data starts - self._md5partial_offset = 0x4000 #16Kb - self._md5partial_size = 0x4000 #16Kb + for attrname in self.INITIAL_INFO: + setattr(self, attrname, NOT_SET) def __repr__(self): return "<{} {}>".format(self.__class__.__name__, str(self.path)) - def __getattr__(self, attrname): - # Only called when attr is not there - if attrname in self.INITIAL_INFO: + def __getattribute__(self, attrname): + result = object.__getattribute__(self, attrname) + if result is NOT_SET: try: self._read_info(attrname) except Exception as e: logging.warning("An error '%s' was raised while decoding '%s'", e, repr(self.path)) - try: - return self.__dict__[attrname] - except KeyError: - return self.INITIAL_INFO[attrname] - raise AttributeError() + result = object.__getattribute__(self, attrname) + if result is NOT_SET: + result = self.INITIAL_INFO[attrname] + return result + + #This offset is where we should start reading the file to get a partial md5 + #For audio file, it should be where audio data starts + def _get_md5partial_offset_and_size(self): + return (0x4000, 0x4000) #16Kb def _read_info(self, field): if field in ('size', 'mtime'): @@ -87,8 +95,7 @@ class File: elif field == 'md5partial': try: fp = io.open(self.path, 'rb') - offset = self._md5partial_offset - size = self._md5partial_size + offset, size = self._get_md5partial_offset_and_size() fp.seek(offset) partialdata = fp.read(size) md5 = hashlib.md5(partialdata) @@ -116,10 +123,9 @@ class File: If `attrnames` is not None, caches only attrnames. """ if attrnames is None: - attrnames = list(self.INITIAL_INFO.keys()) + attrnames = self.INITIAL_INFO.keys() for attrname in attrnames: - if attrname not in self.__dict__: - self._read_info(attrname) + getattr(self, attrname) #--- Public @classmethod @@ -159,6 +165,8 @@ class Folder(File): It has the size/md5 info of a File, but it's value are the sum of its subitems. """ + __slots__ = File.__slots__ + ('_subfolders', ) + def __init__(self, path): File.__init__(self, path) self._subfolders = None diff --git a/core_me/fs.py b/core_me/fs.py index eae01478..4733be15 100644 --- a/core_me/fs.py +++ b/core_me/fs.py @@ -28,6 +28,7 @@ class MusicFile(fs.File): 'year' : '', 'track' : 0, }) + __slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys()) @classmethod def can_handle(cls, path): @@ -35,11 +36,11 @@ class MusicFile(fs.File): return False return get_file_ext(path[-1]) in auto.EXT2CLASS + def _get_md5partial_offset_and_size(self): + f = auto.File(str(self.path)) + return (f.audio_offset, f.audio_size) + def _read_info(self, field): - if field == 'md5partial': - f = auto.File(str(self.path)) - self._md5partial_offset = f.audio_offset - self._md5partial_size = f.audio_size fs.File._read_info(self, field) if field in TAG_FIELDS: f = auto.File(str(self.path)) diff --git a/core_pe/photo.py b/core_pe/photo.py index d87b2090..cd398a7e 100644 --- a/core_pe/photo.py +++ b/core_pe/photo.py @@ -16,6 +16,8 @@ class Photo(fs.File): INITIAL_INFO.update({ 'dimensions': (0,0), }) + __slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys()) + # These extensions are supported on all platforms HANDLED_EXTS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'}