mirror of
https://github.com/arsenetar/dupeguru.git
synced 2024-11-10 17:59:02 +00:00
Made core.fs.File slotted to save a lot of memory usage.
This commit is contained in:
parent
65d2581f74
commit
1171705921
44
core/fs.py
44
core/fs.py
@ -15,7 +15,9 @@ import hashlib
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
from hscommon import io
|
from hscommon import io
|
||||||
from hscommon.util import nonone, flatten, get_file_ext
|
from hscommon.util import nonone, get_file_ext
|
||||||
|
|
||||||
|
NOT_SET = object()
|
||||||
|
|
||||||
class FSError(Exception):
|
class FSError(Exception):
|
||||||
cls_message = "An error has occured on '{name}' in '{parent}'"
|
cls_message = "An error has occured on '{name}' in '{parent}'"
|
||||||
@ -55,29 +57,35 @@ class File:
|
|||||||
'md5': '',
|
'md5': '',
|
||||||
'md5partial': '',
|
'md5partial': '',
|
||||||
}
|
}
|
||||||
|
# Slots for File make us save quite a bit of memory. In a memory test I've made with a lot of
|
||||||
|
# files, I saved 35% memory usage with "unread" files (no _read_info() call) and gains become
|
||||||
|
# even greater when we take into account read attributes (70%!). Yeah, it's worth it.
|
||||||
|
__slots__ = ('path', 'is_ref', 'words') + tuple(INITIAL_INFO.keys())
|
||||||
|
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
self.path = path
|
self.path = path
|
||||||
#This offset is where we should start reading the file to get a partial md5
|
for attrname in self.INITIAL_INFO:
|
||||||
#For audio files, it should be where the audio data starts
|
setattr(self, attrname, NOT_SET)
|
||||||
self._md5partial_offset = 0x4000 #16Kb
|
|
||||||
self._md5partial_size = 0x4000 #16Kb
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<{} {}>".format(self.__class__.__name__, str(self.path))
|
return "<{} {}>".format(self.__class__.__name__, str(self.path))
|
||||||
|
|
||||||
def __getattr__(self, attrname):
|
def __getattribute__(self, attrname):
|
||||||
# Only called when attr is not there
|
result = object.__getattribute__(self, attrname)
|
||||||
if attrname in self.INITIAL_INFO:
|
if result is NOT_SET:
|
||||||
try:
|
try:
|
||||||
self._read_info(attrname)
|
self._read_info(attrname)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning("An error '%s' was raised while decoding '%s'", e, repr(self.path))
|
logging.warning("An error '%s' was raised while decoding '%s'", e, repr(self.path))
|
||||||
try:
|
result = object.__getattribute__(self, attrname)
|
||||||
return self.__dict__[attrname]
|
if result is NOT_SET:
|
||||||
except KeyError:
|
result = self.INITIAL_INFO[attrname]
|
||||||
return self.INITIAL_INFO[attrname]
|
return result
|
||||||
raise AttributeError()
|
|
||||||
|
#This offset is where we should start reading the file to get a partial md5
|
||||||
|
#For audio files, it should be where the audio data starts
|
||||||
|
def _get_md5partial_offset_and_size(self):
|
||||||
|
return (0x4000, 0x4000) #16Kb
|
||||||
|
|
||||||
def _read_info(self, field):
|
def _read_info(self, field):
|
||||||
if field in ('size', 'mtime'):
|
if field in ('size', 'mtime'):
|
||||||
@ -87,8 +95,7 @@ class File:
|
|||||||
elif field == 'md5partial':
|
elif field == 'md5partial':
|
||||||
try:
|
try:
|
||||||
fp = io.open(self.path, 'rb')
|
fp = io.open(self.path, 'rb')
|
||||||
offset = self._md5partial_offset
|
offset, size = self._get_md5partial_offset_and_size()
|
||||||
size = self._md5partial_size
|
|
||||||
fp.seek(offset)
|
fp.seek(offset)
|
||||||
partialdata = fp.read(size)
|
partialdata = fp.read(size)
|
||||||
md5 = hashlib.md5(partialdata)
|
md5 = hashlib.md5(partialdata)
|
||||||
@ -116,10 +123,9 @@ class File:
|
|||||||
If `attrnames` is not None, caches only attrnames.
|
If `attrnames` is not None, caches only attrnames.
|
||||||
"""
|
"""
|
||||||
if attrnames is None:
|
if attrnames is None:
|
||||||
attrnames = list(self.INITIAL_INFO.keys())
|
attrnames = self.INITIAL_INFO.keys()
|
||||||
for attrname in attrnames:
|
for attrname in attrnames:
|
||||||
if attrname not in self.__dict__:
|
getattr(self, attrname)
|
||||||
self._read_info(attrname)
|
|
||||||
|
|
||||||
#--- Public
|
#--- Public
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -159,6 +165,8 @@ class Folder(File):
|
|||||||
|
|
||||||
It has the size/md5 info of a File, but its values are the sum of its subitems.
|
It has the size/md5 info of a File, but its values are the sum of its subitems.
|
||||||
"""
|
"""
|
||||||
|
__slots__ = File.__slots__ + ('_subfolders', )
|
||||||
|
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
File.__init__(self, path)
|
File.__init__(self, path)
|
||||||
self._subfolders = None
|
self._subfolders = None
|
||||||
|
@ -28,6 +28,7 @@ class MusicFile(fs.File):
|
|||||||
'year' : '',
|
'year' : '',
|
||||||
'track' : 0,
|
'track' : 0,
|
||||||
})
|
})
|
||||||
|
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def can_handle(cls, path):
|
def can_handle(cls, path):
|
||||||
@ -35,11 +36,11 @@ class MusicFile(fs.File):
|
|||||||
return False
|
return False
|
||||||
return get_file_ext(path[-1]) in auto.EXT2CLASS
|
return get_file_ext(path[-1]) in auto.EXT2CLASS
|
||||||
|
|
||||||
|
def _get_md5partial_offset_and_size(self):
|
||||||
|
f = auto.File(str(self.path))
|
||||||
|
return (f.audio_offset, f.audio_size)
|
||||||
|
|
||||||
def _read_info(self, field):
|
def _read_info(self, field):
|
||||||
if field == 'md5partial':
|
|
||||||
f = auto.File(str(self.path))
|
|
||||||
self._md5partial_offset = f.audio_offset
|
|
||||||
self._md5partial_size = f.audio_size
|
|
||||||
fs.File._read_info(self, field)
|
fs.File._read_info(self, field)
|
||||||
if field in TAG_FIELDS:
|
if field in TAG_FIELDS:
|
||||||
f = auto.File(str(self.path))
|
f = auto.File(str(self.path))
|
||||||
|
@ -16,6 +16,8 @@ class Photo(fs.File):
|
|||||||
INITIAL_INFO.update({
|
INITIAL_INFO.update({
|
||||||
'dimensions': (0,0),
|
'dimensions': (0,0),
|
||||||
})
|
})
|
||||||
|
__slots__ = fs.File.__slots__ + tuple(INITIAL_INFO.keys())
|
||||||
|
|
||||||
# These extensions are supported on all platforms
|
# These extensions are supported on all platforms
|
||||||
HANDLED_EXTS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'}
|
HANDLED_EXTS = {'png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'tif'}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user