mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-10 05:34:36 +00:00
directories: un-recurse get_files() and get_state()
These methods were previously called recursively, which seemed to cause problems in some cases. Since the recursion brought no notable advantage and `os.walk()` is better style anyway, I removed it. Hopefully fixes #421
This commit is contained in:
parent
899a42f6a9
commit
d5fef949e9
@ -1,11 +1,10 @@
|
|||||||
# Created By: Virgil Dupras
|
# Copyright 2017 Virgil Dupras
|
||||||
# Created On: 2006/02/27
|
|
||||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
|
||||||
#
|
#
|
||||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||||
# which should be included with this package. The terms are also available at
|
# which should be included with this package. The terms are also available at
|
||||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||||
|
|
||||||
|
import os
|
||||||
from xml.etree import ElementTree as ET
|
from xml.etree import ElementTree as ET
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
@ -76,31 +75,34 @@ class Directories:
|
|||||||
return DirectoryState.Excluded
|
return DirectoryState.Excluded
|
||||||
|
|
||||||
def _get_files(self, from_path, fileclasses, j):
|
def _get_files(self, from_path, fileclasses, j):
|
||||||
j.check_if_cancelled()
|
for root, dirs, files in os.walk(str(from_path)):
|
||||||
state = self.get_state(from_path)
|
j.check_if_cancelled()
|
||||||
if state == DirectoryState.Excluded:
|
root = Path(root)
|
||||||
# Recursively get files from folders with lots of subfolder is expensive. However, there
|
state = self.get_state(root)
|
||||||
# might be a subfolder in this path that is not excluded. What we want to do is to skim
|
if state == DirectoryState.Excluded:
|
||||||
# through self.states and see if we must continue, or we can stop right here to save time
|
# Recursively get files from folders with lots of subfolder is expensive. However, there
|
||||||
if not any(p[:len(from_path)] == from_path for p in self.states):
|
# might be a subfolder in this path that is not excluded. What we want to do is to skim
|
||||||
return
|
# through self.states and see if we must continue, or we can stop right here to save time
|
||||||
try:
|
if not any(p[:len(root)] == root for p in self.states):
|
||||||
filepaths = set()
|
del dirs[:]
|
||||||
if state != DirectoryState.Excluded:
|
try:
|
||||||
found_files = fs.get_files(from_path, fileclasses=fileclasses)
|
if state != DirectoryState.Excluded:
|
||||||
logging.debug("Collected %d files in folder %s", len(found_files), str(from_path))
|
found_files = [fs.get_file(root + f, fileclasses=fileclasses) for f in files]
|
||||||
for file in found_files:
|
found_files = [f for f in found_files if f is not None]
|
||||||
file.is_ref = state == DirectoryState.Reference
|
# In some cases, directories can be considered as files by dupeGuru, which is
|
||||||
filepaths.add(file.path)
|
# why we have this line below. In fact, there is only one case: Bundle files under
|
||||||
yield file
|
# OS X... In other situations, this for loop will do nothing.
|
||||||
# it's possible that a folder (bundle) gets into the file list. in that case, we don't
|
for d in dirs[:]:
|
||||||
# want to recurse into it
|
f = fs.get_file(root + d, fileclasses=fileclasses)
|
||||||
subfolders = [p for p in from_path.listdir() if not p.islink() and p.isdir() and p not in filepaths]
|
if f is not None:
|
||||||
for subfolder in subfolders:
|
found_files.append(f)
|
||||||
for file in self._get_files(subfolder, fileclasses=fileclasses, j=j):
|
dirs.remove(d)
|
||||||
yield file
|
logging.debug("Collected %d files in folder %s", len(found_files), str(from_path))
|
||||||
except (EnvironmentError, fs.InvalidPath):
|
for file in found_files:
|
||||||
pass
|
file.is_ref = state == DirectoryState.Reference
|
||||||
|
yield file
|
||||||
|
except (EnvironmentError, fs.InvalidPath):
|
||||||
|
pass
|
||||||
|
|
||||||
def _get_folders(self, from_folder, j):
|
def _get_folders(self, from_folder, j):
|
||||||
j.check_if_cancelled()
|
j.check_if_cancelled()
|
||||||
@ -176,16 +178,17 @@ class Directories:
|
|||||||
|
|
||||||
:rtype: :class:`DirectoryState`
|
:rtype: :class:`DirectoryState`
|
||||||
"""
|
"""
|
||||||
|
# direct match? easy result.
|
||||||
if path in self.states:
|
if path in self.states:
|
||||||
return self.states[path]
|
return self.states[path]
|
||||||
default_state = self._default_state_for_path(path)
|
state = self._default_state_for_path(path) or DirectoryState.Normal
|
||||||
if default_state is not None:
|
prevlen = 0
|
||||||
return default_state
|
# we loop through the states to find the longest matching prefix
|
||||||
parent = path.parent()
|
for p, s in self.states.items():
|
||||||
if parent in self:
|
if p.is_parent_of(path) and len(p) > prevlen:
|
||||||
return self.get_state(parent)
|
prevlen = len(p)
|
||||||
else:
|
state = s
|
||||||
return DirectoryState.Normal
|
return state
|
||||||
|
|
||||||
def has_any_file(self):
|
def has_any_file(self):
|
||||||
"""Returns whether selected folders contain any file.
|
"""Returns whether selected folders contain any file.
|
||||||
|
@ -13,6 +13,7 @@ from pytest import raises
|
|||||||
from hscommon.path import Path
|
from hscommon.path import Path
|
||||||
from hscommon.testutil import eq_
|
from hscommon.testutil import eq_
|
||||||
|
|
||||||
|
from ..fs import File
|
||||||
from ..directories import Directories, DirectoryState, AlreadyThereError, InvalidPathError
|
from ..directories import Directories, DirectoryState, AlreadyThereError, InvalidPathError
|
||||||
|
|
||||||
def create_fake_fs(rootpath):
|
def create_fake_fs(rootpath):
|
||||||
@ -162,6 +163,20 @@ def test_get_files():
|
|||||||
else:
|
else:
|
||||||
assert not f.is_ref
|
assert not f.is_ref
|
||||||
|
|
||||||
|
def test_get_files_with_folders():
|
||||||
|
# When fileclasses handle folders, return them and stop recursing!
|
||||||
|
class FakeFile(File):
|
||||||
|
@classmethod
|
||||||
|
def can_handle(cls, path):
|
||||||
|
return True
|
||||||
|
|
||||||
|
d = Directories()
|
||||||
|
p = testpath['fs']
|
||||||
|
d.add_path(p)
|
||||||
|
files = list(d.get_files(fileclasses=[FakeFile]))
|
||||||
|
# We have the 3 root files and the 3 root dirs
|
||||||
|
eq_(6, len(files))
|
||||||
|
|
||||||
def test_get_folders():
|
def test_get_folders():
|
||||||
d = Directories()
|
d = Directories()
|
||||||
p = testpath['fs']
|
p = testpath['fs']
|
||||||
|
Loading…
x
Reference in New Issue
Block a user