mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-03-09 21:24:36 +00:00
directories: un-recurse get_files() and get_state()
These methods were previously called recursively and it seemed to cause problems in some cases. The recursive nature of these functions not bringing any notable advantage and `os.walk()` being of better style anyway, I removed that recursive nature. Hopefully fixes #421
This commit is contained in:
parent
899a42f6a9
commit
d5fef949e9
@ -1,11 +1,10 @@
|
||||
# Created By: Virgil Dupras
|
||||
# Created On: 2006/02/27
|
||||
# Copyright 2015 Hardcoded Software (http://www.hardcoded.net)
|
||||
# Copyright 2017 Virgil Dupras
|
||||
#
|
||||
# This software is licensed under the "GPLv3" License as described in the "LICENSE" file,
|
||||
# which should be included with this package. The terms are also available at
|
||||
# http://www.gnu.org/licenses/gpl-3.0.html
|
||||
|
||||
import os
|
||||
from xml.etree import ElementTree as ET
|
||||
import logging
|
||||
|
||||
@ -76,31 +75,34 @@ class Directories:
|
||||
return DirectoryState.Excluded
|
||||
|
||||
def _get_files(self, from_path, fileclasses, j):
|
||||
j.check_if_cancelled()
|
||||
state = self.get_state(from_path)
|
||||
if state == DirectoryState.Excluded:
|
||||
# Recursively get files from folders with lots of subfolder is expensive. However, there
|
||||
# might be a subfolder in this path that is not excluded. What we want to do is to skim
|
||||
# through self.states and see if we must continue, or we can stop right here to save time
|
||||
if not any(p[:len(from_path)] == from_path for p in self.states):
|
||||
return
|
||||
try:
|
||||
filepaths = set()
|
||||
if state != DirectoryState.Excluded:
|
||||
found_files = fs.get_files(from_path, fileclasses=fileclasses)
|
||||
logging.debug("Collected %d files in folder %s", len(found_files), str(from_path))
|
||||
for file in found_files:
|
||||
file.is_ref = state == DirectoryState.Reference
|
||||
filepaths.add(file.path)
|
||||
yield file
|
||||
# it's possible that a folder (bundle) gets into the file list. in that case, we don't
|
||||
# want to recurse into it
|
||||
subfolders = [p for p in from_path.listdir() if not p.islink() and p.isdir() and p not in filepaths]
|
||||
for subfolder in subfolders:
|
||||
for file in self._get_files(subfolder, fileclasses=fileclasses, j=j):
|
||||
yield file
|
||||
except (EnvironmentError, fs.InvalidPath):
|
||||
pass
|
||||
for root, dirs, files in os.walk(str(from_path)):
|
||||
j.check_if_cancelled()
|
||||
root = Path(root)
|
||||
state = self.get_state(root)
|
||||
if state == DirectoryState.Excluded:
|
||||
# Recursively get files from folders with lots of subfolder is expensive. However, there
|
||||
# might be a subfolder in this path that is not excluded. What we want to do is to skim
|
||||
# through self.states and see if we must continue, or we can stop right here to save time
|
||||
if not any(p[:len(root)] == root for p in self.states):
|
||||
del dirs[:]
|
||||
try:
|
||||
if state != DirectoryState.Excluded:
|
||||
found_files = [fs.get_file(root + f, fileclasses=fileclasses) for f in files]
|
||||
found_files = [f for f in found_files if f is not None]
|
||||
# In some cases, directories can be considered as files by dupeGuru, which is
|
||||
# why we have this line below. In fact, there only one case: Bundle files under
|
||||
# OS X... In other situations, this forloop will do nothing.
|
||||
for d in dirs[:]:
|
||||
f = fs.get_file(root + d, fileclasses=fileclasses)
|
||||
if f is not None:
|
||||
found_files.append(f)
|
||||
dirs.remove(d)
|
||||
logging.debug("Collected %d files in folder %s", len(found_files), str(from_path))
|
||||
for file in found_files:
|
||||
file.is_ref = state == DirectoryState.Reference
|
||||
yield file
|
||||
except (EnvironmentError, fs.InvalidPath):
|
||||
pass
|
||||
|
||||
def _get_folders(self, from_folder, j):
|
||||
j.check_if_cancelled()
|
||||
@ -176,16 +178,17 @@ class Directories:
|
||||
|
||||
:rtype: :class:`DirectoryState`
|
||||
"""
|
||||
# direct match? easy result.
|
||||
if path in self.states:
|
||||
return self.states[path]
|
||||
default_state = self._default_state_for_path(path)
|
||||
if default_state is not None:
|
||||
return default_state
|
||||
parent = path.parent()
|
||||
if parent in self:
|
||||
return self.get_state(parent)
|
||||
else:
|
||||
return DirectoryState.Normal
|
||||
state = self._default_state_for_path(path) or DirectoryState.Normal
|
||||
prevlen = 0
|
||||
# we loop through the states to find the longest matching prefix
|
||||
for p, s in self.states.items():
|
||||
if p.is_parent_of(path) and len(p) > prevlen:
|
||||
prevlen = len(p)
|
||||
state = s
|
||||
return state
|
||||
|
||||
def has_any_file(self):
|
||||
"""Returns whether selected folders contain any file.
|
||||
|
@ -13,6 +13,7 @@ from pytest import raises
|
||||
from hscommon.path import Path
|
||||
from hscommon.testutil import eq_
|
||||
|
||||
from ..fs import File
|
||||
from ..directories import Directories, DirectoryState, AlreadyThereError, InvalidPathError
|
||||
|
||||
def create_fake_fs(rootpath):
|
||||
@ -162,6 +163,20 @@ def test_get_files():
|
||||
else:
|
||||
assert not f.is_ref
|
||||
|
||||
def test_get_files_with_folders():
|
||||
# When fileclasses handle folders, return them and stop recursing!
|
||||
class FakeFile(File):
|
||||
@classmethod
|
||||
def can_handle(cls, path):
|
||||
return True
|
||||
|
||||
d = Directories()
|
||||
p = testpath['fs']
|
||||
d.add_path(p)
|
||||
files = list(d.get_files(fileclasses=[FakeFile]))
|
||||
# We have the 3 root files and the 3 root dirs
|
||||
eq_(6, len(files))
|
||||
|
||||
def test_get_folders():
|
||||
d = Directories()
|
||||
p = testpath['fs']
|
||||
|
Loading…
x
Reference in New Issue
Block a user