mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-05-07 09:19:50 +00:00
Compare commits
5 Commits
143147cb8e
...
a37b5b0eeb
Author | SHA1 | Date | |
---|---|---|---|
a37b5b0eeb | |||
efd500ecc1 | |||
43fcc52291 | |||
50f5db1543 | |||
a5b0ccdd02 |
@ -248,7 +248,7 @@ class DupeGuru(Broadcaster):
|
|||||||
ref = group.ref
|
ref = group.ref
|
||||||
linkfunc = os.link if use_hardlinks else os.symlink
|
linkfunc = os.link if use_hardlinks else os.symlink
|
||||||
linkfunc(str(ref.path), str_path)
|
linkfunc(str(ref.path), str_path)
|
||||||
self.clean_empty_dirs(dupe.path.parent())
|
self.clean_empty_dirs(dupe.path.parent)
|
||||||
|
|
||||||
def _create_file(self, path):
|
def _create_file(self, path):
|
||||||
# We add fs.Folder to fileclasses in case the file we're loading contains folder paths.
|
# We add fs.Folder to fileclasses in case the file we're loading contains folder paths.
|
||||||
|
@ -90,47 +90,45 @@ class Directories:
|
|||||||
return DirectoryState.EXCLUDED
|
return DirectoryState.EXCLUDED
|
||||||
|
|
||||||
def _get_files(self, from_path, fileclasses, j):
|
def _get_files(self, from_path, fileclasses, j):
|
||||||
for root, dirs, files in os.walk(str(from_path)):
|
try:
|
||||||
j.check_if_cancelled()
|
with os.scandir(from_path) as iter:
|
||||||
root_path = Path(root)
|
root_path = Path(from_path)
|
||||||
state = self.get_state(root_path)
|
state = self.get_state(root_path)
|
||||||
if state == DirectoryState.EXCLUDED and not any(
|
# if we have no un-excluded dirs under this directory skip going deeper
|
||||||
p.parts[: len(root_path.parts)] == root_path.parts for p in self.states
|
skip_dirs = state == DirectoryState.EXCLUDED and not any(
|
||||||
):
|
p.parts[: len(root_path.parts)] == root_path.parts for p in self.states
|
||||||
# Recursively getting files from folders with lots of subfolders is expensive. However, there
|
)
|
||||||
# might be a subfolder in this path that is not excluded. What we want to do is to skim
|
count = 0
|
||||||
# through self.states and see if we must continue, or we can stop right here to save time
|
for item in iter:
|
||||||
del dirs[:]
|
j.check_if_cancelled()
|
||||||
try:
|
try:
|
||||||
if state != DirectoryState.EXCLUDED:
|
if item.is_dir():
|
||||||
# Old logic
|
if skip_dirs:
|
||||||
if self._exclude_list is None or not self._exclude_list.mark_count:
|
continue
|
||||||
found_files = [fs.get_file(root_path.joinpath(f), fileclasses=fileclasses) for f in files]
|
yield from self._get_files(item.path, fileclasses, j)
|
||||||
else:
|
continue
|
||||||
found_files = []
|
elif state == DirectoryState.EXCLUDED:
|
||||||
# print(f"len of files: {len(files)} {files}")
|
continue
|
||||||
for f in files:
|
# File excluding or not
|
||||||
if not self._exclude_list.is_excluded(root, f):
|
if (
|
||||||
found_files.append(fs.get_file(root_path.joinpath(f), fileclasses=fileclasses))
|
self._exclude_list is None
|
||||||
found_files = [f for f in found_files if f is not None]
|
or not self._exclude_list.mark_count
|
||||||
# In some cases, directories can be considered as files by dupeGuru, which is
|
or not self._exclude_list.is_excluded(str(from_path), item.name)
|
||||||
# why we have this line below. In fact, there is only one case: Bundle files under
|
):
|
||||||
# OS X... In other situations, this for loop will do nothing.
|
file = fs.get_file(item, fileclasses=fileclasses)
|
||||||
for d in dirs[:]:
|
if file:
|
||||||
f = fs.get_file(root_path.joinpath(d), fileclasses=fileclasses)
|
file.is_ref = state == DirectoryState.REFERENCE
|
||||||
if f is not None:
|
count += 1
|
||||||
found_files.append(f)
|
yield file
|
||||||
dirs.remove(d)
|
except (EnvironmentError, OSError, fs.InvalidPath):
|
||||||
logging.debug(
|
pass
|
||||||
"Collected %d files in folder %s",
|
logging.debug(
|
||||||
len(found_files),
|
"Collected %d files in folder %s",
|
||||||
str(root_path),
|
count,
|
||||||
)
|
str(root_path),
|
||||||
for file in found_files:
|
)
|
||||||
file.is_ref = state == DirectoryState.REFERENCE
|
except OSError:
|
||||||
yield file
|
pass
|
||||||
except (EnvironmentError, fs.InvalidPath):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _get_folders(self, from_folder, j):
|
def _get_folders(self, from_folder, j):
|
||||||
j.check_if_cancelled()
|
j.check_if_cancelled()
|
||||||
@ -222,14 +220,11 @@ class Directories:
|
|||||||
if state != DirectoryState.NORMAL:
|
if state != DirectoryState.NORMAL:
|
||||||
self.states[path] = state
|
self.states[path] = state
|
||||||
return state
|
return state
|
||||||
|
# find the longest parent path that is in states and return that state if found
|
||||||
prevlen = 0
|
# NOTE: path.parents is ordered longest to shortest
|
||||||
# we loop through the states to find the longest matching prefix
|
for parent_path in path.parents:
|
||||||
# if the parent has a state in cache, return that state
|
if parent_path in self.states:
|
||||||
for p, s in self.states.items():
|
return self.states[parent_path]
|
||||||
if p in path.parents and len(p.parts) > prevlen:
|
|
||||||
prevlen = len(p.parts)
|
|
||||||
state = s
|
|
||||||
return state
|
return state
|
||||||
|
|
||||||
def has_any_file(self):
|
def has_any_file(self):
|
||||||
|
16
core/fs.py
16
core/fs.py
@ -377,8 +377,9 @@ class Folder(File):
|
|||||||
@property
|
@property
|
||||||
def subfolders(self):
|
def subfolders(self):
|
||||||
if self._subfolders is None:
|
if self._subfolders is None:
|
||||||
subfolders = [p for p in self.path.glob("*") if not p.is_symlink() and p.is_dir()]
|
with os.scandir(self.path) as iter:
|
||||||
self._subfolders = [self.__class__(p) for p in subfolders]
|
subfolders = [p.path for p in iter if not p.is_symlink() and p.is_dir()]
|
||||||
|
self._subfolders = [self.__class__(Path(p)) for p in subfolders]
|
||||||
return self._subfolders
|
return self._subfolders
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -396,6 +397,8 @@ def get_file(path, fileclasses=[File]):
|
|||||||
"""
|
"""
|
||||||
for fileclass in fileclasses:
|
for fileclass in fileclasses:
|
||||||
if fileclass.can_handle(path):
|
if fileclass.can_handle(path):
|
||||||
|
if type(path) is os.DirEntry:
|
||||||
|
return fileclass(Path(path.path))
|
||||||
return fileclass(path)
|
return fileclass(path)
|
||||||
|
|
||||||
|
|
||||||
@ -408,10 +411,11 @@ def get_files(path, fileclasses=[File]):
|
|||||||
assert all(issubclass(fileclass, File) for fileclass in fileclasses)
|
assert all(issubclass(fileclass, File) for fileclass in fileclasses)
|
||||||
try:
|
try:
|
||||||
result = []
|
result = []
|
||||||
for path in path.glob("*"):
|
with os.scandir(path) as iter:
|
||||||
file = get_file(path, fileclasses=fileclasses)
|
for item in iter:
|
||||||
if file is not None:
|
file = get_file(item, fileclasses=fileclasses)
|
||||||
result.append(file)
|
if file is not None:
|
||||||
|
result.append(file)
|
||||||
return result
|
return result
|
||||||
except EnvironmentError:
|
except EnvironmentError:
|
||||||
raise InvalidPath(path)
|
raise InvalidPath(path)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user