mirror of
https://github.com/arsenetar/dupeguru.git
synced 2025-05-08 09:49:51 +00:00
Compare commits
5 Commits
e6c791ab0a
...
0b46ca2222
Author | SHA1 | Date | |
---|---|---|---|
0b46ca2222 | |||
72e0f76242 | |||
|
65c1d463f8 | ||
|
f1ae478433 | ||
|
c4dcfd3d4b |
@ -26,8 +26,19 @@ def getwords(s):
|
|||||||
# We decompose the string so that ascii letters with accents can be part of the word.
|
# We decompose the string so that ascii letters with accents can be part of the word.
|
||||||
s = normalize("NFD", s)
|
s = normalize("NFD", s)
|
||||||
s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", " ").lower()
|
s = multi_replace(s, "-_&+():;\\[]{}.,<>/?~!@#$*", " ").lower()
|
||||||
|
# logging.debug(f"DEBUG chars for: {s}\n"
|
||||||
|
# f"{[c for c in s if ord(c) != 32]}\n"
|
||||||
|
# f"{[ord(c) for c in s if ord(c) != 32]}")
|
||||||
|
# HACK We shouldn't ignore non-ascii characters altogether. Any Unicode char
|
||||||
|
# above common European characters that cannot be "sanitized" (i.e. stripped
|
||||||
|
# of their accents, etc.) is preserved as is. The arbitrary limit is
|
||||||
|
# obtained from this one: ord("\u037e") GREEK QUESTION MARK
|
||||||
s = "".join(
|
s = "".join(
|
||||||
c for c in s if c in string.ascii_letters + string.digits + string.whitespace
|
c for c in s
|
||||||
|
if (ord(c) <= 894
|
||||||
|
and c in string.ascii_letters + string.digits + string.whitespace
|
||||||
|
)
|
||||||
|
or ord(c) > 894
|
||||||
)
|
)
|
||||||
return [_f for _f in s.split(" ") if _f] # remove empty elements
|
return [_f for _f in s.split(" ") if _f] # remove empty elements
|
||||||
|
|
||||||
|
@ -69,6 +69,10 @@ class TestCasegetwords:
|
|||||||
eq_(["a", "b", "c", "d"], getwords("a b c d"))
|
eq_(["a", "b", "c", "d"], getwords("a b c d"))
|
||||||
eq_(["a", "b", "c", "d"], getwords(" a b c d "))
|
eq_(["a", "b", "c", "d"], getwords(" a b c d "))
|
||||||
|
|
||||||
|
def test_unicode(self):
|
||||||
|
eq_(["e", "c", "0", "a", "o", "u", "e", "u"], getwords("é ç 0 à ö û è ¤ ù"))
|
||||||
|
eq_(["02", "君のこころは輝いてるかい?", "国木田花丸", "solo", "ver"], getwords("02 君のこころは輝いてるかい? 国木田花丸 Solo Ver"))
|
||||||
|
|
||||||
def test_splitter_chars(self):
|
def test_splitter_chars(self):
|
||||||
eq_(
|
eq_(
|
||||||
[chr(i) for i in range(ord("a"), ord("z") + 1)],
|
[chr(i) for i in range(ord("a"), ord("z") + 1)],
|
||||||
@ -85,7 +89,7 @@ class TestCasegetwords:
|
|||||||
eq_(["foo", "bar"], getwords("FOO BAR"))
|
eq_(["foo", "bar"], getwords("FOO BAR"))
|
||||||
|
|
||||||
def test_decompose_unicode(self):
|
def test_decompose_unicode(self):
|
||||||
eq_(getwords("foo\xe9bar"), ["fooebar"])
|
eq_(["fooebar"], getwords("foo\xe9bar"))
|
||||||
|
|
||||||
|
|
||||||
class TestCasegetfields:
|
class TestCasegetfields:
|
||||||
|
@ -102,7 +102,7 @@ class Preferences(PreferencesBase):
|
|||||||
self.details_dialog_override_theme_icons = False if not ISLINUX else True
|
self.details_dialog_override_theme_icons = False if not ISLINUX else True
|
||||||
self.details_dialog_viewers_show_scrollbars = True
|
self.details_dialog_viewers_show_scrollbars = True
|
||||||
self.result_table_ref_foreground_color = QColor(Qt.blue)
|
self.result_table_ref_foreground_color = QColor(Qt.blue)
|
||||||
self.result_table_ref_background_color = QColor(Qt.darkGray)
|
self.result_table_ref_background_color = QColor(Qt.lightGray)
|
||||||
self.result_table_delta_foreground_color = QColor(255, 142, 40) # orange
|
self.result_table_delta_foreground_color = QColor(255, 142, 40) # orange
|
||||||
self.resultWindowIsMaximized = False
|
self.resultWindowIsMaximized = False
|
||||||
self.resultWindowRect = None
|
self.resultWindowRect = None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user