mirror of
https://github.com/kohya-ss/sd-scripts.git
synced 2026-04-09 06:45:09 +00:00
Add cleaning patterns
This commit is contained in:
@@ -13,17 +13,26 @@ import library.train_util as train_util
|
|||||||
|
|
||||||
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||||
|
|
||||||
PATTERN_REPLACE = [re.compile(r'with the (words?|letters?) (" ?[^"]*"|\w+)( on (the)? ?\w+)?'),
|
PATTERN_REPLACE = [
|
||||||
re.compile(r'that says (" ?[^"]*"|\w+)')]
|
re.compile(r'(has|with|and) the (words?|letters?|name) (" ?[^"]*"|\w+)( ?(is )?(on|in) (the |her |their |him )?\w+)?'),
|
||||||
|
re.compile(r'(with a sign )?that says ?(" ?[^"]*"|\w+)( ?on it)?'),
|
||||||
|
re.compile(r"(with a sign )?that says ?(' ?(i'm)?[^']*'|\w+)( ?on it)?"),
|
||||||
|
re.compile(r'with the number \d+ on (it|\w+ \w+)'),
|
||||||
|
re.compile(r'with the words "'),
|
||||||
|
re.compile(r'word \w+ on it'),
|
||||||
|
re.compile(r'that says the word \w+ on it'),
|
||||||
|
re.compile('that says\'the word "( on it)?'),
|
||||||
|
]
|
||||||
|
|
||||||
# Remove "with the word xxxx" phrases, which are very frequent false detections
|
# Remove "with the word xxxx" phrases, which are very frequent false detections
|
||||||
|
|
||||||
|
|
||||||
def remove_words(captions, debug):
|
def remove_words(captions, debug):
|
||||||
removed_caps = []
|
removed_caps = []
|
||||||
for caption in captions:
|
for caption in captions:
|
||||||
cap = caption
|
cap = caption
|
||||||
for pat in PATTERN_REPLACE:
|
for pat in PATTERN_REPLACE:
|
||||||
cap = pat.sub("", caption)
|
cap = pat.sub("", cap)
|
||||||
if debug and cap != caption:
|
if debug and cap != caption:
|
||||||
print(caption)
|
print(caption)
|
||||||
print(cap)
|
print(cap)
|
||||||
|
|||||||
Reference in New Issue
Block a user