Change system prompt to inject Prompt Start special token

This commit is contained in:
rockerBOO
2025-02-27 02:47:04 -05:00
parent 7b83d50dc0
commit ce2610d29b
4 changed files with 15 additions and 8 deletions

View File

@@ -1692,7 +1692,8 @@ class BaseDataset(torch.utils.data.Dataset):
text_encoder_outputs_list.append(text_encoder_outputs)
if tokenization_required:
-system_prompt = subset.system_prompt or ""
+system_prompt_special_token = "<Prompt Start>"
+system_prompt = f"{subset.system_prompt} {system_prompt_special_token} " if subset.system_prompt else ""
caption = self.process_caption(subset, image_info.caption)
input_ids = [ids[0] for ids in self.tokenize_strategy.tokenize(system_prompt + caption)] # remove batch dimension
# if self.XTI_layers:
@@ -2091,7 +2092,8 @@ class DreamBoothDataset(BaseDataset):
else:
num_train_images += num_repeats * len(img_paths)
-system_prompt = self.system_prompt or subset.system_prompt or ""
+system_prompt_special_token = "<Prompt Start>"
+system_prompt = f"{self.system_prompt or subset.system_prompt} {system_prompt_special_token} " if self.system_prompt or subset.system_prompt else ""
for img_path, caption, size in zip(img_paths, captions, sizes):
info = ImageInfo(img_path, num_repeats, system_prompt + caption, subset.is_reg, img_path)
if size is not None: