@@ -0,0 +1,93 @@
---
job: "extension"
config:
name: "my_first_qwen_image_edit_plus2509_lora_v3"
process:
- type: "sd_trainer"
training_folder: "/home/ubuntu/ai-toolkit/output"
device: "cuda:0"
network:
type: "lora"
linear: 4
linear_alpha: 4
save:
dtype: "float16"
save_every: 500
max_step_saves_to_keep: 12
datasets:
- folder_path: "/home/ubuntu/ai-toolkit/datasets/end"
control_path: "/home/ubuntu/ai-toolkit/datasets/start"
control_path2: "/home/ubuntu/ai-toolkit/datasets/reference"
full_size_control_images: true
caption_ext: "txt"
caption_dropout_rate: 0.05
buckets: true
cache_latents_to_disk: true
keep_native_size: true
random_scale: false
random_crop: false
square_crop: false
scale: 0.1
train:
batch_size: 1
unload_text_encoder: false
cache_text_embeddings: true
steps: 3000
gradient_accumulation: 1
timestep_type: "weighted"
train_unet: true
train_text_encoder: false
gradient_checkpointing: false
noise_scheduler: "flowmatch"
optimizer: "adamw8bit"
lr: 0.0001
dtype: "bf16"
model:
name_or_path: "Qwen/Qwen-Image-Edit-2509"
arch: "qwen_image_edit_plus2509"
quantize: true
qtype: "qfloat8"
quantize_te: true
qtype_te: "qfloat8"
low_vram: true
sample:
sampler: "flowmatch"
sample_every: 500
width: 1024
height: 1024
samples:
- prompt: "Swap the furniture inside the red bounding box of the Image 1 by the reference furniture of the Image 2"
ctrl_img: "/home/ubuntu/ai-toolkit/datasets/start_full/94a31bd5-b0ce-42bc-a12e-a76a676c1314_2_to_1.png"
ctrl_img2: "/home/ubuntu/ai-toolkit/datasets/reference_full/94a31bd5-b0ce-42bc-a12e-a76a676c1314_2_to_1.png"
width: 1024
height: 535
# - prompt: "Swap the furniture inside the red bounding box of the Image 1 by the reference furniture of the Image 2"
# width: 796
# height: 1024
# ctrl_img: "/home/ubuntu/ai-toolkit/datasets/start_full/95dabfac-acf1-43cb-a85a-aa795bc52739_1_to_2.png"
# ctrl_img2: "/home/ubuntu/ai-toolkit/datasets/reference_full/95dabfac-acf1-43cb-a85a-aa795bc52739_1_to_2.png"
# - prompt: "Swap the furniture inside the red bounding box of the Image 1 by the reference furniture of the Image 2"
# width: 961
# height: 1024
# ctrl_img: "/home/ubuntu/ai-toolkit/datasets/start_full/a3912256-a57f-4254-89df-fd06cdfe1c3c_2_to_1.png"
# ctrl_img2: "/home/ubuntu/ai-toolkit/datasets/reference_full/a3912256-a57f-4254-89df-fd06cdfe1c3c_2_to_1.png"
# - prompt: "Swap the furniture inside the red bounding box of the Image 1 by the reference furniture of the Image 2"
# width: 1024
# height: 1024
# ctrl_img: "/home/ubuntu/ai-toolkit/datasets/start_full/a71024fc-89bc-4f57-a1da-50a4b0c4ef3b_2_to_1.png"
# ctrl_img2: "/home/ubuntu/ai-toolkit/datasets/reference_full/a71024fc-89bc-4f57-a1da-50a4b0c4ef3b_2_to_1.png"
# - prompt: "Swap the furniture inside the red bounding box of the Image 1 by the reference furniture of the Image 2"
# width: 1024
# height: 489
# ctrl_img: "/home/ubuntu/ai-toolkit/datasets/start_full/a8ddc0e8-be33-4717-9bec-1a99b35593c3_1_to_2.png"
# ctrl_img2: "/home/ubuntu/ai-toolkit/datasets/reference_full/a8ddc0e8-be33-4717-9bec-1a99b35593c3_1_to_2.png"
neg: ""
seed: 42
walk_seed: false
guidance_scale: 3
sample_steps: 25
sqlite_db_path: "./aitk_db.db"
performance_log_every: 10
meta:
name: "[name]"
version: "1.0"
2 changes: 1 addition & 1 deletion config/examples/train_lora_omnigen2_24gb.yaml
@@ -61,7 +61,7 @@ config:
# will probably need this if gpu supports it for omnigen2, other dtypes may not work correctly
dtype: bf16
model:
name_or_path: "OmniGen2/OmniGen2
name_or_path: "OmniGen2/OmniGen2"
arch: "omnigen2"
quantize_te: true # quantize_only te
# quantize: true # quantize transformer
3 changes: 1 addition & 2 deletions config/examples/train_lora_qwen_image_edit_32gb.yaml
@@ -58,12 +58,11 @@ config:
name_or_path: "Qwen/Qwen-Image-Edit"
arch: "qwen_image_edit"
quantize: true
# qtype_te: "qfloat8" Default float8 qquantization
# qtype_te: "qfloat8" # Default float8 qquantization
# to use the ARA, use the | pipe to point to an hf path, or a local path if you have one.
# 3bit is required for 32GB
qtype: "uint3|qwen_image_edit_torchao_uint3.safetensors"
quantize_te: true
qtype_te: "qfloat8"
low_vram: true
sample:
sampler: "flowmatch" # must match train.noise_scheduler
@@ -0,0 +1,117 @@
---
job: extension
config:
# this name will be the folder and filename
name: "my_first_qwen_image_edit_plus_20lora_v1"
process:
- type: 'sd_trainer'
# root folder to save training sessions/samples/weights
training_folder: "output"
# uncomment to see performance stats in the terminal every N steps
# performance_log_every: 1000
device: cuda:0
# if a trigger word is specified, it will be added to captions of training data if it does not already exist
# alternatively, in your captions you can add [trigger] and it will be replaced with the trigger word
# Trigger words will not work when caching text embeddings
# trigger_word: "p3r5on"
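# e.g. with trigger_word "p3r5on", a caption "a photo of [trigger] on a couch" is
# trained as "a photo of p3r5on on a couch"; captions that lack [trigger] get the
# trigger word added automatically (illustrative example of the note above)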
network:
type: "lora"
linear: 16
linear_alpha: 16
save:
dtype: float16 # precision to save
save_every: 250 # save every this many steps
max_step_saves_to_keep: 4 # how many intermittent saves to keep
datasets:
# datasets are a folder of images. captions need to be txt files with the same name as the image
# for instance image2.jpg and image2.txt. Only jpg, jpeg, and png are supported currently
# images will automatically be resized and bucketed into the resolution specified
# on windows, escape back slashes with another backslash, like so:
# "C:\\path\\to\\images\\folder"
- folder_path: "/path/to/images/folder"
control_path: "/path/to/control/images/folder"
control_path2: "/path/to/control/images/folder2"
# control_path3: "/path/to/control/images/folder3"
caption_ext: "txt"
# default_caption: "a person" # if caching text embeddings, if you don't have captions, this will get cached
caption_dropout_rate: 0.05 # will drop out the caption 5% of time
resolution: [ 512, 768, 1024 ] # qwen image enjoys multiple resolutions
train:
batch_size: 1
# caching text embeddings is required for 32GB
cache_text_embeddings: true

steps: 3000 # total number of steps to train; 500 - 4000 is a good range
gradient_accumulation: 1
timestep_type: "weighted"
train_unet: true
train_text_encoder: false # probably won't work with qwen image
gradient_checkpointing: true # need this on unless you have a ton of vram
noise_scheduler: "flowmatch" # for training only
optimizer: "adamw8bit"
lr: 1e-4
# uncomment this to skip the pre training sample
# skip_first_sample: true
# uncomment to completely disable sampling
# disable_sampling: true
dtype: bf16
model:
# huggingface model name or path
name_or_path: "Qwen/Qwen-Image-Edit-2509"
arch: "qwen_image_edit_plus2509"
quantize: true
# qtype_te: "qfloat8" # Default float8 qquantization
# to use the ARA, use the | pipe to point to an hf path, or a local path if you have one.
# 3bit is required for 32GB
#! TODO: Quantize and add this to hf
# qtype: "uint3|qwen_image_edit_torchao_uint3.safetensors"
qtype: "qfloat8"
quantize_te: true
low_vram: true
sample:
sampler: "flowmatch" # must match train.noise_scheduler
sample_every: 250 # sample every this many steps
width: 1024
height: 1024
samples:
- prompt: "do the thing to it"
ctrl_img: "/path/to/control/image.jpg"
ctrl_img2: "/path/to/control2/image.jpg"
# ctrl_img3: "/path/to/control3/image.jpg"
- prompt: "do the thing to it"
ctrl_img: "/path/to/control/image.jpg"
ctrl_img2: "/path/to/control2/image.jpg"
- prompt: "do the thing to it"
ctrl_img: "/path/to/control/image.jpg"
ctrl_img2: "/path/to/control2/image.jpg"
- prompt: "do the thing to it"
ctrl_img: "/path/to/control/image.jpg"
ctrl_img2: "/path/to/control2/image.jpg"
- prompt: "do the thing to it"
ctrl_img: "/path/to/control/image.jpg"
ctrl_img2: "/path/to/control2/image.jpg"
- prompt: "do the thing to it"
ctrl_img: "/path/to/control/image.jpg"
- prompt: "do the thing to it"
ctrl_img: "/path/to/control/image.jpg"
ctrl_img2: "/path/to/control2/image.jpg"
- prompt: "do the thing to it"
ctrl_img: "/path/to/control/image.jpg"
- prompt: "do the thing to it"
ctrl_img: "/path/to/control/image.jpg"
ctrl_img2: "/path/to/control2/image.jpg"
- prompt: "do the thing to it"
ctrl_img: "/path/to/control/image.jpg"
ctrl_img2: "/path/to/control2/image.jpg"
- prompt: "do the thing to it"
ctrl_img: "/path/to/control/image.jpg"
ctrl_img2: "/path/to/control2/image.jpg"
neg: ""
seed: 42
walk_seed: true
guidance_scale: 3
sample_steps: 25
# you can add any additional meta info here. [name] is replaced with config name at top
meta:
name: "[name]"
version: '1.0'
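Assuming the standard ai-toolkit entry point (not part of this diff), either config would then be launched from the repo root with a command along the lines of: python run.py /path/to/this_config.yaml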
3 changes: 2 additions & 1 deletion extensions_built_in/diffusion_models/__init__.py
@@ -4,7 +4,7 @@
from .omnigen2 import OmniGen2Model
from .flux_kontext import FluxKontextModel
from .wan22 import Wan225bModel, Wan2214bModel, Wan2214bI2VModel
- from .qwen_image import QwenImageModel, QwenImageEditModel
+ from .qwen_image import QwenImageModel, QwenImageEditModel, QwenImageEditPlus2509Model

AI_TOOLKIT_MODELS = [
# put a list of models here
@@ -20,4 +20,5 @@
Wan2214bModel,
QwenImageModel,
QwenImageEditModel,
+ QwenImageEditPlus2509Model,
]
3 changes: 2 additions & 1 deletion extensions_built_in/diffusion_models/qwen_image/__init__.py
@@ -1,2 +1,3 @@
from .qwen_image import QwenImageModel
- from .qwen_image_edit import QwenImageEditModel
+ from .qwen_image_edit import QwenImageEditModel
+ from .qwen_image_edit_plus2509 import QwenImageEditPlus2509Model
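For context on these registration diffs: the trainer presumably resolves the model.arch string from the configs above against this registry list. A minimal hypothetical sketch of that lookup pattern (the arch attribute and get_model_class helper are illustrative stand-ins, not ai-toolkit's actual internals):

from typing import Optional, Type

class StubModel:
    # each registered model class is assumed to declare the arch string it handles
    arch: str = ""

class StubQwenImageEditPlus2509(StubModel):
    arch = "qwen_image_edit_plus2509"  # the arch value used by the configs above

REGISTRY = [StubQwenImageEditPlus2509]  # stand-in for AI_TOOLKIT_MODELS

def get_model_class(arch: str) -> Optional[Type[StubModel]]:
    # return the first registered class whose declared arch matches the config value
    for cls in REGISTRY:
        if cls.arch == arch:
            return cls
    return None

assert get_model_class("qwen_image_edit_plus2509") is StubQwenImageEditPlus2509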