From 2b9943ea91a2e25b270da43e7043354e41e8d601 Mon Sep 17 00:00:00 2001 From: illeatmyhat Date: Thu, 25 Aug 2022 12:08:26 -0700 Subject: [PATCH 1/4] support cpu --- .gitignore | 3 +++ ldm/models/diffusion/ddim.py | 7 ++++--- ldm/models/diffusion/plms.py | 7 ++++--- ldm/modules/encoders/modules.py | 19 ++++++++--------- ldm/util.py | 16 +++++++++++++++ scripts/img2img.py | 20 +++++++++--------- scripts/txt2img.py | 36 +++++++++++++++++++-------------- 7 files changed, 69 insertions(+), 39 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..81ddef162 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +outputs/ +*.egg-info/ \ No newline at end of file diff --git a/ldm/models/diffusion/ddim.py b/ldm/models/diffusion/ddim.py index fb31215db..e402353c3 100644 --- a/ldm/models/diffusion/ddim.py +++ b/ldm/models/diffusion/ddim.py @@ -3,7 +3,7 @@ import torch import numpy as np from tqdm import tqdm -from functools import partial +from ldm.util import torch_device from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like, \ extract_into_tensor @@ -15,11 +15,12 @@ def __init__(self, model, schedule="linear", **kwargs): self.model = model self.ddpm_num_timesteps = model.num_timesteps self.schedule = schedule + self.device_available = torch_device.type def register_buffer(self, name, attr): if type(attr) == torch.Tensor: - if attr.device != torch.device("cuda"): - attr = attr.to(torch.device("cuda")) + if attr.device != torch.device(self.device_available): + attr = attr.to(torch.float32).to(torch.device(self.device_available)) setattr(self, name, attr) def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True): diff --git a/ldm/models/diffusion/plms.py b/ldm/models/diffusion/plms.py index 78eeb1003..0dce4e8c0 100644 --- a/ldm/models/diffusion/plms.py +++ b/ldm/models/diffusion/plms.py @@ -3,7 +3,7 @@ import torch import numpy as np from tqdm import tqdm -from functools import partial +from ldm.util import torch_device from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like @@ -14,11 +14,12 @@ def __init__(self, model, schedule="linear", **kwargs): self.model = model self.ddpm_num_timesteps = model.num_timesteps self.schedule = schedule + self.device_available = torch_device.type def register_buffer(self, name, attr): if type(attr) == torch.Tensor: - if attr.device != torch.device("cuda"): - attr = attr.to(torch.device("cuda")) + if attr.device != torch.device(self.device_available): + attr = attr.to(torch.float32).to(torch.device(self.device_available)) setattr(self, name, attr) def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True): diff --git a/ldm/modules/encoders/modules.py b/ldm/modules/encoders/modules.py index ededbe43e..4f3adb106 100644 --- a/ldm/modules/encoders/modules.py +++ b/ldm/modules/encoders/modules.py @@ -5,9 +5,9 @@ from einops import rearrange, repeat from transformers import CLIPTokenizer, CLIPTextModel import kornia - +from ldm.util import torch_device from ldm.modules.x_transformer import Encoder, TransformerWrapper # TODO: can we directly rely on lucidrains code and simply add this as a reuirement? --> test - + class AbstractEncoder(nn.Module): def __init__(self): @@ -35,7 +35,7 @@ def forward(self, batch, key=None): class TransformerEmbedder(AbstractEncoder): """Some transformer encoder layers""" - def __init__(self, n_embed, n_layer, vocab_size, max_seq_len=77, device="cuda"): + def __init__(self, n_embed, n_layer, vocab_size, max_seq_len=77, device=torch_device): super().__init__() self.device = device self.transformer = TransformerWrapper(num_tokens=vocab_size, max_seq_len=max_seq_len, @@ -52,7 +52,7 @@ def encode(self, x): class BERTTokenizer(AbstractEncoder): """ Uses a pretrained BERT tokenizer by huggingface. Vocab size: 30522 (?)""" - def __init__(self, device="cuda", vq_interface=True, max_length=77): + def __init__(self, device=torch_device, vq_interface=True, max_length=77): super().__init__() from transformers import BertTokenizerFast # TODO: add to reuquirements self.tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased") @@ -80,7 +80,7 @@ def decode(self, text): class BERTEmbedder(AbstractEncoder): """Uses the BERT tokenizr model and add some transformer encoder layers""" def __init__(self, n_embed, n_layer, vocab_size=30522, max_seq_len=77, - device="cuda",use_tokenizer=True, embedding_dropout=0.0): + device=torch_device,use_tokenizer=True, embedding_dropout=0.0): super().__init__() self.use_tknz_fn = use_tokenizer if self.use_tknz_fn: @@ -134,9 +134,10 @@ def forward(self,x): def encode(self, x): return self(x) + class FrozenCLIPEmbedder(AbstractEncoder): """Uses the CLIP transformer encoder for text (from Hugging Face)""" - def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77): + def __init__(self, version="openai/clip-vit-large-patch14", device=torch_device, max_length=77): super().__init__() self.tokenizer = CLIPTokenizer.from_pretrained(version) self.transformer = CLIPTextModel.from_pretrained(version) @@ -166,7 +167,7 @@ class FrozenCLIPTextEmbedder(nn.Module): """ Uses the CLIP transformer encoder for text. """ - def __init__(self, version='ViT-L/14', device="cuda", max_length=77, n_repeat=1, normalize=True): + def __init__(self, version='ViT-L/14', device=torch_device, max_length=77, n_repeat=1, normalize=True): super().__init__() self.model, _ = clip.load(version, jit=False, device="cpu") self.device = device @@ -202,7 +203,7 @@ def __init__( self, model, jit=False, - device='cuda' if torch.cuda.is_available() else 'cpu', + device=torch_device, antialias=False, ): super().__init__() @@ -231,4 +232,4 @@ def forward(self, x): if __name__ == "__main__": from ldm.util import count_params model = FrozenCLIPEmbedder() - count_params(model, verbose=True) \ No newline at end of file + count_params(model, verbose=True) diff --git a/ldm/util.py b/ldm/util.py index 8ba38853e..bcde49cb6 100644 --- a/ldm/util.py +++ b/ldm/util.py @@ -14,6 +14,22 @@ from PIL import Image, ImageDraw, ImageFont +# noinspection PyBroadException +def get_device(): + try: + if torch.cuda.is_available(): + return 'cuda' + elif torch.backends.mps.is_available(): + return 'mps' + else: + return 'cpu' + except Exception: + return 'cpu' + + +torch_device = torch.device(get_device()) + + def log_txt_as_img(wh, xc, size=10): # wh a tuple of (width, height) # xc a list of captions to plot diff --git a/scripts/img2img.py b/scripts/img2img.py index 421e2151d..d19aeacb8 100644 --- a/scripts/img2img.py +++ b/scripts/img2img.py @@ -15,7 +15,7 @@ import time from pytorch_lightning import seed_everything -from ldm.util import instantiate_from_config +from ldm.util import instantiate_from_config, torch_device as device from ldm.models.diffusion.ddim import DDIMSampler from ldm.models.diffusion.plms import PLMSSampler @@ -40,7 +40,7 @@ def load_model_from_config(config, ckpt, verbose=False): print("unexpected keys:") print(u) - model.cuda() + model.to(device) model.eval() return model @@ -170,13 +170,13 @@ def main(): parser.add_argument( "--config", type=str, - default="configs/stable-diffusion/v1-inference.yaml", + default="configs/stable_diffusion/v1-inference.yaml", help="path to config which constructs model", ) parser.add_argument( "--ckpt", type=str, - default="models/ldm/stable-diffusion-v1/model.ckpt", + default="models/ldm/stable_diffusion-v1/model.ckpt", help="path to checkpoint of model", ) parser.add_argument( @@ -199,7 +199,6 @@ def main(): config = OmegaConf.load(f"{opt.config}") model = load_model_from_config(config, f"{opt.ckpt}") - device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") model = model.to(device) if opt.plms: @@ -230,7 +229,7 @@ def main(): grid_count = len(os.listdir(outpath)) - 1 assert os.path.isfile(opt.init_img) - init_image = load_img(opt.init_img).to(device) + init_image = load_img(opt.init_img).to(torch_device) init_image = repeat(init_image, '1 ... -> b ...', b=batch_size) init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image)) # move to latent space @@ -240,9 +239,12 @@ def main(): t_enc = int(opt.strength * opt.ddim_steps) print(f"target t_enc is {t_enc} steps") - precision_scope = autocast if opt.precision == "autocast" else nullcontext + if torch_device.type in ['mps', 'cpu']: + precision_scope = nullcontext # have to use f32 on mps + else: + precision_scope = autocast if opt.precision == "autocast" else nullcontext with torch.no_grad(): - with precision_scope("cuda"): + with precision_scope(torch_device.type): with model.ema_scope(): tic = time.time() all_samples = list() @@ -256,7 +258,7 @@ def main(): c = model.get_learned_conditioning(prompts) # encode (scaled latent) - z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(device)) + z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(torch_device)) # decode it samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=opt.scale, unconditional_conditioning=uc,) diff --git a/scripts/txt2img.py b/scripts/txt2img.py index 59c16a1db..b17cf8379 100644 --- a/scripts/txt2img.py +++ b/scripts/txt2img.py @@ -1,22 +1,27 @@ -import argparse, os, sys, glob import cv2 -import torch +import argparse +import os +import time +import intel_extension_for_pytorch as ipex + import numpy as np -from omegaconf import OmegaConf +import torch + +from contextlib import nullcontext from PIL import Image -from tqdm import tqdm, trange from imwatermark import WatermarkEncoder from itertools import islice from einops import rearrange -from torchvision.utils import make_grid -import time +from omegaconf import OmegaConf from pytorch_lightning import seed_everything from torch import autocast -from contextlib import contextmanager, nullcontext +from torchvision.utils import make_grid +from tqdm import tqdm, trange -from ldm.util import instantiate_from_config from ldm.models.diffusion.ddim import DDIMSampler from ldm.models.diffusion.plms import PLMSSampler +from ldm.util import instantiate_from_config, torch_device as device + from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from transformers import AutoFeatureExtractor @@ -60,7 +65,7 @@ def load_model_from_config(config, ckpt, verbose=False): print("unexpected keys:") print(u) - model.cuda() + model.to(device) model.eval() return model @@ -238,9 +243,7 @@ def main(): config = OmegaConf.load(f"{opt.config}") model = load_model_from_config(config, f"{opt.ckpt}") - - device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") - model = model.to(device) + model = ipex.optimize(model) if opt.plms: sampler = PLMSSampler(model) @@ -277,13 +280,16 @@ def main(): if opt.fixed_code: start_code = torch.randn([opt.n_samples, opt.C, opt.H // opt.f, opt.W // opt.f], device=device) - precision_scope = autocast if opt.precision=="autocast" else nullcontext + if device.type in ['mps', 'cpu']: + precision_scope = nullcontext # have to use f32 on mps + else: + precision_scope = autocast if opt.precision == "autocast" else nullcontext with torch.no_grad(): - with precision_scope("cuda"): + with precision_scope(device.type): with model.ema_scope(): tic = time.time() all_samples = list() - for n in trange(opt.n_iter, desc="Sampling"): + for _ in trange(opt.n_iter, desc="Sampling"): for prompts in tqdm(data, desc="data"): uc = None if opt.scale != 1.0: From a5ffb20fec974a0704bfadc8b300129169e71fed Mon Sep 17 00:00:00 2001 From: illeatmyhat Date: Thu, 25 Aug 2022 19:31:27 -0700 Subject: [PATCH 2/4] better packaging --- .gitignore | 7 ++++++- ldm/__init__.py | 0 ldm/models/__init__.py | 0 ldm/modules/__init__.py | 0 pyproject.toml | 39 +++++++++++++++++++++++++++++++++++++++ setup.py | 17 +++++------------ 6 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 ldm/__init__.py create mode 100644 ldm/models/__init__.py create mode 100644 ldm/modules/__init__.py create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore index 81ddef162..fdc7554f1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ __pycache__ +build/ +inputs/ outputs/ -*.egg-info/ \ No newline at end of file +*.egg-info/ +dist/ +*.zip +*.ckpt \ No newline at end of file diff --git a/ldm/__init__.py b/ldm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ldm/models/__init__.py b/ldm/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ldm/modules/__init__.py b/ldm/modules/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..0f5f7b914 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,39 @@ +[build-system] +requires = ["setuptools", "wheel", "setuptools-scm[toml]"] +build-backend = "setuptools.build_meta" + +[project] +name = "latent-diffusion" +version = "0.0.1" +description = "" +requires-python = ">=3.9" +#license = {file = "LICENSE.txt"} +keywords = ["stable-diffusion"] +dependencies = [ + "einops", # tensor utils used in LDM + "gradio", # GUI + "numpy", # core + "omegaconf", # used to load model config YAMLs + "pytorch-lightning", # core + "torch", # core + "transformers", # downloads CLIP from huggingface + "tqdm", # progress bar + "taming-transformers @ git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers", #core + "clip @ git+https://github.com/openai/CLIP.git@main#egg=clip", # core +] + +[project.urls] +homepage = "https://github.com/CompVis/stable-diffusion" +repository = "https://github.com/CompVis/stable-diffusion" + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.package-data] +"*" = ["*", "*/*", "*/*/*", "*/*/*/*"] # apparently setuptools doesn't support **/* + +[tool.setuptools.exclude-package-data] +"*" = ["__pycache__", "**/*.ckpt", "**/*.zip"] + +[tool.setuptools.packages.find] +include = ["ldm", "assets", "configs", "data", "models"] \ No newline at end of file diff --git a/setup.py b/setup.py index a24d54167..5f7fe5682 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,6 @@ -from setuptools import setup, find_packages +#!/usr/bin/env python -setup( - name='latent-diffusion', - version='0.0.1', - description='', - packages=find_packages(), - install_requires=[ - 'torch', - 'numpy', - 'tqdm', - ], -) \ No newline at end of file +import setuptools + +if __name__ == '__main__': + setuptools.setup() From 7923a99ff68c9d64873c289ea183690e7d3377b7 Mon Sep 17 00:00:00 2001 From: illeatmyhat Date: Thu, 25 Aug 2022 19:48:52 -0700 Subject: [PATCH 3/4] fix param --- scripts/img2img.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/img2img.py b/scripts/img2img.py index d19aeacb8..83e30d2cd 100644 --- a/scripts/img2img.py +++ b/scripts/img2img.py @@ -170,13 +170,13 @@ def main(): parser.add_argument( "--config", type=str, - default="configs/stable_diffusion/v1-inference.yaml", + default="configs/stable-diffusion/v1-inference.yaml", help="path to config which constructs model", ) parser.add_argument( "--ckpt", type=str, - default="models/ldm/stable_diffusion-v1/model.ckpt", + default="models/ldm/stable-diffusion-v1/model.ckpt", help="path to checkpoint of model", ) parser.add_argument( From e5d11e6b311f4a6f8f6d6de7cdb5d828555421dd Mon Sep 17 00:00:00 2001 From: illeatmyhat Date: Thu, 25 Aug 2022 20:56:30 -0700 Subject: [PATCH 4/4] remove python guard --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0f5f7b914..7633faf55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,6 @@ build-backend = "setuptools.build_meta" name = "latent-diffusion" version = "0.0.1" description = "" -requires-python = ">=3.9" #license = {file = "LICENSE.txt"} keywords = ["stable-diffusion"] dependencies = [