From a791e54d3086e953a96c5370127b92873b4b4db2 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Mon, 22 Aug 2022 22:50:41 -0700
Subject: [PATCH 01/17] Remove safety checker

---
 .../stable_diffusion/pipeline_stable_diffusion.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 550513b5c943..5d2647cbefa3 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -155,9 +155,10 @@ def __call__(
         image = image.cpu().permute(0, 2, 3, 1).numpy()
 
         # run safety checker
-        safety_cheker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(self.device)
-        image, has_nsfw_concept = self.safety_checker(images=image, clip_input=safety_cheker_input.pixel_values)
-
+        #safety_cheker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(self.device)
+        #image, has_nsfw_concept = self.safety_checker(images=image, clip_input=safety_cheker_input.pixel_values)
+        has_nsft_concept = False
+
         if output_type == "pil":
             image = self.numpy_to_pil(image)

From 4ec28732d9284dafba3350760547dc03d0e76449 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Mon, 22 Aug 2022 23:10:29 -0700
Subject: [PATCH 02/17] Fix typo in the has_nsfw_concept variable name

---
 .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 5d2647cbefa3..0e9bb6c23b33 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -157,7 +157,7 @@ def __call__(
         # run safety checker
         #safety_cheker_input = self.feature_extractor(self.numpy_to_pil(image), return_tensors="pt").to(self.device)
         #image, has_nsfw_concept = self.safety_checker(images=image, clip_input=safety_cheker_input.pixel_values)
-        has_nsft_concept = False
+        has_nsfw_concept = False
 
         if output_type == "pil":
             image = self.numpy_to_pil(image)
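Usage sketch (not part of the series): with the two patches above applied, `__call__` skips the safety model entirely and always reports `nsfw_content_detected` as `False`. A minimal, hedged illustration of the changed return contract — the checkpoint id and prompt below are placeholders, not something these patches prescribe:

```python
from diffusers import StableDiffusionPipeline

# Placeholder checkpoint; any Stable Diffusion weights of this era should behave the same.
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
pipe = pipe.to("cuda")

result = pipe("a photo of an astronaut riding a horse")
image = result["sample"][0]

# The safety checker calls are commented out above, so this flag is now hard-coded.
assert result["nsfw_content_detected"] is False
```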
From 556fa26ec2abc8004339044adc668e71ef75a75e Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Tue, 23 Aug 2022 12:45:06 -0700
Subject: [PATCH 03/17] Add linear interpolation between two prompts

---
 .../pipeline_stable_diffusion.py | 124 ++++++++++++++++++
 1 file changed, 124 insertions(+)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 0e9bb6c23b33..60e3c0e1c1a1 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -1,5 +1,6 @@
 import inspect
 import warnings
+import random
 from typing import List, Optional, Union
 
 import torch
@@ -163,3 +164,126 @@ def __call__(
             image = self.numpy_to_pil(image)
 
         return {"sample": image, "nsfw_content_detected": has_nsfw_concept}
+
+    def get_text_latent_space(self, prompt):
+
+        # get prompt text embeddings
+        text_input = self.tokenizer(
+            prompt,
+            padding="max_length",
+            max_length=self.tokenizer.model_max_length,
+            truncation=True,
+            return_tensors="pt",
+        )
+        text_embeddings = self.text_encoder(text_input.input_ids.to(self.device))[0]
+        return text_embeddings
+
+    def lerp_between_prompts(self, first_prompt, second_prompt, seed = None, length = 10, save=False, **kwargs):
+        first_embedding = self.get_text_latent_space(first_prompt)
+        second_embedding = self.get_text_latent_space(second_prompt)
+        if not seed:
+            seed = random.randint()
+        generator = torch.Generator("cuda")
+        lerp_embed_points = []
+        for i in range(length):
+            weight = i / length
+            tensor_lerp = torch.lerp(first_embedding, second_embedding, weight)
+            lerp_embed_points.extend(tensor_lerp)
+        images = []
+        for idx, latent_point in enumerate(lerp_embed_points):
+            generator.manual_seed(seed)
+            image = self.image_from_latent_space(latent_point, **kwargs)
+            images.extend(image)
+            if save:
+                image.save(f"{first_prompt}-{second_prompt}-{idx:02d}")
+        return images
+
+
+    def image_from_latent_space(self, text_embeddings,
+        height: Optional[int] = 512,
+        width: Optional[int] = 512,
+        num_inference_steps: Optional[int] = 50,
+        guidance_scale: Optional[float] = 7.5,
+        eta: Optional[float] = 0.0,
+        generator: Optional[torch.Generator] = None,
+        output_type: Optional[str] = "pil",
+        **kwargs,):
+
+        # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
+        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+        # corresponds to doing no classifier free guidance.
+        do_classifier_free_guidance = guidance_scale > 1.0
+        # get unconditional embeddings for classifier free guidance
+        if do_classifier_free_guidance:
+            max_length = text_input.input_ids.shape[-1]
+            uncond_input = self.tokenizer(
+                [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
+            )
+            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
+
+            # For classifier free guidance, we need to do two forward passes.
+            # Here we concatenate the unconditional and text embeddings into a single batch
+            # to avoid doing two forward passes
+            text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
+
+        # get the initial random noise
+        latents = torch.randn(
+            (batch_size, self.unet.in_channels, height // 8, width // 8),
+            generator=generator,
+            device=self.device,
+        )
+
+        # set timesteps
+        accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys())
+        extra_set_kwargs = {}
+        if accepts_offset:
+            extra_set_kwargs["offset"] = 1
+
+        self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs)
+
+        # if we use LMSDiscreteScheduler, let's make sure latents are multiplied by sigmas
+        if isinstance(self.scheduler, LMSDiscreteScheduler):
+            latents = latents * self.scheduler.sigmas[0]
+
+        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
+        # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
+        # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
+        # and should be between [0, 1]
+        accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
+        extra_step_kwargs = {}
+        if accepts_eta:
+            extra_step_kwargs["eta"] = eta
+
+        for i, t in tqdm(enumerate(self.scheduler.timesteps)):
+            # expand the latents if we are doing classifier free guidance
+            latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+            if isinstance(self.scheduler, LMSDiscreteScheduler):
+                sigma = self.scheduler.sigmas[i]
+                latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5)
+
+            # predict the noise residual
+            noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
+
+            # perform guidance
+            if do_classifier_free_guidance:
+                noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+                noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+            # compute the previous noisy sample x_t -> x_t-1
+            if isinstance(self.scheduler, LMSDiscreteScheduler):
+                latents = self.scheduler.step(noise_pred, i, latents, **extra_step_kwargs)["prev_sample"]
+            else:
+                latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs)["prev_sample"]
+
+        # scale and decode the image latents with vae
+        latents = 1 / 0.18215 * latents
+        image = self.vae.decode(latents)
+
+        image = (image / 2 + 0.5).clamp(0, 1)
+        image = image.cpu().permute(0, 2, 3, 1).numpy()
+
+        if output_type == "pil":
+            image = self.numpy_to_pil(image)
+
+        return image
+
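Usage sketch (not part of the patch): the methods above split generation into embedding a prompt with the CLIP text encoder and denoising from a given point in that text latent space, so `torch.lerp` can walk between two prompt embeddings while the initial noise stays fixed. A hedged example of the intended call shape, assuming a patched pipeline instance `pipe`; it relies on the seed and return-value fixes that land later in this series (patches 05–08), and the prompts are placeholders:

```python
# Interpolate between two prompt embeddings with identical starting noise.
result = pipe.lerp_between_prompts(
    "a painting of a cat",
    "a painting of a dog",
    seed=42,                  # fixed seed: every frame restarts from the same generator state
    length=10,                # number of interpolation weights i/length for i in [0, length)
    num_inference_steps=50,   # forwarded to the diffusion loop via **kwargs
)
frames = result["images"]          # one image per interpolated embedding
points = result["latent_points"]   # the lerped text embeddings themselves
```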
From 97cbc2b1d3dd2716e71719fee94e7fcb31e0565f Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Tue, 23 Aug 2022 13:43:00 -0700
Subject: [PATCH 04/17] Fix a couple of issues with variables that don't exist

---
 .../pipeline_stable_diffusion.py | 37 +++++++++++--------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 60e3c0e1c1a1..a88736dbfb5e 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -165,7 +165,7 @@ def __call__(
 
         return {"sample": image, "nsfw_content_detected": has_nsfw_concept}
 
-    def get_text_latent_space(self, prompt):
+    def get_text_latent_space(self, prompt, guidance_scale):
 
         # get prompt text embeddings
         text_input = self.tokenizer(
@@ -176,9 +176,27 @@ def get_text_latent_space(self, prompt):
             return_tensors="pt",
         )
         text_embeddings = self.text_encoder(text_input.input_ids.to(self.device))[0]
+
+        # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
+        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+        # corresponds to doing no classifier free guidance.
+        do_classifier_free_guidance = guidance_scale > 1.0
+        # get unconditional embeddings for classifier free guidance
+        if do_classifier_free_guidance:
+            max_length = text_input.input_ids.shape[-1]
+            uncond_input = self.tokenizer(
+                [""], padding="max_length", max_length=max_length, return_tensors="pt"
+            )
+            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
+
+            # For classifier free guidance, we need to do two forward passes.
+            # Here we concatenate the unconditional and text embeddings into a single batch
+            # to avoid doing two forward passes
+            text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
+
         return text_embeddings
 
-    def lerp_between_prompts(self, first_prompt, second_prompt, seed = None, length = 10, save=False, **kwargs):
+    def lerp_between_prompts(self, first_prompt, second_prompt, seed = None, length = 10, save=False, guidance_scale: Optional[float] = 7.5, **kwargs):
         first_embedding = self.get_text_latent_space(first_prompt)
         second_embedding = self.get_text_latent_space(second_prompt)
         if not seed:
@@ -209,23 +227,12 @@ def image_from_latent_space(self, text_embeddings,
         output_type: Optional[str] = "pil",
         **kwargs,):
 
+        batch_size = 1
+
         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
         # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
-        # get unconditional embeddings for classifier free guidance
-        if do_classifier_free_guidance:
-            max_length = text_input.input_ids.shape[-1]
-            uncond_input = self.tokenizer(
-                [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
-            )
-            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
-
-            # For classifier free guidance, we need to do two forward passes.
-            # Here we concatenate the unconditional and text embeddings into a single batch
-            # to avoid doing two forward passes
-            text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
-
         # get the initial random noise
         latents = torch.randn(
             (batch_size, self.unet.in_channels, height // 8, width // 8),

From ac2f99befe3cfe216eef9d58bc0f172b781bda48 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Tue, 23 Aug 2022 15:15:30 -0700
Subject: [PATCH 05/17] Disable gradients in image_from_latent_space, which should drastically decrease memory consumption

---
 .../stable_diffusion/pipeline_stable_diffusion.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index a88736dbfb5e..0f6a304b5656 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -1,6 +1,7 @@
 import inspect
 import warnings
 import random
+import sys
 from typing import List, Optional, Union
 
 import torch
@@ -165,7 +166,7 @@ def __call__(
 
         return {"sample": image, "nsfw_content_detected": has_nsfw_concept}
 
-    def get_text_latent_space(self, prompt, guidance_scale):
+    def get_text_latent_space(self, prompt, guidance_scale = 7.5):
 
         # get prompt text embeddings
         text_input = self.tokenizer(
@@ -200,7 +201,7 @@ def lerp_between_prompts(self, first_prompt, second_prompt, seed = None, length
         first_embedding = self.get_text_latent_space(first_prompt)
         second_embedding = self.get_text_latent_space(second_prompt)
         if not seed:
-            seed = random.randint()
+            seed = random.randint(0, sys.maxsize)
         generator = torch.Generator("cuda")
         lerp_embed_points = []
         for i in range(length):
@@ -216,7 +217,7 @@ def lerp_between_prompts(self, first_prompt, second_prompt, seed = None, length
                 image.save(f"{first_prompt}-{second_prompt}-{idx:02d}")
         return images
 
-
+    @torch.no_grad()
     def image_from_latent_space(self, text_embeddings,
         height: Optional[int] = 512,
         width: Optional[int] = 512,
From ae352e115dfa20d1b35394330f8c2389dae4eb04 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Tue, 23 Aug 2022 22:29:37 -0700
Subject: [PATCH 06/17] Add variation function and make all functions return the generator state to facilitate it

---
 .../pipeline_stable_diffusion.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 0f6a304b5656..5e6fc5eefd2f 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -203,6 +203,8 @@ def lerp_between_prompts(self, first_prompt, second_prompt, seed = None, length
         if not seed:
             seed = random.randint(0, sys.maxsize)
         generator = torch.Generator("cuda")
+        generator.manual_seed(seed)
+        generator_state = generator.get_state()
         lerp_embed_points = []
         for i in range(length):
@@ -210,12 +212,12 @@ def lerp_between_prompts(self, first_prompt, second_prompt, seed = None, length
             weight = i / length
             tensor_lerp = torch.lerp(first_embedding, second_embedding, weight)
             lerp_embed_points.extend(tensor_lerp)
         images = []
         for idx, latent_point in enumerate(lerp_embed_points):
-            generator.manual_seed(seed)
-            image = self.image_from_latent_space(latent_point, **kwargs)
+            generator.set_state(generator_state)
+            image = self.image_from_latent_space(latent_point, **kwargs)["image"][0]
             images.extend(image)
             if save:
                 image.save(f"{first_prompt}-{second_prompt}-{idx:02d}")
-        return images
+        return {"images": images, "generator_state": generator_state}
 
     @torch.no_grad()
     def image_from_latent_space(self, text_embeddings,
@@ -230,6 +232,9 @@ def image_from_latent_space(self, text_embeddings,
 
         batch_size = 1
 
+        if generator == None:
+            generator = torch.Generator("cuda")
+        generator_state = generator.get_state()
         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
         # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
         # corresponds to doing no classifier free guidance.
@@ -293,5 +298,15 @@ def image_from_latent_space(self, text_embeddings,
 
         if output_type == "pil":
             image = self.numpy_to_pil(image)
 
-        return image
+        return {"image": image, "generator_state": generator_state}
+
+    def variation(self, text_embeddings, generator_state, variation_magnitude = 100, **kwargs):
+        # random vector to move in latent space
+        rand_t = (torch.rand(text_embeddings.shape) * 2) - 1
+        rand_mag = torch.sum(torch.abs(rand_t)) / variation_magnitude
+        scaled_rand_t = rand_t / rand_mag
+        variation_embedding = text_embeddings + scaled_rand_t
+
+        generator = torch.Generator("cuda")
+        generator.set_state(generator_state)
+        return self.image_from_latent_space(variation_embedding, generator=generator, **kwargs)
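Usage sketch (not part of the patch): `variation` nudges a text embedding by a random offset — rescaled so its L1 norm equals `variation_magnitude` — then re-runs diffusion from a saved generator state, so the initial noise is identical and only the embedding differs. A hedged example assuming a patched pipeline instance `pipe` and the device fix in the next patch; the prompt and magnitude are placeholders, and note that patch 09 later renames `image_from_latent_space` to `diffuse_from_inits`:

```python
embedding = pipe.get_text_latent_space("a watercolor landscape")

base = pipe.image_from_latent_space(embedding)
base_image = base["image"][0]

# Same starting noise (restored from generator_state), slightly shifted embedding.
variant = pipe.variation(embedding, base["generator_state"], variation_magnitude=100)
variant_image = variant["image"][0]
```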
From 7e3fe6818498284952c6c208ea2b6883ca7cf031 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Tue, 23 Aug 2022 22:35:32 -0700
Subject: [PATCH 07/17] Account for device in variation generator

---
 .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 5e6fc5eefd2f..7c632c3a246f 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -302,7 +302,7 @@ def image_from_latent_space(self, text_embeddings,
 
     def variation(self, text_embeddings, generator_state, variation_magnitude = 100, **kwargs):
         # random vector to move in latent space
-        rand_t = (torch.rand(text_embeddings.shape) * 2) - 1
+        rand_t = (torch.rand(text_embeddings.shape, device = self.device) * 2) - 1
         rand_mag = torch.sum(torch.abs(rand_t)) / variation_magnitude
         scaled_rand_t = rand_t / rand_mag
         variation_embedding = text_embeddings + scaled_rand_t

From cf80ce75ee50f0eb7e979c9f6e1954be63e0ed74 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Wed, 24 Aug 2022 13:29:28 -0700
Subject: [PATCH 08/17] Fix lerp_between_prompts and add more items to the return dictionaries

---
 .../stable_diffusion/pipeline_stable_diffusion.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 7c632c3a246f..b17f088648da 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -209,15 +209,15 @@ def lerp_between_prompts(self, first_prompt, second_prompt, seed = None, length
         for i in range(length):
             weight = i / length
             tensor_lerp = torch.lerp(first_embedding, second_embedding, weight)
-            lerp_embed_points.extend(tensor_lerp)
+            lerp_embed_points.append(tensor_lerp)
         images = []
         for idx, latent_point in enumerate(lerp_embed_points):
             generator.set_state(generator_state)
             image = self.image_from_latent_space(latent_point, **kwargs)["image"][0]
-            images.extend(image)
+            images.append(image)
             if save:
                 image.save(f"{first_prompt}-{second_prompt}-{idx:02d}")
-        return {"images": images, "generator_state": generator_state}
+        return {"images": images, "latent_points": lerp_embed_points,"generator_state": generator_state}
 
     @torch.no_grad()
     def image_from_latent_space(self, text_embeddings,
@@ -309,4 +309,6 @@ def variation(self, text_embeddings, generator_state, variation_magnitude = 100,
 
         generator = torch.Generator("cuda")
         generator.set_state(generator_state)
-        return self.image_from_latent_space(variation_embedding, generator=generator, **kwargs)
+        result = self.image_from_latent_space(variation_embedding, generator=generator, **kwargs)
+        result.update({"latent_point": variation_embedding})
+        return result

From 52138b29a6cbc9ba444bdc0829aadc050345ad81 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Thu, 25 Aug 2022 15:50:16 -0700
Subject: [PATCH 09/17] Add slerp_through_seeds, which navigates between two points in the noise space while holding the prompt constant

---
 .../pipeline_stable_diffusion.py | 79 +++++++++++++++++--
 1 file changed, 72 insertions(+), 7 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index b17f088648da..33a3ed6ad62c 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -5,6 +5,7 @@
 from typing import List, Optional, Union
 
 import torch
+import numpy as np
 from tqdm.auto import tqdm
 
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
@@ -197,12 +198,40 @@ def get_text_latent_space(self, prompt, guidance_scale = 7.5):
 
         return text_embeddings
 
+    def slerp(t, v0, v1, DOT_THRESHOLD=0.9995):
+        """ helper function to spherically interpolate two arrays v1 v2
+        from https://gist.github.com/karpathy/00103b0037c5aaea32fe1da1af553355
+        this should be better than lerping for moving between noise spaces """
+
+        if not isinstance(v0, np.ndarray):
+            inputs_are_torch = True
+            input_device = v0.device
+            v0 = v0.cpu().numpy()
+            v1 = v1.cpu().numpy()
+
+        dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1)))
+        if np.abs(dot) > DOT_THRESHOLD:
+            v2 = (1 - t) * v0 + t * v1
+        else:
+            theta_0 = np.arccos(dot)
+            sin_theta_0 = np.sin(theta_0)
+            theta_t = theta_0 * t
+            sin_theta_t = np.sin(theta_t)
+            s0 = np.sin(theta_0 - theta_t) / sin_theta_0
+            s1 = sin_theta_t / sin_theta_0
+            v2 = s0 * v0 + s1 * v1
+
+        if inputs_are_torch:
+            v2 = torch.from_numpy(v2).to(input_device)
+
+        return v2
+
     def lerp_between_prompts(self, first_prompt, second_prompt, seed = None, length = 10, save=False, guidance_scale: Optional[float] = 7.5, **kwargs):
         first_embedding = self.get_text_latent_space(first_prompt)
         second_embedding = self.get_text_latent_space(second_prompt)
         if not seed:
             seed = random.randint(0, sys.maxsize)
-        generator = torch.Generator("cuda")
+        generator = torch.Generator(self.device)
         generator.manual_seed(seed)
         generator_state = generator.get_state()
         lerp_embed_points = []
@@ -213,14 +242,51 @@ def lerp_between_prompts(self, first_prompt, second_prompt, seed = None, length
         images = []
         for idx, latent_point in enumerate(lerp_embed_points):
             generator.set_state(generator_state)
-            image = self.image_from_latent_space(latent_point, **kwargs)["image"][0]
+            image = self.diffuse_from_inits(latent_point, **kwargs)["image"][0]
             images.append(image)
             if save:
                 image.save(f"{first_prompt}-{second_prompt}-{idx:02d}")
         return {"images": images, "latent_points": lerp_embed_points,"generator_state": generator_state}
 
+    def slerp_through_seeds(self,
+        prompt,
+        height: Optional[int] = 512,
+        width: Optional[int] = 512,
+        save = False,
+        seed = None, steps = 10, **kwargs):
+
+        if not seed:
+            seed = random.randint(0, sys.maxsize)
+        generator = torch.Generator(self.device)
+        generator.manual_seed(seed)
+        init_start = torch.randn(
+            (1, self.unet.in_channels, height // 8, width // 8),
+            generator = generator, device = self.device)
+        init_end = torch.randn(
+            (1, self.unet.in_channels, height // 8, width // 8),
+            generator = generator, device = self.device)
+        generator_state = generator.get_state()
+        slerp_embed_points = []
+        # weight from 0 to 1/(steps - 1), add init_end specifically so that we
+        # have len(images) = steps
+        for i in range(steps - 1):
+            weight = i / steps
+            tensor_slerp = self.slerp(weight, init_start, init_end)
+            slerp_embed_points.append(tensor_slerp)
+        slerp_embed_points.append(init_end)
+        images = []
+        embed_point = self.get_text_latent_space(prompt, **kwargs)
+        for idx, noise_point in enumerate(slerp_embed_points):
+            generator.set_state(generator_state)
+            image = self.diffuse_from_inits(embed_point, init = noise_point, **kwargs)["image"][0]
+            images.append(image)
+            if save:
+                image.save(f"{seed}-{idx:02d}")
+        return {"images": images, "noise_samples": slerp_embed_points,"generator_state": generator_state}
+
     @torch.no_grad()
-    def image_from_latent_space(self, text_embeddings,
+    def diffuse_from_inits(self, text_embeddings,
+        init = None,
         height: Optional[int] = 512,
         width: Optional[int] = 512,
         num_inference_steps: Optional[int] = 50,
         guidance_scale: Optional[float] = 7.5,
         eta: Optional[float] = 0.0,
         generator: Optional[torch.Generator] = None,
         output_type: Optional[str] = "pil",
         **kwargs,):
@@ -240,11 +306,10 @@ def diffuse_from_inits(self, text_embeddings,
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
         # get the initial random noise
-        latents = torch.randn(
+        latents = init if init else torch.randn(
             (batch_size, self.unet.in_channels, height // 8, width // 8),
             generator=generator,
-            device=self.device,
-        )
+            device=self.device,)
 
         # set timesteps
         accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys())
@@ -309,6 +374,6 @@ def variation(self, text_embeddings, generator_state, variation_magnitude = 100,
 
         generator = torch.Generator("cuda")
         generator.set_state(generator_state)
-        result = self.image_from_latent_space(variation_embedding, generator=generator, **kwargs)
+        result = self.diffuse_from_inits(variation_embedding, generator=generator, **kwargs)
         result.update({"latent_point": variation_embedding})
         return result
From 2cd40fb751e72c9d28c557dd1c0fb7eea487f854 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Thu, 25 Aug 2022 16:17:21 -0700
Subject: [PATCH 10/17] Add the missing self parameter to slerp

---
 .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 33a3ed6ad62c..3ec1965f168c 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -198,7 +198,7 @@ def get_text_latent_space(self, prompt, guidance_scale = 7.5):
 
         return text_embeddings
 
-    def slerp(t, v0, v1, DOT_THRESHOLD=0.9995):
+    def slerp(self, t, v0, v1, DOT_THRESHOLD=0.9995):
         """ helper function to spherically interpolate two arrays v1 v2
         from https://gist.github.com/karpathy/00103b0037c5aaea32fe1da1af553355
         this should be better than lerping for moving between noise spaces """

From 59fd5f2e6cd98c63920436c988aa48257f2e7a00 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Thu, 25 Aug 2022 16:25:09 -0700
Subject: [PATCH 11/17] Can't check whether a tensor is truthy; check that it is not None instead

---
 .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 3ec1965f168c..8c1e69908b68 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -306,7 +306,7 @@ def diffuse_from_inits(self, text_embeddings,
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
         # get the initial random noise
-        latents = init if init else torch.randn(
+        latents = init if init is not None else torch.randn(
             (batch_size, self.unet.in_channels, height // 8, width // 8),
             generator=generator,
             device=self.device,)

From 587ea2bf5b31d17d5d81b75d8220eb80520541a3 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Fri, 26 Aug 2022 21:18:27 -0700
Subject: [PATCH 12/17] Fix saving by adding a file extension and format

---
 .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index fd1ae3101175..67a0c8793b26 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -252,7 +252,7 @@ def lerp_between_prompts(self, first_prompt, second_prompt, seed = None, length
             image = self.diffuse_from_inits(latent_point, **kwargs)["image"][0]
             images.append(image)
             if save:
-                image.save(f"{first_prompt}-{second_prompt}-{idx:02d}")
+                image.save(f"{first_prompt}-{second_prompt}-{idx:02d}.png", "PNG")
         return {"images": images, "latent_points": lerp_embed_points,"generator_state": generator_state}
 
     def slerp_through_seeds(self,
@@ -288,7 +288,7 @@ def slerp_through_seeds(self,
             image = self.diffuse_from_inits(embed_point, init = noise_point, **kwargs)["image"][0]
             images.append(image)
             if save:
-                image.save(f"{seed}-{idx:02d}")
+                image.save(f"{seed}-{idx:02d}.png", "PNG")
         return {"images": images, "noise_samples": slerp_embed_points,"generator_state": generator_state}

From f235c32916b25d4ab2966cf9a340086ab44159aa Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Fri, 26 Aug 2022 22:51:05 -0700
Subject: [PATCH 13/17] Don't pass kwargs to functions that don't accept kwargs

---
 .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 67a0c8793b26..43650caeaad2 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -282,7 +282,7 @@ def slerp_through_seeds(self,
             slerp_embed_points.append(tensor_slerp)
         slerp_embed_points.append(init_end)
         images = []
-        embed_point = self.get_text_latent_space(prompt, **kwargs)
+        embed_point = self.get_text_latent_space(prompt)
         for idx, noise_point in enumerate(slerp_embed_points):
             generator.set_state(generator_state)
             image = self.diffuse_from_inits(embed_point, init = noise_point, **kwargs)["image"][0]
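Usage sketch (not part of the patch): with `slerp` now a proper method and the stray kwargs removed, the seed-space walk works end to end — two noise tensors are drawn from one seeded generator and spherically interpolated, which keeps each intermediate tensor at a plausible Gaussian-noise magnitude (the reason the docstring prefers slerp over lerp here). A hedged example assuming a patched pipeline instance `pipe`; the prompt and seed are placeholders:

```python
walk = pipe.slerp_through_seeds(
    "a photo of a lighthouse at dawn",
    seed=1234,               # seeds the generator that draws both noise endpoints
    steps=10,                # total frames; init_end is appended as the final frame
    save=True,               # writes "{seed}-{idx:02d}.png" per frame (after patch 12)
    num_inference_steps=50,  # forwarded to diffuse_from_inits via **kwargs
)
frames = walk["images"]
noise_points = walk["noise_samples"]   # the slerped initial-noise tensors
```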
From 8a111dd06a22985ab5523962f44b1d90bc7a64f9 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Sat, 24 Sep 2022 22:16:28 -0700
Subject: [PATCH 14/17] Restore the tqdm import that was removed during a merge

---
 .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 5536396854fb..b4ccc4f33631 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -7,6 +7,8 @@
 
 import torch
 import numpy as np
+from tqdm.auto import tqdm
+
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 
 from ...models import AutoencoderKL, UNet2DConditionModel

From 7ae0aaa8ae44a5301d87cfa63867444914e0f135 Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Sat, 24 Sep 2022 22:23:54 -0700
Subject: [PATCH 15/17] The output of decoding latents was changed to a data-wrapping object; unwrap it

---
 .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index b4ccc4f33631..a0cd93ef06f9 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -366,7 +366,7 @@ def diffuse_from_inits(self, text_embeddings,
 
         # scale and decode the image latents with vae
         latents = 1 / 0.18215 * latents
-        image = self.vae.decode(latents)
+        image = self.vae.decode(latents).sample
 
         image = (image / 2 + 0.5).clamp(0, 1)
         image = image.cpu().permute(0, 2, 3, 1).numpy()

From 7ac1af647d6f3a9b7cda7c1156480f0afa4b949e Mon Sep 17 00:00:00 2001
From: Charles Saluski
Date: Sat, 24 Sep 2022 23:34:32 -0700
Subject: [PATCH 16/17] Add option to save the intermediate latent spaces from the diffusion

---
 .../stable_diffusion/pipeline_stable_diffusion.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index a0cd93ef06f9..07d047cbf14c 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -305,6 +305,7 @@ def diffuse_from_inits(self, text_embeddings,
         eta: Optional[float] = 0.0,
         generator: Optional[torch.Generator] = None,
         output_type: Optional[str] = "pil",
+        save_n_steps: Optional[int] = None,
         **kwargs,):
 
         batch_size = 1
@@ -342,8 +343,14 @@ def diffuse_from_inits(self, text_embeddings,
         extra_step_kwargs = {}
         if accepts_eta:
             extra_step_kwargs["eta"] = eta
-
+        if save_n_steps:
+            diffuse_latents = []
+        else:
+            diffuse_latents = None
         for i, t in tqdm(enumerate(self.scheduler.timesteps)):
+            if save_n_steps:
+                if i % save_n_steps == 0:
+                    diffuse_latents.append(latents)
             # expand the latents if we are doing classifier free guidance
             latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
             if isinstance(self.scheduler, LMSDiscreteScheduler):
@@ -374,7 +381,7 @@ def diffuse_from_inits(self, text_embeddings,
 
         if output_type == "pil":
             image = self.numpy_to_pil(image)
 
-        return {"image": image, "generator_state": generator_state}
+        return {"image": image, "generator_state": generator_state, "latents": diffuse_latents}
 
     def variation(self, text_embeddings, generator_state, variation_magnitude = 100, **kwargs):
         # random vector to move in latent space
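Usage sketch (not part of the patch): `save_n_steps` snapshots the raw latent tensor every n scheduler steps during denoising; the next patch extends this to also decode each snapshot through the VAE so intermediate images come back directly. A hedged example against the interface as of this patch, assuming a patched pipeline instance `pipe`; the prompt and intervals are placeholders:

```python
embedding = pipe.get_text_latent_space("a castle in the clouds")

out = pipe.diffuse_from_inits(
    embedding,
    num_inference_steps=50,
    save_n_steps=10,        # snapshot latents at steps 0, 10, 20, 30, 40
)
final_image = out["image"][0]
snapshots = out["latents"]  # list of latent tensors, noisiest first
```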
From: Charles Saluski
Date: Sat, 24 Sep 2022 23:47:24 -0700
Subject: [PATCH 17/17] Add option to save the intermediate latent spaces from the diffusion as images instead of raw latent tensors

---
 .../pipeline_stable_diffusion.py | 20 ++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
index 07d047cbf14c..5b3e5ec2611f 100644
--- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
+++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -344,13 +344,25 @@ def diffuse_from_inits(self, text_embeddings,
         if accepts_eta:
             extra_step_kwargs["eta"] = eta
         if save_n_steps:
-            diffuse_latents = []
+            mid_latents = []
+            mid_images = []
         else:
-            diffuse_latents = None
+            mid_latents = None
+            mid_images = None
         for i, t in tqdm(enumerate(self.scheduler.timesteps)):
             if save_n_steps:
                 if i % save_n_steps == 0:
-                    diffuse_latents.append(latents)
+                    # scale and decode the image latents with vae
+                    dec_mid_latents = 1 / 0.18215 * latents
+                    mid_latents.append(dec_mid_latents)
+                    image = self.vae.decode(dec_mid_latents).sample
+
+                    image = (image / 2 + 0.5).clamp(0, 1)
+                    image = image.cpu().permute(0, 2, 3, 1).numpy()
+
+                    if output_type == "pil":
+                        image = self.numpy_to_pil(image)
+                    mid_images.append(image)
             # expand the latents if we are doing classifier free guidance
             latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
             if isinstance(self.scheduler, LMSDiscreteScheduler):
@@ -381,7 +393,7 @@ def diffuse_from_inits(self, text_embeddings,
 
         if output_type == "pil":
             image = self.numpy_to_pil(image)
 
-        return {"image": image, "generator_state": generator_state, "latents": diffuse_latents}
+        return {"image": image, "generator_state": generator_state, "mid_latents": mid_latents, "mid_images": mid_images}
 
     def variation(self, text_embeddings, generator_state, variation_magnitude = 100, **kwargs):
         # random vector to move in latent space