Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Too slow on CPU
  • Loading branch information
s2t2 committed Jan 14, 2024
commit 38683ebd880d5bbfc04429a2e8d2025aab434208
30 changes: 25 additions & 5 deletions app/meta/llm.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@

# adapted from youtube video about llama and langchain: ________________

# this is so slow on CPU though...
# https://stackoverflow.com/a/77022488/670433


import os
from dotenv import load_dotenv
import textwrap

import torch
#import transformers
Expand All @@ -11,7 +16,7 @@
from langchain.chains import LLMChain
from langchain.llms.huggingface_pipeline import HuggingFacePipeline

from app.meta.prompts import get_prompt, parse_text, cut_off_text, remove_substring
#from app.meta.prompts import get_prompt, parse_text, cut_off_text, remove_substring

load_dotenv()

Expand Down Expand Up @@ -44,13 +49,20 @@ def compile_prompt(prompt, system_prompt=DEFAULT_SYSTEM_PROMPT, input_variables=
return PromptTemplate(template=formatted_prompt, input_variables=input_variables)




class HuggingFaceService:
def __init__(self, model_name=MODEL_NAME, temp=TEMP, token=HUGGINGFACE_TOKEN): # device_type="cpu",
def __init__(self, model_name=MODEL_NAME, temp=TEMP, token=HUGGINGFACE_TOKEN, device_type="cpu"):
    """Configure the Hugging Face model wrapper.

    Params:
        model_name: Hugging Face model identifier.
        temp: sampling temperature passed to the pipeline.
        token: Hugging Face API token.
        device_type: "cpu" for local dev, or "cuda" for GPU (e.g. Colab).
    """
    self.model_name = model_name
    self.token = token  # Hugging Face API token
    self.temp = temp
    self.device_type = device_type  # "cpu" for local dev, or "cuda" for colab gpu

    # Pick the tensor dtype to match the device: float16 on GPU, float32 on CPU
    # (float16 is not well supported by CPU kernels — see refs below).
    # https://stackoverflow.com/a/73530618/670433
    # https://huggingface.co/openlm-research/open_llama_7b_v2/discussions/2
    # https://pytorch.org/docs/stable/tensors.html
    if self.device_type == "cpu":
        self.torch_dtype = torch.float32
    else:
        self.torch_dtype = torch.float16

@property
def tokenizer(self):
Expand All @@ -63,7 +75,8 @@ def model(self):
return AutoModelForCausalLM.from_pretrained(self.model_name, token=self.token,
device_map="auto",
#torch_dtype=torch.float16, # GPU ONLY? https://stackoverflow.com/a/73530618/670433
torch_dtype=torch.float32 # CPU
#torch_dtype=torch.float32 # CPU
torch_dtype=self.torch_dtype
)

@property
Expand All @@ -75,7 +88,8 @@ def pipeline(self):
max_new_tokens=512, do_sample=True, top_k=30, num_return_sequences=1,
eos_token_id=self.tokenizer.eos_token_id,
#torch_dtype=torch.bfloat16, # GPU ONLY? https://stackoverflow.com/a/73530618/670433
torch_dtype=torch.float32, # CPU
#torch_dtype=torch.float32, # CPU
torch_dtype=self.torch_dtype
)

@property
Expand Down Expand Up @@ -116,6 +130,12 @@ def llm(self):



def parse_text(text):
    """Pretty-print `text` wrapped to 100 characters, followed by a blank line.

    Display-only helper: writes to stdout and always returns None.

    Params:
        text: the string to wrap and print.
    """
    wrapped_text = textwrap.fill(text, width=100)
    print(wrapped_text + '\n\n')




if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ plotly


openai # 1.3.8
langchain # 0.0.348
langchain # 0.0.348 ... 0.0.353
tiktoken
faiss-cpu

Expand Down