Translate code from llama notebook
s2t2 committed Jan 14, 2024
commit 9c56d6f49dcd62a6c745ef688cc34cf1f81ed2ae
16 changes: 15 additions & 1 deletion README.md
@@ -42,10 +42,22 @@ Setup submission files:
3. Move a copy of the starter notebook (which contains instructions and some starter code) into the submissions directory, and note the filename (i.e. `STARTER_FILENAME`).


### OpenAI Setup
### LLM Setup

Choose an LLM provider (OpenAI or Meta Llama). OpenAI may be easier to get started with, but it costs money; Meta Llama is free, and for this reason is the recommended LLM provider. Follow the corresponding setup instructions below for your chosen provider.

#### OpenAI Setup

Obtain an OpenAI API Key (i.e. `OPENAI_API_KEY`).

#### Llama Setup

See: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf

First, visit the [Meta Llama website](https://ai.meta.com/resources/models-and-libraries/llama-downloads/), fill out the request form, and wait until your request is accepted.

Then, create a [Hugging Face account](https://huggingface.co) (using the same email address from step 1), and obtain a [user access token](https://huggingface.co/docs/hub/security-tokens) (i.e. `HUGGINGFACE_TOKEN`).
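
Optionally, to confirm the token works before downloading any model weights, here is a minimal sketch (assumes the `huggingface_hub` package, which is installed alongside `transformers`):

```python
# sketch: check that the Hugging Face token is valid
import os
from dotenv import load_dotenv
from huggingface_hub import whoami

load_dotenv()
print(whoami(token=os.getenv("HUGGINGFACE_TOKEN")))  # prints your account info if the token is valid
```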


### Environment Variables Setup

@@ -55,6 +67,8 @@ Create ".env" file and set environment variables:
# this is the ".env" file...

OPENAI_API_KEY="sk-..."
# or:
HUGGINGFACE_TOKEN="..."

SUBMISSIONS_DIRPATH="/Users/USERNAME/Desktop/GRADING HW 4"
STARTER_FILENAME="Homework_X_STARTER.ipynb"
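
For reference, a minimal sketch of how the app modules read these settings (mirroring the `load_dotenv()` / `os.getenv()` calls in the app code):

```python
# sketch: how the ".env" settings are read by the app modules
import os
from dotenv import load_dotenv

load_dotenv()  # loads the ".env" file into the process environment

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")        # OpenAI provider
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")  # Llama provider
SUBMISSIONS_DIRPATH = os.getenv("SUBMISSIONS_DIRPATH")
STARTER_FILENAME = os.getenv("STARTER_FILENAME")
```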
72 changes: 72 additions & 0 deletions app/llama_chain.py
@@ -0,0 +1,72 @@
# adapted from a YouTube video about Llama and LangChain: ________________


import os
from dotenv import load_dotenv

from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

from app.llama_prompts import get_prompt, parse_text
from app.llama_llm import LlamaService


load_dotenv()

TEMP = float(os.getenv("TEMP", default="0.0"))  # temperature: between 0.0 and 1.0


if __name__ == "__main__":

    service = LlamaService()
    pipeline = service.pipeline
    llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={"temperature": TEMP})
    print(llm)

    # SIMPLE LLM CHAIN

    system_prompt = "You are an advanced assistant that excels at translation."
    instruction = "Convert the following text from English to French:\n\n {text}"
    template = get_prompt(instruction, system_prompt)
    print(template)
    prompt = PromptTemplate(template=template, input_variables=["text"])

    llm_chain = LLMChain(prompt=prompt, llm=llm)

    query = "how are you today?"
    response = llm_chain.run(query)
    parse_text(response)


    # CHAT CHAIN

    if input("Continue to chat (Y/N): ").upper() != "Y":
        exit()

    from langchain.memory import ConversationBufferMemory

    # for chat, with memory (the template must include the memory key "chat_history")
    instruction = "Chat History:\n\n{chat_history} \n\nUser: {user_input}"
    system_prompt = "You are a helpful assistant. You always answer only for the assistant, then you stop. Read the chat history to get context."

    template = get_prompt(instruction, system_prompt)
    print(template)

    prompt = PromptTemplate(template=template, input_variables=["chat_history", "user_input"])
    memory = ConversationBufferMemory(memory_key="chat_history")

    llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True, memory=memory)

    query = input("Please ask a question (or press enter to exit): ")
    while query != "":
        response = llm_chain.predict(user_input=query)
        print(response)

        query = input("Please ask a question (or press enter to exit): ")
60 changes: 60 additions & 0 deletions app/llama_llm.py
@@ -0,0 +1,60 @@

# adapted from a YouTube video about Llama and LangChain: ________________

import os
from functools import cached_property

from dotenv import load_dotenv

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from app.llama_prompts import get_prompt, cut_off_text, remove_substring

load_dotenv()

HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"


class LlamaService:
    def __init__(self, model_name=MODEL_NAME, hf_token=HUGGINGFACE_TOKEN):
        self.model_name = model_name
        self.hf_token = hf_token

    @cached_property
    def tokenizer(self):
        # https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoTokenizer
        # cached so the tokenizer is only loaded once per service instance
        return AutoTokenizer.from_pretrained(self.model_name, token=self.hf_token)

    @cached_property
    def model(self):
        # https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForCausalLM
        # cached so the (large) model weights are only loaded once per service instance
        return AutoModelForCausalLM.from_pretrained(self.model_name, token=self.hf_token,
            device_map="auto", torch_dtype=torch.float16,
        )

    @cached_property
    def pipeline(self):
        # https://huggingface.co/docs/transformers/main_classes/pipelines
        return pipeline(task="text-generation", model=self.model, tokenizer=self.tokenizer,
            device_map="auto", torch_dtype=torch.bfloat16,
            max_new_tokens=512, do_sample=True, top_k=30, num_return_sequences=1,
            eos_token_id=self.tokenizer.eos_token_id,
        )

    def generate(self, text):
        prompt = get_prompt(text)
        with torch.autocast("cuda", dtype=torch.bfloat16):
            inputs = self.tokenizer(prompt, return_tensors="pt").to("cuda")
            outputs = self.model.generate(**inputs,
                max_new_tokens=512,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id,
            )
            final_outputs = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
            final_outputs = cut_off_text(final_outputs, "</s>")
            final_outputs = remove_substring(final_outputs, prompt)

        return final_outputs
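
A hypothetical usage sketch for this service (assumes a CUDA GPU and granted access to the gated `meta-llama/Llama-2-7b-chat-hf` repo):

```python
# sketch: standalone usage of LlamaService (requires GPU + model access)
from app.llama_llm import LlamaService

service = LlamaService()
print(service.generate("What is the capital of France?"))
```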
38 changes: 38 additions & 0 deletions app/llama_prompts.py
@@ -0,0 +1,38 @@

# adapted from a YouTube video about Llama and LangChain: ________________

import textwrap

B_INST, E_INST = "[INST]", "[/INST]"

B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""

# TODO: refactor

def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):
    # wrap an instruction (and system prompt) in the Llama 2 chat format
    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS
    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template


def cut_off_text(text, cutoff_phrase):
    # truncate the text at the first occurrence of the cutoff phrase (if present)
    index = text.find(cutoff_phrase)
    if index != -1:
        return text[:index]
    else:
        return text


def remove_substring(string, substring):
    return string.replace(substring, "")


def parse_text(text):
    # wrap long lines for readability, then print
    wrapped_text = textwrap.fill(text, width=100)
    print(wrapped_text + "\n\n")
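
For reference, a sketch of the Llama 2 chat format these helpers produce (expected output shown as comments):

```python
from app.llama_prompts import get_prompt

print(get_prompt("Convert the following text from English to French:\n\n {text}"))
# [INST]<<SYS>>
# You are a helpful, respectful and honest assistant. ... (default system prompt)
# <</SYS>>
#
# Convert the following text from English to French:
#
#  {text}[/INST]
```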
6 changes: 6 additions & 0 deletions requirements.txt
@@ -14,6 +14,12 @@ langchain # 0.0.348
tiktoken
faiss-cpu

# llama:
torch # 2.1.0+cu121
transformers # 4.35.2
#accelerate # 0.25.0
# torchtext # 0.16.0



pytest