Translate code from llama notebook
s2t2 committed Jan 14, 2024
commit 9c56d6f49dcd62a6c745ef688cc34cf1f81ed2ae
16 changes: 15 additions & 1 deletion README.md
@@ -42,10 +42,22 @@ Setup submission files:
3. Move a copy of the starter notebook (which contains instructions and some starter code) into the submissions directory, and note the filename (i.e. `STARTER_FILENAME`).


### OpenAI Setup
### LLM Setup

Choose an LLM provider (OpenAI or Meta Llama). OpenAI may be easier to get started with, but it costs money; Meta Llama is free, and for this reason is the recommended LLM provider. Follow the corresponding setup instructions below for your chosen provider.

#### OpenAI Setup

Obtain an OpenAI API Key (i.e. `OPENAI_API_KEY`).

#### Llama Setup

See: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf

First, visit the [Meta Llama website](https://ai.meta.com/resources/models-and-libraries/llama-downloads/), fill out the request form, and wait until your request is accepted.

Then, create a [Hugging Face account](https://huggingface.co) (using the same email address from step 1), and obtain a [user access token](https://huggingface.co/docs/hub/security-tokens) (i.e. `HUGGINGFACE_TOKEN`).
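
Optionally, to confirm the token works before downloading any model weights, here is a minimal sketch (assumes the `huggingface_hub` package, which is installed alongside `transformers`):

```python
# sketch: check that the Hugging Face token is valid
import os
from dotenv import load_dotenv
from huggingface_hub import whoami

load_dotenv()
print(whoami(token=os.getenv("HUGGINGFACE_TOKEN")))  # prints your account info if the token is valid
```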


### Environment Variables Setup

@@ -55,6 +67,8 @@ Create ".env" file and set environment variables:
# this is the ".env" file...

OPENAI_API_KEY="sk-..."
# or:
HUGGINGFACE_TOKEN="..."

SUBMISSIONS_DIRPATH="/Users/USERNAME/Desktop/GRADING HW 4"
STARTER_FILENAME="Homework_X_STARTER.ipynb"
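
For reference, a minimal sketch of how the app modules read these settings (mirroring the `load_dotenv()` / `os.getenv()` calls in the app code):

```python
# sketch: how the ".env" settings are read by the app modules
import os
from dotenv import load_dotenv

load_dotenv()  # loads the ".env" file into the process environment

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")        # OpenAI provider
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")  # Llama provider
SUBMISSIONS_DIRPATH = os.getenv("SUBMISSIONS_DIRPATH")
STARTER_FILENAME = os.getenv("STARTER_FILENAME")
```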
72 changes: 72 additions & 0 deletions app/llama_chain.py
@@ -0,0 +1,72 @@
# adapted from a YouTube video about Llama and LangChain: ________________


import os
from dotenv import load_dotenv

from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

from app.llama_prompts import get_prompt, parse_text
from app.llama_llm import LlamaService


load_dotenv()

TEMP = float(os.getenv("TEMP", default="0.0"))  # temperature: between 0.0 and 1.0


if __name__ == "__main__":

    service = LlamaService()
    pipeline = service.pipeline
    llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={"temperature": TEMP})
    print(llm)

    # SIMPLE LLM CHAIN

    system_prompt = "You are an advanced assistant that excels at translation."
    instruction = "Convert the following text from English to French:\n\n {text}"
    template = get_prompt(instruction, system_prompt)
    print(template)
    prompt = PromptTemplate(template=template, input_variables=["text"])

    llm_chain = LLMChain(prompt=prompt, llm=llm)

    query = "how are you today?"
    response = llm_chain.run(query)
    parse_text(response)


    # CHAT CHAIN

    if input("Continue to chat (Y/N): ").upper() != "Y":
        exit()

    from langchain.memory import ConversationBufferMemory

    # for chat, with memory (the template must include the memory key "chat_history")
    instruction = "Chat History:\n\n{chat_history} \n\nUser: {user_input}"
    system_prompt = "You are a helpful assistant. You always answer only for the assistant, then you stop. Read the chat history to get context."

    template = get_prompt(instruction, system_prompt)
    print(template)

    prompt = PromptTemplate(template=template, input_variables=["chat_history", "user_input"])
    memory = ConversationBufferMemory(memory_key="chat_history")

    llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=True, memory=memory)

    query = input("Please ask a question (or press enter to exit): ")
    while query != "":
        response = llm_chain.predict(user_input=query)
        print(response)

        query = input("Please ask a question (or press enter to exit): ")
60 changes: 60 additions & 0 deletions app/llama_llm.py
@@ -0,0 +1,60 @@

# adapted from a YouTube video about Llama and LangChain: ________________

import os
from functools import cached_property

from dotenv import load_dotenv

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from app.llama_prompts import get_prompt, cut_off_text, remove_substring

load_dotenv()

HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"


class LlamaService:
    def __init__(self, model_name=MODEL_NAME, hf_token=HUGGINGFACE_TOKEN):
        self.model_name = model_name
        self.hf_token = hf_token

    @cached_property
    def tokenizer(self):
        # https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoTokenizer
        # cached so the tokenizer is only loaded once per service instance
        return AutoTokenizer.from_pretrained(self.model_name, token=self.hf_token)

    @cached_property
    def model(self):
        # https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForCausalLM
        # cached so the (large) model weights are only loaded once per service instance
        return AutoModelForCausalLM.from_pretrained(self.model_name, token=self.hf_token,
            device_map="auto", torch_dtype=torch.float16,
        )

    @cached_property
    def pipeline(self):
        # https://huggingface.co/docs/transformers/main_classes/pipelines
        return pipeline(task="text-generation", model=self.model, tokenizer=self.tokenizer,
            device_map="auto", torch_dtype=torch.bfloat16,
            max_new_tokens=512, do_sample=True, top_k=30, num_return_sequences=1,
            eos_token_id=self.tokenizer.eos_token_id,
        )

    def generate(self, text):
        prompt = get_prompt(text)
        with torch.autocast("cuda", dtype=torch.bfloat16):
            inputs = self.tokenizer(prompt, return_tensors="pt").to("cuda")
            outputs = self.model.generate(**inputs,
                max_new_tokens=512,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id,
            )
            final_outputs = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
            final_outputs = cut_off_text(final_outputs, "</s>")
            final_outputs = remove_substring(final_outputs, prompt)

        return final_outputs
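
A hypothetical usage sketch for this service (assumes a CUDA GPU and granted access to the gated `meta-llama/Llama-2-7b-chat-hf` repo):

```python
# sketch: standalone usage of LlamaService (requires GPU + model access)
from app.llama_llm import LlamaService

service = LlamaService()
print(service.generate("What is the capital of France?"))
```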
38 changes: 38 additions & 0 deletions app/llama_prompts.py
@@ -0,0 +1,38 @@

# adapted from a YouTube video about Llama and LangChain: ________________

import textwrap

B_INST, E_INST = "[INST]", "[/INST]"

B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""

# TODO: refactor

def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):
    # wrap an instruction (and system prompt) in the Llama 2 chat format
    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS
    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template


def cut_off_text(text, cutoff_phrase):
    # truncate the text at the first occurrence of the cutoff phrase (if present)
    index = text.find(cutoff_phrase)
    if index != -1:
        return text[:index]
    else:
        return text


def remove_substring(string, substring):
    return string.replace(substring, "")


def parse_text(text):
    # wrap long lines for readability, then print
    wrapped_text = textwrap.fill(text, width=100)
    print(wrapped_text + "\n\n")
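
For reference, a sketch of the Llama 2 chat format these helpers produce (expected output shown as comments):

```python
from app.llama_prompts import get_prompt

print(get_prompt("Convert the following text from English to French:\n\n {text}"))
# [INST]<<SYS>>
# You are a helpful, respectful and honest assistant. ... (default system prompt)
# <</SYS>>
#
# Convert the following text from English to French:
#
#  {text}[/INST]
```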
6 changes: 6 additions & 0 deletions requirements.txt
@@ -14,6 +14,12 @@ langchain # 0.0.348
tiktoken
faiss-cpu

# llama:
torch # 2.1.0+cu121
transformers # 4.35.2
#accelerate # 0.25.0
# torchtext # 0.16.0



pytest