
Commit eaff7a8

Initial commit of auto docker
1 parent e5d596e commit eaff7a8

File tree

4 files changed, +214 -0 lines changed


docker/Dockerfile

Lines changed: 51 additions & 0 deletions
# Define the image argument and provide a default value
ARG IMAGE=python:3-slim-bullseye

# Use the image as specified
FROM ${IMAGE}

# Re-declare the ARG after FROM so it is available in later build steps
ARG IMAGE

# Persist the base image name as an ENV so start_server.sh can branch on it at runtime
# (a plain ARG is only visible at build time)
ENV IMAGE=${IMAGE}

# Update and upgrade the existing packages
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
    python3 \
    python3-pip \
    ninja-build \
    build-essential

RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette

# Perform the conditional installation based on the image
RUN echo "Image: ${IMAGE}" && \
    if [ "${IMAGE}" = "python:3-slim-bullseye" ] ; then \
        echo "OpenBLAS install:" && \
        apt-get install -y --no-install-recommends libopenblas-dev && \
        LLAMA_OPENBLAS=1 pip install llama-cpp-python --verbose; \
    else \
        echo "CuBLAS install:" && \
        LLAMA_CUBLAS=1 pip install llama-cpp-python --verbose; \
    fi

# Clean up the apt cache
RUN rm -rf /var/lib/apt/lists/*

# Set a working directory for better clarity
WORKDIR /app

# Copy the model and server start script into the app directory
RUN echo "Installing model...this can take some time..."
COPY ./model.bin /app/model.bin
COPY ./start_server.sh /app/start_server.sh

# Make the server start script executable
RUN chmod +x /app/start_server.sh

# Set environment variable for the host
ENV HOST=0.0.0.0

# Expose a port for the server
EXPOSE 8000

# Run the server start script
CMD ["/bin/sh", "/app/start_server.sh"]

docker/README.md

Lines changed: 33 additions & 0 deletions
# Get model from Hugging Face
`python3 ./hug_model.py`

You should now have a model in the current directory and `model.bin` symlinked to it for the subsequent Docker build and copy step, e.g.
```
docker $ ls -lh *.bin
-rw-rw-r-- 1 user user 4.8G May 23 18:30 llama-7b.ggmlv3.q5_1.bin
lrwxrwxrwx 1 user user 24 May 23 18:30 model.bin -> <downloaded-model-file>.q5_1.bin
```
- Note #1: Make sure you have enough disk space to download the model. As the model is then copied into the image, you will need at least **TWICE** as much disk space as the size of the model:

| Model | Quantized size |
|------:|---------------:|
|    7B |           5 GB |
|   13B |          10 GB |
|   30B |          25 GB |
|   65B |          50 GB |

- Note #2: If you want to pass or tune additional parameters, customise `./start_server.sh` before running `docker build ...`, for example:
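A minimal sketch of such a tweak (the `--n_ctx` and `--n_threads` flags shown here are assumptions; check `python3 -m llama_cpp.server --help` for the options your installed version actually supports):
```
# Hypothetical start_server.sh server line: larger context window and an explicit thread count
python3 -B -m llama_cpp.server --model /app/model.bin --n_ctx 2048 --n_threads 4
```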
# Use OpenBLAS (No NVidia GPU, defaults to `python:3-slim-bullseye` Docker base image)
## Build:
`docker build -t openblas .`
## Run:
`docker run --cap-add SYS_RESOURCE -t openblas`

# Use CuBLAS
Requires an NVidia GPU and Docker NVidia support (see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
## Build:
`docker build --build-arg IMAGE=nvidia/cuda:12.1.1-devel-ubuntu22.04 -t cublas .`
## Run:
`docker run --cap-add SYS_RESOURCE -t cublas`
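Depending on your Docker and NVIDIA Container Toolkit setup, you may also need to pass the GPUs through to the container explicitly, e.g.
```
docker run --cap-add SYS_RESOURCE --gpus all -t cublas
```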

docker/hug_model.py

Lines changed: 119 additions & 0 deletions
import argparse
import json
import os
import struct

import requests

def make_request(url, params=None):
    print(f"Making request to {url}...")
    response = requests.get(url, params=params)
    if response.status_code == 200:
        return json.loads(response.text)
    else:
        print(f"Request failed with status code {response.status_code}")
        return None

def check_magic_and_version(filename):
    with open(filename, 'rb') as f:
        # Read the first 6 bytes from the file
        data = f.read(6)

    # Unpack the binary data, interpreting the first 4 bytes as a little-endian unsigned int
    # and the next 2 bytes as a little-endian unsigned short
    magic, version = struct.unpack('<I H', data)

    print(f"magic: 0x{magic:08x}, version: 0x{version:04x}, file: {filename}")

    return magic, version

def download_file(url, destination):
    print(f"Downloading {url} to {destination}...")
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        with open(destination, 'wb') as f:
            total_downloaded = 0
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
                    total_downloaded += len(chunk)
                    if total_downloaded >= 10485760:  # print a progress dot every 10 MB
                        print('.', end='', flush=True)
                        total_downloaded = 0
        print("\nDownload complete.")

        # Create a symbolic link from "model.bin" to the downloaded file
        if os.path.isfile("model.bin"):
            os.remove("model.bin")  # remove the existing link if any
        os.symlink(destination, "model.bin")
    else:
        print(f"Download failed with status code {response.status_code}")

def get_user_choice(model_list):
    # Print the enumerated list
    print("\n")
    for i, (model_id, rfilename) in enumerate(model_list):
        print(f"{i+1}: Model ID: {model_id}, RFilename: {rfilename}")

    # Get the user's choice
    choice = input("Choose a model to download by entering the corresponding number: ")
    try:
        index = int(choice) - 1
        if 0 <= index < len(model_list):
            # Return the chosen model
            return model_list[index]
        else:
            print("Invalid choice.")
    except ValueError:
        print("Invalid input. Please enter a number corresponding to a model.")
    except IndexError:
        print("Invalid choice. Index out of range.")

    return None

def main():
    # Create an argument parser
    parser = argparse.ArgumentParser(description='Process the model version.')
    parser.add_argument('-v', '--version', type=int, default=0x0003,
                        help='an integer for the version to be used')

    # Parse the arguments
    args = parser.parse_args()

    # Define the query parameters
    params = {
        "author": "TheBloke",  # Filter by author
        "tags": "llama"
    }

    models = make_request('https://huggingface.co/api/models', params=params)
    if models is None:
        return

    model_list = []
    # Iterate over the models
    for model in models:
        model_id = model['id']
        model_info = make_request(f'https://huggingface.co/api/models/{model_id}')
        if model_info is None:
            continue

        for sibling in model_info.get('siblings', []):
            rfilename = sibling.get('rfilename')
            if rfilename and 'q5_1' in rfilename:
                model_list.append((model_id, rfilename))

    model_choice = get_user_choice(model_list)
    if model_choice is not None:
        model_id, rfilename = model_choice
        url = f"https://huggingface.co/{model_id}/resolve/main/{rfilename}"
        download_file(url, rfilename)
        _, version = check_magic_and_version(rfilename)
        if version != args.version:
            print(f"Warning: Expected version {args.version}, but found a different version in the file.")

if __name__ == '__main__':
    main()
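For reference, a typical invocation of the downloader from the `docker/` directory might look like the sketch below; the run is interactive, so you pick one of the listed `q5_1` files, and `-v 3` simply matches the script's default expected GGML file version (`0x0003`):

```
python3 ./hug_model.py        # list TheBloke's q5_1 llama models and choose one to download
python3 ./hug_model.py -v 3   # same, but set the expected file version explicitly
```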

docker/start_server.sh

Lines changed: 11 additions & 0 deletions
#!/bin/sh

# For mmap support
ulimit -l unlimited

if [ "$IMAGE" = "python:3-slim-bullseye" ]; then
    python3 -B -m llama_cpp.server --model /app/model.bin
else
    # You may have to reduce --n_gpu_layers=1000 to 20 or less if you don't have enough VRAM
    python3 -B -m llama_cpp.server --model /app/model.bin --n_gpu_layers=1000
fi
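If you need different flags without rebuilding the image, one option is to override the container command at run time (a sketch, assuming a CuBLAS image tagged `cublas` as in the README):

```
# Bypass start_server.sh and launch the server directly with fewer GPU layers
docker run --cap-add SYS_RESOURCE --gpus all -p 8000:8000 cublas \
    python3 -B -m llama_cpp.server --model /app/model.bin --n_gpu_layers=20
```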

0 commit comments
