Skip to content

Commit 1da3574

Browse files
authored
New GenerationConfig parameter for generate routes (#47)
- deprecates `json_schema` and `detail` parameters
1 parent e97f2cd commit 1da3574

File tree

7 files changed

+118
-139
lines changed

7 files changed

+118
-139
lines changed

README.md

Lines changed: 6 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -48,55 +48,23 @@ The package provides optional features that can be installed based on your needs
4848
### Basic Usage
4949

5050
```python
51+
from PIL import Image
5152
from vlmrun.client import VLMRun
53+
from vlmrun.client.types import GenerationConfig
5254
from vlmrun.hub.schemas.document.invoice import Invoice
5355

5456
# Initialize the client
5557
client = VLMRun(api_key="your-api-key")
5658

5759
# Process an image
60+
image: Image.Image = Image.open("image.jpg")
5861
response = client.image.generate(
59-
image="https://example.com/invoice.jpg",
62+
images=[image],
6063
model="vlm-1",
6164
domain="document.invoice",
62-
json_schema=Invoice.model_json_schema(),
65+
config=GenerationConfig(json_schema=Invoice.model_json_schema()),
6366
)
64-
```
65-
66-
### Image Utilities
67-
68-
```python
69-
from vlmrun.common.image import encode_image
70-
from vlmrun.common.utils import download_image
71-
from PIL import Image
72-
73-
# Convert image to base64 or binary
74-
image = Image.open("image.jpg")
75-
base64_str = encode_image(image, format="PNG") # or format="JPEG"
76-
binary_data = encode_image(image, format="binary")
77-
78-
# Download image from URL
79-
image = download_image("https://example.com/image.jpg")
80-
```
81-
82-
</details>
83-
84-
## 📂 Directory Structure
85-
86-
```bash
87-
vlmrun/
88-
├── client/ # Client implementation
89-
│ ├── client.py # Main VLMRun class
90-
│ ├── base_requestor.py # Low-level request logic
91-
│ ├── files.py # File operations
92-
│ ├── models.py # Model operations
93-
│ ├── finetune.py # Fine-tuning operations
94-
│ └── types.py # Type definitions
95-
├── common/ # Common utilities
96-
│ ├── auth.py # Authentication utilities
97-
│ └── image.py # Image processing utilities
98-
└── types/ # Type definitions
99-
└── abstract.py # Abstract base classes
67+
print(response)
10068
```
10169

10270
## 🔗 Quick Links

tests/cli/test_cli_generate.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,30 @@
11
"""Test generate subcommand."""
22

3+
from pathlib import Path
4+
35
from vlmrun.cli.cli import app
6+
from vlmrun.common.utils import download_artifact
47

58

69
def test_generate_image(runner, mock_client, tmp_path):
710
"""Test generate image command."""
8-
test_image = tmp_path / "test.jpg"
9-
test_image.write_bytes(b"test image data")
10-
result = runner.invoke(app, ["generate", "image", str(test_image)])
11-
assert result.exit_code == 0
12-
13-
14-
def test_generate_video(runner, mock_client, tmp_path):
15-
"""Test generate video command."""
16-
result = runner.invoke(app, ["generate", "video", "test prompt"])
11+
path: Path = download_artifact(
12+
"https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.invoice/invoice_1.jpg",
13+
format="file",
14+
)
15+
result = runner.invoke(
16+
app, ["generate", "image", str(path), "--domain", "document.invoice"]
17+
)
1718
assert result.exit_code == 0
1819

1920

2021
def test_generate_document(runner, mock_client, tmp_path):
2122
"""Test generate document command."""
22-
result = runner.invoke(app, ["generate", "document", "test prompt"])
23+
path: Path = download_artifact(
24+
"https://storage.googleapis.com/vlm-data-public-prod/hub/examples/document.bank-statement/lending_bankstatement.pdf",
25+
format="file",
26+
)
27+
result = runner.invoke(
28+
app, ["generate", "document", str(path), "--domain", "document.bank-statement"]
29+
)
2330
assert result.exit_code == 0

vlmrun/cli/_cli/generate.py

Lines changed: 22 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
"""Generation API commands."""
22

33
from pathlib import Path
4-
from typing import Optional
54

65
import typer
6+
from PIL import Image
77
from rich import print as rprint
88

99
from vlmrun.client import VLMRun
10+
from vlmrun.client.types import PredictionResponse
1011

11-
app = typer.Typer(help="Generation operations")
12+
app = typer.Typer(help="Generation operations", no_args_is_help=True)
1213

1314

1415
@app.command()
@@ -17,60 +18,34 @@ def image(
1718
image: Path = typer.Argument(
1819
..., help="Input image file", exists=True, readable=True
1920
),
20-
output: Optional[Path] = typer.Option(None, help="Output file path"),
21+
domain: str = typer.Option(
22+
..., help="Domain to use for generation (e.g. `document.invoice`)"
23+
),
2124
) -> None:
2225
"""Generate an image."""
2326
client: VLMRun = ctx.obj
24-
response = client.image.generate(images=[image], model="vlm-1", domain="image")
25-
if output and response and hasattr(response, "response"):
26-
if isinstance(response.response, bytes):
27-
output.write_bytes(response.response)
28-
rprint(f"Image saved to {output}")
29-
else:
30-
rprint("Error: Response does not contain valid image data")
31-
else:
32-
rprint("Image data generated (use --output to save to file)")
27+
if not Path(image).is_file():
28+
raise typer.Abort(f"Image file does not exist: {image}")
3329

34-
35-
@app.command()
36-
def video(
37-
ctx: typer.Context,
38-
prompt: str = typer.Argument(..., help="Video generation prompt"),
39-
output: Optional[Path] = typer.Option(None, help="Output file path"),
40-
) -> None:
41-
"""Generate a video."""
42-
client: VLMRun = ctx.obj
43-
response = client.video.generate(
44-
file_or_url=prompt, model="vlm-1", domain="video" # Using prompt as input text
45-
)
46-
if output and response and hasattr(response, "response"):
47-
if isinstance(response.response, bytes):
48-
output.write_bytes(response.response)
49-
rprint(f"Video saved to {output}")
50-
else:
51-
rprint("Error: Response does not contain valid video data")
52-
else:
53-
rprint("Video data generated (use --output to save to file)")
30+
img: Image.Image = Image.open(image)
31+
response: PredictionResponse = client.image.generate(images=[img], domain=domain)
32+
rprint(response)
5433

5534

5635
@app.command()
5736
def document(
5837
ctx: typer.Context,
59-
prompt: str = typer.Argument(..., help="Document generation prompt"),
60-
output: Optional[Path] = typer.Option(None, help="Output file path"),
38+
path: Path = typer.Argument(
39+
..., help="Path to the document file", exists=True, readable=True
40+
),
41+
domain: str = typer.Option(
42+
..., help="Domain to use for generation (e.g. `document.invoice`)"
43+
),
6144
) -> None:
6245
"""Generate a document."""
6346
client: VLMRun = ctx.obj
64-
response = client.document.generate(
65-
file_or_url=prompt, # Using prompt as input text
66-
model="vlm-1",
67-
domain="document",
68-
)
69-
if output and response and hasattr(response, "response"):
70-
if isinstance(response.response, bytes):
71-
output.write_bytes(response.response)
72-
rprint(f"Document saved to {output}")
73-
else:
74-
rprint("Error: Response does not contain valid document data")
75-
else:
76-
rprint("Document data generated (use --output to save to file)")
47+
if not Path(path).is_file():
48+
raise typer.Abort(f"Document file does not exist: {path}")
49+
50+
response = client.document.generate(file=path, domain=domain)
51+
rprint(response)

vlmrun/cli/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
name="vlmrun",
2121
help="CLI for VLM Run (https://app.vlm.run)",
2222
add_completion=True,
23+
no_args_is_help=True,
2324
)
2425

2526

vlmrun/client/predictions.py

Lines changed: 56 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@
1010
from vlmrun.common.image import encode_image
1111
from vlmrun.client.base_requestor import APIRequestor
1212
from vlmrun.types.abstract import VLMRunProtocol
13-
from vlmrun.client.types import PredictionResponse, FileResponse
13+
from vlmrun.client.types import (
14+
PredictionResponse,
15+
FileResponse,
16+
GenerationConfig,
17+
RequestMetadata,
18+
)
1419

1520

1621
class Predictions:
@@ -82,23 +87,19 @@ class ImagePredictions(Predictions):
8287
def generate(
8388
self,
8489
images: list[Path | Image.Image],
85-
model: str,
8690
domain: str,
87-
json_schema: dict | None = None,
88-
detail: str = "auto",
8991
batch: bool = False,
90-
metadata: dict = {},
92+
metadata: RequestMetadata | None = None,
93+
config: GenerationConfig | None = None,
9194
callback_url: str | None = None,
9295
) -> PredictionResponse:
9396
"""Generate a document prediction.
9497
9598
Args:
9699
images: List of images to generate predictions from
97-
model: Model to use for prediction
98100
domain: Domain to use for prediction
99-
json_schema: JSON schema to use for prediction
100-
detail: Detail level for prediction
101101
batch: Whether to run prediction in batch mode
102+
config: GenerationConfig to use for prediction
102103
metadata: Metadata to include in prediction
103104
callback_url: URL to call when prediction is complete
104105
@@ -117,18 +118,20 @@ def generate(
117118
else:
118119
raise ValueError("Image must be a path or a PIL Image")
119120

121+
additional_kwargs = {}
122+
if config:
123+
additional_kwargs["config"] = config.model_dump()
124+
if metadata:
125+
additional_kwargs["metadata"] = metadata.model_dump()
120126
response, status_code, headers = self._requestor.request(
121127
method="POST",
122128
url="image/generate",
123129
data={
124130
"image": encode_image(images[0], format="JPEG"),
125-
"model": model,
126131
"domain": domain,
127-
"json_schema": json_schema,
128-
"detail": detail,
129132
"batch": batch,
130-
"metadata": metadata,
131133
"callback_url": callback_url,
134+
**additional_kwargs,
132135
},
133136
)
134137
if not isinstance(response, dict):
@@ -144,64 +147,75 @@ class _FilePredictions(Predictions):
144147

145148
def generate(
146149
self,
147-
file_or_url: str | Path,
148-
model: str,
149-
domain: str,
150-
json_schema: dict | None = None,
151-
detail: str = "auto",
150+
file: Path | str | None = None,
151+
url: str | None = None,
152+
domain: str | None = None,
152153
batch: bool = False,
153-
metadata: dict = {},
154+
config: GenerationConfig | None = GenerationConfig(),
155+
metadata: RequestMetadata | None = RequestMetadata(),
154156
callback_url: str | None = None,
155157
) -> PredictionResponse:
156158
"""Generate a document prediction.
157159
158160
Args:
159-
file_or_url: File (pathlib.Path) or file_id or URL to generate prediction from
160-
model: Model to use for prediction
161+
file: File (pathlib.Path) or file_id to generate prediction from
162+
url: URL to generate prediction from
161163
domain: Domain to use for prediction
162-
json_schema: JSON schema to use for prediction
163-
detail: Detail level for prediction
164164
batch: Whether to run prediction in batch mode
165+
config: GenerationConfig to use for prediction
165166
metadata: Metadata to include in prediction
166167
callback_url: URL to call when prediction is complete
167168
168169
Returns:
169170
PredictionResponse: Prediction response
170171
"""
171172
is_url = False
172-
if isinstance(file_or_url, Path):
173-
logger.debug(
174-
f"Uploading file [path={file_or_url}, size={file_or_url.stat().st_size / 1024 / 1024:.2f} MB] to VLM Run"
175-
)
176-
upload_response, _, _ = self._client.files.upload(
177-
file=file_or_url, purpose="assistants"
178-
)
179-
if not isinstance(upload_response, dict):
180-
raise TypeError("Expected dict response")
181-
response = FileResponse(**upload_response)
182-
logger.debug(
183-
f"Uploaded file [file_id={response.id}, name={response.filename}]"
184-
)
185-
file_or_url = response.id
186-
elif isinstance(file_or_url, str):
187-
is_url = str(file_or_url).startswith(("http://", "https://"))
173+
if not file and not url:
174+
raise ValueError("Either `file` or `url` must be provided")
175+
if file and url:
176+
raise ValueError("Only one of `file` or `url` can be provided")
177+
if file:
178+
if isinstance(file, Path) or (
179+
isinstance(file, str) and Path(file).suffix
180+
):
181+
logger.debug(
182+
f"Uploading file [path={file}, size={file.stat().st_size / 1024 / 1024:.2f} MB] to VLM Run"
183+
)
184+
response: FileResponse = self._client.files.upload(
185+
file=Path(file), purpose="assistants"
186+
)
187+
logger.debug(
188+
f"Uploaded file [file_id={response.id}, name={response.filename}]"
189+
)
190+
file_or_url = response.id
191+
elif isinstance(file, str):
192+
logger.debug(f"Using file_id [file_id={file}]")
193+
assert not Path(file).suffix, "File must not have an extension"
194+
file_or_url = file
195+
else:
196+
raise ValueError("File must be a pathlib.Path or a string")
197+
elif url:
198+
is_url = True
199+
file_or_url = url
188200
else:
189201
raise ValueError(
190202
"File or URL must be a pathlib.Path, str, or AnyHttpUrl"
191203
)
192204

205+
additional_kwargs = {}
206+
if config:
207+
additional_kwargs["config"] = config.model_dump()
208+
if metadata:
209+
additional_kwargs["metadata"] = metadata.model_dump()
193210
response, status_code, headers = self._requestor.request(
194211
method="POST",
195212
url=f"{route}/generate",
196213
data={
197214
"url" if is_url else "file_id": file_or_url,
198-
"model": model,
199215
"domain": domain,
200-
"json_schema": json_schema,
201-
"detail": detail,
202216
"batch": batch,
203-
"metadata": metadata,
204217
"callback_url": callback_url,
218+
**additional_kwargs,
205219
},
206220
)
207221
if not isinstance(response, dict):

0 commit comments

Comments
 (0)