Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8d67b7d
feat: add more robust handling for MM prompt
hhzhang16 Jun 4, 2025
b65efb5
feat: [WIP] generalize workers
hhzhang16 Jun 4, 2025
40c2154
Merge branch 'main' of github.com:ai-dynamo/dynamo into hannahz/dep-1…
hhzhang16 Jun 4, 2025
e13f827
feat: remove cls token
hhzhang16 Jun 4, 2025
a866d73
Merge branch 'main' of github.com:ai-dynamo/dynamo into hannahz/dep-1…
hhzhang16 Jun 4, 2025
0adb7e6
Merge branch 'main' of github.com:ai-dynamo/dynamo into hannahz/dep-1…
hhzhang16 Jun 5, 2025
86c6135
Merge branch 'main' of github.com:ai-dynamo/dynamo into hannahz/dep-1…
hhzhang16 Jun 5, 2025
19f2158
Merge branch 'main' of github.com:ai-dynamo/dynamo into hannahz/dep-1…
hhzhang16 Jun 6, 2025
a766509
feat: working multimodal agg for multiple vision models
hhzhang16 Jun 7, 2025
17aecda
Merge branch 'main' of github.com:ai-dynamo/dynamo into hannahz/dep-1…
hhzhang16 Jun 7, 2025
496ee57
feat: addressing ci comments
hhzhang16 Jun 9, 2025
820c7e3
feat: addressing ci comments
hhzhang16 Jun 9, 2025
bb4f95e
Merge branch 'main' of github.com:ai-dynamo/dynamo into hannahz/dep-1…
hhzhang16 Jun 9, 2025
027341a
Update examples/multimodal/README.md
hhzhang16 Jun 9, 2025
0eff4e0
feat: trust remote code when loading autoconfig
hhzhang16 Jun 9, 2025
d736895
feat: working code for phi3v
hhzhang16 Jun 10, 2025
36eacb9
docs: add phi3v to multimodal readme
hhzhang16 Jun 10, 2025
d586343
feat: working for Qwen 2.5 VL
hhzhang16 Jun 11, 2025
d5025a7
docs: fixing dash issue
hhzhang16 Jun 11, 2025
1b0efc0
Merge branch 'main' into hannahz/dep-114-generalize-vlm-embedding-ext…
hhzhang16 Jun 11, 2025
843d586
docs: add readme note about disagg support
hhzhang16 Jun 11, 2025
d12e86d
Merge branch 'hannahz/dep-114-generalize-vlm-embedding-extraction' of…
hhzhang16 Jun 11, 2025
073ad67
feat: remove pynvml from this MR
hhzhang16 Jun 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: remove cls token
  • Loading branch information
hhzhang16 committed Jun 4, 2025
commit e13f827af465a422566b336708d87b5f0cb92ff1
6 changes: 3 additions & 3 deletions examples/multimodal/components/decode_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ async def async_init(self):

self.embedding_size = get_vision_embedding_size(self.engine_args.model)
else:
EMBEDDINGS_SHAPE = (1, 577, 4096)
EMBEDDINGS_SHAPE = (1, 576, 4096)
EMBEDDINGS_DTYPE = torch.float16
EMBEDDINGS_DEVICE = "cuda"

Expand Down Expand Up @@ -345,7 +345,7 @@ async def remote_prefill(self, request: vLLMMultimodalRequest) -> tuple:
# As a workaround, here we manually insert some placeholder dummy tokens based on the embedding size
# so that decode worker can pre-allocate the memory with the correct size.
# The structure of the prompt will be like: "\nUSER: <image> <dummy_tokens>\n<user_prompt>\nASSISTANT:".
# Since the "<image>" token is included in the prompt, only need to insert (embedding_size - 1) dummy tokens after the image token.
# Since the "<image>" token is included in the prompt, only need to insert embedding_size dummy tokens after the image token.
IMAGE_TOKEN_ID = 32000
DUMMY_TOKEN_ID = 0
# Find the index of the image token in the prompt token ids
Expand All @@ -355,7 +355,7 @@ async def remote_prefill(self, request: vLLMMultimodalRequest) -> tuple:
dummy_token_index = image_token_index + 1
prompt_ids = (
request.engine_prompt["prompt_token_ids"][:dummy_token_index]
+ [DUMMY_TOKEN_ID] * (self.embedding_size - 1)
+ [DUMMY_TOKEN_ID] * self.embedding_size
+ request.engine_prompt["prompt_token_ids"][dummy_token_index:]
)
logger.debug(
Expand Down
2 changes: 1 addition & 1 deletion examples/multimodal/components/encode_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ async def encode(self, request: EncodeRequest) -> AsyncIterator[EncodeResponse]:
with torch.no_grad():
embeddings = self.vision_model.get_multimodal_embeddings(**image_embeds)

logger.info(
logger.debug(
f"Embeddings: {{ shape: {embeddings.shape}, dtype: {embeddings.dtype}, device: {embeddings.device}, ptr: {embeddings.data_ptr()}, elements: {{ count: {embeddings.numel()}, size: {embeddings.element_size()} }} }}."
)

Expand Down
2 changes: 1 addition & 1 deletion examples/multimodal/components/prefill_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ async def generate(self, request: RemotePrefillRequest):
# TODO: make this more flexible/model-dependent
IMAGE_TOKEN_ID = 32000
embedding_size = embeddings.shape[1]
padding_size = embedding_size - 1
padding_size = embedding_size
image_token_index = request.prompt_token_ids.index(IMAGE_TOKEN_ID)
dummy_token_index = image_token_index + 1
prompt_token_ids = (
Expand Down
3 changes: 1 addition & 2 deletions examples/multimodal/utils/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,5 +80,4 @@ def get_vision_embedding_size(model_id: str) -> int:

num_patches = (h // ph) * (w // pw)

# 4. Add CLS token (standard in ViT architectures)
return num_patches + 1
return num_patches