"""Usage example: text, image, fused and multi-image embeddings with OpsMMEmbeddingV1.

Running this script loads the model on a CUDA device and fetches the example
images from their URLs, so it needs a GPU and network access.
"""

from ops_mm_embedding_v1 import OpsMMEmbeddingV1, fetch_image

# Load the embedding model onto the GPU using the FlashAttention-2 attention backend.
model = OpsMMEmbeddingV1(
    "OpenSearch-AI/Ops-MM-embedding-v1-7B",
    device="cuda",
    attn_implementation="flash_attention_2",
)

# Instruction passed to the fused-embedding call below.
t2i_prompt = "Find an image that matches the given text."
texts = [
    "The Tesla Cybertruck is a battery electric pickup truck built by Tesla, Inc. since 2023.",
    "Alibaba office.",
    "Alibaba office.",
]
images = [
    "https://upload.wikimedia.org/wikipedia/commons/e/e9/Tesla_Cybertruck_damaged_window.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/e/e0/TaobaoCity_Alibaba_Xixi_Park.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Alibaba_Binjiang_Park.jpg/1024px-Alibaba_Binjiang_Park.jpg",
]

# Replace each URL with whatever fetch_image returns for it
# (presumably a loaded image object -- confirm against ops_mm_embedding_v1).
images = [fetch_image(image) for image in images]

# Text and image embeddings: matrix product of the text embeddings with the
# transposed image embeddings (presumably a text-by-image similarity matrix).
text_embeddings = model.get_text_embeddings(texts)
image_embeddings = model.get_image_embeddings(images)
print('Text and image embeddings', (text_embeddings @ image_embeddings.T).tolist())

# Fused embeddings: each text is embedded together with its image under the
# t2i instruction. The original script computed this value but then printed the
# plain text/image product a second time; print a product that actually uses
# the fused embeddings instead.
text_with_image_embeddings = model.get_fused_embeddings(texts=texts, images=images, instruction=t2i_prompt)
print('Fused and image embeddings', (text_with_image_embeddings @ image_embeddings.T).tolist())

# Multi-image embeddings: each inner list is passed as one input item
# (presumably yielding one embedding per group -- confirm with the model API).
multi_images = [
    [images[0]],
    [images[1], images[2]],
]
multi_image_embeddings = model.get_image_embeddings(multi_images)
print('Multi-image embeddings', (multi_image_embeddings @ multi_image_embeddings.T).tolist())