| | from pymilvus import MilvusClient, AnnSearchRequest, RRFRanker |
| | from langchain_community.embeddings.ollama import OllamaEmbeddings |
| | from pymilvus import WeightedRanker |
| |
|
# Module-level RRFRanker instance configured with k=10.
# NOTE(review): nothing in the visible code references this name
# (hybrid_search constructs its own WeightedRanker) -- confirm whether
# another module imports it before removing.
reranker = RRFRanker(k=10)
| |
|
| | """ |
| | embed_model = OllamaEmbeddings(model="bge-m3") |
| | client = MilvusClient(uri="http://192.168.5.103:19530") |
| | |
| | |
| | query = "Can I take pills?" |
| | query_embedding = embed_model.embed_query(query) |
| | |
| | # single vector search |
| | res = client.search( |
| | collection_name="t_sur_sex_ed_article_spider", |
| | data=[query_embedding], |
| | limit=2, |
| | search_params={"metric_type": "COSINE", "params": {}}, |
| | anns_field="chunk_vector", |
| | output_fields=["title", "chunk", "link", "category"] |
| | ) |
| | """ |
| |
|
| | |
def hybrid_search(
    query,
    embed_model: "OllamaEmbeddings",
    collection_name,
    client: "MilvusClient",
    anns_fields=("title_vector", "chunk_vector", "tags"),
    weights=(0.3, 0.6, 0.1),
    limit=3,
    output_fields=("title", "chunk", "link"),
):
    """Run a weighted multi-field vector search for ``query``.

    The query is embedded once; that single embedding is used to build one
    ``AnnSearchRequest`` per entry in ``anns_fields``. The requests are sent
    through ``client.hybrid_search`` and fused with a ``WeightedRanker``
    built from ``weights``.

    Args:
        query: Text handed to ``embed_model.embed_query``.
        embed_model: Object exposing ``embed_query(text)``.
        collection_name: Name of the collection to search.
        client: Object exposing ``hybrid_search(...)``.
        anns_fields: Vector field names; one ANN request is built per field.
        weights: Ranker weights, positionally matched to ``anns_fields``.
        limit: Hit limit applied to every request and to the fused result.
        output_fields: Entity fields requested with each hit.

    Returns:
        The value returned by ``client.hybrid_search``, unmodified.

    Raises:
        ValueError: If ``anns_fields`` is empty or its length differs from
            the length of ``weights``.
    """
    anns_fields = tuple(anns_fields)
    weights = tuple(weights)

    # Validate before embedding so a misconfiguration never costs a model call.
    if not anns_fields:
        raise ValueError("anns_fields must name at least one vector field")
    if len(anns_fields) != len(weights):
        raise ValueError(
            f"expected one weight per field: got {len(anns_fields)} fields "
            f"and {len(weights)} weights"
        )

    query_embedding = embed_model.embed_query(query)

    # NOTE(review): every field, including the default "tags", is queried
    # with the same embedding under COSINE -- confirm each field is a vector
    # field with the embedding model's dimension.
    requests = [
        AnnSearchRequest(
            data=[query_embedding],
            anns_field=field,
            # A fresh dict per request so no request shares mutable state.
            param={
                "metric_type": "COSINE",
                "params": {"nprobe": 10, "level": 3},
            },
            limit=limit,
        )
        for field in anns_fields
    ]

    return client.hybrid_search(
        collection_name=collection_name,
        reqs=requests,
        ranker=WeightedRanker(*weights),
        limit=limit,
        output_fields=list(output_fields),
    )
| | |
| |
|
def single_vector_search(
    query,
    embed_model: "OllamaEmbeddings",
    collection_name,
    client: "MilvusClient",
    anns_field,
    limit=20,
    filter_expr="content_type == 'A'",
    output_fields=("title", "content", "url", "content_type", "likes", "dislikes"),
):
    """Search one vector field and re-rank the hits by like/dislike ratio.

    The query is embedded, ``client.search`` is called with a COSINE metric,
    and the hits for that single query vector are sorted in descending order
    of ``likes / (dislikes + 1)``.

    Args:
        query: Text handed to ``embed_model.embed_query``.
        embed_model: Object exposing ``embed_query(text)``.
        collection_name: Name of the collection to search.
        client: Object exposing ``search(...)``.
        anns_field: Name of the vector field to search.
        limit: Maximum number of hits requested from the search.
        filter_expr: Filter expression forwarded as ``filter``.
        output_fields: Entity fields requested with each hit. The re-ranking
            reads ``likes`` and ``dislikes``; when either is absent or
            ``None`` it is counted as 0.

    Returns:
        A list of hits sorted by like ratio, highest first. Empty when the
        search returns nothing.
    """

    def like_ratio(hit):
        # Missing/None counters are treated as 0 so one incomplete row cannot
        # abort the whole sort; the +1 keeps the denominator non-zero for
        # rows without dislikes.
        entity = hit["entity"]
        likes = entity.get("likes") or 0
        dislikes = entity.get("dislikes") or 0
        return likes / (dislikes + 1)

    query_embedding = embed_model.embed_query(query)
    batches = client.search(
        collection_name=collection_name,
        data=[query_embedding],
        limit=limit,
        search_params={"metric_type": "COSINE", "params": {}},
        anns_field=anns_field,
        filter=filter_expr,
        output_fields=list(output_fields),
    )

    # One query vector was sent, so only the first result list is relevant;
    # guard against a completely empty response instead of raising IndexError.
    hits = batches[0] if batches else []
    return sorted(hits, key=like_ratio, reverse=True)
| | |
| |
|
if __name__ == "__main__":
    # Manual smoke run: embed a fixed query with the Ollama "bge-m3" model,
    # run the hybrid search against a hard-coded Milvus endpoint, and print
    # the "chunk" field of each hit for the single query.
    embedder = OllamaEmbeddings(model="bge-m3")
    milvus = MilvusClient(uri="http://192.168.5.103:19530")

    separator = "\n #############################"
    hits = hybrid_search(
        "How to make a good blow job",
        embedder,
        "t_sur_sex_ed_article_spider",
        milvus,
    )[0]  # first (and only) result list: one query was submitted

    for hit in hits:
        # Chunk text, newline, then the separator line.
        print(hit["entity"]["chunk"], separator, sep="\n")
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|