cdpearlman Cursor committed on
Commit
129c11e
·
1 Parent(s): 94b077a

Switch to cost-optimized paid models: Gemini 2.5 Flash Lite + text-embedding-3-small

Browse files
Files changed (3) hide show
  1. requirements.txt +1 -4
  2. todo.md +11 -10
  3. utils/openrouter_client.py +32 -86
requirements.txt CHANGED
@@ -19,7 +19,4 @@ pytest>=7.0.0
19
 
20
  # AI Chatbot dependencies (OpenRouter API)
21
  requests>=2.28.0
22
- python-dotenv>=1.0.0
23
-
24
- # Local embeddings (free, no API required)
25
- sentence-transformers>=2.2.0
 
19
 
20
  # AI Chatbot dependencies (OpenRouter API)
21
  requests>=2.28.0
22
+ python-dotenv>=1.0.0
 
 
 
todo.md CHANGED
@@ -171,13 +171,14 @@
171
  - [x] Update `requirements.txt`: remove `google-genai`, add `requests>=2.28.0`
172
  - [x] Environment variable: `GEMINI_API_KEY` → `OPENROUTER_API_KEY`
173
 
174
- ## Completed: Switch to Free Models
175
-
176
- - [x] Evaluate OpenRouter free models for chatbot use case
177
- - [x] Switch chat model: `google/gemini-2.0-flash-001` → `qwen/qwen3-next-80b-a3b-instruct:free`
178
- - [x] Implement local embeddings using `sentence-transformers` (all-MiniLM-L6-v2)
179
- - 384-dimensional embeddings, runs locally (free)
180
- - Lazy-loaded to avoid slow startup
181
- - [x] Remove OpenRouter embedding dependency (no free embedding models available)
182
- - [x] Add `sentence-transformers>=2.2.0` to requirements.txt
183
- - [x] Clear old embeddings cache (different dimensions: 1536 → 384)
 
 
171
  - [x] Update `requirements.txt`: remove `google-genai`, add `requests>=2.28.0`
172
  - [x] Environment variable: `GEMINI_API_KEY` → `OPENROUTER_API_KEY`
173
 
174
+ ## Completed: Switch to Paid OpenRouter Models (Cost-Optimized)
175
+
176
+ - [x] Evaluate OpenRouter models for chatbot use case (cost vs quality)
177
+ - [x] Switch chat model: `google/gemini-2.5-flash-lite`
178
+ - $0.10/$0.40 per 1M tokens (input/output)
179
+ - 1M context window, 318 tok/s, multimodal
180
+ - [x] Switch embedding model: `openai/text-embedding-3-small`
181
+ - $0.02 per 1M tokens
182
+ - 1536 dimensions, high quality
183
+ - [x] Remove local `sentence-transformers` dependency (simpler, no TF conflicts)
184
+ - [x] Estimated cost: ~$1.50/month for moderate usage
utils/openrouter_client.py CHANGED
@@ -1,8 +1,8 @@
1
  """
2
  OpenRouter API Client
3
 
4
- Wrapper for OpenRouter API providing text generation capabilities
5
- for the AI chatbot feature. Embeddings use local sentence-transformers (free).
6
 
7
  Uses the OpenAI-compatible API via requests.
8
  """
@@ -17,83 +17,11 @@ from typing import List, Dict, Optional
17
  # =============================================================================
18
  # Change these to switch models across the entire application
19
 
20
- DEFAULT_CHAT_MODEL = "qwen/qwen3-next-80b-a3b-instruct:free"
21
-
22
- # Local embedding model (runs locally, completely free)
23
- # all-MiniLM-L6-v2: 384 dimensions, fast, good quality for semantic search
24
- LOCAL_EMBEDDING_MODEL = "all-MiniLM-L6-v2"
25
-
26
- # =============================================================================
27
-
28
-
29
- # =============================================================================
30
- # LOCAL EMBEDDING SERVICE
31
- # =============================================================================
32
-
33
- class LocalEmbeddingService:
34
- """Local embedding service using sentence-transformers (free, no API required)."""
35
-
36
- _instance = None
37
- _model = None
38
-
39
- def __new__(cls):
40
- if cls._instance is None:
41
- cls._instance = super().__new__(cls)
42
- return cls._instance
43
-
44
- def _load_model(self):
45
- """Lazy load the embedding model."""
46
- if self._model is None:
47
- try:
48
- from sentence_transformers import SentenceTransformer
49
- print(f"Loading local embedding model: {LOCAL_EMBEDDING_MODEL}...")
50
- self._model = SentenceTransformer(LOCAL_EMBEDDING_MODEL)
51
- print("Local embedding model loaded successfully.")
52
- except ImportError:
53
- print("sentence-transformers not installed. Run: pip install sentence-transformers")
54
- return None
55
- except Exception as e:
56
- print(f"Error loading embedding model: {e}")
57
- return None
58
- return self._model
59
-
60
- def get_embedding(self, text: str) -> Optional[List[float]]:
61
- """
62
- Get embedding vector for text using local model.
63
-
64
- Args:
65
- text: Text to embed
66
-
67
- Returns:
68
- Embedding vector as list of floats, or None if failed
69
- """
70
- model = self._load_model()
71
- if model is None:
72
- return None
73
-
74
- try:
75
- embedding = model.encode(text, convert_to_numpy=True)
76
- return embedding.tolist()
77
- except Exception as e:
78
- print(f"Local embedding error: {e}")
79
- return None
80
-
81
- def get_query_embedding(self, query: str) -> Optional[List[float]]:
82
- """Get embedding for a query (same as document embedding for this model)."""
83
- return self.get_embedding(query)
84
-
85
-
86
- # Singleton instance for local embeddings
87
- _local_embedding_service: Optional[LocalEmbeddingService] = None
88
-
89
-
90
- def _get_local_embedding_service() -> LocalEmbeddingService:
91
- """Get or create the local embedding service."""
92
- global _local_embedding_service
93
- if _local_embedding_service is None:
94
- _local_embedding_service = LocalEmbeddingService()
95
- return _local_embedding_service
96
 
 
 
97
 
98
  # =============================================================================
99
 
@@ -280,9 +208,7 @@ class OpenRouterClient:
280
 
281
  def get_embedding(self, text: str) -> Optional[List[float]]:
282
  """
283
- Get embedding vector for text using local sentence-transformers model.
284
-
285
- Note: This uses a local model (free) instead of OpenRouter API.
286
 
287
  Args:
288
  text: Text to embed
@@ -290,12 +216,33 @@ class OpenRouterClient:
290
  Returns:
291
  Embedding vector as list of floats, or None if failed
292
  """
293
- service = _get_local_embedding_service()
294
- return service.get_embedding(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
  def get_query_embedding(self, query: str) -> Optional[List[float]]:
297
  """
298
- Get embedding vector for a query using local model.
 
 
 
299
 
300
  Args:
301
  query: Query text to embed
@@ -303,8 +250,7 @@ class OpenRouterClient:
303
  Returns:
304
  Embedding vector as list of floats, or None if failed
305
  """
306
- service = _get_local_embedding_service()
307
- return service.get_query_embedding(query)
308
 
309
 
310
  # Singleton instance
 
1
  """
2
  OpenRouter API Client
3
 
4
+ Wrapper for OpenRouter API providing text generation and embedding capabilities
5
+ for the AI chatbot feature.
6
 
7
  Uses the OpenAI-compatible API via requests.
8
  """
 
17
  # =============================================================================
18
  # Change these to switch models across the entire application
19
 
20
+ # Chat model: Gemini 2.5 Flash Lite - $0.10/$0.40 per 1M tokens, 1M context
21
+ DEFAULT_CHAT_MODEL = "google/gemini-2.5-flash-lite"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ # Embedding model: text-embedding-3-small - $0.02 per 1M tokens, 1536 dimensions
24
+ DEFAULT_EMBEDDING_MODEL = "openai/text-embedding-3-small"
25
 
26
  # =============================================================================
27
 
 
208
 
209
  def get_embedding(self, text: str) -> Optional[List[float]]:
210
  """
211
+ Get embedding vector for text using OpenRouter Embedding API.
 
 
212
 
213
  Args:
214
  text: Text to embed
 
216
  Returns:
217
  Embedding vector as list of floats, or None if failed
218
  """
219
+ if not self.is_available:
220
+ return None
221
+
222
+ try:
223
+ response = requests.post(
224
+ f"{OPENROUTER_BASE_URL}/embeddings",
225
+ headers=self._headers,
226
+ json={
227
+ "model": DEFAULT_EMBEDDING_MODEL,
228
+ "input": text
229
+ },
230
+ timeout=30
231
+ )
232
+ response.raise_for_status()
233
+
234
+ data = response.json()
235
+ return data["data"][0]["embedding"]
236
+ except Exception as e:
237
+ print(f"Embedding error: {e}")
238
+ return None
239
 
240
  def get_query_embedding(self, query: str) -> Optional[List[float]]:
241
  """
242
+ Get embedding vector for a query.
243
+
244
+ Note: OpenRouter doesn't have separate task types for embeddings,
245
+ so this calls the same endpoint as get_embedding.
246
 
247
  Args:
248
  query: Query text to embed
 
250
  Returns:
251
  Embedding vector as list of floats, or None if failed
252
  """
253
+ return self.get_embedding(query)
 
254
 
255
 
256
  # Singleton instance