import asyncio
import os

from fastapi import FastAPI, Request
from fastapi.responses import FileResponse, JSONResponse
from huggingface_hub import InferenceClient
| |
|
| | app = FastAPI() |
| |
|
| | HF_TOKEN = os.getenv("HF_TOKEN") |
| | MODEL_ID = "huihui-ai/Qwen2.5-72B-Instruct-abliterated" |
| | client = InferenceClient(token=HF_TOKEN) |
| |
|
@app.get("/")
async def serve_index():
    """Serve the chat front-end page from the working directory."""
    index_path = "index.html"
    return FileResponse(index_path)
| |
|
@app.get("/static/styles.css")
async def serve_css():
    """Serve the stylesheet referenced by the front-end page."""
    css_file = "styles.css"
    return FileResponse(css_file)
| |
|
@app.post("/chat")
async def chat_handler(request: Request):
    """Proxy a chat completion request to the Hugging Face Inference API.

    Expects a JSON body of the form ``{"messages": [...]}`` and returns an
    OpenAI-style ``{"choices": [{"message": {...}}]}`` payload, or
    ``{"error": ...}`` with status 400 (bad request) / 500 (upstream failure).
    """
    try:
        body = await request.json()
    except Exception:
        # Malformed JSON is a client error, not a server failure.
        return JSONResponse({"error": "request body must be valid JSON"}, status_code=400)

    messages = body.get("messages", []) if isinstance(body, dict) else []
    if not isinstance(messages, list):
        return JSONResponse({"error": "'messages' must be a list"}, status_code=400)

    try:
        # chat_completion performs a blocking HTTP request; run it in a worker
        # thread so it does not stall the event loop for other requests.
        response = await asyncio.to_thread(
            client.chat_completion,
            model=MODEL_ID,
            messages=messages,
            max_tokens=1500,
        )
        return JSONResponse(
            {
                "choices": [
                    {
                        "message": {
                            "role": "assistant",
                            "content": response.choices[0].message.content,
                        }
                    }
                ]
            }
        )
    except Exception as e:
        # Boundary handler: report the upstream/inference failure to the caller.
        return JSONResponse({"error": str(e)}, status_code=500)
| |
|