| from transformers import AutoTokenizer, AutoModelForSequenceClassification |
| import torch |
|
|
class EndpointHandler:
    """Inference-endpoint wrapper around a sequence-classification model.

    Expected request payload: ``{"inputs": "text"}`` or
    ``{"inputs": ["a", "b", ...]}``. Each input text is scored by the
    model's (single-label) classification head; the raw logit is clipped
    into [0, 1] and rounded to 4 decimal places.
    """

    def __init__(self, path=""):
        # Load tokenizer + model from the checkpoint directory handed to the
        # endpoint; eval() disables dropout, and the model is pinned to GPU
        # when one is available.
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        self.model.eval()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def __call__(self, data):
        """
        This method is called when the endpoint receives a request.
        Expected input: { "inputs": "some string" } or { "inputs": ["a", "b", ...] }

        Returns:
            list[dict]: one ``{"score": float}`` per input text, or
            ``{"error": str}`` when the payload carries no "inputs" key.
        """
        inputs = data.get("inputs", None)

        if inputs is None:
            return {"error": "No input provided"}

        # Normalize a single string into a one-element batch.
        if isinstance(inputs, str):
            inputs = [inputs]

        # Empty batch: nothing to score. (Guard is required because the
        # batched tokenizer call below rejects an empty list; the original
        # per-item loop silently produced [] here, and we preserve that.)
        if not inputs:
            return []

        # Tokenize the whole batch in ONE call, padding only to the longest
        # sequence in the batch instead of a fixed 4096 tokens. The attention
        # mask makes padded positions invisible to the model, so the scores
        # are identical to per-item inference — without the wasted compute of
        # the previous one-text-at-a-time loop with padding="max_length".
        encoded = self.tokenizer(
            inputs,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=4096,
        )
        encoded = {k: v.to(self.device) for k, v in encoded.items()}

        # Single forward pass for the whole batch; no gradients needed at
        # inference time.
        with torch.no_grad():
            outputs = self.model(**encoded)

        # logits shape: (batch, 1) for a single-label head — drop the label
        # dim, then clip each raw score into [0, 1] and round to 4 places.
        scores = outputs.logits.squeeze(-1).tolist()
        return [
            {"score": round(min(max(score, 0.0), 1.0), 4)}
            for score in scores
        ]
|
|