import os

import torch
from flask import Flask, request, jsonify
from onex import OneXMonitor
from transformers import AutoModelForSequenceClassification, AutoTokenizer
app = Flask(__name__)

# Observability client. The API key and ingestion endpoint come from
# https://dashboard.observability.getonex.ai. Prefer environment variables so
# credentials never live in source control; the literals below are the
# original placeholders, kept only as a backward-compatible fallback.
monitor = OneXMonitor(
    api_key=os.environ.get("ONEX_API_KEY", "your-api-key"),
    endpoint=os.environ.get("ONEX_ENDPOINT", "onex-ingestion-endpoint"),
    config={
        # Bound per-request telemetry volume: sample at most 5 items per
        # payload and 32 elements per tensor (exact semantics are defined by
        # OneXMonitor — verify against its documentation).
        "payload_sample_items": 5,
        "payload_tensor_sample": 32,
        # Static metadata attached to every monitored request.
        "request_metadata": {"app": "bert-api"},
    },
)
# Multilingual sentiment model; /predict converts argmax class index + 1 into
# a star rating, so the classes presumably represent 1-5 stars — TODO confirm.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    # Include intermediate activations in the model outputs — presumably so
    # the monitor can sample them; verify against OneXMonitor's requirements.
    # Note this increases memory per forward pass.
    output_hidden_states=True,
    output_attentions=True,
)
model.eval()  # inference mode (disables dropout etc.)
# Wrap the model so forward passes are instrumented by the monitor.
model = monitor.watch(model)
@app.route("/predict", methods=["POST"])
def predict():
    """Score the sentiment of a JSON body ``{"text": "..."}``.

    Returns JSON ``{"rating": int 1-5, "confidence": float, "text": str}``,
    or a 400 JSON error when ``text`` is present but not a string.
    """
    # silent=True: a malformed body or wrong Content-Type yields None here
    # instead of Flask aborting with an opaque 400/415 before the handler
    # can produce a clean response.
    payload = request.get_json(silent=True) or {}
    text = payload.get("text", "")
    # Guard before tokenizing: a non-string (e.g. a number or list) would
    # otherwise raise inside the tokenizer and surface as a 500.
    if not isinstance(text, str):
        return jsonify({"error": "'text' must be a string"}), 400
    with monitor.request_context({"text": text}, metadata={"route": "/predict"}) as ctx:
        # Truncate to the model's 512-token limit to avoid a position-embedding error.
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        with torch.no_grad():  # inference only — skip autograd bookkeeping
            outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=-1)
        rating = int(torch.argmax(probs).item() + 1)  # class index 0-4 -> rating 1-5
        confidence = float(torch.max(probs).item())
        api_response = {"rating": rating, "confidence": confidence, "text": text}
        ctx.record_response(api_response)  # attach the prediction to the monitored request
        return jsonify(api_response)
if __name__ == "__main__":
    # Flask development server (defaults to 127.0.0.1:5000); use a production
    # WSGI server (e.g. gunicorn) for real deployments.
    app.run()