El LLM de watsonx Orchestrate estaba alucinando nombres de columnas (procedure_name en lugar de procedure) y enviaba los valores de procedimientos en español sin traducir. Dos correcciones: 1. Backend (benefits_api.py): los mapas _COLUMN_ALIASES y _VALUE_ALIASES resuelven alias comunes (procedure_name, name, plan, etc.) y traducciones ES→EN (radiografía → X Ray, resonancia → MRI, etc.) antes de aplicar el filtro. Los mensajes de error ahora listan las columnas válidas para que el agente se corrija solo. 2. Especificación OpenAPI (yaml + json): la descripción de cada operación ahora enumera explícitamente las columnas válidas y los valores válidos del campo procedure, además de una sección VALUE MAPPING ES→EN para que el LLM no tenga que adivinar.
171 lines
5.1 KiB
Python
171 lines
5.1 KiB
Python
import json
|
|
from pathlib import Path
|
|
|
|
import pandas as pd
|
|
from fastapi import APIRouter, HTTPException
|
|
|
|
# Every route registered on this router is served under the "/api" prefix.
router = APIRouter(prefix="/api", tags=["benefits"])

# Fixture files live in the "data" directory next to this module.
_DATA_DIR = Path(__file__).parent / "data"

# Decode explicitly as UTF-8 (as is done for the schedule text below) so the
# result does not depend on the platform's default text encoding.
with (_DATA_DIR / "member_insights.json").open(encoding="utf-8") as f:
    _MEMBER_INSIGHTS = json.load(f)

# Text returned by the /schedule endpoint, with surrounding whitespace removed.
_SCHEDULE_TEXT = (_DATA_DIR / "schedule_response.txt").read_text(encoding="utf-8").strip()

# Tabular datasets queried by the two POST endpoints; loaded once at import.
_HISTORICAL = pd.read_csv(_DATA_DIR / "historical_procedures.csv")
_AVAILABLE = pd.read_csv(_DATA_DIR / "available_procedures.csv")
|
|
|
|
|
|
# Filter operators accepted in a filter's "operator" field. Each callable takes
# the column (a pandas Series) and the requested value, and returns a boolean
# mask selecting the rows that match.
_OPS = {
    "equals": lambda s, v: s == v,
    "ne": lambda s, v: s != v,
    # Literal, case-insensitive substring match. regex=False keeps characters
    # such as "+", "(" or "." in the requested value from being interpreted as
    # a regular expression: an invalid pattern would raise, and "." would
    # match every row.
    "contains": lambda s, v: s.astype(str).str.contains(
        str(v), case=False, na=False, regex=False
    ),
    "gt": lambda s, v: s > v,
    "lt": lambda s, v: s < v,
    "ge": lambda s, v: s >= v,
    "le": lambda s, v: s <= v,
}
|
|
|
|
# Maps commonly guessed column names to the dataset's real column names.
#
# Keys must be written the way _normalize_column builds its lookup key:
# lowercase, spaces removed, "-" replaced by "_". Because spaces are removed,
# a request such as "distance miles" arrives here as "distancemiles", so every
# underscore-separated name also needs its squashed twin.
_COLUMN_ALIASES = {
    # procedure
    "procedure_name": "procedure",
    "procedurename": "procedure",
    "proc": "procedure",
    # member_name
    "name": "member_name",
    "member": "member_name",
    "membername": "member_name",
    # location
    "location_name": "location",
    "locationname": "location",
    "provider": "location",
    "facility": "location",
    # facility_rating
    "rating": "facility_rating",
    "facilityrating": "facility_rating",
    # distance_miles
    "distance": "distance_miles",
    "distancemiles": "distance_miles",
    # member_plan
    "plan": "member_plan",
    "memberplan": "member_plan",
    # procedure_type
    "type": "procedure_type",
    "proceduretype": "procedure_type",
    # total_cost
    "cost": "total_cost",
    "totalcost": "total_cost",
    "price": "total_cost",
    # in_network
    "in_network_only": "in_network",
    "innetworkonly": "in_network",
    "innetwork": "in_network",
}
|
|
|
|
# Maps free-text procedure names (Spanish terms and alternative spellings) to
# the canonical English values used for filtering.
#
# Keys must be lowercase with no surrounding whitespace, because
# _normalize_value looks values up as val.lower().strip(). The lookup is exact,
# so accented and unaccented spellings are both listed.
_VALUE_ALIASES = {
    # X Ray
    "xray": "X Ray",
    "x-ray": "X Ray",
    "x ray": "X Ray",
    "radiografia": "X Ray",
    "radiografía": "X Ray",
    "radiografias": "X Ray",
    "radiografías": "X Ray",
    # MRI
    "resonancia": "MRI",
    "resonancia magnetica": "MRI",
    "resonancia magnética": "MRI",
    # CT Scan
    "tomografia": "CT Scan",
    "tomografía": "CT Scan",
    "ct": "CT Scan",
    # Dental Cleaning
    "limpieza dental": "Dental Cleaning",
    "limpieza": "Dental Cleaning",
    # Vision Exam
    "examen visual": "Vision Exam",
    "examen de la vista": "Vision Exam",
    "vision": "Vision Exam",
    "visión": "Vision Exam",
    # Annual Physical Exam
    "chequeo anual": "Annual Physical Exam",
    "examen anual": "Annual Physical Exam",
    "fisico anual": "Annual Physical Exam",
    "físico anual": "Annual Physical Exam",
    # Appendectomy
    "apendicectomia": "Appendectomy",
    "apendicectomía": "Appendectomy",
    # Blood Test
    "analisis de sangre": "Blood Test",
    "análisis de sangre": "Blood Test",
}
|
|
|
|
|
|
def _normalize_column(col: str, valid_cols: list[str]) -> str | None:
|
|
if col is None:
|
|
return None
|
|
if col in valid_cols:
|
|
return col
|
|
lower = col.lower().replace(" ", "").replace("-", "_")
|
|
if lower in _COLUMN_ALIASES:
|
|
candidate = _COLUMN_ALIASES[lower]
|
|
if candidate in valid_cols:
|
|
return candidate
|
|
return None
|
|
|
|
|
|
def _fold_text(text: str) -> str:
    """Return *text* case-folded, without accents, with separators collapsed.

    Hyphens and underscores count as spaces, and runs of whitespace become a
    single space, e.g. ``"Resonancia  Magnética"`` -> ``"resonancia magnetica"``.
    """
    import unicodedata  # local import: only this helper needs it

    decomposed = unicodedata.normalize("NFKD", text.casefold())
    bare = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    return " ".join(bare.replace("-", " ").replace("_", " ").split())


def _normalize_value(val):
    """Translate a filter value to its canonical form when it is a known alias.

    Non-string values are returned untouched. Strings are first looked up
    verbatim (lowercased and stripped) in ``_VALUE_ALIASES``. If that misses,
    the value is compared again after folding case, accents and separators,
    both against the alias keys and against the canonical values themselves,
    so ``"Resonancia  Magnética"`` and ``"mri"`` both resolve to ``"MRI"``.

    Args:
        val: Raw filter value of any JSON type.

    Returns:
        The canonical value for a recognised alias, otherwise ``val`` unchanged.
    """
    if not isinstance(val, str):
        return val

    lower = val.lower().strip()
    if lower in _VALUE_ALIASES:
        return _VALUE_ALIASES[lower]

    folded = _fold_text(val)
    if folded:
        # The alias table is small, so a linear scan per call is acceptable.
        for alias, canonical in _VALUE_ALIASES.items():
            if folded in (_fold_text(alias), _fold_text(canonical)):
                return canonical
    return val
|
|
|
|
|
|
def _decode_json_arg(raw):
    """Decode JSON text (or pass through a decoded value); empty input -> []."""
    if isinstance(raw, str):
        # Blank text is treated like an omitted argument instead of bad JSON.
        raw = json.loads(raw) if raw.strip() else None
    return raw or []


def _resolve_column(raw, valid_cols: list):
    """Map a requested column to a real one, or None, without raising."""
    if raw is None or isinstance(raw, str):
        return _normalize_column(raw, valid_cols)
    # Non-string labels can only match a column verbatim.
    return raw if raw in valid_cols else None


def _coerce_filter_value(series: pd.Series, value):
    """Convert a string *value* to the column's type when that is unambiguous."""
    if not isinstance(value, str):
        return value
    text = value.strip()
    # Check bool first: pandas also reports bool columns as numeric.
    if pd.api.types.is_bool_dtype(series):
        return {"true": True, "false": False}.get(text.lower(), value)
    if pd.api.types.is_numeric_dtype(series):
        try:
            return float(text)
        except ValueError:
            return value
    return value


def _json_safe_records(frame: pd.DataFrame) -> list[dict]:
    """Turn *frame* into row dicts, replacing missing cells (NaN/NaT) by None."""
    return [
        {
            key: (None if pd.api.types.is_scalar(cell) and pd.isna(cell) else cell)
            for key, cell in row.items()
        }
        for row in frame.to_dict(orient="records")
    ]


def _apply_filters_and_group(df: pd.DataFrame, filters_raw, group_by_raw) -> list[dict]:
    """Filter *df*, optionally average numeric columns per group, return rows.

    Args:
        df: Dataset to query. It is copied, never modified.
        filters_raw: JSON text or an already-decoded list of filter objects,
            each with ``"column"``, ``"operator"`` (a key of ``_OPS``) and
            ``"value"``. A single filter object is accepted as a one-element
            list. Empty or missing means no filtering.
        group_by_raw: JSON text or an already-decoded list of column names. A
            single name is accepted as a one-element list. When non-empty, the
            result holds one row per group with the mean of every numeric
            column that is not itself a grouping column.

    Returns:
        The resulting rows as dictionaries. Missing values are returned as
        ``None`` so that the rows can be serialised as strict JSON.

    Raises:
        HTTPException: status 400 for malformed JSON, malformed filter or
            group_by structures, unknown columns, unsupported operators, or a
            value that cannot be compared with the column.
    """
    try:
        filters = _decode_json_arg(filters_raw)
        group_by = _decode_json_arg(group_by_raw)
    except json.JSONDecodeError as exc:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid JSON in filters or group_by: {exc}",
        ) from exc

    # Accept the singular forms that callers commonly send.
    if isinstance(filters, dict):
        filters = [filters]
    if isinstance(group_by, str):
        group_by = [group_by]

    if not isinstance(filters, list) or not all(isinstance(item, dict) for item in filters):
        raise HTTPException(
            status_code=400,
            detail="filters must be a JSON array of objects with 'column', 'operator' and 'value'",
        )
    if not isinstance(group_by, list):
        raise HTTPException(
            status_code=400,
            detail="group_by must be a JSON array of column names",
        )

    valid_cols = list(df.columns)
    result = df.copy()

    for spec in filters:
        raw_col, op, val = spec.get("column"), spec.get("operator"), spec.get("value")
        col = _resolve_column(raw_col, valid_cols)
        if col is None:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown column: {raw_col}. Valid columns: {valid_cols}",
            )
        # The isinstance check keeps unhashable operators (lists, objects)
        # from raising inside the dictionary lookup.
        if not isinstance(op, str) or op not in _OPS:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported operator: {op}. Valid operators: {list(_OPS.keys())}",
            )
        # NOTE(review): value aliases are applied whatever the column is;
        # confirm that no other column holds values that clash with them.
        norm_val = _normalize_value(val)
        if op != "contains":
            # "contains" matches on text, so its value must stay a string.
            norm_val = _coerce_filter_value(result[col], norm_val)
        try:
            mask = _OPS[op](result[col], norm_val)
        except (TypeError, ValueError) as exc:
            raise HTTPException(
                status_code=400,
                detail=f"Cannot apply operator {op} to column {col} with value {val!r}: {exc}",
            ) from exc
        result = result[mask]

    if group_by:
        norm_group = [_resolve_column(name, valid_cols) for name in group_by]
        missing = [orig for orig, resolved in zip(group_by, norm_group) if resolved is None]
        if missing:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown group_by columns: {missing}. Valid columns: {valid_cols}",
            )
        # Two aliases may resolve to the same column; group by it only once.
        norm_group = list(dict.fromkeys(norm_group))
        # A numeric grouping column must not be aggregated as well, otherwise
        # reset_index() would try to insert a column that already exists.
        numeric_cols = [
            name
            for name in result.select_dtypes(include="number").columns
            if name not in norm_group
        ]
        result = result.groupby(norm_group)[numeric_cols].mean().reset_index()

    return _json_safe_records(result)
|
|
|
|
|
|
# GET <router prefix>/member-insights
# Returns the member-insights document exactly as it was parsed from
# member_insights.json at import time.
# Documented with comments rather than a docstring so the text FastAPI
# publishes for this operation stays unchanged.
@router.get("/member-insights")
def member_insights():
    insights = _MEMBER_INSIGHTS
    return insights
|
|
|
|
|
|
# GET <router prefix>/schedule
# Returns the pre-loaded schedule text wrapped in a {"result": ...} object.
# Documented with comments rather than a docstring so the text FastAPI
# publishes for this operation stays unchanged.
@router.get("/schedule")
def schedule():
    body = {"result": _SCHEDULE_TEXT}
    return body
|
|
|
|
|
|
# POST <router prefix>/historical-procedures
# Queries the historical-procedures dataset. The request body may carry
# "filters" and "group_by"; a missing key falls back to the JSON text "[]",
# i.e. no filtering / no grouping. Validation and the query itself are
# delegated to _apply_filters_and_group.
# Documented with comments rather than a docstring so the text FastAPI
# publishes for this operation stays unchanged.
@router.post("/historical-procedures")
def historical_procedures(payload: dict):
    filters_raw = payload.get("filters", "[]")
    group_by_raw = payload.get("group_by", "[]")
    return {"result": _apply_filters_and_group(_HISTORICAL, filters_raw, group_by_raw)}
|
|
|
|
|
|
# POST <router prefix>/available-procedures
# Queries the available-procedures dataset. The request body may carry
# "filters" and "group_by"; a missing key falls back to the JSON text "[]",
# i.e. no filtering / no grouping. Validation and the query itself are
# delegated to _apply_filters_and_group.
# Documented with comments rather than a docstring so the text FastAPI
# publishes for this operation stays unchanged.
@router.post("/available-procedures")
def available_procedures(payload: dict):
    filters_raw = payload.get("filters", "[]")
    group_by_raw = payload.get("group_by", "[]")
    return {"result": _apply_filters_and_group(_AVAILABLE, filters_raw, group_by_raw)}
|