Files
taller-wox/app/benefits_api.py
farentsen 715a6ca370 fix(api): tolerate column aliases (procedure_name → procedure) + Spanish value mappings
El LLM de watsonx Orchestrate estaba alucinando nombres de columnas
(procedure_name en lugar de procedure) y los valores de procedimientos
en español sin traducir. Dos fixes:

1. Backend (benefits_api.py): mapas _COLUMN_ALIASES y _VALUE_ALIASES
   resuelven aliases comunes (procedure_name, name, plan, etc.) y
   traducciones ES→EN (radiografía → X Ray, resonancia → MRI, etc.)
   antes de aplicar el filtro. Mensajes de error ahora listan columnas
   válidas para que el agente se corrija solo.

2. OpenAPI spec (yaml + json): description de cada operación ahora
   enumera explícitamente las columnas válidas y los valores válidos
   del campo procedure, más una sección VALUE MAPPING ES→EN para que
   el LLM no tenga que adivinar.
2026-05-13 14:51:31 +00:00

171 lines
5.1 KiB
Python

import json
from pathlib import Path
import pandas as pd
from fastapi import APIRouter, HTTPException
# Router for every benefits endpoint defined in this module; all routes are
# served under the "/api" prefix and tagged "benefits".
router = APIRouter(prefix="/api", tags=["benefits"])

# Static fixtures live in the "data" directory next to this module and are
# loaded once, at import time.
_DATA_DIR = Path(__file__).parent / "data"

# Decode the JSON fixture explicitly as UTF-8 instead of relying on the
# platform's locale default, matching how the schedule text is read below.
with (_DATA_DIR / "member_insights.json").open(encoding="utf-8") as f:
    _MEMBER_INSIGHTS = json.load(f)

# Canned schedule response, with surrounding whitespace removed.
_SCHEDULE_TEXT = (_DATA_DIR / "schedule_response.txt").read_text(encoding="utf-8").strip()

# Tabular fixtures queried by the POST endpoints.
_HISTORICAL = pd.read_csv(_DATA_DIR / "historical_procedures.csv")
_AVAILABLE = pd.read_csv(_DATA_DIR / "available_procedures.csv")
# Filter operators accepted in a filter's "operator" field. Each entry maps
# the operator name to a callable taking (column Series, filter value) and
# returning a boolean mask over the Series.
_OPS = {
    "equals": lambda s, v: s == v,
    "ne": lambda s, v: s != v,
    # Case-insensitive *literal* substring match. regex=False is deliberate:
    # the value comes from the request, so characters such as "(" or "." must
    # be matched as-is instead of being compiled as a regular expression
    # (which could raise on invalid patterns or match far more than intended).
    "contains": lambda s, v: s.astype(str).str.contains(
        str(v), case=False, na=False, regex=False
    ),
    "gt": lambda s, v: s > v,
    "lt": lambda s, v: s < v,
    "ge": lambda s, v: s >= v,
    "le": lambda s, v: s <= v,
}
# Maps alternative column spellings (already lower-cased, as produced by
# _normalize_column) to the column name they stand for. Declared as
# (canonical column, aliases) groups so each canonical name is written once;
# the resulting dict is flat: alias -> canonical column.
_COLUMN_ALIASES = {
    alias: canonical
    for canonical, aliases in (
        ("procedure", ("procedure_name", "procedurename", "proc")),
        ("member_name", ("name", "member", "membername")),
        ("location", ("location_name", "provider", "facility")),
        ("facility_rating", ("rating", "facilityrating")),
        ("distance_miles", ("distance",)),
        ("member_plan", ("plan", "memberplan")),
        ("procedure_type", ("type", "proceduretype")),
        ("total_cost", ("cost", "totalcost", "price")),
        ("in_network", ("in_network_only", "innetwork")),
    )
    for alias in aliases
}
# Maps lower-cased filter values (shorthand and Spanish terms, with and
# without accents) to the English value they are replaced with before a
# filter is applied. Declared as (replacement value, aliases) groups; the
# resulting dict is flat: alias -> replacement value.
_VALUE_ALIASES = {
    alias: canonical
    for canonical, aliases in (
        (
            "X Ray",
            (
                "xray",
                "x-ray",
                "radiografia",
                "radiografía",
                "radiografias",
                "radiografías",
            ),
        ),
        ("MRI", ("resonancia", "resonancia magnetica", "resonancia magnética")),
        ("CT Scan", ("tomografia", "tomografía", "ct")),
        ("Dental Cleaning", ("limpieza dental", "limpieza")),
        ("Vision Exam", ("examen visual", "examen de la vista", "vision")),
        ("Annual Physical Exam", ("chequeo anual", "examen anual", "fisico anual")),
        ("Appendectomy", ("apendicectomia", "apendicectomía")),
        ("Blood Test", ("analisis de sangre", "análisis de sangre")),
    )
    for alias in aliases
}
def _normalize_column(col: str, valid_cols: list[str]) -> str | None:
if col is None:
return None
if col in valid_cols:
return col
lower = col.lower().replace(" ", "").replace("-", "_")
if lower in _COLUMN_ALIASES:
candidate = _COLUMN_ALIASES[lower]
if candidate in valid_cols:
return candidate
return None
def _normalize_value(val):
    """Replace a known alias or translation with its canonical filter value.

    String input is matched against ``_VALUE_ALIASES`` after being composed
    to Unicode NFC, lower-cased, and having its whitespace trimmed and
    collapsed, so accented text typed in decomposed form or with doubled
    spaces still matches. A string that is merely a differently-cased
    spelling of one of the canonical values (``"mri"``, ``"x ray"``) is
    mapped to that canonical value as well.

    Args:
        val: Filter value from the request; any type.

    Returns:
        The canonical value when *val* is a recognised string; otherwise
        *val* itself, unchanged (non-strings are always returned as-is).
    """
    if not isinstance(val, str):
        return val

    # Local import: only this function needs it.
    import unicodedata

    key = " ".join(unicodedata.normalize("NFC", val).lower().split())
    if key in _VALUE_ALIASES:
        return _VALUE_ALIASES[key]

    # Accept the canonical names themselves regardless of letter case.
    for canonical in _VALUE_ALIASES.values():
        if canonical.lower() == key:
            return canonical
    return val
def _decode_json_arg(raw):
    """Decode *raw* from JSON text when it is a string; falsy input becomes ``[]``."""
    if isinstance(raw, str):
        # A blank string means "nothing supplied", not malformed JSON.
        raw = json.loads(raw) if raw.strip() else None
    return raw or []


def _coerce_filter_value(series: pd.Series, val, op: str):
    """Convert a string filter value to match a boolean or numeric column.

    Values often arrive as text (``"4.5"``, ``"true"``). Comparing such text
    with a typed column either raises or silently matches nothing, so it is
    converted when the conversion is unambiguous and returned untouched
    otherwise. ``contains`` always works on text and is never coerced.
    """
    if op == "contains" or not isinstance(val, str):
        return val
    text = val.strip()
    # Check bool first: pandas also reports bool columns as numeric.
    if pd.api.types.is_bool_dtype(series):
        lowered = text.lower()
        return lowered == "true" if lowered in ("true", "false") else val
    if pd.api.types.is_numeric_dtype(series):
        try:
            return float(text)
        except ValueError:
            return val
    return val


def _apply_filters_and_group(df: pd.DataFrame, filters_raw, group_by_raw) -> list[dict]:
    """Filter *df*, optionally average it per group, and return row dicts.

    Args:
        df: Source data; it is never modified.
        filters_raw: Filters as a list of ``{"column", "operator", "value"}``
            dicts, a single such dict, or either of those as JSON text.
            Filters are applied in order and combined with AND. Column names
            go through ``_normalize_column`` and values through
            ``_normalize_value``.
        group_by_raw: Column names to group by, as a list, a single name
            decoded from JSON, or JSON text. When non-empty, the result holds
            one row per group with the mean of every numeric column that is
            not itself a grouping column.

    Returns:
        The resulting rows as ``{column: value}`` dicts. Float NaN values are
        returned as ``None`` so the rows can be serialised to JSON.

    Raises:
        HTTPException: status 400 for invalid JSON, a malformed filter or
            group_by structure, an unknown column, an unsupported operator,
            or a value that cannot be compared with the column.
    """
    try:
        filters = _decode_json_arg(filters_raw)
        group_by = _decode_json_arg(group_by_raw)
    except json.JSONDecodeError as exc:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid JSON in filters or group_by: {exc}",
        ) from exc

    # Tolerate a single filter object / a single column name.
    if isinstance(filters, dict):
        filters = [filters]
    if isinstance(group_by, str):
        group_by = [group_by]

    if not isinstance(filters, (list, tuple)) or not all(isinstance(f, dict) for f in filters):
        raise HTTPException(
            status_code=400,
            detail="filters must be a list of objects with column, operator and value",
        )
    if not isinstance(group_by, (list, tuple)):
        raise HTTPException(
            status_code=400,
            detail="group_by must be a list of column names",
        )

    valid_cols = list(df.columns)
    # Boolean indexing and groupby below return new frames, so *df* is not
    # mutated and no defensive copy is needed.
    result = df

    for spec in filters:
        raw_col, op, val = spec.get("column"), spec.get("operator"), spec.get("value")

        col = _normalize_column(raw_col, valid_cols) if isinstance(raw_col, str) else None
        if col is None:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown column: {raw_col}. Valid columns: {valid_cols}",
            )
        # isinstance guard: an unhashable operator (e.g. a list) would make
        # the dict membership test itself raise.
        if not isinstance(op, str) or op not in _OPS:
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported operator: {op}. Valid operators: {list(_OPS.keys())}",
            )

        norm_val = _coerce_filter_value(result[col], _normalize_value(val), op)
        try:
            mask = _OPS[op](result[col], norm_val)
        except (TypeError, ValueError) as exc:
            # E.g. an ordering comparison between a numeric column and text.
            raise HTTPException(
                status_code=400,
                detail=f"Cannot apply operator {op} to column {col} with value {val!r}: {exc}",
            ) from exc
        result = result[mask]

    if group_by:
        norm_group = [
            _normalize_column(c, valid_cols) if isinstance(c, str) else None
            for c in group_by
        ]
        missing = [orig for orig, n in zip(group_by, norm_group) if n is None]
        if missing:
            raise HTTPException(
                status_code=400,
                detail=f"Unknown group_by columns: {missing}. Valid columns: {valid_cols}",
            )

        # Two aliases may resolve to the same column; keep the first only.
        group_cols = list(dict.fromkeys(norm_group))
        # A grouping column must not also be aggregated: it would appear both
        # as index and as column, and reset_index() would fail.
        numeric_cols = [
            c
            for c in result.select_dtypes(include="number").columns
            if c not in group_cols
        ]
        if numeric_cols:
            result = result.groupby(group_cols)[numeric_cols].mean().reset_index()
        else:
            # Nothing to average: return the distinct group keys, sorted and
            # without missing keys (groupby's defaults).
            result = (
                result[group_cols]
                .dropna()
                .drop_duplicates()
                .sort_values(group_cols)
                .reset_index(drop=True)
            )

    # NaN is not valid JSON; emit null instead.
    return [
        {
            key: (None if isinstance(value, float) and pd.isna(value) else value)
            for key, value in row.items()
        }
        for row in result.to_dict(orient="records")
    ]
# GET <router prefix>/member-insights
# Returns the content of data/member_insights.json exactly as it was parsed
# when this module was imported; the file is not re-read per request.
@router.get("/member-insights")
def member_insights():
    return _MEMBER_INSIGHTS
# GET <router prefix>/schedule
# Returns the text of data/schedule_response.txt (read and stripped at import
# time) wrapped in a {"result": ...} object.
@router.get("/schedule")
def schedule():
    return {"result": _SCHEDULE_TEXT}
# POST <router prefix>/historical-procedures
# Runs the optional "filters" and "group_by" entries of the request body
# against the historical-procedures table and returns the matching rows as
# {"result": [...]}. Both entries default to the JSON text "[]" (no filters,
# no grouping) when absent.
# Documented with comments rather than a docstring on purpose: FastAPI
# publishes a handler's docstring as the operation description.
@router.post("/historical-procedures")
def historical_procedures(payload: dict):
    filters_arg = payload.get("filters", "[]")
    group_by_arg = payload.get("group_by", "[]")
    matched = _apply_filters_and_group(_HISTORICAL, filters_arg, group_by_arg)
    return {"result": matched}
# POST <router prefix>/available-procedures
# Runs the optional "filters" and "group_by" entries of the request body
# against the available-procedures table and returns the matching rows as
# {"result": [...]}. Both entries default to the JSON text "[]" (no filters,
# no grouping) when absent.
# Documented with comments rather than a docstring on purpose: FastAPI
# publishes a handler's docstring as the operation description.
@router.post("/available-procedures")
def available_procedures(payload: dict):
    filters_arg = payload.get("filters", "[]")
    group_by_arg = payload.get("group_by", "[]")
    matched = _apply_filters_and_group(_AVAILABLE, filters_arg, group_by_arg)
    return {"result": matched}