fix(api): tolerate column aliases (procedure_name → procedure) + Spanish value mappings

El LLM de watsonx Orchestrate estaba alucinando nombres de columnas
(procedure_name en lugar de procedure) y enviaba los valores de
procedimientos en español, sin traducirlos al inglés. Dos fixes:

1. Backend (benefits_api.py): mapas _COLUMN_ALIASES y _VALUE_ALIASES
   resuelven aliases comunes (procedure_name, name, plan, etc.) y
   traducciones ES→EN (radiografía → X Ray, resonancia → MRI, etc.)
   antes de aplicar el filtro. Mensajes de error ahora listan columnas
   válidas para que el agente se corrija solo.

2. OpenAPI spec (yaml + json): description de cada operación ahora
   enumera explícitamente las columnas válidas y los valores válidos
   del campo procedure, más una sección VALUE MAPPING ES→EN para que
   el LLM no tenga que adivinar.
This commit is contained in:
2026-05-13 14:51:31 +00:00
parent 1f2ad8d235
commit 715a6ca370
3 changed files with 501 additions and 353 deletions

View File

@@ -27,6 +27,79 @@ _OPS = {
"le": lambda s, v: s <= v,
}
_COLUMN_ALIASES = {
"procedure_name": "procedure",
"procedurename": "procedure",
"proc": "procedure",
"name": "member_name",
"member": "member_name",
"membername": "member_name",
"location_name": "location",
"provider": "location",
"facility": "location",
"rating": "facility_rating",
"facilityrating": "facility_rating",
"distance": "distance_miles",
"plan": "member_plan",
"memberplan": "member_plan",
"type": "procedure_type",
"proceduretype": "procedure_type",
"cost": "total_cost",
"totalcost": "total_cost",
"price": "total_cost",
"in_network_only": "in_network",
"innetwork": "in_network",
}
_VALUE_ALIASES = {
"xray": "X Ray",
"x-ray": "X Ray",
"radiografia": "X Ray",
"radiografía": "X Ray",
"radiografias": "X Ray",
"radiografías": "X Ray",
"resonancia": "MRI",
"resonancia magnetica": "MRI",
"resonancia magnética": "MRI",
"tomografia": "CT Scan",
"tomografía": "CT Scan",
"ct": "CT Scan",
"limpieza dental": "Dental Cleaning",
"limpieza": "Dental Cleaning",
"examen visual": "Vision Exam",
"examen de la vista": "Vision Exam",
"vision": "Vision Exam",
"chequeo anual": "Annual Physical Exam",
"examen anual": "Annual Physical Exam",
"fisico anual": "Annual Physical Exam",
"apendicectomia": "Appendectomy",
"apendicectomía": "Appendectomy",
"analisis de sangre": "Blood Test",
"análisis de sangre": "Blood Test",
}
def _normalize_column(col: str, valid_cols: list[str]) -> str | None:
if col is None:
return None
if col in valid_cols:
return col
lower = col.lower().replace(" ", "").replace("-", "_")
if lower in _COLUMN_ALIASES:
candidate = _COLUMN_ALIASES[lower]
if candidate in valid_cols:
return candidate
return None
def _normalize_value(val):
if not isinstance(val, str):
return val
lower = val.lower().strip()
if lower in _VALUE_ALIASES:
return _VALUE_ALIASES[lower]
return val
def _apply_filters_and_group(df: pd.DataFrame, filters_raw, group_by_raw) -> list[dict]:
try:
@@ -35,21 +108,34 @@ def _apply_filters_and_group(df: pd.DataFrame, filters_raw, group_by_raw) -> lis
except json.JSONDecodeError as exc:
raise HTTPException(status_code=400, detail=f"Invalid JSON in filters or group_by: {exc}")
valid_cols = list(df.columns)
result = df.copy()
for f in filters:
col, op, val = f.get("column"), f.get("operator"), f.get("value")
if col not in result.columns:
raise HTTPException(status_code=400, detail=f"Unknown column: {col}")
raw_col, op, val = f.get("column"), f.get("operator"), f.get("value")
col = _normalize_column(raw_col, valid_cols)
if col is None:
raise HTTPException(
status_code=400,
detail=f"Unknown column: {raw_col}. Valid columns: {valid_cols}",
)
if op not in _OPS:
raise HTTPException(status_code=400, detail=f"Unsupported operator: {op}")
result = result[_OPS[op](result[col], val)]
raise HTTPException(
status_code=400,
detail=f"Unsupported operator: {op}. Valid operators: {list(_OPS.keys())}",
)
norm_val = _normalize_value(val)
result = result[_OPS[op](result[col], norm_val)]
if group_by:
missing = [c for c in group_by if c not in result.columns]
norm_group = [_normalize_column(c, valid_cols) for c in group_by]
missing = [orig for orig, n in zip(group_by, norm_group) if n is None]
if missing:
raise HTTPException(status_code=400, detail=f"Unknown group_by columns: {missing}")
raise HTTPException(
status_code=400,
detail=f"Unknown group_by columns: {missing}. Valid columns: {valid_cols}",
)
numeric_cols = result.select_dtypes(include="number").columns.tolist()
result = result.groupby(group_by)[numeric_cols].mean().reset_index()
result = result.groupby(norm_group)[numeric_cols].mean().reset_index()
return result.to_dict(orient="records")