fix(api): tolerate column aliases (procedure_name → procedure) + Spanish value mappings
El LLM de watsonx Orchestrate estaba alucinando nombres de columnas (procedure_name en lugar de procedure) y los valores de procedimientos en español sin traducir.

Dos fixes:

1. Backend (benefits_api.py): mapas _COLUMN_ALIASES y _VALUE_ALIASES resuelven aliases comunes (procedure_name, name, plan, etc.) y traducciones ES→EN (radiografía → X Ray, resonancia → MRI, etc.) antes de aplicar el filtro. Mensajes de error ahora listan columnas válidas para que el agente se corrija solo.

2. OpenAPI spec (yaml + json): description de cada operación ahora enumera explícitamente las columnas válidas y los valores válidos del campo procedure, más una sección VALUE MAPPING ES→EN para que el LLM no tenga que adivinar.
This commit is contained in:
@@ -27,6 +27,79 @@ _OPS = {
|
||||
"le": lambda s, v: s <= v,
|
||||
}
|
||||
|
||||
_COLUMN_ALIASES = {
|
||||
"procedure_name": "procedure",
|
||||
"procedurename": "procedure",
|
||||
"proc": "procedure",
|
||||
"name": "member_name",
|
||||
"member": "member_name",
|
||||
"membername": "member_name",
|
||||
"location_name": "location",
|
||||
"provider": "location",
|
||||
"facility": "location",
|
||||
"rating": "facility_rating",
|
||||
"facilityrating": "facility_rating",
|
||||
"distance": "distance_miles",
|
||||
"plan": "member_plan",
|
||||
"memberplan": "member_plan",
|
||||
"type": "procedure_type",
|
||||
"proceduretype": "procedure_type",
|
||||
"cost": "total_cost",
|
||||
"totalcost": "total_cost",
|
||||
"price": "total_cost",
|
||||
"in_network_only": "in_network",
|
||||
"innetwork": "in_network",
|
||||
}
|
||||
|
||||
_VALUE_ALIASES = {
|
||||
"xray": "X Ray",
|
||||
"x-ray": "X Ray",
|
||||
"radiografia": "X Ray",
|
||||
"radiografía": "X Ray",
|
||||
"radiografias": "X Ray",
|
||||
"radiografías": "X Ray",
|
||||
"resonancia": "MRI",
|
||||
"resonancia magnetica": "MRI",
|
||||
"resonancia magnética": "MRI",
|
||||
"tomografia": "CT Scan",
|
||||
"tomografía": "CT Scan",
|
||||
"ct": "CT Scan",
|
||||
"limpieza dental": "Dental Cleaning",
|
||||
"limpieza": "Dental Cleaning",
|
||||
"examen visual": "Vision Exam",
|
||||
"examen de la vista": "Vision Exam",
|
||||
"vision": "Vision Exam",
|
||||
"chequeo anual": "Annual Physical Exam",
|
||||
"examen anual": "Annual Physical Exam",
|
||||
"fisico anual": "Annual Physical Exam",
|
||||
"apendicectomia": "Appendectomy",
|
||||
"apendicectomía": "Appendectomy",
|
||||
"analisis de sangre": "Blood Test",
|
||||
"análisis de sangre": "Blood Test",
|
||||
}
|
||||
|
||||
|
||||
def _normalize_column(col: str, valid_cols: list[str]) -> str | None:
|
||||
if col is None:
|
||||
return None
|
||||
if col in valid_cols:
|
||||
return col
|
||||
lower = col.lower().replace(" ", "").replace("-", "_")
|
||||
if lower in _COLUMN_ALIASES:
|
||||
candidate = _COLUMN_ALIASES[lower]
|
||||
if candidate in valid_cols:
|
||||
return candidate
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_value(val):
|
||||
if not isinstance(val, str):
|
||||
return val
|
||||
lower = val.lower().strip()
|
||||
if lower in _VALUE_ALIASES:
|
||||
return _VALUE_ALIASES[lower]
|
||||
return val
|
||||
|
||||
|
||||
def _apply_filters_and_group(df: pd.DataFrame, filters_raw, group_by_raw) -> list[dict]:
|
||||
try:
|
||||
@@ -35,21 +108,34 @@ def _apply_filters_and_group(df: pd.DataFrame, filters_raw, group_by_raw) -> lis
|
||||
except json.JSONDecodeError as exc:
|
||||
raise HTTPException(status_code=400, detail=f"Invalid JSON in filters or group_by: {exc}")
|
||||
|
||||
valid_cols = list(df.columns)
|
||||
result = df.copy()
|
||||
for f in filters:
|
||||
col, op, val = f.get("column"), f.get("operator"), f.get("value")
|
||||
if col not in result.columns:
|
||||
raise HTTPException(status_code=400, detail=f"Unknown column: {col}")
|
||||
raw_col, op, val = f.get("column"), f.get("operator"), f.get("value")
|
||||
col = _normalize_column(raw_col, valid_cols)
|
||||
if col is None:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Unknown column: {raw_col}. Valid columns: {valid_cols}",
|
||||
)
|
||||
if op not in _OPS:
|
||||
raise HTTPException(status_code=400, detail=f"Unsupported operator: {op}")
|
||||
result = result[_OPS[op](result[col], val)]
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Unsupported operator: {op}. Valid operators: {list(_OPS.keys())}",
|
||||
)
|
||||
norm_val = _normalize_value(val)
|
||||
result = result[_OPS[op](result[col], norm_val)]
|
||||
|
||||
if group_by:
|
||||
missing = [c for c in group_by if c not in result.columns]
|
||||
norm_group = [_normalize_column(c, valid_cols) for c in group_by]
|
||||
missing = [orig for orig, n in zip(group_by, norm_group) if n is None]
|
||||
if missing:
|
||||
raise HTTPException(status_code=400, detail=f"Unknown group_by columns: {missing}")
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Unknown group_by columns: {missing}. Valid columns: {valid_cols}",
|
||||
)
|
||||
numeric_cols = result.select_dtypes(include="number").columns.tolist()
|
||||
result = result.groupby(group_by)[numeric_cols].mean().reset_index()
|
||||
result = result.groupby(norm_group)[numeric_cols].mean().reset_index()
|
||||
|
||||
return result.to_dict(orient="records")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user