{
  "steward": "python-resilience-steward",
  "project": "Azure-AI-RAG-CSharp-Semantic-Kernel-Functions",
  "runDate": "2026-03-22",
  "runId": "2026-03-22T00-00-00",
  "findings": [
    {
      "id": "PYRES-RETRY-001",
      "title": "No retry policy in host.json",
      "severity": "critical",
      "category": "RETRY",
      "file": "src/DocumentLoaderFunction/host.json",
      "description": "host.json has no retry section and the Loader function has no @retry decorator. Transient failures (network errors, rate limits, API timeouts) will cause permanent document loss — Azure Functions will not retry the invocation.",
      "recommendation": "Add a retry block to host.json with strategy fixedDelay, maxRetryCount 5, and delayInterval 00:00:30.",
      "status": "open"
    },
    {
      "id": "PYRES-ERR-001",
      "title": "All exceptions swallowed — function always returns success on failure",
      "severity": "critical",
      "category": "ERR",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 95,
      "description": "Both except blocks in Loader (json.JSONDecodeError and Exception) log the error but do not re-raise. Azure Functions sees a normal return and considers the invocation successful. Even if a retry policy were configured, it would never activate because no failure is propagated.",
      "recommendation": "Add 'raise' after logging in both except handlers so the Azure Functions retry policy can activate on transient failures.",
      "status": "open"
    },
    {
      "id": "PYRES-ERR-002",
      "title": "Blob delete executed outside try block — undefined variable reference on error path",
      "severity": "critical",
      "category": "ERR",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 99,
      "description": "Lines 99-101 (blob_client creation and delete_blob) are outside the try/except block. On the error path, blob_service_client and container_name are unbound, causing UnboundLocalError. This accidentally prevents premature deletion but produces a confusing secondary error that masks the root cause.",
      "recommendation": "Move blob cleanup inside the try block and use a finally block or explicit success flag to ensure delete only runs when processing succeeded.",
      "status": "open"
    },
    {
      "id": "PYRES-TIMEOUT-001",
      "title": "No timeout on embedding API call — function can block indefinitely",
      "severity": "critical",
      "category": "TIMEOUT",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 168,
      "description": "embed_query(content) has no explicit timeout. Under Azure OpenAI service degradation, the call can block for the entire functionTimeout duration (default 5 minutes), holding a worker slot without making progress. No timeout is set on any other external call either.",
      "recommendation": "Configure a 30-60 second timeout on AzureOpenAIEmbeddings via the request_timeout parameter, and set explicit timeouts on SearchClient and BlobServiceClient via their connection_timeout/read_timeout settings.",
      "status": "open"
    },
    {
      "id": "PYRES-TIMEOUT-002",
      "title": "functionTimeout not explicitly set in host.json",
      "severity": "notable",
      "category": "TIMEOUT",
      "file": "src/DocumentLoaderFunction/host.json",
      "description": "host.json does not declare functionTimeout. The effective timeout is the platform default (5 minutes on Consumption plan, unlimited on Dedicated/Premium). If the hosting plan changes, the timeout changes silently, potentially causing runaway invocations.",
      "recommendation": "Add 'functionTimeout': '00:05:00' (or the appropriate value) to host.json to make the timeout explicit and independent of hosting plan defaults.",
      "status": "open"
    },
    {
      "id": "PYRES-CLEANUP-001",
      "title": "Blob clients and search clients never explicitly closed",
      "severity": "notable",
      "category": "CLEANUP",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 85,
      "description": "BlobServiceClient, SearchClient, and SearchIndexClient are created on every invocation and never closed. Azure SDK clients hold HTTP connection pools. In a long-lived worker processing many blobs, connections may accumulate.",
      "recommendation": "Close clients in a finally block using client.close(), or move them to module-level singletons to share connection pools across invocations.",
      "status": "open"
    },
    {
      "id": "PYRES-DLQ-001",
      "title": "No dead-letter handling for permanently unprocessable blobs",
      "severity": "notable",
      "category": "DLQ",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "description": "If a blob is permanently unprocessable (malformed HTML, invalid reference code, persistent API errors), it remains in the load container silently. There is no mechanism to move it to a dead-letter location, preventing operator visibility and manual reprocessing.",
      "recommendation": "On permanent failure (after max retries exhausted or for non-retriable errors), move the blob to a dead-letter container with error metadata tags.",
      "status": "open"
    },
    {
      "id": "PYRES-ERR-003",
      "title": "Index existence check not guarded against non-ResourceNotFoundError exceptions",
      "severity": "notable",
      "category": "ERR",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 120,
      "description": "populate_search_index catches only ResourceNotFoundError from get_index. If a network error or service error occurs, index_exists stays False and create_index is called, which will fail with a conflict or network error instead of the true root cause.",
      "recommendation": "Re-raise unexpected exceptions from get_index so callers see the real error rather than a misleading create_index failure.",
      "status": "open"
    },
    {
      "id": "PYRES-RETRY-002",
      "title": "AzureOpenAIEmbeddings client created on every invocation — no connection reuse",
      "severity": "minor",
      "category": "RETRY",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 72,
      "description": "AzureOpenAIEmbeddings is instantiated inside the Loader function handler. Each invocation creates a new HTTP client and connection pool, adding latency and resource overhead.",
      "recommendation": "Move AzureOpenAIEmbeddings instantiation to module level (with lazy initialisation if configuration is only available at runtime) so the HTTP connection pool is shared across invocations.",
      "status": "open"
    },
    {
      "id": "PYRES-IDEM-001",
      "title": "AI Search upload_documents is effectively idempotent via key-based replace",
      "severity": "info",
      "category": "IDEM",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 178,
      "description": "Azure AI Search upload_documents replaces existing documents with the same key (reference_code). Re-processing the same blob overwrites rather than duplicates the document. This is correct resilience behaviour.",
      "recommendation": "No action required. Continue using upload (replace) semantics.",
      "status": "open"
    },
    {
      "id": "PYRES-IDEM-002",
      "title": "Blob copy to completed container uses overwrite=True — idempotent on retry",
      "severity": "info",
      "category": "IDEM",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 90,
      "description": "upload_blob(..., overwrite=True) ensures that re-processing the same blob on retry does not leave duplicate or conflicting blobs in the completed container. This is correct behaviour.",
      "recommendation": "No action required.",
      "status": "open"
    }
  ],
  "summary": {
    "critical": 4,
    "notable": 4,
    "minor": 1,
    "info": 2,
    "total": 11
  }
}
