{
  "steward": "python-best-practices-steward",
  "project": "Azure-AI-RAG-CSharp-Semantic-Kernel-Functions",
  "runDate": "2026-03-22",
  "runId": "2026-03-22T00-00-00",
  "findings": [
    {
      "id": "PYBP-ERRH-001",
      "title": "Cleanup code executes unconditionally after exception — risk of data loss",
      "severity": "critical",
      "category": "error-handling",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 99,
      "description": "The blob delete operation (lines 99-101) is outside the try/except block and runs unconditionally after any exception. If an exception is raised before the blob is successfully copied to the 'completed' container, the source blob is still deleted, resulting in irreversible data loss.",
      "recommendation": "Move the blob copy and delete operations inside the try block so they only execute on success. Use a finally block only for idempotent cleanup that is safe to run regardless of outcome.",
      "status": "open"
    },
    {
      "id": "PYBP-ERRH-002",
      "title": "Variables used outside try/except scope — NameError on exception path",
      "severity": "critical",
      "category": "error-handling",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 99,
      "description": "blob_service_client, container_name, and blob_client are assigned inside the try block (lines 85-89) but consumed at lines 99-101 which are outside the try/except. If any exception occurs before those assignments, a NameError is raised at the cleanup step, masking the original exception.",
      "recommendation": "Initialize blob_service_client, container_name, and blob_client to None before the try block, and guard the cleanup block with 'if blob_service_client is not None'.",
      "status": "open"
    },
    {
      "id": "PYBP-DEPS-001",
      "title": "Critical dependencies unpinned — langchain and langchain-openai prone to breaking changes",
      "severity": "critical",
      "category": "dependencies",
      "file": "src/DocumentLoaderFunction/requirements.txt",
      "description": "7 of 11 dependencies have no version pins, including langchain, langchain-openai, azure-functions, azure-storage-blob, and beautifulsoup4. LangChain in particular has a history of frequent breaking API changes between minor versions. Unpinned dependencies will cause non-deterministic deployments and unexpected breakage.",
      "recommendation": "Pin all dependencies with exact versions using '=='. Use pip-tools or uv to generate a fully locked requirements.txt from a fresh virtual environment.",
      "status": "open"
    },
    {
      "id": "PYBP-TYPE-001",
      "title": "No type annotations on public function signatures",
      "severity": "notable",
      "category": "type-annotations",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 34,
      "description": "The Loader trigger handler, html_to_json, and all AISearchIndexLoader methods lack type annotations on parameters and return types. This prevents static analysis tools (mypy, pyright) from catching type errors and makes the API surface harder to understand.",
      "recommendation": "Add type annotations: Loader(myblob: func.InputStream) -> None, html_to_json(html_content: bytes) -> tuple[dict, str], and annotate all AISearchIndexLoader method parameters and return types.",
      "status": "open"
    },
    {
      "id": "PYBP-TYPE-002",
      "title": "Unused typing imports (List, Optional) — annotations not applied",
      "severity": "notable",
      "category": "type-annotations",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 3,
      "description": "List and Optional are imported from typing but never used in any annotation in the file. This suggests annotations were planned but never implemented.",
      "recommendation": "Remove unused imports or apply them in type annotations as part of addressing PYBP-TYPE-001.",
      "status": "open"
    },
    {
      "id": "PYBP-DEPS-002",
      "title": "Unused dependencies in requirements.txt inflate deployment package",
      "severity": "notable",
      "category": "dependencies",
      "file": "src/DocumentLoaderFunction/requirements.txt",
      "description": "azure-keyvault-secrets, requests, and langchain-community are listed as dependencies but are not imported or used anywhere in function_app.py. These inflate the deployment package size and increase the attack surface unnecessarily.",
      "recommendation": "Remove azure-keyvault-secrets, requests, and langchain-community from requirements.txt if they are not used. Verify by searching all Python files in the project.",
      "status": "open"
    },
    {
      "id": "PYBP-DEPS-003",
      "title": "No dev dependency separation",
      "severity": "notable",
      "category": "dependencies",
      "file": "src/DocumentLoaderFunction/requirements.txt",
      "description": "There is no requirements-dev.txt, pyproject.toml with optional dev groups, or any mechanism to separate development dependencies (pytest, ruff, mypy) from production dependencies. This makes CI setup and local development setup ambiguous.",
      "recommendation": "Add a requirements-dev.txt or use pyproject.toml with a [project.optional-dependencies] dev group containing test and lint tooling (pytest, ruff or flake8, mypy or pyright).",
      "status": "open"
    },
    {
      "id": "PYBP-QUAL-001",
      "title": "logging module passed as constructor argument — fragile coupling",
      "severity": "notable",
      "category": "code-quality",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 107,
      "description": "The AISearchIndexLoader constructor accepts the logging module as a positional argument named 'logging', storing it as self.logger. This is unconventional, unannotated, and shadows the logging module import. The class should manage its own logger internally.",
      "recommendation": "Remove the logging parameter from AISearchIndexLoader.__init__ and replace it with self.logger = logging.getLogger(__name__) inside the constructor.",
      "status": "open"
    },
    {
      "id": "PYBP-ERRH-003",
      "title": "raise ex adds a redundant traceback frame — use bare raise",
      "severity": "notable",
      "category": "error-handling",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 185,
      "description": "In populate_search_index, the pattern 'except Exception as ex: ... raise ex' explicitly re-raises the caught exception. In Python 3 the original traceback is still attached to the exception, but 'raise ex' appends an extra frame entry pointing at the raise statement, which adds noise to the stack trace and obscures the intent of re-raising the active exception unchanged.",
      "recommendation": "Replace 'raise ex' with bare 'raise' to re-raise the active exception with its traceback unchanged.",
      "status": "open"
    },
    {
      "id": "PYBP-QUAL-002",
      "title": "environ.get() and environ[] used inconsistently — silent None vs KeyError",
      "severity": "notable",
      "category": "code-quality",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 57,
      "description": "search_endpoint and index_name use environ[\"KEY\"] (raises KeyError on missing config), while Azure OpenAI settings use environ.get(\"KEY\") (returns None silently). The environ.get() calls will pass None into AzureOpenAIEmbeddings, producing obscure downstream errors instead of a clear missing-config message.",
      "recommendation": "Standardize on environ[\"KEY\"] for all required settings to fail fast with a clear error. Use environ.get(\"KEY\", default) only for genuinely optional settings with a safe default value.",
      "status": "open"
    },
    {
      "id": "PYBP-QUAL-003",
      "title": "Loader function name uses PascalCase — should be snake_case",
      "severity": "minor",
      "category": "code-quality",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 34,
      "description": "The function 'Loader' uses PascalCase, which is the Python convention for class names. PEP 8 requires function names to use snake_case. The Azure Functions SDK does not impose any casing requirement.",
      "recommendation": "Rename to 'loader' or 'blob_loader' to follow PEP 8.",
      "status": "open"
    },
    {
      "id": "PYBP-QUAL-004",
      "title": "Commented-out code blocks left in production file",
      "severity": "minor",
      "category": "code-quality",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 67,
      "description": "Lines 67 and 123-126 contain commented-out code that was never removed. This adds noise and suggests incomplete or abandoned refactoring.",
      "recommendation": "Remove commented-out code. If the intent behind the comments needs to be preserved, move it to a JIRA/GitHub issue or decision log.",
      "status": "open"
    },
    {
      "id": "PYBP-QUAL-005",
      "title": "Magic strings for container names — use constants",
      "severity": "minor",
      "category": "code-quality",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 86,
      "description": "Container names 'load' and 'completed' appear as inline string literals. The 'load' container name is defined twice: once in the blob trigger decorator path and once at line 86. A drift between these two definitions would cause silent bugs.",
      "recommendation": "Extract BLOB_TRIGGER_CONTAINER = 'load' and BLOB_COMPLETED_CONTAINER = 'completed' as module-level constants and reference them in both the decorator path and the cleanup code.",
      "status": "open"
    },
    {
      "id": "PYBP-QUAL-006",
      "title": "Indentation inconsistencies — extra leading spaces",
      "severity": "minor",
      "category": "code-quality",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 84,
      "description": "Lines 84, 240, and 241 have extra leading spaces that do not match the surrounding indentation level, suggesting copy-paste editing artifacts. While Python does not raise an error if the logical indentation is consistent, these create visual noise and will be flagged by linters.",
      "recommendation": "Run an auto-formatter such as 'ruff format' to normalize indentation, and use the pycodestyle E1 (indentation) checks in flake8 or ruff to flag regressions.",
      "status": "open"
    },
    {
      "id": "PYBP-STRUCT-001",
      "title": "All application logic in a single file — should be modularized",
      "severity": "minor",
      "category": "project-structure",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "description": "The trigger handler, HTML parsing logic, and Azure AI Search index management are all in function_app.py. While the codebase is small today, the html_to_json function and AISearchIndexLoader class are independently testable units that should live in separate modules.",
      "recommendation": "Extract html_parser.py for html_to_json and search_index.py for AISearchIndexLoader. Keep function_app.py as the thin entry point containing only the trigger handler and wiring.",
      "status": "open"
    },
    {
      "id": "PYBP-STRUCT-002",
      "title": "Azure Functions v2 programming model used correctly",
      "severity": "info",
      "category": "project-structure",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "description": "The project correctly uses the Azure Functions Python v2 model: function_app.py as the decorator-based entry point, host.json present and version 2.0, no legacy function.json files. This is the recommended modern approach.",
      "status": "open"
    },
    {
      "id": "PYBP-ASYNC-001",
      "title": "Sync handler with all-sync I/O is functional but limits concurrency at scale",
      "severity": "info",
      "category": "async",
      "file": "src/DocumentLoaderFunction/function_app.py",
      "line": 34,
      "description": "The Loader function is synchronous and performs multiple blocking I/O operations (Azure AD token fetch, Azure OpenAI embeddings, Azure AI Search index creation and document upload, Blob Storage operations). This is functionally correct for a blob trigger but blocks the thread pool worker for the full duration, limiting concurrency when multiple blobs arrive simultaneously.",
      "recommendation": "For high-throughput scenarios, consider converting to async def with async Azure SDK clients (azure-storage-blob[aio], azure-search-documents[aio]) and awaitable LangChain calls. For current scale this is a future improvement, not an immediate requirement.",
      "status": "open"
    }
  ],
  "summary": {
    "critical": 3,
    "notable": 7,
    "minor": 5,
    "info": 2,
    "total": 17
  }
}
