{
  "steward": "api-otl-steward",
  "project": "Azure-AI-RAG-CSharp-Semantic-Kernel-Functions",
  "runDate": "2026-03-21",
  "runId": "2026-03-21T00-00-00",
  "findings": [
    {
      "id": "AOTL-HEALTH-001",
      "title": "No health check endpoint configured",
      "severity": "critical",
      "category": "HEALTH",
      "file": "src/ChatAPI/Program.cs",
      "description": "No AddHealthChecks() or MapHealthChecks() call exists. The API has three critical downstream dependencies (Cosmos DB, Azure AI Search, Azure OpenAI) with no health probe. Container orchestrators cannot determine readiness, and liveness failures will be invisible until traffic errors occur.",
      "recommendation": "Add builder.Services.AddHealthChecks() with named checks for Cosmos DB, Azure AI Search, and Azure OpenAI. Register /healthz (liveness) and /ready (readiness) endpoints via app.MapHealthChecks(). Exclude from authentication middleware.",
      "status": "open"
    },
    {
      "id": "AOTL-LOG-001",
      "title": "Exceptions logged at Information level instead of Error",
      "severity": "critical",
      "category": "LOG",
      "file": "src/ChatAPI/Plugins/AISearchDataPlugin.cs",
      "line": 46,
      "description": "AISearchDataPlugin.ResourceLookup and ProductDataPlugin.GetAzureProductDetailsById catch exceptions and re-throw them, but log using LogInformation(ex, ...) instead of LogError(ex, ...). This means actual failures are invisible to error-rate alerts and Application Insights exception tracking.",
      "recommendation": "Replace LogInformation(ex, ...) with LogError(ex, ...) on all catch blocks that represent failures. LogInformation is for normal business events, not exceptions.",
      "status": "open"
    },
    {
      "id": "AOTL-LOG-002",
      "title": "Full AI response and full product JSON logged — potential PII and data volume risk",
      "severity": "critical",
      "category": "LOG",
      "file": "src/ChatAPI/Services/ChatService.cs",
      "line": 55,
      "description": "ChatService logs the complete LLM response string (_logger.LogInformation(\"Response {response}\", resp)) and ProductDataPlugin logs full product JSON. The LLM response is derived from user input and retrieved documents; it may contain PII echoed from user questions or document content. Large payloads also inflate log ingestion costs.",
      "recommendation": "Truncate logged response to a maximum length (e.g., first 200 characters + length indicator). For product data, log only the product ID, not the full serialised JSON. Implement a log scrubbing or destructuring policy if Serilog is adopted.",
      "status": "open"
    },
    {
      "id": "AOTL-TRACE-001",
      "title": "No custom spans on critical LLM, search, and Cosmos DB paths",
      "severity": "notable",
      "category": "TRACE",
      "file": "src/ChatAPI/Services/ChatService.cs",
      "description": "No ActivitySource or custom Activity spans are created anywhere in ChatAPI. Operations including LLM completion, vector embedding generation, Azure AI Search queries, and Cosmos DB reads/writes are not explicitly traced. Latency contribution from each downstream service cannot be isolated in Application Insights.",
      "recommendation": "Define a static ActivitySource (e.g., new ActivitySource(\"ChatAPI\")). Wrap GetResponseAsync, RetrieveDocumentationAsync, and Cosmos DB data methods in using var activity = _activitySource.StartActivity(...) blocks. Add key span attributes such as sessionId, questionLength, and resultCount.",
      "status": "open"
    },
    {
      "id": "AOTL-PIPE-001",
      "title": "No explicit OpenTelemetry pipeline configuration — pipeline is entirely implicit",
      "severity": "notable",
      "category": "PIPE",
      "file": "src/ChatAPI/Program.cs",
      "line": 65,
      "description": "The entire observability pipeline is configured via UseAzureMonitor() with no explicit WithTracing(), WithMetrics(), or WithLogging() calls. The pipeline's actual content is opaque — there is no code evidence of which instrumentation libraries are active, what samplers are set, or which exporters are enabled.",
      "recommendation": "Augment with explicit configuration: AddOpenTelemetry().ConfigureResource(...).WithTracing(b => b.AddAspNetCoreInstrumentation().AddHttpClientInstrumentation()).WithMetrics(b => b.AddAspNetCoreInstrumentation()).UseAzureMonitor(). This makes the pipeline visible, auditable, and easier to extend.",
      "status": "open"
    },
    {
      "id": "AOTL-PIPE-002",
      "title": "No service name or version set in OpenTelemetry resource attributes",
      "severity": "notable",
      "category": "PIPE",
      "file": "src/ChatAPI/Program.cs",
      "line": 65,
      "description": "UseAzureMonitor() is called without ConfigureResource(). The service name reported to Application Insights defaults to the entry assembly name, which may not be stable across deployments and makes multi-service correlation harder.",
      "recommendation": "Add .ConfigureResource(r => r.AddService(serviceName: \"ChatAPI\", serviceVersion: \"1.0.0\")) before UseAzureMonitor(). Use an environment variable or assembly version for serviceVersion.",
      "status": "open"
    },
    {
      "id": "AOTL-PIPE-003",
      "title": "No Cosmos DB SDK instrumentation package referenced",
      "severity": "notable",
      "category": "PIPE",
      "file": "src/ChatAPI/ChatAPI.csproj",
      "description": "Cosmos DB is a primary storage dependency but no explicit OpenTelemetry Cosmos DB instrumentation is configured. The Cosmos DB SDK v3 supports OpenTelemetry via the Microsoft.Azure.Cosmos SDK built-in ActivitySource (enabled via CosmosClientOptions.CosmosClientTelemetryOptions), but this option is not set in the CosmosClient singleton registration.",
      "recommendation": "Enable Cosmos DB built-in distributed tracing by setting CosmosClientOptions.CosmosClientTelemetryOptions.DisableDistributedTracing = false when constructing the CosmosClient (GA SDK releases expose DisableDistributedTracing; the IsDistributedTracingEnabled flag existed only in preview builds — confirm against the referenced package version). Register the Cosmos DB ActivitySource (\"Azure.Cosmos.Operation\") with the tracing pipeline via AddSource(...).",
      "status": "open"
    },
    {
      "id": "AOTL-LOG-003",
      "title": "String concatenation used instead of structured log template",
      "severity": "notable",
      "category": "LOG",
      "file": "src/ChatAPI/Data/sample_data/products/GenerateProductInfo.cs",
      "line": 67,
      "description": "_logger.LogInformation(\"Product Content: \" + productContent) uses string concatenation. This defeats the structured logging system: the message template cannot be indexed, log aggregation by template is broken, and the allocation is always performed even when the log level is filtered out.",
      "recommendation": "Replace with _logger.LogInformation(\"Product Content: {ProductContent}\", productContent). Also consider whether logging full product content at Information level is appropriate (see AOTL-LOG-002).",
      "status": "open"
    },
    {
      "id": "AOTL-CORR-001",
      "title": "No correlation ID included in error responses",
      "severity": "notable",
      "category": "CORR",
      "file": "src/ChatAPI/Controllers/ChatController.cs",
      "description": "When exceptions propagate to the controller, the HTTP error response does not include the trace ID or a correlation ID header. API consumers cannot correlate a failed request to a specific trace in Application Insights without server-side access.",
      "recommendation": "Add a global exception handler middleware (app.UseExceptionHandler) that reads Activity.Current?.TraceId.ToString() and includes it in the error response body as a requestId field and/or as an X-Correlation-Id response header.",
      "status": "open"
    },
    {
      "id": "AOTL-LOG-004",
      "title": "ChatHistoryData injects ILogger<ProductData> — wrong generic type parameter",
      "severity": "minor",
      "category": "LOG",
      "file": "src/ChatAPI/Data/ChatHistoryData.cs",
      "line": 19,
      "description": "ChatHistoryData's primary constructor accepts ILogger<ProductData> instead of ILogger<ChatHistoryData>. All log entries from ChatHistoryData will appear under the ChatAPI.Data.ProductData category in Application Insights, making log filtering and diagnosis harder.",
      "recommendation": "Change the constructor parameter to ILogger<ChatHistoryData>.",
      "status": "open"
    },
    {
      "id": "AOTL-MON-001",
      "title": "No sampling explicitly configured for Azure Monitor",
      "severity": "minor",
      "category": "MON",
      "file": "src/ChatAPI/Program.cs",
      "line": 65,
      "description": "UseAzureMonitor() is called without an explicit sampling setting, so the Azure Monitor OpenTelemetry distro's package-default sampler applies (this is not the classic Application Insights SDK's adaptive sampling, and the default can differ between package versions). For an LLM-based API where each conversation involves multiple expensive round trips (embedding, search, completion), sampling behaviour should be an explicit architectural decision documented in code.",
      "recommendation": "Set options.SamplingRatio in UseAzureMonitor() to an explicit value (e.g., 1.0 for low-volume dev/staging, 0.1–0.5 for high-volume production). Add a comment documenting the rationale.",
      "status": "open"
    },
    {
      "id": "AOTL-MON-002",
      "title": "No startup validation for APPLICATIONINSIGHTS_CONNECTION_STRING",
      "severity": "minor",
      "category": "MON",
      "file": "src/ChatAPI/Program.cs",
      "line": 67,
      "description": "The APPLICATIONINSIGHTS_CONNECTION_STRING is read from configuration and passed directly to UseAzureMonitor() without null/empty validation. If the variable is missing, telemetry will silently fail with no startup warning.",
      "recommendation": "Add a startup check: if (string.IsNullOrEmpty(builder.Configuration[\"APPLICATIONINSIGHTS_CONNECTION_STRING\"])) log a warning indicating that Application Insights is not configured. Consider throwing in production environments.",
      "status": "open"
    },
    {
      "id": "AOTL-INFO-001",
      "title": "UseAzureMonitor with DefaultAzureCredential is a good pattern",
      "severity": "info",
      "category": "MON",
      "file": "src/ChatAPI/Program.cs",
      "line": 68,
      "description": "Azure Monitor is configured with DefaultAzureCredential rather than a key or shared secret. This avoids secrets in configuration and enables managed identity in Azure-hosted environments.",
      "status": "open"
    },
    {
      "id": "AOTL-INFO-002",
      "title": "ILogger<T> used consistently; no Console.WriteLine calls present",
      "severity": "info",
      "category": "LOG",
      "description": "All classes use ILogger<T> injected via primary constructor parameters. No Console.WriteLine or Debug.WriteLine calls were found in any ChatAPI source file. This is the correct baseline for structured logging.",
      "status": "open"
    },
    {
      "id": "AOTL-INFO-003",
      "title": "W3C TraceContext propagation implicitly enabled via UseAzureMonitor",
      "severity": "info",
      "category": "CORR",
      "file": "src/ChatAPI/Program.cs",
      "description": "UseAzureMonitor() enables W3C TraceContext header propagation by default for incoming and outgoing HTTP requests. Distributed traces will be correlated automatically across HttpClient calls without additional configuration.",
      "status": "open"
    }
  ],
  "summary": {
    "critical": 3,
    "notable": 6,
    "minor": 3,
    "info": 3,
    "total": 15
  }
}