{
  "steward": "cosmosdb-steward",
  "project": "Azure-AI-RAG-CSharp-Semantic-Kernel-Functions",
  "runDate": "2026-03-21",
  "runId": "2026-03-21T00-00-00",
  "findings": [
    {
      "id": "CSDB-PARTKEY-001",
      "title": "Chat history partitioned by /id but queried by sessionid — full cross-partition scan on every session load",
      "severity": "critical",
      "category": "PARTKEY",
      "file": "src/ChatAPI/Data/ChatHistoryData.cs",
      "line": 41,
      "description": "ChatMessage items are written with PartitionKey(chatMessage.id), so the container is logically keyed on /id. However, GetMessagesBySessionIdAsync queries with WHERE c.sessionid = @sessionId and no PartitionKey in QueryRequestOptions. Every call issues a full cross-partition fan-out across all logical partitions. This scales poorly and wastes RUs proportionally to total item count.",
      "recommendation": "Change the chat history container partition key to /sessionid. Update CreateItemAsync calls to use new PartitionKey(chatMessage.sessionid) and supply QueryRequestOptions with the session partition key on reads.",
      "status": "open"
    },
    {
      "id": "CSDB-QUERY-001",
      "title": "GetProductByNameAsync issues cross-partition fan-out query with no partition key filter",
      "severity": "critical",
      "category": "QUERY",
      "file": "src/ChatAPI/Data/ProductData.cs",
      "line": 43,
      "description": "The products container is keyed on /id. GetProductByNameAsync queries SELECT * FROM c WHERE c.name = @productName without supplying a PartitionKey in QueryRequestOptions, resulting in a cross-partition query that scans every partition. This is called on every product lookup by name via the Semantic Kernel plugin.",
      "recommendation": "Either change the partition key to /name if name-based access is the primary pattern, or maintain a separate name-to-id mapping to enable a point read. As an immediate guard, pass QueryRequestOptions with EnableCrossPartitionQuery = false in development to surface the issue.",
      "status": "open"
    },
    {
      "id": "CSDB-ERROR-001",
      "title": "Chat history write methods have no error handling — 429 and other Cosmos errors propagate unhandled",
      "severity": "notable",
      "category": "ERROR",
      "file": "src/ChatAPI/Data/ChatHistoryData.cs",
      "line": 29,
      "description": "AddUserMessageAsync and AddAssistantMessageAsync call CreateItemAsync with no try/catch. Any CosmosException — including 429 TooManyRequests, 503 Service Unavailable, or network timeouts — propagates unhandled to the request pipeline, causing the HTTP response to fail with an unformatted 500.",
      "recommendation": "Wrap CreateItemAsync in try/catch (CosmosException). Handle 429 responses with a RetryAfter delay or propagate a structured error. Decide whether a failed history write should abort the request or be tolerated as best-effort.",
      "status": "open"
    },
    {
      "id": "CSDB-ERROR-002",
      "title": "No 429 retry logic at application layer; SDK retry policy not configured",
      "severity": "notable",
      "category": "ERROR",
      "file": "src/ChatAPI/Program.cs",
      "line": 23,
      "description": "CosmosClient is constructed without CosmosClientOptions, leaving MaxRetryAttemptsOnRateLimitedRequests at the SDK default of 9 and MaxRetryWaitTimeOnRateLimitedRequests at 30 seconds. No application-layer retry policy exists. The SDK will silently retry up to 9 times with exponential backoff, potentially holding request threads for up to 30 seconds before failing, with no observability.",
      "recommendation": "Configure CosmosClientOptions with explicit MaxRetryAttemptsOnRateLimitedRequests (e.g., 3) and MaxRetryWaitTimeOnRateLimitedRequests (e.g., TimeSpan.FromSeconds(10)). Add structured logging on retry attempts. Consider a Polly policy at the service layer for critical write paths.",
      "status": "open"
    },
    {
      "id": "CSDB-QUERY-002",
      "title": "SELECT * used in both containers — fetches full documents when only specific fields are needed",
      "severity": "notable",
      "category": "QUERY",
      "file": "src/ChatAPI/Data/ChatHistoryData.cs",
      "line": 93,
      "description": "Both GetMessagesBySessionIdAsync (ChatHistoryData) and GetProductByNameAsync (ProductData) use SELECT * queries. This retrieves all document properties including system properties and any large fields, consuming more RUs and network bandwidth than necessary. For chat messages the full document is required, but for product name lookup only a few fields are needed.",
      "recommendation": "Replace SELECT * with projected field lists: for chat history use SELECT c.id, c.sessionid, c.message, c.role, c.Timestamp; for product name lookup use SELECT c.id, c.name, c.description.",
      "status": "open"
    },
    {
      "id": "CSDB-ERROR-003",
      "title": "GetProductByNameAsync returns null on no-match; callers risk NullReferenceException",
      "severity": "notable",
      "category": "ERROR",
      "file": "src/ChatAPI/Data/ProductData.cs",
      "line": 59,
      "description": "When no product matches the given name, GetProductByNameAsync returns null rather than throwing or returning a nullable-annotated type. The method signature declares Dictionary<string, object> (non-nullable). Callers that do not null-check will encounter a NullReferenceException at runtime.",
      "recommendation": "Change the return type to Dictionary<string, object>? and annotate appropriately. Update all callers to handle the null case explicitly. Alternatively, throw a domain-specific NotFoundException so callers have a typed exception to catch.",
      "status": "open"
    },
    {
      "id": "CSDB-CONFIG-001",
      "title": "No CosmosClientOptions configured — connection mode, retry policy, and serialization options at defaults",
      "severity": "minor",
      "category": "CONFIG",
      "file": "src/ChatAPI/Program.cs",
      "line": 23,
      "description": "CosmosClient is constructed with only a connection string. No CosmosClientOptions are provided, so ConnectionMode defaults to Gateway, serialization uses PascalCase (inconsistent with camelCase documents), and retry settings are at SDK defaults. This makes behaviour implicit and environment-dependent.",
      "recommendation": "Construct CosmosClient with CosmosClientOptions specifying ConnectionMode.Direct for production, CosmosSerializationOptions with PropertyNamingPolicy = CosmosPropertyNamingPolicy.CamelCase, and explicit MaxRetryAttemptsOnRateLimitedRequests and MaxRetryWaitTimeOnRateLimitedRequests.",
      "status": "open"
    },
    {
      "id": "CSDB-CONFIG-002",
      "title": "EnableContentResponseOnWrite not set to false — write operations return full document payloads",
      "severity": "minor",
      "category": "CONFIG",
      "file": "src/ChatAPI/Program.cs",
      "line": 23,
      "description": "By default the Cosmos SDK returns the full document body in the response to CreateItemAsync and UpsertItemAsync. None of the write paths in this project use the returned resource, so the extra RU charge and network payload are wasted on every chat message write.",
      "recommendation": "Set CosmosClientOptions.EnableContentResponseOnWrite = false. This eliminates the response body from write operations, reducing RU consumption and payload size at no cost to correctness.",
      "status": "open"
    },
    {
      "id": "CSDB-TTL-001",
      "title": "Chat history container has no TTL configured — old session messages accumulate indefinitely",
      "severity": "minor",
      "category": "TTL",
      "file": "src/ChatAPI/Data/ChatHistoryData.cs",
      "description": "Chat history messages are ephemeral by nature — sessions eventually end and old history is never retrieved again. Without TTL, messages accumulate indefinitely, increasing storage costs and making cross-partition queries more expensive over time.",
      "recommendation": "Enable TTL on the chat history container. Set DefaultTimeToLive on the container to a reasonable retention window (e.g., 30 days). Optionally set a per-document ttl field for finer control.",
      "status": "open"
    },
    {
      "id": "CSDB-CLIENT-001",
      "title": "CosmosClient correctly registered as Singleton — connection pool is properly shared",
      "severity": "info",
      "category": "CLIENT",
      "file": "src/ChatAPI/Program.cs",
      "line": 23,
      "description": "CosmosClient is registered with AddSingleton, ensuring the underlying connection pool is shared across all requests and services. This is the correct pattern and avoids connection exhaustion.",
      "recommendation": "No action required.",
      "status": "open"
    },
    {
      "id": "CSDB-SEED-001",
      "title": "Startup seeding catches all exceptions silently — application may start with empty product catalog",
      "severity": "info",
      "category": "SEED",
      "file": "src/ChatAPI/Data/sample_data/products/GenerateProductInfo.cs",
      "line": 84,
      "description": "PopulateCosmosAsync wraps all logic in a broad catch (Exception ex) with only an error log. If seeding fails (e.g., wrong connection string, container creation permission denied), the application starts normally but serves no products. The broad catch is intentional to prevent startup failure, but the silent continuation may mask misconfiguration.",
      "recommendation": "Consider a startup health check or at minimum a warning-level log and metric that clearly signals an empty catalog state. For critical deployments, re-throw after logging so the deployment fails fast.",
      "status": "open"
    }
  ],
  "summary": {
    "critical": 2,
    "notable": 4,
    "minor": 3,
    "info": 2,
    "total": 11
  }
}
