{
  "$schema_note": "Hlido Weekly Reliability Report — machine-readable edition. Evidence-backed; scoring weights are never exposed.",
  "edition": "2026-06-12",
  "generated_at": "2026-06-12T14:24:45.684Z",
  "registry": {
    "reviewed": 642,
    "scored": 642,
    "tier_distribution": {
      "FADING": 297,
      "STEADY": 193,
      "VITAL": 152
    }
  },
  "incidents": {
    "published_total": 19,
    "by_severity": {
      "critical": 6,
      "high": 4,
      "low": 7,
      "medium": 2
    },
    "dead_agents_count": 6,
    "dead_agents": [
      {
        "slug": "stanley-for-x",
        "title": "Primary site unreachable: domain no longer resolves",
        "observed_at": "2026-05-08T16:32:42Z",
        "id": "inc_2026-06-12_stanley-for-x_2e9a"
      },
      {
        "slug": "playht",
        "title": "Primary site unreachable: domain no longer resolves",
        "observed_at": "2026-05-08T16:32:42Z",
        "id": "inc_2026-06-12_playht_45a1"
      },
      {
        "slug": "oraza",
        "title": "Primary site unreachable: domain no longer resolves",
        "observed_at": "2026-05-08T16:32:42Z",
        "id": "inc_2026-06-12_oraza_3dbb"
      },
      {
        "slug": "hapax",
        "title": "Primary site unreachable: domain no longer resolves",
        "observed_at": "2026-05-08T16:32:42Z",
        "id": "inc_2026-06-12_hapax_2cf0"
      },
      {
        "slug": "fleece-ai",
        "title": "Primary site unreachable: domain no longer resolves",
        "observed_at": "2026-05-08T16:32:42Z",
        "id": "inc_2026-06-12_fleece-ai_468d"
      },
      {
        "slug": "adaptive",
        "title": "Primary site unreachable: domain no longer resolves",
        "observed_at": "2026-05-08T16:32:42Z",
        "id": "inc_2026-06-12_adaptive_2fa8"
      }
    ],
    "registry_url": "https://hlido.eu/incidents/"
  },
  "findings": [
    {
      "id": "ai-agent-category-quality-gap",
      "title": "AI Agent is the largest category (230 agents) but scores avg 63.9 — 5.5pts below corpus",
      "narrative": "Among reviewed categories with >10 agents, 'AI Agent' (230 agents, 36% of corpus) has one of the lowest average scores: 63.9 vs. a corpus average of 69.4. 83 agents (36%) score below 60. Red flags in this category dominate the whole corpus: 83 unverified-claim flags, 32 auth-opacity flags. In contrast, top categories (Voice 79.3, Eval 79.1, Frameworks & Eval 79.0) score 15+ points higher. The most-reviewed category is also one of the weakest quality signals.",
      "confidence": "high"
    },
    {
      "id": "chat-companion-quality-crisis",
      "title": "Chat & Companion (28 agents, avg 54.8) and Companion (3 agents, avg 53) are the bottom performers",
      "narrative": "Chat & Companion agents average 54.8 — the lowest of any category with 5+ agents. Companion agents average 53. Combined, the 31 agents in the consumer chat space average below 55. Evidence coverage is also lowest here (11% for Chat & Companion). These products frequently fail on claim verification and auth transparency. This is a credibility risk for Hlido if these categories are over-visible in the discovery surface.",
      "confidence": "high"
    },
    {
      "id": "unverified-claims-dominance",
      "title": "Unverified claims is the #1 red flag across 161 scorecards — concentrated in AI Agent category",
      "narrative": "Across 635 reviewed agents, 'absence/lack of verified/verifiable claims' appears as a red flag in 161 scorecards (25% of all reviews). The AI Agent category alone accounts for 83 of these (52%). The three-way cluster of unverified claims (161) + auth opacity (104) + sparse docs (97) totals 362 flags, affecting an estimated 35-40% of the corpus. Together, these represent the single biggest trust gap in the AI agent ecosystem.",
      "confidence": "high"
    },
    {
      "id": "confidence-level-distribution",
      "title": "Only 21% of reviews achieve high confidence; 28% are low confidence",
      "narrative": "Confidence breakdown: high=136 (21%), medium-high=94 (15%), medium=221 (35%), medium-low=5 (1%), low=178 (28%). Over half the corpus (64%) is medium or lower confidence, driven primarily by login walls, sparse public surfaces, and limited testability of enterprise/API products. Low-confidence reviews disproportionately affect the AI Agent category. High-confidence reviews correlate with open-source tools, CLI agents, and API-first products with public docs.",
      "confidence": "high"
    }
  ],
  "sources": {
    "insights_file": "2026-06-08.json",
    "incidents_as_of": "2026-06-12T11:59:53.199Z"
  },
  "links": {
    "registry": "https://hlido.eu/reviews/",
    "incidents_api": "https://hlido.eu/v1/incidents",
    "mcp": "https://hlido.eu/mcp",
    "report_index": "https://hlido.eu/reports/"
  }
}