Replay for f7f021f8-2392-4d7f-9e9d-f82ecc7917f2

Replay kind: benchmark

Replay source: original receipt

Comparison: pending or not applicable

replay page · replay JSON

{
  "kind": "benchmark",
  "packageId": "addr/0x69dFFb78f20D3B1511067C4899e6Afb5f3099964/research-agent@1.0.0",
  "packageHash": "sha256:5b49c3bf427aac8d024ed0e6064ef4e1f51b4ebb2d34511151ba838548c10f8e",
  "benchmarkId": "benchmark/source-backed-research@1.0.0",
  "benchmarkDefinitionHash": "sha256:e312a86a54f41197730b5ae0a0d0f3768dd06b8d63225b0e4300fdd44fae1f2a",
  "runtime": "codex",
  "executionBackend": "local",
  "availableTools": [
    "fetch",
    "markdown"
  ],
  "grantedPermissions": [
    "http:get",
    "storage:write"
  ],
  "requestedApiOrigins": [],
  "requestedChains": [],
  "requestedTokens": [],
  "result": {
    "status": "success",
    "score": 10,
    "maxScore": 10,
    "caseResults": [
      {
        "caseId": "captures-cited-links",
        "status": "pass",
        "score": 5,
        "maxScore": 5
      },
      {
        "caseId": "returns-source-backed-summary",
        "status": "pass",
        "score": 5,
        "maxScore": 5
      }
    ],
    "outputHash": "sha256:086d3d4dcb4c62652afd42091bad853ac638fa0c5aba4ebf9ef268fd56b8d47b",
    "stdoutObjectKey": "5674cf38-7dd2-495a-8322-0234e8c9f174/output.md",
    "artifactsObjectPrefix": "5674cf38-7dd2-495a-8322-0234e8c9f174/",
    "judgeSummary": "Benchmark suite produced the expected deterministic output. The research brief includes the expected source-backed summary language and all required cited links.",
    "judgeWarnings": []
  }
}