diff --git a/scripts/contextbench-select-slice.mjs b/scripts/contextbench-select-slice.mjs
new file mode 100644
index 0000000..6deffa8
--- /dev/null
+++ b/scripts/contextbench-select-slice.mjs
@@ -0,0 +1,913 @@
+#!/usr/bin/env node
+import { createHash } from 'node:crypto';
+import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
+import { dirname, join, resolve } from 'node:path';
+import { spawnSync } from 'node:child_process';
+import { tmpdir } from 'node:os';
+
+// Dataset coordinates sent as query parameters to the rows endpoint by loadRows().
+const DATASET = 'Contextbench/ContextBench';
+const DATASET_CONFIG = 'contextbench_verified';
+const SPLIT = 'train';
+const DATASET_ROWS_URL = 'https://datasets-server.huggingface.co/rows';
+// Seed prefix that rankTask() hashes together with each task's identity fields.
+const SELECTION_SEED = 'phase37-contextbench-v1-2026-04-27';
+// Fixed timestamp literal recorded in the manifest and exclusion log (not read from the clock).
+const SELECTION_TIMESTAMP = '2026-04-27T00:00:00.000Z';
+// Label recorded next to the hashes in tasks, manifests, and payloads.
+const CANONICALIZATION_VERSION = 'contextbench-canonical-json-lf-v1';
+// Hardness metadata copied verbatim into every task, the manifest, and the exclusion log.
+const HARDNESS_STATUS = 'unavailable_in_contextbench_verified_schema';
+const HARDNESS_SOURCE = 'dataset_schema_probe';
+
+// Fields that normalizeRow() requires to be present and non-empty for a row to be eligible.
+const REQUIRED_FIELDS = [
+ 'instance_id',
+ 'original_inst_id',
+ 'source',
+ 'language',
+ 'repo_url',
+ 'base_commit',
+ 'problem_statement',
+ 'gold_context',
+ 'patch',
+ 'test_patch',
+ 'f2p',
+ 'p2p'
+];
+
+// Fields whose canonicalized content normalizeRow() hashes.
+const HASH_FIELDS = ['problem_statement', 'gold_context', 'patch', 'test_patch', 'f2p', 'p2p'];
+// Input categories printed by help() and recorded in the manifest as forbidden selection sources.
+const FORBIDDEN_SELECTION_SOURCES = [
+ 'agent_outputs',
+ 'codebase_context_outputs',
+ 'competitor_outputs',
+ 'proxy_hardness_score',
+ 'post_failure_task_filtering'
+];
+
+/**
+ * Print the command-line usage text to stdout.
+ *
+ * Every option that consumes a following argument in parseArgs() is shown with an
+ * upper-case placeholder (DIR, FILE, ID, N) so the usage lines make clear which
+ * flags expect a value. --out is a directory for --dry-run and --probe-evaluator
+ * and a file path for --write-task-payloads and --write-gold, matching how main()
+ * and its helpers use it.
+ */
+function help() {
+  console.log(`ContextBench Phase 37 selection tool
+
+Usage:
+ node scripts/contextbench-select-slice.mjs --help
+ node scripts/contextbench-select-slice.mjs --dry-run --out DIR
+ node scripts/contextbench-select-slice.mjs --probe-evaluator --out DIR
+ node scripts/contextbench-select-slice.mjs --write-fixtures
+ node scripts/contextbench-select-slice.mjs --write-task-payloads --out FILE [--checkout-root DIR]
+ node scripts/contextbench-select-slice.mjs --write-gold --task-id ID --out FILE [--payloads FILE]
+ node scripts/contextbench-select-slice.mjs --materialize-checkouts --payloads FILE [--max-tasks N]
+ node scripts/contextbench-select-slice.mjs --check FILE [--rows-file FILE]
+
+Modes:
+ --dry-run Load ${DATASET}/${DATASET_CONFIG}, validate schema, compute eligible pool, and write audit files under --out.
+ --probe-evaluator Run python -m contextbench.evaluate against a synthetic local git fixture only; no lane or product output is used.
+ --write-fixtures Write tests/fixtures/contextbench-task-manifest.json and tests/fixtures/contextbench-selection-exclusions.json.
+ --write-task-payloads Write selected task problem statements and intended checkout paths for Phase 40 live runs.
+ --write-gold Write scorer-only official-evaluator gold input for selected task(s); never pass this to solvers.
+ --materialize-checkouts Clone/fetch selected task repositories to their payload repo_checkout_path and verify base commits.
+ --check Verify frozen manifest integrity. With --rows-file, also recompute deterministic selection from frozen rows.
+
+Forbidden selection inputs:
+ ${FORBIDDEN_SELECTION_SOURCES.join(', ')}
+`);
+}
+
+/**
+ * Parse command-line arguments into the options object consumed by main().
+ *
+ * @param argv Arguments after the node executable and the script path.
+ * @returns Options with every flag defaulted ('' for paths/ids, false for
+ *   switches, 0 for maxTasks) when it is absent from argv.
+ * @throws Error on an unknown argument, on a value-taking option that is not
+ *   followed by a value (end of input, or another `--flag` comes next), and
+ *   when --max-tasks is not a non-negative integer.
+ */
+function parseArgs(argv) {
+  const args = {
+    help: false,
+    out: '',
+    check: '',
+    rowsFile: '',
+    payloads: '',
+    taskId: '',
+    manifest: 'tests/fixtures/contextbench-task-manifest.json',
+    checkoutRoot: '',
+    maxTasks: 0,
+    dryRun: false,
+    probeEvaluator: false,
+    writeFixtures: false,
+    writeTaskPayloads: false,
+    writeGold: false,
+    materializeCheckouts: false
+  };
+  // Boolean switches and the option key each one turns on.
+  const switches = new Map([
+    ['--help', 'help'],
+    ['-h', 'help'],
+    ['--dry-run', 'dryRun'],
+    ['--probe-evaluator', 'probeEvaluator'],
+    ['--write-fixtures', 'writeFixtures'],
+    ['--write-task-payloads', 'writeTaskPayloads'],
+    ['--write-gold', 'writeGold'],
+    ['--materialize-checkouts', 'materializeCheckouts']
+  ]);
+  // Value-taking options and the option key that receives the next argument.
+  const valued = new Map([
+    ['--out', 'out'],
+    ['--check', 'check'],
+    ['--rows-file', 'rowsFile'],
+    ['--payloads', 'payloads'],
+    ['--task-id', 'taskId'],
+    ['--manifest', 'manifest'],
+    ['--checkout-root', 'checkoutRoot'],
+    ['--max-tasks', 'maxTasks']
+  ]);
+  for (let i = 0; i < argv.length; i += 1) {
+    const arg = argv[i];
+    const switchKey = switches.get(arg);
+    if (switchKey !== undefined) {
+      args[switchKey] = true;
+      continue;
+    }
+    const valueKey = valued.get(arg);
+    if (valueKey === undefined) throw new Error(`Unknown argument: ${arg}`);
+    i += 1;
+    const value = argv[i];
+    // A missing value used to become '' silently (e.g. `--check` with nothing after
+    // it skipped the check), and a following flag was swallowed as the value.
+    if (value === undefined || value.startsWith('--'))
+      throw new Error(`Missing value for ${arg}`);
+    if (valueKey === 'maxTasks') {
+      const count = Number(value);
+      // NaN or a fraction would later be treated as "no limit" by materializeCheckouts().
+      if (!Number.isInteger(count) || count < 0)
+        throw new Error(`--max-tasks expects a non-negative integer, got: ${value}`);
+      args.maxTasks = count;
+    } else {
+      args[valueKey] = value;
+    }
+  }
+  return args;
+}
+
+/**
+ * Serialize a JSON-compatible value with object keys in a fixed order, so that
+ * equal content always yields the same text (and therefore the same hash).
+ *
+ * Values that JSON.stringify drops (undefined, functions, symbols) are handled
+ * the same way here: omitted from objects and rendered as null inside arrays.
+ * Without that, an in-memory object such as `{ repo: undefined }` hashed
+ * differently from the same object after being written to disk and read back.
+ *
+ * @param value Any JSON-compatible value.
+ * @returns The canonical JSON text; for a top-level undefined the result of
+ *   JSON.stringify (undefined) is returned unchanged.
+ */
+function stableStringify(value) {
+  if (value === null || typeof value !== 'object') return JSON.stringify(value);
+  const droppedByJson = (item) =>
+    item === undefined || typeof item === 'function' || typeof item === 'symbol';
+  if (Array.isArray(value)) {
+    const items = value.map((item) => (droppedByJson(item) ? 'null' : stableStringify(item)));
+    return `[${items.join(',')}]`;
+  }
+  // Pin the collation locale so key order does not follow the process default
+  // locale; fall back to code-unit order for keys the collator treats as equal.
+  const compareKeys = ([a], [b]) => a.localeCompare(b, 'en') || (a < b ? -1 : a > b ? 1 : 0);
+  const entries = Object.entries(value)
+    .filter(([, item]) => !droppedByJson(item))
+    .sort(compareKeys);
+  return `{${entries.map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(',')}}`;
+}
+
+/**
+ * Produce the canonical text of a dataset field before hashing.
+ *
+ * - undefined and null become the literal strings 'undefined' and 'null'.
+ * - Non-string values are serialized with stableStringify().
+ * - Strings get CRLF / CR line endings converted to LF; a string that starts
+ *   with `{` or `[` (after trimming) and parses as JSON is re-serialized with
+ *   stableStringify(), otherwise the LF-normalized string is returned as is.
+ *
+ * @param value Field value taken from a dataset row.
+ * @returns The canonical string.
+ */
+function canonicalize(value) {
+  if (value === undefined || value === null) return String(value);
+  const toLf = (text) => text.replace(/\r\n?/g, '\n');
+  if (typeof value !== 'string') return toLf(stableStringify(value));
+  const lfText = toLf(value);
+  const candidate = lfText.trim();
+  const looksLikeJson = candidate.startsWith('{') || candidate.startsWith('[');
+  if (!looksLikeJson) return lfText;
+  try {
+    return stableStringify(JSON.parse(candidate));
+  } catch {
+    // Not valid JSON after all: keep the plain normalized text.
+    return lfText;
+  }
+}
+
+/**
+ * Hash a string with SHA-256.
+ *
+ * @param value Text to hash; it is fed to the hash as UTF-8.
+ * @returns The hex digest prefixed with `sha256:`.
+ */
+function sha256(value) {
+  const hasher = createHash('sha256');
+  hasher.update(value, 'utf8');
+  const hex = hasher.digest('hex');
+  return `sha256:${hex}`;
+}
+
+/**
+ * Hash a value by first serializing it with stableStringify() and then
+ * passing that text to sha256().
+ *
+ * @param value Value to hash.
+ * @returns The `sha256:`-prefixed digest of the serialized value.
+ */
+function hashObject(value) {
+  const canonicalText = stableStringify(value);
+  return sha256(canonicalText);
+}
+
+/**
+ * Write a value as pretty-printed JSON (2-space indent, trailing newline),
+ * creating the parent directory first if needed.
+ *
+ * @param path Destination file path.
+ * @param value Value to serialize.
+ */
+function writeJson(path, value) {
+  const parentDir = dirname(path);
+  mkdirSync(parentDir, { recursive: true });
+  const text = JSON.stringify(value, null, 2) + '\n';
+  writeFileSync(path, text, 'utf8');
+}
+
+/**
+ * GET a URL and parse the response body as JSON.
+ *
+ * @param url URL to request.
+ * @returns The parsed JSON body.
+ * @throws Error naming the status code, status text, and URL when the response is not ok.
+ */
+async function fetchJson(url) {
+  const response = await fetch(url);
+  if (response.ok) return response.json();
+  throw new Error(`Fetch failed ${response.status} ${response.statusText}: ${url}`);
+}
+
+/**
+ * Download every row of the configured dataset split, 100 rows per request.
+ *
+ * Each page is validated before use. Previously a response without a numeric
+ * `num_rows_total` ended the loop after the first page and returned a silently
+ * truncated pool, and a response without `rows` failed with an opaque TypeError.
+ *
+ * @returns The `row` object of every entry, in server order.
+ * @throws Error when a page is malformed or when the number of rows collected
+ *   differs from the total the server reported.
+ */
+async function loadRows() {
+  const pageSize = 100;
+  const rows = [];
+  let total = null;
+  for (let offset = 0; total === null || offset < total; offset += pageSize) {
+    const params = new URLSearchParams({
+      dataset: DATASET,
+      config: DATASET_CONFIG,
+      split: SPLIT,
+      offset: String(offset),
+      length: String(pageSize)
+    });
+    const payload = await fetchJson(`${DATASET_ROWS_URL}?${params.toString()}`);
+    if (!Number.isInteger(payload?.num_rows_total) || payload.num_rows_total < 0)
+      throw new Error(`rows response at offset ${offset} has no valid num_rows_total`);
+    if (!Array.isArray(payload.rows))
+      throw new Error(`rows response at offset ${offset} has no rows array`);
+    total = payload.num_rows_total;
+    for (const entry of payload.rows) rows.push(entry.row);
+  }
+  // Selection must run over the complete pool; refuse to continue on a short download.
+  if (rows.length !== total)
+    throw new Error(`dataset download incomplete: expected ${total} rows, got ${rows.length}`);
+  return rows;
+}
+
+/**
+ * Accept the two supported shapes of a rows file and return a plain row array.
+ *
+ * @param payload Either an array of rows (returned as is) or an object whose
+ *   `rows` array holds rows, optionally wrapped as `{ row: ... }` entries.
+ * @returns The array of rows.
+ * @throws Error when payload is neither shape.
+ */
+function normalizeRowsPayload(payload) {
+  if (Array.isArray(payload)) return payload;
+  const wrapped = payload?.rows;
+  if (!Array.isArray(wrapped))
+    throw new Error('rows file must be an array of dataset rows or an object with rows');
+  // Unwrap `{ row }` entries; entries without a `row` are kept unchanged.
+  return wrapped.map((entry) => entry?.row ?? entry);
+}
+
+/**
+ * Load dataset rows from the file named by `args.rowsFile` when it is set,
+ * otherwise download them with loadRows().
+ *
+ * @param args Parsed command-line options.
+ * @returns The dataset rows.
+ */
+async function loadRowsForArgs(args) {
+  if (args.rowsFile) {
+    const text = readFileSync(resolve(args.rowsFile), 'utf8');
+    return normalizeRowsPayload(JSON.parse(text));
+  }
+  return loadRows();
+}
+
+/**
+ * Validate one dataset row and, when it is usable, build its task record.
+ *
+ * A row is rejected when any REQUIRED_FIELDS entry is undefined, null, or '',
+ * or when `gold_context` is not JSON text for a non-empty array.
+ *
+ * @param row Dataset row.
+ * @returns `{ eligible: false, reason, missing }` for a rejected row, otherwise
+ *   `{ eligible: true, task }` where `task` carries identity fields, the hashes
+ *   of the HASH_FIELDS, and the hardness metadata constants.
+ */
+function normalizeRow(row) {
+  const missing = [];
+  for (const field of REQUIRED_FIELDS) {
+    const value = row[field];
+    if (value === undefined || value === null || value === '') missing.push(field);
+  }
+
+  let goldSpans = [];
+  if (!missing.includes('gold_context')) {
+    let parsed;
+    let parsedOk = true;
+    try {
+      parsed = JSON.parse(row.gold_context);
+    } catch {
+      parsedOk = false;
+    }
+    if (!parsedOk) missing.push('gold_context_valid_json');
+    else if (Array.isArray(parsed) && parsed.length > 0) goldSpans = parsed;
+    else missing.push('gold_context_non_empty_array');
+  }
+
+  if (missing.length > 0) {
+    return { eligible: false, reason: 'missing_or_malformed_required_fields', missing };
+  }
+
+  const hashes = {};
+  for (const field of HASH_FIELDS) hashes[field] = sha256(canonicalize(row[field]));
+
+  const task = {
+    instance_id: row.instance_id,
+    original_inst_id: row.original_inst_id,
+    source: row.source,
+    language: row.language,
+    repo: row.repo,
+    repo_url: row.repo_url,
+    base_commit: row.base_commit,
+    problem_statement_ref: 'dataset_field:problem_statement',
+    problem_statement_hash: hashes.problem_statement,
+    gold_context_ref: 'dataset_field:gold_context',
+    gold_context_hash: hashes.gold_context,
+    patch_hash: hashes.patch,
+    test_patch_hash: hashes.test_patch,
+    f2p_hash: hashes.f2p,
+    p2p_hash: hashes.p2p,
+    gold_context_span_count: goldSpans.length,
+    hash_canonicalization_version: CANONICALIZATION_VERSION,
+    hardness_signal_status: HARDNESS_STATUS,
+    hardness_signal_source: HARDNESS_SOURCE,
+    hardness_proxy_used: false
+  };
+  return { eligible: true, task };
+}
+
+/**
+ * Split dataset rows into eligible tasks and excluded rows, and hash the pool.
+ *
+ * Rows rejected by normalizeRow() and rows repeating an already seen
+ * `instance_id` are recorded in `excluded` together with their row index.
+ *
+ * @param rows Dataset rows.
+ * @returns `{ eligible, excluded, taskPoolHash }`, where `taskPoolHash` is the
+ *   hashObject() of a fixed projection of every eligible task, in pool order.
+ */
+function buildPool(rows) {
+  const knownIds = new Set();
+  const eligible = [];
+  const excluded = [];
+
+  for (let index = 0; index < rows.length; index += 1) {
+    const row = rows[index];
+    const outcome = normalizeRow(row);
+    if (!outcome.eligible) {
+      excluded.push({
+        row_index: index,
+        instance_id: row.instance_id ?? '',
+        reason: outcome.reason,
+        details: outcome.missing
+      });
+    } else if (knownIds.has(outcome.task.instance_id)) {
+      excluded.push({
+        row_index: index,
+        instance_id: outcome.task.instance_id,
+        reason: 'duplicate_instance_id'
+      });
+    } else {
+      knownIds.add(outcome.task.instance_id);
+      eligible.push(outcome.task);
+    }
+  }
+
+  const poolProjection = eligible.map((task) => ({
+    instance_id: task.instance_id,
+    source: task.source,
+    language: task.language,
+    repo_url: task.repo_url,
+    base_commit: task.base_commit,
+    problem_statement_hash: task.problem_statement_hash,
+    gold_context_hash: task.gold_context_hash
+  }));
+  return { eligible, excluded, taskPoolHash: hashObject(poolProjection) };
+}
+
+/**
+ * Compute the ordering key of a task: the sha256() of the selection seed joined
+ * by ':' with the task's source, language, repo_url, and instance_id.
+ *
+ * @param task Task record.
+ * @returns The `sha256:`-prefixed rank string.
+ */
+function rankTask(task) {
+  const parts = [SELECTION_SEED, task.source, task.language, task.repo_url, task.instance_id];
+  const rankInput = parts.map((part) => String(part)).join(':');
+  return sha256(rankInput);
+}
+
+/**
+ * Pick the task slice from the eligible pool in a fixed, repeatable order.
+ *
+ * Tasks are ordered by rankTask(). Selection then proceeds in passes, each
+ * skipping tasks already chosen and stopping at `targetCount`:
+ *   1. the best-ranked task of every language (languages in sorted order),
+ *   2. the best-ranked task of every source (sources in sorted order),
+ *   3. tasks from repositories not yet represented, in rank order,
+ *   4. remaining tasks in rank order.
+ *
+ * Each rank is computed once per task instead of inside the sort comparator,
+ * and ranks are compared by code unit rather than by locale collation. Ranks
+ * are `sha256:` plus lowercase hex, so this is expected to yield the same order.
+ *
+ * @param eligible Eligible task records.
+ * @param targetCount Maximum number of tasks to select (default 20).
+ * @returns `{ selected, nonSelectedEligible }`; selected tasks gain
+ *   `inclusion_rationale` and `deterministic_rank`, and the remaining eligible
+ *   tasks are summarized in rank order.
+ */
+function selectTasks(eligible, targetCount = 20) {
+  const rankOf = new Map(eligible.map((task) => [task, rankTask(task)]));
+  const byRank = (a, b) => {
+    const left = rankOf.get(a);
+    const right = rankOf.get(b);
+    if (left < right) return -1;
+    return left > right ? 1 : 0;
+  };
+  const ranked = [...eligible].sort(byRank);
+
+  const selected = [];
+  const selectedIds = new Set();
+  const selectedRepos = new Set();
+  const add = (task, rationale) => {
+    if (selected.length >= targetCount || selectedIds.has(task.instance_id)) return;
+    selectedIds.add(task.instance_id);
+    selectedRepos.add(task.repo_url);
+    selected.push({
+      ...task,
+      inclusion_rationale: rationale,
+      deterministic_rank: rankOf.get(task)
+    });
+  };
+
+  for (const language of [...new Set(ranked.map((task) => task.language))].sort()) {
+    const task = ranked.find((candidate) => candidate.language === language);
+    if (task) add(task, `language_coverage:${language}`);
+  }
+  for (const source of [...new Set(ranked.map((task) => task.source))].sort()) {
+    const task = ranked.find((candidate) => candidate.source === source);
+    if (task) add(task, `source_coverage:${source}`);
+  }
+  for (const task of ranked) {
+    if (!selectedRepos.has(task.repo_url)) add(task, `repo_coverage:${task.repo_url}`);
+    if (selected.length >= targetCount) break;
+  }
+  for (const task of ranked) add(task, 'deterministic_fill');
+
+  const nonSelectedEligible = ranked
+    .filter((task) => !selectedIds.has(task.instance_id))
+    .map((task) => ({
+      instance_id: task.instance_id,
+      source: task.source,
+      language: task.language,
+      repo_url: task.repo_url,
+      reason: 'eligible_not_selected',
+      deterministic_rank: rankOf.get(task)
+    }));
+
+  return { selected, nonSelectedEligible };
+}
+
+/**
+ * Turn arbitrary text into a single path segment.
+ *
+ * Runs of characters other than letters, digits, '.', '_' and '-' become one
+ * '-', leading and trailing dashes are removed, and the result is cut to 120
+ * characters.
+ *
+ * @param value Text to sanitize.
+ * @returns The sanitized segment.
+ */
+function sanitizePathSegment(value) {
+  const dashed = value.replace(/[^a-zA-Z0-9._-]+/g, '-');
+  const trimmed = dashed.replace(/^-+|-+$/g, '');
+  return trimmed.slice(0, 120);
+}
+
+/**
+ * Compute where a task's repository checkout should live.
+ *
+ * @param task Task with `repo` and `base_commit`.
+ * @param checkoutRoot Root directory for checkouts; falsy means "no checkout planned".
+ * @returns '' when no root is given, otherwise the resolved root joined with a
+ *   sanitized segment made of the repo name and the first 12 characters of the commit.
+ */
+function checkoutPathForTask(task, checkoutRoot) {
+  if (checkoutRoot) {
+    const rootDir = resolve(checkoutRoot);
+    const shortCommit = task.base_commit.slice(0, 12);
+    return join(rootDir, sanitizePathSegment(`${task.repo}-${shortCommit}`));
+  }
+  return '';
+}
+
+/**
+ * Build the task payload document for the tasks listed in a manifest.
+ *
+ * Each manifest task is matched to its dataset row by `instance_id` and checked:
+ * the row must exist, have a non-blank string problem statement whose hash
+ * equals the manifest's, and agree with the manifest on repo_url and base_commit.
+ *
+ * @param rows Dataset rows.
+ * @param manifest Parsed task manifest.
+ * @param checkoutRoot Optional root directory for planned checkouts.
+ * @returns The payload document, including a `payload_hash` from withPayloadHash().
+ * @throws Error listing every failed check when any task fails validation.
+ */
+function buildTaskPayloads(rows, manifest, checkoutRoot) {
+  const datasetRows = new Map(rows.map((row) => [row.instance_id, row]));
+  const failures = [];
+  const tasks = [];
+
+  for (const task of manifest.tasks ?? []) {
+    const row = datasetRows.get(task.instance_id);
+    if (!row) {
+      failures.push(`${task.instance_id}: missing dataset row`);
+      continue;
+    }
+
+    const statement = typeof row.problem_statement === 'string' ? row.problem_statement : '';
+    const statementHash = sha256(canonicalize(statement));
+    const hashMatches = statementHash === task.problem_statement_hash;
+    const problems = [
+      [statement.trim() === '', 'missing problem_statement'],
+      [!hashMatches, 'problem_statement_hash mismatch'],
+      [row.repo_url !== task.repo_url, 'repo_url mismatch'],
+      [row.base_commit !== task.base_commit, 'base_commit mismatch']
+    ]
+      .filter(([failed]) => failed)
+      .map(([, message]) => message);
+    if (problems.length > 0) {
+      for (const problem of problems) failures.push(`${task.instance_id}: ${problem}`);
+      continue;
+    }
+
+    tasks.push({
+      instance_id: task.instance_id,
+      original_inst_id: task.original_inst_id,
+      repo: task.repo,
+      repo_url: task.repo_url,
+      base_commit: task.base_commit,
+      problem_statement: statement,
+      problem_statement_hash: statementHash,
+      problem_statement_hash_verified: hashMatches,
+      repo_checkout_path: checkoutPathForTask(task, checkoutRoot),
+      repo_checkout_status: checkoutRoot ? 'planned_not_verified' : 'not_planned',
+      lane_outputs_observed: false
+    });
+  }
+
+  if (failures.length > 0) {
+    throw new Error(`task payload materialization failed:\n- ${failures.join('\n- ')}`);
+  }
+
+  return withPayloadHash({
+    name: 'v2.4-contextbench-phase40-task-payloads',
+    protocolVersion: manifest.protocolVersion,
+    dataset: manifest.dataset,
+    datasetConfig: manifest.datasetConfig,
+    split: manifest.split,
+    claimBearing: false,
+    purpose:
+      'Phase 40 task input materialization; not lane output and not benchmark evidence by itself.',
+    manifest_hash: manifest.manifest_hash,
+    hash_canonicalization_version: CANONICALIZATION_VERSION,
+    checkout_root: checkoutRoot ? resolve(checkoutRoot) : null,
+    task_count: tasks.length,
+    tasks
+  });
+}
+
+/**
+ * Summarize a task list: how many tasks there are, how they are distributed
+ * over language, source, and repo_url, and how many distinct repos and
+ * languages appear.
+ *
+ * @param tasks Task records.
+ * @returns The summary object.
+ */
+function summarize(tasks) {
+  // Count tasks per distinct value of one field.
+  const tally = (field) => {
+    const counts = {};
+    for (const task of tasks) {
+      const key = task[field];
+      counts[key] = (counts[key] ?? 0) + 1;
+    }
+    return counts;
+  };
+  const distinct = (field) => new Set(tasks.map((task) => task[field])).size;
+
+  return {
+    task_count: tasks.length,
+    language_distribution: tally('language'),
+    source_distribution: tally('source'),
+    repo_distribution: tally('repo_url'),
+    repo_count: distinct('repo_url'),
+    language_count: distinct('language')
+  };
+}
+
+/**
+ * Run the whole selection over a set of dataset rows and assemble the two
+ * output documents: the task manifest and the exclusion log.
+ *
+ * @param rows Dataset rows.
+ * @returns `{ manifest, exclusions, eligible }`; the manifest carries a
+ *   `manifest_hash` computed over all of its other fields.
+ * @throws Error when the eligible pool has fewer than 20 rows, fewer than two
+ *   repositories, or fewer than two languages.
+ */
+function buildArtifacts(rows) {
+  const pool = buildPool(rows);
+  const { eligible, excluded, taskPoolHash } = pool;
+  const distinctInPool = (field) => new Set(eligible.map((task) => task[field])).size;
+
+  if (eligible.length < 20) {
+    throw new Error(`Only ${eligible.length} eligible rows; need at least 20`);
+  }
+  if (distinctInPool('repo_url') < 2) {
+    throw new Error('Eligible pool has fewer than two repositories');
+  }
+  if (distinctInPool('language') < 2) {
+    throw new Error('Eligible pool has fewer than two languages');
+  }
+
+  const selection = selectTasks(eligible);
+  const protocolVersion = 'contextbench-protocol-v1';
+  const selectionAlgorithm = 'deterministic_seeded_coverage_then_rank_fill_v1';
+  const attestation =
+    'No raw/native, codebase-context, competitor, proxy-hardness, or post-failure outputs were observed or used for selection.';
+
+  // Key order is kept as is: it determines the layout of the written fixture file.
+  const manifestBase = {
+    name: 'v2.4-contextbench-phase37-task-manifest',
+    protocolVersion,
+    dataset: DATASET,
+    datasetConfig: DATASET_CONFIG,
+    split: SPLIT,
+    claimBearing: true,
+    selectedInPhase: 37,
+    selection_algorithm: selectionAlgorithm,
+    selection_seed_or_deterministic_order: SELECTION_SEED,
+    selection_timestamp: SELECTION_TIMESTAMP,
+    task_pool_hash: taskPoolHash,
+    exclusion_log_path: 'tests/fixtures/contextbench-selection-exclusions.json',
+    hash_canonicalization_version: CANONICALIZATION_VERSION,
+    evaluator_success_status: 'passed_synthetic_official_evaluator_probe',
+    hardness_signal_status: HARDNESS_STATUS,
+    hardness_signal_source: HARDNESS_SOURCE,
+    hardness_proxy_used: false,
+    forbidden_selection_sources: FORBIDDEN_SELECTION_SOURCES,
+    no_lane_outputs_observed_attestation: attestation,
+    summary: summarize(selection.selected),
+    tasks: selection.selected
+  };
+  const manifest = { ...manifestBase, manifest_hash: hashObject(manifestBase) };
+
+  const exclusions = {
+    name: 'v2.4-contextbench-phase37-selection-exclusions',
+    protocolVersion,
+    dataset: DATASET,
+    datasetConfig: DATASET_CONFIG,
+    split: SPLIT,
+    selection_algorithm: selectionAlgorithm,
+    selection_seed_or_deterministic_order: SELECTION_SEED,
+    selection_timestamp: SELECTION_TIMESTAMP,
+    task_pool_hash: taskPoolHash,
+    hash_canonicalization_version: CANONICALIZATION_VERSION,
+    hardness_signal_status: HARDNESS_STATUS,
+    hardness_proxy_used: false,
+    no_lane_outputs_observed_attestation: attestation,
+    input_row_count: rows.length,
+    eligible_row_count: eligible.length,
+    selected_row_count: selection.selected.length,
+    excluded_rows: excluded,
+    non_selected_eligible_rows: selection.nonSelectedEligible
+  };
+
+  return { manifest, exclusions, eligible };
+}
+
+/**
+ * Check a manifest for internal consistency and, optionally, against a freshly
+ * computed one.
+ *
+ * @param actual Manifest read from disk.
+ * @param expected Optional recomputed manifest; when given, its `manifest_hash`
+ *   must equal the one recorded in `actual`.
+ * @returns A list of failure messages; empty when every check passes.
+ */
+function verifyManifest(actual, expected = null) {
+  const { manifest_hash: recordedHash, ...content } = actual;
+  const failures = [];
+  const ensure = (condition, message) => {
+    if (!condition) failures.push(message);
+  };
+
+  ensure(recordedHash === hashObject(content), 'manifest_hash does not match manifest content');
+  ensure(
+    !expected || recordedHash === expected.manifest_hash,
+    'manifest differs from deterministic dataset selection'
+  );
+
+  const hasTaskArray = Array.isArray(actual.tasks);
+  if (!hasTaskArray) {
+    failures.push('manifest tasks must be an array');
+  } else if (actual.tasks.length !== 20) {
+    failures.push(`expected 20 tasks, got ${actual.tasks.length}`);
+  }
+
+  ensure(actual.hardness_proxy_used === false, 'manifest must set hardness_proxy_used false');
+  ensure(
+    actual.hardness_signal_status === HARDNESS_STATUS,
+    'manifest has wrong hardness signal status'
+  );
+  ensure(Boolean(actual.no_lane_outputs_observed_attestation), 'missing no-output attestation');
+
+  // Coverage checks run on an empty list when `tasks` is not an array.
+  const tasks = hasTaskArray ? actual.tasks : [];
+  const distinct = (field) => new Set(tasks.map((task) => task[field])).size;
+  ensure(distinct('repo_url') >= 2, 'selected tasks cover fewer than two repos');
+  ensure(distinct('language') >= 2, 'selected tasks cover fewer than two languages');
+  return failures;
+}
+
+/**
+ * Run a command synchronously and capture its output as UTF-8 text.
+ *
+ * When the process could not be started or was aborted, spawnSync reports this
+ * through `result.error` with a null status and no stderr. That message is now
+ * appended to `stderr`, as runQuiet() already surfaces it, so callers that print
+ * `stderr || stdout` no longer fail with an empty explanation.
+ *
+ * @param command Executable name.
+ * @param args Argument list.
+ * @param cwd Working directory.
+ * @returns `{ status, stdout, stderr }`; `status` is null when the process did
+ *   not exit normally.
+ */
+function run(command, args, cwd) {
+  const result = spawnSync(command, args, {
+    cwd,
+    encoding: 'utf8',
+    env: childEnvForCommand(command)
+  });
+  // Keep any captured stderr first so substring checks on it still work.
+  const stderr = [result.stderr, result.error?.message].filter(Boolean).join('\n');
+  return { status: result.status, stdout: result.stdout ?? '', stderr };
+}
+
+/**
+ * Run a command synchronously with stdin ignored and stdout/stderr piped.
+ *
+ * @param command Executable name.
+ * @param args Argument list.
+ * @param cwd Working directory.
+ * @returns `{ status, stdout, stderr }`; when spawnSync reports an error, its
+ *   message is returned as `stderr` in place of the captured stderr.
+ */
+function runQuiet(command, args, cwd) {
+  const options = {
+    cwd,
+    env: childEnvForCommand(command),
+    encoding: 'utf8',
+    stdio: ['ignore', 'pipe', 'pipe']
+  };
+  const { status, stdout, stderr, error } = spawnSync(command, args, options);
+  return {
+    status,
+    stdout: stdout ?? '',
+    stderr: error?.message ?? stderr ?? ''
+  };
+}
+
+/**
+ * Choose the environment for a child process.
+ *
+ * Every command except `git` inherits `process.env` itself. For `git`, a copy
+ * is made without any inherited `GIT_*` variable; HOME, USERPROFILE and
+ * XDG_CONFIG_HOME point at a dedicated directory under the OS temp dir
+ * (created if missing), and GIT_CONFIG_NOSYSTEM=1 and GIT_TERMINAL_PROMPT=0 are set.
+ *
+ * @param command Executable name.
+ * @returns The environment object to pass to spawnSync.
+ */
+function childEnvForCommand(command) {
+  if (command !== 'git') return process.env;
+
+  const isolatedHome = join(tmpdir(), 'contextbench-git-isolated-home');
+  mkdirSync(isolatedHome, { recursive: true });
+
+  const inherited = Object.entries(process.env).filter(([key]) => !key.startsWith('GIT_'));
+  return {
+    ...Object.fromEntries(inherited),
+    HOME: isolatedHome,
+    USERPROFILE: isolatedHome,
+    XDG_CONFIG_HOME: isolatedHome,
+    GIT_CONFIG_NOSYSTEM: '1',
+    GIT_TERMINAL_PROMPT: '0'
+  };
+}
+
+/**
+ * Run a git command through run() and return its trimmed stdout.
+ *
+ * @param cwd Working directory for git.
+ * @param args git arguments.
+ * @returns Trimmed standard output.
+ * @throws Error with stderr (or stdout when stderr is empty) when the exit status is not 0.
+ */
+function git(cwd, args) {
+  const { status, stdout, stderr } = run('git', args, cwd);
+  if (status === 0) return stdout.trim();
+  throw new Error(`git ${args.join(' ')} failed: ${stderr || stdout}`);
+}
+
+/**
+ * Create the synthetic fixture used by the evaluator probe.
+ *
+ * A fresh git repository with one committed file (`sample.py`) is created
+ * under `outDir/probe-repo`, replacing any previous one, and matching gold and
+ * prediction JSON files that reference that commit are written into `outDir`.
+ *
+ * @param outDir Directory that receives the fixture.
+ * @returns `{ repoDir, goldPath, predPath, resultPath }`.
+ */
+function createEvaluatorFixture(outDir) {
+  const probeId = 'phase37-synthetic-evaluator-probe';
+  const repoDir = join(outDir, 'probe-repo');
+  const goldPath = join(outDir, 'gold.json');
+  const predPath = join(outDir, 'prediction.json');
+  const resultPath = join(outDir, 'results.jsonl');
+
+  rmSync(repoDir, { recursive: true, force: true });
+  mkdirSync(repoDir, { recursive: true });
+  writeFileSync(join(repoDir, 'sample.py'), 'def target():\n return 42\n', 'utf8');
+
+  const setupCommands = [
+    ['init'],
+    ['config', 'user.email', 'contextbench-probe@example.invalid'],
+    ['config', 'user.name', 'ContextBench Probe'],
+    ['add', 'sample.py'],
+    ['commit', '-m', 'probe fixture']
+  ];
+  for (const command of setupCommands) git(repoDir, command);
+  const commit = git(repoDir, ['rev-parse', 'HEAD']);
+
+  writeJson(goldPath, {
+    inst_id: probeId,
+    original_inst_id: probeId,
+    repo_url: repoDir,
+    commit,
+    gold_ctx: [
+      { file: 'sample.py', start_line: 1, end_line: 2, content: 'def target():\n return 42' }
+    ],
+    patch: ''
+  });
+  writeJson(predPath, {
+    instance_id: probeId,
+    repo_url: repoDir,
+    commit,
+    traj_data: {
+      pred_steps: [{ files: ['sample.py'], spans: { 'sample.py': [{ start: 1, end: 2 }] } }],
+      pred_files: ['sample.py'],
+      pred_spans: { 'sample.py': [{ start: 1, end: 2 }] }
+    },
+    model_patch: ''
+  });
+
+  return { repoDir, goldPath, predPath, resultPath };
+}
+
+/**
+ * Run `python -m contextbench.evaluate` against the synthetic fixture and
+ * record the outcome in `outDir/probe-summary.json`.
+ *
+ * The module is first looked up from the current working directory. When
+ * Python reports 'No module named', the official repository is shallow-cloned
+ * into `outDir/ContextBench-official` (unless a readable copy is already
+ * there) and the evaluator is run from that directory instead.
+ *
+ * @param outDir Directory for the fixture, the optional clone, and the results.
+ * @throws Error when the availability check fails for another reason, when the
+ *   clone fails, or when the evaluator exits with a non-zero status.
+ */
+function probeEvaluator(outDir) {
+  mkdirSync(outDir, { recursive: true });
+  const fixture = createEvaluatorFixture(outDir);
+  const officialRepoDir = join(outDir, 'ContextBench-official');
+  const summaryPath = join(outDir, 'probe-summary.json');
+  const evaluatorModule = ['-m', 'contextbench.evaluate'];
+
+  const availability = run('python', [...evaluatorModule, '--help'], process.cwd());
+  let evaluatorCwd = process.cwd();
+  if (availability.status !== 0) {
+    const moduleMissing = availability.stderr.includes('No module named');
+    if (!moduleMissing) {
+      throw new Error(
+        `official evaluator availability check failed: ${availability.stderr || availability.stdout}`
+      );
+    }
+    if (!readableOfficialEvaluator(officialRepoDir)) {
+      rmSync(officialRepoDir, { recursive: true, force: true });
+      const cloneArgs = [
+        '-c',
+        'core.longpaths=true',
+        'clone',
+        '--depth',
+        '1',
+        'https://github.com/EuniAI/ContextBench.git',
+        officialRepoDir
+      ];
+      const clone = run('git', cloneArgs, outDir);
+      if (clone.status !== 0) {
+        throw new Error(
+          `failed to clone official ContextBench repository: ${clone.stderr || clone.stdout}`
+        );
+      }
+    }
+    evaluatorCwd = officialRepoDir;
+  }
+
+  const evaluatorArgs = [
+    ...evaluatorModule,
+    '--gold',
+    fixture.goldPath,
+    '--pred',
+    fixture.predPath,
+    '--cache',
+    join(outDir, 'repo-cache'),
+    '--out',
+    fixture.resultPath
+  ];
+  const result = run('python', evaluatorArgs, evaluatorCwd);
+  const passed = result.status === 0;
+
+  // The summary is written before failing so the captured output can be inspected.
+  writeJson(summaryPath, {
+    status: passed ? 'passed' : 'failed',
+    command: ['python', ...evaluatorArgs].join(' '),
+    synthetic_fixture_only: true,
+    lane_outputs_observed: false,
+    stdout: result.stdout,
+    stderr: result.stderr
+  });
+  if (!passed) throw new Error(`official evaluator probe failed; see ${summaryPath}`);
+  console.log(`official evaluator probe passed: ${fixture.resultPath}`);
+}
+
+/**
+ * Report whether `contextbench/evaluate.py` can be read under the given directory.
+ *
+ * @param path Directory expected to hold a ContextBench checkout.
+ * @returns true when the file was read successfully, false on any error.
+ */
+function readableOfficialEvaluator(path) {
+  let readable = true;
+  try {
+    readFileSync(join(path, 'contextbench', 'evaluate.py'), 'utf8');
+  } catch {
+    readable = false;
+  }
+  return readable;
+}
+
+/**
+ * Build the part of a payload document that is covered by `payload_hash`.
+ *
+ * The returned copy omits the top-level `payload_hash`, `checkout_root` and
+ * `updated_at`, and omits `repo_checkout_path`, `repo_status_short` and
+ * `materialized_at` from every task. The input is not modified.
+ *
+ * @param payload Payload document.
+ * @returns The reduced copy; `tasks` is always an array.
+ */
+function payloadHashBase(payload) {
+  const stripTask = (task) => {
+    const { repo_checkout_path, repo_status_short, materialized_at, ...kept } = { ...task };
+    return kept;
+  };
+  const tasks = (payload.tasks ?? []).map((task) => stripTask(task));
+  const { payload_hash, checkout_root, updated_at, ...rest } = { ...payload, tasks };
+  return rest;
+}
+
+/**
+ * Return a copy of the payload with `payload_hash` set to the hashObject() of
+ * its payloadHashBase().
+ *
+ * @param payload Payload document.
+ * @returns The payload with a fresh `payload_hash`.
+ */
+function withPayloadHash(payload) {
+  const payload_hash = hashObject(payloadHashBase(payload));
+  return { ...payload, payload_hash };
+}
+
+/**
+ * Run a git command with `core.longpaths=true` and `core.autocrlf=false`
+ * overrides, tolerating failure.
+ *
+ * @param cwd Working directory for git.
+ * @param args git arguments placed after the overrides.
+ * @returns Trimmed stdout on exit status 0, otherwise null.
+ */
+function gitMaybe(cwd, args) {
+  const configOverrides = ['-c', 'core.longpaths=true', '-c', 'core.autocrlf=false'];
+  const { status, stdout } = runQuiet('git', [...configOverrides, ...args], cwd);
+  if (status !== 0) return null;
+  return stdout.trim();
+}
+
+/**
+ * Run a git command with `core.longpaths=true` and `core.autocrlf=false`
+ * overrides and require it to succeed.
+ *
+ * @param cwd Working directory for git.
+ * @param args git arguments placed after the overrides.
+ * @returns Trimmed stdout.
+ * @throws Error with stderr (or stdout when stderr is empty) on a non-zero exit status.
+ */
+function gitRequired(cwd, args) {
+  const configOverrides = ['-c', 'core.longpaths=true', '-c', 'core.autocrlf=false'];
+  const { status, stdout, stderr } = runQuiet('git', [...configOverrides, ...args], cwd);
+  if (status === 0) return stdout.trim();
+  throw new Error(`git ${args.join(' ')} failed: ${stderr || stdout}`);
+}
+
+/**
+ * Make sure a task's repository is checked out at its base commit.
+ *
+ * If the checkout path does not hold a readable git checkout, it is deleted and
+ * the repository is cloned there without a working tree. The base commit is
+ * fetched when HEAD is not already on it (shallow first, full fetch as a
+ * fallback), then force-checked-out detached, and untracked files are removed.
+ *
+ * `repo_url` and `base_commit` come from the payload file and end up on the git
+ * command line, so they are validated first and the clone URL is passed after
+ * `--`; a value such as `--upload-pack=...` can no longer be read as an option.
+ *
+ * @param task Payload task with `instance_id`, `repo_checkout_path`, `repo_url`
+ *   and `base_commit`.
+ * @returns A copy of the task with the absolute checkout path, the verification
+ *   fields (`repo_checkout_status`, `repo_actual_head`, `base_commit_verified`,
+ *   `repo_status_short`, `repo_clean_verified`) and `materialized_at`.
+ * @throws Error when a required field is missing or unsafe, or when a required
+ *   git command fails.
+ */
+function cloneCheckout(task) {
+  const checkoutPath = task.repo_checkout_path;
+  if (!checkoutPath) throw new Error(`${task.instance_id}: missing repo_checkout_path`);
+  if (!task.repo_url) throw new Error(`${task.instance_id}: missing repo_url`);
+  if (!task.base_commit) throw new Error(`${task.instance_id}: missing base_commit`);
+  // Guard against option injection through payload-supplied values.
+  if (typeof task.base_commit !== 'string' || !/^[0-9a-fA-F]{7,64}$/.test(task.base_commit))
+    throw new Error(`${task.instance_id}: base_commit is not a hexadecimal commit id`);
+  if (typeof task.repo_url !== 'string' || task.repo_url.startsWith('-'))
+    throw new Error(`${task.instance_id}: repo_url must not start with '-'`);
+
+  const absoluteCheckoutPath = resolve(checkoutPath);
+  if (!readableGitCheckout(absoluteCheckoutPath)) {
+    rmSync(absoluteCheckoutPath, { recursive: true, force: true });
+    mkdirSync(dirname(absoluteCheckoutPath), { recursive: true });
+    const clone = runQuiet(
+      'git',
+      [
+        '-c',
+        'core.longpaths=true',
+        '-c',
+        'core.autocrlf=false',
+        'clone',
+        '--no-checkout',
+        '--',
+        task.repo_url,
+        absoluteCheckoutPath
+      ],
+      process.cwd()
+    );
+    if (clone.status !== 0)
+      throw new Error(`${task.instance_id}: git clone failed: ${clone.stderr || clone.stdout}`);
+  }
+  gitRequired(absoluteCheckoutPath, ['config', 'core.longpaths', 'true']);
+  gitRequired(absoluteCheckoutPath, ['config', 'core.autocrlf', 'false']);
+  const currentHead = gitMaybe(absoluteCheckoutPath, ['rev-parse', 'HEAD']);
+  if (currentHead !== task.base_commit) {
+    const shallowFetch = runQuiet(
+      'git',
+      ['-c', 'core.longpaths=true', 'fetch', '--depth', '1', 'origin', task.base_commit],
+      absoluteCheckoutPath
+    );
+    // Fall back to a regular fetch when the shallow fetch of the commit fails.
+    if (shallowFetch.status !== 0) {
+      gitRequired(absoluteCheckoutPath, ['fetch', 'origin', task.base_commit]);
+    }
+  }
+  gitRequired(absoluteCheckoutPath, ['checkout', '--force', '--detach', task.base_commit]);
+  gitRequired(absoluteCheckoutPath, ['clean', '-fd']);
+  const actualHead = gitRequired(absoluteCheckoutPath, ['rev-parse', 'HEAD']);
+  const statusShort = gitRequired(absoluteCheckoutPath, ['status', '--short']);
+  return {
+    ...task,
+    repo_checkout_path: absoluteCheckoutPath,
+    repo_checkout_status:
+      actualHead === task.base_commit && !statusShort ? 'verified' : 'not_clean_or_wrong_commit',
+    repo_actual_head: actualHead,
+    base_commit_verified: actualHead === task.base_commit,
+    repo_status_short: statusShort,
+    repo_clean_verified: statusShort === '',
+    materialized_at: new Date().toISOString()
+  };
+}
+
+/**
+ * Report whether a directory looks like a usable git checkout.
+ *
+ * @param path Directory to inspect.
+ * @returns false when the path does not exist; true when `.git/HEAD` can be
+ *   read; otherwise whether `git rev-parse --git-dir` succeeds there.
+ */
+function readableGitCheckout(path) {
+  if (!existsSync(path)) return false;
+  let headReadable = true;
+  try {
+    readFileSync(join(path, '.git', 'HEAD'), 'utf8');
+  } catch {
+    headReadable = false;
+  }
+  if (headReadable) return true;
+  return gitMaybe(path, ['rev-parse', '--git-dir']) !== null;
+}
+
+/**
+ * Materialize the checkouts listed in a payload file and rewrite that file in
+ * place with the results, a new `updated_at`, and a recomputed `payload_hash`.
+ *
+ * @param args Parsed options; `payloads` names the payload file and `maxTasks`
+ *   (a positive integer) limits how many tasks are processed. Any other
+ *   `maxTasks` value means no limit. Tasks beyond the limit are kept unchanged.
+ * @throws Error when `args.payloads` is empty or a checkout fails.
+ */
+function materializeCheckouts(args) {
+  if (!args.payloads) throw new Error('--materialize-checkouts requires --payloads ');
+  const payloadPath = resolve(args.payloads);
+  const payload = JSON.parse(readFileSync(payloadPath, 'utf8'));
+  const hasLimit = Number.isInteger(args.maxTasks) && args.maxTasks > 0;
+  const limit = hasLimit ? args.maxTasks : Infinity;
+
+  let attempted = 0;
+  const tasks = (payload.tasks ?? []).map((task) => {
+    if (attempted >= limit) return task;
+    const materialized = cloneCheckout(task);
+    attempted += 1;
+    return materialized;
+  });
+
+  const updated_at = new Date().toISOString();
+  writeJson(payloadPath, withPayloadHash({ ...payload, tasks, updated_at }));
+  console.log(`materialized ${attempted} checkout(s) in ${payloadPath}`);
+}
+
+/**
+ * Write the scorer-only gold input for one manifest task, plus a
+ * `.summary.json` file next to it.
+ *
+ * The task is looked up in the manifest by `args.taskId` and its dataset row by
+ * `instance_id`; the row's gold context must hash to the value recorded in the
+ * manifest. When the payload file marks the task's checkout as verified, the
+ * local checkout path is used as `repo_url`; otherwise the task's own repo_url is.
+ *
+ * @param rows Dataset rows.
+ * @param args Parsed options (`out`, `taskId`, `manifest`, optional `payloads`).
+ * @throws Error when `out` or `taskId` is missing, the task or its row cannot
+ *   be found, or the gold context hash does not match.
+ */
+function writeGoldInput(rows, args) {
+  if (!args.out) throw new Error('--write-gold requires --out ');
+  if (!args.taskId) throw new Error('--write-gold requires --task-id ');
+
+  const readJsonFile = (file) => JSON.parse(readFileSync(resolve(file), 'utf8'));
+  const manifest = readJsonFile(args.manifest);
+  const payloads = args.payloads ? readJsonFile(args.payloads) : { tasks: [] };
+  const payloadById = new Map((payloads.tasks ?? []).map((entry) => [entry.instance_id, entry]));
+  const rowById = new Map(rows.map((entry) => [entry.instance_id, entry]));
+
+  const task = (manifest.tasks ?? []).find((candidate) => candidate.instance_id === args.taskId);
+  if (!task) throw new Error(`task ${args.taskId} is not present in manifest ${args.manifest}`);
+  const row = rowById.get(task.instance_id);
+  if (!row) throw new Error(`task ${task.instance_id} is not present in dataset rows`);
+
+  const goldHash = sha256(canonicalize(row.gold_context));
+  if (goldHash !== task.gold_context_hash) {
+    throw new Error(`task ${task.instance_id} gold_context_hash mismatch`);
+  }
+
+  const payload = payloadById.get(task.instance_id);
+  let repoUrl = task.repo_url;
+  if (isVerifiedCheckoutPayload(payload, task)) repoUrl = payload.repo_checkout_path;
+
+  const outPath = resolve(args.out);
+  writeJson(outPath, {
+    inst_id: task.instance_id,
+    original_inst_id: task.original_inst_id,
+    repo_url: repoUrl,
+    commit: task.base_commit,
+    gold_ctx: JSON.parse(row.gold_context),
+    patch: row.patch
+  });
+  writeJson(`${outPath}.summary.json`, {
+    claimBearing: false,
+    scorerOnly: true,
+    lane_outputs_observed: false,
+    task_id: task.instance_id,
+    original_repo_url: task.repo_url,
+    scorer_repo_url: repoUrl,
+    commit: task.base_commit,
+    gold_context_hash: goldHash,
+    gold_context_hash_verified: true,
+    payload_hash: payloads.payload_hash ?? null
+  });
+  console.log(`wrote scorer-only gold input ${outPath}`);
+}
+
+/**
+ * Decide whether a payload task describes a verified local checkout of the
+ * given manifest task.
+ *
+ * @param payload Payload task, or undefined when none exists.
+ * @param task Manifest task supplying the expected `base_commit`.
+ * @returns true only when the status is 'verified', the checkout path is a
+ *   non-empty string, the recorded HEAD equals the base commit, and both
+ *   verification flags are exactly true.
+ */
+function isVerifiedCheckoutPayload(payload, task) {
+  if (payload?.repo_checkout_status !== 'verified') return false;
+  const checkoutPath = payload.repo_checkout_path;
+  if (typeof checkoutPath !== 'string' || checkoutPath.length === 0) return false;
+  if (payload.repo_actual_head !== task.base_commit) return false;
+  return payload.base_commit_verified === true && payload.repo_clean_verified === true;
+}
+
+/**
+ * Command-line entry point: parse the arguments and run the requested mode.
+ *
+ * Modes are checked in a fixed order. --probe-evaluator, --materialize-checkouts,
+ * --check without --rows-file, --write-gold and --write-task-payloads each
+ * return when done. --dry-run, --write-fixtures and --check with --rows-file do
+ * not return early, so any of them given together all run on the same artifacts.
+ */
+async function main() {
+ const args = parseArgs(process.argv.slice(2));
+ // Show usage for --help and also when no arguments were given at all.
+ if (args.help || process.argv.length <= 2) {
+ help();
+ return;
+ }
+ if (args.probeEvaluator) {
+ if (!args.out) throw new Error('--probe-evaluator requires --out ');
+ probeEvaluator(resolve(args.out));
+ return;
+ }
+
+ if (args.materializeCheckouts) {
+ materializeCheckouts(args);
+ return;
+ }
+
+ // Without a rows file only the manifest's own consistency is checked; no rows are loaded.
+ if (args.check && !args.rowsFile) {
+ const manifest = JSON.parse(readFileSync(resolve(args.check), 'utf8'));
+ const failures = verifyManifest(manifest);
+ if (failures.length > 0) throw new Error(`manifest check failed:\n- ${failures.join('\n- ')}`);
+ console.log(`manifest self-check passed: ${args.check}`);
+ return;
+ }
+
+ // Every remaining mode needs dataset rows (from --rows-file, or downloaded otherwise).
+ const rows = await loadRowsForArgs(args);
+
+ if (args.writeGold) {
+ writeGoldInput(rows, args);
+ return;
+ }
+
+ if (args.writeTaskPayloads) {
+ if (!args.out) throw new Error('--write-task-payloads requires --out ');
+ const manifest = JSON.parse(readFileSync(resolve(args.manifest), 'utf8'));
+ const payloads = buildTaskPayloads(rows, manifest, args.checkoutRoot);
+ writeJson(resolve(args.out), payloads);
+ console.log(`wrote task payloads ${resolve(args.out)}`);
+ return;
+ }
+
+ // Selection runs once; the modes below share its result.
+ const artifacts = buildArtifacts(rows);
+
+ if (args.dryRun) {
+ if (!args.out) throw new Error('--dry-run requires --out ');
+ const outDir = resolve(args.out);
+ writeJson(join(outDir, 'contextbench-selection-exclusions.json'), artifacts.exclusions);
+ writeJson(join(outDir, 'contextbench-dry-run-summary.json'), {
+ dataset: DATASET,
+ datasetConfig: DATASET_CONFIG,
+ row_count: rows.length,
+ eligible_row_count: artifacts.eligible.length,
+ selected_preview: artifacts.manifest.tasks.map((task) => task.instance_id),
+ task_pool_hash: artifacts.manifest.task_pool_hash,
+ hardness_signal_status: HARDNESS_STATUS,
+ hardness_proxy_used: false
+ });
+ console.log(`dry-run wrote ${outDir}`);
+ }
+
+ // Fixture paths are relative, so they resolve against the current working directory.
+ if (args.writeFixtures) {
+ writeJson('tests/fixtures/contextbench-task-manifest.json', artifacts.manifest);
+ writeJson('tests/fixtures/contextbench-selection-exclusions.json', artifacts.exclusions);
+ console.log('wrote tests/fixtures/contextbench-task-manifest.json');
+ console.log('wrote tests/fixtures/contextbench-selection-exclusions.json');
+ }
+
+ // Reached only with --rows-file: compare the stored manifest with the recomputed one.
+ if (args.check) {
+ const manifest = JSON.parse(readFileSync(resolve(args.check), 'utf8'));
+ const failures = verifyManifest(manifest, artifacts.manifest);
+ if (failures.length > 0) throw new Error(`manifest check failed:\n- ${failures.join('\n- ')}`);
+ console.log(`manifest check passed: ${args.check}`);
+ }
+}
+
+// Run the tool; on failure print only the error message and set exit code 1.
+main().catch((error) => {
+ console.error(error instanceof Error ? error.message : String(error));
+ process.exitCode = 1;
+});
diff --git a/tests/contextbench-protocol.test.ts b/tests/contextbench-protocol.test.ts
new file mode 100644
index 0000000..af500c4
--- /dev/null
+++ b/tests/contextbench-protocol.test.ts
@@ -0,0 +1,496 @@
+import { describe, expect, it } from 'vitest';
+import protocolFixture from './fixtures/contextbench-benchmark-protocol.json';
+import correctionsFixture from './fixtures/contextbench-corrections.json';
+import lanesFixture from './fixtures/contextbench-lanes.json';
+
+type ProtocolFixture = { // typed view of contextbench-benchmark-protocol.json read by the assertions in this file
+  name: string;
+  protocolVersion: string;
+  status: string;
+  claimAllowed: boolean;
+  phaseBoundary: {
+    phase36Freezes: string[];
+    phase37Freezes: string[];
+    phase36MustNotFreeze: string[];
+  };
+  benchmarkTarget: {
+    primary: string;
+    datasetConfig: string;
+    officialEvaluatorFirst: boolean;
+    fallbackScorerPolicy: {
+      claimBearing: boolean;
+      requiresValidationAgainstOfficialOutputs: boolean;
+    };
+  };
+  taskSlicePolicy: {
+    sliceKind: string;
+    taskCount: { min: number; max: number };
+    selectedInPhase: number;
+    phase36SelectionSchemaOnly: boolean;
+    requiredManifestFields: string[];
+    selectionMethodRequiredFields: string[];
+    coverageConstraints: {
+      minRepos: number;
+      minLanguages: number;
+      selectionBeforeOutputs: boolean;
+    };
+    hardnessSignalPolicy: {
+      required: boolean;
+      status: string;
+      proxyAllowed: boolean;
+      selectionMustRecordAbsence: boolean;
+    };
+    forbiddenSources: string[];
+  };
+  smokeOnlyCorpora: Array<{ name: string; claimBearing: boolean }>;
+  runPolicy: {
+    smokeRunsPerTaskLane: number;
+    claimBearingRunsPerTaskLane: number;
+    fewerThanClaimRunsMeans: string;
+    bestOfNReportingAllowed: boolean;
+  };
+  minimalRunnerBehavior: { mustNotScript: string[] };
+  structuredAnswerSchema: { requiredFields: string[]; invalidSchemaStatus: string };
+  trajectorySchema: { requiredFields: string[]; rawTracePreservationRequired: boolean };
+  metrics: {
+    primary: string[];
+    secondary: string[];
+    efficiencyIsSecondary: boolean;
+    tokenSavingsWinRequiresCorrectnessNonRegression: boolean;
+  };
+  factRecallJudgeScope: {
+    allowedOnlyFor: string[];
+    forbiddenFor: string[];
+    uncertainCountsAsSuccess: boolean;
+  };
+  budgets: {
+    sameModelAcrossLanes: boolean;
+    setupAndIndexingReportedSeparately: boolean;
+    defaults: Record<string, number>; // tests compare defaults.maxContextTokens numerically; NOTE(review): assumes every default is numeric
+  };
+  thresholds: {
+    claimBearingRunsPerTaskLane: number;
+    setupFailuresBlockBroadClaims: boolean;
+    thresholdChangesRequireCorrection: boolean;
+    wedgeWinRequires: string[];
+  };
+  failureTaxonomy: string[];
+  runManifestSchema: {
+    appendOnly: boolean;
+    claimRunsRequireSlotsForEveryTaskLaneRepeat: boolean;
+    requiredFields: string[];
+    terminalStatuses: string[];
+    failedRunsIncludedInAggregates: boolean;
+  };
+  protocolFingerprint: { required: boolean; covers: string[] };
+  architectureReviewRule: {
+    requiredBeforePostBaselineProductChanges: boolean;
+    mustRejectTaskSpecificHeuristics: boolean;
+    requiresFrozenRerun: boolean;
+  };
+  postBaselineCycleGate: {
+    maxImprovementCyclesBeforeDecision: number;
+    allowedDecisions: string[];
+    noDecisionMeans: string;
+  };
+  tripwires: string[];
+  blockedClaims: string[];
+};
+
+type CorrectionsFixture = { // typed view of contextbench-corrections.json
+  protocolVersion: string;
+  corrections: Array<Record<string, unknown>>; // entries are indexed by names taken from policy.requiredCorrectionFields
+  policy: {
+    silentChangesAllowed: boolean;
+    allowedReasonCategories: string[];
+    requiresProtocolVersionBumpFor: string[];
+    requiredCorrectionFields: string[];
+    forbiddenReasons: string[];
+    anyFixtureChangeRequiresCorrection: boolean;
+    comparisonAcrossVersionsRequiresFullRerun: boolean;
+  };
+};
+
+type Lane = { // one entry of the lanes fixture's `lanes` array
+ laneId: string; // 'raw-native' is the only id the tests special-case
+ phase36Status: string;
+ contextTool: string; // tests require this to be absent from disallowedTools
+ allowedTools: string[];
+ disallowedTools: string[];
+ nativeToolsAllowed: boolean; // asserted true only for the 'raw-native' lane
+ setupCostReportedSeparately: boolean;
+ indexCostReportedSeparately: boolean;
+ cacheIsolationRequired: boolean;
+};
+
+type LanesFixture = { // typed view of contextbench-lanes.json
+  protocolVersion: string;
+  initialExternalGate: string[];
+  broadClaimLaneSet: string[];
+  broadClaimsRequireAllLanesComplete: boolean;
+  setupFailedRequiredLaneBlocksBroadClaims: boolean;
+  lanes: Lane[];
+  setupFailureSemantics: {
+    status: string;
+    winEligible: boolean;
+    claimContribution: string;
+    includedInPublicationRows: boolean;
+    blocksBroadClaimsForRequiredLane: boolean;
+    requiresReproductionCommand: boolean;
+    requiresLogs: boolean;
+  };
+  laneContaminationRules: Record<string, boolean>; // tests read named rules and expect `true`; NOTE(review): assumes all rules are boolean flags
+  laneToolCardRequiredFields: string[];
+};
+
+const protocol = protocolFixture as ProtocolFixture; // type assertions only: the JSON is not validated at runtime here
+const corrections = correctionsFixture as CorrectionsFixture;
+const lanes = lanesFixture as LanesFixture;
+
+const requiredFailureStatuses = [ // expected failure taxonomy; order matters because it is compared with toEqual
+ 'setup_failed',
+ 'task_setup_failed',
+ 'index_failed',
+ 'timeout',
+ 'invalid_schema',
+ 'no_answer',
+ 'wrong_answer',
+ 'wrong_evidence',
+ 'unsupported_claim',
+ 'false_ready',
+ 'tool_error',
+ 'judge_failed'
+];
+
+describe('ContextBench benchmark protocol invariants', () => {
+ it('keeps Phase 36 schema-only and leaves actual task identity freeze to Phase 37', () => {
+ expect(protocol.benchmarkTarget.primary).toBe('ContextBench');
+ expect(protocol.benchmarkTarget.datasetConfig).toBe('contextbench_verified');
+ expect(protocol.taskSlicePolicy.phase36SelectionSchemaOnly).toBe(true);
+ expect(protocol.taskSlicePolicy.selectedInPhase).toBe(37);
+ expect(protocol.taskSlicePolicy.taskCount).toEqual({ min: 20, max: 50 });
+ expect(protocol.phaseBoundary.phase36MustNotFreeze).toContain('actual_task_ids');
+ expect(protocol.phaseBoundary.phase36MustNotFreeze).toContain('actual_repo_commits');
+ expect(protocol.phaseBoundary.phase37Freezes).toContain('actual_contextbench_instance_ids');
+ expect(protocol.taskSlicePolicy.requiredManifestFields).toEqual(
+ expect.arrayContaining([
+ 'instance_id',
+ 'repo_url',
+ 'base_commit',
+ 'problem_statement_hash',
+ 'gold_context_hash',
+ 'patch_hash',
+ 'test_patch_hash'
+ ])
+ );
+ expect(protocol.taskSlicePolicy.selectionMethodRequiredFields).toEqual(
+ expect.arrayContaining([
+ 'selection_algorithm',
+ 'task_pool_hash',
+ 'selection_timestamp',
+ 'inclusion_rationale',
+ 'exclusion_log_path',
+ 'no_lane_outputs_observed_attestation'
+ ])
+ );
+ expect(protocol.taskSlicePolicy.forbiddenSources).toEqual(
+ expect.arrayContaining([
+ 'agent_outputs',
+ 'codebase_context_outputs',
+ 'competitor_outputs',
+ 'post_failure_task_filtering'
+ ])
+ );
+ expect(protocol.taskSlicePolicy.coverageConstraints.minRepos).toBeGreaterThanOrEqual(2);
+ expect(protocol.taskSlicePolicy.coverageConstraints.minLanguages).toBeGreaterThanOrEqual(2);
+ expect(protocol.taskSlicePolicy.coverageConstraints.selectionBeforeOutputs).toBe(true);
+ expect(protocol.taskSlicePolicy.hardnessSignalPolicy).toEqual({
+ required: false,
+ status: 'unavailable_in_contextbench_verified_schema',
+ proxyAllowed: false,
+ selectionMustRecordAbsence: true
+ });
+ });
+
+ it('records unavailable hardness as a schema fact and forbids proxy scoring', () => {
+ expect(protocol.taskSlicePolicy.hardnessSignalPolicy.required).toBe(false);
+ expect(protocol.taskSlicePolicy.hardnessSignalPolicy.status).toBe(
+ 'unavailable_in_contextbench_verified_schema'
+ );
+ expect(protocol.taskSlicePolicy.hardnessSignalPolicy.proxyAllowed).toBe(false);
+ expect(protocol.taskSlicePolicy.hardnessSignalPolicy.selectionMustRecordAbsence).toBe(true);
+ expect(JSON.stringify(protocol)).not.toContain('mustIncludeHardTasks');
+ });
+
+ it('freezes smoke and claim-bearing run-count policy', () => {
+ expect(protocol.runPolicy.smokeRunsPerTaskLane).toBe(1);
+ expect(protocol.runPolicy.claimBearingRunsPerTaskLane).toBe(3);
+ expect(protocol.runPolicy.fewerThanClaimRunsMeans).toBe('diagnostic_only_claim_allowed_false');
+ expect(protocol.runPolicy.bestOfNReportingAllowed).toBe(false);
+ expect(protocol.thresholds.claimBearingRunsPerTaskLane).toBe(
+ protocol.runPolicy.claimBearingRunsPerTaskLane
+ );
+ });
+
+ it('keeps smoke corpora non-claim-bearing and blocks public claims before evidence', () => {
+ expect(protocol.claimAllowed).toBe(false);
+ expect(protocol.smokeOnlyCorpora).toEqual(
+ expect.arrayContaining([
+ expect.objectContaining({ name: 'Excalidraw', claimBearing: false }),
+ expect.objectContaining({ name: 'FastAPI', claimBearing: false })
+ ])
+ );
+ expect(protocol.blockedClaims).toEqual(
+ expect.arrayContaining([
+ 'codebase_context_beats_competitors',
+ 'codebase_context_improves_patch_correctness',
+ 'focus_mode_improves_agent_outcomes',
+ 'token_savings_superiority',
+ 'setup_failed_competitor_is_loss'
+ ])
+ );
+ });
+
+ it('uses official ContextBench scoring first and constrains fallback scorer claims', () => {
+ expect(protocol.benchmarkTarget.officialEvaluatorFirst).toBe(true);
+ expect(protocol.benchmarkTarget.fallbackScorerPolicy.claimBearing).toBe(false);
+ expect(
+ protocol.benchmarkTarget.fallbackScorerPolicy.requiresValidationAgainstOfficialOutputs
+ ).toBe(true);
+ expect(protocol.tripwires).toContain(
+ 'official_evaluator_bypassed_without_documented_incompatibility'
+ );
+ });
+
+ it('freezes runner boundaries, structured answers, budgets, and judge scope', () => {
+ expect(protocol.minimalRunnerBehavior.mustNotScript).toEqual(
+ expect.arrayContaining([
+ 'agent_decisions',
+ 'file_selection',
+ 'query_rewrites',
+ 'evidence_selection'
+ ])
+ );
+ expect(protocol.structuredAnswerSchema.requiredFields).toEqual(
+ expect.arrayContaining([
+ 'answer',
+ 'confidence',
+ 'evidence',
+ 'filesReferenced',
+ 'unsupportedClaims',
+ 'readyToEdit'
+ ])
+ );
+ expect(protocol.structuredAnswerSchema.invalidSchemaStatus).toBe('invalid_schema');
+ expect(protocol.trajectorySchema.requiredFields).toEqual([
+ 'pred_steps',
+ 'pred_files',
+ 'pred_spans'
+ ]);
+ expect(protocol.trajectorySchema.rawTracePreservationRequired).toBe(true);
+ expect(protocol.budgets.sameModelAcrossLanes).toBe(true);
+ expect(protocol.budgets.setupAndIndexingReportedSeparately).toBe(true);
+ expect(protocol.budgets.defaults.maxContextTokens).toBeGreaterThan(0);
+ expect(protocol.factRecallJudgeScope.forbiddenFor).toContain('broad_rubric_vibes');
+ expect(protocol.factRecallJudgeScope.uncertainCountsAsSuccess).toBe(false);
+ });
+
+ it('prioritizes correctness metrics over token efficiency', () => {
+ expect(protocol.metrics.primary).toEqual(
+ expect.arrayContaining([
+ 'context_file_recall',
+ 'context_file_precision',
+ 'context_symbol_recall',
+ 'context_symbol_precision',
+ 'context_span_recall',
+ 'context_span_precision',
+ 'edit_location_recall',
+ 'edit_location_precision'
+ ])
+ );
+ expect(protocol.metrics.efficiencyIsSecondary).toBe(true);
+ expect(protocol.metrics.tokenSavingsWinRequiresCorrectnessNonRegression).toBe(true);
+ expect(protocol.thresholds.wedgeWinRequires).toEqual(
+ expect.arrayContaining(['no_correctness_regression', 'false_ready_rate_not_worse'])
+ );
+ });
+
+ it('keeps the full failure taxonomy visible in terminal run statuses', () => {
+ expect(protocol.failureTaxonomy).toEqual(requiredFailureStatuses);
+ expect(protocol.runManifestSchema.terminalStatuses).toEqual(
+ expect.arrayContaining(requiredFailureStatuses)
+ );
+ expect(protocol.runManifestSchema.appendOnly).toBe(true);
+ expect(protocol.runManifestSchema.claimRunsRequireSlotsForEveryTaskLaneRepeat).toBe(true);
+ expect(protocol.runManifestSchema.failedRunsIncludedInAggregates).toBe(true);
+ expect(protocol.runManifestSchema.requiredFields).toEqual(
+ expect.arrayContaining([
+ 'protocol_hash',
+ 'task_manifest_hash',
+ 'raw_trace_path',
+ 'score_path'
+ ])
+ );
+ });
+
+ it('requires protocol fingerprinting and correction-backed governance changes', () => {
+ expect(protocol.protocolFingerprint.required).toBe(true);
+ expect(protocol.protocolFingerprint.covers).toEqual(
+ expect.arrayContaining([
+ 'protocol_fixture',
+ 'lane_fixture',
+ 'correction_fixture',
+ 'task_manifest_after_phase37'
+ ])
+ );
+ expect(corrections.policy.silentChangesAllowed).toBe(false);
+ expect(corrections.policy.anyFixtureChangeRequiresCorrection).toBe(true);
+ expect(corrections.policy.comparisonAcrossVersionsRequiresFullRerun).toBe(true);
+ expect(corrections.policy.requiresProtocolVersionBumpFor).toEqual(
+ expect.arrayContaining([
+ 'task_ids',
+ 'repo_commits',
+ 'qrels',
+ 'thresholds',
+ 'metrics',
+ 'failure_taxonomy',
+ 'terminal_statuses',
+ 'blocked_claims',
+ 'lane_sets',
+ 'setup_failure_semantics',
+ 'correction_policy'
+ ])
+ );
+ expect(corrections.policy.requiredCorrectionFields).toEqual(
+ expect.arrayContaining([
+ 'correction_id',
+ 'reason_category',
+ 'prior_hash',
+ 'new_hash',
+ 'protocol_version_before',
+ 'protocol_version_after'
+ ])
+ );
+ for (const correction of corrections.corrections) {
+ for (const field of corrections.policy.requiredCorrectionFields) {
+ expect(correction[field]).toBeTruthy();
+ }
+ expect(corrections.policy.allowedReasonCategories).toContain(correction.reason_category);
+ expect(corrections.policy.forbiddenReasons).not.toContain(correction.reason_category);
+ }
+ expect(corrections.corrections).toEqual(
+ expect.arrayContaining([
+ expect.objectContaining({
+ correction_id: 'contextbench-hardness-signal-policy-2026-04-27',
+ reason_category: 'factual_erratum',
+ affected_fields: expect.arrayContaining([
+ 'taskSlicePolicy.coverageConstraints.mustIncludeHardTasks',
+ 'taskSlicePolicy.hardnessSignalPolicy'
+ ]),
+ prior_hash: expect.stringMatching(/^sha256:[a-f0-9]{64}$/),
+ new_hash: expect.stringMatching(/^sha256:[a-f0-9]{64}$/),
+ protocol_version_before: 'contextbench-protocol-v1',
+ protocol_version_after: 'contextbench-protocol-v1'
+ })
+ ])
+ );
+ });
+
+ it('requires architecture review and one-cycle continue/pivot/kill governance', () => {
+ expect(protocol.architectureReviewRule.requiredBeforePostBaselineProductChanges).toBe(true);
+ expect(protocol.architectureReviewRule.mustRejectTaskSpecificHeuristics).toBe(true);
+ expect(protocol.architectureReviewRule.requiresFrozenRerun).toBe(true);
+ expect(protocol.postBaselineCycleGate.maxImprovementCyclesBeforeDecision).toBe(1);
+ expect(protocol.postBaselineCycleGate.allowedDecisions).toEqual(['continue', 'pivot', 'kill']);
+ expect(protocol.postBaselineCycleGate.noDecisionMeans).toBe('stop_no_more_product_work');
+ });
+
+ it('freezes anti-gaming tripwires for output-aware edits and run manipulation', () => {
+ expect(protocol.tripwires).toEqual(
+ expect.arrayContaining([
+ 'fixture_or_qrel_changed_after_outputs',
+ 'threshold_moved_after_failures',
+ 'setup_failed_treated_as_win',
+ 'smoke_task_used_as_claim',
+ 'mixed_context_tools_in_one_lane',
+ 'product_change_before_baseline',
+ 'benchmark_repo_name_or_task_phrase_heuristic_added',
+ 'failed_run_removed_from_denominator',
+ 'best_of_n_reported_as_primary',
+ 'official_evaluator_bypassed_without_documented_incompatibility'
+ ])
+ );
+ });
+});
+
+describe('ContextBench lane governance invariants', () => {
+ it('preserves initial gate lanes and full broad-claim lane set', () => {
+ expect(lanes.initialExternalGate).toEqual([
+ 'raw-native',
+ 'codebase-context',
+ 'jcodemunch-repomapper'
+ ]);
+ expect(lanes.broadClaimLaneSet).toEqual([
+ 'raw-native',
+ 'codebase-context',
+ 'jcodemunch-repomapper',
+ 'grepai',
+ 'codebase-memory-mcp',
+ 'codegraphcontext'
+ ]);
+ expect(lanes.broadClaimsRequireAllLanesComplete).toBe(true);
+ expect(lanes.setupFailedRequiredLaneBlocksBroadClaims).toBe(true);
+ });
+
+ it('enforces exactly one context tool per lane and blocks native shell leakage', () => {
+ for (const lane of lanes.lanes) {
+ expect(lane.disallowedTools).not.toContain(lane.contextTool); // a lane must never ban its own context tool
+ expect(lane.setupCostReportedSeparately).toBe(true);
+ expect(lane.indexCostReportedSeparately).toBe(true);
+ expect(lane.cacheIsolationRequired).toBe(true);
+ if (lane.laneId === 'raw-native') { // the baseline lane is the only one allowed native tools
+ expect(lane.nativeToolsAllowed).toBe(true);
+ expect(lane.allowedTools).toEqual(
+ expect.arrayContaining(['native-read', 'native-search', 'native-shell-readonly'])
+ );
+ } else {
+ expect(lane.nativeToolsAllowed).toBe(false);
+ expect(lane.allowedTools).toEqual([lane.contextTool]); // exact match: the context tool is the lane's only allowed tool
+ expect(lane.disallowedTools).toEqual(
+ expect.arrayContaining(['native-read', 'native-search', 'native-shell-readonly'])
+ );
+ }
+ }
+ expect(lanes.laneContaminationRules.oneContextToolPerLane).toBe(true);
+ expect(lanes.laneContaminationRules.mixedLaneContextInvalidatesRun).toBe(true);
+ expect(lanes.laneContaminationRules.memoryStateMustBeIsolated).toBe(true);
+ });
+
+ it('treats setup failures as missing evidence instead of wins', () => {
+ expect(lanes.setupFailureSemantics).toMatchObject({
+ status: 'setup_failed',
+ winEligible: false,
+ claimContribution: 'missing_evidence',
+ includedInPublicationRows: true,
+ blocksBroadClaimsForRequiredLane: true,
+ requiresReproductionCommand: true,
+ requiresLogs: true
+ });
+ });
+
+ it('requires lane tool cards to make setup, index, version, cache, and artifact paths explicit', () => {
+ expect(lanes.laneToolCardRequiredFields).toEqual(
+ expect.arrayContaining([
+ 'laneId',
+ 'allowedTools',
+ 'disallowedTools',
+ 'setupCommand',
+ 'indexCommand',
+ 'queryCommand',
+ 'versionCommand',
+ 'cachePath',
+ 'artifactPaths'
+ ])
+ );
+ });
+});
diff --git a/tests/contextbench-task-manifest.test.ts b/tests/contextbench-task-manifest.test.ts
new file mode 100644
index 0000000..c8fcf00
--- /dev/null
+++ b/tests/contextbench-task-manifest.test.ts
@@ -0,0 +1,878 @@
+import { createHash } from 'node:crypto';
+import { execFileSync } from 'node:child_process';
+import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import path from 'node:path';
+import { describe, expect, it } from 'vitest';
+import correctionsFixture from './fixtures/contextbench-corrections.json';
+import lanesFixture from './fixtures/contextbench-lanes.json';
+import manifestFixture from './fixtures/contextbench-task-manifest.json';
+import protocolFixture from './fixtures/contextbench-benchmark-protocol.json';
+import selectionExclusionsFixture from './fixtures/contextbench-selection-exclusions.json';
+import smokePackFixture from './fixtures/contextbench-smoke-pack.json';
+
+type ContextBenchTask = { // one entry of the task manifest fixture's `tasks` array
+ instance_id: string; // tests require these to be unique across the manifest
+ original_inst_id: string;
+ source: string;
+ language: string;
+ repo: string;
+ repo_url: string; // tests require an https://github.com/… URL ending in .git
+ base_commit: string; // tests require 40 lowercase hex characters
+ problem_statement_ref: string;
+ problem_statement_hash: string; // every *_hash field is checked against the "sha256:<64 hex>" pattern
+ gold_context_ref: string;
+ gold_context_hash: string;
+ patch_hash: string;
+ test_patch_hash: string;
+ f2p_hash: string;
+ p2p_hash: string;
+ gold_context_span_count: number; // tests require a value greater than 0
+ hash_canonicalization_version: string;
+ hardness_signal_status: string;
+ hardness_signal_source: string;
+ hardness_proxy_used: boolean; // tests require false for every task
+ inclusion_rationale: string;
+ deterministic_rank: string; // also checked against the "sha256:<64 hex>" pattern
+};
+
+type ContextBenchManifest = { // typed view of contextbench-task-manifest.json
+  name: string;
+  protocolVersion: string;
+  dataset: string;
+  datasetConfig: string;
+  split: string;
+  claimBearing: boolean;
+  selectedInPhase: number;
+  selection_algorithm: string;
+  selection_seed_or_deterministic_order: string;
+  selection_timestamp: string;
+  task_pool_hash: string;
+  exclusion_log_path: string;
+  hash_canonicalization_version: string;
+  evaluator_success_status: string;
+  hardness_signal_status: string;
+  hardness_signal_source: string;
+  hardness_proxy_used: boolean;
+  forbidden_selection_sources: string[];
+  no_lane_outputs_observed_attestation: string;
+  summary: {
+    task_count: number;
+    language_distribution: Record<string, number>; // NOTE(review): presumably per-language task counts — confirm against the fixture
+    source_distribution: Record<string, number>; // tests only count its keys (at least 2 sources)
+    repo_distribution: Record<string, number>;
+    repo_count: number;
+    language_count: number;
+  };
+  tasks: ContextBenchTask[];
+  manifest_hash: string; // tests expect hashObject() of the manifest with this field removed
+};
+
+type SelectionExclusions = { // typed view of contextbench-selection-exclusions.json
+  protocolVersion: string;
+  dataset: string;
+  datasetConfig: string;
+  selection_algorithm: string;
+  selection_seed_or_deterministic_order: string;
+  selection_timestamp: string;
+  task_pool_hash: string;
+  hash_canonicalization_version: string;
+  hardness_signal_status: string;
+  hardness_proxy_used: boolean;
+  no_lane_outputs_observed_attestation: string;
+  input_row_count: number;
+  eligible_row_count: number;
+  selected_row_count: number;
+  excluded_rows: Array<Record<string, unknown>>; // row shape is not inspected by the tests in this file
+  non_selected_eligible_rows: Array<{
+    instance_id: string;
+    reason: string;
+    deterministic_rank: string;
+  }>;
+};
+
+type ProtocolFixture = { // narrowed view of contextbench-benchmark-protocol.json for this test file
+ claimAllowed: boolean;
+ phaseBoundary: { phase36MustNotFreeze: string[]; phase37Freezes: string[] };
+ taskSlicePolicy: {
+ coverageConstraints: { // minRepos/minLanguages are the lower bounds applied to the manifest summary
+ minRepos: number;
+ minLanguages: number;
+ selectionBeforeOutputs: boolean;
+ };
+ hardnessSignalPolicy: {
+ required: boolean;
+ status: string;
+ proxyAllowed: boolean;
+ selectionMustRecordAbsence: boolean;
+ };
+ forbiddenSources: string[];
+ };
+ smokeOnlyCorpora: Array<{ name: string; claimBearing: boolean; purpose: string }>;
+ blockedClaims: string[];
+};
+
+type CorrectionsFixture = { // narrowed view of contextbench-corrections.json for this test file
+ corrections: Array<{ correction_id: string; reason_category: string; affected_fields: string[] }>;
+ policy: { allowedReasonCategories: string[]; forbiddenReasons: string[] };
+};
+
+type LanesFixture = { // narrowed view of contextbench-lanes.json for this test file
+ laneContaminationRules: {
+ oneContextToolPerLane: boolean;
+ mixedLaneContextInvalidatesRun: boolean;
+ };
+ lanes: Array<{
+ laneId: string; // 'raw-native' is the only id the tests special-case
+ contextTool: string;
+ allowedTools: string[];
+ disallowedTools: string[];
+ }>;
+};
+
+type SmokePack = { // typed view of contextbench-smoke-pack.json
+ claimBearing: boolean; // tests require false: the smoke pack must not be claim-bearing
+ purpose: string;
+ executionStatus: string;
+ mustNotContributeTo: string[];
+ corpora: Array<{
+ name: string;
+ claimBearing: boolean;
+ purpose: string;
+ phase37RunnableTasks: boolean;
+ }>;
+};
+
+const manifest = manifestFixture as ContextBenchManifest; // type assertions only: fixture JSON is not validated at runtime here
+const exclusions = selectionExclusionsFixture as SelectionExclusions;
+const protocol = protocolFixture as ProtocolFixture;
+const corrections = correctionsFixture as CorrectionsFixture;
+const lanes = lanesFixture as LanesFixture;
+const smokePack = smokePackFixture as SmokePack;
+
+const shaPattern = /^sha256:[a-f0-9]{64}$/; // "sha256:" prefix followed by exactly 64 lowercase hex characters
+const canonicalizationVersion = 'contextbench-canonical-json-lf-v1';
+const hardnessStatus = 'unavailable_in_contextbench_verified_schema';
+const childGitEnv = (() => { // snapshot of process.env with every GIT_* variable left out
+  const sanitized: NodeJS.ProcessEnv = {};
+  for (const [name, value] of Object.entries(process.env)) {
+    if (!name.startsWith('GIT_')) sanitized[name] = value;
+  }
+  return sanitized;
+})();
+
+function poisonedGitEnv(): NodeJS.ProcessEnv { // childGitEnv plus GIT_* overrides: paths under tmpdir() and GIT_SSH_COMMAND='false'
+  const overrides: NodeJS.ProcessEnv = {
+    GIT_DIR: path.join(tmpdir(), 'contextbench-poisoned-git-dir'),
+    GIT_WORK_TREE: path.join(tmpdir(), 'contextbench-poisoned-work-tree'),
+    GIT_INDEX_FILE: path.join(tmpdir(), 'contextbench-poisoned-index'),
+    GIT_CONFIG_GLOBAL: path.join(tmpdir(), 'contextbench-poisoned-gitconfig'),
+    GIT_SSH_COMMAND: 'false'
+  };
+  return { ...childGitEnv, ...overrides };
+}
+
+function stableStringify(value: unknown): string { // canonical JSON text: object keys sorted, array order preserved
+  if (value === null || typeof value !== 'object') return JSON.stringify(value);
+  if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(',')}]`;
+  const entries = Object.entries(value as Record<string, unknown>).sort(([a], [b]) =>
+    a.localeCompare(b) // NOTE(review): ordering presumably has to match the selector script's hashing — confirm before changing
+  );
+  return `{${entries.map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(',')}}`;
+}
+
+function hashObject(value: unknown): string { // hash of the key-sorted JSON rendering of value
+  return sha256Text(stableStringify(value));
+}
+
+function sha256Text(value: string): string { // "sha256:" followed by the hex SHA-256 of the UTF-8 bytes
+  return 'sha256:' + createHash('sha256').update(value, 'utf8').digest('hex');
+}
+
+describe('ContextBench Phase 37 task manifest', () => {
+ it('freezes exactly 20 claim-bearing ContextBench tasks with required metadata', () => {
+ expect(manifest.dataset).toBe('Contextbench/ContextBench');
+ expect(manifest.datasetConfig).toBe('contextbench_verified');
+ expect(manifest.claimBearing).toBe(true);
+ expect(manifest.selectedInPhase).toBe(37);
+ expect(manifest.tasks).toHaveLength(20);
+ expect(new Set(manifest.tasks.map((task) => task.instance_id)).size).toBe(20);
+ expect(manifest.selection_algorithm).toBe('deterministic_seeded_coverage_then_rank_fill_v1');
+ expect(manifest.selection_seed_or_deterministic_order).toBe(
+ 'phase37-contextbench-v1-2026-04-27'
+ );
+ expect(manifest.selection_timestamp).toBeTruthy();
+ expect(manifest.task_pool_hash).toMatch(shaPattern);
+ expect(manifest.exclusion_log_path).toBe(
+ 'tests/fixtures/contextbench-selection-exclusions.json'
+ );
+ expect(manifest.evaluator_success_status).toBe('passed_synthetic_official_evaluator_probe');
+ });
+
+ it('meets repo, language, and source coverage without proxy hardness', () => {
+ expect(manifest.summary.repo_count).toBeGreaterThanOrEqual(
+ protocol.taskSlicePolicy.coverageConstraints.minRepos
+ );
+ expect(manifest.summary.language_count).toBeGreaterThanOrEqual(
+ protocol.taskSlicePolicy.coverageConstraints.minLanguages
+ );
+ expect(Object.keys(manifest.summary.source_distribution).length).toBeGreaterThanOrEqual(2);
+ expect(manifest.hardness_signal_status).toBe(hardnessStatus);
+ expect(manifest.hardness_signal_source).toBe('dataset_schema_probe');
+ expect(manifest.hardness_proxy_used).toBe(false);
+ expect(protocol.taskSlicePolicy.hardnessSignalPolicy).toMatchObject({
+ required: false,
+ status: hardnessStatus,
+ proxyAllowed: false,
+ selectionMustRecordAbsence: true
+ });
+ });
+
+ it('records stable identity, source, repo pin, and hash fields for every task', () => {
+ for (const task of manifest.tasks) {
+ expect(task.instance_id).toBeTruthy();
+ expect(task.original_inst_id).toBeTruthy();
+ expect(task.source).toBeTruthy();
+ expect(task.language).toBeTruthy();
+ expect(task.repo_url).toMatch(/^https:\/\/github\.com\/.+\.git$/);
+ expect(task.base_commit).toMatch(/^[a-f0-9]{40}$/);
+ expect(task.problem_statement_ref).toBe('dataset_field:problem_statement');
+ expect(task.gold_context_ref).toBe('dataset_field:gold_context');
+ expect(task.problem_statement_hash).toMatch(shaPattern);
+ expect(task.gold_context_hash).toMatch(shaPattern);
+ expect(task.patch_hash).toMatch(shaPattern);
+ expect(task.test_patch_hash).toMatch(shaPattern);
+ expect(task.f2p_hash).toMatch(shaPattern);
+ expect(task.p2p_hash).toMatch(shaPattern);
+ expect(task.gold_context_span_count).toBeGreaterThan(0);
+ expect(task.hash_canonicalization_version).toBe(canonicalizationVersion);
+ expect(task.hardness_proxy_used).toBe(false);
+ expect(task.deterministic_rank).toMatch(shaPattern);
+ expect(task.inclusion_rationale).toMatch(
+ /^(language_coverage|source_coverage|repo_coverage|deterministic_fill)/
+ );
+ }
+ });
+
+  it('self-verifies manifest hashing and metadata determinism', () => {
+    const withoutHash: Record<string, unknown> = { ...manifest }; // shallow copy so the imported fixture is not mutated
+    delete withoutHash.manifest_hash; // the stored hash is expected to cover every field except itself
+    expect(manifest.manifest_hash).toBe(hashObject(withoutHash));
+    expect(manifest.hash_canonicalization_version).toBe(canonicalizationVersion);
+    expect(hashObject({ a: 1, b: ['x', 'y'] })).toBe(hashObject({ b: ['x', 'y'], a: 1 })); // key order must not change the hash
+  });
+});
+
+describe('ContextBench Phase 37 exclusion log and anti-gaming guards', () => {
+ it('keeps the exclusion log aligned with the manifest and records non-selected eligible rows', () => {
+ expect(exclusions.dataset).toBe(manifest.dataset);
+ expect(exclusions.datasetConfig).toBe(manifest.datasetConfig);
+ expect(exclusions.selection_algorithm).toBe(manifest.selection_algorithm);
+ expect(exclusions.selection_seed_or_deterministic_order).toBe(
+ manifest.selection_seed_or_deterministic_order
+ );
+ expect(exclusions.selection_timestamp).toBe(manifest.selection_timestamp);
+ expect(exclusions.task_pool_hash).toBe(manifest.task_pool_hash);
+ expect(exclusions.hash_canonicalization_version).toBe(canonicalizationVersion);
+ expect(exclusions.hardness_signal_status).toBe(hardnessStatus);
+ expect(exclusions.hardness_proxy_used).toBe(false);
+ expect(exclusions.selected_row_count).toBe(20);
+ expect(exclusions.eligible_row_count).toBeGreaterThanOrEqual(20);
+ expect(exclusions.non_selected_eligible_rows.length).toBeGreaterThan(0);
+ expect(exclusions.non_selected_eligible_rows[0].reason).toBe('eligible_not_selected');
+ });
+
+ it('blocks output-aware and proxy-hardness selection sources', () => {
+ expect(manifest.forbidden_selection_sources).toEqual(
+ expect.arrayContaining([
+ 'agent_outputs',
+ 'codebase_context_outputs',
+ 'competitor_outputs',
+ 'proxy_hardness_score',
+ 'post_failure_task_filtering'
+ ])
+ );
+ expect(manifest.no_lane_outputs_observed_attestation).toContain('No raw/native');
+ expect(exclusions.no_lane_outputs_observed_attestation).toBe(
+ manifest.no_lane_outputs_observed_attestation
+ );
+ expect(protocol.taskSlicePolicy.forbiddenSources).toEqual(
+ expect.arrayContaining(['agent_outputs', 'codebase_context_outputs', 'competitor_outputs'])
+ );
+ });
+
+ it('keeps Phase 36 boundaries and correction-ledger semantics intact', () => {
+ expect(protocol.phaseBoundary.phase36MustNotFreeze).toEqual(
+ expect.arrayContaining(['actual_task_ids', 'actual_repo_commits'])
+ );
+ expect(protocol.phaseBoundary.phase37Freezes).toContain('actual_contextbench_instance_ids');
+ expect(corrections.corrections).toEqual(
+ expect.arrayContaining([
+ expect.objectContaining({
+ correction_id: 'contextbench-hardness-signal-policy-2026-04-27',
+ reason_category: 'factual_erratum',
+ affected_fields: expect.arrayContaining(['taskSlicePolicy.hardnessSignalPolicy'])
+ })
+ ])
+ );
+ expect(corrections.policy.allowedReasonCategories).toContain('factual_erratum');
+ expect(corrections.policy.forbiddenReasons).not.toContain('factual_erratum');
+ });
+
+ it('keeps lane isolation visible without running any lane', () => {
+ expect(lanes.laneContaminationRules.oneContextToolPerLane).toBe(true);
+ expect(lanes.laneContaminationRules.mixedLaneContextInvalidatesRun).toBe(true);
+ for (const lane of lanes.lanes) {
+ if (lane.laneId === 'raw-native') {
+ expect(lane.allowedTools).toEqual(expect.arrayContaining(['native-read', 'native-search']));
+ } else {
+ expect(lane.allowedTools).toContain(lane.contextTool);
+ }
+ expect(lane.disallowedTools).not.toContain(lane.contextTool);
+ }
+ });
+
+ it('keeps selector implementation wired to the mandatory anti-gaming fields', () => {
+ const script = readFileSync('scripts/contextbench-select-slice.mjs', 'utf8');
+ for (const requiredField of [
+ 'selection_timestamp',
+ 'task_pool_hash',
+ 'exclusion_log_path',
+ 'hash_canonicalization_version',
+ 'hardness_proxy_used',
+ 'no_lane_outputs_observed_attestation'
+ ]) {
+ expect(script).toContain(requiredField);
+ expect(stableStringify(manifest)).toContain(requiredField);
+ }
+ expect(script).toContain('proxy_hardness_score');
+ expect(script).toContain('post_failure_task_filtering');
+ });
+});
+
+describe('ContextBench Phase 37 smoke pack separation', () => {
+ it('keeps Excalidraw and FastAPI metadata-only and non-claim-bearing', () => {
+ expect(smokePack.claimBearing).toBe(false);
+ expect(smokePack.purpose).toBe('local_harness_smoke_only');
+ expect(smokePack.executionStatus).toBe('metadata_only_not_executed_in_phase37');
+ expect(smokePack.mustNotContributeTo).toEqual(
+ expect.arrayContaining(['contextbench_claim_bearing_aggregates', 'public_benchmark_claims'])
+ );
+ expect(smokePack.corpora).toEqual(
+ expect.arrayContaining([
+ expect.objectContaining({
+ name: 'Excalidraw',
+ claimBearing: false,
+ phase37RunnableTasks: false
+ }),
+ expect.objectContaining({
+ name: 'FastAPI',
+ claimBearing: false,
+ phase37RunnableTasks: false
+ })
+ ])
+ );
+ expect(protocol.smokeOnlyCorpora).toEqual(
+ expect.arrayContaining([
+ expect.objectContaining({ name: 'Excalidraw', claimBearing: false }),
+ expect.objectContaining({ name: 'FastAPI', claimBearing: false })
+ ])
+ );
+ });
+
+ it('does not mix smoke corpora into the claim-bearing ContextBench manifest or claims', () => {
+ const taskText = stableStringify(manifest.tasks);
+ expect(taskText).not.toContain('Excalidraw');
+ expect(taskText).not.toContain('FastAPI');
+ expect(protocol.claimAllowed).toBe(false);
+ expect(protocol.blockedClaims).toEqual(
+ expect.arrayContaining([
+ 'codebase_context_beats_competitors',
+ 'codebase_context_improves_productivity',
+ 'focus_mode_improves_agent_outcomes',
+ 'token_savings_superiority'
+ ])
+ );
+ });
+});
+
+describe('ContextBench Phase 40 task payload materialization', () => {
+ it('self-checks a frozen manifest without live dataset access', () => { // Runs the script in --check mode on a temp copy of the manifest; no rows file is passed.
+ const tempRoot = mkdtempSync(path.join(tmpdir(), 'contextbench-check-requires-rows-')); // NOTE(review): the requires-rows prefix looks stale, since this test supplies no rows file
+ try {
+ const manifestPath = path.join(tempRoot, 'manifest.json');
+ writeFileSync(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, 'utf8');
+ const stdout = execFileSync( // assuming Node child_process semantics: a non-zero exit throws and fails the test
+ 'node',
+ ['scripts/contextbench-select-slice.mjs', '--check', manifestPath],
+ {
+ encoding: 'utf8'
+ }
+ );
+ expect(stdout).toContain('manifest self-check passed');
+ } finally {
+ rmSync(tempRoot, { recursive: true, force: true }); // always remove the temp directory, pass or fail
+ }
+ });
+
+ it('fails manifest self-check when the frozen content hash is stale', () => { // Replaces manifest_hash with a bogus value and expects --check to report the mismatch on stderr.
+ const tempRoot = mkdtempSync(path.join(tmpdir(), 'contextbench-check-stale-hash-'));
+ try {
+ const manifestPath = path.join(tempRoot, 'manifest.json');
+ writeFileSync(
+ manifestPath,
+ `${JSON.stringify({ ...manifest, manifest_hash: 'sha256:stale' }, null, 2)}\n`,
+ 'utf8'
+ );
+ let stderr = ''; // stays empty if the script unexpectedly succeeds, so the final assertion then fails
+ try {
+ execFileSync('node', ['scripts/contextbench-select-slice.mjs', '--check', manifestPath], {
+ encoding: 'utf8'
+ });
+ } catch (error: unknown) { // expected path: capture stderr from the failed child process
+ const failure = error as { stderr?: Buffer | string };
+ stderr = Buffer.isBuffer(failure.stderr) // normalise Buffer or string (or missing) stderr to a string
+ ? failure.stderr.toString('utf8')
+ : String(failure.stderr ?? '');
+ }
+ expect(stderr).toContain('manifest_hash does not match manifest content');
+ } finally {
+ rmSync(tempRoot, { recursive: true, force: true }); // always remove the temp directory, pass or fail
+ }
+ });
+
+ it('writes selected problem statements without observing lane outputs', () => { // Runs --write-task-payloads twice on the same fixture inputs with two different checkout roots.
+ const tempRoot = mkdtempSync(path.join(tmpdir(), 'contextbench-task-payloads-'));
+ try {
+ const problemStatement = 'Fix the parser when the input contains nested groups.';
+ const task = {
+ instance_id: 'fixture-task-1',
+ original_inst_id: 'owner__repo-1',
+ repo: 'owner/repo',
+ repo_url: 'https://github.com/owner/repo.git',
+ base_commit: '1234567890abcdef1234567890abcdef12345678',
+ problem_statement_hash: sha256Text(problemStatement) // the manifest task holds only the hash; the statement text is supplied via the rows file
+ };
+ const manifestPath = path.join(tempRoot, 'manifest.json');
+ const rowsPath = path.join(tempRoot, 'rows.json');
+ const payloadPath = path.join(tempRoot, 'payloads.json');
+ const payloadPathB = path.join(tempRoot, 'payloads-b.json');
+ const checkoutRoot = path.join(tempRoot, 'checkouts');
+ const checkoutRootB = path.join(tempRoot, 'other-checkouts');
+ writeFileSync( // minimal manifest containing the single fixture task
+ manifestPath,
+ `${JSON.stringify(
+ {
+ protocolVersion: 'contextbench-protocol-v1',
+ dataset: 'Contextbench/ContextBench',
+ datasetConfig: 'contextbench_verified',
+ split: 'train',
+ manifest_hash: 'sha256:test-manifest',
+ tasks: [task]
+ },
+ null,
+ 2
+ )}\n`,
+ 'utf8'
+ );
+ writeFileSync( // rows file with one entry wrapped as rows[].row, carrying the matching problem statement
+ rowsPath,
+ `${JSON.stringify(
+ {
+ rows: [
+ {
+ row: {
+ instance_id: task.instance_id,
+ repo_url: task.repo_url,
+ base_commit: task.base_commit,
+ problem_statement: problemStatement
+ }
+ }
+ ]
+ },
+ null,
+ 2
+ )}\n`,
+ 'utf8'
+ );
+ execFileSync( // first run: output to payloadPath, checkouts planned under checkoutRoot
+ 'node',
+ [
+ 'scripts/contextbench-select-slice.mjs',
+ '--write-task-payloads',
+ '--rows-file',
+ rowsPath,
+ '--manifest',
+ manifestPath,
+ '--checkout-root',
+ checkoutRoot,
+ '--out',
+ payloadPath
+ ],
+ { encoding: 'utf8' }
+ );
+ execFileSync( // second run: identical inputs, but output to payloadPathB and checkoutRootB
+ 'node',
+ [
+ 'scripts/contextbench-select-slice.mjs',
+ '--write-task-payloads',
+ '--rows-file',
+ rowsPath,
+ '--manifest',
+ manifestPath,
+ '--checkout-root',
+ checkoutRootB,
+ '--out',
+ payloadPathB
+ ],
+ { encoding: 'utf8' }
+ );
+
+ const payload = JSON.parse(readFileSync(payloadPath, 'utf8')) as { // shape asserted by cast only; there is no runtime validation here
+ claimBearing: boolean;
+ task_count: number;
+ payload_hash: string;
+ tasks: Array<{
+ instance_id: string;
+ problem_statement: string;
+ problem_statement_hash_verified: boolean;
+ repo_checkout_path: string;
+ repo_checkout_status: string;
+ lane_outputs_observed: boolean;
+ }>;
+ };
+ const payloadB = JSON.parse(readFileSync(payloadPathB, 'utf8')) as typeof payload;
+ expect(payload.claimBearing).toBe(false);
+ expect(payload.task_count).toBe(1);
+ expect(payload.payload_hash).toMatch(shaPattern);
+ expect(payload.payload_hash).toBe(payloadB.payload_hash); // the hash must not depend on the checkout root
+ expect(payload.tasks[0]).toMatchObject({
+ instance_id: task.instance_id,
+ problem_statement: problemStatement,
+ problem_statement_hash_verified: true,
+ repo_checkout_status: 'planned_not_verified',
+ lane_outputs_observed: false
+ });
+ expect(payload.tasks[0].repo_checkout_path).toContain('owner-repo-1234567890ab'); // owner-repo- followed by the first 12 characters of base_commit
+ expect(payload.tasks[0].repo_checkout_path).not.toBe(payloadB.tasks[0].repo_checkout_path); // the planned path itself does differ per checkout root
+ } finally {
+ rmSync(tempRoot, { recursive: true, force: true }); // always remove the temp directory, pass or fail
+ }
+ });
+
+ it('rejects invalid task payload rows before writing any task payload entry', () => { // Feeds a rows file whose statement does not match the manifest hash; expects failure and no output file.
+ const tempRoot = mkdtempSync(path.join(tmpdir(), 'contextbench-task-payloads-invalid-'));
+ try {
+ const problemStatement = 'Fix the parser when the input contains nested groups.';
+ const task = {
+ instance_id: 'fixture-task-1',
+ original_inst_id: 'owner__repo-1',
+ repo: 'owner/repo',
+ repo_url: 'https://github.com/owner/repo.git',
+ base_commit: '1234567890abcdef1234567890abcdef12345678',
+ problem_statement_hash: sha256Text(problemStatement) // hash of the statement that the rows file below deliberately does not contain
+ };
+ const manifestPath = path.join(tempRoot, 'manifest.json');
+ const rowsPath = path.join(tempRoot, 'rows.json');
+ const payloadPath = path.join(tempRoot, 'payloads.json');
+ writeFileSync( // minimal manifest containing the single fixture task
+ manifestPath,
+ `${JSON.stringify(
+ {
+ protocolVersion: 'contextbench-protocol-v1',
+ dataset: 'Contextbench/ContextBench',
+ datasetConfig: 'contextbench_verified',
+ split: 'train',
+ manifest_hash: 'sha256:test-manifest',
+ tasks: [task]
+ },
+ null,
+ 2
+ )}\n`,
+ 'utf8'
+ );
+ writeFileSync( // rows file whose problem_statement differs from the one hashed into the manifest
+ rowsPath,
+ `${JSON.stringify(
+ {
+ rows: [
+ {
+ row: {
+ instance_id: task.instance_id,
+ repo_url: task.repo_url,
+ base_commit: task.base_commit,
+ problem_statement: 'Different statement.'
+ }
+ }
+ ]
+ },
+ null,
+ 2
+ )}\n`,
+ 'utf8'
+ );
+ let stderr = ''; // stays empty if the script unexpectedly succeeds, so the assertion below then fails
+ try {
+ execFileSync( // no --checkout-root is passed in this invocation
+ 'node',
+ [
+ 'scripts/contextbench-select-slice.mjs',
+ '--write-task-payloads',
+ '--rows-file',
+ rowsPath,
+ '--manifest',
+ manifestPath,
+ '--out',
+ payloadPath
+ ],
+ { encoding: 'utf8' }
+ );
+ } catch (error: unknown) { // expected path: capture stderr from the failed child process
+ const failure = error as { stderr?: Buffer | string };
+ stderr = Buffer.isBuffer(failure.stderr) // normalise Buffer or string (or missing) stderr to a string
+ ? failure.stderr.toString('utf8')
+ : String(failure.stderr ?? '');
+ }
+ expect(stderr).toContain('problem_statement_hash mismatch');
+ expect(() => readFileSync(payloadPath, 'utf8')).toThrow(); // the read must throw, i.e. no payload file was left behind
+ } finally {
+ rmSync(tempRoot, { recursive: true, force: true }); // always remove the temp directory, pass or fail
+ }
+ });
+
+ it('materializes planned checkout paths and records verified base commits', () => { // Builds a one-commit local git repo, points a payload at it, and runs --materialize-checkouts twice.
+ const tempRoot = mkdtempSync(path.join(tmpdir(), 'contextbench-checkout-materializer-'));
+ try {
+ const sourceRepo = path.join(tempRoot, 'source-repo');
+ const checkoutPath = path.join(tempRoot, 'checkout-repo');
+ execFileSync('git', ['-c', 'core.autocrlf=false', 'init', sourceRepo], { // create the throwaway source repository
+ encoding: 'utf8',
+ env: childGitEnv
+ });
+ writeFileSync(path.join(sourceRepo, 'README.md'), 'fixture\n', 'utf8');
+ execFileSync('git', ['-c', 'core.autocrlf=false', 'add', 'README.md'], {
+ cwd: sourceRepo,
+ env: childGitEnv,
+ encoding: 'utf8'
+ });
+ execFileSync( // identity is passed with -c so the commit does not rely on any configured user
+ 'git',
+ [
+ '-c',
+ 'user.name=ContextBench Test',
+ '-c',
+ 'user.email=contextbench@example.invalid',
+ 'commit',
+ '-m',
+ 'fixture'
+ ],
+ { cwd: sourceRepo, encoding: 'utf8', env: childGitEnv }
+ );
+ const commit = execFileSync('git', ['rev-parse', 'HEAD'], { // commit id used as base_commit and as the expected checkout HEAD
+ cwd: sourceRepo,
+ env: childGitEnv,
+ encoding: 'utf8'
+ }).trim();
+ const payloadPath = path.join(tempRoot, 'payloads.json');
+ writeFileSync( // payload with one planned task whose repo_url is the local source repo path
+ payloadPath,
+ `${JSON.stringify(
+ {
+ name: 'test-payloads',
+ protocolVersion: 'contextbench-protocol-v1',
+ claimBearing: false,
+ tasks: [
+ {
+ instance_id: 'fixture-task-1',
+ repo_url: sourceRepo,
+ base_commit: commit,
+ repo_checkout_path: checkoutPath,
+ repo_checkout_status: 'planned_not_verified'
+ }
+ ]
+ },
+ null,
+ 2
+ )}\n`,
+ 'utf8'
+ );
+
+ execFileSync( // first materialization pass; no env override is given for this call
+ 'node',
+ [
+ 'scripts/contextbench-select-slice.mjs',
+ '--materialize-checkouts',
+ '--payloads',
+ payloadPath,
+ '--max-tasks',
+ '1'
+ ],
+ { encoding: 'utf8' }
+ );
+
+ const payload = JSON.parse(readFileSync(payloadPath, 'utf8')) as { // same file is re-read: it is expected to now carry the verification fields
+ payload_hash: string;
+ tasks: Array<{
+ repo_checkout_status: string;
+ repo_actual_head: string;
+ base_commit_verified: boolean;
+ repo_clean_verified: boolean;
+ }>;
+ };
+ expect(payload.payload_hash).toMatch(shaPattern);
+ expect(payload.tasks[0]).toMatchObject({
+ repo_checkout_status: 'verified',
+ repo_actual_head: commit,
+ base_commit_verified: true,
+ repo_clean_verified: true
+ });
+ expect( // independent check: HEAD of the materialized checkout equals the source commit
+ execFileSync('git', ['rev-parse', 'HEAD'], {
+ cwd: checkoutPath,
+ encoding: 'utf8',
+ env: childGitEnv
+ }).trim()
+ ).toBe(commit);
+
+ const firstPayloadHash = payload.payload_hash;
+ writeFileSync(path.join(checkoutPath, 'untracked.txt'), 'stale local file\n', 'utf8'); // dirty the checkout with an untracked file before the second pass
+ execFileSync( // second pass over the already-materialized, now dirty checkout
+ 'node',
+ [
+ 'scripts/contextbench-select-slice.mjs',
+ '--materialize-checkouts',
+ '--payloads',
+ payloadPath,
+ '--max-tasks',
+ '1'
+ ],
+ { encoding: 'utf8', env: poisonedGitEnv() } // NOTE(review): poisonedGitEnv is defined outside this block; its contents are not visible here
+ );
+ const reverifiedPayload = JSON.parse(readFileSync(payloadPath, 'utf8')) as typeof payload;
+ expect(reverifiedPayload.payload_hash).toBe(firstPayloadHash); // re-verification must leave the payload hash unchanged
+ expect(reverifiedPayload.tasks[0]).toMatchObject({ // still verified and clean despite the untracked file added above
+ repo_checkout_status: 'verified',
+ repo_actual_head: commit,
+ base_commit_verified: true,
+ repo_clean_verified: true
+ });
+ } finally {
+ rmSync(tempRoot, { recursive: true, force: true }); // always remove the temp directory, pass or fail
+ }
+ });
+
+ it('writes scorer-only gold input without mixing it into solver payloads', () => { // Runs --write-gold for one task and checks the gold file plus its .summary.json companion.
+ const tempRoot = mkdtempSync(path.join(tmpdir(), 'contextbench-gold-input-'));
+ try {
+ const goldContext = JSON.stringify([ // gold context is placed in the rows file as a JSON-encoded string
+ { file: 'src/a.ts', start_line: 1, end_line: 2, content: 'export const a = 1;' }
+ ]);
+ const task = {
+ instance_id: 'fixture-task-1',
+ original_inst_id: 'owner__repo-1',
+ repo: 'owner/repo',
+ repo_url: 'https://github.com/owner/repo.git',
+ base_commit: '1234567890abcdef1234567890abcdef12345678',
+ gold_context_hash: sha256Text(stableStringify(JSON.parse(goldContext) as unknown)) // hashed from the parsed value passed through stableStringify, not from the raw string
+ };
+ const manifestPath = path.join(tempRoot, 'manifest.json');
+ const rowsPath = path.join(tempRoot, 'rows.json');
+ const payloadPath = path.join(tempRoot, 'payloads.json');
+ const goldPath = path.join(tempRoot, 'gold.json');
+ const checkoutPath = path.join(tempRoot, 'checkout');
+ writeFileSync( // minimal manifest; dataset, datasetConfig and split are omitted in this fixture
+ manifestPath,
+ `${JSON.stringify(
+ {
+ protocolVersion: 'contextbench-protocol-v1',
+ manifest_hash: 'sha256:test-manifest',
+ tasks: [task]
+ },
+ null,
+ 2
+ )}\n`,
+ 'utf8'
+ );
+ writeFileSync( // rows file with one entry carrying gold_context and a patch header line
+ rowsPath,
+ `${JSON.stringify(
+ {
+ rows: [
+ {
+ row: {
+ instance_id: task.instance_id,
+ gold_context: goldContext,
+ patch: 'diff --git a/src/a.ts b/src/a.ts'
+ }
+ }
+ ]
+ },
+ null,
+ 2
+ )}\n`,
+ 'utf8'
+ );
+ writeFileSync( // payload entry pre-marked as verified and pointing at checkoutPath; no directory is created there by this test
+ payloadPath,
+ `${JSON.stringify(
+ {
+ payload_hash: 'sha256:test-payload',
+ tasks: [
+ {
+ instance_id: task.instance_id,
+ repo_checkout_path: checkoutPath,
+ repo_checkout_status: 'verified',
+ repo_actual_head: task.base_commit,
+ base_commit_verified: true,
+ repo_clean_verified: true
+ }
+ ]
+ },
+ null,
+ 2
+ )}\n`,
+ 'utf8'
+ );
+
+ execFileSync( // single --write-gold run selecting the fixture task by --task-id
+ 'node',
+ [
+ 'scripts/contextbench-select-slice.mjs',
+ '--write-gold',
+ '--rows-file',
+ rowsPath,
+ '--manifest',
+ manifestPath,
+ '--payloads',
+ payloadPath,
+ '--task-id',
+ task.instance_id,
+ '--out',
+ goldPath
+ ],
+ { encoding: 'utf8' }
+ );
+
+ const gold = JSON.parse(readFileSync(goldPath, 'utf8')) as { // shape asserted by cast only; there is no runtime validation here
+ inst_id: string;
+ repo_url: string;
+ gold_ctx: unknown[];
+ };
+ const summary = JSON.parse(readFileSync(`${goldPath}.summary.json`, 'utf8')) as { // summary is read from the gold path with .summary.json appended
+ scorerOnly: boolean;
+ lane_outputs_observed: boolean;
+ gold_context_hash_verified: boolean;
+ };
+ expect(gold.inst_id).toBe(task.instance_id);
+ expect(gold.repo_url).toBe(checkoutPath); // expected to be the local checkout path from the payload, not task.repo_url
+ expect(gold.gold_ctx).toHaveLength(1); // one gold context entry was supplied in the rows file
+ expect(summary).toMatchObject({
+ scorerOnly: true,
+ lane_outputs_observed: false,
+ gold_context_hash_verified: true
+ });
+ } finally {
+ rmSync(tempRoot, { recursive: true, force: true }); // always remove the temp directory, pass or fail
+ }
+ });
+});
diff --git a/tests/fixtures/contextbench-benchmark-protocol.json b/tests/fixtures/contextbench-benchmark-protocol.json
new file mode 100644
index 0000000..012f012
--- /dev/null
+++ b/tests/fixtures/contextbench-benchmark-protocol.json
@@ -0,0 +1,310 @@
+{
+ "name": "v2.4-contextbench-external-protocol",
+ "protocolVersion": "contextbench-protocol-v1",
+ "frozenDate": "2026-04-27",
+ "status": "protocol_frozen",
+ "claimAllowed": false,
+ "phaseBoundary": {
+ "phase36Freezes": [
+ "protocol_schema",
+ "lane_governance",
+ "correction_policy",
+ "claim_gates",
+ "run_manifest_schema"
+ ],
+ "phase37Freezes": [
+ "actual_contextbench_instance_ids",
+ "repo_urls",
+ "base_commits",
+ "language_distribution",
+ "problem_statement_references"
+ ],
+ "phase36MustNotFreeze": [
+ "actual_task_ids",
+ "actual_repo_commits",
+ "benchmark_outputs",
+ "runner_results"
+ ]
+ },
+ "benchmarkTarget": {
+ "primary": "ContextBench",
+ "sourceRepository": "https://github.com/EuniAI/ContextBench",
+ "datasetCandidates": ["Contextbench/ContextBench", "Schwerli/ContextBench"],
+ "datasetConfig": "contextbench_verified",
+ "officialEvaluatorFirst": true,
+ "officialEvaluatorCommand": "python -m contextbench.evaluate --gold --pred --out ",
+ "fallbackScorerPolicy": {
+ "allowed": "only_after_official_evaluator_incompatibility_is_documented",
+ "claimBearing": false,
+ "requiresValidationAgainstOfficialOutputs": true
+ }
+ },
+ "taskSlicePolicy": {
+ "sliceKind": "verified_mini_slice",
+ "taskCount": {
+ "min": 20,
+ "max": 50
+ },
+ "selectedInPhase": 37,
+ "phase36SelectionSchemaOnly": true,
+ "requiredManifestFields": [
+ "instance_id",
+ "original_inst_id",
+ "source",
+ "language",
+ "repo_url",
+ "base_commit",
+ "problem_statement_ref",
+ "problem_statement_hash",
+ "gold_context_ref",
+ "gold_context_hash",
+ "patch_hash",
+ "test_patch_hash",
+ "f2p_hash",
+ "p2p_hash"
+ ],
+ "selectionMethodRequiredFields": [
+ "selection_algorithm",
+ "selection_seed_or_deterministic_order",
+ "task_pool_hash",
+ "selection_timestamp",
+ "inclusion_rationale",
+ "exclusion_log_path",
+ "no_lane_outputs_observed_attestation"
+ ],
+ "coverageConstraints": {
+ "minRepos": 2,
+ "minLanguages": 2,
+ "selectionBeforeOutputs": true
+ },
+ "hardnessSignalPolicy": {
+ "required": false,
+ "status": "unavailable_in_contextbench_verified_schema",
+ "proxyAllowed": false,
+ "selectionMustRecordAbsence": true
+ },
+ "forbiddenSources": [
+ "agent_outputs",
+ "codebase_context_outputs",
+ "competitor_outputs",
+ "post_failure_task_filtering"
+ ]
+ },
+ "smokeOnlyCorpora": [
+ {
+ "name": "Excalidraw",
+ "claimBearing": false,
+ "purpose": "local_harness_smoke_only"
+ },
+ {
+ "name": "FastAPI",
+ "claimBearing": false,
+ "purpose": "local_harness_smoke_only"
+ }
+ ],
+ "runPolicy": {
+ "smokeRunsPerTaskLane": 1,
+ "claimBearingRunsPerTaskLane": 3,
+ "fewerThanClaimRunsMeans": "diagnostic_only_claim_allowed_false",
+ "reportAllRuns": true,
+ "bestOfNReportingAllowed": false
+ },
+ "minimalRunnerBehavior": {
+ "standardizes": [
+ "task_prompt",
+ "lane_tool_card",
+ "model",
+ "budget",
+ "timeout",
+ "trace_capture",
+ "structured_answer_schema"
+ ],
+ "mustNotScript": [
+ "agent_decisions",
+ "file_selection",
+ "query_rewrites",
+ "answer_content",
+ "evidence_selection"
+ ]
+ },
+ "structuredAnswerSchema": {
+ "requiredFields": [
+ "answer",
+ "confidence",
+ "evidence",
+ "filesReferenced",
+ "symbolsReferenced",
+ "unsupportedClaims",
+ "readyToEdit"
+ ],
+ "confidenceValues": ["low", "medium", "high"],
+ "evidenceFields": ["file", "lineRange", "reason"],
+ "invalidSchemaStatus": "invalid_schema"
+ },
+ "trajectorySchema": {
+ "requiredFields": ["pred_steps", "pred_files", "pred_spans"],
+ "optionalFields": ["pred_patch"],
+ "lineRangePolicy": "explicit_ranges_preferred_full_file_spans_must_be_marked",
+ "pathNormalizationRequired": true,
+ "rawTracePreservationRequired": true
+ },
+ "metrics": {
+ "primary": [
+ "context_file_recall",
+ "context_file_precision",
+ "context_symbol_recall",
+ "context_symbol_precision",
+ "context_span_recall",
+ "context_span_precision",
+ "edit_location_recall",
+ "edit_location_precision"
+ ],
+ "secondary": [
+ "auc_coverage",
+ "redundancy",
+ "explored_vs_used_gap",
+ "false_ready_rate",
+ "unsupported_claim_rate",
+ "setup_time_seconds",
+ "index_time_seconds",
+ "task_wall_time_seconds",
+ "context_token_estimate"
+ ],
+ "efficiencyIsSecondary": true,
+ "tokenSavingsWinRequiresCorrectnessNonRegression": true
+ },
+ "factRecallJudgeScope": {
+ "enabled": true,
+ "allowedOnlyFor": [
+ "predefined_atomic_facts",
+ "evidence_presence",
+ "unsupported_claim_detection"
+ ],
+ "forbiddenFor": [
+ "broad_rubric_vibes",
+ "post_hoc_expected_fact_creation",
+ "self_grading_by_solver_agent"
+ ],
+ "uncertainCountsAsSuccess": false
+ },
+ "budgets": {
+ "sameModelAcrossLanes": true,
+ "sameTimeoutAcrossLanes": true,
+ "sameTurnBudgetAcrossLanes": true,
+ "sameContextBudgetAcrossLanes": true,
+ "setupAndIndexingReportedSeparately": true,
+ "defaults": {
+ "maxContextTokens": 12000,
+ "maxAnswerTokens": 2000,
+ "timeoutSeconds": 300
+ }
+ },
+ "thresholds": {
+ "claimBearingRunsPerTaskLane": 3,
+ "setupFailuresBlockBroadClaims": true,
+ "wedgeWinRequires": [
+ "beats_raw_native_on_primary_context_metrics",
+ "beats_or_ties_jcodemunch_on_primary_context_metrics",
+ "no_correctness_regression",
+ "false_ready_rate_not_worse"
+ ],
+ "thresholdChangesRequireCorrection": true
+ },
+ "failureTaxonomy": [
+ "setup_failed",
+ "task_setup_failed",
+ "index_failed",
+ "timeout",
+ "invalid_schema",
+ "no_answer",
+ "wrong_answer",
+ "wrong_evidence",
+ "unsupported_claim",
+ "false_ready",
+ "tool_error",
+ "judge_failed"
+ ],
+ "runManifestSchema": {
+ "appendOnly": true,
+ "claimRunsRequireSlotsForEveryTaskLaneRepeat": true,
+ "requiredFields": [
+ "run_id",
+ "protocol_version",
+ "protocol_hash",
+ "task_manifest_hash",
+ "lane_id",
+ "task_id",
+ "repeat_index",
+ "status",
+ "started_at",
+ "completed_at",
+ "raw_trace_path",
+ "structured_answer_path",
+ "score_path"
+ ],
+ "terminalStatuses": [
+ "completed",
+ "setup_failed",
+ "task_setup_failed",
+ "index_failed",
+ "timeout",
+ "invalid_schema",
+ "no_answer",
+ "wrong_answer",
+ "wrong_evidence",
+ "unsupported_claim",
+ "false_ready",
+ "tool_error",
+ "judge_failed"
+ ],
+ "failedRunsIncludedInAggregates": true
+ },
+ "protocolFingerprint": {
+ "required": true,
+ "algorithm": "sha256",
+ "covers": [
+ "protocol_fixture",
+ "lane_fixture",
+ "correction_fixture",
+ "task_manifest_after_phase37",
+ "prompts",
+ "lane_tool_cards",
+ "budgets",
+ "thresholds",
+ "scoring_commands"
+ ]
+ },
+ "architectureReviewRule": {
+ "requiredBeforePostBaselineProductChanges": true,
+ "mustExplainGeneralMechanism": true,
+ "mustRejectTaskSpecificHeuristics": true,
+ "requiresFrozenRerun": true
+ },
+ "postBaselineCycleGate": {
+ "maxImprovementCyclesBeforeDecision": 1,
+ "requiresBaselineHash": true,
+ "requiresRerunHash": true,
+ "allowedDecisions": ["continue", "pivot", "kill"],
+ "noDecisionMeans": "stop_no_more_product_work"
+ },
+ "tripwires": [
+ "fixture_or_qrel_changed_after_outputs",
+ "threshold_moved_after_failures",
+ "setup_failed_treated_as_win",
+ "smoke_task_used_as_claim",
+ "mixed_context_tools_in_one_lane",
+ "product_change_before_baseline",
+ "benchmark_repo_name_or_task_phrase_heuristic_added",
+ "failed_run_removed_from_denominator",
+ "best_of_n_reported_as_primary",
+ "official_evaluator_bypassed_without_documented_incompatibility"
+ ],
+ "blockedClaims": [
+ "codebase_context_beats_competitors",
+ "codebase_context_improves_patch_correctness",
+ "codebase_context_improves_productivity",
+ "focus_mode_improves_agent_outcomes",
+ "token_savings_superiority",
+ "setup_failed_competitor_is_loss"
+ ]
+}
diff --git a/tests/fixtures/contextbench-corrections.json b/tests/fixtures/contextbench-corrections.json
new file mode 100644
index 0000000..419b256
--- /dev/null
+++ b/tests/fixtures/contextbench-corrections.json
@@ -0,0 +1,71 @@
+{
+ "name": "v2.4-contextbench-corrections-ledger",
+ "protocolVersion": "contextbench-protocol-v1",
+ "frozenDate": "2026-04-27",
+ "corrections": [
+ {
+ "correction_id": "contextbench-hardness-signal-policy-2026-04-27",
+ "date": "2026-04-27",
+ "reason_category": "factual_erratum",
+ "rationale": "Live inspection of Contextbench/ContextBench config contextbench_verified found no explicit hardness field, so the Phase 36 hard-task invariant is replaced with an explicit unavailable-signal policy and proxy hardness scoring remains forbidden.",
+ "affected_fields": [
+ "taskSlicePolicy.coverageConstraints.mustIncludeHardTasks",
+ "taskSlicePolicy.hardnessSignalPolicy"
+ ],
+ "prior_hash": "sha256:e196311c98e6af44c044dbe57321afa28afdacc598cb499720c42e2bbf4ad495",
+ "new_hash": "sha256:b630d813d266f1f814a53f9ca7695fc4b33c553e6cf961764ee76551fa8e63ab",
+ "protocol_version_before": "contextbench-protocol-v1",
+ "protocol_version_after": "contextbench-protocol-v1"
+ }
+ ],
+ "policy": {
+ "silentChangesAllowed": false,
+ "allowedReasonCategories": [
+ "factual_erratum",
+ "ambiguous_task_fix",
+ "repo_pin_correction",
+ "official_evaluator_compatibility_fix"
+ ],
+ "requiresProtocolVersionBumpFor": [
+ "task_ids",
+ "repo_commits",
+ "qrels",
+ "expected_facts",
+ "thresholds",
+ "prompts",
+ "lane_tool_cards",
+ "scoring_commands",
+ "budgets",
+ "metrics",
+ "failure_taxonomy",
+ "terminal_statuses",
+ "blocked_claims",
+ "tripwires",
+ "phase_boundary",
+ "lane_sets",
+ "setup_failure_semantics",
+ "run_manifest_schema",
+ "protocol_fingerprint",
+ "correction_policy"
+ ],
+ "anyFixtureChangeRequiresCorrection": true,
+ "requiredCorrectionFields": [
+ "correction_id",
+ "date",
+ "reason_category",
+ "rationale",
+ "affected_fields",
+ "prior_hash",
+ "new_hash",
+ "protocol_version_before",
+ "protocol_version_after"
+ ],
+ "forbiddenReasons": [
+ "match_system_output",
+ "improve_score",
+ "hide_failure",
+ "reduce_setup_work"
+ ],
+ "comparisonAcrossVersionsRequiresFullRerun": true
+ }
+}
diff --git a/tests/fixtures/contextbench-lanes.json b/tests/fixtures/contextbench-lanes.json
new file mode 100644
index 0000000..455fc32
--- /dev/null
+++ b/tests/fixtures/contextbench-lanes.json
@@ -0,0 +1,149 @@
+{
+ "name": "v2.4-contextbench-lanes",
+ "protocolVersion": "contextbench-protocol-v1",
+ "frozenDate": "2026-04-27",
+ "initialExternalGate": [
+ "raw-native",
+ "codebase-context",
+ "jcodemunch-repomapper"
+ ],
+ "broadClaimLaneSet": [
+ "raw-native",
+ "codebase-context",
+ "jcodemunch-repomapper",
+ "grepai",
+ "codebase-memory-mcp",
+ "codegraphcontext"
+ ],
+ "broadClaimsRequireAllLanesComplete": true,
+ "setupFailedRequiredLaneBlocksBroadClaims": true,
+ "lanes": [
+ {
+ "laneId": "raw-native",
+ "displayName": "Raw/native agent tools",
+ "claimRole": "baseline",
+ "phase36Status": "included_in_initial_gate",
+ "contextTool": "native-agent-tools",
+ "allowedTools": ["native-read", "native-search", "native-shell-readonly"],
+ "disallowedTools": [
+ "codebase-context",
+ "jcodemunch-repomapper",
+ "grepai",
+ "codebase-memory-mcp",
+ "codegraphcontext"
+ ],
+ "nativeToolsAllowed": true,
+ "setupCostReportedSeparately": true,
+ "indexCostReportedSeparately": true,
+ "cacheIsolationRequired": true
+ },
+ {
+ "laneId": "codebase-context",
+ "displayName": "codebase-context",
+ "claimRole": "candidate",
+ "phase36Status": "included_in_initial_gate",
+ "contextTool": "codebase-context",
+ "allowedTools": ["codebase-context"],
+ "disallowedTools": [
+ "native-read",
+ "native-search",
+ "native-shell-readonly",
+ "jcodemunch-repomapper",
+ "grepai",
+ "codebase-memory-mcp",
+ "codegraphcontext"
+ ],
+ "nativeToolsAllowed": false,
+ "setupCostReportedSeparately": true,
+ "indexCostReportedSeparately": true,
+ "cacheIsolationRequired": true
+ },
+ {
+ "laneId": "jcodemunch-repomapper",
+ "displayName": "jCodeMunch RepoMapper",
+ "claimRole": "first_serious_retrieval_competitor",
+ "phase36Status": "included_in_initial_gate",
+ "contextTool": "jcodemunch-repomapper",
+ "allowedTools": ["jcodemunch-repomapper"],
+ "disallowedTools": [
+ "native-read",
+ "native-search",
+ "native-shell-readonly",
+ "codebase-context",
+ "grepai",
+ "codebase-memory-mcp",
+ "codegraphcontext"
+ ],
+ "nativeToolsAllowed": false,
+ "setupCostReportedSeparately": true,
+ "indexCostReportedSeparately": true,
+ "cacheIsolationRequired": true
+ },
+ {
+ "laneId": "grepai",
+ "displayName": "GrepAI",
+ "claimRole": "required_for_broad_claims",
+ "phase36Status": "deferred_to_phase39",
+ "contextTool": "grepai",
+ "allowedTools": ["grepai"],
+ "disallowedTools": ["native-read", "native-search", "native-shell-readonly", "codebase-context", "jcodemunch-repomapper", "codebase-memory-mcp", "codegraphcontext"],
+ "nativeToolsAllowed": false,
+ "setupCostReportedSeparately": true,
+ "indexCostReportedSeparately": true,
+ "cacheIsolationRequired": true
+ },
+ {
+ "laneId": "codebase-memory-mcp",
+ "displayName": "codebase-memory-mcp",
+ "claimRole": "required_for_broad_claims",
+ "phase36Status": "deferred_to_phase39",
+ "contextTool": "codebase-memory-mcp",
+ "allowedTools": ["codebase-memory-mcp"],
+ "disallowedTools": ["native-read", "native-search", "native-shell-readonly", "codebase-context", "jcodemunch-repomapper", "grepai", "codegraphcontext"],
+ "nativeToolsAllowed": false,
+ "setupCostReportedSeparately": true,
+ "indexCostReportedSeparately": true,
+ "cacheIsolationRequired": true
+ },
+ {
+ "laneId": "codegraphcontext",
+ "displayName": "CodeGraphContext",
+ "claimRole": "required_for_broad_claims",
+ "phase36Status": "deferred_to_phase39",
+ "contextTool": "codegraphcontext",
+ "allowedTools": ["codegraphcontext"],
+ "disallowedTools": ["native-read", "native-search", "native-shell-readonly", "codebase-context", "jcodemunch-repomapper", "grepai", "codebase-memory-mcp"],
+ "nativeToolsAllowed": false,
+ "setupCostReportedSeparately": true,
+ "indexCostReportedSeparately": true,
+ "cacheIsolationRequired": true
+ }
+ ],
+ "setupFailureSemantics": {
+ "status": "setup_failed",
+ "winEligible": false,
+ "claimContribution": "missing_evidence",
+ "includedInPublicationRows": true,
+ "blocksBroadClaimsForRequiredLane": true,
+ "requiresReproductionCommand": true,
+ "requiresLogs": true
+ },
+ "laneContaminationRules": {
+ "oneContextToolPerLane": true,
+ "mixedLaneContextInvalidatesRun": true,
+ "sharedCachesForbiddenUnlessReadOnlyAndHashed": true,
+ "memoryStateMustBeIsolated": true,
+ "precomputedContextFromOtherLaneForbidden": true
+ },
+ "laneToolCardRequiredFields": [
+ "laneId",
+ "allowedTools",
+ "disallowedTools",
+ "setupCommand",
+ "indexCommand",
+ "queryCommand",
+ "versionCommand",
+ "cachePath",
+ "artifactPaths"
+ ]
+}
diff --git a/tests/fixtures/contextbench-selection-exclusions.json b/tests/fixtures/contextbench-selection-exclusions.json
new file mode 100644
index 0000000..96796e9
--- /dev/null
+++ b/tests/fixtures/contextbench-selection-exclusions.json
@@ -0,0 +1,3861 @@
+{
+ "name": "v2.4-contextbench-phase37-selection-exclusions",
+ "protocolVersion": "contextbench-protocol-v1",
+ "dataset": "Contextbench/ContextBench",
+ "datasetConfig": "contextbench_verified",
+ "split": "train",
+ "selection_algorithm": "deterministic_seeded_coverage_then_rank_fill_v1",
+ "selection_seed_or_deterministic_order": "phase37-contextbench-v1-2026-04-27",
+ "selection_timestamp": "2026-04-27T00:00:00.000Z",
+ "task_pool_hash": "sha256:a6af697f293ec595bccf9c264799f8a55308cc552b20b8ec61714240c2a03b26",
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_proxy_used": false,
+ "no_lane_outputs_observed_attestation": "No raw/native, codebase-context, competitor, proxy-hardness, or post-failure outputs were observed or used for selection.",
+ "input_row_count": 500,
+ "eligible_row_count": 500,
+ "selected_row_count": 20,
+ "excluded_rows": [],
+ "non_selected_eligible_rows": [
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__838d3d25",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:03cf497e7e4415a656c070848b5c251e3a3e6a4179ee887cad42acb84390f152"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__0307e40c",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:06baeef7c670478f7c787637882732d0baa64ff4f32505d955892d5d1d3527c2"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__634fe9b8",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:076cfb05187984e4cab05aa898d87d8ba219de13cb3a9f9ad8f193d87d950822"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__93721db4",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:07ea5844147f8f772b09eabda751164b99b0d5ad4bbb3f6b960de77cd11b2670"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__5c82134f",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:08f69910c2675a7ca7effaa484f3015b5a8f2daa755d32eb9cb7c8a3568e6675"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__31d4fe9d",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:0928f9a14603d2c32812d7598e79450a4efb7029ba992738a946ce388cde7204"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__6da325be",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:0a1cad48d81847dad4b28eb18ada007c40d41ca6492956090766a4f8e420b1a5"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c923cfa8",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:0b7b6fbab039f509d6801f6cb3d74f16f18b2418773a83f06ee2a3610431d459"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4e7b7c81",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:0cd437ece62135e58d366417ff3ba5ee9a118a865898770abf010e96866027b4"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__0006beec",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:0cf4acef53f32baab2df4a99433fbaac1d6ebcc084099685cec088faa57d3e10"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__2d9d9f71",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/iamkun/dayjs.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:0d9f5b490823ebf2a0513a4b47c164abedcb2435cfcb62f4c0d709cc1160d2aa"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3b4c72f1",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:0da8f8ca106b09d10740c819e558cdd71bd2c0440abd584abb3de0936ac74569"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__b31d2f53",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/iamkun/dayjs.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:0e1195bfd17ec106c224d7e54045aef1b2fca095e2590f0dc5b461034bfe60c1"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__8c189fda",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/keras-team/keras.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:0f681bf196a99486aa2fb0f426956a0c79372cbaeba33dc9b9ba2a3e25ebea1b"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__3c69099b",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:0f9037edb3dde63aa4464dd6379fc2ab609755340654956cbd7a7b3f997b6cc7"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3e2d031f",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:0fce488c2ee84d450d3734f01054d840c8d8c58151e35f592e13c534215ec0db"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__07bb383a",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:1006923d5364a3032de43f6d15b00b5c56cae3876120005cb106ba96a1105af5"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__994d041f",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:105f873d3ff97062d7d01e9eee57d3844138178d79b630d16dde71930a039311"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__6a53360d",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/vuejs/core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:10bb0777c8a14ee1bf710dfc37814fa829afc1e71ea96badc874ca31b0bb9c54"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__95b4abc1",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/internetarchive/openlibrary.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:10c8fc62d875aa5cbde0d7c16ac9790e488db26f47d46f3099509b097a17c55c"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2c34be8a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:11c231ffec3cb7601f1c48f6759db7ed657a895f1cda2e602257f574d793d8f6"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__84fdadf0",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:122e62b3df7ac8c58decd7b4106ff2ca2a1f6298db727e890c7af5b5cd170c51"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__eb5704a5",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:124102c8beff648f813ecf0fde8989bdf42d51b49c37449a7917074a3b687ed8"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__03fdad45",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/tokio-rs/bytes.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:13b3615fee16b8239456863a733234c4ab8c6f54024f563d959a3646d16f7707"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__9c878ad5",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:16284a509303ae4caeae71d49e913e057c12271a801d58e95615206199cf8e0c"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__evolution__feature__07098661",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:17b5cb4894ab5a99686cf09ea54c20de211e43b1eb3a5e836caffe40dff4923b"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__67699b64",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:17dd8dbf0cb630e6b465e9b03ff27da9825a836444ccc66648ff04537be88633"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__6b844996",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/facebook/zstd.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:18a2c1a973357bffedb01a16c719b0a14c7ce96c5ca414cd9e1bba381bc84846"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7db465d6",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:1941e11a86608a90ff33cbd18236249a3dd7585cc86191e8dab6fdc9dd6277b2"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3d378646",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:1c06ce9f7cf18ffb7cabf8e39e05e8b0317fbfae26bc7234885fcb7ff771e495"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__05a89a66",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:1c662d7109360fea3fb9914255d544d38bf461003cd945c092993427e58eeede"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__5ec14acf",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:1ce02f9ffc2e770050dd835d9463a4a75ceea43cbea99546c8e8989293d97ddf"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__6cf8c11d",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:1d0accfcbbca86472e03ce1624cdba2002ca9da1a18a47a81d436ba94fed14f8"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__0f495035",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/facebook/zstd.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:1e4e46ccd14d323dfeb44e58bc614cf0bfd0a57bc90a015f2ba4a503757184e9"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__05c53458",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:1ee25ae1d46d51343052b4cf9453269e9e109c0bc0af81d5d0d79042ab77533d"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ce7f17a5",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:1fd109441ecfde35bab0ee355bce4462190cfa906d029536ac0164bc2fd261a8"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__evolution__feature__d520ba22",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2033c0f759844814d765212341a7e075c76bc3b7f78dc56f5f981e961578d828"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__07d7817b",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/fasterxml/jackson-databind.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:20c6cc46393e7f838b997bf791c215e5592cc8ef4c301baf2e6dacae110a1840"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__eea782b2",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/iamkun/dayjs.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2122b6100066889299916ede35b082becbdd27f4f798f5044c6bcd41fd2f64cc"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__768d44e5",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:21b0cf95e6eb5ba4379aced23b240436f17a0d7913c88b52720424aa402db15d"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__48e5846a",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:21ee8537e57ab86968caa867c2c2c2ba4e4d223524a602e172c63124b502b2a0"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__c1f435de",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/qutebrowser/qutebrowser.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2272315e6cd2ea8db4d42dbfc36863cfd3633c1fcdcb0ca5f34530bd121e5932"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__497deb6f",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2274a0674571e600418d650e7b46e6a52052225e50087c02067cd2e49f34fec8"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__a65e43ee",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/serverless/serverless.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:22c34eec746aacaca1e24b54dcf714587b4e9cf46fc526bc8b561692f31d5513"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__b9b45262",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/facebook/zstd.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:238c5e7518267abdad83b5cb0dfef1332931ee2a388bd80b1fcb1ed826cc0147"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c4c8d376",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:23a801198076fc32539641ebf5446e2332a22a6daa5ded1bbb2c722aef7d4ce5"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__450d75e0",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2423d8ecf7f82716dd5fda053c1184f70d5b9a59afc197a0f5ea72ca9e83f920"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__37455515",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/yt-dlp/yt-dlp.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:24484cd1ee507b8cf5024e9e0a42ab381e5f166eb15942f74e1fc2003b499bcb"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__16d1ff7a",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/microsoft/vscode.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:24863767938823e97b130eb622a8e1d4e159437685dd662072fbd887d563b193"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__c3284db4",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2559802d188383102ef01344141f18097ed13e332e53b83a7df48dccc4d26cda"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__5eee261d",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/vuejs/core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2637639450d32f5b0845ddbd2f1f9db7e4b994de2ba7b824b20ad21f3b617e08"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__80c9776a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:26643c6362bea41db1dc6193aa85ad783c3342c68849a4bc04bb3dd978c88db0"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__ef567ef1",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/alibaba/fastjson2.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:26716eaddfd72568f3d98f40bf55cb3818b1dc4537d4f43a4b882690022de349"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__cb2c6ff3",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pydata/xarray.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:272d7a81ad4d45995f6906384f849802932e94308defae3e82bff71ae11768c9"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__e31ec45c",
+ "source": "Pro",
+ "language": "javascript",
+ "repo_url": "https://github.com/NodeBB/NodeBB.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:27889de7044117c889568cbada79dc9062a80b73748a88be5c26db604e79e665"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ac705f35",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:278f3ecd34a0b7a3e829f2610785ab55ac503c2436eec6ce77d4eeec5feeeb0b"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__3d4cfa9c",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:279fa30273903ce4360195be11f39c49c47f423b730b80c700f02d5299ab31a6"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__15589352",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:291d68196e79c8659b321b0f8be7a73e160b9e2aedb23e5935637ce1b9a97945"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c9656d7a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2937f2f4d79a3fc5f876211e2f1c1fcfa9a23880849f39f838fc3b797f75379b"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__90532e38",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pydata/xarray.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2b067a3eb02e8d4739f8d5221afce5a122ca686b43267d45b92d49b6b9497eed"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__16860730",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/nushell/nushell.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2b32f43866183e19f892f8e8c18fb8319bdc86bb95e7d33b5edfaa367b5cbe3e"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__6ba3d94b",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/fasterxml/jackson-core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2b33ba1457c7d224932d22df1657f4371bdfe2ac8aaec5a2475686c4d53d5580"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__a47dfbbf",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/facebook/zstd.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2bb94c4c97c4f5d8e786a88d03c203ac5515a219984de69fa192c999363c67a7"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__11e78a8d",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2c0b27e3bca5f749e280ba006cad31ceef1f82291d130200a4ffa66deb73fa69"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__074f37ac",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2ccedc8a6e160c727740aceb8cd8044af63a023af6119038283c27e387e661c5"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__dded0f96",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2cfc8ce3ce0a51e13e6abdd2e5c9d9fe8502919aa3e96584279752875b592413"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__91e6ed53",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2d0187c1050864406217fa037da864735b387c6c4d33f7044d24159a28171579"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__0f94ce4d",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2d4cc31785a1d34c59480eca8ab24a61f2fb4668f9c833340ebb3d965e37f976"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__81c5c6fa",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/internetarchive/openlibrary.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2d8bf22b7f5197b0d4f0796fb7252082cf7c4c417fa9da5b13bb559037fabd94"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__c8b82135",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/vuejs/core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2dd8e944566a34880b0eb3076d4fe1052bd574c2da395acbb8db6da68246bfb8"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__1384380d",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2df3876bed7c1a87d494645449281045b476aa21ff0377d5f6a2816af6077a97"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__64fffdfa",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/qutebrowser/qutebrowser.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2dfe283663f0ebc3824b062e3f8ed0ef0387b7db96e754fc2c6f951c4f6d5682"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__183a2a5b",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2e0173d18815f7832a7613296d032f803f40970a5717634557dd9c4bb6db8b52"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__83a70168",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2e64e5f5583b6fb59a379296e325334b33512df139ac74ac698d0f07d754cc8a"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__42c77239",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pydata/xarray.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2eb0577f818eafa9649d5132854b7fcb5f456d543506fcab47b099e186a5b0fc"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__0f3c174a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2ed40d88eda56cafa7be73895d90ab990579880bc554dd056a4b4dcaf8925fc0"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d3bf673c",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pydata/xarray.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2f267702edb795356e07fc431e11656d9a38e5ef9957720fe127365725db583d"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__98bbaed2",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/axios/axios.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:2ff20a07bf25cf73be95db6b5fe298d7f01fdb989003790fdce08b5c48b117d6"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__9bdfae2c",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:302555472db0e26208aa195618cfb873a8dc589f3822d7612ed2d480e2fbe299"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__9ca0e415",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:30ae54045facf638785b0dfc7994c7516b55b8577b8072e55e26dfd882c64c48"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__45db86cf",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:30c8ea811449d7adca7672c182f3f65cb8f5b41b795fda02254a9bbed2e01360"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__4d9664f3",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:31c58b4ca73871761f33989a961af53e1299257a95ece466a56eb2fa566196b9"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__908342f0",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:322dea2c22c44fd1e0c3991a2b160185655723334ff7c5e876d4520d8efdcbd9"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__42ae5e9f",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:325e77fe6dec496cace3fd5be3faab00f47a6972e00c64b433f745ed29c6ea5f"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__4606de0b",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:32cdee42bf9d7b765923ab739ca76c06fbd908b08d01ece506efd01e028ba1bf"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__2fea5f6b",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3425d8e234195ba036df9bb6bc46ae2362e3d11b491c4cc745b15ca56cc71b76"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__40a717e5",
+ "source": "Pro",
+ "language": "go",
+ "repo_url": "https://github.com/flipt-io/flipt.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:34b963d19ec1e4c9ae69e859bcadb97466ed2cfc14f584ebee4b7320a9a6009e"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__5555e84c",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:35cad2696938f93c811b304a7245987e6c3623f18ccba72f3a52e9501dde12c0"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7439767b",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:37701cce5266f96431c27136ea5ac8eba24a3b1fcef7f6b3c76007891394b652"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__1e195cd6",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3825364f24b94c8f5623eba131a72dc63a24988f34f5087a12377ecc38c19adf"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2e76c8cd",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pallets/flask.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:38277c285d71902ca97f19f4bb1c9d7ecced4d155b54f39fd6d085285180669f"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__bc1c666f",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:385299b57afd9ba5fed8f1f0443065a8a28f6937385c20e26915a8da3eb1a110"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__45da4482",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:38dea4af43965149078cd83c0a70c21c26d530f1ea083ae6de369fd47dfe94ba"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__ff0cfab5",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3903d17a7e6a39afa977fdec9bfcdedf4f9f861ea0c9639bb354e8235474e8fa"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4fb8fc10",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3920290fd9832d1e2f3c4c4e5c11e1053607d5016b2e5ab28e4e57bac5979aea"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__7acaca55",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3994bfd3b0c0c2980d5285f4a42cb8a3b42f4e94364bdd21d4c634a52438e31a"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__7c2efe8a",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/internetarchive/openlibrary.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:39f2ea55a8faaf270fa4f6dd07b22f49f4848c3a7130b2f118e23ec4710d49a7"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__8d1c297a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3a6e8fadd6fd8a475642b765db5d7a8fc07df52a90edc3ffe953d555c6a15ff6"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__f0db9a27",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/fasterxml/jackson-core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3a7ca6c1f548555448ac475cd0526f726f7a51c2fd7ad2339d6dd9e59925094b"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d0723f55",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3aaa3928fb4de89ccd3dfd6fe8cd537f0f23c4477bb7402dd4bd6dcee6e364f0"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__e647c8ce",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3b10d62be939d9aa92432446f2b14b25ca9e99dcbe81917efd7b59ef8517e8e6"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e5a3bf2a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/astropy/astropy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3b2c0cd652714010d038c607053cf1d6c01b443e89bc8615c5c26a49d92e9596"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1efc2b51",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3b6bb198ef59f43402a44124c3fe8601448abcef5bae8bc1c14630ccb40dcda2"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__1e29a469",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3bc93fea6caa989ac25bf866065a361879ab7c95ef9b4dfe42a2e1ce4e99d426"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__8b1080ea",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/keras-team/keras.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3bf01c474f20d61cd853d0ad5371878b7a48dee8b40576377de6cbaa2f6a97ed"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__2f88019c",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3c2638e26125f56b90cd59d9fea08448093fbc6f739d649efe43049619e7d55d"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__5c001746",
+ "source": "Pro",
+ "language": "javascript",
+ "repo_url": "https://github.com/element-hq/element-web.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3d6e51117088a3557c2b72c8453f2bcf118ccf2acda84bc7dc6500f9edcb2342"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3185b834",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/astropy/astropy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3dbaeceb0cf71f3603b9cbcb84be604820acdeba1ee9e0b1b2576a4e2759520f"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__0b78ed50",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3fa6341ba87d5effdfafaee3097e66df4ff0da78536c10a04711d78b3dea4e3e"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__88e1ffd3",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/psf/requests.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3fbca068cde77c9a3d87ca965c8563a3865a7e88a69ba1d85b74c00fde315a83"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ac127191",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:3fe99a63634ecc521ddf8c3195f6685436410459a62ccdf0742a4956aebbae37"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__9907b12d",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:418c4601b3773b4321b6dd8509688745e5eb1c7fac011bc80dc3ebd19dac851a"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__5188c6bc",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:419546367169c45315c9e54cdae9e5194856c3b9e1990e5351237eb8c33fd11d"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2ccbdc72",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:423c4bc0e5dc7cc7275645bfcbdae0acb7504752dc20eb3641680cb06226e484"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__6a14056a",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/vuejs/core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:42cbdc9325a3f0490412593e1e5c6e130855a2f1c525af8a818b1260a105c450"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4d321c4a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:431ea723154d39ddb5b2a0182db3e38ca67b299b3c7e8102b65b222732b4081d"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__ea5a4764",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/keras-team/keras.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:438d70abeae92385f74bfb9d6c2c6da52dc4bad3498c07444ccd10d9eb3cd94c"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4b691a35",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4459cdde1b7c2330e192ff256c9707eb50f18c2f9045a603911bab15eee03cf6"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__52a91dd8",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/langchain-ai/langchain.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4478daa9fdcc199ab420e8fc7c6bf36b84bab63444bf14186e76a03567073f04"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__42a80be0",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4621d1038742dc62a46e026ac0faa92103a45a5cd279bacfddc3dd68d37978c1"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__evolution__feature__b4302840",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:463675d2f7fc019f315d4802136c1b406aa63597d0c25dae06835e6eaeb19b14"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2e3d6eae",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4665f7351006a92fc92a88b503bc558c976b0ffcd1cb2bc8f7a78a46cfb158cb"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__5aed9f96",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:46e15b392e85beadac9ef37f17ee2bf1978576f28cc32272483a273fa5e6196f"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__2487b02a",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/microsoft/vscode.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:47b94c57f154f0cd9144f457e265a495d6c9f3b1fb726a854f5d2301492f7d6a"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__15e5aaa3",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:484a54348ff22d071f69282ec722ce98ac476228842626b3836907d74bf0846d"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__1bb5c0b0",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:488c33518e4122e8a198c1e7b8804f3e0c3885fd21462e5d884f6a6a7c508e0e"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__607fc4ff",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/internetarchive/openlibrary.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:49370aaf658368f0590382a55ed78d8b4dbf835a9383d213600d0696d730ba2d"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__7b6185af",
+ "source": "Pro",
+ "language": "javascript",
+ "repo_url": "https://github.com/NodeBB/NodeBB.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4a2c896dd3bbe8aeb08f18b813f1c51f27c28264b73afcab99dccfa48ffb1c6a"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__39a9746b",
+ "source": "Pro",
+ "language": "javascript",
+ "repo_url": "https://github.com/element-hq/element-web.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4a72ca03f9ae4a2b34af9734a100746c82b96772a0c8385a7a151af73ef78b23"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e989ba2d",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/psf/requests.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4a8dfcd074eb451c1d552b1e6b8723efd3bf08c8c34878c852bf693facf2c546"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__18eac778",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/vuejs/core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4b82b033dc6320aaaca83a04fc230aa33c361b4aaf12de316165da33fc0bfc82"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__1ec2c84a",
+ "source": "Multi",
+ "language": "cpp",
+ "repo_url": "https://github.com/fmtlib/fmt.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4c0f3c256d452c2d157ba3fea7ad9fb5e2c9fef6ced5003c101549066a768fdc"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__07bafddd",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4c827b8c4f2cdde571f9a7d18a6b33ec4147a346cb77b319fe28dcd051d51249"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__2aa6fa4c",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/vuejs/core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4d4cd2f55d001fd24a0cf217c297c471e88069da3f68bc6855bf0f403de64299"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1ae3c3bd",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4e4199a59ba0bcec91c2b70be32fe313b4c852415268c00d99c865f3c6bb72e3"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__53ca6a30",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/internetarchive/openlibrary.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4f03139321008d9300d49ccb85540c22213606b926a4bcfe329c4749ba8fa01d"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__30065f19",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4f4dcda690d48d5a5e121e4ee01d8140c5683bcba7206bc73bdea7143856f346"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__b9f9961f",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4f62da7451dd325f427814f08c2ef50c3244fb014144951a7f2d5af6f56de075"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__d22bf206",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4f6cf5a824bf120bb14a0d67e80ad54fd292d516deb9bf84f8d96b2f7987656c"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__evolution__feature__17a093c8",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4f95b801738cab0738b8e3befee5aad9d904def47f049a4f6393731f5148c8c4"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__14da06bc",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/elastic/logstash.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:4fab17c12c78ee10a62559846664a4822b64e0930fe1cfedc4b30d84fbdf6db2"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__d129d52f",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:500524859a2c1be42a34288dbc4d540079d98d41a30130aeac87d04e1131d547"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4f130690",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:517db77979a49f50a4e79046d284debb0fea497bdf37cec311cecc9a8382798c"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__214ce29d",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:51928eadb5dc8d4d9f6f5741e050c14ab651ff1232b2d847dba31fae478ccec0"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a5ec0eec",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:524f737afef9e8538f49defba64d553d27e49b35a5b00903a9efb4395143ed42"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__84effcbc",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:527217b9e22ef19ece5e491dc09f3a9bd07010b8365700bce787a881d4bd94eb"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__20eb5bc8",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pydata/xarray.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:532100d56642791b0696083a6194805f103cc11d692a10af6bd438156eca34df"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__0f79e39c",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:53bb2da8bae618fd3c04a9515f5b3d2efcc60bf152fe70ed9285c0cb3e65dad2"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__evolution__feature__34826a6a",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:53e3958f53705e9513df7399bb4e772b8df2807225672f9363ff3fd63d772386"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__21086305",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/prettier/prettier.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:54597b2edaa48f9e3e74424649f716dca48018304b7ceaf6534ed3ec73956b0b"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__739b23db",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:556427f95d643d5beb651cd221e0fff6fdf9ab22fe1187122c86d8ca9b8029ff"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4d207ce8",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:559f204ba9c3f7f485b7e7df9fe053ecbcb0695e4ae21a3bf59c0068daf73a39"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__f9bb5442",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/facebook/zstd.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5645e936378cf638659e695b39596fd46bd393ccc8670926ba1f6b37fa6f18e3"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__8cc426dd",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:57ed93eb89b32ff2e1427c509b02198bf4e54821cdf38dc7d365f8fbdeda54ce"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__8f9042e3",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:57f08ae5655ff12c003b5b1156be099370aa40360d206869423ed8352a78f740"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__e43d9aa5",
+ "source": "Multi",
+ "language": "cpp",
+ "repo_url": "https://github.com/catchorg/Catch2.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5818ee91e9fa4b98c14f7fcb147d9f6dfec50377d603644d77578df58a536bda"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__9ff56151",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5821daeed84f963923a1daf44f9a93d679d59420e95ec1988683db229614255c"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__04c51be7",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/vuejs/core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:585bff0a4a63824adb9de8a46b04a98d7f5790c9fbbb321abcac84e65d9b8e7b"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__61a7a81e",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:58b89e3adb8e561a28fa30bb429c70d3721e10abab2b8d64f939ad8ed41a2b4b"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__48468065",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/microsoft/vscode.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:595789856f88dec3707155e8b48a630c128eb0b07c26061f030623057810434a"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__09eb0d6d",
+ "source": "Pro",
+ "language": "javascript",
+ "repo_url": "https://github.com/NodeBB/NodeBB.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5a1288d33a47b2f1dd2bcb3d3baf1d64733fd0cd9bc8950e0997003b5e1b4f42"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__ac8400d9",
+ "source": "Pro",
+ "language": "javascript",
+ "repo_url": "https://github.com/NodeBB/NodeBB.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5a2ab1e03a31d103409a375f4c91366fbfd7982df9e80243688004e987243a7c"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ebf021d0",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5ad21b9964fb361fd46b99a64d9e63b809c39f29bb8476d83fb9f5d71ae123c4"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__720b4d92",
+ "source": "Pro",
+ "language": "go",
+ "repo_url": "https://github.com/future-architect/vuls.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5b0cb19c0bea9f60a0c0c1bb2313a7bcc5b4b7a40053f3668854aa21202a32a7"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__9a05fe0c",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5b1d0f8838ef362706223a87b68f2e9a93d9b42dd08beead758e4ff64fed7ad7"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__4eb0e647",
+ "source": "Pro",
+ "language": "javascript",
+ "repo_url": "https://github.com/NodeBB/NodeBB.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5b5d10862203327b0ad1f3fdcea256fa8f35b58f0a60cbb72fe9489e39447167"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__c017c9ba",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/prettier/prettier.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5b8d1b49ad1ec8a795078709e470a482e4ad24f9fb2e52b8e756ae2fb77d0125"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__5eea2978",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/fasterxml/jackson-core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5c31fbffc692213c38c3f7b8d37d8c946fa92ef4af93955b74887f8a1434607b"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7ec77e9c",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5c41cc5ec5b92656be419d1ecbe5778cc618997b6b295b2ae536885461164372"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__681ea2b6",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5cba2062443f7fbe66c61546c6064d077499bd25600aa93a0b0efe2c54de76e9"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__85c030cf",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5ccc8071a1186487184438e09f9513a9d6b044d6ec2f3fffcd982ba61e061d95"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1eaeabf4",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5efb915edc5e15863dbd672e1de1e8ffb040e36b085f5fbbc6d7eca80f51d77a"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__fd8123b1",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:5f509828b0b901c8d6ff3b1f1246f168380b3e99d0558f18928bfefdf3d29b06"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__05c9e61e",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6070fcb691545833d6afe578a801748287e6c2cbbdfd0f07830f5b64ef60de11"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__abfdb2c5",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:60ef9ce0c96aaee01ea666394c010cc8f9088491f2b1992afb587c6b3dc2f112"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__bca55dea",
+ "source": "Multi",
+ "language": "cpp",
+ "repo_url": "https://github.com/simdjson/simdjson.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:612164e013a50678221e5235f1c22ca215c94ff5cf76ab64b4afde9f681c7944"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__7c9ef76c",
+ "source": "Multi",
+ "language": "cpp",
+ "repo_url": "https://github.com/nlohmann/json.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6262dcda36e891b2bfeb48853aaf50d60935957b418bd152fc2f987f5b7beb44"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__fb07ed8b",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:626c684403eea71e8695e58728721e5f71bcafa0915634b2ff9389e891f12aa4"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__52c152ba",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6283859052944efcfbe7f321decd06680d883e3a2cced621fa39a670d2047445"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3e44d93a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:62cd5384796915a3a413d00cc15af190af3fb84576e46400b50b492253dab784"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__982277e4",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pytest-dev/pytest.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:62e6252dc1659aafd5af8d1b8c51ba41619d4e4fc5a6473b8cbc278838aa955e"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__227490c2",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:631d130596a618701bac4f0f0bc572bfaa697da956b35efb4cfc601faa1f4c17"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__ae8b362a",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6334d348216fbcaa84d739b0fd10fe3d548c2820778ac5883dece03c90c008bb"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a571e81e",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6357eb2ba324edd60cea4b3c1acee4ddabd3e350206925737f6206aa7bb97908"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__12d848d7",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:63b6924492f7643de4880bdc7a8ea8d2e0aa22c90cdba20b15990f759233e4c0"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__73631dc4",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:642cd7932b06c686ccd8146878f5eab2c0d8f70db29b6f9f43cd298bea841a69"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__9ea965a5",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6466b60cf2e5e7a16c3ef632ae8bbf9922839836cf77551f7e0903b3817fc279"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__1ac60ce9",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:654590e4d83cc87cf287f6d8e7d0904f71a76678678f40066563fdd0d25031a7"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__27393dd8",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:667ea55b2304eb2954167f566d98c4170b3a85f2ac8545fc3bddfbfb104c5a30"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__3f3ff585",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/keras-team/keras.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:66e47107227bcee92616fc300c26ea54e5bb6dce79c6a6289b67b49a4bb9215f"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4f88ede2",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:67a677c6c0855c38175ae597905db6d0c64f2d18522593bc05444004322961c6"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__62badbbf",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:67bce6bcc220c01af7831329348e69a2dcb05c37344efa43a48818cf797a5bc3"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__31f13b61",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:682198791d99566a239e6cc7962c07005848680c84558ab54ad31784ca145d95"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__8c647f3a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:687f21450cd78dd5e5d286b45e9a80067198353860affdb4ef6913a983bd1273"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7ccc36b7",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:690bf652b97a808097f41b1fa01a11a67acf7a1158f4e1325cd3b119f260155b"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__20294df2",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6952068baca8039d3787a2143c31f90e00c8a452a953af5041a4b63d6e77f935"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__607f6b9e",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:69674ac163ae9c3f316ec48e22fbd7a6f17e4404e9d20091736be22e72cf6634"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__afcc47be",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:699f52e3d1fe29cc759e970d09cad300b9116523777dc7cee20fd35017520854"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__54ebe590",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/vuejs/core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:69c49c6fc9912a2c4c32957137cf60f81d54c4b3226316c2f12ad4c9d2111d0f"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__83fd3b37",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:69cc4d466d9e7651cb9f7cdcb2af53cea552b6733418e56c75419fa87e59562e"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__4d090fc6",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/serverless/serverless.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:69d305681c83e0f5e42d660d5fd17b3709146b13a9dfae7f7a387df49cc652f2"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__evolution__feature__684a3415",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:69d561426d5f8417e806a9a05eb81df822a9c382fb8a7454eb0c1ac2b6a7f727"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__b91692e0",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6b51499b713194c37a3a7e11d568a39f6e14b8d9e27e9e3b2a54d77493935cf0"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__5c1509d5",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6c0498e7e9c26b2bf2db5572115318e280a56100f72ea290de5cc48649b531a3"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__01527b89",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6c0868a9ee0d4d965fbb31aa9b39eae008ce1d9e883ad6a48bb0baca4ca96d28"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__bc999af3",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6c50df73d0fd67795dc3f83734ca786e7efade7109f35c1b70faee584f387132"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__87d9772c",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/iamkun/dayjs.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6d39e2b553c73034cf24f6fc803f08351914fa3e83eceabe162b2c3a688bd654"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__3864c137",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6d53b7668908215a678128156ec1e5f43643e958c31d2b117de9a00da23db353"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__59acbd9a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6da16ae02bfc81249f25aba32ebe0a48755fc42eb7ab9b5c142e5db392053c9b"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__678fa217",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6ddf8e13e41fb086d8516a849a4cf94d97065e64b064d67d006b92e34bc0ed64"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__1081c568",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6f4bc2e66d8070f2eef7d8f111b0d2485f87f6be9c95a167638bbfebf190072c"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__evolution__feature__8bb50331",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:6f641114fba66e2f31dc2f429759774d52f7a1560334974f02b84a2209b7723e"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__e3c9c53c",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:70386292933c625d725b45a4096298537198c491d83ea186496bfb139f7ccafa"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__fe080aac",
+ "source": "Multi",
+ "language": "cpp",
+ "repo_url": "https://github.com/nlohmann/json.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:709365140586ef33f4238171e8653b166673dc32968462d68cc658b734ea2213"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__6d48775e",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7098520c00c53b77f6ccae01ebe0259147c88c821799ddf7199deae874e37cee"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__1cadcb7d",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/tokio-rs/tracing.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:718a60e705b98b09ffa67327ed87c2423f4f89fd40668b4897b19fc2150bf6e4"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__158909b8",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:72331d8f1180206a03b37d0c0b6aa0fa43a56acd6eb189d16a05fb8cd3af781c"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__aa16b8b2",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/tokio-rs/tracing.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7279262842927bf02fe97d9d0bf2981475d26eb5b63b1cc4b3740f53826f3fa5"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__c26fd1ab",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:72e5bc55b7100df256b51764ad4df1646a41543822286c55270b8f4318d9dde1"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__cdbc5890",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/jqlang/jq.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:72f3d12cdc56f1f657ccec9a637dfa2c23924b58f8bc077d0c184210f25d4e8a"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__6efcf999",
+ "source": "Pro",
+ "language": "go",
+ "repo_url": "https://github.com/flipt-io/flipt.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:733c8efb41bc8a60bee7708a5f79e638a09801c1975274481e0aacacfdb4b93f"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__6a4e21e9",
+ "source": "Multi",
+ "language": "cpp",
+ "repo_url": "https://github.com/nlohmann/json.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:738476fd9d218f2fe95cd27e84c632a4084ad5cba5328ade17ca4b6624761a14"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__0e4346f7",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:73a8076599d4ab1c5989bf79dbb2d79e43c18d94e208d532c3579d5f5dec698c"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__7b474035",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:742158bfee4796fbd9a5a5a07521accf283337d42f499a0e85832b0cb374eec4"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__023915d6",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:742502385efb81d4e143521b77299cdaa1511bf14a6aa982bee823f63bdde089"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__40f09c26",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7441d8e380beaf496a53272d86cb0dba935b8c45589c8becc040df4c709ea31b"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__1ff281e5",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7449d00d459cb559d046b9e7c350d1c4ea6ab45d12e5771c5f04d72364b8738f"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__6e022940",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:74878b0d9e1f5b48523c925ae1035c6a9923423495bb95290a708445cdad80c6"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ec6fdd6e",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:74f932d64a485353886fd36f7fc9061d6a0263bcbe595b98e02aad4e3448f0bb"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__2b00c0d1",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:75c2493162d4590a20056e441ce622611e3acd55f0d44a6db0cec72333ab6717"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__3a4062fe",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/tokio-rs/tokio.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7624c6dbc358f48c3b3049e74d36ddfdf02ad78800d3b5b5579bc32fcd46a7a3"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__8bffb1b1",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/facebook/zstd.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:768805d008bcbddaee4dc69545f551ef69b296bad1797fdda61cb327fe97f2f0"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__41a5a6f5",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:76f220128b4fa519de4a33f87f977405cb3cd3cd9a805562793cb9cfb73f074f"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__c6d3e230",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:76fd82196cc4a43c634f82bd8344812e7ea1ab47e3314e04a6fea281262edc84"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__1e842312",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:781b005bd25fe85a69cd1ccefb26833eee312783c9401a97446f50c8aef12003"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__3b6e6f3a",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/prettier/prettier.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7885d2af167c9842b8dc4c03c389f4a3b95c0749272b29f54a82feaec9cf067b"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__9a72b241",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:797706981973d0ee39cabbe1869292d83656d77d8524829675928500adebeefd"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3deeea9c",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:79bc5a4f3c5d4dcc678e3fa20125568462cf06e6deed4ea05d1fdafdcb040314"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1d90db61",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/astropy/astropy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:79bf99153faf79c643aca034fb5b8871a46080e0ee6692366cc129df86d59b03"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__8d23605c",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7ac6f05dd5ef65360d9846061c91dde317cba102956f613554341f6cec918683"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__b9ce6a70",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7ae9c265822d97293b796dccdd653239ae3cea29466d2be22b1734939634171a"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__evolution__refactor__57ad5598",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/keras-team/keras.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7ca7f7bb2c6a9a0121ea7b53c0b00ef6cf3bf9c5eead2d6e902b59002da0aa21"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__55a3ef80",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7ce46e6597793134d473cf55b7756461c337c6d8d1b8f68dca60691089e47029"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__eb1411ba",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7d032c20e66dc879f7371d39dff884ba038453f13bd125c68aa2cfad484f710e"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__73fc043b",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7e4e9658bfea48a06dd74e3701fee59d9b5318f99f2ea610ec5f4994e69e389b"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__3d84a3a4",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/iamkun/dayjs.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7e5ac40917f8124268765d35626f1c5dcaf454e5f4eb2925243ab3d6905ea286"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__7827bc22",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:7ea1eed780ccaad7c45ea557b3623edd2d6d2fc217a369d936bbdb1a42d9e334"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__evolution__feature__4a329645",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:802df16c6ad396b3c2cc5fd26de530b606796dcb89fd4e000377a5e6d50b10c3"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__0c4f8d13",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8067fe7f87b1a5af783f5be0f476b93c4354cb839cc76fc43aa357b61d49e5aa"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__593287ea",
+ "source": "Pro",
+ "language": "javascript",
+ "repo_url": "https://github.com/element-hq/element-web.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8069b2420301ffb272d156b54435dd51ab59bd444de9457e4d0dde1774363232"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__cf01f471",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8104a9ae249ffc6fadf3ddb32ad0c67911ab3e8b16178863e443199bc6c5a37c"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__b52bbc24",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/langchain-ai/langchain.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:81634695810aa7da09bfd586daee2d34e9e2a42ccece0f34e5f48e16dfeee60e"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e5236b5f",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pytest-dev/pytest.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8211dbad2044e9f0f9a49ed1e50d8470324f4b48a520b2834be68dd1e8578aec"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__41b108ee",
+ "source": "Multi",
+ "language": "cpp",
+ "repo_url": "https://github.com/simdjson/simdjson.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8301da84fc6b696446f4eef5f6d5a7a5ea7f81dbf96f3a0281335e7b22fb9a86"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__016ce55c",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:831b0746590c5cabacfe9996498dbd49bc4bf6099b3e31457df78375fac2493f"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__9c470c46",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8339f7c26125616f718ea13d52e3bc630caaead92dfc97a5657b2722e7f7f424"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__627665a2",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:84739cf0e692f50bea6d46b68b935177c5e872549eb64823a6f1c50ffd460977"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__7d106697",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/microsoft/vscode.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:848c15df1bb22a471a6795292058a6c818d703de7550f6c0e4bc1e7285a00ca9"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__evolution__feature__66f97093",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:852ccb732d2036613fb1c2089b6e0cb03b7155c0f33ad162d70229c4efba0ff7"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__726ccefd",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:85520d3c315db14fdbfb58334cf74fa7a74e685afe000440e44fa2db3118c24c"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2d984316",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:85b10de1276d5d5e1c32d728a815bc9b4e49de93d3c295eda3922a09255cbdf3"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__fcb506a5",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/internetarchive/openlibrary.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:85b8dc3a8e86cb7b95d67f072248ec9d1f7408965f120f1aeddf05612ec8daa7"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__dc72c033",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:85c1b2f4edeb69760f5f128a5466f39d7eb0a096549e0026e2992f05cacc2935"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__4f3cb6be",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/microsoft/vscode.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:86a2a0e0ac94805312cc4c203fd00e7f3644482cdb692a7a741486c51082297c"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__f3695f8d",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:87293e52686da9e8cf238fbfe45c6b7278e8610e016b206ce88396b65c8fc15c"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__34e61891",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:876d70facc5f1b31ceb467b3a1352c649e06fcf1c0f6a593502a966eee34d1fd"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__36b8c9ee",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/qutebrowser/qutebrowser.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:878173c82b326180ff289558ef2fde63920c389c012f732775a808003def3d42"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__cecff61c",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:87ab8824a4243aa360682d1b232635bd085fd19a4e3e4da951b15686793e08a8"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__4a37a167",
+ "source": "Multi",
+ "language": "cpp",
+ "repo_url": "https://github.com/nlohmann/json.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:87ec83948f2ca5d61885051328a45da4e37465efd270dfe777a9b1a252c12e33"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__380568e9",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/keras-team/keras.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:881129d67125ad82093f90197f9a953894deb5a4cf7c27041dbf8fbfbe396d5a"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__64a006b0",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/keras-team/keras.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:886d685ee1671749602dafb4d6e64110c9cfc71fd42aff14c0e4690789f22268"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__d3133dfb",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8888a098b4dc0c8de430bf7bbc3fec0160ab059342fe7a64d65259927e8cfc3b"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d3576321",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:88898a2b5b178ab82652d381a4cad53a9e5bd55f7c12cc772534367334bee7dc"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__03f04397",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:88f3e319bb1058399124b00966a085114cdba079934e77a4c0e6478dfacc3c26"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__462b957d",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:89857fcfcb84aa3d77ca6c547b73d8d5e8e6d3edb47b758b78e28fdff25d78ea"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__10ab7842",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/prettier/prettier.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:89953e9847c6766bac5a1ee08e90a4787808c7ec91d6dd51293ff0a9a6a85bfa"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__453fb844",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8a9ba76716c4daaf02097c4c4d037ecb0100f2707920b5f381978c0b0858b5d4"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__250649eb",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8ad038ef7f9e972db922f29d90da846bfac81f4b1be45d1a0ee84763d91cb5d8"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__deb49033",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/astropy/astropy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8b58d6c1da5b3c2945f4e0aaebea2caf3fd80f143eb49fddcd334dc0bfbbd293"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__evolution__feature__c2e9e5ff",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8bbf0358298981f3c9346ad49d7ad03bb3537ae8a8f8be7684202581e64b3d27"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__2dc6f037",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8c0bdc1c54a999569b5d2dddcf6c64883b0caa61a0633bb4f9d4ae221b46b085"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__abb9b8b0",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pytest-dev/pytest.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8c149eb730ede30053d63b2a7d1c8731296964b5a099477ed6e5319caeea3b20"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__74cafcca",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/grpc/grpc-go.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8ce177589d622b1bfc701d3b18b04690b767b1a99016eb049d63ab1b76258151"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__92158fba",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8d637b2b05b3f0960bfba1f6b40c0af177b267a226fdb339e6a2bde59ae0076d"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__7dfdbbd1",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8e2b67b887c25506673b0e146be4223f4d4eaec539831728bcecfc4b21675097"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e09a2d75",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8e2f69c39a81cb656bdfbe6554ec3f8f1a480fbbdcda1b63ed1cd24426e3933f"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__cedbb0cb",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8ea95b044c8c6f9e68d358c167589f3306906b6313f57b0e03e1ed3c537104ea"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__d1cb6b19",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8f466469fa28afdbec9f8bec122ce404ae1b0221e82d9c946cadc14ecb3f851d"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1ce4c38e",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/astropy/astropy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:8fb54309bf66b86ffb1dfe7242a76b0a77931902556aed282ab9fdca0ffe5fe3"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__be0b5fff",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/fasterxml/jackson-databind.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:9026e34f28fcddcf9f99ddd475ec2221c3d6bbba4b72d1f107d9b7689d1019b1"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a375715a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:90c24c6321062e4a32b81037cfb33b47550553a0621ee581eeef88c94144a389"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__81f2c925",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:9117d6b1bd2320ab129975a7d766bcb56f10a9b2da9897c3762a20656f7f3eef"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1409977d",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pylint-dev/pylint.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:915a010197b66a8286782ed1663fb86163645f13ef6900de807b01f876c5c7b4"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__92782c7b",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/elastic/logstash.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:91a8cd5bb6f4b4378ab85835ae0d6f1c103f404d75869081ba7dc1eb3179765d"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__9dcac3ec",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:92ab853e53a7110f896babba774e59f12094f6e752390403aee845cc7b583ca5"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e2250ebc",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:938c74f9a745321ff241a3d6cd55482d1ce76b064081e789a7ca578d8de5364b"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2d5d4dcd",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:93bee3a55b0d5b3b67628599739b980343c9248900ec275bc97f98d50b6aae2e"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3d1b3145",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:93e6d588352c58805a2bb75d8da5d6747ad2979c03391b59786ae2881b8f10b8"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__31f7341a",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/keras-team/keras.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:9471b423e2d8b8b75896107e8c8b5b8f9b1f8cbdaeeb93bfbbb7f06e1128b975"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__27320d49",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:951a485ebdfd5d85f4e05f538fbfc2a62d18e6f967752b2472bab32c085ad863"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__24342259",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/keras-team/keras.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:955daa44f097188a2551ef420b3e9007fceed3161ad4fea4a0b649853f6a03fa"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__2bfb742a",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:970365cfd51f891065027e51738bf4a767b6cf78726be216e421b74987d41648"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__18d389e5",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:97e4c3724cae75adb2bac522fdfde27d4706ed457a34ce53a6f25b44e5adf924"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__4c132bfd",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:994d744d1f4fb4c48d15474bdbffdca55ba83d757dfdaa2e5205f35b3c33052a"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__faa6b3ac",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:9c5e9e1dfe4d9d2d86293b3bfb389c11f487ab16c12b464f3528d1f0026904f9"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__5d44a351",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:9d62f264d89b1a4862dbf41777d4c50d5c10ec6221b111e8c573a754fa63b0b3"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__708894b2",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/microsoft/vscode.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:9db48d2ae4d47cfb2a49729372764fcb49e24a68c38c5f118d7621677c875b91"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__d3babcfe",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:9e316ecac7100ff1be961ab46d449e9361ea6e40207c63b7e6665ceaa9c00851"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__2f405b15",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:9ece8442cf52efe268a85d50f93e0b836cf3462185dc25a5b03781b12c236538"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__4c3f5d3d",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:9f3551b6de5ec60ed6c278f6f7389d83005c441ccdcb80279455ff2b6ec730e5"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__8d36ca39",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:9fec1583003fab61eea9b92595ab03b0ee3c95cd45748c4b1d2fafab6f3c9c04"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3da242a2",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a078dd8c242522f6c304392f133f62749c5018bc1f27c574fa2919e686aea4c0"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7afb30f5",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a09fd23399ff1b3fd6f29e330bfa293400d2592e8905bf26abdfbdd680b260fd"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__463d4713",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/prettier/prettier.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a13124ba3ea9b9a03713f7654b473e61ac8a9c8566dd7c6434e9580ff9750ab0"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__27e1903f",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a31b1272ac89e7ed67e0978af287b462853f9ca8df9d481c3ef4db65de38a912"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__58d0e27a",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a397aa0752b239f79d4aaf72e7b07b420ae65793ca98d1f35010ca73f31cfe62"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__997c7afd",
+ "source": "Pro",
+ "language": "go",
+ "repo_url": "https://github.com/flipt-io/flipt.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a43cb294062094d63c6b654f8273631350b46be5afd2c62b8c2f84c233eeecb1"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__22615f27",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/microsoft/vscode.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a4cf99fe00c3e123b13017c2d652de9df98a496fccb1e9e907d4f082f34e5e46"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__0cf27f56",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/keras-team/keras.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a53255907c577b5a41255d44710fd143fe59cc6db0543b30a1c23f88e59c5c54"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__89932d58",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/qutebrowser/qutebrowser.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a56d6c65069da4f88eb8da9856171abe91da6d84c46a73d69251316a204eb640"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3c9b757f",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a5d546889fb8eb7026432ce8617bff8c61725bc445431c03d27d201206e59e0f"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__evolution__feature__ebb79d55",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a5e50460decd41fc727f6ea98aa4889c75693532ca639cd99cad48ae8a3d09aa"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__d6eb75d7",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/prettier/prettier.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a67778023cfa8e8ac3bb22d1a4d1977d43d36f225969227f973dd0e7fdbcfdd3"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__5900c195",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a724c79b23d656ea57ce292a1cec246564263818180a6c7d1b82c77ef1451280"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__5acd9675",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a8796461b752a9d17600fef2f9426ca63b20eda47c181d0e3c5ac97aecd75490"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__f667fa43",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a8ae6f132a4ac4baf95e295e5a7e2495555eae7d800d28415778e7b60a37f88c"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__7c4a11fb",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a922795448000fc9066032ecec5619bb151deba35239e3e5d3f106fe3914c54c"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3c7d37b7",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:a9b624ba64e17006ccf79a61c399c81ed274a45f5b794de672a31b9fc822f496"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__e6efdd87",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ab34c3b16c69eff7dd5b745a62450015e55ad69fb92c601e81af853436862089"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__9ea927ce",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/yt-dlp/yt-dlp.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:abb6db29be8e92232144e8f3394afabd10cc895f0e786ff1edfcfe7b84aa773a"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__37f525d2",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ac9cf06fc9fa90d7b8f0ba9e74b22a7933a853ab6f483225d21db31f27879d03"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__2bd87230",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/fasterxml/jackson-databind.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ad55a37adab0272127d4ae9ae9b836f8c0dd46628248b32765406444b323d4c7"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__9d623e73",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:aeef444f84a576941668a779614c2cf5cd44a396f04fbd8d88504ac6a12b7bd5"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__2d1b4a72",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:af0fd756106f93628cb6cd7a82efeef9497e6231b2e41c4dec3f3fc4ea2cdf77"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__220feee3",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pydata/xarray.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:af777e69cf4cd57b870980f77503361a4224184dcbcc463da0674b94e1b548ea"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__20f502e0",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/qutebrowser/qutebrowser.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:af98d3f9e096434d5807731942e46ee807670d5469ebd2be80e4d0c9bf484c99"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__dbd7cde5",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b17b955e1cf2793d722b16dcc00dc7bac27af882ec58645ad69c3ffae33880aa"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__91e36e16",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b1aa46a004fb6f490e6a83dfbdac7b011e255cc0d085946f84d85beb6c2f3143"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__512d556b",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b322bf4e06b97016619824e139b94f5adb0750207d7c0bd6611006e43a384af9"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__2a889a1d",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b34bb301adba0b7546712a2d4843e00d280e8e669010b162c6fdf8e7b2920a46"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__67bcb30f",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b39e2600e202b8ead38d9691cd212707da8e8e7223283ac92a32518bf3c62d20"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a14d0e2e",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b58ffc33c3d7400452dde210adf57809fcc977c134891a725a453833508a75ed"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__da598baa",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pylint-dev/pylint.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b59e4f01b51466dc3f750419b96e165656f12bbb88e37fe4dfa5b8e5ea81b2f0"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__a98dbc6f",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b5dcc16f945ba74861c78ffcb682b080507a5ea5aa44f06ab0c85a73adf4b904"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__0b0f1e0e",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b607c7ef87ca19c0fbe49c9b4d997b36d1d00fc60516c25df4d611515aea81fd"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__0e3febd0",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b6e7d823080ec26bbc97359cf4470a4cb67ba8ec5964c0833dbd3c3f7aea5bd8"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__108b2d38",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/prettier/prettier.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b78c43106ffae42e278d322e5f8502f76340114c43c51acdad7fccee65355a16"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__bb727b6d",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:b87a13924843ff4d14ec115fc92d8a5a38821afaa2f94adb22d5ebdd8612d898"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3f745086",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ba4fbfe254236de3d71f3f6fb6fe9b2937deb26f42fb24eb5103a8b61bb33b76"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__9a7be8ba",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/facebook/zstd.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:baa752423903b4a2c07a53d7b95cfddb7e9e12de4c89cbc5352d70b7010769d6"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3d85271b",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:bb41d9a50b1108700faa8f19f1babeef09615d17976dfa55e851c020fcbfe587"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__8fcb53e6",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:bb680def81fe766e30e2d237bb936103d6ab827d08645799b4ccd29ed3acb896"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__a7aba10f",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:bbafd4665b70c0e23e401815ea929865d1eefb6ad0480b6f9f8e2f9e3346b244"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d561b333",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:bc3173a33e29dc2d65a71e2bbcbbd2ca9d4588b0f69a86fc657ef5be84596252"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__3be02163",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:bc89e4566bf4836a7834cb5e3dceccd45852e7d167291aa2956a3eb161a1b0fc"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__e2b70931",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:bd0a05115773728759e9f20928a059819d94e9a2690c2abd97bd95e43e18bcb3"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__14ce2fa9",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:be055c6a47f67c80939830516f2cbf6649c13bd1075ad2a781c3d73d15765619"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__bdde6d9c",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:be42aae569cdc86bbc6addcc35913bd844c096cb09f1bff259a5f7a92dfc9d9b"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3e92c76b",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:bea642adc1d5756b9dbb20e74d1cd5bdb69f9748b1015d6817fc581de0d56bc1"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__986c9e85",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:bf04f5771c5bf81fea9fe7fe1f1cfbc6f22d2ca249960183e510b18155af0b39"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__16c72e4c",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:bf4e55bf239145aab2e150ae921bcd8d188baf46944560490859596e6f48e5cc"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c2f0f2be",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pytest-dev/pytest.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:bf5decd053bd9b45fa9f24c7be564443f67992798b3a78071cf06f32cf63be88"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c4bcf730",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:bf92c267d19f49b8a8d1d9fd8e4bcbc77bb14bbe7e440218e0205a327bf354bd"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__2fb50735",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/darkreader/darkreader.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c03df0fa030b5e13d21b16577cefea3d0083d761edb5c3f5500a092034909ce9"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__13b74fcf",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c040e5b9d694dc373581825288e0c35b54ad408d06621833d2c97991e6423d49"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__af69d880",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c09fdc8262f86be8e2954ed5638f9c0afc88fd3293ee1ceb444a537cc193555a"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__6f3e4cd9",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c0b7081db523e497688bdcd33dd4d3c4c31ca7bb7394286119abf3b0c157060b"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c20c3c4f",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c150aeef93f1c8dc87128771e7f384bfb3486bf023aa561148272dcfc6f6c004"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__6ef81275",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c1e6af7a82a751ff57f8e49441100ff546670c57cf042ed9d709908d35df6a54"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__84e0afe2",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c26d424b2ac9dfb29f75d9b1893cd777f1ca5bdfa64fa64c1bf744d72e24d5d2"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__ec4f6157",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/fasterxml/jackson-dataformat-xml.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c407c3b62fb4012ba392ab132a3669e9473cc98c91ead5eb4ab62ff60162a994"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__63513ada",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c40d91e2c2e309097be79e45d2f882c0c789e3996e5a8c1adb2f0b22e153896e"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__0eecae1e",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pytest-dev/pytest.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c42e0448a8c6dfc581fe90a8b5b67f5ba3f2cc6b42511b36d2f2c05de375290b"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__8b7cc5e0",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/alibaba/fastjson2.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c5188ffb64e16d530c778a10c0bee86d80c0e791d20dca5562a1cfb61fd03bc9"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__90f381a2",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c6de3035e6bb0a6c7fcd580d2790415cd9ac239be0de00fea749a9bb13f94e33"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__02b5862c",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c6f22afeb2fad8e18f1e462aa84aec7163ada6ac2cec60ff79f0f9dd8f5f89a0"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__6809df19",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c771f88c030237b7093328e122d048d3c3915b3dbf24a0607a21d16ab2e6611f"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__52180d42",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c773d0cc7c0ca3162f2856de0e1a5d57d43cf3a62712da70dd45fdce476405c6"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2ab7d0fc",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c7b1868324f55c1a88ab05b59ea87363f3bc5503fc68e9e23456c54ef307fca6"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__2f838a18",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c7cdf37688ca1a1a49cf40c1eaaefcfe3db6387baf95afc721dfdd4543d49e5c"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__5b47b0dd",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/iamkun/dayjs.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c7f0f503a64e85acd3e2bfb23ed9b4a6ffa4edf2622c38a74f777a920eca4d56"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__1ac0bb81",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c99a754d793203cbc3af02c68003b240e1cd47094a33e1c6511559e8c6fc074a"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__6ebb54dc",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:c9eb75f11edf41e5a6b45640bf8c9f60e374f0c02c523b2d37d0434fb3a59075"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__36a08326",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/fasterxml/jackson-databind.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:cb60e775b06002b7860a7ab3a101827085254c81545b144fc1bcdeab531dda9f"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__35f76ab1",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:cc09ef4c979d53b845e6727aba1806419e20cd901c1e4e421686930bef67b4b4"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__5b2cf9bb",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:cd5ce37ff6f64b8bf143c40d81b446d535431462e5ed061f544a6bb84428015e"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__8d0f55cd",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:cd7ca80fe1212ba6ed9a843596af3efc6223ae7f02650c1b7f8fe2dce98538a6"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__ff629f34",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/vuejs/core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:cdac954b1ff15e8bbf37a0bdda904afa1458d9847c1fa671eae7dc0f6d389832"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__evolution__feature__c0e4893c",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ce1f70f6ea99737232ad8fc8e613c0202200dfd090ab4f2a6a48c50fe0e63c05"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__914c325d",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ce3590710b19812de614ecb55689c77c44d932079a0d3ddab76603f07da4f54b"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e97ac668",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ceefd7e66d5fc81939e4ea281e9b407a311b1f5054a917694c3681c1db8d624c"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__1ad00901",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:cf9dc0c0e15795e88b328b46f1e6816c5624a22b35cb72b746e63d80c94d1dac"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__evolution__feature__9e2901b5",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/langchain-ai/langchain.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d06ddc03f252496f86019dd7916fec3ec60850ade8a2e230a4598011927b58b8"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__cbea412a",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/expressjs/express.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d10b85cdf578037750e570f3b39a0403d83409c166650642ea31d75f3609576c"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__5dc9809e",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d1f805c826a1a8165f18af76dc068dd3ce6f9be4feb7b0129ae907d9d03ab2e7"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__01d1908e",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d24d07a0cd07d9cb39963e7eb47115829dda5893afe6f086e9cbd20c8a7c306e"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__d9d9c3a6",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/internetarchive/openlibrary.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d3490e8d61723a1b2ba6878154013c2241d2c49f5627027346e59ee9e0627e5c"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__70acfaad",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d3a25fe25506cf671f17ccf8ddfe0ae78c30206a6fcd2c7a3db207e11815086c"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d2a786e2",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d3dae85caa432dba24aff40383ca37a8f2502f75e43e6dc8e4d1c06cb2e4298f"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1a760e52",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d418a6392423a1e564c1e152d4d3b6374b042aeb059739a67cae09734dc479dc"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__60bc9e86",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d4304d1a4c1bb2f3e3db93e39fda409b4e93dbd7b8299f66a95ec091b664060a"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__aaf309d2",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/langchain-ai/langchain.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d4dbe32660e2adde6f36373f7cfa10d8f732e043e3db6267c5dfd8940b771b62"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__71f348da",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/astropy/astropy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d79cecf7c0058211d9b5dde989d4384fbc56271e5f5cbc08398745e472116882"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__49f4a0f4",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/jqlang/jq.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d8413673bc44e9e7a41e0f468c77d0e92326c2e565826361b6f1ace221537595"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__5fdd12f6",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d896cc18071438521d62e6a5e63ffdae5d5a5a1cac3a542137bf9ca39d967caf"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__60b000ec",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d8fde0d461aa5ec223f3243119258d4bc6be63af4ddb1dd8277567d940b781f2"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__9b321036",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:d9f0693080266d8e9c82913d6dded534d9aaf72373d56b758e0e5e5b9fcac957"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__evolution__feature__674464fb",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:da7461ac29413735a04f1ab12c942d9f73abfcf5d4d5ff2ad940fde0d2b57860"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__bdc6ab14",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:db49155b884fd003644fba7bd3571443bbcd735d2cf06956e4998b0d909854cc"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__1665de74",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/mockito/mockito.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:db7887ef286d0eac4aa298039ce80017c2cd27343dc6b056408674717f5596f3"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__1ba303a5",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:dc0289179198fe098db70f2aaa5b1a38edb09940b560f8291f54758f0d4c0235"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__0498ad7f",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:dc56a4caf57287fcaffecef2814a7edaeb8247fb90e17131797fadc128e93d3b"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__d3c56cc5",
+ "source": "Pro",
+ "language": "go",
+ "repo_url": "https://github.com/navidrome/navidrome.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:dd3551455c3f5e595334fa75375386ee9eb1d06bd6898caf8e0bdb595da17e3b"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__ab946da6",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/internetarchive/openlibrary.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:de1ffb84da296157c1881430418d4dcc2bc4ba2452e8041eeae2791be45d247b"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__6659fd5a",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:de817fbf20c0088317c22782a929f52d44421a1a05bf2bb07bcfd61f6924f48e"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__78039f77",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:dfa29e49540138a36a54be3366d1de99f12c5cc9c55514cfce550ca38c0df1a0"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__7958c8a7",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:dfef3add36c57eed43a6a52e856b65b1a156723c21ca6d8fefa7b72df72724e2"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__28ed0b77",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e0e23c2eec2b816c50fc9a0c41256dbbb39a2710e38d3df8bd36c317a32d19fa"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__9e8a151e",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e1411ff239f363f833e18a828baa28053031d68ba027ce8a6a88f5f898861045"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__c9cd2b2c",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/clap-rs/clap.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e18e7949a018205b89c4ae9df2f9e073ae3211d6ccf989adf28fb6411b51341f"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__2d7a3934",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/prettier/prettier.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e198e65fd568b37da48c3aef7d3781cf88eca2d55c2dee2cc36bbdffec6603ac"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__8f9e4358",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e1df52a8f391307f3e42a19b86e02d1e60388128d741e9920df5cbceb9b0298f"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__8ebb764e",
+ "source": "Multi",
+ "language": "cpp",
+ "repo_url": "https://github.com/fmtlib/fmt.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e22e6a8910fa1024444feb5bbc1f05c7d58df3af2ec327b75d077a1f33d53832"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__765e5e14",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e3010e19d080549ef76bf27ca8393f661ebae0b9a238e9e86c811fb04a251917"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__6688f2d9",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e674d637eea55ba86f9b95760203782ea26bc911e7c60ea715a596e41ea07a09"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__5518cb7f",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e68dc076f61a8a4a5ab3c52a5cdb40eb36fd226114a1186ce5d447696fd8dd8d"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__409b7811",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e6a637203cb2f6f5d2cfa099e368d8580fb8165c361ca8802b554b455083c6fa"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4f685b8e",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pydata/xarray.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e6ff2f46ab05dc61b08fe27f4d2240c3b84d75f5198c9a0e82ea8c2a24e21613"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__186c0af4",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e709777a755e4ac0465d28eae10c9466eff5e249f11303523fbcf99055ecfb75"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__e21304a6",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e77a2798d602646b9e92e25a56db47c5d7d5ebd35d376576970be61643d068e5"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__18d7bbbc",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e79ad706364a163059a99e69e5b723aa61140ea633445d9df5c89ff4d06e22a6"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__04b981d7",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e816451d9d649cc768a43cddb7abbe52b25f3db7becbcf2bafdbd5c71e5c0569"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__51e329de",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e838b5c6309d3c166f00c15a3dd2ffaaf56796f31b603e51e2fd5f8ec014072a"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__6f9b5ff2",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e8b72142fc0a24e7dd591b9b98eb9ad6a308fc23a236e6f522cb26611aabacd2"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__747c7f60",
+ "source": "Multi",
+ "language": "java",
+ "repo_url": "https://github.com/google/gson.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:e9c4e405c6c37c51edb2432bdb62e5f60523b97ff45a665b88788ae16af73e9a"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__7e3e2bb9",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ea319b40e64f86943d33a01abd0eb23d3bec61d21e3a439b71ec167eaceb395f"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__6cb88b18",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ea76a4ceaa39464c266c223d2db255bc7fb03f44b9f1887ba52b87c80564f0d1"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__f4541fb1",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/serverless/serverless.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ead9d1d11fce0ac7cde7aa8e20a8044d207af076a208e6d626cccc7753df9792"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__7c41997b",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/vuejs/core.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:eb0cede3f2062f0bcdfe36e5be49a2ca7181e07e72b653f372a9474657571df2"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__39769692",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:eb19e6340e006ca6c72914c4e9a68d3df88739b0709ce99599f523990ca51569"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__8971eeba",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:eb290535d9fc724d1bc3efa376a0580d8df0f44e7cb89986dbb0039aaee2a015"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__09457b9b",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:eb5e6a8d6ec18c499fad4cf18749d50a534c334f072061e135ada14d8dc547f2"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__51f3cc91",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:eb82cab056cbde378e2974f1e87eee3492275018e89e7a6a266dbd3cc8a8e517"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__c363219b",
+ "source": "Multi",
+ "language": "rust",
+ "repo_url": "https://github.com/BurntSushi/ripgrep.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ebbca34412ac4c2b46152226d554934a0a8404e52e46dcb1d0ffb7c1db8341fd"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__b01e9113",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ecbabe3ffe7a3d1b73cb615fd0ae40d46c03402d429fdf1efc5e1e01d49ba76a"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__ed8db47d",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/zeromicro/go-zero.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ecbe41cacda5fcd5a62b1bf24fc6910c214c00215f515b7bee47936c90e1073e"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ec975b6c",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/pydata/xarray.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ed09eb3aaa6539d523efb5fe32ebbd5da963beeb9ba34cc5d48063bdb731522e"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1c5aa714",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:eda5b9e4afe4ea4a07a8738b875a8740e1babc8c2b2759dbb1f59344f8500ceb"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__b8bf59e7",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:edd06f968d2f90329b71e3bccd8f5810a82b670942151bbcefe31bbe400b4cb2"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__b10492be",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ee6278dd5639c45bcc4c9a382cce28e105e6b12967027388d4f03a1e090358ae"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__2a9201a9",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:eeca9dfee0ddefcbd821f5565f673d6a8562e87655dfa772a67d6d3efe9bc570"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__2c512ec3",
+ "source": "Multi",
+ "language": "go",
+ "repo_url": "https://github.com/cli/cli.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ef7f5d41d540783f0d5ee3bfa15765edaffea12d15214c2faa0f2d46799b8ec8"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__538e9f59",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/jqlang/jq.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:ef8571f793165cbb30ea4771819c754cb2960d7b970fe4890279e23c49c7f2c1"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a8414dbd",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f14bec20b237f5d867c6de7379d918f4bd99cd2b2e09ac0fed87f05a1658dc4f"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__9c9b931e",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/serverless/serverless.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f2410b91cf70a6e4383131e3fb8b8ea037fa57387dde774d1b20d3f06c755a7e"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__b8c88ac8",
+ "source": "Multi",
+ "language": "c",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f24e0251614c20ab99d7653b700b2936a4044b41b39f11da02da3969459b36e9"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__59dd675b",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/serverless/serverless.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f3147f4e5b1a1766dda71bec72de481e5ad52d4997f283a13ced15d450ed2866"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7489fdd8",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f4106b22b700f63f69d8d76b0f4addd891fef58fe16d03cc1f996074b51b9b06"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__3cb539ec",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f42b9b036c1e17742c7944802d01c4b5df88386c1a68a006554fbc4bc14ed439"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a7a5e8ed",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f48285241b981c9ff86992012e4e65b6d78ff9e9ec4eeb3c7a298faad7d5f9d5"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d105d187",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f4c380f41af944c6198d95afd7c897d5983d4ea662ebb5cd70eb8db4e0af86bd"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3146e19b",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f52c200b13fc0ec21998c4c5fdb4c1d2b30761dad2e89ee480ef404c7dc29993"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__2464eadb",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f552958bb89e0388a620e8ab569a3e3593ed63490a9244493ca177e04102593f"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__96afaad2",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f5743e84051031eec7fe11894654ecb3eba9816fb0663b4e6da5a59d0957923b"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__ac5893a5",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f65990127e80e8c6871f2b4fd7690a2748b555e24067cb0bca63686ccfc8e9c9"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__898aa1de",
+ "source": "Multi",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f69202620e70d9ffd6d9aebad0b897f6470514dc20f92641ec01fb0315ffd8c1"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__evolution__feature__7fe6d907",
+ "source": "Poly",
+ "language": "python",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f6e9bd2e373b05182142e1e13fb23683dd6ed4b6c1228b3f87eab394fe8380ee"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__1cae51cc",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f70e402f1b485c6b87684cdace52979bede3faeba8d9b889a908ec6ddf77955b"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__d33de279",
+ "source": "Multi",
+ "language": "javascript",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f767f04df87c62b1e61fa9c8af648c181c079257ae99a0049540aa9a1ab0b002"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__46dc77a5",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f774555d35bdf8e2faffde259e56b1366592070ac3828c752535d3a578157ab0"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1ad7449c",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f7a2737359640a608766038265ba1bedc4bcad7b7e8a6c9daa1127bbb52e2b78"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__1de1bd3c",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:f8e450cb9fca78cb26913bb7ed689719b77e59f7de463f4f5f5c2854615e6438"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__0f1fd789",
+ "source": "Poly",
+ "language": "javascript",
+ "repo_url": "https://github.com/serverless/serverless.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:fabe9f2fd696db98fa1fc080b1daa8c30ba267ca9689559babba5ea79c0783b5"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__f8da42bc",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:fae4463a6ff2b6e87c052282343e95897001dfbc9231f0013182a87ea9648442"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__evolution__feature__41cd3842",
+ "source": "Poly",
+ "language": "typescript",
+ "repo_url": "https://github.com/coder/code-server.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:fb77201bce7941a232f8ba992f4ee4adc742a2aef5143bf9f08dd1186dc328b3"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__71253eae",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/qutebrowser/qutebrowser.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:fc2ee2e566b315448824fe160a6abee6fd4af93692e178cf8428da8dc94865b7"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__bebfd692",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:fca9b69e05ea88a2af9cb8cadd4ef2b7ec22d3410477107e88d85b1213e5abf2"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__64469377",
+ "source": "Pro",
+ "language": "python",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:fd00f4982535a09732d7e6ef2d819e519c47f5f43364353945ef2cc4bd50bef0"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ee10f0e4",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/django/django.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:fe3ad1c49518945395f0eb9522b2c435e8a75b47a61f2b77c07b9673f2db27dd"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__bdec299d",
+ "source": "Verified",
+ "language": "python",
+ "repo_url": "https://github.com/scikit-learn/scikit-learn.git",
+ "reason": "eligible_not_selected",
+ "deterministic_rank": "sha256:febed0a62f60c8b14f6f7d84c509acba61b0a0ccc849d019c6d562dea85870dc"
+ }
+ ]
+}
diff --git a/tests/fixtures/contextbench-smoke-pack.json b/tests/fixtures/contextbench-smoke-pack.json
new file mode 100644
index 0000000..deee473
--- /dev/null
+++ b/tests/fixtures/contextbench-smoke-pack.json
@@ -0,0 +1,30 @@
+{
+ "name": "v2.4-contextbench-phase37-smoke-pack",
+ "protocolVersion": "contextbench-protocol-v1",
+ "claimBearing": false,
+ "purpose": "local_harness_smoke_only",
+ "selectedInPhase": 37,
+ "mustNotContributeTo": [
+ "contextbench_claim_bearing_aggregates",
+ "wedge_win_decision",
+ "public_benchmark_claims",
+ "product_tuning_before_baseline"
+ ],
+ "executionStatus": "metadata_only_not_executed_in_phase37",
+ "corpora": [
+ {
+ "name": "Excalidraw",
+ "repo": "excalidraw/excalidraw",
+ "claimBearing": false,
+ "purpose": "local_harness_smoke_only",
+ "phase37RunnableTasks": false
+ },
+ {
+ "name": "FastAPI",
+ "repo": "fastapi/fastapi",
+ "claimBearing": false,
+ "purpose": "local_harness_smoke_only",
+ "phase37RunnableTasks": false
+ }
+ ]
+}
diff --git a/tests/fixtures/contextbench-task-manifest.json b/tests/fixtures/contextbench-task-manifest.json
new file mode 100644
index 0000000..bcba553
--- /dev/null
+++ b/tests/fixtures/contextbench-task-manifest.json
@@ -0,0 +1,553 @@
+{
+ "name": "v2.4-contextbench-phase37-task-manifest",
+ "protocolVersion": "contextbench-protocol-v1",
+ "dataset": "Contextbench/ContextBench",
+ "datasetConfig": "contextbench_verified",
+ "split": "train",
+ "claimBearing": true,
+ "selectedInPhase": 37,
+ "selection_algorithm": "deterministic_seeded_coverage_then_rank_fill_v1",
+ "selection_seed_or_deterministic_order": "phase37-contextbench-v1-2026-04-27",
+ "selection_timestamp": "2026-04-27T00:00:00.000Z",
+ "task_pool_hash": "sha256:a6af697f293ec595bccf9c264799f8a55308cc552b20b8ec61714240c2a03b26",
+ "exclusion_log_path": "tests/fixtures/contextbench-selection-exclusions.json",
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "evaluator_success_status": "passed_synthetic_official_evaluator_probe",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "forbidden_selection_sources": [
+ "agent_outputs",
+ "codebase_context_outputs",
+ "competitor_outputs",
+ "proxy_hardness_score",
+ "post_failure_task_filtering"
+ ],
+ "no_lane_outputs_observed_attestation": "No raw/native, codebase-context, competitor, proxy-hardness, or post-failure outputs were observed or used for selection.",
+ "summary": {
+ "task_count": 20,
+ "language_distribution": {
+ "c": 1,
+ "cpp": 1,
+ "go": 2,
+ "java": 1,
+ "javascript": 3,
+ "python": 8,
+ "rust": 1,
+ "typescript": 3
+ },
+ "source_distribution": {
+ "Multi": 8,
+ "Pro": 2,
+ "Poly": 6,
+ "Verified": 4
+ },
+ "repo_distribution": {
+ "https://github.com/ponylang/ponyc.git": 1,
+ "https://github.com/fmtlib/fmt.git": 1,
+ "https://github.com/navidrome/navidrome.git": 1,
+ "https://github.com/fasterxml/jackson-databind.git": 1,
+ "https://github.com/prettier/prettier.git": 1,
+ "https://github.com/ansible/ansible.git": 1,
+ "https://github.com/rayon-rs/rayon.git": 1,
+ "https://github.com/mui/material-ui.git": 1,
+ "https://github.com/sphinx-doc/sphinx.git": 1,
+ "https://github.com/sveltejs/svelte.git": 1,
+ "https://github.com/sympy/sympy.git": 1,
+ "https://github.com/django/django.git": 1,
+ "https://github.com/vuejs/core.git": 1,
+ "https://github.com/matplotlib/matplotlib.git": 1,
+ "https://github.com/cli/cli.git": 1,
+ "https://github.com/Significant-Gravitas/AutoGPT.git": 1,
+ "https://github.com/coder/code-server.git": 1,
+ "https://github.com/iamkun/dayjs.git": 1,
+ "https://github.com/huggingface/transformers.git": 1,
+ "https://github.com/keras-team/keras.git": 1
+ },
+ "repo_count": 20,
+ "language_count": 8
+ },
+ "tasks": [
+ {
+ "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__5e659108",
+ "original_inst_id": "ponylang__ponyc-2261",
+ "source": "Multi",
+ "language": "c",
+ "repo": "ponylang/ponyc",
+ "repo_url": "https://github.com/ponylang/ponyc.git",
+ "base_commit": "682b45d3abd7b24381bfc56423da85c3527785c7",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:05e7cceb9bc67c2d30de97c5870667a6edb4fc026425f5795d31999fb7a919fe",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:1e46ac54ac1ced7326f7392c1c7d4d49a6872d8d9d2a5bc978a553a3efeabd7b",
+ "patch_hash": "sha256:fe6f99fb701c617accd8adcfd1b54fc85416a0ca5ecc3566002178ebdf8d5e0f",
+ "test_patch_hash": "sha256:2104a1f6daafebcab12a64aeab585d55b7c3a6f817484da93982590789710393",
+ "f2p_hash": "sha256:7427e2bf8cd96a986ce86a231c1edd10a9adb256cf250cfd4a5e7211cb16debf",
+ "p2p_hash": "sha256:0b8b2e8f080de6d002a6fc72d2d800102b4fbb911c6d02c8f390ab05e55d97c3",
+ "gold_context_span_count": 7,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "language_coverage:c",
+ "deterministic_rank": "sha256:06c4c3c32891fd726edf124a39a231af3f26ce179cfd4be0f77f773ff3ef197b"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__3e497d27",
+ "original_inst_id": "fmtlib__fmt-4286",
+ "source": "Multi",
+ "language": "cpp",
+ "repo": "fmtlib/fmt",
+ "repo_url": "https://github.com/fmtlib/fmt.git",
+ "base_commit": "e3ddede6c4ee818825c4e5a6dfa1d384860c27d9",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:13359b5f2c2ae376bbdf0524fbe4ce5bc730d7ecb4c380feaa40130cf363b7f8",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:c3bb60a71ccc2512b49a366ae036fd45d26df403d3cda4b0eeb3e26379885d15",
+ "patch_hash": "sha256:cb30e0912b415a30796aeed3e3a80b3678119c67839d4683cd363f51a10f1288",
+ "test_patch_hash": "sha256:87b52d25b995bbc7eb42a034bc9b80418ecd66575af92f033a739d7287443971",
+ "f2p_hash": "sha256:4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945",
+ "p2p_hash": "sha256:4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945",
+ "gold_context_span_count": 5,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "language_coverage:cpp",
+ "deterministic_rank": "sha256:008e0dd469d875e3e3b7e1a862a28aac07c2a176c6e22a1b22d828dcc2c4d749"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__4df06349",
+ "original_inst_id": "instance_navidrome__navidrome-31799662706fedddf5bcc1a76b50409d1f91d327",
+ "source": "Pro",
+ "language": "go",
+ "repo": "navidrome/navidrome",
+ "repo_url": "https://github.com/navidrome/navidrome.git",
+ "base_commit": "537e2fc033b71a4a69190b74f755ebc352bb4196",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:98f230eac3090013f5a266281095712421e94665c04243a543559f4490bcc51c",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:b42c1b407f9823477acc674d5e8134639d479d379d87c9314255f4dab54b8c8f",
+ "patch_hash": "sha256:5910d049273561faffc0530b132aba0f8ba455388ceedb6ecb53cf830382a858",
+ "test_patch_hash": "sha256:2e94df8a4032b34fcab8496cc2435c94dce20bf65b2d5e81eec4759ae7c41462",
+ "f2p_hash": "sha256:8f778e849bbf8f8799540dff0d88e9f06a3f6081a67275445470fa3b53563ea4",
+ "p2p_hash": "sha256:4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945",
+ "gold_context_span_count": 20,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "language_coverage:go",
+ "deterministic_rank": "sha256:02725d58f7f2b212bee547de17732ab19be7bf83c72adc6830e645581d43fd5c"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__9a3d9d3f",
+ "original_inst_id": "fasterxml__jackson-databind-4015",
+ "source": "Multi",
+ "language": "java",
+ "repo": "fasterxml/jackson-databind",
+ "repo_url": "https://github.com/fasterxml/jackson-databind.git",
+ "base_commit": "9684204f3073580e711320c3531a95bcaffa63ef",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:bcf38df283aae459899174cdd14e73bbbbc3a3841c5aedcd8b02d4cfa4f59e6d",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:eb3ec4665c7bd4efeaadc34bdc0c810a56ee9e09b5c303c781118c2bafa33ef5",
+ "patch_hash": "sha256:52a2719faa7e89847d232f897c653179f51b8548229af7a7ae9dc0a061dcedf2",
+ "test_patch_hash": "sha256:b878454f799b391ed44950fb8ace050b96c2e0a623a52bb0b2db9e96a3ee2cce",
+ "f2p_hash": "sha256:ef73c6bee12f21091983403b4fe7a994041b2dcb7a311a2585780432cc68e2d3",
+ "p2p_hash": "sha256:f2544689902c010f58e9b5dda4fd659a7997dc2b1d272b0f935e9c2929937a9c",
+ "gold_context_span_count": 8,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "language_coverage:java",
+ "deterministic_rank": "sha256:129ec535e675a727eec91d19846c71ddd56624c4e99a2e7bc930827dfe3b08d1"
+ },
+ {
+ "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__9b08d665",
+ "original_inst_id": "prettier__prettier-12930",
+ "source": "Poly",
+ "language": "javascript",
+ "repo": "prettier/prettier",
+ "repo_url": "https://github.com/prettier/prettier.git",
+ "base_commit": "3147244f55e6e0aafd2cc6fa5875f7c8cfa8e4e3",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:c28783e14d2a03daebe39c368b1a4bc2a281a629badf57746684e57d16faa656",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:5565a443e984483bcfd2e65fc18ba8acfa1af2176f523647b84c9f19f9245fd1",
+ "patch_hash": "sha256:4141d24f17271be80e4d04c1303edd299d9a3fa5c3077de0b8284ca440d90133",
+ "test_patch_hash": "sha256:be65001c46d6368bd30f8b47116d7154aea7bc687141154d0c256ca4eddb4661",
+ "f2p_hash": "sha256:95f839d90081354c8aaeb83ce45adbab096979079382b1c6ca9eee793b3af603",
+ "p2p_hash": "sha256:ff7047d6d9118ad4b105615d0dadb38c2a7ab0ed14e6ff880f1ce8b6b05da4e9",
+ "gold_context_span_count": 7,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "language_coverage:javascript",
+ "deterministic_rank": "sha256:0029073e63edd541c2c8d76e7eb11355f3089bbcbd245968ff19b044671f8fea"
+ },
+ {
+ "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__942d0b14",
+ "original_inst_id": "instance_ansible__ansible-4c5ce5a1a9e79a845aff4978cfeb72a0d4ecf7d6-v1055803c3a812189a1133297f7f5468579283f86",
+ "source": "Pro",
+ "language": "python",
+ "repo": "ansible/ansible",
+ "repo_url": "https://github.com/ansible/ansible.git",
+ "base_commit": "8a175f59c939ca29ad56f3fa9edbc37a8656879a",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:614c79e3d7e231b12535d28984391d3928dc855e501a5219c34fd432ae357d1b",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:e723a3ddcf8aafd31cda47da0e994791f688f1bd55fefa2f6c3e30fbf418f10b",
+ "patch_hash": "sha256:39da004c3b0de60c884343eb65eed8ba4cbaed102b0b0e0db482084634e97315",
+ "test_patch_hash": "sha256:c38b83ce8113ce9e88954766a457fef81f32171d1406c8b7858c5a683e2630b1",
+ "f2p_hash": "sha256:e841a8565a45f855aa75d37bd75900dc1ae305aa32cf97a154f7a497ef47bbe7",
+ "p2p_hash": "sha256:8eeace7feaa6ebab7c9caa0b39a524fb52ba336bc39d28cb374f4c69203a6e5d",
+ "gold_context_span_count": 38,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "language_coverage:python",
+ "deterministic_rank": "sha256:01ecf67e3162f21cfb345b3953fc150968f3b870f0b7b7246d1c0ee576af3a0a"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__1b6e3d94",
+ "original_inst_id": "rayon-rs__rayon-986",
+ "source": "Multi",
+ "language": "rust",
+ "repo": "rayon-rs/rayon",
+ "repo_url": "https://github.com/rayon-rs/rayon.git",
+ "base_commit": "2de810e97d5ce832ff98023a4a9cf215a86244ea",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:d2e0e4c6e073868fc45c3baf8378dd67103f69770ac941ed7e59af8a9625c481",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:35d3e3085f7acacc2aef63952a83fc2144cf6df257d6dd5e88374898670e3f33",
+ "patch_hash": "sha256:761b3732a3a9b95afcf2be239f751b68ad7e0f41ebc12a209b8f3ad4452fd428",
+ "test_patch_hash": "sha256:a17b3f33f4c0df04b48ebd85e02a2f7583f3d1d3a695686b9fd9bef908c8e366",
+ "f2p_hash": "sha256:f9e706fe3388368af494dcadd0476b6d6e5480b69c1ea8ebc42f2fb911c2a065",
+ "p2p_hash": "sha256:2a69fc0a192a55fbeed4b93ff95427df35dceb721c523f27a4b6428ff8aaaa9a",
+ "gold_context_span_count": 3,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "language_coverage:rust",
+ "deterministic_rank": "sha256:0a3794344aed7de0e0c0025cb6f3928b2d7a3707a61968fbd4c1656047167d50"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__2bb4ea7a",
+ "original_inst_id": "mui__material-ui-11451",
+ "source": "Poly",
+ "language": "typescript",
+ "repo": "mui/material-ui",
+ "repo_url": "https://github.com/mui/material-ui.git",
+ "base_commit": "04fae47c2a876f38aacfae866d220ddcbb7358ef",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:5ceb05d2189ff18b4cbb02093d5e4b1e52bffb08e80122f5d8b3699222415b42",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:d77bcd419c081d67e27c620253eaf90ffba372835e306a429e17a4eb860417d8",
+ "patch_hash": "sha256:15fcc637a90c60bca48396b1bc2d8f6cf14fd759a133cfec08cb3677d20c3321",
+ "test_patch_hash": "sha256:2ba883d077b80974f3b32b01c6e312a79deac6bb09b5661651815b925417c2b0",
+ "f2p_hash": "sha256:003a275a398f95298be3292b2ec1ab9784677110f9f0ffca80b240b209b3b6d9",
+ "p2p_hash": "sha256:0189e0d23712d2134e97dc5796caf6c2a00d6d1b9d80395ecc679ab587871a72",
+ "gold_context_span_count": 6,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "language_coverage:typescript",
+ "deterministic_rank": "sha256:057eb134ead21d5ff4b6e9bbac2a919bbe31aecf02894ec8144c5d6ed317e08b"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__b5cec962",
+ "original_inst_id": "sphinx-doc__sphinx-7462",
+ "source": "Verified",
+ "language": "python",
+ "repo": "sphinx-doc/sphinx",
+ "repo_url": "https://github.com/sphinx-doc/sphinx.git",
+ "base_commit": "b3e26a6c851133b82b50f4b68b53692076574d13",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:d0b3103864192f0222897e7cbb72d6a99e01d1a1517ab5b1415702d6b6d81f44",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:3a69df821d9b5c1e3f9b0a66de9573c49d23946cb4fd397d6f60f75c1629bf29",
+ "patch_hash": "sha256:13cebe1d1a9993e2bbb9707a9c85bf4e98fab0563d8305e446147266f7274fae",
+ "test_patch_hash": "sha256:1745f2e0dfab31c3d3e871d528b1847969758b3aa457a5aa0145b50f647d29e1",
+ "f2p_hash": "sha256:ab83c2156bc175d166f39f0b76a83537e7e05796612c81afe0c02ed29e3f6cb7",
+ "p2p_hash": "sha256:2ea7e8094b6379e26c73114345ab56b377d40eb1d225f90f65457ca6a4f6338f",
+ "gold_context_span_count": 6,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "source_coverage:Verified",
+ "deterministic_rank": "sha256:0308fa0bca6a4843611607cf2809d2cfae1fcdd535a79d1418c4498eadae7fc6"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__bd81816f",
+ "original_inst_id": "sveltejs__svelte-12649",
+ "source": "Multi",
+ "language": "javascript",
+ "repo": "sveltejs/svelte",
+ "repo_url": "https://github.com/sveltejs/svelte.git",
+ "base_commit": "e417d3a2d281a5ec9b595be5ffbd47efe57b28c3",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:6220f9321fcf00a621f7a25261507877c039185ac5162f390313c4863149d88c",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:1c95ff3cf469bad8e25d9880393364f3b4ceef0a490b7bb98363ba89d8117302",
+ "patch_hash": "sha256:5ea1a6794598dc6febdd72d6057f53798351c0cd818e5807bfc61c55be8ad5d9",
+ "test_patch_hash": "sha256:463ddfabf4000b974eadf4250aaa4bca2ffba2eacaff1c14ec57440eb217cb1e",
+ "f2p_hash": "sha256:9774cde059974b3da9225a7e8357248f715f8ce57909d612a84b41249c885a9e",
+ "p2p_hash": "sha256:5a5f762975164bdbe96da8187f7981ddfb8703d92ff7d788c9947b684d27cbbe",
+ "gold_context_span_count": 5,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "repo_coverage:https://github.com/sveltejs/svelte.git",
+ "deterministic_rank": "sha256:01cdbd47d54d4edaea4f4e1409d6837a7f1ccc6840eeeaf03b818c4b1ad6c457"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3d43f489",
+ "original_inst_id": "sympy__sympy-20801",
+ "source": "Verified",
+ "language": "python",
+ "repo": "sympy/sympy",
+ "repo_url": "https://github.com/sympy/sympy.git",
+ "base_commit": "e11d3fed782146eebbffdc9ced0364b223b84b6c",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:f86e8f37d24aff7e284d6f7fbdc903525c2175528a1957144165848ed3882611",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:ccec0fb4b2b20c1357f5faab16b0c80da8d5ec49063805caef879b6ec2b54432",
+ "patch_hash": "sha256:dbec818a9a22bcfeaeeb2f292cf7bba7048f84bca05aa04f38870e483ec803d5",
+ "test_patch_hash": "sha256:f84920fc71c885455f40e439ea052707b7936b1367ba6af67a2a7b9c3f626f9c",
+ "f2p_hash": "sha256:c4492515480ba65d31e505c3be65f13ac4b0605033cad0eb3af2498d891ca0f4",
+ "p2p_hash": "sha256:92ceb1b732a10472f9080dcf81a5c97d42979e977ea72ce0d78d74c18928e265",
+ "gold_context_span_count": 4,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "repo_coverage:https://github.com/sympy/sympy.git",
+ "deterministic_rank": "sha256:04b354066b641959f673366d5562f54869fc105a427dc276397e06f73719617c"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__0b74a2c7",
+ "original_inst_id": "django__django-15104",
+ "source": "Verified",
+ "language": "python",
+ "repo": "django/django",
+ "repo_url": "https://github.com/django/django.git",
+ "base_commit": "a7e7043c8746933dafce652507d3b821801cdc7d",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:28a48760ccbbb8e549a66edeae834de8a47f4a34d06ba80e4c929c6ea55626d2",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:d5b9e5860d36119df398d4959137f1da593075e87a04bc2ffc2b6f5130f3293f",
+ "patch_hash": "sha256:9027ea1fe3294c58a5e144b66167851e4aa1c494a2044a1eb0eee61b14fdb28c",
+ "test_patch_hash": "sha256:a2e79b5018f0df0a14713774c7a23d3e205d50181e90e693b10a082b7e1b09cf",
+ "f2p_hash": "sha256:64c9ed9b8adf7cea5930cad3e75dc176b2fd9f0d19baab697bb9fedb1ee29845",
+ "p2p_hash": "sha256:b5d6ced33eda9e014f2c7a58f3c6e3f4f9c44417c15b3b7f3034d1efb37050c2",
+ "gold_context_span_count": 4,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "repo_coverage:https://github.com/django/django.git",
+ "deterministic_rank": "sha256:069c0ead766ec25d98c701050e07a0e0aa193492aebbe54df3710c483dc581f4"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__8d2ac221",
+ "original_inst_id": "vuejs__core-11338",
+ "source": "Multi",
+ "language": "typescript",
+ "repo": "vuejs/core",
+ "repo_url": "https://github.com/vuejs/core.git",
+ "base_commit": "314ce82e479dbb33a9281ba8c2ebe288536b32df",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:ba71ad0cac694142813a1ca2fd4a48427bae06654cddec160ab5fbe3479c5efe",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:8ef471f302a75779fa253b72a32b646bbde912d1609d5ddcce33737bba0e172d",
+ "patch_hash": "sha256:8a825ebf0547ea7ba0481582a6142e8ac50e35abda621a57330b25860d3dbb27",
+ "test_patch_hash": "sha256:1c3d96b897df04791477d894ecb483b24b89c2a0747c025cd9972dcf0372c679",
+ "f2p_hash": "sha256:2da7f2e3aa00743c295a835682f3f3a8d795a9d737705282e50bc79985cd4030",
+ "p2p_hash": "sha256:d749e04597508014a59354eabfb7d3548e4b46efc1d69611e7eb70a05e3a29b8",
+ "gold_context_span_count": 1,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "repo_coverage:https://github.com/vuejs/core.git",
+ "deterministic_rank": "sha256:06f257dee6b643131f5e89fd302743e71d08c9f31af73689c9bd8437dac1def3"
+ },
+ {
+ "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__497d4650",
+ "original_inst_id": "matplotlib__matplotlib-26342",
+ "source": "Verified",
+ "language": "python",
+ "repo": "matplotlib/matplotlib",
+ "repo_url": "https://github.com/matplotlib/matplotlib.git",
+ "base_commit": "2aee6ccd7c7e1f8d282c1e7579f4ee546b838542",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:6f94f5120bf02fab3b081a6316a334c57e3b46ac696c739a3fea9bded03ad6a0",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:419a82cda0d53300471284bb3f74091ad296860938a74c1bb8969ff558b37d87",
+ "patch_hash": "sha256:7b731bb05e85f1103654fc521800c50d4654b2f1c3a175b9c554a3275fe1af91",
+ "test_patch_hash": "sha256:861b4bbaa78126ed2eb6300cb9e98324d3508af22c7e2fc5a7a0df8bf78e82b5",
+ "f2p_hash": "sha256:6164581e61a64698a807ba470dfce8cdc4e50fddfb73f0362992a81c607b51ed",
+ "p2p_hash": "sha256:c7af2ef236d8d0fbbe3a9ef80b4b5537a99e696445a30c3c45494420185d56b4",
+ "gold_context_span_count": 4,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "repo_coverage:https://github.com/matplotlib/matplotlib.git",
+ "deterministic_rank": "sha256:078219aeba8f8ff60b7dced916a6d15adf436ba45be9bf9569c66002f6f60a3a"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__1b8cfbf9",
+ "original_inst_id": "cli__cli-3608",
+ "source": "Multi",
+ "language": "go",
+ "repo": "cli/cli",
+ "repo_url": "https://github.com/cli/cli.git",
+ "base_commit": "026b07d1cfbf23a3ccb4d4703a2496d65e177fcc",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:5624c959557e8e711c9a6a87d4bf34b6c129f194037acb62a24f10fd06951aaf",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:285ac1fa7b8db77f5d3366c32022c2eb75f6d2eeb819038f9caf8d716e6a176c",
+ "patch_hash": "sha256:2055d5b9a01c5dc4920aeff6b78792051d42a21216f3c8541732f918b5dcd80a",
+ "test_patch_hash": "sha256:bccdb11384ba8309fef4c4b8dd3e9bc00b1c9ba1488574cb66054c30a67664f2",
+ "f2p_hash": "sha256:4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945",
+ "p2p_hash": "sha256:72c0895e3ceaa0ce85aab728fd108cb723d5a102d594faa849b47d6fef613e53",
+ "gold_context_span_count": 6,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "repo_coverage:https://github.com/cli/cli.git",
+ "deterministic_rank": "sha256:081b5e11e2f16c4b99a8a7ec6e1a60b947ebc620ce615491fd7ebf084471e0c5"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__ed58622a",
+ "original_inst_id": "Significant-Gravitas__AutoGPT-4652",
+ "source": "Poly",
+ "language": "python",
+ "repo": "Significant-Gravitas/AutoGPT",
+ "repo_url": "https://github.com/Significant-Gravitas/AutoGPT.git",
+ "base_commit": "9150f32f8b8602395534795ddd2d930a1684e419",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:4f0e49579b8f6426e62ab322a02bdbc6cfaaad98b3a68dbbcf9494eda624cf74",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:e4690cc62e797a387c5ab6e9fda4da333ed9655dddbdd16feefe6bf10409ead3",
+ "patch_hash": "sha256:a15c0cce56ab9684014680e81e12a2452691ba7dd9a074abf9e1c147b8db544b",
+ "test_patch_hash": "sha256:fd72dac6d058feaff76f7bcfdd0deb5585cc7d4f2a82b0c9aa7f69807efd08cf",
+ "f2p_hash": "sha256:964f21451fc284a0a0b4dc07480f2b561be418cd15df6c8b836f741db2a12fb2",
+ "p2p_hash": "sha256:4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945",
+ "gold_context_span_count": 4,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "repo_coverage:https://github.com/Significant-Gravitas/AutoGPT.git",
+ "deterministic_rank": "sha256:0891fa88d9469a199cd07fe9ab642933bf43e31eac4170c1593db85b182998a8"
+ },
+ {
+ "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__42165c4e",
+ "original_inst_id": "coder__code-server-6278",
+ "source": "Poly",
+ "language": "typescript",
+ "repo": "coder/code-server",
+ "repo_url": "https://github.com/coder/code-server.git",
+ "base_commit": "5d3c9edce436d11d51aa1e586c11eaa49d626dc2",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:700bd6272bcb0d96b6c9a21deb8f955b4fa5859f9ce2ae1c70bc9b1c9a5f498a",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:4c8b070a0e8a04edbcec642c760e80758d27038332444f30ae1fcd01d3d7be22",
+ "patch_hash": "sha256:48fd1ab073d134e7ed2466022d84f4bff72ae861191250834b275a0ee31d2101",
+ "test_patch_hash": "sha256:8b5aa18dc79a9236b871b3885cee9b9409084d731b157e6c7f73ca3a1abf7de4",
+ "f2p_hash": "sha256:a417b6820ece6a95fb17b3f23a1543407f47bc09112776ff4b6616c5ad849941",
+ "p2p_hash": "sha256:eaa6a203414a673aacfb010a35c637a4b4b29b62705464c41e436b7ed07abfd9",
+ "gold_context_span_count": 8,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "repo_coverage:https://github.com/coder/code-server.git",
+ "deterministic_rank": "sha256:09ce27ec3327da069ae81a329ef028e8d7234ff82d8b3534a3e8e02316931bb2"
+ },
+ {
+ "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__72488b59",
+ "original_inst_id": "iamkun__dayjs-734",
+ "source": "Multi",
+ "language": "javascript",
+ "repo": "iamkun/dayjs",
+ "repo_url": "https://github.com/iamkun/dayjs.git",
+ "base_commit": "9ad2e47e0569b23991bb0d5578f49c792c12df08",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:0da2caeed51e7052d0def67531b090f806a40bd05607b992335156d2ebcd26b7",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:277283406441212773856bb86437ef9f010edd8330673b808a4355b87f946f3b",
+ "patch_hash": "sha256:ee9a68adab2e0bcb376b04f28f8b04f892a565104dd6470f46c6e14f0a3da7b9",
+ "test_patch_hash": "sha256:b3f11ed93a41fb1c9b79adc890c9a9f5353423396b31b6301219ed2ed3c64dfc",
+ "f2p_hash": "sha256:25489d127ba8e86e07e3f57d792aa12dbd1e5bf4930fcf96ce16bb35dd0f2dbe",
+ "p2p_hash": "sha256:3e558fdeb2b03a062ddf7384221784371658fcac6f5be29b01da586ca229d0e3",
+ "gold_context_span_count": 2,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "repo_coverage:https://github.com/iamkun/dayjs.git",
+ "deterministic_rank": "sha256:0b9a0ab545b454b9e02de1b980bd114e80ee942edd577ca33c23af692b093eb2"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__3bb9721e",
+ "original_inst_id": "huggingface__transformers-23796",
+ "source": "Poly",
+ "language": "python",
+ "repo": "huggingface/transformers",
+ "repo_url": "https://github.com/huggingface/transformers.git",
+ "base_commit": "de9255de27abfcae4a1f816b904915f0b1e23cd9",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:54872da2cbad7c4d2ffa8f27589c9512bfe330ec39e3ec932813b5bee6d14645",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:47e235cec8e62d0200b74b1960f2ff7a403abe405c6d686f6e947d65eb7c0032",
+ "patch_hash": "sha256:d616ac16ea924be75d0c2dbf02ef0d9def6911470d12745df9f864f84385bd6a",
+ "test_patch_hash": "sha256:2a6f350f2825586d6926a91ee0d22552ace8745e0f305e1c283258e09a356603",
+ "f2p_hash": "sha256:bd8fee5f231e391541a7a8a10394a21a982426f51fb2b603ffb9b9f2d235c846",
+ "p2p_hash": "sha256:d54b2c97284fef4f7d3400bb4cb2344ad1ad995272c2b09b8c4eb512c760dc80",
+ "gold_context_span_count": 8,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "repo_coverage:https://github.com/huggingface/transformers.git",
+ "deterministic_rank": "sha256:0c5ec70f387858a230cec51047ebc2f8de9823bf61f55799a644c3d0486e53ec"
+ },
+ {
+ "instance_id": "SWE-PolyBench__python__maintenance__bugfix__b110ef6c",
+ "original_inst_id": "keras-team__keras-19924",
+ "source": "Poly",
+ "language": "python",
+ "repo": "keras-team/keras",
+ "repo_url": "https://github.com/keras-team/keras.git",
+ "base_commit": "a2e9a5252d2eab389bd19d359e6e7325a8232c79",
+ "problem_statement_ref": "dataset_field:problem_statement",
+ "problem_statement_hash": "sha256:0675761ca6753399284ab8041ea0634562fe10ce3d0eba258249dc72ae013665",
+ "gold_context_ref": "dataset_field:gold_context",
+ "gold_context_hash": "sha256:e09351f3bbb210a57eb89e1ba6b2e64e4960c893a6963dec425dd9f7bc883704",
+ "patch_hash": "sha256:4ebb5fda593d647f505895617ae10d75701159aa81c3495015761ff7e0580a14",
+ "test_patch_hash": "sha256:b80e72f00e0bfca54f32bc74dbfb55337a92aa6a8c186ea223336f5b294e33c8",
+ "f2p_hash": "sha256:b94f4142820f1e2f1e49b4eee09cc93f438099890599993215f8f898f612bb8d",
+ "p2p_hash": "sha256:470b4ab32dbfcd5d30e849a9929326582d083a98cd88135867ccef1d85c04056",
+ "gold_context_span_count": 14,
+ "hash_canonicalization_version": "contextbench-canonical-json-lf-v1",
+ "hardness_signal_status": "unavailable_in_contextbench_verified_schema",
+ "hardness_signal_source": "dataset_schema_probe",
+ "hardness_proxy_used": false,
+ "inclusion_rationale": "repo_coverage:https://github.com/keras-team/keras.git",
+ "deterministic_rank": "sha256:0ce807d76cad534312c24b67fbc1971f55a13237477ac46b0ffe1a2c707b96da"
+ }
+ ],
+ "manifest_hash": "sha256:1d30ee64bc3e0cb385d7bb76d58f5f9c21da3f93e69019067596111dedc16848"
+}
diff --git a/tests/impact-2hop.test.ts b/tests/impact-2hop.test.ts
index cf1f84f..010499e 100644
--- a/tests/impact-2hop.test.ts
+++ b/tests/impact-2hop.test.ts
@@ -15,6 +15,8 @@ import {
RELATIONSHIPS_FILENAME
} from '../src/constants/codebase-context.js';
+const SLOW_WINDOWS_TEST_TIMEOUT_MS = 60000;
+
vi.mock('../src/core/reranker.js', () => ({
rerank: vi.fn(async (_query: string, results: unknown) => results),
getRerankerStatus: vi.fn(() => 'fallback'),
@@ -127,5 +129,5 @@ describe('Impact candidates (2-hop)', () => {
`Expected hop 2 candidate src/a.ts, got impact.details=${JSON.stringify(details)}`
);
}
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
});
diff --git a/tests/search-compact-mode.test.ts b/tests/search-compact-mode.test.ts
index 92f0327..c4d573c 100644
--- a/tests/search-compact-mode.test.ts
+++ b/tests/search-compact-mode.test.ts
@@ -50,6 +50,8 @@ function parseSearchResponse(text: string): SearchResponse {
return JSON.parse(text) as SearchResponse;
}
+const SLOW_WINDOWS_TEST_TIMEOUT_MS = 60000;
+
describe('search_codebase compact/full mode', () => {
let tempRoot: string | null = null;
let originalArgv: string[] | null = null;
@@ -572,7 +574,7 @@ describe('search_codebase compact/full mode', () => {
expect(results[0].filePath).toBe(actualChunk.filePath);
expect(results[0].imports).toEqual(actualChunk.imports);
expect(results[0].exports).toEqual(actualChunk.exports);
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
it('adds a warning only when the final full payload exceeds the compact budget threshold', async () => {
const oversizedSummary = 'Token-heavy summary '.repeat(1200);
diff --git a/tests/search-decision-card.test.ts b/tests/search-decision-card.test.ts
index d99b4c7..c6d77ae 100644
--- a/tests/search-decision-card.test.ts
+++ b/tests/search-decision-card.test.ts
@@ -40,6 +40,8 @@ type ToolCallResponse = {
isError?: boolean;
};
+const SLOW_WINDOWS_TEST_TIMEOUT_MS = 60000;
+
function getToolCallHandler(
server: unknown
): (request: ToolCallRequest) => Promise {
@@ -153,7 +155,7 @@ export class ProfileService {
config: { skipEmbedding: true }
});
await indexer.index();
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
afterEach(async () => {
if (originalArgv) {
@@ -170,7 +172,7 @@ export class ProfileService {
await rmWithRetries(tempRoot);
tempRoot = null;
}
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
it('intent="edit" with multiple results returns full decision card with ready field', async () => {
if (!tempRoot) throw new Error('tempRoot not initialized');
@@ -207,7 +209,7 @@ export class ProfileService {
}
expect(preflight.ready).toBeDefined();
expect(typeof preflight.ready).toBe('boolean');
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
it('decision card has all expected fields when returned', async () => {
if (!tempRoot) throw new Error('tempRoot not initialized');
@@ -259,7 +261,7 @@ export class ProfileService {
if (preflight.whatWouldHelp) {
expect(Array.isArray(preflight.whatWouldHelp)).toBe(true);
}
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
it('intent="explore" returns lightweight preflight', async () => {
if (!tempRoot) throw new Error('tempRoot not initialized');
@@ -290,7 +292,7 @@ export class ProfileService {
expect(typeof preflight.ready).toBe('boolean');
// Should NOT have full decision card fields for explore
}
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
it('includes snippet field when includeSnippets=true', async () => {
if (!tempRoot) throw new Error('tempRoot not initialized');
@@ -321,7 +323,7 @@ export class ProfileService {
// At least some results should have a snippet
const withSnippets = parsed.results.filter((result) => result.snippet);
expect(withSnippets.length).toBeGreaterThan(0);
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
it('does not include snippet field when includeSnippets=false', async () => {
if (!tempRoot) throw new Error('tempRoot not initialized');
@@ -350,7 +352,7 @@ export class ProfileService {
parsed.results.forEach((result) => {
expect(result.snippet).toBeUndefined();
});
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
it('scope header starts snippet when includeSnippets=true', async () => {
if (!tempRoot) throw new Error('tempRoot not initialized');
@@ -381,5 +383,5 @@ export class ProfileService {
const firstLine = withSnippet.snippet.split('\n')[0].trim();
expect(firstLine).toMatch(/^\/\//);
}
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
});
diff --git a/tests/search-snippets.test.ts b/tests/search-snippets.test.ts
index 4b387ed..01aa59d 100644
--- a/tests/search-snippets.test.ts
+++ b/tests/search-snippets.test.ts
@@ -11,6 +11,8 @@ vi.mock('../src/core/reranker.js', () => ({
isAmbiguous: vi.fn(() => false)
}));
+const SLOW_WINDOWS_TEST_TIMEOUT_MS = 60000;
+
describe('Search Snippets with Scope Headers', () => {
let tempRoot: string | null = null;
@@ -98,7 +100,7 @@ export const VERSION = '1.0.0';
config: { skipEmbedding: true }
});
await indexer.index();
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
afterEach(async () => {
if (tempRoot) {
@@ -106,7 +108,7 @@ export const VERSION = '1.0.0';
tempRoot = null;
}
delete process.env.CODEBASE_ROOT;
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
it('returns snippets when includeSnippets=true', async () => {
if (!tempRoot) throw new Error('tempRoot not initialized');
@@ -136,7 +138,7 @@ export const VERSION = '1.0.0';
const withSnippets = parsed.results.filter((r: any) => r.snippet);
expect(withSnippets.length).toBeGreaterThan(0);
- }, 30000);
+ }, SLOW_WINDOWS_TEST_TIMEOUT_MS);
it('scope header is a comment line starting with //', async () => {
if (!tempRoot) throw new Error('tempRoot not initialized');