diff --git a/scripts/contextbench-select-slice.mjs b/scripts/contextbench-select-slice.mjs new file mode 100644 index 0000000..6deffa8 --- /dev/null +++ b/scripts/contextbench-select-slice.mjs @@ -0,0 +1,913 @@ +#!/usr/bin/env node +import { createHash } from 'node:crypto'; +import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { dirname, join, resolve } from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { tmpdir } from 'node:os'; + +const DATASET = 'Contextbench/ContextBench'; +const DATASET_CONFIG = 'contextbench_verified'; +const SPLIT = 'train'; +const DATASET_ROWS_URL = 'https://datasets-server.huggingface.co/rows'; +const SELECTION_SEED = 'phase37-contextbench-v1-2026-04-27'; +const SELECTION_TIMESTAMP = '2026-04-27T00:00:00.000Z'; +const CANONICALIZATION_VERSION = 'contextbench-canonical-json-lf-v1'; +const HARDNESS_STATUS = 'unavailable_in_contextbench_verified_schema'; +const HARDNESS_SOURCE = 'dataset_schema_probe'; + +const REQUIRED_FIELDS = [ + 'instance_id', + 'original_inst_id', + 'source', + 'language', + 'repo_url', + 'base_commit', + 'problem_statement', + 'gold_context', + 'patch', + 'test_patch', + 'f2p', + 'p2p' +]; + +const HASH_FIELDS = ['problem_statement', 'gold_context', 'patch', 'test_patch', 'f2p', 'p2p']; +const FORBIDDEN_SELECTION_SOURCES = [ + 'agent_outputs', + 'codebase_context_outputs', + 'competitor_outputs', + 'proxy_hardness_score', + 'post_failure_task_filtering' +]; + +function help() { + console.log(`ContextBench Phase 37 selection tool + +Usage: + node scripts/contextbench-select-slice.mjs --help + node scripts/contextbench-select-slice.mjs --dry-run --out + node scripts/contextbench-select-slice.mjs --probe-evaluator --out + node scripts/contextbench-select-slice.mjs --write-fixtures + node scripts/contextbench-select-slice.mjs --write-task-payloads --out [--checkout-root ] + node scripts/contextbench-select-slice.mjs --write-gold --task-id --out [--payloads ] + 
node scripts/contextbench-select-slice.mjs --materialize-checkouts --payloads [--max-tasks ] + node scripts/contextbench-select-slice.mjs --check [--rows-file ] + +Modes: + --dry-run Load ${DATASET}/${DATASET_CONFIG}, validate schema, compute eligible pool, and write audit files under --out. + --probe-evaluator Run python -m contextbench.evaluate against a synthetic local git fixture only; no lane or product output is used. + --write-fixtures Write tests/fixtures/contextbench-task-manifest.json and tests/fixtures/contextbench-selection-exclusions.json. + --write-task-payloads Write selected task problem statements and intended checkout paths for Phase 40 live runs. + --write-gold Write scorer-only official-evaluator gold input for selected task(s); never pass this to solvers. + --materialize-checkouts Clone/fetch selected task repositories to their payload repo_checkout_path and verify base commits. + --check Verify frozen manifest integrity. With --rows-file, also recompute deterministic selection from frozen rows. 
+ +Forbidden selection inputs: + ${FORBIDDEN_SELECTION_SOURCES.join(', ')} +`); +} + +function parseArgs(argv) { + const args = { + out: '', + check: '', + rowsFile: '', + payloads: '', + taskId: '', + manifest: 'tests/fixtures/contextbench-task-manifest.json', + checkoutRoot: '', + maxTasks: 0, + dryRun: false, + probeEvaluator: false, + writeFixtures: false, + writeTaskPayloads: false, + writeGold: false, + materializeCheckouts: false + }; + for (let i = 0; i < argv.length; i += 1) { + const arg = argv[i]; + if (arg === '--help' || arg === '-h') args.help = true; + else if (arg === '--dry-run') args.dryRun = true; + else if (arg === '--probe-evaluator') args.probeEvaluator = true; + else if (arg === '--write-fixtures') args.writeFixtures = true; + else if (arg === '--write-task-payloads') args.writeTaskPayloads = true; + else if (arg === '--write-gold') args.writeGold = true; + else if (arg === '--materialize-checkouts') args.materializeCheckouts = true; + else if (arg === '--out') args.out = argv[++i] ?? ''; + else if (arg === '--check') args.check = argv[++i] ?? ''; + else if (arg === '--rows-file') args.rowsFile = argv[++i] ?? ''; + else if (arg === '--payloads') args.payloads = argv[++i] ?? ''; + else if (arg === '--task-id') args.taskId = argv[++i] ?? ''; + else if (arg === '--manifest') args.manifest = argv[++i] ?? ''; + else if (arg === '--checkout-root') args.checkoutRoot = argv[++i] ?? ''; + else if (arg === '--max-tasks') args.maxTasks = Number(argv[++i] ?? 
'0'); + else throw new Error(`Unknown argument: ${arg}`); + } + return args; +} + +function stableStringify(value) { + if (value === null || typeof value !== 'object') return JSON.stringify(value); + if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(',')}]`; + const entries = Object.entries(value).sort(([a], [b]) => a.localeCompare(b)); + return `{${entries.map(([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`).join(',')}}`; +} + +function canonicalize(value) { + if (value === undefined) return 'undefined'; + if (value === null) return 'null'; + if (typeof value !== 'string') return stableStringify(value).replace(/\r\n?/g, '\n'); + const normalized = value.replace(/\r\n?/g, '\n'); + const trimmed = normalized.trim(); + if (trimmed.startsWith('{') || trimmed.startsWith('[')) { + try { + return stableStringify(JSON.parse(trimmed)); + } catch { + return normalized; + } + } + return normalized; +} + +function sha256(value) { + return `sha256:${createHash('sha256').update(value, 'utf8').digest('hex')}`; +} + +function hashObject(value) { + return sha256(stableStringify(value)); +} + +function writeJson(path, value) { + mkdirSync(dirname(path), { recursive: true }); + writeFileSync(path, `${JSON.stringify(value, null, 2)}\n`, 'utf8'); +} + +async function fetchJson(url) { + const response = await fetch(url); + if (!response.ok) + throw new Error(`Fetch failed ${response.status} ${response.statusText}: ${url}`); + return response.json(); +} + +async function loadRows() { + const rows = []; + let total = null; + for (let offset = 0; total === null || offset < total; offset += 100) { + const params = new URLSearchParams({ + dataset: DATASET, + config: DATASET_CONFIG, + split: SPLIT, + offset: String(offset), + length: '100' + }); + const payload = await fetchJson(`${DATASET_ROWS_URL}?${params.toString()}`); + total = payload.num_rows_total; + rows.push(...payload.rows.map((entry) => entry.row)); + } + return rows; +} + 
/**
 * Unwrap a single entry of a rows payload.
 *
 * The live loader in this file reads each record from `entry.row`, so saved
 * payloads may carry the same `{ row: {...} }` wrapper. A plain row object
 * (no `row` property) is returned unchanged.
 *
 * @param {unknown} entry - One element of the rows array.
 * @param {number} index - Position of the entry, used only for the error message.
 * @returns {object} The dataset row.
 * @throws {Error} When the entry is null/undefined; every consumer would
 *   otherwise fail later with an opaque TypeError on property access.
 */
function unwrapRowEntry(entry, index) {
  const row = entry?.row ?? entry;
  if (row === null || row === undefined) {
    throw new Error(`rows file entry ${index} is null or missing; expected a dataset row object`);
  }
  return row;
}

/**
 * Normalize the parsed content of a `--rows-file` into a flat array of rows.
 *
 * Accepted shapes:
 *   - an array of rows, or of `{ row }` wrappers;
 *   - an object whose `rows` property is such an array.
 * Both shapes go through the same unwrap step, so a file that stores the
 * wrapped entries as a top-level array is handled like the object form.
 *
 * @param {unknown} payload - Result of JSON.parse on the rows file.
 * @returns {object[]} A new array of dataset rows (the input is not mutated).
 * @throws {Error} When the payload is neither shape, or contains a null entry.
 */
function normalizeRowsPayload(payload) {
  const entries = Array.isArray(payload) ? payload : payload?.rows;
  if (!Array.isArray(entries)) {
    throw new Error('rows file must be an array of dataset rows or an object with rows');
  }
  return entries.map((entry, index) => unwrapRowEntry(entry, index));
}

/**
 * Load dataset rows either from the remote dataset (no `--rows-file`) or from
 * a frozen local JSON file.
 *
 * @param {{ rowsFile: string }} args - Parsed CLI arguments.
 * @returns {Promise<object[]>} Dataset rows.
 * @throws {Error} When the rows file cannot be read, is not valid JSON, or has
 *   an unsupported shape. The resolved path is included in read/parse errors.
 */
async function loadRowsForArgs(args) {
  if (!args.rowsFile) return loadRows();
  const rowsPath = resolve(args.rowsFile);
  let parsed;
  try {
    parsed = JSON.parse(readFileSync(rowsPath, 'utf8'));
  } catch (error) {
    // A bare JSON SyntaxError does not say which file was bad; add the path.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`failed to read rows file ${rowsPath}: ${reason}`);
  }
  return normalizeRowsPayload(parsed);
}

/**
 * Validate one dataset row and project it onto the manifest task shape.
 *
 * A row is ineligible when any REQUIRED_FIELDS entry is undefined, null or the
 * empty string, or when `gold_context` is not JSON for a non-empty array.
 *
 * @param {object} row - One dataset row.
 * @returns {{ eligible: false, reason: string, missing: string[] } |
 *           { eligible: true, task: object }}
 *   For ineligible rows, `missing` lists the absent field names plus the
 *   synthetic markers `gold_context_valid_json` /
 *   `gold_context_non_empty_array` when applicable.
 */
function normalizeRow(row) {
  const missing = REQUIRED_FIELDS.filter(
    (field) => row[field] === undefined || row[field] === null || row[field] === ''
  );

  // Only attempt to parse gold_context when it is present at all, so a row is
  // never reported as both "missing" and "malformed" for the same field.
  let goldContextItems = [];
  if (!missing.includes('gold_context')) {
    try {
      const parsed = JSON.parse(row.gold_context);
      if (!Array.isArray(parsed) || parsed.length === 0) {
        missing.push('gold_context_non_empty_array');
      } else {
        goldContextItems = parsed;
      }
    } catch {
      missing.push('gold_context_valid_json');
    }
  }

  if (missing.length > 0) {
    return { eligible: false, reason: 'missing_or_malformed_required_fields', missing };
  }

  // Content hashes are taken over the canonicalized field values.
  const fieldHashes = Object.fromEntries(
    HASH_FIELDS.map((field) => [field, sha256(canonicalize(row[field]))])
  );

  // Key order is kept as-is: the manifest is written with JSON.stringify,
  // which emits keys in insertion order.
  return {
    eligible: true,
    task: {
      instance_id: row.instance_id,
      original_inst_id: row.original_inst_id,
      source: row.source,
      language: row.language,
      // NOTE(review): `repo` is copied but is not in REQUIRED_FIELDS, so it may
      // be undefined here — confirm whether it should be validated.
      repo: row.repo,
      repo_url: row.repo_url,
      base_commit: row.base_commit,
      problem_statement_ref: 'dataset_field:problem_statement',
      problem_statement_hash: fieldHashes.problem_statement,
      gold_context_ref: 'dataset_field:gold_context',
      gold_context_hash: fieldHashes.gold_context,
      patch_hash: fieldHashes.patch,
      test_patch_hash: fieldHashes.test_patch,
      f2p_hash: fieldHashes.f2p,
      p2p_hash: fieldHashes.p2p,
      gold_context_span_count: goldContextItems.length,
      hash_canonicalization_version: CANONICALIZATION_VERSION,
      hardness_signal_status: HARDNESS_STATUS,
      hardness_signal_source: HARDNESS_SOURCE,
      hardness_proxy_used: false
    }
  };
}
+ +function buildPool(rows) { + const seen = new Set(); + const eligible = []; + const excluded = []; + for (const [index, row] of rows.entries()) { + const normalized = normalizeRow(row); + if (!normalized.eligible) { + excluded.push({ + row_index: index, + instance_id: row.instance_id ?? '', + reason: normalized.reason, + details: normalized.missing + }); + continue; + } + if (seen.has(normalized.task.instance_id)) { + excluded.push({ + row_index: index, + instance_id: normalized.task.instance_id, + reason: 'duplicate_instance_id' + }); + continue; + } + seen.add(normalized.task.instance_id); + eligible.push(normalized.task); + } + const taskPoolHash = hashObject( + eligible.map((task) => ({ + instance_id: task.instance_id, + source: task.source, + language: task.language, + repo_url: task.repo_url, + base_commit: task.base_commit, + problem_statement_hash: task.problem_statement_hash, + gold_context_hash: task.gold_context_hash + })) + ); + return { eligible, excluded, taskPoolHash }; +} + +function rankTask(task) { + return sha256( + `${SELECTION_SEED}:${task.source}:${task.language}:${task.repo_url}:${task.instance_id}` + ); +} + +function selectTasks(eligible) { + const ranked = [...eligible].sort((a, b) => rankTask(a).localeCompare(rankTask(b))); + const selected = []; + const selectedIds = new Set(); + const add = (task, rationale) => { + if (selected.length >= 20 || selectedIds.has(task.instance_id)) return; + selectedIds.add(task.instance_id); + selected.push({ ...task, inclusion_rationale: rationale, deterministic_rank: rankTask(task) }); + }; + + for (const language of [...new Set(ranked.map((task) => task.language))].sort()) { + const task = ranked.find((candidate) => candidate.language === language); + if (task) add(task, `language_coverage:${language}`); + } + for (const source of [...new Set(ranked.map((task) => task.source))].sort()) { + const task = ranked.find((candidate) => candidate.source === source); + if (task) add(task, 
`source_coverage:${source}`); + } + for (const task of ranked) { + const repoAlreadySelected = selected.some((candidate) => candidate.repo_url === task.repo_url); + if (!repoAlreadySelected) add(task, `repo_coverage:${task.repo_url}`); + if (selected.length >= 20) break; + } + for (const task of ranked) add(task, 'deterministic_fill'); + + const selectedSet = new Set(selected.map((task) => task.instance_id)); + const nonSelectedEligible = ranked + .filter((task) => !selectedSet.has(task.instance_id)) + .map((task) => ({ + instance_id: task.instance_id, + source: task.source, + language: task.language, + repo_url: task.repo_url, + reason: 'eligible_not_selected', + deterministic_rank: rankTask(task) + })); + + return { selected, nonSelectedEligible }; +} + +function sanitizePathSegment(value) { + return value + .replace(/[^a-zA-Z0-9._-]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 120); +} + +function checkoutPathForTask(task, checkoutRoot) { + if (!checkoutRoot) return ''; + return join( + resolve(checkoutRoot), + sanitizePathSegment(`${task.repo}-${task.base_commit.slice(0, 12)}`) + ); +} + +function buildTaskPayloads(rows, manifest, checkoutRoot) { + const rowsById = new Map(rows.map((row) => [row.instance_id, row])); + const failures = []; + const tasks = []; + for (const task of manifest.tasks ?? []) { + const taskFailures = []; + const row = rowsById.get(task.instance_id); + if (!row) { + taskFailures.push('missing dataset row'); + failures.push(`${task.instance_id}: missing dataset row`); + continue; + } + const problemStatement = typeof row.problem_statement === 'string' ? 
row.problem_statement : ''; + if (!problemStatement.trim()) taskFailures.push('missing problem_statement'); + const problemStatementHash = sha256(canonicalize(problemStatement)); + if (problemStatementHash !== task.problem_statement_hash) + taskFailures.push('problem_statement_hash mismatch'); + if (row.repo_url !== task.repo_url) taskFailures.push('repo_url mismatch'); + if (row.base_commit !== task.base_commit) taskFailures.push('base_commit mismatch'); + if (taskFailures.length > 0) { + failures.push(...taskFailures.map((failure) => `${task.instance_id}: ${failure}`)); + continue; + } + tasks.push({ + instance_id: task.instance_id, + original_inst_id: task.original_inst_id, + repo: task.repo, + repo_url: task.repo_url, + base_commit: task.base_commit, + problem_statement: problemStatement, + problem_statement_hash: problemStatementHash, + problem_statement_hash_verified: problemStatementHash === task.problem_statement_hash, + repo_checkout_path: checkoutPathForTask(task, checkoutRoot), + repo_checkout_status: checkoutRoot ? 'planned_not_verified' : 'not_planned', + lane_outputs_observed: false + }); + } + if (failures.length > 0) + throw new Error(`task payload materialization failed:\n- ${failures.join('\n- ')}`); + const payloadBase = { + name: 'v2.4-contextbench-phase40-task-payloads', + protocolVersion: manifest.protocolVersion, + dataset: manifest.dataset, + datasetConfig: manifest.datasetConfig, + split: manifest.split, + claimBearing: false, + purpose: + 'Phase 40 task input materialization; not lane output and not benchmark evidence by itself.', + manifest_hash: manifest.manifest_hash, + hash_canonicalization_version: CANONICALIZATION_VERSION, + checkout_root: checkoutRoot ? resolve(checkoutRoot) : null, + task_count: tasks.length, + tasks + }; + return withPayloadHash(payloadBase); +} + +function summarize(tasks) { + const countBy = (field) => + tasks.reduce((acc, task) => { + acc[task[field]] = (acc[task[field]] ?? 
0) + 1; + return acc; + }, {}); + return { + task_count: tasks.length, + language_distribution: countBy('language'), + source_distribution: countBy('source'), + repo_distribution: countBy('repo_url'), + repo_count: new Set(tasks.map((task) => task.repo_url)).size, + language_count: new Set(tasks.map((task) => task.language)).size + }; +} + +function buildArtifacts(rows) { + const { eligible, excluded, taskPoolHash } = buildPool(rows); + if (eligible.length < 20) + throw new Error(`Only ${eligible.length} eligible rows; need at least 20`); + if (new Set(eligible.map((task) => task.repo_url)).size < 2) + throw new Error('Eligible pool has fewer than two repositories'); + if (new Set(eligible.map((task) => task.language)).size < 2) + throw new Error('Eligible pool has fewer than two languages'); + + const { selected, nonSelectedEligible } = selectTasks(eligible); + const exclusionLogPath = 'tests/fixtures/contextbench-selection-exclusions.json'; + const manifestBase = { + name: 'v2.4-contextbench-phase37-task-manifest', + protocolVersion: 'contextbench-protocol-v1', + dataset: DATASET, + datasetConfig: DATASET_CONFIG, + split: SPLIT, + claimBearing: true, + selectedInPhase: 37, + selection_algorithm: 'deterministic_seeded_coverage_then_rank_fill_v1', + selection_seed_or_deterministic_order: SELECTION_SEED, + selection_timestamp: SELECTION_TIMESTAMP, + task_pool_hash: taskPoolHash, + exclusion_log_path: exclusionLogPath, + hash_canonicalization_version: CANONICALIZATION_VERSION, + evaluator_success_status: 'passed_synthetic_official_evaluator_probe', + hardness_signal_status: HARDNESS_STATUS, + hardness_signal_source: HARDNESS_SOURCE, + hardness_proxy_used: false, + forbidden_selection_sources: FORBIDDEN_SELECTION_SOURCES, + no_lane_outputs_observed_attestation: + 'No raw/native, codebase-context, competitor, proxy-hardness, or post-failure outputs were observed or used for selection.', + summary: summarize(selected), + tasks: selected + }; + const manifest = { 
...manifestBase, manifest_hash: hashObject(manifestBase) }; + const exclusions = { + name: 'v2.4-contextbench-phase37-selection-exclusions', + protocolVersion: 'contextbench-protocol-v1', + dataset: DATASET, + datasetConfig: DATASET_CONFIG, + split: SPLIT, + selection_algorithm: manifest.selection_algorithm, + selection_seed_or_deterministic_order: SELECTION_SEED, + selection_timestamp: SELECTION_TIMESTAMP, + task_pool_hash: taskPoolHash, + hash_canonicalization_version: CANONICALIZATION_VERSION, + hardness_signal_status: HARDNESS_STATUS, + hardness_proxy_used: false, + no_lane_outputs_observed_attestation: manifest.no_lane_outputs_observed_attestation, + input_row_count: rows.length, + eligible_row_count: eligible.length, + selected_row_count: selected.length, + excluded_rows: excluded, + non_selected_eligible_rows: nonSelectedEligible + }; + return { manifest, exclusions, eligible }; +} + +function verifyManifest(actual, expected = null) { + const failures = []; + const actualHash = actual.manifest_hash; + const actualWithoutHash = { ...actual }; + delete actualWithoutHash.manifest_hash; + if (actualHash !== hashObject(actualWithoutHash)) + failures.push('manifest_hash does not match manifest content'); + if (expected && actualHash !== expected.manifest_hash) + failures.push('manifest differs from deterministic dataset selection'); + if (!Array.isArray(actual.tasks)) failures.push('manifest tasks must be an array'); + else if (actual.tasks.length !== 20) + failures.push(`expected 20 tasks, got ${actual.tasks.length}`); + if (actual.hardness_proxy_used !== false) + failures.push('manifest must set hardness_proxy_used false'); + if (actual.hardness_signal_status !== HARDNESS_STATUS) + failures.push('manifest has wrong hardness signal status'); + if (!actual.no_lane_outputs_observed_attestation) failures.push('missing no-output attestation'); + const tasks = Array.isArray(actual.tasks) ? 
actual.tasks : []; + if (new Set(tasks.map((task) => task.repo_url)).size < 2) + failures.push('selected tasks cover fewer than two repos'); + if (new Set(tasks.map((task) => task.language)).size < 2) + failures.push('selected tasks cover fewer than two languages'); + return failures; +} + +function run(command, args, cwd) { + const result = spawnSync(command, args, { + cwd, + encoding: 'utf8', + env: childEnvForCommand(command) + }); + return { status: result.status, stdout: result.stdout ?? '', stderr: result.stderr ?? '' }; +} + +function runQuiet(command, args, cwd) { + const result = spawnSync(command, args, { + cwd, + env: childEnvForCommand(command), + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'pipe'] + }); + return { + status: result.status, + stdout: result.stdout ?? '', + stderr: result.error?.message ?? result.stderr ?? '' + }; +} + +function childEnvForCommand(command) { + if (command !== 'git') return process.env; + const env = { ...process.env }; + for (const key of Object.keys(env)) { + if (key.startsWith('GIT_')) { + delete env[key]; + } + } + const gitHome = join(tmpdir(), 'contextbench-git-isolated-home'); + mkdirSync(gitHome, { recursive: true }); + env.HOME = gitHome; + env.USERPROFILE = gitHome; + env.XDG_CONFIG_HOME = gitHome; + env.GIT_CONFIG_NOSYSTEM = '1'; + env.GIT_TERMINAL_PROMPT = '0'; + return env; +} + +function git(cwd, args) { + const result = run('git', args, cwd); + if (result.status !== 0) + throw new Error(`git ${args.join(' ')} failed: ${result.stderr || result.stdout}`); + return result.stdout.trim(); +} + +function createEvaluatorFixture(outDir) { + const repoDir = join(outDir, 'probe-repo'); + rmSync(repoDir, { recursive: true, force: true }); + mkdirSync(repoDir, { recursive: true }); + writeFileSync(join(repoDir, 'sample.py'), 'def target():\n return 42\n', 'utf8'); + git(repoDir, ['init']); + git(repoDir, ['config', 'user.email', 'contextbench-probe@example.invalid']); + git(repoDir, ['config', 'user.name', 
'ContextBench Probe']); + git(repoDir, ['add', 'sample.py']); + git(repoDir, ['commit', '-m', 'probe fixture']); + const commit = git(repoDir, ['rev-parse', 'HEAD']); + const goldPath = join(outDir, 'gold.json'); + const predPath = join(outDir, 'prediction.json'); + const resultPath = join(outDir, 'results.jsonl'); + writeJson(goldPath, { + inst_id: 'phase37-synthetic-evaluator-probe', + original_inst_id: 'phase37-synthetic-evaluator-probe', + repo_url: repoDir, + commit, + gold_ctx: [ + { file: 'sample.py', start_line: 1, end_line: 2, content: 'def target():\n return 42' } + ], + patch: '' + }); + writeJson(predPath, { + instance_id: 'phase37-synthetic-evaluator-probe', + repo_url: repoDir, + commit, + traj_data: { + pred_steps: [{ files: ['sample.py'], spans: { 'sample.py': [{ start: 1, end: 2 }] } }], + pred_files: ['sample.py'], + pred_spans: { 'sample.py': [{ start: 1, end: 2 }] } + }, + model_patch: '' + }); + return { repoDir, goldPath, predPath, resultPath }; +} + +function probeEvaluator(outDir) { + mkdirSync(outDir, { recursive: true }); + const fixture = createEvaluatorFixture(outDir); + const officialRepoDir = join(outDir, 'ContextBench-official'); + const moduleCheck = run('python', ['-m', 'contextbench.evaluate', '--help'], process.cwd()); + let evaluatorCwd = process.cwd(); + if (moduleCheck.status !== 0) { + if (!moduleCheck.stderr.includes('No module named')) { + throw new Error( + `official evaluator availability check failed: ${moduleCheck.stderr || moduleCheck.stdout}` + ); + } + if (!readableOfficialEvaluator(officialRepoDir)) { + rmSync(officialRepoDir, { recursive: true, force: true }); + const clone = run( + 'git', + [ + '-c', + 'core.longpaths=true', + 'clone', + '--depth', + '1', + 'https://github.com/EuniAI/ContextBench.git', + officialRepoDir + ], + outDir + ); + if (clone.status !== 0) + throw new Error( + `failed to clone official ContextBench repository: ${clone.stderr || clone.stdout}` + ); + } + evaluatorCwd = officialRepoDir; + } + 
const result = run( + 'python', + [ + '-m', + 'contextbench.evaluate', + '--gold', + fixture.goldPath, + '--pred', + fixture.predPath, + '--cache', + join(outDir, 'repo-cache'), + '--out', + fixture.resultPath + ], + evaluatorCwd + ); + const status = result.status === 0 ? 'passed' : 'failed'; + writeJson(join(outDir, 'probe-summary.json'), { + status, + command: `python -m contextbench.evaluate --gold ${fixture.goldPath} --pred ${fixture.predPath} --cache ${join(outDir, 'repo-cache')} --out ${fixture.resultPath}`, + synthetic_fixture_only: true, + lane_outputs_observed: false, + stdout: result.stdout, + stderr: result.stderr + }); + if (result.status !== 0) + throw new Error(`official evaluator probe failed; see ${join(outDir, 'probe-summary.json')}`); + console.log(`official evaluator probe passed: ${fixture.resultPath}`); +} + +function readableOfficialEvaluator(path) { + try { + readFileSync(join(path, 'contextbench', 'evaluate.py'), 'utf8'); + return true; + } catch { + return false; + } +} + +function payloadHashBase(payload) { + const copy = { + ...payload, + tasks: (payload.tasks ?? []).map((task) => { + const taskCopy = { ...task }; + delete taskCopy.repo_checkout_path; + delete taskCopy.repo_status_short; + delete taskCopy.materialized_at; + return taskCopy; + }) + }; + delete copy.payload_hash; + delete copy.checkout_root; + delete copy.updated_at; + return copy; +} + +function withPayloadHash(payload) { + const base = payloadHashBase(payload); + return { ...payload, payload_hash: hashObject(base) }; +} + +function gitMaybe(cwd, args) { + const result = runQuiet( + 'git', + ['-c', 'core.longpaths=true', '-c', 'core.autocrlf=false', ...args], + cwd + ); + return result.status === 0 ? 
result.stdout.trim() : null; +} + +function gitRequired(cwd, args) { + const result = runQuiet( + 'git', + ['-c', 'core.longpaths=true', '-c', 'core.autocrlf=false', ...args], + cwd + ); + if (result.status !== 0) + throw new Error(`git ${args.join(' ')} failed: ${result.stderr || result.stdout}`); + return result.stdout.trim(); +} + +function cloneCheckout(task) { + const checkoutPath = task.repo_checkout_path; + if (!checkoutPath) throw new Error(`${task.instance_id}: missing repo_checkout_path`); + if (!task.repo_url) throw new Error(`${task.instance_id}: missing repo_url`); + if (!task.base_commit) throw new Error(`${task.instance_id}: missing base_commit`); + const absoluteCheckoutPath = resolve(checkoutPath); + if (!readableGitCheckout(absoluteCheckoutPath)) { + rmSync(absoluteCheckoutPath, { recursive: true, force: true }); + mkdirSync(dirname(absoluteCheckoutPath), { recursive: true }); + const clone = runQuiet( + 'git', + [ + '-c', + 'core.longpaths=true', + '-c', + 'core.autocrlf=false', + 'clone', + '--no-checkout', + task.repo_url, + absoluteCheckoutPath + ], + process.cwd() + ); + if (clone.status !== 0) + throw new Error(`${task.instance_id}: git clone failed: ${clone.stderr || clone.stdout}`); + } + gitRequired(absoluteCheckoutPath, ['config', 'core.longpaths', 'true']); + gitRequired(absoluteCheckoutPath, ['config', 'core.autocrlf', 'false']); + const currentHead = gitMaybe(absoluteCheckoutPath, ['rev-parse', 'HEAD']); + if (currentHead !== task.base_commit) { + const shallowFetch = runQuiet( + 'git', + ['-c', 'core.longpaths=true', 'fetch', '--depth', '1', 'origin', task.base_commit], + absoluteCheckoutPath + ); + if (shallowFetch.status !== 0) { + gitRequired(absoluteCheckoutPath, ['fetch', 'origin', task.base_commit]); + } + } + gitRequired(absoluteCheckoutPath, ['checkout', '--force', '--detach', task.base_commit]); + gitRequired(absoluteCheckoutPath, ['clean', '-fd']); + const actualHead = gitRequired(absoluteCheckoutPath, ['rev-parse', 
'HEAD']); + const statusShort = gitRequired(absoluteCheckoutPath, ['status', '--short']); + return { + ...task, + repo_checkout_path: absoluteCheckoutPath, + repo_checkout_status: + actualHead === task.base_commit && !statusShort ? 'verified' : 'not_clean_or_wrong_commit', + repo_actual_head: actualHead, + base_commit_verified: actualHead === task.base_commit, + repo_status_short: statusShort, + repo_clean_verified: statusShort === '', + materialized_at: new Date().toISOString() + }; +} + +function readableGitCheckout(path) { + if (!existsSync(path)) return false; + try { + readFileSync(join(path, '.git', 'HEAD'), 'utf8'); + return true; + } catch { + return gitMaybe(path, ['rev-parse', '--git-dir']) !== null; + } +} + +function materializeCheckouts(args) { + if (!args.payloads) throw new Error('--materialize-checkouts requires --payloads '); + const payloadPath = resolve(args.payloads); + const payload = JSON.parse(readFileSync(payloadPath, 'utf8')); + const maxTasks = Number.isInteger(args.maxTasks) && args.maxTasks > 0 ? args.maxTasks : Infinity; + let attempted = 0; + const tasks = []; + for (const task of payload.tasks ?? []) { + if (attempted >= maxTasks) { + tasks.push(task); + continue; + } + tasks.push(cloneCheckout(task)); + attempted += 1; + } + const updated = withPayloadHash({ + ...payload, + tasks, + updated_at: new Date().toISOString() + }); + writeJson(payloadPath, updated); + console.log(`materialized ${attempted} checkout(s) in ${payloadPath}`); +} + +function writeGoldInput(rows, args) { + if (!args.out) throw new Error('--write-gold requires --out '); + if (!args.taskId) throw new Error('--write-gold requires --task-id '); + const manifest = JSON.parse(readFileSync(resolve(args.manifest), 'utf8')); + const payloads = args.payloads + ? JSON.parse(readFileSync(resolve(args.payloads), 'utf8')) + : { tasks: [] }; + const payloadById = new Map((payloads.tasks ?? 
[]).map((task) => [task.instance_id, task])); + const rowById = new Map(rows.map((row) => [row.instance_id, row])); + const task = (manifest.tasks ?? []).find((candidate) => candidate.instance_id === args.taskId); + if (!task) throw new Error(`task ${args.taskId} is not present in manifest ${args.manifest}`); + const row = rowById.get(task.instance_id); + if (!row) throw new Error(`task ${task.instance_id} is not present in dataset rows`); + const goldHash = sha256(canonicalize(row.gold_context)); + if (goldHash !== task.gold_context_hash) + throw new Error(`task ${task.instance_id} gold_context_hash mismatch`); + const payload = payloadById.get(task.instance_id); + const repoUrl = isVerifiedCheckoutPayload(payload, task) + ? payload.repo_checkout_path + : task.repo_url; + const goldInput = { + inst_id: task.instance_id, + original_inst_id: task.original_inst_id, + repo_url: repoUrl, + commit: task.base_commit, + gold_ctx: JSON.parse(row.gold_context), + patch: row.patch + }; + writeJson(resolve(args.out), goldInput); + writeJson(`${resolve(args.out)}.summary.json`, { + claimBearing: false, + scorerOnly: true, + lane_outputs_observed: false, + task_id: task.instance_id, + original_repo_url: task.repo_url, + scorer_repo_url: repoUrl, + commit: task.base_commit, + gold_context_hash: goldHash, + gold_context_hash_verified: true, + payload_hash: payloads.payload_hash ?? 
null + }); + console.log(`wrote scorer-only gold input ${resolve(args.out)}`); +} + +function isVerifiedCheckoutPayload(payload, task) { + return ( + payload?.repo_checkout_status === 'verified' && + typeof payload.repo_checkout_path === 'string' && + payload.repo_checkout_path.length > 0 && + payload.repo_actual_head === task.base_commit && + payload.base_commit_verified === true && + payload.repo_clean_verified === true + ); +} + +async function main() { + const args = parseArgs(process.argv.slice(2)); + if (args.help || process.argv.length <= 2) { + help(); + return; + } + if (args.probeEvaluator) { + if (!args.out) throw new Error('--probe-evaluator requires --out '); + probeEvaluator(resolve(args.out)); + return; + } + + if (args.materializeCheckouts) { + materializeCheckouts(args); + return; + } + + if (args.check && !args.rowsFile) { + const manifest = JSON.parse(readFileSync(resolve(args.check), 'utf8')); + const failures = verifyManifest(manifest); + if (failures.length > 0) throw new Error(`manifest check failed:\n- ${failures.join('\n- ')}`); + console.log(`manifest self-check passed: ${args.check}`); + return; + } + + const rows = await loadRowsForArgs(args); + + if (args.writeGold) { + writeGoldInput(rows, args); + return; + } + + if (args.writeTaskPayloads) { + if (!args.out) throw new Error('--write-task-payloads requires --out '); + const manifest = JSON.parse(readFileSync(resolve(args.manifest), 'utf8')); + const payloads = buildTaskPayloads(rows, manifest, args.checkoutRoot); + writeJson(resolve(args.out), payloads); + console.log(`wrote task payloads ${resolve(args.out)}`); + return; + } + + const artifacts = buildArtifacts(rows); + + if (args.dryRun) { + if (!args.out) throw new Error('--dry-run requires --out '); + const outDir = resolve(args.out); + writeJson(join(outDir, 'contextbench-selection-exclusions.json'), artifacts.exclusions); + writeJson(join(outDir, 'contextbench-dry-run-summary.json'), { + dataset: DATASET, + datasetConfig: 
DATASET_CONFIG, + row_count: rows.length, + eligible_row_count: artifacts.eligible.length, + selected_preview: artifacts.manifest.tasks.map((task) => task.instance_id), + task_pool_hash: artifacts.manifest.task_pool_hash, + hardness_signal_status: HARDNESS_STATUS, + hardness_proxy_used: false + }); + console.log(`dry-run wrote ${outDir}`); + } + + if (args.writeFixtures) { + writeJson('tests/fixtures/contextbench-task-manifest.json', artifacts.manifest); + writeJson('tests/fixtures/contextbench-selection-exclusions.json', artifacts.exclusions); + console.log('wrote tests/fixtures/contextbench-task-manifest.json'); + console.log('wrote tests/fixtures/contextbench-selection-exclusions.json'); + } + + if (args.check) { + const manifest = JSON.parse(readFileSync(resolve(args.check), 'utf8')); + const failures = verifyManifest(manifest, artifacts.manifest); + if (failures.length > 0) throw new Error(`manifest check failed:\n- ${failures.join('\n- ')}`); + console.log(`manifest check passed: ${args.check}`); + } +} + +main().catch((error) => { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exitCode = 1; +}); diff --git a/tests/contextbench-protocol.test.ts b/tests/contextbench-protocol.test.ts new file mode 100644 index 0000000..af500c4 --- /dev/null +++ b/tests/contextbench-protocol.test.ts @@ -0,0 +1,496 @@ +import { describe, expect, it } from 'vitest'; +import protocolFixture from './fixtures/contextbench-benchmark-protocol.json'; +import correctionsFixture from './fixtures/contextbench-corrections.json'; +import lanesFixture from './fixtures/contextbench-lanes.json'; + +type ProtocolFixture = { + name: string; + protocolVersion: string; + status: string; + claimAllowed: boolean; + phaseBoundary: { + phase36Freezes: string[]; + phase37Freezes: string[]; + phase36MustNotFreeze: string[]; + }; + benchmarkTarget: { + primary: string; + datasetConfig: string; + officialEvaluatorFirst: boolean; + fallbackScorerPolicy: { + claimBearing: boolean; + requiresValidationAgainstOfficialOutputs: boolean; + }; + }; + taskSlicePolicy: { + sliceKind: string; + taskCount: { min: number; max: number }; + selectedInPhase: number; + phase36SelectionSchemaOnly: boolean; + requiredManifestFields: string[]; + selectionMethodRequiredFields: string[]; + coverageConstraints: { + minRepos: number; + minLanguages: number; + selectionBeforeOutputs: boolean; + }; + hardnessSignalPolicy: { + required: boolean; + status: string; + proxyAllowed: boolean; + selectionMustRecordAbsence: boolean; + }; + forbiddenSources: string[]; + }; + smokeOnlyCorpora: Array<{ name: string; claimBearing: boolean }>; + runPolicy: { + smokeRunsPerTaskLane: number; + claimBearingRunsPerTaskLane: number; + fewerThanClaimRunsMeans: string; + bestOfNReportingAllowed: boolean; + }; + minimalRunnerBehavior: { mustNotScript: string[] }; + structuredAnswerSchema: { requiredFields: string[]; invalidSchemaStatus: string }; + trajectorySchema: { requiredFields: string[]; rawTracePreservationRequired: boolean }; + metrics: { + primary: string[]; + secondary: string[]; 
/**
 * One entry of `LanesFixture.lanes`, as read by the lane governance tests.
 */
type Lane = {
  /** Lane identifier; the tests special-case the value `'raw-native'`. */
  laneId: string;
  phase36Status: string;
  /** The lane's context tool; tests assert it never appears in `disallowedTools`. */
  contextTool: string;
  allowedTools: string[];
  disallowedTools: string[];
  /** Asserted `true` only for the `'raw-native'` lane and `false` for every other lane. */
  nativeToolsAllowed: boolean;
  // The three flags below are asserted `true` for every lane.
  setupCostReportedSeparately: boolean;
  indexCostReportedSeparately: boolean;
  cacheIsolationRequired: boolean;
};
boolean; + setupFailedRequiredLaneBlocksBroadClaims: boolean; + lanes: Lane[]; + setupFailureSemantics: { + status: string; + winEligible: boolean; + claimContribution: string; + includedInPublicationRows: boolean; + blocksBroadClaimsForRequiredLane: boolean; + requiresReproductionCommand: boolean; + requiresLogs: boolean; + }; + laneContaminationRules: Record; + laneToolCardRequiredFields: string[]; +}; + +const protocol = protocolFixture as ProtocolFixture; +const corrections = correctionsFixture as CorrectionsFixture; +const lanes = lanesFixture as LanesFixture; + +const requiredFailureStatuses = [ + 'setup_failed', + 'task_setup_failed', + 'index_failed', + 'timeout', + 'invalid_schema', + 'no_answer', + 'wrong_answer', + 'wrong_evidence', + 'unsupported_claim', + 'false_ready', + 'tool_error', + 'judge_failed' +]; + +describe('ContextBench benchmark protocol invariants', () => { + it('keeps Phase 36 schema-only and leaves actual task identity freeze to Phase 37', () => { + expect(protocol.benchmarkTarget.primary).toBe('ContextBench'); + expect(protocol.benchmarkTarget.datasetConfig).toBe('contextbench_verified'); + expect(protocol.taskSlicePolicy.phase36SelectionSchemaOnly).toBe(true); + expect(protocol.taskSlicePolicy.selectedInPhase).toBe(37); + expect(protocol.taskSlicePolicy.taskCount).toEqual({ min: 20, max: 50 }); + expect(protocol.phaseBoundary.phase36MustNotFreeze).toContain('actual_task_ids'); + expect(protocol.phaseBoundary.phase36MustNotFreeze).toContain('actual_repo_commits'); + expect(protocol.phaseBoundary.phase37Freezes).toContain('actual_contextbench_instance_ids'); + expect(protocol.taskSlicePolicy.requiredManifestFields).toEqual( + expect.arrayContaining([ + 'instance_id', + 'repo_url', + 'base_commit', + 'problem_statement_hash', + 'gold_context_hash', + 'patch_hash', + 'test_patch_hash' + ]) + ); + expect(protocol.taskSlicePolicy.selectionMethodRequiredFields).toEqual( + expect.arrayContaining([ + 'selection_algorithm', + 'task_pool_hash', 
+ 'selection_timestamp', + 'inclusion_rationale', + 'exclusion_log_path', + 'no_lane_outputs_observed_attestation' + ]) + ); + expect(protocol.taskSlicePolicy.forbiddenSources).toEqual( + expect.arrayContaining([ + 'agent_outputs', + 'codebase_context_outputs', + 'competitor_outputs', + 'post_failure_task_filtering' + ]) + ); + expect(protocol.taskSlicePolicy.coverageConstraints.minRepos).toBeGreaterThanOrEqual(2); + expect(protocol.taskSlicePolicy.coverageConstraints.minLanguages).toBeGreaterThanOrEqual(2); + expect(protocol.taskSlicePolicy.coverageConstraints.selectionBeforeOutputs).toBe(true); + expect(protocol.taskSlicePolicy.hardnessSignalPolicy).toEqual({ + required: false, + status: 'unavailable_in_contextbench_verified_schema', + proxyAllowed: false, + selectionMustRecordAbsence: true + }); + }); + + it('records unavailable hardness as a schema fact and forbids proxy scoring', () => { + expect(protocol.taskSlicePolicy.hardnessSignalPolicy.required).toBe(false); + expect(protocol.taskSlicePolicy.hardnessSignalPolicy.status).toBe( + 'unavailable_in_contextbench_verified_schema' + ); + expect(protocol.taskSlicePolicy.hardnessSignalPolicy.proxyAllowed).toBe(false); + expect(protocol.taskSlicePolicy.hardnessSignalPolicy.selectionMustRecordAbsence).toBe(true); + expect(JSON.stringify(protocol)).not.toContain('mustIncludeHardTasks'); + }); + + it('freezes smoke and claim-bearing run-count policy', () => { + expect(protocol.runPolicy.smokeRunsPerTaskLane).toBe(1); + expect(protocol.runPolicy.claimBearingRunsPerTaskLane).toBe(3); + expect(protocol.runPolicy.fewerThanClaimRunsMeans).toBe('diagnostic_only_claim_allowed_false'); + expect(protocol.runPolicy.bestOfNReportingAllowed).toBe(false); + expect(protocol.thresholds.claimBearingRunsPerTaskLane).toBe( + protocol.runPolicy.claimBearingRunsPerTaskLane + ); + }); + + it('keeps smoke corpora non-claim-bearing and blocks public claims before evidence', () => { + expect(protocol.claimAllowed).toBe(false); + 
expect(protocol.smokeOnlyCorpora).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'Excalidraw', claimBearing: false }), + expect.objectContaining({ name: 'FastAPI', claimBearing: false }) + ]) + ); + expect(protocol.blockedClaims).toEqual( + expect.arrayContaining([ + 'codebase_context_beats_competitors', + 'codebase_context_improves_patch_correctness', + 'focus_mode_improves_agent_outcomes', + 'token_savings_superiority', + 'setup_failed_competitor_is_loss' + ]) + ); + }); + + it('uses official ContextBench scoring first and constrains fallback scorer claims', () => { + expect(protocol.benchmarkTarget.officialEvaluatorFirst).toBe(true); + expect(protocol.benchmarkTarget.fallbackScorerPolicy.claimBearing).toBe(false); + expect( + protocol.benchmarkTarget.fallbackScorerPolicy.requiresValidationAgainstOfficialOutputs + ).toBe(true); + expect(protocol.tripwires).toContain( + 'official_evaluator_bypassed_without_documented_incompatibility' + ); + }); + + it('freezes runner boundaries, structured answers, budgets, and judge scope', () => { + expect(protocol.minimalRunnerBehavior.mustNotScript).toEqual( + expect.arrayContaining([ + 'agent_decisions', + 'file_selection', + 'query_rewrites', + 'evidence_selection' + ]) + ); + expect(protocol.structuredAnswerSchema.requiredFields).toEqual( + expect.arrayContaining([ + 'answer', + 'confidence', + 'evidence', + 'filesReferenced', + 'unsupportedClaims', + 'readyToEdit' + ]) + ); + expect(protocol.structuredAnswerSchema.invalidSchemaStatus).toBe('invalid_schema'); + expect(protocol.trajectorySchema.requiredFields).toEqual([ + 'pred_steps', + 'pred_files', + 'pred_spans' + ]); + expect(protocol.trajectorySchema.rawTracePreservationRequired).toBe(true); + expect(protocol.budgets.sameModelAcrossLanes).toBe(true); + expect(protocol.budgets.setupAndIndexingReportedSeparately).toBe(true); + expect(protocol.budgets.defaults.maxContextTokens).toBeGreaterThan(0); + 
expect(protocol.factRecallJudgeScope.forbiddenFor).toContain('broad_rubric_vibes'); + expect(protocol.factRecallJudgeScope.uncertainCountsAsSuccess).toBe(false); + }); + + it('prioritizes correctness metrics over token efficiency', () => { + expect(protocol.metrics.primary).toEqual( + expect.arrayContaining([ + 'context_file_recall', + 'context_file_precision', + 'context_symbol_recall', + 'context_symbol_precision', + 'context_span_recall', + 'context_span_precision', + 'edit_location_recall', + 'edit_location_precision' + ]) + ); + expect(protocol.metrics.efficiencyIsSecondary).toBe(true); + expect(protocol.metrics.tokenSavingsWinRequiresCorrectnessNonRegression).toBe(true); + expect(protocol.thresholds.wedgeWinRequires).toEqual( + expect.arrayContaining(['no_correctness_regression', 'false_ready_rate_not_worse']) + ); + }); + + it('keeps the full failure taxonomy visible in terminal run statuses', () => { + expect(protocol.failureTaxonomy).toEqual(requiredFailureStatuses); + expect(protocol.runManifestSchema.terminalStatuses).toEqual( + expect.arrayContaining(requiredFailureStatuses) + ); + expect(protocol.runManifestSchema.appendOnly).toBe(true); + expect(protocol.runManifestSchema.claimRunsRequireSlotsForEveryTaskLaneRepeat).toBe(true); + expect(protocol.runManifestSchema.failedRunsIncludedInAggregates).toBe(true); + expect(protocol.runManifestSchema.requiredFields).toEqual( + expect.arrayContaining([ + 'protocol_hash', + 'task_manifest_hash', + 'raw_trace_path', + 'score_path' + ]) + ); + }); + + it('requires protocol fingerprinting and correction-backed governance changes', () => { + expect(protocol.protocolFingerprint.required).toBe(true); + expect(protocol.protocolFingerprint.covers).toEqual( + expect.arrayContaining([ + 'protocol_fixture', + 'lane_fixture', + 'correction_fixture', + 'task_manifest_after_phase37' + ]) + ); + expect(corrections.policy.silentChangesAllowed).toBe(false); + 
expect(corrections.policy.anyFixtureChangeRequiresCorrection).toBe(true); + expect(corrections.policy.comparisonAcrossVersionsRequiresFullRerun).toBe(true); + expect(corrections.policy.requiresProtocolVersionBumpFor).toEqual( + expect.arrayContaining([ + 'task_ids', + 'repo_commits', + 'qrels', + 'thresholds', + 'metrics', + 'failure_taxonomy', + 'terminal_statuses', + 'blocked_claims', + 'lane_sets', + 'setup_failure_semantics', + 'correction_policy' + ]) + ); + expect(corrections.policy.requiredCorrectionFields).toEqual( + expect.arrayContaining([ + 'correction_id', + 'reason_category', + 'prior_hash', + 'new_hash', + 'protocol_version_before', + 'protocol_version_after' + ]) + ); + for (const correction of corrections.corrections) { + for (const field of corrections.policy.requiredCorrectionFields) { + expect(correction[field]).toBeTruthy(); + } + expect(corrections.policy.allowedReasonCategories).toContain(correction.reason_category); + expect(corrections.policy.forbiddenReasons).not.toContain(correction.reason_category); + } + expect(corrections.corrections).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + correction_id: 'contextbench-hardness-signal-policy-2026-04-27', + reason_category: 'factual_erratum', + affected_fields: expect.arrayContaining([ + 'taskSlicePolicy.coverageConstraints.mustIncludeHardTasks', + 'taskSlicePolicy.hardnessSignalPolicy' + ]), + prior_hash: expect.stringMatching(/^sha256:[a-f0-9]{64}$/), + new_hash: expect.stringMatching(/^sha256:[a-f0-9]{64}$/), + protocol_version_before: 'contextbench-protocol-v1', + protocol_version_after: 'contextbench-protocol-v1' + }) + ]) + ); + }); + + it('requires architecture review and one-cycle continue/pivot/kill governance', () => { + expect(protocol.architectureReviewRule.requiredBeforePostBaselineProductChanges).toBe(true); + expect(protocol.architectureReviewRule.mustRejectTaskSpecificHeuristics).toBe(true); + 
// Governance checks on the lanes fixture: which lanes exist, which tools each lane may
// use, and how a lane that failed setup is reported.
describe('ContextBench lane governance invariants', () => {
  it('preserves initial gate lanes and full broad-claim lane set', () => {
    const gateLanes = ['raw-native', 'codebase-context', 'jcodemunch-repomapper'];
    const remainingBroadClaimLanes = ['grepai', 'codebase-memory-mcp', 'codegraphcontext'];

    expect(lanes.initialExternalGate).toEqual(gateLanes);
    // The broad-claim set is the gate lanes followed by the remaining lanes, in order.
    expect(lanes.broadClaimLaneSet).toEqual([...gateLanes, ...remainingBroadClaimLanes]);
    expect(lanes.broadClaimsRequireAllLanesComplete).toBe(true);
    expect(lanes.setupFailedRequiredLaneBlocksBroadClaims).toBe(true);
  });

  it('enforces exactly one context tool per lane and blocks native shell leakage', () => {
    const nativeTools = ['native-read', 'native-search', 'native-shell-readonly'];

    lanes.lanes.forEach((lane) => {
      const isRawNative = lane.laneId === 'raw-native';

      expect(lane.disallowedTools).not.toContain(lane.contextTool);
      expect(lane.setupCostReportedSeparately).toBe(true);
      expect(lane.indexCostReportedSeparately).toBe(true);
      expect(lane.cacheIsolationRequired).toBe(true);

      // Only the raw-native lane may use native tools at all.
      expect(lane.nativeToolsAllowed).toBe(isRawNative);
      if (isRawNative) {
        expect(lane.allowedTools).toEqual(expect.arrayContaining(nativeTools));
        return;
      }

      // Every other lane is limited to its single context tool and must ban the native ones.
      expect(lane.allowedTools).toEqual([lane.contextTool]);
      expect(lane.disallowedTools).toEqual(expect.arrayContaining(nativeTools));
    });

    const mandatoryRules = [
      'oneContextToolPerLane',
      'mixedLaneContextInvalidatesRun',
      'memoryStateMustBeIsolated'
    ];
    for (const rule of mandatoryRules) {
      expect(lanes.laneContaminationRules[rule]).toBe(true);
    }
  });

  it('treats setup failures as missing evidence instead of wins', () => {
    const expectedSemantics = {
      status: 'setup_failed',
      winEligible: false,
      claimContribution: 'missing_evidence',
      includedInPublicationRows: true,
      blocksBroadClaimsForRequiredLane: true,
      requiresReproductionCommand: true,
      requiresLogs: true
    };
    expect(lanes.setupFailureSemantics).toMatchObject(expectedSemantics);
  });

  it('requires lane tool cards to make setup, index, version, cache, and artifact paths explicit', () => {
    const mandatoryCardFields = [
      'laneId',
      'allowedTools',
      'disallowedTools',
      'setupCommand',
      'indexCommand',
      'queryCommand',
      'versionCommand',
      'cachePath',
      'artifactPaths'
    ];
    expect(lanes.laneToolCardRequiredFields).toEqual(expect.arrayContaining(mandatoryCardFields));
  });
});
/**
 * One frozen task entry of the ContextBench task manifest fixture (`manifest.tasks`).
 *
 * The format notes below restate what the tests in this file assert for every task.
 */
type ContextBenchTask = {
  instance_id: string;
  original_inst_id: string;
  source: string;
  language: string;
  repo: string;
  /** Asserted to match `^https://github.com/….git$`. */
  repo_url: string;
  /** Asserted to be 40 lowercase hex characters. */
  base_commit: string;
  /** Asserted to equal `'dataset_field:problem_statement'`. */
  problem_statement_ref: string;
  // Every `*_hash` field is asserted to match `sha256:` followed by 64 lowercase hex characters.
  problem_statement_hash: string;
  /** Asserted to equal `'dataset_field:gold_context'`. */
  gold_context_ref: string;
  gold_context_hash: string;
  patch_hash: string;
  test_patch_hash: string;
  f2p_hash: string;
  p2p_hash: string;
  /** Asserted to be greater than zero. */
  gold_context_span_count: number;
  hash_canonicalization_version: string;
  hardness_signal_status: string;
  hardness_signal_source: string;
  /** Asserted to be `false`. */
  hardness_proxy_used: boolean;
  /** Asserted to start with `language_coverage`, `source_coverage`, `repo_coverage`, or `deterministic_fill`. */
  inclusion_rationale: string;
  /** Asserted to match the same `sha256:` pattern as the hash fields. */
  deterministic_rank: string;
};
/**
 * Shape of the smoke-pack fixture (`contextbench-smoke-pack.json`) as read by the
 * smoke pack separation tests.
 */
type SmokePack = {
  /** Asserted to be `false`. */
  claimBearing: boolean;
  /** Asserted to equal `'local_harness_smoke_only'`. */
  purpose: string;
  /** Asserted to equal `'metadata_only_not_executed_in_phase37'`. */
  executionStatus: string;
  /** Asserted to include `'contextbench_claim_bearing_aggregates'` and `'public_benchmark_claims'`. */
  mustNotContributeTo: string[];
  /** Asserted to include Excalidraw and FastAPI entries with `claimBearing` and `phase37RunnableTasks` both `false`. */
  corpora: Array<{
    name: string;
    claimBearing: boolean;
    purpose: string;
    phase37RunnableTasks: boolean;
  }>;
};
/** Format every hash in the fixtures must have: `sha256:` plus 64 lowercase hex characters. */
const shaPattern = /^sha256:[a-f0-9]{64}$/;
const canonicalizationVersion = 'contextbench-canonical-json-lf-v1';
const hardnessStatus = 'unavailable_in_contextbench_verified_schema';

/**
 * Snapshot of the current process environment with every `GIT_*` variable removed.
 */
const childGitEnv = Object.fromEntries(
  Object.entries(process.env).filter(([name]) => !name.startsWith('GIT_'))
);

/**
 * Build an environment that starts from {@link childGitEnv} and sets several `GIT_*`
 * variables to paths under the OS temp directory, plus `GIT_SSH_COMMAND` set to `'false'`.
 *
 * @returns A fresh environment object on every call.
 */
function poisonedGitEnv(): NodeJS.ProcessEnv {
  const underTmp = (leaf: string): string => path.join(tmpdir(), leaf);
  return {
    ...childGitEnv,
    GIT_DIR: underTmp('contextbench-poisoned-git-dir'),
    GIT_WORK_TREE: underTmp('contextbench-poisoned-work-tree'),
    GIT_INDEX_FILE: underTmp('contextbench-poisoned-index'),
    GIT_CONFIG_GLOBAL: underTmp('contextbench-poisoned-gitconfig'),
    GIT_SSH_COMMAND: 'false'
  };
}

/**
 * Serialize a JSON-compatible value with object keys in sorted order, so that two
 * structurally equal values produce the same text regardless of key insertion order.
 *
 * Primitives and `null` go through `JSON.stringify`; arrays keep their element order;
 * object entries are sorted by key with `localeCompare`.
 *
 * NOTE(review): the frozen `manifest_hash` is compared against a hash of this output, so
 * the key ordering presumably has to stay identical to whatever the selector script
 * uses — confirm before changing the comparator.
 *
 * @param value Value to serialize.
 * @returns Canonical JSON text with no insignificant whitespace.
 */
function stableStringify(value: unknown): string {
  if (value === null || typeof value !== 'object') return JSON.stringify(value);
  if (Array.isArray(value)) return `[${value.map((item) => stableStringify(item)).join(',')}]`;

  const sortedEntries = Object.entries(value as Record<string, unknown>).sort(([a], [b]) =>
    a.localeCompare(b)
  );
  const members = sortedEntries.map(
    ([key, item]) => `${JSON.stringify(key)}:${stableStringify(item)}`
  );
  return `{${members.join(',')}}`;
}

/**
 * Hash a value by its canonical serialization.
 *
 * @param value Value to hash; serialized with {@link stableStringify}.
 * @returns `sha256:` followed by the lowercase hex digest.
 */
function hashObject(value: unknown): string {
  return sha256Text(stableStringify(value));
}

/**
 * Hash a string as UTF-8.
 *
 * @param value Text to hash.
 * @returns `sha256:` followed by the lowercase hex digest.
 */
function sha256Text(value: string): string {
  return `sha256:${createHash('sha256').update(value, 'utf8').digest('hex')}`;
}
expect(manifest.selection_algorithm).toBe('deterministic_seeded_coverage_then_rank_fill_v1'); + expect(manifest.selection_seed_or_deterministic_order).toBe( + 'phase37-contextbench-v1-2026-04-27' + ); + expect(manifest.selection_timestamp).toBeTruthy(); + expect(manifest.task_pool_hash).toMatch(shaPattern); + expect(manifest.exclusion_log_path).toBe( + 'tests/fixtures/contextbench-selection-exclusions.json' + ); + expect(manifest.evaluator_success_status).toBe('passed_synthetic_official_evaluator_probe'); + }); + + it('meets repo, language, and source coverage without proxy hardness', () => { + expect(manifest.summary.repo_count).toBeGreaterThanOrEqual( + protocol.taskSlicePolicy.coverageConstraints.minRepos + ); + expect(manifest.summary.language_count).toBeGreaterThanOrEqual( + protocol.taskSlicePolicy.coverageConstraints.minLanguages + ); + expect(Object.keys(manifest.summary.source_distribution).length).toBeGreaterThanOrEqual(2); + expect(manifest.hardness_signal_status).toBe(hardnessStatus); + expect(manifest.hardness_signal_source).toBe('dataset_schema_probe'); + expect(manifest.hardness_proxy_used).toBe(false); + expect(protocol.taskSlicePolicy.hardnessSignalPolicy).toMatchObject({ + required: false, + status: hardnessStatus, + proxyAllowed: false, + selectionMustRecordAbsence: true + }); + }); + + it('records stable identity, source, repo pin, and hash fields for every task', () => { + for (const task of manifest.tasks) { + expect(task.instance_id).toBeTruthy(); + expect(task.original_inst_id).toBeTruthy(); + expect(task.source).toBeTruthy(); + expect(task.language).toBeTruthy(); + expect(task.repo_url).toMatch(/^https:\/\/github\.com\/.+\.git$/); + expect(task.base_commit).toMatch(/^[a-f0-9]{40}$/); + expect(task.problem_statement_ref).toBe('dataset_field:problem_statement'); + expect(task.gold_context_ref).toBe('dataset_field:gold_context'); + expect(task.problem_statement_hash).toMatch(shaPattern); + expect(task.gold_context_hash).toMatch(shaPattern); + 
expect(task.patch_hash).toMatch(shaPattern); + expect(task.test_patch_hash).toMatch(shaPattern); + expect(task.f2p_hash).toMatch(shaPattern); + expect(task.p2p_hash).toMatch(shaPattern); + expect(task.gold_context_span_count).toBeGreaterThan(0); + expect(task.hash_canonicalization_version).toBe(canonicalizationVersion); + expect(task.hardness_proxy_used).toBe(false); + expect(task.deterministic_rank).toMatch(shaPattern); + expect(task.inclusion_rationale).toMatch( + /^(language_coverage|source_coverage|repo_coverage|deterministic_fill)/ + ); + } + }); + + it('self-verifies manifest hashing and metadata determinism', () => { + const withoutHash: Record = { ...manifest }; + delete withoutHash.manifest_hash; + expect(manifest.manifest_hash).toBe(hashObject(withoutHash)); + expect(manifest.hash_canonicalization_version).toBe(canonicalizationVersion); + expect(hashObject({ a: 1, b: ['x', 'y'] })).toBe(hashObject({ b: ['x', 'y'], a: 1 })); + }); +}); + +describe('ContextBench Phase 37 exclusion log and anti-gaming guards', () => { + it('keeps the exclusion log aligned with the manifest and records non-selected eligible rows', () => { + expect(exclusions.dataset).toBe(manifest.dataset); + expect(exclusions.datasetConfig).toBe(manifest.datasetConfig); + expect(exclusions.selection_algorithm).toBe(manifest.selection_algorithm); + expect(exclusions.selection_seed_or_deterministic_order).toBe( + manifest.selection_seed_or_deterministic_order + ); + expect(exclusions.selection_timestamp).toBe(manifest.selection_timestamp); + expect(exclusions.task_pool_hash).toBe(manifest.task_pool_hash); + expect(exclusions.hash_canonicalization_version).toBe(canonicalizationVersion); + expect(exclusions.hardness_signal_status).toBe(hardnessStatus); + expect(exclusions.hardness_proxy_used).toBe(false); + expect(exclusions.selected_row_count).toBe(20); + expect(exclusions.eligible_row_count).toBeGreaterThanOrEqual(20); + expect(exclusions.non_selected_eligible_rows.length).toBeGreaterThan(0); 
+ expect(exclusions.non_selected_eligible_rows[0].reason).toBe('eligible_not_selected'); + }); + + it('blocks output-aware and proxy-hardness selection sources', () => { + expect(manifest.forbidden_selection_sources).toEqual( + expect.arrayContaining([ + 'agent_outputs', + 'codebase_context_outputs', + 'competitor_outputs', + 'proxy_hardness_score', + 'post_failure_task_filtering' + ]) + ); + expect(manifest.no_lane_outputs_observed_attestation).toContain('No raw/native'); + expect(exclusions.no_lane_outputs_observed_attestation).toBe( + manifest.no_lane_outputs_observed_attestation + ); + expect(protocol.taskSlicePolicy.forbiddenSources).toEqual( + expect.arrayContaining(['agent_outputs', 'codebase_context_outputs', 'competitor_outputs']) + ); + }); + + it('keeps Phase 36 boundaries and correction-ledger semantics intact', () => { + expect(protocol.phaseBoundary.phase36MustNotFreeze).toEqual( + expect.arrayContaining(['actual_task_ids', 'actual_repo_commits']) + ); + expect(protocol.phaseBoundary.phase37Freezes).toContain('actual_contextbench_instance_ids'); + expect(corrections.corrections).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + correction_id: 'contextbench-hardness-signal-policy-2026-04-27', + reason_category: 'factual_erratum', + affected_fields: expect.arrayContaining(['taskSlicePolicy.hardnessSignalPolicy']) + }) + ]) + ); + expect(corrections.policy.allowedReasonCategories).toContain('factual_erratum'); + expect(corrections.policy.forbiddenReasons).not.toContain('factual_erratum'); + }); + + it('keeps lane isolation visible without running any lane', () => { + expect(lanes.laneContaminationRules.oneContextToolPerLane).toBe(true); + expect(lanes.laneContaminationRules.mixedLaneContextInvalidatesRun).toBe(true); + for (const lane of lanes.lanes) { + if (lane.laneId === 'raw-native') { + expect(lane.allowedTools).toEqual(expect.arrayContaining(['native-read', 'native-search'])); + } else { + 
// Checks that the smoke corpora stay metadata-only and never appear in the claim-bearing
// task manifest or among permitted claims.
describe('ContextBench Phase 37 smoke pack separation', () => {
  const smokeCorpusNames = ['Excalidraw', 'FastAPI'];

  it('keeps Excalidraw and FastAPI metadata-only and non-claim-bearing', () => {
    expect(smokePack.claimBearing).toBe(false);
    expect(smokePack.purpose).toBe('local_harness_smoke_only');
    expect(smokePack.executionStatus).toBe('metadata_only_not_executed_in_phase37');
    expect(smokePack.mustNotContributeTo).toEqual(
      expect.arrayContaining(['contextbench_claim_bearing_aggregates', 'public_benchmark_claims'])
    );

    // In the smoke pack each corpus must also be flagged as not runnable in Phase 37.
    const smokePackMatchers = smokeCorpusNames.map((name) =>
      expect.objectContaining({ name, claimBearing: false, phase37RunnableTasks: false })
    );
    expect(smokePack.corpora).toEqual(expect.arrayContaining(smokePackMatchers));

    // The protocol fixture only has to agree on the non-claim-bearing flag.
    const protocolMatchers = smokeCorpusNames.map((name) =>
      expect.objectContaining({ name, claimBearing: false })
    );
    expect(protocol.smokeOnlyCorpora).toEqual(expect.arrayContaining(protocolMatchers));
  });

  it('does not mix smoke corpora into the claim-bearing ContextBench manifest or claims', () => {
    const serializedTasks = stableStringify(manifest.tasks);
    for (const corpusName of smokeCorpusNames) {
      expect(serializedTasks).not.toContain(corpusName);
    }

    expect(protocol.claimAllowed).toBe(false);

    const claimsThatMustStayBlocked = [
      'codebase_context_beats_competitors',
      'codebase_context_improves_productivity',
      'focus_mode_improves_agent_outcomes',
      'token_savings_superiority'
    ];
    expect(protocol.blockedClaims).toEqual(expect.arrayContaining(claimsThatMustStayBlocked));
  });
});
''); + } + expect(stderr).toContain('manifest_hash does not match manifest content'); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); + + it('writes selected problem statements without observing lane outputs', () => { + const tempRoot = mkdtempSync(path.join(tmpdir(), 'contextbench-task-payloads-')); + try { + const problemStatement = 'Fix the parser when the input contains nested groups.'; + const task = { + instance_id: 'fixture-task-1', + original_inst_id: 'owner__repo-1', + repo: 'owner/repo', + repo_url: 'https://github.com/owner/repo.git', + base_commit: '1234567890abcdef1234567890abcdef12345678', + problem_statement_hash: sha256Text(problemStatement) + }; + const manifestPath = path.join(tempRoot, 'manifest.json'); + const rowsPath = path.join(tempRoot, 'rows.json'); + const payloadPath = path.join(tempRoot, 'payloads.json'); + const payloadPathB = path.join(tempRoot, 'payloads-b.json'); + const checkoutRoot = path.join(tempRoot, 'checkouts'); + const checkoutRootB = path.join(tempRoot, 'other-checkouts'); + writeFileSync( + manifestPath, + `${JSON.stringify( + { + protocolVersion: 'contextbench-protocol-v1', + dataset: 'Contextbench/ContextBench', + datasetConfig: 'contextbench_verified', + split: 'train', + manifest_hash: 'sha256:test-manifest', + tasks: [task] + }, + null, + 2 + )}\n`, + 'utf8' + ); + writeFileSync( + rowsPath, + `${JSON.stringify( + { + rows: [ + { + row: { + instance_id: task.instance_id, + repo_url: task.repo_url, + base_commit: task.base_commit, + problem_statement: problemStatement + } + } + ] + }, + null, + 2 + )}\n`, + 'utf8' + ); + execFileSync( + 'node', + [ + 'scripts/contextbench-select-slice.mjs', + '--write-task-payloads', + '--rows-file', + rowsPath, + '--manifest', + manifestPath, + '--checkout-root', + checkoutRoot, + '--out', + payloadPath + ], + { encoding: 'utf8' } + ); + execFileSync( + 'node', + [ + 'scripts/contextbench-select-slice.mjs', + '--write-task-payloads', + '--rows-file', + 
rowsPath, + '--manifest', + manifestPath, + '--checkout-root', + checkoutRootB, + '--out', + payloadPathB + ], + { encoding: 'utf8' } + ); + + const payload = JSON.parse(readFileSync(payloadPath, 'utf8')) as { + claimBearing: boolean; + task_count: number; + payload_hash: string; + tasks: Array<{ + instance_id: string; + problem_statement: string; + problem_statement_hash_verified: boolean; + repo_checkout_path: string; + repo_checkout_status: string; + lane_outputs_observed: boolean; + }>; + }; + const payloadB = JSON.parse(readFileSync(payloadPathB, 'utf8')) as typeof payload; + expect(payload.claimBearing).toBe(false); + expect(payload.task_count).toBe(1); + expect(payload.payload_hash).toMatch(shaPattern); + expect(payload.payload_hash).toBe(payloadB.payload_hash); + expect(payload.tasks[0]).toMatchObject({ + instance_id: task.instance_id, + problem_statement: problemStatement, + problem_statement_hash_verified: true, + repo_checkout_status: 'planned_not_verified', + lane_outputs_observed: false + }); + expect(payload.tasks[0].repo_checkout_path).toContain('owner-repo-1234567890ab'); + expect(payload.tasks[0].repo_checkout_path).not.toBe(payloadB.tasks[0].repo_checkout_path); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); + + it('rejects invalid task payload rows before writing any task payload entry', () => { + const tempRoot = mkdtempSync(path.join(tmpdir(), 'contextbench-task-payloads-invalid-')); + try { + const problemStatement = 'Fix the parser when the input contains nested groups.'; + const task = { + instance_id: 'fixture-task-1', + original_inst_id: 'owner__repo-1', + repo: 'owner/repo', + repo_url: 'https://github.com/owner/repo.git', + base_commit: '1234567890abcdef1234567890abcdef12345678', + problem_statement_hash: sha256Text(problemStatement) + }; + const manifestPath = path.join(tempRoot, 'manifest.json'); + const rowsPath = path.join(tempRoot, 'rows.json'); + const payloadPath = path.join(tempRoot, 
'payloads.json'); + writeFileSync( + manifestPath, + `${JSON.stringify( + { + protocolVersion: 'contextbench-protocol-v1', + dataset: 'Contextbench/ContextBench', + datasetConfig: 'contextbench_verified', + split: 'train', + manifest_hash: 'sha256:test-manifest', + tasks: [task] + }, + null, + 2 + )}\n`, + 'utf8' + ); + writeFileSync( + rowsPath, + `${JSON.stringify( + { + rows: [ + { + row: { + instance_id: task.instance_id, + repo_url: task.repo_url, + base_commit: task.base_commit, + problem_statement: 'Different statement.' + } + } + ] + }, + null, + 2 + )}\n`, + 'utf8' + ); + let stderr = ''; + try { + execFileSync( + 'node', + [ + 'scripts/contextbench-select-slice.mjs', + '--write-task-payloads', + '--rows-file', + rowsPath, + '--manifest', + manifestPath, + '--out', + payloadPath + ], + { encoding: 'utf8' } + ); + } catch (error: unknown) { + const failure = error as { stderr?: Buffer | string }; + stderr = Buffer.isBuffer(failure.stderr) + ? failure.stderr.toString('utf8') + : String(failure.stderr ?? 
''); + } + expect(stderr).toContain('problem_statement_hash mismatch'); + expect(() => readFileSync(payloadPath, 'utf8')).toThrow(); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); + + it('materializes planned checkout paths and records verified base commits', () => { + const tempRoot = mkdtempSync(path.join(tmpdir(), 'contextbench-checkout-materializer-')); + try { + const sourceRepo = path.join(tempRoot, 'source-repo'); + const checkoutPath = path.join(tempRoot, 'checkout-repo'); + execFileSync('git', ['-c', 'core.autocrlf=false', 'init', sourceRepo], { + encoding: 'utf8', + env: childGitEnv + }); + writeFileSync(path.join(sourceRepo, 'README.md'), 'fixture\n', 'utf8'); + execFileSync('git', ['-c', 'core.autocrlf=false', 'add', 'README.md'], { + cwd: sourceRepo, + env: childGitEnv, + encoding: 'utf8' + }); + execFileSync( + 'git', + [ + '-c', + 'user.name=ContextBench Test', + '-c', + 'user.email=contextbench@example.invalid', + 'commit', + '-m', + 'fixture' + ], + { cwd: sourceRepo, encoding: 'utf8', env: childGitEnv } + ); + const commit = execFileSync('git', ['rev-parse', 'HEAD'], { + cwd: sourceRepo, + env: childGitEnv, + encoding: 'utf8' + }).trim(); + const payloadPath = path.join(tempRoot, 'payloads.json'); + writeFileSync( + payloadPath, + `${JSON.stringify( + { + name: 'test-payloads', + protocolVersion: 'contextbench-protocol-v1', + claimBearing: false, + tasks: [ + { + instance_id: 'fixture-task-1', + repo_url: sourceRepo, + base_commit: commit, + repo_checkout_path: checkoutPath, + repo_checkout_status: 'planned_not_verified' + } + ] + }, + null, + 2 + )}\n`, + 'utf8' + ); + + execFileSync( + 'node', + [ + 'scripts/contextbench-select-slice.mjs', + '--materialize-checkouts', + '--payloads', + payloadPath, + '--max-tasks', + '1' + ], + { encoding: 'utf8' } + ); + + const payload = JSON.parse(readFileSync(payloadPath, 'utf8')) as { + payload_hash: string; + tasks: Array<{ + repo_checkout_status: string; + repo_actual_head: 
string; + base_commit_verified: boolean; + repo_clean_verified: boolean; + }>; + }; + expect(payload.payload_hash).toMatch(shaPattern); + expect(payload.tasks[0]).toMatchObject({ + repo_checkout_status: 'verified', + repo_actual_head: commit, + base_commit_verified: true, + repo_clean_verified: true + }); + expect( + execFileSync('git', ['rev-parse', 'HEAD'], { + cwd: checkoutPath, + encoding: 'utf8', + env: childGitEnv + }).trim() + ).toBe(commit); + + const firstPayloadHash = payload.payload_hash; + writeFileSync(path.join(checkoutPath, 'untracked.txt'), 'stale local file\n', 'utf8'); + execFileSync( + 'node', + [ + 'scripts/contextbench-select-slice.mjs', + '--materialize-checkouts', + '--payloads', + payloadPath, + '--max-tasks', + '1' + ], + { encoding: 'utf8', env: poisonedGitEnv() } + ); + const reverifiedPayload = JSON.parse(readFileSync(payloadPath, 'utf8')) as typeof payload; + expect(reverifiedPayload.payload_hash).toBe(firstPayloadHash); + expect(reverifiedPayload.tasks[0]).toMatchObject({ + repo_checkout_status: 'verified', + repo_actual_head: commit, + base_commit_verified: true, + repo_clean_verified: true + }); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); + + it('writes scorer-only gold input without mixing it into solver payloads', () => { + const tempRoot = mkdtempSync(path.join(tmpdir(), 'contextbench-gold-input-')); + try { + const goldContext = JSON.stringify([ + { file: 'src/a.ts', start_line: 1, end_line: 2, content: 'export const a = 1;' } + ]); + const task = { + instance_id: 'fixture-task-1', + original_inst_id: 'owner__repo-1', + repo: 'owner/repo', + repo_url: 'https://github.com/owner/repo.git', + base_commit: '1234567890abcdef1234567890abcdef12345678', + gold_context_hash: sha256Text(stableStringify(JSON.parse(goldContext) as unknown)) + }; + const manifestPath = path.join(tempRoot, 'manifest.json'); + const rowsPath = path.join(tempRoot, 'rows.json'); + const payloadPath = path.join(tempRoot, 
'payloads.json'); + const goldPath = path.join(tempRoot, 'gold.json'); + const checkoutPath = path.join(tempRoot, 'checkout'); + writeFileSync( + manifestPath, + `${JSON.stringify( + { + protocolVersion: 'contextbench-protocol-v1', + manifest_hash: 'sha256:test-manifest', + tasks: [task] + }, + null, + 2 + )}\n`, + 'utf8' + ); + writeFileSync( + rowsPath, + `${JSON.stringify( + { + rows: [ + { + row: { + instance_id: task.instance_id, + gold_context: goldContext, + patch: 'diff --git a/src/a.ts b/src/a.ts' + } + } + ] + }, + null, + 2 + )}\n`, + 'utf8' + ); + writeFileSync( + payloadPath, + `${JSON.stringify( + { + payload_hash: 'sha256:test-payload', + tasks: [ + { + instance_id: task.instance_id, + repo_checkout_path: checkoutPath, + repo_checkout_status: 'verified', + repo_actual_head: task.base_commit, + base_commit_verified: true, + repo_clean_verified: true + } + ] + }, + null, + 2 + )}\n`, + 'utf8' + ); + + execFileSync( + 'node', + [ + 'scripts/contextbench-select-slice.mjs', + '--write-gold', + '--rows-file', + rowsPath, + '--manifest', + manifestPath, + '--payloads', + payloadPath, + '--task-id', + task.instance_id, + '--out', + goldPath + ], + { encoding: 'utf8' } + ); + + const gold = JSON.parse(readFileSync(goldPath, 'utf8')) as { + inst_id: string; + repo_url: string; + gold_ctx: unknown[]; + }; + const summary = JSON.parse(readFileSync(`${goldPath}.summary.json`, 'utf8')) as { + scorerOnly: boolean; + lane_outputs_observed: boolean; + gold_context_hash_verified: boolean; + }; + expect(gold.inst_id).toBe(task.instance_id); + expect(gold.repo_url).toBe(checkoutPath); + expect(gold.gold_ctx).toHaveLength(1); + expect(summary).toMatchObject({ + scorerOnly: true, + lane_outputs_observed: false, + gold_context_hash_verified: true + }); + } finally { + rmSync(tempRoot, { recursive: true, force: true }); + } + }); +}); diff --git a/tests/fixtures/contextbench-benchmark-protocol.json b/tests/fixtures/contextbench-benchmark-protocol.json new file mode 100644 
index 0000000..012f012 --- /dev/null +++ b/tests/fixtures/contextbench-benchmark-protocol.json @@ -0,0 +1,310 @@ +{ + "name": "v2.4-contextbench-external-protocol", + "protocolVersion": "contextbench-protocol-v1", + "frozenDate": "2026-04-27", + "status": "protocol_frozen", + "claimAllowed": false, + "phaseBoundary": { + "phase36Freezes": [ + "protocol_schema", + "lane_governance", + "correction_policy", + "claim_gates", + "run_manifest_schema" + ], + "phase37Freezes": [ + "actual_contextbench_instance_ids", + "repo_urls", + "base_commits", + "language_distribution", + "problem_statement_references" + ], + "phase36MustNotFreeze": [ + "actual_task_ids", + "actual_repo_commits", + "benchmark_outputs", + "runner_results" + ] + }, + "benchmarkTarget": { + "primary": "ContextBench", + "sourceRepository": "https://github.com/EuniAI/ContextBench", + "datasetCandidates": ["Contextbench/ContextBench", "Schwerli/ContextBench"], + "datasetConfig": "contextbench_verified", + "officialEvaluatorFirst": true, + "officialEvaluatorCommand": "python -m contextbench.evaluate --gold --pred --out ", + "fallbackScorerPolicy": { + "allowed": "only_after_official_evaluator_incompatibility_is_documented", + "claimBearing": false, + "requiresValidationAgainstOfficialOutputs": true + } + }, + "taskSlicePolicy": { + "sliceKind": "verified_mini_slice", + "taskCount": { + "min": 20, + "max": 50 + }, + "selectedInPhase": 37, + "phase36SelectionSchemaOnly": true, + "requiredManifestFields": [ + "instance_id", + "original_inst_id", + "source", + "language", + "repo_url", + "base_commit", + "problem_statement_ref", + "problem_statement_hash", + "gold_context_ref", + "gold_context_hash", + "patch_hash", + "test_patch_hash", + "f2p_hash", + "p2p_hash" + ], + "selectionMethodRequiredFields": [ + "selection_algorithm", + "selection_seed_or_deterministic_order", + "task_pool_hash", + "selection_timestamp", + "inclusion_rationale", + "exclusion_log_path", + "no_lane_outputs_observed_attestation" + ], + 
"coverageConstraints": { + "minRepos": 2, + "minLanguages": 2, + "selectionBeforeOutputs": true + }, + "hardnessSignalPolicy": { + "required": false, + "status": "unavailable_in_contextbench_verified_schema", + "proxyAllowed": false, + "selectionMustRecordAbsence": true + }, + "forbiddenSources": [ + "agent_outputs", + "codebase_context_outputs", + "competitor_outputs", + "post_failure_task_filtering" + ] + }, + "smokeOnlyCorpora": [ + { + "name": "Excalidraw", + "claimBearing": false, + "purpose": "local_harness_smoke_only" + }, + { + "name": "FastAPI", + "claimBearing": false, + "purpose": "local_harness_smoke_only" + } + ], + "runPolicy": { + "smokeRunsPerTaskLane": 1, + "claimBearingRunsPerTaskLane": 3, + "fewerThanClaimRunsMeans": "diagnostic_only_claim_allowed_false", + "reportAllRuns": true, + "bestOfNReportingAllowed": false + }, + "minimalRunnerBehavior": { + "standardizes": [ + "task_prompt", + "lane_tool_card", + "model", + "budget", + "timeout", + "trace_capture", + "structured_answer_schema" + ], + "mustNotScript": [ + "agent_decisions", + "file_selection", + "query_rewrites", + "answer_content", + "evidence_selection" + ] + }, + "structuredAnswerSchema": { + "requiredFields": [ + "answer", + "confidence", + "evidence", + "filesReferenced", + "symbolsReferenced", + "unsupportedClaims", + "readyToEdit" + ], + "confidenceValues": ["low", "medium", "high"], + "evidenceFields": ["file", "lineRange", "reason"], + "invalidSchemaStatus": "invalid_schema" + }, + "trajectorySchema": { + "requiredFields": ["pred_steps", "pred_files", "pred_spans"], + "optionalFields": ["pred_patch"], + "lineRangePolicy": "explicit_ranges_preferred_full_file_spans_must_be_marked", + "pathNormalizationRequired": true, + "rawTracePreservationRequired": true + }, + "metrics": { + "primary": [ + "context_file_recall", + "context_file_precision", + "context_symbol_recall", + "context_symbol_precision", + "context_span_recall", + "context_span_precision", + "edit_location_recall", + 
"edit_location_precision" + ], + "secondary": [ + "auc_coverage", + "redundancy", + "explored_vs_used_gap", + "false_ready_rate", + "unsupported_claim_rate", + "setup_time_seconds", + "index_time_seconds", + "task_wall_time_seconds", + "context_token_estimate" + ], + "efficiencyIsSecondary": true, + "tokenSavingsWinRequiresCorrectnessNonRegression": true + }, + "factRecallJudgeScope": { + "enabled": true, + "allowedOnlyFor": [ + "predefined_atomic_facts", + "evidence_presence", + "unsupported_claim_detection" + ], + "forbiddenFor": [ + "broad_rubric_vibes", + "post_hoc_expected_fact_creation", + "self_grading_by_solver_agent" + ], + "uncertainCountsAsSuccess": false + }, + "budgets": { + "sameModelAcrossLanes": true, + "sameTimeoutAcrossLanes": true, + "sameTurnBudgetAcrossLanes": true, + "sameContextBudgetAcrossLanes": true, + "setupAndIndexingReportedSeparately": true, + "defaults": { + "maxContextTokens": 12000, + "maxAnswerTokens": 2000, + "timeoutSeconds": 300 + } + }, + "thresholds": { + "claimBearingRunsPerTaskLane": 3, + "setupFailuresBlockBroadClaims": true, + "wedgeWinRequires": [ + "beats_raw_native_on_primary_context_metrics", + "beats_or_ties_jcodemunch_on_primary_context_metrics", + "no_correctness_regression", + "false_ready_rate_not_worse" + ], + "thresholdChangesRequireCorrection": true + }, + "failureTaxonomy": [ + "setup_failed", + "task_setup_failed", + "index_failed", + "timeout", + "invalid_schema", + "no_answer", + "wrong_answer", + "wrong_evidence", + "unsupported_claim", + "false_ready", + "tool_error", + "judge_failed" + ], + "runManifestSchema": { + "appendOnly": true, + "claimRunsRequireSlotsForEveryTaskLaneRepeat": true, + "requiredFields": [ + "run_id", + "protocol_version", + "protocol_hash", + "task_manifest_hash", + "lane_id", + "task_id", + "repeat_index", + "status", + "started_at", + "completed_at", + "raw_trace_path", + "structured_answer_path", + "score_path" + ], + "terminalStatuses": [ + "completed", + "setup_failed", + 
"task_setup_failed", + "index_failed", + "timeout", + "invalid_schema", + "no_answer", + "wrong_answer", + "wrong_evidence", + "unsupported_claim", + "false_ready", + "tool_error", + "judge_failed" + ], + "failedRunsIncludedInAggregates": true + }, + "protocolFingerprint": { + "required": true, + "algorithm": "sha256", + "covers": [ + "protocol_fixture", + "lane_fixture", + "correction_fixture", + "task_manifest_after_phase37", + "prompts", + "lane_tool_cards", + "budgets", + "thresholds", + "scoring_commands" + ] + }, + "architectureReviewRule": { + "requiredBeforePostBaselineProductChanges": true, + "mustExplainGeneralMechanism": true, + "mustRejectTaskSpecificHeuristics": true, + "requiresFrozenRerun": true + }, + "postBaselineCycleGate": { + "maxImprovementCyclesBeforeDecision": 1, + "requiresBaselineHash": true, + "requiresRerunHash": true, + "allowedDecisions": ["continue", "pivot", "kill"], + "noDecisionMeans": "stop_no_more_product_work" + }, + "tripwires": [ + "fixture_or_qrel_changed_after_outputs", + "threshold_moved_after_failures", + "setup_failed_treated_as_win", + "smoke_task_used_as_claim", + "mixed_context_tools_in_one_lane", + "product_change_before_baseline", + "benchmark_repo_name_or_task_phrase_heuristic_added", + "failed_run_removed_from_denominator", + "best_of_n_reported_as_primary", + "official_evaluator_bypassed_without_documented_incompatibility" + ], + "blockedClaims": [ + "codebase_context_beats_competitors", + "codebase_context_improves_patch_correctness", + "codebase_context_improves_productivity", + "focus_mode_improves_agent_outcomes", + "token_savings_superiority", + "setup_failed_competitor_is_loss" + ] +} diff --git a/tests/fixtures/contextbench-corrections.json b/tests/fixtures/contextbench-corrections.json new file mode 100644 index 0000000..419b256 --- /dev/null +++ b/tests/fixtures/contextbench-corrections.json @@ -0,0 +1,71 @@ +{ + "name": "v2.4-contextbench-corrections-ledger", + "protocolVersion": 
"contextbench-protocol-v1", + "frozenDate": "2026-04-27", + "corrections": [ + { + "correction_id": "contextbench-hardness-signal-policy-2026-04-27", + "date": "2026-04-27", + "reason_category": "factual_erratum", + "rationale": "Live inspection of Contextbench/ContextBench config contextbench_verified found no explicit hardness field, so the Phase 36 hard-task invariant is replaced with an explicit unavailable-signal policy and proxy hardness scoring remains forbidden.", + "affected_fields": [ + "taskSlicePolicy.coverageConstraints.mustIncludeHardTasks", + "taskSlicePolicy.hardnessSignalPolicy" + ], + "prior_hash": "sha256:e196311c98e6af44c044dbe57321afa28afdacc598cb499720c42e2bbf4ad495", + "new_hash": "sha256:b630d813d266f1f814a53f9ca7695fc4b33c553e6cf961764ee76551fa8e63ab", + "protocol_version_before": "contextbench-protocol-v1", + "protocol_version_after": "contextbench-protocol-v1" + } + ], + "policy": { + "silentChangesAllowed": false, + "allowedReasonCategories": [ + "factual_erratum", + "ambiguous_task_fix", + "repo_pin_correction", + "official_evaluator_compatibility_fix" + ], + "requiresProtocolVersionBumpFor": [ + "task_ids", + "repo_commits", + "qrels", + "expected_facts", + "thresholds", + "prompts", + "lane_tool_cards", + "scoring_commands", + "budgets", + "metrics", + "failure_taxonomy", + "terminal_statuses", + "blocked_claims", + "tripwires", + "phase_boundary", + "lane_sets", + "setup_failure_semantics", + "run_manifest_schema", + "protocol_fingerprint", + "correction_policy" + ], + "anyFixtureChangeRequiresCorrection": true, + "requiredCorrectionFields": [ + "correction_id", + "date", + "reason_category", + "rationale", + "affected_fields", + "prior_hash", + "new_hash", + "protocol_version_before", + "protocol_version_after" + ], + "forbiddenReasons": [ + "match_system_output", + "improve_score", + "hide_failure", + "reduce_setup_work" + ], + "comparisonAcrossVersionsRequiresFullRerun": true + } +} diff --git 
a/tests/fixtures/contextbench-lanes.json b/tests/fixtures/contextbench-lanes.json new file mode 100644 index 0000000..455fc32 --- /dev/null +++ b/tests/fixtures/contextbench-lanes.json @@ -0,0 +1,149 @@ +{ + "name": "v2.4-contextbench-lanes", + "protocolVersion": "contextbench-protocol-v1", + "frozenDate": "2026-04-27", + "initialExternalGate": [ + "raw-native", + "codebase-context", + "jcodemunch-repomapper" + ], + "broadClaimLaneSet": [ + "raw-native", + "codebase-context", + "jcodemunch-repomapper", + "grepai", + "codebase-memory-mcp", + "codegraphcontext" + ], + "broadClaimsRequireAllLanesComplete": true, + "setupFailedRequiredLaneBlocksBroadClaims": true, + "lanes": [ + { + "laneId": "raw-native", + "displayName": "Raw/native agent tools", + "claimRole": "baseline", + "phase36Status": "included_in_initial_gate", + "contextTool": "native-agent-tools", + "allowedTools": ["native-read", "native-search", "native-shell-readonly"], + "disallowedTools": [ + "codebase-context", + "jcodemunch-repomapper", + "grepai", + "codebase-memory-mcp", + "codegraphcontext" + ], + "nativeToolsAllowed": true, + "setupCostReportedSeparately": true, + "indexCostReportedSeparately": true, + "cacheIsolationRequired": true + }, + { + "laneId": "codebase-context", + "displayName": "codebase-context", + "claimRole": "candidate", + "phase36Status": "included_in_initial_gate", + "contextTool": "codebase-context", + "allowedTools": ["codebase-context"], + "disallowedTools": [ + "native-read", + "native-search", + "native-shell-readonly", + "jcodemunch-repomapper", + "grepai", + "codebase-memory-mcp", + "codegraphcontext" + ], + "nativeToolsAllowed": false, + "setupCostReportedSeparately": true, + "indexCostReportedSeparately": true, + "cacheIsolationRequired": true + }, + { + "laneId": "jcodemunch-repomapper", + "displayName": "jCodeMunch RepoMapper", + "claimRole": "first_serious_retrieval_competitor", + "phase36Status": "included_in_initial_gate", + "contextTool": "jcodemunch-repomapper", 
+ "allowedTools": ["jcodemunch-repomapper"], + "disallowedTools": [ + "native-read", + "native-search", + "native-shell-readonly", + "codebase-context", + "grepai", + "codebase-memory-mcp", + "codegraphcontext" + ], + "nativeToolsAllowed": false, + "setupCostReportedSeparately": true, + "indexCostReportedSeparately": true, + "cacheIsolationRequired": true + }, + { + "laneId": "grepai", + "displayName": "GrepAI", + "claimRole": "required_for_broad_claims", + "phase36Status": "deferred_to_phase39", + "contextTool": "grepai", + "allowedTools": ["grepai"], + "disallowedTools": ["native-read", "native-search", "native-shell-readonly", "codebase-context", "jcodemunch-repomapper", "codebase-memory-mcp", "codegraphcontext"], + "nativeToolsAllowed": false, + "setupCostReportedSeparately": true, + "indexCostReportedSeparately": true, + "cacheIsolationRequired": true + }, + { + "laneId": "codebase-memory-mcp", + "displayName": "codebase-memory-mcp", + "claimRole": "required_for_broad_claims", + "phase36Status": "deferred_to_phase39", + "contextTool": "codebase-memory-mcp", + "allowedTools": ["codebase-memory-mcp"], + "disallowedTools": ["native-read", "native-search", "native-shell-readonly", "codebase-context", "jcodemunch-repomapper", "grepai", "codegraphcontext"], + "nativeToolsAllowed": false, + "setupCostReportedSeparately": true, + "indexCostReportedSeparately": true, + "cacheIsolationRequired": true + }, + { + "laneId": "codegraphcontext", + "displayName": "CodeGraphContext", + "claimRole": "required_for_broad_claims", + "phase36Status": "deferred_to_phase39", + "contextTool": "codegraphcontext", + "allowedTools": ["codegraphcontext"], + "disallowedTools": ["native-read", "native-search", "native-shell-readonly", "codebase-context", "jcodemunch-repomapper", "grepai", "codebase-memory-mcp"], + "nativeToolsAllowed": false, + "setupCostReportedSeparately": true, + "indexCostReportedSeparately": true, + "cacheIsolationRequired": true + } + ], + "setupFailureSemantics": { + 
"status": "setup_failed", + "winEligible": false, + "claimContribution": "missing_evidence", + "includedInPublicationRows": true, + "blocksBroadClaimsForRequiredLane": true, + "requiresReproductionCommand": true, + "requiresLogs": true + }, + "laneContaminationRules": { + "oneContextToolPerLane": true, + "mixedLaneContextInvalidatesRun": true, + "sharedCachesForbiddenUnlessReadOnlyAndHashed": true, + "memoryStateMustBeIsolated": true, + "precomputedContextFromOtherLaneForbidden": true + }, + "laneToolCardRequiredFields": [ + "laneId", + "allowedTools", + "disallowedTools", + "setupCommand", + "indexCommand", + "queryCommand", + "versionCommand", + "cachePath", + "artifactPaths" + ] +} diff --git a/tests/fixtures/contextbench-selection-exclusions.json b/tests/fixtures/contextbench-selection-exclusions.json new file mode 100644 index 0000000..96796e9 --- /dev/null +++ b/tests/fixtures/contextbench-selection-exclusions.json @@ -0,0 +1,3861 @@ +{ + "name": "v2.4-contextbench-phase37-selection-exclusions", + "protocolVersion": "contextbench-protocol-v1", + "dataset": "Contextbench/ContextBench", + "datasetConfig": "contextbench_verified", + "split": "train", + "selection_algorithm": "deterministic_seeded_coverage_then_rank_fill_v1", + "selection_seed_or_deterministic_order": "phase37-contextbench-v1-2026-04-27", + "selection_timestamp": "2026-04-27T00:00:00.000Z", + "task_pool_hash": "sha256:a6af697f293ec595bccf9c264799f8a55308cc552b20b8ec61714240c2a03b26", + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_proxy_used": false, + "no_lane_outputs_observed_attestation": "No raw/native, codebase-context, competitor, proxy-hardness, or post-failure outputs were observed or used for selection.", + "input_row_count": 500, + "eligible_row_count": 500, + "selected_row_count": 20, + "excluded_rows": [], + "non_selected_eligible_rows": [ + { + "instance_id": 
"Multi-SWE-Bench__javascript__maintenance__bugfix__838d3d25", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:03cf497e7e4415a656c070848b5c251e3a3e6a4179ee887cad42acb84390f152" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__0307e40c", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:06baeef7c670478f7c787637882732d0baa64ff4f32505d955892d5d1d3527c2" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__634fe9b8", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:076cfb05187984e4cab05aa898d87d8ba219de13cb3a9f9ad8f193d87d950822" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__93721db4", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:07ea5844147f8f772b09eabda751164b99b0d5ad4bbb3f6b960de77cd11b2670" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__5c82134f", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:08f69910c2675a7ca7effaa484f3015b5a8f2daa755d32eb9cb7c8a3568e6675" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__31d4fe9d", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:0928f9a14603d2c32812d7598e79450a4efb7029ba992738a946ce388cde7204" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__6da325be", + 
"source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:0a1cad48d81847dad4b28eb18ada007c40d41ca6492956090766a4f8e420b1a5" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c923cfa8", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:0b7b6fbab039f509d6801f6cb3d74f16f18b2418773a83f06ee2a3610431d459" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4e7b7c81", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:0cd437ece62135e58d366417ff3ba5ee9a118a865898770abf010e96866027b4" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__0006beec", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:0cf4acef53f32baab2df4a99433fbaac1d6ebcc084099685cec088faa57d3e10" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__2d9d9f71", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/iamkun/dayjs.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:0d9f5b490823ebf2a0513a4b47c164abedcb2435cfcb62f4c0d709cc1160d2aa" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3b4c72f1", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:0da8f8ca106b09d10740c819e558cdd71bd2c0440abd584abb3de0936ac74569" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__b31d2f53", + "source": "Multi", + "language": "javascript", + 
"repo_url": "https://github.com/iamkun/dayjs.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:0e1195bfd17ec106c224d7e54045aef1b2fca095e2590f0dc5b461034bfe60c1" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__8c189fda", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/keras-team/keras.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:0f681bf196a99486aa2fb0f426956a0c79372cbaeba33dc9b9ba2a3e25ebea1b" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__3c69099b", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:0f9037edb3dde63aa4464dd6379fc2ab609755340654956cbd7a7b3f997b6cc7" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3e2d031f", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:0fce488c2ee84d450d3734f01054d840c8d8c58151e35f592e13c534215ec0db" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__07bb383a", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:1006923d5364a3032de43f6d15b00b5c56cae3876120005cb106ba96a1105af5" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__994d041f", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:105f873d3ff97062d7d01e9eee57d3844138178d79b630d16dde71930a039311" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__6a53360d", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/vuejs/core.git", + "reason": "eligible_not_selected", + 
"deterministic_rank": "sha256:10bb0777c8a14ee1bf710dfc37814fa829afc1e71ea96badc874ca31b0bb9c54" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__95b4abc1", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/internetarchive/openlibrary.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:10c8fc62d875aa5cbde0d7c16ac9790e488db26f47d46f3099509b097a17c55c" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2c34be8a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:11c231ffec3cb7601f1c48f6759db7ed657a895f1cda2e602257f574d793d8f6" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__84fdadf0", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:122e62b3df7ac8c58decd7b4106ff2ca2a1f6298db727e890c7af5b5cd170c51" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__eb5704a5", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:124102c8beff648f813ecf0fde8989bdf42d51b49c37449a7917074a3b687ed8" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__03fdad45", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/tokio-rs/bytes.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:13b3615fee16b8239456863a733234c4ab8c6f54024f563d959a3646d16f7707" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__9c878ad5", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:16284a509303ae4caeae71d49e913e057c12271a801d58e95615206199cf8e0c" 
+ }, + { + "instance_id": "SWE-PolyBench__typescript__evolution__feature__07098661", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:17b5cb4894ab5a99686cf09ea54c20de211e43b1eb3a5e836caffe40dff4923b" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__67699b64", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:17dd8dbf0cb630e6b465e9b03ff27da9825a836444ccc66648ff04537be88633" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__6b844996", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/facebook/zstd.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:18a2c1a973357bffedb01a16c719b0a14c7ce96c5ca414cd9e1bba381bc84846" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7db465d6", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:1941e11a86608a90ff33cbd18236249a3dd7585cc86191e8dab6fdc9dd6277b2" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3d378646", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/scikit-learn/scikit-learn.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:1c06ce9f7cf18ffb7cabf8e39e05e8b0317fbfae26bc7234885fcb7ff771e495" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__05a89a66", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:1c662d7109360fea3fb9914255d544d38bf461003cd945c092993427e58eeede" + }, + { + "instance_id": 
"SWE-Bench-Pro__python__maintenance__bugfix__5ec14acf", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:1ce02f9ffc2e770050dd835d9463a4a75ceea43cbea99546c8e8989293d97ddf" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__6cf8c11d", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:1d0accfcbbca86472e03ce1624cdba2002ca9da1a18a47a81d436ba94fed14f8" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__0f495035", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/facebook/zstd.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:1e4e46ccd14d323dfeb44e58bc614cf0bfd0a57bc90a015f2ba4a503757184e9" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__05c53458", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:1ee25ae1d46d51343052b4cf9453269e9e109c0bc0af81d5d0d79042ab77533d" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ce7f17a5", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:1fd109441ecfde35bab0ee355bce4462190cfa906d029536ac0164bc2fd261a8" + }, + { + "instance_id": "SWE-PolyBench__typescript__evolution__feature__d520ba22", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2033c0f759844814d765212341a7e075c76bc3b7f78dc56f5f981e961578d828" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__07d7817b", + "source": 
"Multi", + "language": "java", + "repo_url": "https://github.com/fasterxml/jackson-databind.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:20c6cc46393e7f838b997bf791c215e5592cc8ef4c301baf2e6dacae110a1840" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__eea782b2", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/iamkun/dayjs.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2122b6100066889299916ede35b082becbdd27f4f798f5044c6bcd41fd2f64cc" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__768d44e5", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:21b0cf95e6eb5ba4379aced23b240436f17a0d7913c88b52720424aa402db15d" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__48e5846a", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:21ee8537e57ab86968caa867c2c2c2ba4e4d223524a602e172c63124b502b2a0" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__c1f435de", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/qutebrowser/qutebrowser.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2272315e6cd2ea8db4d42dbfc36863cfd3633c1fcdcb0ca5f34530bd121e5932" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__497deb6f", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2274a0674571e600418d650e7b46e6a52052225e50087c02067cd2e49f34fec8" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__a65e43ee", + "source": "Poly", + "language": "javascript", + 
"repo_url": "https://github.com/serverless/serverless.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:22c34eec746aacaca1e24b54dcf714587b4e9cf46fc526bc8b561692f31d5513" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__b9b45262", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/facebook/zstd.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:238c5e7518267abdad83b5cb0dfef1332931ee2a388bd80b1fcb1ed826cc0147" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c4c8d376", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:23a801198076fc32539641ebf5446e2332a22a6daa5ded1bbb2c722aef7d4ce5" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__450d75e0", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2423d8ecf7f82716dd5fda053c1184f70d5b9a59afc197a0f5ea72ca9e83f920" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__37455515", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/yt-dlp/yt-dlp.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:24484cd1ee507b8cf5024e9e0a42ab381e5f166eb15942f74e1fc2003b499bcb" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__16d1ff7a", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/microsoft/vscode.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:24863767938823e97b130eb622a8e1d4e159437685dd662072fbd887d563b193" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__c3284db4", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": 
"eligible_not_selected", + "deterministic_rank": "sha256:2559802d188383102ef01344141f18097ed13e332e53b83a7df48dccc4d26cda" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__5eee261d", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/vuejs/core.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2637639450d32f5b0845ddbd2f1f9db7e4b994de2ba7b824b20ad21f3b617e08" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__80c9776a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:26643c6362bea41db1dc6193aa85ad783c3342c68849a4bc04bb3dd978c88db0" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__ef567ef1", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/alibaba/fastjson2.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:26716eaddfd72568f3d98f40bf55cb3818b1dc4537d4f43a4b882690022de349" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__cb2c6ff3", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pydata/xarray.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:272d7a81ad4d45995f6906384f849802932e94308defae3e82bff71ae11768c9" + }, + { + "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__e31ec45c", + "source": "Pro", + "language": "javascript", + "repo_url": "https://github.com/NodeBB/NodeBB.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:27889de7044117c889568cbada79dc9062a80b73748a88be5c26db604e79e665" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ac705f35", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:278f3ecd34a0b7a3e829f2610785ab55ac503c2436eec6ce77d4eeec5feeeb0b" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__3d4cfa9c", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:279fa30273903ce4360195be11f39c49c47f423b730b80c700f02d5299ab31a6" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__15589352", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:291d68196e79c8659b321b0f8be7a73e160b9e2aedb23e5935637ce1b9a97945" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c9656d7a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2937f2f4d79a3fc5f876211e2f1c1fcfa9a23880849f39f838fc3b797f75379b" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__90532e38", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pydata/xarray.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2b067a3eb02e8d4739f8d5221afce5a122ca686b43267d45b92d49b6b9497eed" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__16860730", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/nushell/nushell.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2b32f43866183e19f892f8e8c18fb8319bdc86bb95e7d33b5edfaa367b5cbe3e" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__6ba3d94b", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/fasterxml/jackson-core.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:2b33ba1457c7d224932d22df1657f4371bdfe2ac8aaec5a2475686c4d53d5580" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__a47dfbbf", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/facebook/zstd.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2bb94c4c97c4f5d8e786a88d03c203ac5515a219984de69fa192c999363c67a7" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__11e78a8d", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2c0b27e3bca5f749e280ba006cad31ceef1f82291d130200a4ffa66deb73fa69" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__074f37ac", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2ccedc8a6e160c727740aceb8cd8044af63a023af6119038283c27e387e661c5" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__dded0f96", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2cfc8ce3ce0a51e13e6abdd2e5c9d9fe8502919aa3e96584279752875b592413" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__91e6ed53", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2d0187c1050864406217fa037da864735b387c6c4d33f7044d24159a28171579" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__0f94ce4d", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2d4cc31785a1d34c59480eca8ab24a61f2fb4668f9c833340ebb3d965e37f976" + }, + { + 
"instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__81c5c6fa", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/internetarchive/openlibrary.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2d8bf22b7f5197b0d4f0796fb7252082cf7c4c417fa9da5b13bb559037fabd94" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__c8b82135", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/vuejs/core.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2dd8e944566a34880b0eb3076d4fe1052bd574c2da395acbb8db6da68246bfb8" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__1384380d", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2df3876bed7c1a87d494645449281045b476aa21ff0377d5f6a2816af6077a97" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__64fffdfa", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/qutebrowser/qutebrowser.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2dfe283663f0ebc3824b062e3f8ed0ef0387b7db96e754fc2c6f951c4f6d5682" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__183a2a5b", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2e0173d18815f7832a7613296d032f803f40970a5717634557dd9c4bb6db8b52" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__83a70168", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2e64e5f5583b6fb59a379296e325334b33512df139ac74ac698d0f07d754cc8a" + }, + { + "instance_id": 
"SWE-Bench-Verified__python__maintenance__bugfix__42c77239", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pydata/xarray.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2eb0577f818eafa9649d5132854b7fcb5f456d543506fcab47b099e186a5b0fc" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__0f3c174a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2ed40d88eda56cafa7be73895d90ab990579880bc554dd056a4b4dcaf8925fc0" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d3bf673c", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pydata/xarray.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2f267702edb795356e07fc431e11656d9a38e5ef9957720fe127365725db583d" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__98bbaed2", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/axios/axios.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:2ff20a07bf25cf73be95db6b5fe298d7f01fdb989003790fdce08b5c48b117d6" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__9bdfae2c", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:302555472db0e26208aa195618cfb873a8dc589f3822d7612ed2d480e2fbe299" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__9ca0e415", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:30ae54045facf638785b0dfc7994c7516b55b8577b8072e55e26dfd882c64c48" + }, + { + "instance_id": 
"SWE-Bench-Verified__python__maintenance__bugfix__45db86cf", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:30c8ea811449d7adca7672c182f3f65cb8f5b41b795fda02254a9bbed2e01360" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__4d9664f3", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:31c58b4ca73871761f33989a961af53e1299257a95ece466a56eb2fa566196b9" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__908342f0", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:322dea2c22c44fd1e0c3991a2b160185655723334ff7c5e876d4520d8efdcbd9" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__42ae5e9f", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:325e77fe6dec496cace3fd5be3faab00f47a6972e00c64b433f745ed29c6ea5f" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__4606de0b", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:32cdee42bf9d7b765923ab739ca76c06fbd908b08d01ece506efd01e028ba1bf" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__2fea5f6b", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3425d8e234195ba036df9bb6bc46ae2362e3d11b491c4cc745b15ca56cc71b76" + }, + { + "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__40a717e5", + "source": "Pro", + "language": "go", + "repo_url": 
"https://github.com/flipt-io/flipt.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:34b963d19ec1e4c9ae69e859bcadb97466ed2cfc14f584ebee4b7320a9a6009e" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__5555e84c", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:35cad2696938f93c811b304a7245987e6c3623f18ccba72f3a52e9501dde12c0" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7439767b", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:37701cce5266f96431c27136ea5ac8eba24a3b1fcef7f6b3c76007891394b652" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__1e195cd6", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3825364f24b94c8f5623eba131a72dc63a24988f34f5087a12377ecc38c19adf" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2e76c8cd", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pallets/flask.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:38277c285d71902ca97f19f4bb1c9d7ecced4d155b54f39fd6d085285180669f" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__bc1c666f", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:385299b57afd9ba5fed8f1f0443065a8a28f6937385c20e26915a8da3eb1a110" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__45da4482", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + 
"deterministic_rank": "sha256:38dea4af43965149078cd83c0a70c21c26d530f1ea083ae6de369fd47dfe94ba" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__ff0cfab5", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3903d17a7e6a39afa977fdec9bfcdedf4f9f861ea0c9639bb354e8235474e8fa" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4fb8fc10", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3920290fd9832d1e2f3c4c4e5c11e1053607d5016b2e5ab28e4e57bac5979aea" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__7acaca55", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3994bfd3b0c0c2980d5285f4a42cb8a3b42f4e94364bdd21d4c634a52438e31a" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__7c2efe8a", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/internetarchive/openlibrary.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:39f2ea55a8faaf270fa4f6dd07b22f49f4848c3a7130b2f118e23ec4710d49a7" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__8d1c297a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3a6e8fadd6fd8a475642b765db5d7a8fc07df52a90edc3ffe953d555c6a15ff6" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__f0db9a27", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/fasterxml/jackson-core.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:3a7ca6c1f548555448ac475cd0526f726f7a51c2fd7ad2339d6dd9e59925094b" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d0723f55", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3aaa3928fb4de89ccd3dfd6fe8cd537f0f23c4477bb7402dd4bd6dcee6e364f0" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__e647c8ce", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3b10d62be939d9aa92432446f2b14b25ca9e99dcbe81917efd7b59ef8517e8e6" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e5a3bf2a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/astropy/astropy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3b2c0cd652714010d038c607053cf1d6c01b443e89bc8615c5c26a49d92e9596" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1efc2b51", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3b6bb198ef59f43402a44124c3fe8601448abcef5bae8bc1c14630ccb40dcda2" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__1e29a469", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3bc93fea6caa989ac25bf866065a361879ab7c95ef9b4dfe42a2e1ce4e99d426" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__8b1080ea", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/keras-team/keras.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:3bf01c474f20d61cd853d0ad5371878b7a48dee8b40576377de6cbaa2f6a97ed" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__2f88019c", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3c2638e26125f56b90cd59d9fea08448093fbc6f739d649efe43049619e7d55d" + }, + { + "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__5c001746", + "source": "Pro", + "language": "javascript", + "repo_url": "https://github.com/element-hq/element-web.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3d6e51117088a3557c2b72c8453f2bcf118ccf2acda84bc7dc6500f9edcb2342" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3185b834", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/astropy/astropy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3dbaeceb0cf71f3603b9cbcb84be604820acdeba1ee9e0b1b2576a4e2759520f" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__0b78ed50", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3fa6341ba87d5effdfafaee3097e66df4ff0da78536c10a04711d78b3dea4e3e" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__88e1ffd3", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/psf/requests.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:3fbca068cde77c9a3d87ca965c8563a3865a7e88a69ba1d85b74c00fde315a83" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ac127191", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:3fe99a63634ecc521ddf8c3195f6685436410459a62ccdf0742a4956aebbae37" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__9907b12d", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:418c4601b3773b4321b6dd8509688745e5eb1c7fac011bc80dc3ebd19dac851a" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__5188c6bc", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:419546367169c45315c9e54cdae9e5194856c3b9e1990e5351237eb8c33fd11d" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2ccbdc72", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:423c4bc0e5dc7cc7275645bfcbdae0acb7504752dc20eb3641680cb06226e484" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__6a14056a", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/vuejs/core.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:42cbdc9325a3f0490412593e1e5c6e130855a2f1c525af8a818b1260a105c450" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4d321c4a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:431ea723154d39ddb5b2a0182db3e38ca67b299b3c7e8102b65b222732b4081d" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__ea5a4764", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/keras-team/keras.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:438d70abeae92385f74bfb9d6c2c6da52dc4bad3498c07444ccd10d9eb3cd94c" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4b691a35", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4459cdde1b7c2330e192ff256c9707eb50f18c2f9045a603911bab15eee03cf6" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__52a91dd8", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/langchain-ai/langchain.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4478daa9fdcc199ab420e8fc7c6bf36b84bab63444bf14186e76a03567073f04" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__42a80be0", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/scikit-learn/scikit-learn.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4621d1038742dc62a46e026ac0faa92103a45a5cd279bacfddc3dd68d37978c1" + }, + { + "instance_id": "SWE-PolyBench__python__evolution__feature__b4302840", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:463675d2f7fc019f315d4802136c1b406aa63597d0c25dae06835e6eaeb19b14" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2e3d6eae", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4665f7351006a92fc92a88b503bc558c976b0ffcd1cb2bc8f7a78a46cfb158cb" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__5aed9f96", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:46e15b392e85beadac9ef37f17ee2bf1978576f28cc32272483a273fa5e6196f" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__2487b02a", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/microsoft/vscode.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:47b94c57f154f0cd9144f457e265a495d6c9f3b1fb726a854f5d2301492f7d6a" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__15e5aaa3", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:484a54348ff22d071f69282ec722ce98ac476228842626b3836907d74bf0846d" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__1bb5c0b0", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:488c33518e4122e8a198c1e7b8804f3e0c3885fd21462e5d884f6a6a7c508e0e" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__607fc4ff", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/internetarchive/openlibrary.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:49370aaf658368f0590382a55ed78d8b4dbf835a9383d213600d0696d730ba2d" + }, + { + "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__7b6185af", + "source": "Pro", + "language": "javascript", + "repo_url": "https://github.com/NodeBB/NodeBB.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4a2c896dd3bbe8aeb08f18b813f1c51f27c28264b73afcab99dccfa48ffb1c6a" + }, + { + "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__39a9746b", + "source": "Pro", + "language": "javascript", + "repo_url": "https://github.com/element-hq/element-web.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:4a72ca03f9ae4a2b34af9734a100746c82b96772a0c8385a7a151af73ef78b23" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e989ba2d", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/psf/requests.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4a8dfcd074eb451c1d552b1e6b8723efd3bf08c8c34878c852bf693facf2c546" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__18eac778", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/vuejs/core.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4b82b033dc6320aaaca83a04fc230aa33c361b4aaf12de316165da33fc0bfc82" + }, + { + "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__1ec2c84a", + "source": "Multi", + "language": "cpp", + "repo_url": "https://github.com/fmtlib/fmt.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4c0f3c256d452c2d157ba3fea7ad9fb5e2c9fef6ced5003c101549066a768fdc" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__07bafddd", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/scikit-learn/scikit-learn.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4c827b8c4f2cdde571f9a7d18a6b33ec4147a346cb77b319fe28dcd051d51249" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__2aa6fa4c", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/vuejs/core.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4d4cd2f55d001fd24a0cf217c297c471e88069da3f68bc6855bf0f403de64299" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1ae3c3bd", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:4e4199a59ba0bcec91c2b70be32fe313b4c852415268c00d99c865f3c6bb72e3" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__53ca6a30", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/internetarchive/openlibrary.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4f03139321008d9300d49ccb85540c22213606b926a4bcfe329c4749ba8fa01d" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__30065f19", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4f4dcda690d48d5a5e121e4ee01d8140c5683bcba7206bc73bdea7143856f346" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__b9f9961f", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4f62da7451dd325f427814f08c2ef50c3244fb014144951a7f2d5af6f56de075" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__d22bf206", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4f6cf5a824bf120bb14a0d67e80ad54fd292d516deb9bf84f8d96b2f7987656c" + }, + { + "instance_id": "SWE-PolyBench__python__evolution__feature__17a093c8", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4f95b801738cab0738b8e3befee5aad9d904def47f049a4f6393731f5148c8c4" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__14da06bc", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/elastic/logstash.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:4fab17c12c78ee10a62559846664a4822b64e0930fe1cfedc4b30d84fbdf6db2" + 
}, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__d129d52f", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:500524859a2c1be42a34288dbc4d540079d98d41a30130aeac87d04e1131d547" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4f130690", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/scikit-learn/scikit-learn.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:517db77979a49f50a4e79046d284debb0fea497bdf37cec311cecc9a8382798c" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__214ce29d", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:51928eadb5dc8d4d9f6f5741e050c14ab651ff1232b2d847dba31fae478ccec0" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a5ec0eec", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:524f737afef9e8538f49defba64d553d27e49b35a5b00903a9efb4395143ed42" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__84effcbc", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:527217b9e22ef19ece5e491dc09f3a9bd07010b8365700bce787a881d4bd94eb" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__20eb5bc8", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pydata/xarray.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:532100d56642791b0696083a6194805f103cc11d692a10af6bd438156eca34df" + }, + { + "instance_id": 
"Multi-SWE-Bench__go__maintenance__bugfix__0f79e39c", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:53bb2da8bae618fd3c04a9515f5b3d2efcc60bf152fe70ed9285c0cb3e65dad2" + }, + { + "instance_id": "SWE-PolyBench__typescript__evolution__feature__34826a6a", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:53e3958f53705e9513df7399bb4e772b8df2807225672f9363ff3fd63d772386" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__21086305", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/prettier/prettier.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:54597b2edaa48f9e3e74424649f716dca48018304b7ceaf6534ed3ec73956b0b" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__739b23db", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:556427f95d643d5beb651cd221e0fff6fdf9ab22fe1187122c86d8ca9b8029ff" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4d207ce8", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:559f204ba9c3f7f485b7e7df9fe053ecbcb0695e4ae21a3bf59c0068daf73a39" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__f9bb5442", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/facebook/zstd.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5645e936378cf638659e695b39596fd46bd393ccc8670926ba1f6b37fa6f18e3" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__8cc426dd", + "source": "Poly", + "language": 
"javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:57ed93eb89b32ff2e1427c509b02198bf4e54821cdf38dc7d365f8fbdeda54ce" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__8f9042e3", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:57f08ae5655ff12c003b5b1156be099370aa40360d206869423ed8352a78f740" + }, + { + "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__e43d9aa5", + "source": "Multi", + "language": "cpp", + "repo_url": "https://github.com/catchorg/Catch2.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5818ee91e9fa4b98c14f7fcb147d9f6dfec50377d603644d77578df58a536bda" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__9ff56151", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5821daeed84f963923a1daf44f9a93d679d59420e95ec1988683db229614255c" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__04c51be7", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/vuejs/core.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:585bff0a4a63824adb9de8a46b04a98d7f5790c9fbbb321abcac84e65d9b8e7b" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__61a7a81e", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:58b89e3adb8e561a28fa30bb429c70d3721e10abab2b8d64f939ad8ed41a2b4b" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__48468065", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/microsoft/vscode.git", + 
"reason": "eligible_not_selected", + "deterministic_rank": "sha256:595789856f88dec3707155e8b48a630c128eb0b07c26061f030623057810434a" + }, + { + "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__09eb0d6d", + "source": "Pro", + "language": "javascript", + "repo_url": "https://github.com/NodeBB/NodeBB.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5a1288d33a47b2f1dd2bcb3d3baf1d64733fd0cd9bc8950e0997003b5e1b4f42" + }, + { + "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__ac8400d9", + "source": "Pro", + "language": "javascript", + "repo_url": "https://github.com/NodeBB/NodeBB.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5a2ab1e03a31d103409a375f4c91366fbfd7982df9e80243688004e987243a7c" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ebf021d0", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5ad21b9964fb361fd46b99a64d9e63b809c39f29bb8476d83fb9f5d71ae123c4" + }, + { + "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__720b4d92", + "source": "Pro", + "language": "go", + "repo_url": "https://github.com/future-architect/vuls.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5b0cb19c0bea9f60a0c0c1bb2313a7bcc5b4b7a40053f3668854aa21202a32a7" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__9a05fe0c", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5b1d0f8838ef362706223a87b68f2e9a93d9b42dd08beead758e4ff64fed7ad7" + }, + { + "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__4eb0e647", + "source": "Pro", + "language": "javascript", + "repo_url": "https://github.com/NodeBB/NodeBB.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:5b5d10862203327b0ad1f3fdcea256fa8f35b58f0a60cbb72fe9489e39447167" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__c017c9ba", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/prettier/prettier.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5b8d1b49ad1ec8a795078709e470a482e4ad24f9fb2e52b8e756ae2fb77d0125" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__5eea2978", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/fasterxml/jackson-core.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5c31fbffc692213c38c3f7b8d37d8c946fa92ef4af93955b74887f8a1434607b" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7ec77e9c", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5c41cc5ec5b92656be419d1ecbe5778cc618997b6b295b2ae536885461164372" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__681ea2b6", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5cba2062443f7fbe66c61546c6064d077499bd25600aa93a0b0efe2c54de76e9" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__85c030cf", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5ccc8071a1186487184438e09f9513a9d6b044d6ec2f3fffcd982ba61e061d95" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1eaeabf4", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5efb915edc5e15863dbd672e1de1e8ffb040e36b085f5fbbc6d7eca80f51d77a" + 
}, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__fd8123b1", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:5f509828b0b901c8d6ff3b1f1246f168380b3e99d0558f18928bfefdf3d29b06" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__05c9e61e", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6070fcb691545833d6afe578a801748287e6c2cbbdfd0f07830f5b64ef60de11" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__abfdb2c5", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:60ef9ce0c96aaee01ea666394c010cc8f9088491f2b1992afb587c6b3dc2f112" + }, + { + "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__bca55dea", + "source": "Multi", + "language": "cpp", + "repo_url": "https://github.com/simdjson/simdjson.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:612164e013a50678221e5235f1c22ca215c94ff5cf76ab64b4afde9f681c7944" + }, + { + "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__7c9ef76c", + "source": "Multi", + "language": "cpp", + "repo_url": "https://github.com/nlohmann/json.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6262dcda36e891b2bfeb48853aaf50d60935957b418bd152fc2f987f5b7beb44" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__fb07ed8b", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:626c684403eea71e8695e58728721e5f71bcafa0915634b2ff9389e891f12aa4" + }, + { + "instance_id": 
"Multi-SWE-Bench__go__maintenance__bugfix__52c152ba", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6283859052944efcfbe7f321decd06680d883e3a2cced621fa39a670d2047445" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3e44d93a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:62cd5384796915a3a413d00cc15af190af3fb84576e46400b50b492253dab784" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__982277e4", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pytest-dev/pytest.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:62e6252dc1659aafd5af8d1b8c51ba41619d4e4fc5a6473b8cbc278838aa955e" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__227490c2", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:631d130596a618701bac4f0f0bc572bfaa697da956b35efb4cfc601faa1f4c17" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__ae8b362a", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6334d348216fbcaa84d739b0fd10fe3d548c2820778ac5883dece03c90c008bb" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a571e81e", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6357eb2ba324edd60cea4b3c1acee4ddabd3e350206925737f6206aa7bb97908" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__12d848d7", + "source": 
"Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:63b6924492f7643de4880bdc7a8ea8d2e0aa22c90cdba20b15990f759233e4c0" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__73631dc4", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:642cd7932b06c686ccd8146878f5eab2c0d8f70db29b6f9f43cd298bea841a69" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__9ea965a5", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6466b60cf2e5e7a16c3ef632ae8bbf9922839836cf77551f7e0903b3817fc279" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__1ac60ce9", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:654590e4d83cc87cf287f6d8e7d0904f71a76678678f40066563fdd0d25031a7" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__27393dd8", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:667ea55b2304eb2954167f566d98c4170b3a85f2ac8545fc3bddfbfb104c5a30" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__3f3ff585", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/keras-team/keras.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:66e47107227bcee92616fc300c26ea54e5bb6dce79c6a6289b67b49a4bb9215f" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4f88ede2", + "source": "Verified", + "language": "python", + "repo_url": 
"https://github.com/scikit-learn/scikit-learn.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:67a677c6c0855c38175ae597905db6d0c64f2d18522593bc05444004322961c6" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__62badbbf", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:67bce6bcc220c01af7831329348e69a2dcb05c37344efa43a48818cf797a5bc3" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__31f13b61", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:682198791d99566a239e6cc7962c07005848680c84558ab54ad31784ca145d95" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__8c647f3a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:687f21450cd78dd5e5d286b45e9a80067198353860affdb4ef6913a983bd1273" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7ccc36b7", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:690bf652b97a808097f41b1fa01a11a67acf7a1158f4e1325cd3b119f260155b" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__20294df2", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6952068baca8039d3787a2143c31f90e00c8a452a953af5041a4b63d6e77f935" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__607f6b9e", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + 
"reason": "eligible_not_selected", + "deterministic_rank": "sha256:69674ac163ae9c3f316ec48e22fbd7a6f17e4404e9d20091736be22e72cf6634" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__afcc47be", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:699f52e3d1fe29cc759e970d09cad300b9116523777dc7cee20fd35017520854" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__54ebe590", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/vuejs/core.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:69c49c6fc9912a2c4c32957137cf60f81d54c4b3226316c2f12ad4c9d2111d0f" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__83fd3b37", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:69cc4d466d9e7651cb9f7cdcb2af53cea552b6733418e56c75419fa87e59562e" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__4d090fc6", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/serverless/serverless.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:69d305681c83e0f5e42d660d5fd17b3709146b13a9dfae7f7a387df49cc652f2" + }, + { + "instance_id": "SWE-PolyBench__typescript__evolution__feature__684a3415", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:69d561426d5f8417e806a9a05eb81df822a9c382fb8a7454eb0c1ac2b6a7f727" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__b91692e0", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/scikit-learn/scikit-learn.git", + "reason": 
"eligible_not_selected", + "deterministic_rank": "sha256:6b51499b713194c37a3a7e11d568a39f6e14b8d9e27e9e3b2a54d77493935cf0" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__5c1509d5", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6c0498e7e9c26b2bf2db5572115318e280a56100f72ea290de5cc48649b531a3" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__01527b89", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6c0868a9ee0d4d965fbb31aa9b39eae008ce1d9e883ad6a48bb0baca4ca96d28" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__bc999af3", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6c50df73d0fd67795dc3f83734ca786e7efade7109f35c1b70faee584f387132" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__87d9772c", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/iamkun/dayjs.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6d39e2b553c73034cf24f6fc803f08351914fa3e83eceabe162b2c3a688bd654" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__3864c137", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6d53b7668908215a678128156ec1e5f43643e958c31d2b117de9a00da23db353" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__59acbd9a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:6da16ae02bfc81249f25aba32ebe0a48755fc42eb7ab9b5c142e5db392053c9b" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__678fa217", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6ddf8e13e41fb086d8516a849a4cf94d97065e64b064d67d006b92e34bc0ed64" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__1081c568", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6f4bc2e66d8070f2eef7d8f111b0d2485f87f6be9c95a167638bbfebf190072c" + }, + { + "instance_id": "SWE-PolyBench__python__evolution__feature__8bb50331", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:6f641114fba66e2f31dc2f429759774d52f7a1560334974f02b84a2209b7723e" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__e3c9c53c", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:70386292933c625d725b45a4096298537198c491d83ea186496bfb139f7ccafa" + }, + { + "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__fe080aac", + "source": "Multi", + "language": "cpp", + "repo_url": "https://github.com/nlohmann/json.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:709365140586ef33f4238171e8653b166673dc32968462d68cc658b734ea2213" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__6d48775e", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7098520c00c53b77f6ccae01ebe0259147c88c821799ddf7199deae874e37cee" + }, 
+ { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__1cadcb7d", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/tokio-rs/tracing.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:718a60e705b98b09ffa67327ed87c2423f4f89fd40668b4897b19fc2150bf6e4" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__158909b8", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:72331d8f1180206a03b37d0c0b6aa0fa43a56acd6eb189d16a05fb8cd3af781c" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__aa16b8b2", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/tokio-rs/tracing.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7279262842927bf02fe97d9d0bf2981475d26eb5b63b1cc4b3740f53826f3fa5" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__c26fd1ab", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:72e5bc55b7100df256b51764ad4df1646a41543822286c55270b8f4318d9dde1" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__cdbc5890", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/jqlang/jq.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:72f3d12cdc56f1f657ccec9a637dfa2c23924b58f8bc077d0c184210f25d4e8a" + }, + { + "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__6efcf999", + "source": "Pro", + "language": "go", + "repo_url": "https://github.com/flipt-io/flipt.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:733c8efb41bc8a60bee7708a5f79e638a09801c1975274481e0aacacfdb4b93f" + }, + { + "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__6a4e21e9", + "source": "Multi", + 
"language": "cpp", + "repo_url": "https://github.com/nlohmann/json.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:738476fd9d218f2fe95cd27e84c632a4084ad5cba5328ade17ca4b6624761a14" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__0e4346f7", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:73a8076599d4ab1c5989bf79dbb2d79e43c18d94e208d532c3579d5f5dec698c" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__7b474035", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:742158bfee4796fbd9a5a5a07521accf283337d42f499a0e85832b0cb374eec4" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__023915d6", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:742502385efb81d4e143521b77299cdaa1511bf14a6aa982bee823f63bdde089" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__40f09c26", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7441d8e380beaf496a53272d86cb0dba935b8c45589c8becc040df4c709ea31b" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__1ff281e5", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7449d00d459cb559d046b9e7c350d1c4ea6ab45d12e5771c5f04d72364b8738f" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__6e022940", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": 
"eligible_not_selected", + "deterministic_rank": "sha256:74878b0d9e1f5b48523c925ae1035c6a9923423495bb95290a708445cdad80c6" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ec6fdd6e", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/scikit-learn/scikit-learn.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:74f932d64a485353886fd36f7fc9061d6a0263bcbe595b98e02aad4e3448f0bb" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__2b00c0d1", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:75c2493162d4590a20056e441ce622611e3acd55f0d44a6db0cec72333ab6717" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__3a4062fe", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/tokio-rs/tokio.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7624c6dbc358f48c3b3049e74d36ddfdf02ad78800d3b5b5579bc32fcd46a7a3" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__8bffb1b1", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/facebook/zstd.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:768805d008bcbddaee4dc69545f551ef69b296bad1797fdda61cb327fe97f2f0" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__41a5a6f5", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:76f220128b4fa519de4a33f87f977405cb3cd3cd9a805562793cb9cfb73f074f" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__c6d3e230", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:76fd82196cc4a43c634f82bd8344812e7ea1ab47e3314e04a6fea281262edc84" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__1e842312", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:781b005bd25fe85a69cd1ccefb26833eee312783c9401a97446f50c8aef12003" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__3b6e6f3a", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/prettier/prettier.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7885d2af167c9842b8dc4c03c389f4a3b95c0749272b29f54a82feaec9cf067b" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__9a72b241", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:797706981973d0ee39cabbe1869292d83656d77d8524829675928500adebeefd" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3deeea9c", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:79bc5a4f3c5d4dcc678e3fa20125568462cf06e6deed4ea05d1fdafdcb040314" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1d90db61", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/astropy/astropy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:79bf99153faf79c643aca034fb5b8871a46080e0ee6692366cc129df86d59b03" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__8d23605c", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7ac6f05dd5ef65360d9846061c91dde317cba102956f613554341f6cec918683" + }, + { + "instance_id": 
"SWE-PolyBench__javascript__maintenance__bugfix__b9ce6a70", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7ae9c265822d97293b796dccdd653239ae3cea29466d2be22b1734939634171a" + }, + { + "instance_id": "SWE-PolyBench__python__evolution__refactor__57ad5598", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/keras-team/keras.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7ca7f7bb2c6a9a0121ea7b53c0b00ef6cf3bf9c5eead2d6e902b59002da0aa21" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__55a3ef80", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7ce46e6597793134d473cf55b7756461c337c6d8d1b8f68dca60691089e47029" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__eb1411ba", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7d032c20e66dc879f7371d39dff884ba038453f13bd125c68aa2cfad484f710e" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__73fc043b", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7e4e9658bfea48a06dd74e3701fee59d9b5318f99f2ea610ec5f4994e69e389b" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__3d84a3a4", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/iamkun/dayjs.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7e5ac40917f8124268765d35626f1c5dcaf454e5f4eb2925243ab3d6905ea286" + }, + { + "instance_id": 
"SWE-PolyBench__typescript__maintenance__bugfix__7827bc22", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:7ea1eed780ccaad7c45ea557b3623edd2d6d2fc217a369d936bbdb1a42d9e334" + }, + { + "instance_id": "SWE-PolyBench__python__evolution__feature__4a329645", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:802df16c6ad396b3c2cc5fd26de530b606796dcb89fd4e000377a5e6d50b10c3" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__0c4f8d13", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8067fe7f87b1a5af783f5be0f476b93c4354cb839cc76fc43aa357b61d49e5aa" + }, + { + "instance_id": "SWE-Bench-Pro__javascript__maintenance__bugfix__593287ea", + "source": "Pro", + "language": "javascript", + "repo_url": "https://github.com/element-hq/element-web.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8069b2420301ffb272d156b54435dd51ab59bd444de9457e4d0dde1774363232" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__cf01f471", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8104a9ae249ffc6fadf3ddb32ad0c67911ab3e8b16178863e443199bc6c5a37c" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__b52bbc24", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/langchain-ai/langchain.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:81634695810aa7da09bfd586daee2d34e9e2a42ccece0f34e5f48e16dfeee60e" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e5236b5f", + 
"source": "Verified", + "language": "python", + "repo_url": "https://github.com/pytest-dev/pytest.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8211dbad2044e9f0f9a49ed1e50d8470324f4b48a520b2834be68dd1e8578aec" + }, + { + "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__41b108ee", + "source": "Multi", + "language": "cpp", + "repo_url": "https://github.com/simdjson/simdjson.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8301da84fc6b696446f4eef5f6d5a7a5ea7f81dbf96f3a0281335e7b22fb9a86" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__016ce55c", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:831b0746590c5cabacfe9996498dbd49bc4bf6099b3e31457df78375fac2493f" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__9c470c46", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8339f7c26125616f718ea13d52e3bc630caaead92dfc97a5657b2722e7f7f424" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__627665a2", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:84739cf0e692f50bea6d46b68b935177c5e872549eb64823a6f1c50ffd460977" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__7d106697", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/microsoft/vscode.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:848c15df1bb22a471a6795292058a6c818d703de7550f6c0e4bc1e7285a00ca9" + }, + { + "instance_id": "SWE-PolyBench__python__evolution__feature__66f97093", + "source": "Poly", + "language": "python", + "repo_url": 
"https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:852ccb732d2036613fb1c2089b6e0cb03b7155c0f33ad162d70229c4efba0ff7" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__726ccefd", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:85520d3c315db14fdbfb58334cf74fa7a74e685afe000440e44fa2db3118c24c" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2d984316", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:85b10de1276d5d5e1c32d728a815bc9b4e49de93d3c295eda3922a09255cbdf3" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__fcb506a5", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/internetarchive/openlibrary.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:85b8dc3a8e86cb7b95d67f072248ec9d1f7408965f120f1aeddf05612ec8daa7" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__dc72c033", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:85c1b2f4edeb69760f5f128a5466f39d7eb0a096549e0026e2992f05cacc2935" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__4f3cb6be", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/microsoft/vscode.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:86a2a0e0ac94805312cc4c203fd00e7f3644482cdb692a7a741486c51082297c" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__f3695f8d", + "source": "Verified", + "language": "python", + "repo_url": 
"https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:87293e52686da9e8cf238fbfe45c6b7278e8610e016b206ce88396b65c8fc15c" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__34e61891", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:876d70facc5f1b31ceb467b3a1352c649e06fcf1c0f6a593502a966eee34d1fd" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__36b8c9ee", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/qutebrowser/qutebrowser.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:878173c82b326180ff289558ef2fde63920c389c012f732775a808003def3d42" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__cecff61c", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:87ab8824a4243aa360682d1b232635bd085fd19a4e3e4da951b15686793e08a8" + }, + { + "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__4a37a167", + "source": "Multi", + "language": "cpp", + "repo_url": "https://github.com/nlohmann/json.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:87ec83948f2ca5d61885051328a45da4e37465efd270dfe777a9b1a252c12e33" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__380568e9", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/keras-team/keras.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:881129d67125ad82093f90197f9a953894deb5a4cf7c27041dbf8fbfbe396d5a" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__64a006b0", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/keras-team/keras.git", + "reason": 
"eligible_not_selected", + "deterministic_rank": "sha256:886d685ee1671749602dafb4d6e64110c9cfc71fd42aff14c0e4690789f22268" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__d3133dfb", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8888a098b4dc0c8de430bf7bbc3fec0160ab059342fe7a64d65259927e8cfc3b" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d3576321", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:88898a2b5b178ab82652d381a4cad53a9e5bd55f7c12cc772534367334bee7dc" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__03f04397", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:88f3e319bb1058399124b00966a085114cdba079934e77a4c0e6478dfacc3c26" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__462b957d", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:89857fcfcb84aa3d77ca6c547b73d8d5e8e6d3edb47b758b78e28fdff25d78ea" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__10ab7842", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/prettier/prettier.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:89953e9847c6766bac5a1ee08e90a4787808c7ec91d6dd51293ff0a9a6a85bfa" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__453fb844", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:8a9ba76716c4daaf02097c4c4d037ecb0100f2707920b5f381978c0b0858b5d4" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__250649eb", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8ad038ef7f9e972db922f29d90da846bfac81f4b1be45d1a0ee84763d91cb5d8" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__deb49033", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/astropy/astropy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8b58d6c1da5b3c2945f4e0aaebea2caf3fd80f143eb49fddcd334dc0bfbbd293" + }, + { + "instance_id": "SWE-PolyBench__typescript__evolution__feature__c2e9e5ff", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8bbf0358298981f3c9346ad49d7ad03bb3537ae8a8f8be7684202581e64b3d27" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__2dc6f037", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8c0bdc1c54a999569b5d2dddcf6c64883b0caa61a0633bb4f9d4ae221b46b085" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__abb9b8b0", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pytest-dev/pytest.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8c149eb730ede30053d63b2a7d1c8731296964b5a099477ed6e5319caeea3b20" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__74cafcca", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/grpc/grpc-go.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8ce177589d622b1bfc701d3b18b04690b767b1a99016eb049d63ab1b76258151" + }, + { + 
"instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__92158fba", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8d637b2b05b3f0960bfba1f6b40c0af177b267a226fdb339e6a2bde59ae0076d" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__7dfdbbd1", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8e2b67b887c25506673b0e146be4223f4d4eaec539831728bcecfc4b21675097" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e09a2d75", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8e2f69c39a81cb656bdfbe6554ec3f8f1a480fbbdcda1b63ed1cd24426e3933f" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__cedbb0cb", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8ea95b044c8c6f9e68d358c167589f3306906b6313f57b0e03e1ed3c537104ea" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__d1cb6b19", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8f466469fa28afdbec9f8bec122ce404ae1b0221e82d9c946cadc14ecb3f851d" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1ce4c38e", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/astropy/astropy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:8fb54309bf66b86ffb1dfe7242a76b0a77931902556aed282ab9fdca0ffe5fe3" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__be0b5fff", + "source": 
"Multi", + "language": "java", + "repo_url": "https://github.com/fasterxml/jackson-databind.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:9026e34f28fcddcf9f99ddd475ec2221c3d6bbba4b72d1f107d9b7689d1019b1" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a375715a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:90c24c6321062e4a32b81037cfb33b47550553a0621ee581eeef88c94144a389" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__81f2c925", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:9117d6b1bd2320ab129975a7d766bcb56f10a9b2da9897c3762a20656f7f3eef" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1409977d", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pylint-dev/pylint.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:915a010197b66a8286782ed1663fb86163645f13ef6900de807b01f876c5c7b4" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__92782c7b", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/elastic/logstash.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:91a8cd5bb6f4b4378ab85835ae0d6f1c103f404d75869081ba7dc1eb3179765d" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__9dcac3ec", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:92ab853e53a7110f896babba774e59f12094f6e752390403aee845cc7b583ca5" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e2250ebc", + "source": "Verified", + "language": "python", + 
"repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:938c74f9a745321ff241a3d6cd55482d1ce76b064081e789a7ca578d8de5364b" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2d5d4dcd", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:93bee3a55b0d5b3b67628599739b980343c9248900ec275bc97f98d50b6aae2e" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3d1b3145", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:93e6d588352c58805a2bb75d8da5d6747ad2979c03391b59786ae2881b8f10b8" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__31f7341a", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/keras-team/keras.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:9471b423e2d8b8b75896107e8c8b5b8f9b1f8cbdaeeb93bfbbb7f06e1128b975" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__27320d49", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/scikit-learn/scikit-learn.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:951a485ebdfd5d85f4e05f538fbfc2a62d18e6f967752b2472bab32c085ad863" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__24342259", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/keras-team/keras.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:955daa44f097188a2551ef420b3e9007fceed3161ad4fea4a0b649853f6a03fa" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__2bfb742a", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": 
"eligible_not_selected", + "deterministic_rank": "sha256:970365cfd51f891065027e51738bf4a767b6cf78726be216e421b74987d41648" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__18d389e5", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:97e4c3724cae75adb2bac522fdfde27d4706ed457a34ce53a6f25b44e5adf924" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__4c132bfd", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:994d744d1f4fb4c48d15474bdbffdca55ba83d757dfdaa2e5205f35b3c33052a" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__faa6b3ac", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:9c5e9e1dfe4d9d2d86293b3bfb389c11f487ab16c12b464f3528d1f0026904f9" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__5d44a351", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:9d62f264d89b1a4862dbf41777d4c50d5c10ec6221b111e8c573a754fa63b0b3" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__708894b2", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/microsoft/vscode.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:9db48d2ae4d47cfb2a49729372764fcb49e24a68c38c5f118d7621677c875b91" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__d3babcfe", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:9e316ecac7100ff1be961ab46d449e9361ea6e40207c63b7e6665ceaa9c00851" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__2f405b15", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:9ece8442cf52efe268a85d50f93e0b836cf3462185dc25a5b03781b12c236538" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__4c3f5d3d", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:9f3551b6de5ec60ed6c278f6f7389d83005c441ccdcb80279455ff2b6ec730e5" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__8d36ca39", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:9fec1583003fab61eea9b92595ab03b0ee3c95cd45748c4b1d2fafab6f3c9c04" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3da242a2", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a078dd8c242522f6c304392f133f62749c5018bc1f27c574fa2919e686aea4c0" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7afb30f5", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a09fd23399ff1b3fd6f29e330bfa293400d2592e8905bf26abdfbdd680b260fd" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__463d4713", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/prettier/prettier.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:a13124ba3ea9b9a03713f7654b473e61ac8a9c8566dd7c6434e9580ff9750ab0" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__27e1903f", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a31b1272ac89e7ed67e0978af287b462853f9ca8df9d481c3ef4db65de38a912" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__58d0e27a", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a397aa0752b239f79d4aaf72e7b07b420ae65793ca98d1f35010ca73f31cfe62" + }, + { + "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__997c7afd", + "source": "Pro", + "language": "go", + "repo_url": "https://github.com/flipt-io/flipt.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a43cb294062094d63c6b654f8273631350b46be5afd2c62b8c2f84c233eeecb1" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__22615f27", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/microsoft/vscode.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a4cf99fe00c3e123b13017c2d652de9df98a496fccb1e9e907d4f082f34e5e46" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__0cf27f56", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/keras-team/keras.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a53255907c577b5a41255d44710fd143fe59cc6db0543b30a1c23f88e59c5c54" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__89932d58", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/qutebrowser/qutebrowser.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a56d6c65069da4f88eb8da9856171abe91da6d84c46a73d69251316a204eb640" + 
}, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3c9b757f", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a5d546889fb8eb7026432ce8617bff8c61725bc445431c03d27d201206e59e0f" + }, + { + "instance_id": "SWE-PolyBench__python__evolution__feature__ebb79d55", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a5e50460decd41fc727f6ea98aa4889c75693532ca639cd99cad48ae8a3d09aa" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__d6eb75d7", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/prettier/prettier.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a67778023cfa8e8ac3bb22d1a4d1977d43d36f225969227f973dd0e7fdbcfdd3" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__5900c195", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/scikit-learn/scikit-learn.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a724c79b23d656ea57ce292a1cec246564263818180a6c7d1b82c77ef1451280" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__5acd9675", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a8796461b752a9d17600fef2f9426ca63b20eda47c181d0e3c5ac97aecd75490" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__f667fa43", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a8ae6f132a4ac4baf95e295e5a7e2495555eae7d800d28415778e7b60a37f88c" + }, + { + "instance_id": 
"SWE-Bench-Pro__python__maintenance__bugfix__7c4a11fb", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a922795448000fc9066032ecec5619bb151deba35239e3e5d3f106fe3914c54c" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3c7d37b7", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:a9b624ba64e17006ccf79a61c399c81ed274a45f5b794de672a31b9fc822f496" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__e6efdd87", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ab34c3b16c69eff7dd5b745a62450015e55ad69fb92c601e81af853436862089" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__9ea927ce", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/yt-dlp/yt-dlp.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:abb6db29be8e92232144e8f3394afabd10cc895f0e786ff1edfcfe7b84aa773a" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__37f525d2", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ac9cf06fc9fa90d7b8f0ba9e74b22a7933a853ab6f483225d21db31f27879d03" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__2bd87230", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/fasterxml/jackson-databind.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ad55a37adab0272127d4ae9ae9b836f8c0dd46628248b32765406444b323d4c7" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__9d623e73", + "source": "Poly", + 
"language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:aeef444f84a576941668a779614c2cf5cd44a396f04fbd8d88504ac6a12b7bd5" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__2d1b4a72", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:af0fd756106f93628cb6cd7a82efeef9497e6231b2e41c4dec3f3fc4ea2cdf77" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__220feee3", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pydata/xarray.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:af777e69cf4cd57b870980f77503361a4224184dcbcc463da0674b94e1b548ea" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__20f502e0", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/qutebrowser/qutebrowser.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:af98d3f9e096434d5807731942e46ee807670d5469ebd2be80e4d0c9bf484c99" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__dbd7cde5", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:b17b955e1cf2793d722b16dcc00dc7bac27af882ec58645ad69c3ffae33880aa" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__91e36e16", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:b1aa46a004fb6f490e6a83dfbdac7b011e255cc0d085946f84d85beb6c2f3143" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__512d556b", + "source": "Pro", + "language": "python", + "repo_url": 
"https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:b322bf4e06b97016619824e139b94f5adb0750207d7c0bd6611006e43a384af9" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__2a889a1d", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:b34bb301adba0b7546712a2d4843e00d280e8e669010b162c6fdf8e7b2920a46" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__67bcb30f", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:b39e2600e202b8ead38d9691cd212707da8e8e7223283ac92a32518bf3c62d20" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a14d0e2e", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:b58ffc33c3d7400452dde210adf57809fcc977c134891a725a453833508a75ed" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__da598baa", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pylint-dev/pylint.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:b59e4f01b51466dc3f750419b96e165656f12bbb88e37fe4dfa5b8e5ea81b2f0" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__a98dbc6f", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:b5dcc16f945ba74861c78ffcb682b080507a5ea5aa44f06ab0c85a73adf4b904" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__0b0f1e0e", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + 
"reason": "eligible_not_selected", + "deterministic_rank": "sha256:b607c7ef87ca19c0fbe49c9b4d997b36d1d00fc60516c25df4d611515aea81fd" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__0e3febd0", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:b6e7d823080ec26bbc97359cf4470a4cb67ba8ec5964c0833dbd3c3f7aea5bd8" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__108b2d38", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/prettier/prettier.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:b78c43106ffae42e278d322e5f8502f76340114c43c51acdad7fccee65355a16" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__bb727b6d", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:b87a13924843ff4d14ec115fc92d8a5a38821afaa2f94adb22d5ebdd8612d898" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3f745086", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ba4fbfe254236de3d71f3f6fb6fe9b2937deb26f42fb24eb5103a8b61bb33b76" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__9a7be8ba", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/facebook/zstd.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:baa752423903b4a2c07a53d7b95cfddb7e9e12de4c89cbc5352d70b7010769d6" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3d85271b", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:bb41d9a50b1108700faa8f19f1babeef09615d17976dfa55e851c020fcbfe587" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__8fcb53e6", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:bb680def81fe766e30e2d237bb936103d6ab827d08645799b4ccd29ed3acb896" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__a7aba10f", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:bbafd4665b70c0e23e401815ea929865d1eefb6ad0480b6f9f8e2f9e3346b244" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d561b333", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:bc3173a33e29dc2d65a71e2bbcbbd2ca9d4588b0f69a86fc657ef5be84596252" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__3be02163", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:bc89e4566bf4836a7834cb5e3dceccd45852e7d167291aa2956a3eb161a1b0fc" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__e2b70931", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:bd0a05115773728759e9f20928a059819d94e9a2690c2abd97bd95e43e18bcb3" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__14ce2fa9", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:be055c6a47f67c80939830516f2cbf6649c13bd1075ad2a781c3d73d15765619" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__bdde6d9c", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:be42aae569cdc86bbc6addcc35913bd844c096cb09f1bff259a5f7a92dfc9d9b" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__3e92c76b", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:bea642adc1d5756b9dbb20e74d1cd5bdb69f9748b1015d6817fc581de0d56bc1" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__986c9e85", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:bf04f5771c5bf81fea9fe7fe1f1cfbc6f22d2ca249960183e510b18155af0b39" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__16c72e4c", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:bf4e55bf239145aab2e150ae921bcd8d188baf46944560490859596e6f48e5cc" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c2f0f2be", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pytest-dev/pytest.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:bf5decd053bd9b45fa9f24c7be564443f67992798b3a78071cf06f32cf63be88" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c4bcf730", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:bf92c267d19f49b8a8d1d9fd8e4bcbc77bb14bbe7e440218e0205a327bf354bd" 
+ }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__2fb50735", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/darkreader/darkreader.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c03df0fa030b5e13d21b16577cefea3d0083d761edb5c3f5500a092034909ce9" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__13b74fcf", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c040e5b9d694dc373581825288e0c35b54ad408d06621833d2c97991e6423d49" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__af69d880", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c09fdc8262f86be8e2954ed5638f9c0afc88fd3293ee1ceb444a537cc193555a" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__6f3e4cd9", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c0b7081db523e497688bdcd33dd4d3c4c31ca7bb7394286119abf3b0c157060b" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__c20c3c4f", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c150aeef93f1c8dc87128771e7f384bfb3486bf023aa561148272dcfc6f6c004" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__6ef81275", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c1e6af7a82a751ff57f8e49441100ff546670c57cf042ed9d709908d35df6a54" + }, + { + "instance_id": 
"SWE-PolyBench__python__maintenance__bugfix__84e0afe2", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c26d424b2ac9dfb29f75d9b1893cd777f1ca5bdfa64fa64c1bf744d72e24d5d2" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__ec4f6157", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/fasterxml/jackson-dataformat-xml.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c407c3b62fb4012ba392ab132a3669e9473cc98c91ead5eb4ab62ff60162a994" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__63513ada", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c40d91e2c2e309097be79e45d2f882c0c789e3996e5a8c1adb2f0b22e153896e" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__0eecae1e", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pytest-dev/pytest.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c42e0448a8c6dfc581fe90a8b5b67f5ba3f2cc6b42511b36d2f2c05de375290b" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__8b7cc5e0", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/alibaba/fastjson2.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c5188ffb64e16d530c778a10c0bee86d80c0e791d20dca5562a1cfb61fd03bc9" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__90f381a2", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c6de3035e6bb0a6c7fcd580d2790415cd9ac239be0de00fea749a9bb13f94e33" + }, + { + "instance_id": 
"SWE-Bench-Verified__python__maintenance__bugfix__02b5862c", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c6f22afeb2fad8e18f1e462aa84aec7163ada6ac2cec60ff79f0f9dd8f5f89a0" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__6809df19", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c771f88c030237b7093328e122d048d3c3915b3dbf24a0607a21d16ab2e6611f" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__52180d42", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c773d0cc7c0ca3162f2856de0e1a5d57d43cf3a62712da70dd45fdce476405c6" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__2ab7d0fc", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c7b1868324f55c1a88ab05b59ea87363f3bc5503fc68e9e23456c54ef307fca6" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__2f838a18", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c7cdf37688ca1a1a49cf40c1eaaefcfe3db6387baf95afc721dfdd4543d49e5c" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__5b47b0dd", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/iamkun/dayjs.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c7f0f503a64e85acd3e2bfb23ed9b4a6ffa4edf2622c38a74f777a920eca4d56" + }, + { + "instance_id": 
"Multi-SWE-Bench__typescript__maintenance__bugfix__1ac0bb81", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c99a754d793203cbc3af02c68003b240e1cd47094a33e1c6511559e8c6fc074a" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__6ebb54dc", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:c9eb75f11edf41e5a6b45640bf8c9f60e374f0c02c523b2d37d0434fb3a59075" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__36a08326", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/fasterxml/jackson-databind.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:cb60e775b06002b7860a7ab3a101827085254c81545b144fc1bcdeab531dda9f" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__35f76ab1", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:cc09ef4c979d53b845e6727aba1806419e20cd901c1e4e421686930bef67b4b4" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__5b2cf9bb", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:cd5ce37ff6f64b8bf143c40d81b446d535431462e5ed061f544a6bb84428015e" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__8d0f55cd", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:cd7ca80fe1212ba6ed9a843596af3efc6223ae7f02650c1b7f8fe2dce98538a6" + }, + { + "instance_id": 
"Multi-SWE-Bench__typescript__maintenance__bugfix__ff629f34", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/vuejs/core.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:cdac954b1ff15e8bbf37a0bdda904afa1458d9847c1fa671eae7dc0f6d389832" + }, + { + "instance_id": "SWE-PolyBench__python__evolution__feature__c0e4893c", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ce1f70f6ea99737232ad8fc8e613c0202200dfd090ab4f2a6a48c50fe0e63c05" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__914c325d", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ce3590710b19812de614ecb55689c77c44d932079a0d3ddab76603f07da4f54b" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__e97ac668", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ceefd7e66d5fc81939e4ea281e9b407a311b1f5054a917694c3681c1db8d624c" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__1ad00901", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:cf9dc0c0e15795e88b328b46f1e6816c5624a22b35cb72b746e63d80c94d1dac" + }, + { + "instance_id": "SWE-PolyBench__python__evolution__feature__9e2901b5", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/langchain-ai/langchain.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d06ddc03f252496f86019dd7916fec3ec60850ade8a2e230a4598011927b58b8" + }, + { + "instance_id": 
"Multi-SWE-Bench__javascript__maintenance__bugfix__cbea412a", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/expressjs/express.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d10b85cdf578037750e570f3b39a0403d83409c166650642ea31d75f3609576c" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__5dc9809e", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d1f805c826a1a8165f18af76dc068dd3ce6f9be4feb7b0129ae907d9d03ab2e7" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__01d1908e", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d24d07a0cd07d9cb39963e7eb47115829dda5893afe6f086e9cbd20c8a7c306e" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__d9d9c3a6", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/internetarchive/openlibrary.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d3490e8d61723a1b2ba6878154013c2241d2c49f5627027346e59ee9e0627e5c" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__70acfaad", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d3a25fe25506cf671f17ccf8ddfe0ae78c30206a6fcd2c7a3db207e11815086c" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d2a786e2", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d3dae85caa432dba24aff40383ca37a8f2502f75e43e6dc8e4d1c06cb2e4298f" + }, + { + "instance_id": 
"SWE-Bench-Verified__python__maintenance__bugfix__1a760e52", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d418a6392423a1e564c1e152d4d3b6374b042aeb059739a67cae09734dc479dc" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__60bc9e86", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d4304d1a4c1bb2f3e3db93e39fda409b4e93dbd7b8299f66a95ec091b664060a" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__aaf309d2", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/langchain-ai/langchain.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d4dbe32660e2adde6f36373f7cfa10d8f732e043e3db6267c5dfd8940b771b62" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__71f348da", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/astropy/astropy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d79cecf7c0058211d9b5dde989d4384fbc56271e5f5cbc08398745e472116882" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__49f4a0f4", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/jqlang/jq.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d8413673bc44e9e7a41e0f468c77d0e92326c2e565826361b6f1ace221537595" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__5fdd12f6", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d896cc18071438521d62e6a5e63ffdae5d5a5a1cac3a542137bf9ca39d967caf" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__60b000ec", + "source": 
"Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d8fde0d461aa5ec223f3243119258d4bc6be63af4ddb1dd8277567d940b781f2" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__9b321036", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:d9f0693080266d8e9c82913d6dded534d9aaf72373d56b758e0e5e5b9fcac957" + }, + { + "instance_id": "SWE-PolyBench__python__evolution__feature__674464fb", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:da7461ac29413735a04f1ab12c942d9f73abfcf5d4d5ff2ad940fde0d2b57860" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__bdc6ab14", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:db49155b884fd003644fba7bd3571443bbcd735d2cf06956e4998b0d909854cc" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__1665de74", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/mockito/mockito.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:db7887ef286d0eac4aa298039ce80017c2cd27343dc6b056408674717f5596f3" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__1ba303a5", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:dc0289179198fe098db70f2aaa5b1a38edb09940b560f8291f54758f0d4c0235" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__0498ad7f", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": 
"eligible_not_selected", + "deterministic_rank": "sha256:dc56a4caf57287fcaffecef2814a7edaeb8247fb90e17131797fadc128e93d3b" + }, + { + "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__d3c56cc5", + "source": "Pro", + "language": "go", + "repo_url": "https://github.com/navidrome/navidrome.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:dd3551455c3f5e595334fa75375386ee9eb1d06bd6898caf8e0bdb595da17e3b" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__ab946da6", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/internetarchive/openlibrary.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:de1ffb84da296157c1881430418d4dcc2bc4ba2452e8041eeae2791be45d247b" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__6659fd5a", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:de817fbf20c0088317c22782a929f52d44421a1a05bf2bb07bcfd61f6924f48e" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__78039f77", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:dfa29e49540138a36a54be3366d1de99f12c5cc9c55514cfce550ca38c0df1a0" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__7958c8a7", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:dfef3add36c57eed43a6a52e856b65b1a156723c21ca6d8fefa7b72df72724e2" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__28ed0b77", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:e0e23c2eec2b816c50fc9a0c41256dbbb39a2710e38d3df8bd36c317a32d19fa" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__9e8a151e", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e1411ff239f363f833e18a828baa28053031d68ba027ce8a6a88f5f898861045" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__c9cd2b2c", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/clap-rs/clap.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e18e7949a018205b89c4ae9df2f9e073ae3211d6ccf989adf28fb6411b51341f" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__2d7a3934", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/prettier/prettier.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e198e65fd568b37da48c3aef7d3781cf88eca2d55c2dee2cc36bbdffec6603ac" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__8f9e4358", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e1df52a8f391307f3e42a19b86e02d1e60388128d741e9920df5cbceb9b0298f" + }, + { + "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__8ebb764e", + "source": "Multi", + "language": "cpp", + "repo_url": "https://github.com/fmtlib/fmt.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e22e6a8910fa1024444feb5bbc1f05c7d58df3af2ec327b75d077a1f33d53832" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__765e5e14", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e3010e19d080549ef76bf27ca8393f661ebae0b9a238e9e86c811fb04a251917" + }, + { + 
"instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__6688f2d9", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e674d637eea55ba86f9b95760203782ea26bc911e7c60ea715a596e41ea07a09" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__5518cb7f", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e68dc076f61a8a4a5ab3c52a5cdb40eb36fd226114a1186ce5d447696fd8dd8d" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__409b7811", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e6a637203cb2f6f5d2cfa099e368d8580fb8165c361ca8802b554b455083c6fa" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__4f685b8e", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pydata/xarray.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e6ff2f46ab05dc61b08fe27f4d2240c3b84d75f5198c9a0e82ea8c2a24e21613" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__186c0af4", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e709777a755e4ac0465d28eae10c9466eff5e249f11303523fbcf99055ecfb75" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__e21304a6", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e77a2798d602646b9e92e25a56db47c5d7d5ebd35d376576970be61643d068e5" + }, + { + "instance_id": 
"SWE-Bench-Pro__python__maintenance__bugfix__18d7bbbc", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e79ad706364a163059a99e69e5b723aa61140ea633445d9df5c89ff4d06e22a6" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__04b981d7", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e816451d9d649cc768a43cddb7abbe52b25f3db7becbcf2bafdbd5c71e5c0569" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__51e329de", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/sympy/sympy.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e838b5c6309d3c166f00c15a3dd2ffaaf56796f31b603e51e2fd5f8ec014072a" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__6f9b5ff2", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e8b72142fc0a24e7dd591b9b98eb9ad6a308fc23a236e6f522cb26611aabacd2" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__747c7f60", + "source": "Multi", + "language": "java", + "repo_url": "https://github.com/google/gson.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:e9c4e405c6c37c51edb2432bdb62e5f60523b97ff45a665b88788ae16af73e9a" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__7e3e2bb9", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ea319b40e64f86943d33a01abd0eb23d3bec61d21e3a439b71ec167eaceb395f" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__6cb88b18", + 
"source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ea76a4ceaa39464c266c223d2db255bc7fb03f44b9f1887ba52b87c80564f0d1" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__f4541fb1", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/serverless/serverless.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ead9d1d11fce0ac7cde7aa8e20a8044d207af076a208e6d626cccc7753df9792" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__7c41997b", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/vuejs/core.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:eb0cede3f2062f0bcdfe36e5be49a2ca7181e07e72b653f372a9474657571df2" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__39769692", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:eb19e6340e006ca6c72914c4e9a68d3df88739b0709ce99599f523990ca51569" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__8971eeba", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/scikit-learn/scikit-learn.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:eb290535d9fc724d1bc3efa376a0580d8df0f44e7cb89986dbb0039aaee2a015" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__09457b9b", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:eb5e6a8d6ec18c499fad4cf18749d50a534c334f072061e135ada14d8dc547f2" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__51f3cc91", + "source": "Verified", + "language": 
"python", + "repo_url": "https://github.com/scikit-learn/scikit-learn.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:eb82cab056cbde378e2974f1e87eee3492275018e89e7a6a266dbd3cc8a8e517" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__c363219b", + "source": "Multi", + "language": "rust", + "repo_url": "https://github.com/BurntSushi/ripgrep.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ebbca34412ac4c2b46152226d554934a0a8404e52e46dcb1d0ffb7c1db8341fd" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__b01e9113", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ecbabe3ffe7a3d1b73cb615fd0ae40d46c03402d429fdf1efc5e1e01d49ba76a" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__ed8db47d", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/zeromicro/go-zero.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ecbe41cacda5fcd5a62b1bf24fc6910c214c00215f515b7bee47936c90e1073e" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ec975b6c", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/pydata/xarray.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ed09eb3aaa6539d523efb5fe32ebbd5da963beeb9ba34cc5d48063bdb731522e" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1c5aa714", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:eda5b9e4afe4ea4a07a8738b875a8740e1babc8c2b2759dbb1f59344f8500ceb" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__b8bf59e7", + "source": "Multi", + "language": "typescript", + "repo_url": 
"https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:edd06f968d2f90329b71e3bccd8f5810a82b670942151bbcefe31bbe400b4cb2" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__b10492be", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ee6278dd5639c45bcc4c9a382cce28e105e6b12967027388d4f03a1e090358ae" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__2a9201a9", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:eeca9dfee0ddefcbd821f5565f673d6a8562e87655dfa772a67d6d3efe9bc570" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__2c512ec3", + "source": "Multi", + "language": "go", + "repo_url": "https://github.com/cli/cli.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ef7f5d41d540783f0d5ee3bfa15765edaffea12d15214c2faa0f2d46799b8ec8" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__538e9f59", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/jqlang/jq.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:ef8571f793165cbb30ea4771819c754cb2960d7b970fe4890279e23c49c7f2c1" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a8414dbd", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f14bec20b237f5d867c6de7379d918f4bd99cd2b2e09ac0fed87f05a1658dc4f" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__9c9b931e", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/serverless/serverless.git", + "reason": "eligible_not_selected", 
+ "deterministic_rank": "sha256:f2410b91cf70a6e4383131e3fb8b8ea037fa57387dde774d1b20d3f06c755a7e" + }, + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__b8c88ac8", + "source": "Multi", + "language": "c", + "repo_url": "https://github.com/ponylang/ponyc.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f24e0251614c20ab99d7653b700b2936a4044b41b39f11da02da3969459b36e9" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__59dd675b", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/serverless/serverless.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f3147f4e5b1a1766dda71bec72de481e5ad52d4997f283a13ced15d450ed2866" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__7489fdd8", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f4106b22b700f63f69d8d76b0f4addd891fef58fe16d03cc1f996074b51b9b06" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__3cb539ec", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f42b9b036c1e17742c7944802d01c4b5df88386c1a68a006554fbc4bc14ed439" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__a7a5e8ed", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f48285241b981c9ff86992012e4e65b6d78ff9e9ec4eeb3c7a298faad7d5f9d5" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__d105d187", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:f4c380f41af944c6198d95afd7c897d5983d4ea662ebb5cd70eb8db4e0af86bd" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3146e19b", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f52c200b13fc0ec21998c4c5fdb4c1d2b30761dad2e89ee480ef404c7dc29993" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__2464eadb", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f552958bb89e0388a620e8ab569a3e3593ed63490a9244493ca177e04102593f" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__96afaad2", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f5743e84051031eec7fe11894654ecb3eba9816fb0663b4e6da5a59d0957923b" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__ac5893a5", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f65990127e80e8c6871f2b4fd7690a2748b555e24067cb0bca63686ccfc8e9c9" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__898aa1de", + "source": "Multi", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f69202620e70d9ffd6d9aebad0b897f6470514dc20f92641ec01fb0315ffd8c1" + }, + { + "instance_id": "SWE-PolyBench__python__evolution__feature__7fe6d907", + "source": "Poly", + "language": "python", + "repo_url": "https://github.com/huggingface/transformers.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:f6e9bd2e373b05182142e1e13fb23683dd6ed4b6c1228b3f87eab394fe8380ee" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__1cae51cc", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f70e402f1b485c6b87684cdace52979bede3faeba8d9b889a908ec6ddf77955b" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__d33de279", + "source": "Multi", + "language": "javascript", + "repo_url": "https://github.com/sveltejs/svelte.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f767f04df87c62b1e61fa9c8af648c181c079257ae99a0049540aa9a1ab0b002" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__46dc77a5", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f774555d35bdf8e2faffde259e56b1366592070ac3828c752535d3a578157ab0" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__1ad7449c", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f7a2737359640a608766038265ba1bedc4bcad7b7e8a6c9daa1127bbb52e2b78" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__1de1bd3c", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/mui/material-ui.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:f8e450cb9fca78cb26913bb7ed689719b77e59f7de463f4f5f5c2854615e6438" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__0f1fd789", + "source": "Poly", + "language": "javascript", + "repo_url": "https://github.com/serverless/serverless.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:fabe9f2fd696db98fa1fc080b1daa8c30ba267ca9689559babba5ea79c0783b5" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__f8da42bc", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:fae4463a6ff2b6e87c052282343e95897001dfbc9231f0013182a87ea9648442" + }, + { + "instance_id": "SWE-PolyBench__typescript__evolution__feature__41cd3842", + "source": "Poly", + "language": "typescript", + "repo_url": "https://github.com/coder/code-server.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:fb77201bce7941a232f8ba992f4ee4adc742a2aef5143bf9f08dd1186dc328b3" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__71253eae", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/qutebrowser/qutebrowser.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:fc2ee2e566b315448824fe160a6abee6fd4af93692e178cf8428da8dc94865b7" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__bebfd692", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:fca9b69e05ea88a2af9cb8cadd4ef2b7ec22d3410477107e88d85b1213e5abf2" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__64469377", + "source": "Pro", + "language": "python", + "repo_url": "https://github.com/ansible/ansible.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:fd00f4982535a09732d7e6ef2d819e519c47f5f43364353945ef2cc4bd50bef0" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__ee10f0e4", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/django/django.git", + "reason": "eligible_not_selected", + "deterministic_rank": 
"sha256:fe3ad1c49518945395f0eb9522b2c435e8a75b47a61f2b77c07b9673f2db27dd" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__bdec299d", + "source": "Verified", + "language": "python", + "repo_url": "https://github.com/scikit-learn/scikit-learn.git", + "reason": "eligible_not_selected", + "deterministic_rank": "sha256:febed0a62f60c8b14f6f7d84c509acba61b0a0ccc849d019c6d562dea85870dc" + } + ] +} diff --git a/tests/fixtures/contextbench-smoke-pack.json b/tests/fixtures/contextbench-smoke-pack.json new file mode 100644 index 0000000..deee473 --- /dev/null +++ b/tests/fixtures/contextbench-smoke-pack.json @@ -0,0 +1,30 @@ +{ + "name": "v2.4-contextbench-phase37-smoke-pack", + "protocolVersion": "contextbench-protocol-v1", + "claimBearing": false, + "purpose": "local_harness_smoke_only", + "selectedInPhase": 37, + "mustNotContributeTo": [ + "contextbench_claim_bearing_aggregates", + "wedge_win_decision", + "public_benchmark_claims", + "product_tuning_before_baseline" + ], + "executionStatus": "metadata_only_not_executed_in_phase37", + "corpora": [ + { + "name": "Excalidraw", + "repo": "excalidraw/excalidraw", + "claimBearing": false, + "purpose": "local_harness_smoke_only", + "phase37RunnableTasks": false + }, + { + "name": "FastAPI", + "repo": "fastapi/fastapi", + "claimBearing": false, + "purpose": "local_harness_smoke_only", + "phase37RunnableTasks": false + } + ] +} diff --git a/tests/fixtures/contextbench-task-manifest.json b/tests/fixtures/contextbench-task-manifest.json new file mode 100644 index 0000000..bcba553 --- /dev/null +++ b/tests/fixtures/contextbench-task-manifest.json @@ -0,0 +1,553 @@ +{ + "name": "v2.4-contextbench-phase37-task-manifest", + "protocolVersion": "contextbench-protocol-v1", + "dataset": "Contextbench/ContextBench", + "datasetConfig": "contextbench_verified", + "split": "train", + "claimBearing": true, + "selectedInPhase": 37, + "selection_algorithm": "deterministic_seeded_coverage_then_rank_fill_v1", + 
"selection_seed_or_deterministic_order": "phase37-contextbench-v1-2026-04-27", + "selection_timestamp": "2026-04-27T00:00:00.000Z", + "task_pool_hash": "sha256:a6af697f293ec595bccf9c264799f8a55308cc552b20b8ec61714240c2a03b26", + "exclusion_log_path": "tests/fixtures/contextbench-selection-exclusions.json", + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "evaluator_success_status": "passed_synthetic_official_evaluator_probe", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "forbidden_selection_sources": [ + "agent_outputs", + "codebase_context_outputs", + "competitor_outputs", + "proxy_hardness_score", + "post_failure_task_filtering" + ], + "no_lane_outputs_observed_attestation": "No raw/native, codebase-context, competitor, proxy-hardness, or post-failure outputs were observed or used for selection.", + "summary": { + "task_count": 20, + "language_distribution": { + "c": 1, + "cpp": 1, + "go": 2, + "java": 1, + "javascript": 3, + "python": 8, + "rust": 1, + "typescript": 3 + }, + "source_distribution": { + "Multi": 8, + "Pro": 2, + "Poly": 6, + "Verified": 4 + }, + "repo_distribution": { + "https://github.com/ponylang/ponyc.git": 1, + "https://github.com/fmtlib/fmt.git": 1, + "https://github.com/navidrome/navidrome.git": 1, + "https://github.com/fasterxml/jackson-databind.git": 1, + "https://github.com/prettier/prettier.git": 1, + "https://github.com/ansible/ansible.git": 1, + "https://github.com/rayon-rs/rayon.git": 1, + "https://github.com/mui/material-ui.git": 1, + "https://github.com/sphinx-doc/sphinx.git": 1, + "https://github.com/sveltejs/svelte.git": 1, + "https://github.com/sympy/sympy.git": 1, + "https://github.com/django/django.git": 1, + "https://github.com/vuejs/core.git": 1, + "https://github.com/matplotlib/matplotlib.git": 1, + "https://github.com/cli/cli.git": 1, + 
"https://github.com/Significant-Gravitas/AutoGPT.git": 1, + "https://github.com/coder/code-server.git": 1, + "https://github.com/iamkun/dayjs.git": 1, + "https://github.com/huggingface/transformers.git": 1, + "https://github.com/keras-team/keras.git": 1 + }, + "repo_count": 20, + "language_count": 8 + }, + "tasks": [ + { + "instance_id": "Multi-SWE-Bench__c__maintenance__bugfix__5e659108", + "original_inst_id": "ponylang__ponyc-2261", + "source": "Multi", + "language": "c", + "repo": "ponylang/ponyc", + "repo_url": "https://github.com/ponylang/ponyc.git", + "base_commit": "682b45d3abd7b24381bfc56423da85c3527785c7", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:05e7cceb9bc67c2d30de97c5870667a6edb4fc026425f5795d31999fb7a919fe", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:1e46ac54ac1ced7326f7392c1c7d4d49a6872d8d9d2a5bc978a553a3efeabd7b", + "patch_hash": "sha256:fe6f99fb701c617accd8adcfd1b54fc85416a0ca5ecc3566002178ebdf8d5e0f", + "test_patch_hash": "sha256:2104a1f6daafebcab12a64aeab585d55b7c3a6f817484da93982590789710393", + "f2p_hash": "sha256:7427e2bf8cd96a986ce86a231c1edd10a9adb256cf250cfd4a5e7211cb16debf", + "p2p_hash": "sha256:0b8b2e8f080de6d002a6fc72d2d800102b4fbb911c6d02c8f390ab05e55d97c3", + "gold_context_span_count": 7, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "language_coverage:c", + "deterministic_rank": "sha256:06c4c3c32891fd726edf124a39a231af3f26ce179cfd4be0f77f773ff3ef197b" + }, + { + "instance_id": "Multi-SWE-Bench__cpp__maintenance__bugfix__3e497d27", + "original_inst_id": "fmtlib__fmt-4286", + "source": "Multi", + "language": "cpp", + "repo": "fmtlib/fmt", + "repo_url": "https://github.com/fmtlib/fmt.git", + "base_commit": 
"e3ddede6c4ee818825c4e5a6dfa1d384860c27d9", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:13359b5f2c2ae376bbdf0524fbe4ce5bc730d7ecb4c380feaa40130cf363b7f8", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:c3bb60a71ccc2512b49a366ae036fd45d26df403d3cda4b0eeb3e26379885d15", + "patch_hash": "sha256:cb30e0912b415a30796aeed3e3a80b3678119c67839d4683cd363f51a10f1288", + "test_patch_hash": "sha256:87b52d25b995bbc7eb42a034bc9b80418ecd66575af92f033a739d7287443971", + "f2p_hash": "sha256:4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945", + "p2p_hash": "sha256:4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945", + "gold_context_span_count": 5, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "language_coverage:cpp", + "deterministic_rank": "sha256:008e0dd469d875e3e3b7e1a862a28aac07c2a176c6e22a1b22d828dcc2c4d749" + }, + { + "instance_id": "SWE-Bench-Pro__go__maintenance__bugfix__4df06349", + "original_inst_id": "instance_navidrome__navidrome-31799662706fedddf5bcc1a76b50409d1f91d327", + "source": "Pro", + "language": "go", + "repo": "navidrome/navidrome", + "repo_url": "https://github.com/navidrome/navidrome.git", + "base_commit": "537e2fc033b71a4a69190b74f755ebc352bb4196", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:98f230eac3090013f5a266281095712421e94665c04243a543559f4490bcc51c", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:b42c1b407f9823477acc674d5e8134639d479d379d87c9314255f4dab54b8c8f", + "patch_hash": "sha256:5910d049273561faffc0530b132aba0f8ba455388ceedb6ecb53cf830382a858", + "test_patch_hash": 
"sha256:2e94df8a4032b34fcab8496cc2435c94dce20bf65b2d5e81eec4759ae7c41462", + "f2p_hash": "sha256:8f778e849bbf8f8799540dff0d88e9f06a3f6081a67275445470fa3b53563ea4", + "p2p_hash": "sha256:4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945", + "gold_context_span_count": 20, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "language_coverage:go", + "deterministic_rank": "sha256:02725d58f7f2b212bee547de17732ab19be7bf83c72adc6830e645581d43fd5c" + }, + { + "instance_id": "Multi-SWE-Bench__java__maintenance__bugfix__9a3d9d3f", + "original_inst_id": "fasterxml__jackson-databind-4015", + "source": "Multi", + "language": "java", + "repo": "fasterxml/jackson", + "repo_url": "https://github.com/fasterxml/jackson-databind.git", + "base_commit": "9684204f3073580e711320c3531a95bcaffa63ef", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:bcf38df283aae459899174cdd14e73bbbbc3a3841c5aedcd8b02d4cfa4f59e6d", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:eb3ec4665c7bd4efeaadc34bdc0c810a56ee9e09b5c303c781118c2bafa33ef5", + "patch_hash": "sha256:52a2719faa7e89847d232f897c653179f51b8548229af7a7ae9dc0a061dcedf2", + "test_patch_hash": "sha256:b878454f799b391ed44950fb8ace050b96c2e0a623a52bb0b2db9e96a3ee2cce", + "f2p_hash": "sha256:ef73c6bee12f21091983403b4fe7a994041b2dcb7a311a2585780432cc68e2d3", + "p2p_hash": "sha256:f2544689902c010f58e9b5dda4fd659a7997dc2b1d272b0f935e9c2929937a9c", + "gold_context_span_count": 8, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "language_coverage:java", 
+ "deterministic_rank": "sha256:129ec535e675a727eec91d19846c71ddd56624c4e99a2e7bc930827dfe3b08d1" + }, + { + "instance_id": "SWE-PolyBench__javascript__maintenance__bugfix__9b08d665", + "original_inst_id": "prettier__prettier-12930", + "source": "Poly", + "language": "javascript", + "repo": "prettier/prettier", + "repo_url": "https://github.com/prettier/prettier.git", + "base_commit": "3147244f55e6e0aafd2cc6fa5875f7c8cfa8e4e3", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:c28783e14d2a03daebe39c368b1a4bc2a281a629badf57746684e57d16faa656", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:5565a443e984483bcfd2e65fc18ba8acfa1af2176f523647b84c9f19f9245fd1", + "patch_hash": "sha256:4141d24f17271be80e4d04c1303edd299d9a3fa5c3077de0b8284ca440d90133", + "test_patch_hash": "sha256:be65001c46d6368bd30f8b47116d7154aea7bc687141154d0c256ca4eddb4661", + "f2p_hash": "sha256:95f839d90081354c8aaeb83ce45adbab096979079382b1c6ca9eee793b3af603", + "p2p_hash": "sha256:ff7047d6d9118ad4b105615d0dadb38c2a7ab0ed14e6ff880f1ce8b6b05da4e9", + "gold_context_span_count": 7, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "language_coverage:javascript", + "deterministic_rank": "sha256:0029073e63edd541c2c8d76e7eb11355f3089bbcbd245968ff19b044671f8fea" + }, + { + "instance_id": "SWE-Bench-Pro__python__maintenance__bugfix__942d0b14", + "original_inst_id": "instance_ansible__ansible-4c5ce5a1a9e79a845aff4978cfeb72a0d4ecf7d6-v1055803c3a812189a1133297f7f5468579283f86", + "source": "Pro", + "language": "python", + "repo": "ansible/ansible", + "repo_url": "https://github.com/ansible/ansible.git", + "base_commit": "8a175f59c939ca29ad56f3fa9edbc37a8656879a", + "problem_statement_ref": "dataset_field:problem_statement", + 
"problem_statement_hash": "sha256:614c79e3d7e231b12535d28984391d3928dc855e501a5219c34fd432ae357d1b", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:e723a3ddcf8aafd31cda47da0e994791f688f1bd55fefa2f6c3e30fbf418f10b", + "patch_hash": "sha256:39da004c3b0de60c884343eb65eed8ba4cbaed102b0b0e0db482084634e97315", + "test_patch_hash": "sha256:c38b83ce8113ce9e88954766a457fef81f32171d1406c8b7858c5a683e2630b1", + "f2p_hash": "sha256:e841a8565a45f855aa75d37bd75900dc1ae305aa32cf97a154f7a497ef47bbe7", + "p2p_hash": "sha256:8eeace7feaa6ebab7c9caa0b39a524fb52ba336bc39d28cb374f4c69203a6e5d", + "gold_context_span_count": 38, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "language_coverage:python", + "deterministic_rank": "sha256:01ecf67e3162f21cfb345b3953fc150968f3b870f0b7b7246d1c0ee576af3a0a" + }, + { + "instance_id": "Multi-SWE-Bench__rust__maintenance__bugfix__1b6e3d94", + "original_inst_id": "rayon-rs__rayon-986", + "source": "Multi", + "language": "rust", + "repo": "rayon-rs/rayon", + "repo_url": "https://github.com/rayon-rs/rayon.git", + "base_commit": "2de810e97d5ce832ff98023a4a9cf215a86244ea", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:d2e0e4c6e073868fc45c3baf8378dd67103f69770ac941ed7e59af8a9625c481", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:35d3e3085f7acacc2aef63952a83fc2144cf6df257d6dd5e88374898670e3f33", + "patch_hash": "sha256:761b3732a3a9b95afcf2be239f751b68ad7e0f41ebc12a209b8f3ad4452fd428", + "test_patch_hash": "sha256:a17b3f33f4c0df04b48ebd85e02a2f7583f3d1d3a695686b9fd9bef908c8e366", + "f2p_hash": "sha256:f9e706fe3388368af494dcadd0476b6d6e5480b69c1ea8ebc42f2fb911c2a065", + "p2p_hash": 
"sha256:2a69fc0a192a55fbeed4b93ff95427df35dceb721c523f27a4b6428ff8aaaa9a", + "gold_context_span_count": 3, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "language_coverage:rust", + "deterministic_rank": "sha256:0a3794344aed7de0e0c0025cb6f3928b2d7a3707a61968fbd4c1656047167d50" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__2bb4ea7a", + "original_inst_id": "mui__material-ui-11451", + "source": "Poly", + "language": "typescript", + "repo": "mui/material-ui", + "repo_url": "https://github.com/mui/material-ui.git", + "base_commit": "04fae47c2a876f38aacfae866d220ddcbb7358ef", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:5ceb05d2189ff18b4cbb02093d5e4b1e52bffb08e80122f5d8b3699222415b42", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:d77bcd419c081d67e27c620253eaf90ffba372835e306a429e17a4eb860417d8", + "patch_hash": "sha256:15fcc637a90c60bca48396b1bc2d8f6cf14fd759a133cfec08cb3677d20c3321", + "test_patch_hash": "sha256:2ba883d077b80974f3b32b01c6e312a79deac6bb09b5661651815b925417c2b0", + "f2p_hash": "sha256:003a275a398f95298be3292b2ec1ab9784677110f9f0ffca80b240b209b3b6d9", + "p2p_hash": "sha256:0189e0d23712d2134e97dc5796caf6c2a00d6d1b9d80395ecc679ab587871a72", + "gold_context_span_count": 6, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "language_coverage:typescript", + "deterministic_rank": "sha256:057eb134ead21d5ff4b6e9bbac2a919bbe31aecf02894ec8144c5d6ed317e08b" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__b5cec962", 
+ "original_inst_id": "sphinx-doc__sphinx-7462", + "source": "Verified", + "language": "python", + "repo": "sphinx-doc/sphinx", + "repo_url": "https://github.com/sphinx-doc/sphinx.git", + "base_commit": "b3e26a6c851133b82b50f4b68b53692076574d13", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:d0b3103864192f0222897e7cbb72d6a99e01d1a1517ab5b1415702d6b6d81f44", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:3a69df821d9b5c1e3f9b0a66de9573c49d23946cb4fd397d6f60f75c1629bf29", + "patch_hash": "sha256:13cebe1d1a9993e2bbb9707a9c85bf4e98fab0563d8305e446147266f7274fae", + "test_patch_hash": "sha256:1745f2e0dfab31c3d3e871d528b1847969758b3aa457a5aa0145b50f647d29e1", + "f2p_hash": "sha256:ab83c2156bc175d166f39f0b76a83537e7e05796612c81afe0c02ed29e3f6cb7", + "p2p_hash": "sha256:2ea7e8094b6379e26c73114345ab56b377d40eb1d225f90f65457ca6a4f6338f", + "gold_context_span_count": 6, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "source_coverage:Verified", + "deterministic_rank": "sha256:0308fa0bca6a4843611607cf2809d2cfae1fcdd535a79d1418c4498eadae7fc6" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__bd81816f", + "original_inst_id": "sveltejs__svelte-12649", + "source": "Multi", + "language": "javascript", + "repo": "sveltejs/svelte", + "repo_url": "https://github.com/sveltejs/svelte.git", + "base_commit": "e417d3a2d281a5ec9b595be5ffbd47efe57b28c3", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:6220f9321fcf00a621f7a25261507877c039185ac5162f390313c4863149d88c", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:1c95ff3cf469bad8e25d9880393364f3b4ceef0a490b7bb98363ba89d8117302", + 
"patch_hash": "sha256:5ea1a6794598dc6febdd72d6057f53798351c0cd818e5807bfc61c55be8ad5d9", + "test_patch_hash": "sha256:463ddfabf4000b974eadf4250aaa4bca2ffba2eacaff1c14ec57440eb217cb1e", + "f2p_hash": "sha256:9774cde059974b3da9225a7e8357248f715f8ce57909d612a84b41249c885a9e", + "p2p_hash": "sha256:5a5f762975164bdbe96da8187f7981ddfb8703d92ff7d788c9947b684d27cbbe", + "gold_context_span_count": 5, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "repo_coverage:https://github.com/sveltejs/svelte.git", + "deterministic_rank": "sha256:01cdbd47d54d4edaea4f4e1409d6837a7f1ccc6840eeeaf03b818c4b1ad6c457" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__3d43f489", + "original_inst_id": "sympy__sympy-20801", + "source": "Verified", + "language": "python", + "repo": "sympy/sympy", + "repo_url": "https://github.com/sympy/sympy.git", + "base_commit": "e11d3fed782146eebbffdc9ced0364b223b84b6c", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:f86e8f37d24aff7e284d6f7fbdc903525c2175528a1957144165848ed3882611", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:ccec0fb4b2b20c1357f5faab16b0c80da8d5ec49063805caef879b6ec2b54432", + "patch_hash": "sha256:dbec818a9a22bcfeaeeb2f292cf7bba7048f84bca05aa04f38870e483ec803d5", + "test_patch_hash": "sha256:f84920fc71c885455f40e439ea052707b7936b1367ba6af67a2a7b9c3f626f9c", + "f2p_hash": "sha256:c4492515480ba65d31e505c3be65f13ac4b0605033cad0eb3af2498d891ca0f4", + "p2p_hash": "sha256:92ceb1b732a10472f9080dcf81a5c97d42979e977ea72ce0d78d74c18928e265", + "gold_context_span_count": 4, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + 
"hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "repo_coverage:https://github.com/sympy/sympy.git", + "deterministic_rank": "sha256:04b354066b641959f673366d5562f54869fc105a427dc276397e06f73719617c" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__0b74a2c7", + "original_inst_id": "django__django-15104", + "source": "Verified", + "language": "python", + "repo": "django/django", + "repo_url": "https://github.com/django/django.git", + "base_commit": "a7e7043c8746933dafce652507d3b821801cdc7d", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:28a48760ccbbb8e549a66edeae834de8a47f4a34d06ba80e4c929c6ea55626d2", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:d5b9e5860d36119df398d4959137f1da593075e87a04bc2ffc2b6f5130f3293f", + "patch_hash": "sha256:9027ea1fe3294c58a5e144b66167851e4aa1c494a2044a1eb0eee61b14fdb28c", + "test_patch_hash": "sha256:a2e79b5018f0df0a14713774c7a23d3e205d50181e90e693b10a082b7e1b09cf", + "f2p_hash": "sha256:64c9ed9b8adf7cea5930cad3e75dc176b2fd9f0d19baab697bb9fedb1ee29845", + "p2p_hash": "sha256:b5d6ced33eda9e014f2c7a58f3c6e3f4f9c44417c15b3b7f3034d1efb37050c2", + "gold_context_span_count": 4, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "repo_coverage:https://github.com/django/django.git", + "deterministic_rank": "sha256:069c0ead766ec25d98c701050e07a0e0aa193492aebbe54df3710c483dc581f4" + }, + { + "instance_id": "Multi-SWE-Bench__typescript__maintenance__bugfix__8d2ac221", + "original_inst_id": "vuejs__core-11338", + "source": "Multi", + "language": "typescript", + "repo": "vuejs/core", + "repo_url": "https://github.com/vuejs/core.git", + "base_commit": 
"314ce82e479dbb33a9281ba8c2ebe288536b32df", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:ba71ad0cac694142813a1ca2fd4a48427bae06654cddec160ab5fbe3479c5efe", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:8ef471f302a75779fa253b72a32b646bbde912d1609d5ddcce33737bba0e172d", + "patch_hash": "sha256:8a825ebf0547ea7ba0481582a6142e8ac50e35abda621a57330b25860d3dbb27", + "test_patch_hash": "sha256:1c3d96b897df04791477d894ecb483b24b89c2a0747c025cd9972dcf0372c679", + "f2p_hash": "sha256:2da7f2e3aa00743c295a835682f3f3a8d795a9d737705282e50bc79985cd4030", + "p2p_hash": "sha256:d749e04597508014a59354eabfb7d3548e4b46efc1d69611e7eb70a05e3a29b8", + "gold_context_span_count": 1, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "repo_coverage:https://github.com/vuejs/core.git", + "deterministic_rank": "sha256:06f257dee6b643131f5e89fd302743e71d08c9f31af73689c9bd8437dac1def3" + }, + { + "instance_id": "SWE-Bench-Verified__python__maintenance__bugfix__497d4650", + "original_inst_id": "matplotlib__matplotlib-26342", + "source": "Verified", + "language": "python", + "repo": "matplotlib/matplotlib", + "repo_url": "https://github.com/matplotlib/matplotlib.git", + "base_commit": "2aee6ccd7c7e1f8d282c1e7579f4ee546b838542", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:6f94f5120bf02fab3b081a6316a334c57e3b46ac696c739a3fea9bded03ad6a0", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:419a82cda0d53300471284bb3f74091ad296860938a74c1bb8969ff558b37d87", + "patch_hash": "sha256:7b731bb05e85f1103654fc521800c50d4654b2f1c3a175b9c554a3275fe1af91", + "test_patch_hash": 
"sha256:861b4bbaa78126ed2eb6300cb9e98324d3508af22c7e2fc5a7a0df8bf78e82b5", + "f2p_hash": "sha256:6164581e61a64698a807ba470dfce8cdc4e50fddfb73f0362992a81c607b51ed", + "p2p_hash": "sha256:c7af2ef236d8d0fbbe3a9ef80b4b5537a99e696445a30c3c45494420185d56b4", + "gold_context_span_count": 4, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "repo_coverage:https://github.com/matplotlib/matplotlib.git", + "deterministic_rank": "sha256:078219aeba8f8ff60b7dced916a6d15adf436ba45be9bf9569c66002f6f60a3a" + }, + { + "instance_id": "Multi-SWE-Bench__go__maintenance__bugfix__1b8cfbf9", + "original_inst_id": "cli__cli-3608", + "source": "Multi", + "language": "go", + "repo": "cli/cli", + "repo_url": "https://github.com/cli/cli.git", + "base_commit": "026b07d1cfbf23a3ccb4d4703a2496d65e177fcc", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:5624c959557e8e711c9a6a87d4bf34b6c129f194037acb62a24f10fd06951aaf", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:285ac1fa7b8db77f5d3366c32022c2eb75f6d2eeb819038f9caf8d716e6a176c", + "patch_hash": "sha256:2055d5b9a01c5dc4920aeff6b78792051d42a21216f3c8541732f918b5dcd80a", + "test_patch_hash": "sha256:bccdb11384ba8309fef4c4b8dd3e9bc00b1c9ba1488574cb66054c30a67664f2", + "f2p_hash": "sha256:4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945", + "p2p_hash": "sha256:72c0895e3ceaa0ce85aab728fd108cb723d5a102d594faa849b47d6fef613e53", + "gold_context_span_count": 6, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": 
"repo_coverage:https://github.com/cli/cli.git", + "deterministic_rank": "sha256:081b5e11e2f16c4b99a8a7ec6e1a60b947ebc620ce615491fd7ebf084471e0c5" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__ed58622a", + "original_inst_id": "Significant-Gravitas__AutoGPT-4652", + "source": "Poly", + "language": "python", + "repo": "Significant-Gravitas/AutoGPT", + "repo_url": "https://github.com/Significant-Gravitas/AutoGPT.git", + "base_commit": "9150f32f8b8602395534795ddd2d930a1684e419", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:4f0e49579b8f6426e62ab322a02bdbc6cfaaad98b3a68dbbcf9494eda624cf74", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:e4690cc62e797a387c5ab6e9fda4da333ed9655dddbdd16feefe6bf10409ead3", + "patch_hash": "sha256:a15c0cce56ab9684014680e81e12a2452691ba7dd9a074abf9e1c147b8db544b", + "test_patch_hash": "sha256:fd72dac6d058feaff76f7bcfdd0deb5585cc7d4f2a82b0c9aa7f69807efd08cf", + "f2p_hash": "sha256:964f21451fc284a0a0b4dc07480f2b561be418cd15df6c8b836f741db2a12fb2", + "p2p_hash": "sha256:4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945", + "gold_context_span_count": 4, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "repo_coverage:https://github.com/Significant-Gravitas/AutoGPT.git", + "deterministic_rank": "sha256:0891fa88d9469a199cd07fe9ab642933bf43e31eac4170c1593db85b182998a8" + }, + { + "instance_id": "SWE-PolyBench__typescript__maintenance__bugfix__42165c4e", + "original_inst_id": "coder__code-server-6278", + "source": "Poly", + "language": "typescript", + "repo": "coder/code-server", + "repo_url": "https://github.com/coder/code-server.git", + "base_commit": "5d3c9edce436d11d51aa1e586c11eaa49d626dc2", + "problem_statement_ref": 
"dataset_field:problem_statement", + "problem_statement_hash": "sha256:700bd6272bcb0d96b6c9a21deb8f955b4fa5859f9ce2ae1c70bc9b1c9a5f498a", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:4c8b070a0e8a04edbcec642c760e80758d27038332444f30ae1fcd01d3d7be22", + "patch_hash": "sha256:48fd1ab073d134e7ed2466022d84f4bff72ae861191250834b275a0ee31d2101", + "test_patch_hash": "sha256:8b5aa18dc79a9236b871b3885cee9b9409084d731b157e6c7f73ca3a1abf7de4", + "f2p_hash": "sha256:a417b6820ece6a95fb17b3f23a1543407f47bc09112776ff4b6616c5ad849941", + "p2p_hash": "sha256:eaa6a203414a673aacfb010a35c637a4b4b29b62705464c41e436b7ed07abfd9", + "gold_context_span_count": 8, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "repo_coverage:https://github.com/coder/code-server.git", + "deterministic_rank": "sha256:09ce27ec3327da069ae81a329ef028e8d7234ff82d8b3534a3e8e02316931bb2" + }, + { + "instance_id": "Multi-SWE-Bench__javascript__maintenance__bugfix__72488b59", + "original_inst_id": "iamkun__dayjs-734", + "source": "Multi", + "language": "javascript", + "repo": "iamkun/dayjs", + "repo_url": "https://github.com/iamkun/dayjs.git", + "base_commit": "9ad2e47e0569b23991bb0d5578f49c792c12df08", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:0da2caeed51e7052d0def67531b090f806a40bd05607b992335156d2ebcd26b7", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:277283406441212773856bb86437ef9f010edd8330673b808a4355b87f946f3b", + "patch_hash": "sha256:ee9a68adab2e0bcb376b04f28f8b04f892a565104dd6470f46c6e14f0a3da7b9", + "test_patch_hash": "sha256:b3f11ed93a41fb1c9b79adc890c9a9f5353423396b31b6301219ed2ed3c64dfc", + "f2p_hash": 
"sha256:25489d127ba8e86e07e3f57d792aa12dbd1e5bf4930fcf96ce16bb35dd0f2dbe", + "p2p_hash": "sha256:3e558fdeb2b03a062ddf7384221784371658fcac6f5be29b01da586ca229d0e3", + "gold_context_span_count": 2, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "repo_coverage:https://github.com/iamkun/dayjs.git", + "deterministic_rank": "sha256:0b9a0ab545b454b9e02de1b980bd114e80ee942edd577ca33c23af692b093eb2" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__3bb9721e", + "original_inst_id": "huggingface__transformers-23796", + "source": "Poly", + "language": "python", + "repo": "huggingface/transformers", + "repo_url": "https://github.com/huggingface/transformers.git", + "base_commit": "de9255de27abfcae4a1f816b904915f0b1e23cd9", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:54872da2cbad7c4d2ffa8f27589c9512bfe330ec39e3ec932813b5bee6d14645", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:47e235cec8e62d0200b74b1960f2ff7a403abe405c6d686f6e947d65eb7c0032", + "patch_hash": "sha256:d616ac16ea924be75d0c2dbf02ef0d9def6911470d12745df9f864f84385bd6a", + "test_patch_hash": "sha256:2a6f350f2825586d6926a91ee0d22552ace8745e0f305e1c283258e09a356603", + "f2p_hash": "sha256:bd8fee5f231e391541a7a8a10394a21a982426f51fb2b603ffb9b9f2d235c846", + "p2p_hash": "sha256:d54b2c97284fef4f7d3400bb4cb2344ad1ad995272c2b09b8c4eb512c760dc80", + "gold_context_span_count": 8, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "repo_coverage:https://github.com/huggingface/transformers.git", + 
"deterministic_rank": "sha256:0c5ec70f387858a230cec51047ebc2f8de9823bf61f55799a644c3d0486e53ec" + }, + { + "instance_id": "SWE-PolyBench__python__maintenance__bugfix__b110ef6c", + "original_inst_id": "keras-team__keras-19924", + "source": "Poly", + "language": "python", + "repo": "keras-team/keras", + "repo_url": "https://github.com/keras-team/keras.git", + "base_commit": "a2e9a5252d2eab389bd19d359e6e7325a8232c79", + "problem_statement_ref": "dataset_field:problem_statement", + "problem_statement_hash": "sha256:0675761ca6753399284ab8041ea0634562fe10ce3d0eba258249dc72ae013665", + "gold_context_ref": "dataset_field:gold_context", + "gold_context_hash": "sha256:e09351f3bbb210a57eb89e1ba6b2e64e4960c893a6963dec425dd9f7bc883704", + "patch_hash": "sha256:4ebb5fda593d647f505895617ae10d75701159aa81c3495015761ff7e0580a14", + "test_patch_hash": "sha256:b80e72f00e0bfca54f32bc74dbfb55337a92aa6a8c186ea223336f5b294e33c8", + "f2p_hash": "sha256:b94f4142820f1e2f1e49b4eee09cc93f438099890599993215f8f898f612bb8d", + "p2p_hash": "sha256:470b4ab32dbfcd5d30e849a9929326582d083a98cd88135867ccef1d85c04056", + "gold_context_span_count": 14, + "hash_canonicalization_version": "contextbench-canonical-json-lf-v1", + "hardness_signal_status": "unavailable_in_contextbench_verified_schema", + "hardness_signal_source": "dataset_schema_probe", + "hardness_proxy_used": false, + "inclusion_rationale": "repo_coverage:https://github.com/keras-team/keras.git", + "deterministic_rank": "sha256:0ce807d76cad534312c24b67fbc1971f55a13237477ac46b0ffe1a2c707b96da" + } + ], + "manifest_hash": "sha256:1d30ee64bc3e0cb385d7bb76d58f5f9c21da3f93e69019067596111dedc16848" +} diff --git a/tests/impact-2hop.test.ts b/tests/impact-2hop.test.ts index cf1f84f..010499e 100644 --- a/tests/impact-2hop.test.ts +++ b/tests/impact-2hop.test.ts @@ -15,6 +15,8 @@ import { RELATIONSHIPS_FILENAME } from '../src/constants/codebase-context.js'; +const SLOW_WINDOWS_TEST_TIMEOUT_MS = 60000; + vi.mock('../src/core/reranker.js', () => ({ 
rerank: vi.fn(async (_query: string, results: unknown) => results), getRerankerStatus: vi.fn(() => 'fallback'), @@ -127,5 +129,5 @@ describe('Impact candidates (2-hop)', () => { `Expected hop 2 candidate src/a.ts, got impact.details=${JSON.stringify(details)}` ); } - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); }); diff --git a/tests/search-compact-mode.test.ts b/tests/search-compact-mode.test.ts index 92f0327..c4d573c 100644 --- a/tests/search-compact-mode.test.ts +++ b/tests/search-compact-mode.test.ts @@ -50,6 +50,8 @@ function parseSearchResponse(text: string): SearchResponse { return JSON.parse(text) as SearchResponse; } +const SLOW_WINDOWS_TEST_TIMEOUT_MS = 60000; + describe('search_codebase compact/full mode', () => { let tempRoot: string | null = null; let originalArgv: string[] | null = null; @@ -572,7 +574,7 @@ describe('search_codebase compact/full mode', () => { expect(results[0].filePath).toBe(actualChunk.filePath); expect(results[0].imports).toEqual(actualChunk.imports); expect(results[0].exports).toEqual(actualChunk.exports); - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); it('adds a warning only when the final full payload exceeds the compact budget threshold', async () => { const oversizedSummary = 'Token-heavy summary '.repeat(1200); diff --git a/tests/search-decision-card.test.ts b/tests/search-decision-card.test.ts index d99b4c7..c6d77ae 100644 --- a/tests/search-decision-card.test.ts +++ b/tests/search-decision-card.test.ts @@ -40,6 +40,8 @@ type ToolCallResponse = { isError?: boolean; }; +const SLOW_WINDOWS_TEST_TIMEOUT_MS = 60000; + function getToolCallHandler( server: unknown ): (request: ToolCallRequest) => Promise { @@ -153,7 +155,7 @@ export class ProfileService { config: { skipEmbedding: true } }); await indexer.index(); - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); afterEach(async () => { if (originalArgv) { @@ -170,7 +172,7 @@ export class ProfileService { await rmWithRetries(tempRoot); tempRoot = null; } - }, 30000); + }, 
SLOW_WINDOWS_TEST_TIMEOUT_MS); it('intent="edit" with multiple results returns full decision card with ready field', async () => { if (!tempRoot) throw new Error('tempRoot not initialized'); @@ -207,7 +209,7 @@ export class ProfileService { } expect(preflight.ready).toBeDefined(); expect(typeof preflight.ready).toBe('boolean'); - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); it('decision card has all expected fields when returned', async () => { if (!tempRoot) throw new Error('tempRoot not initialized'); @@ -259,7 +261,7 @@ export class ProfileService { if (preflight.whatWouldHelp) { expect(Array.isArray(preflight.whatWouldHelp)).toBe(true); } - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); it('intent="explore" returns lightweight preflight', async () => { if (!tempRoot) throw new Error('tempRoot not initialized'); @@ -290,7 +292,7 @@ export class ProfileService { expect(typeof preflight.ready).toBe('boolean'); // Should NOT have full decision card fields for explore } - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); it('includes snippet field when includeSnippets=true', async () => { if (!tempRoot) throw new Error('tempRoot not initialized'); @@ -321,7 +323,7 @@ export class ProfileService { // At least some results should have a snippet const withSnippets = parsed.results.filter((result) => result.snippet); expect(withSnippets.length).toBeGreaterThan(0); - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); it('does not include snippet field when includeSnippets=false', async () => { if (!tempRoot) throw new Error('tempRoot not initialized'); @@ -350,7 +352,7 @@ export class ProfileService { parsed.results.forEach((result) => { expect(result.snippet).toBeUndefined(); }); - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); it('scope header starts snippet when includeSnippets=true', async () => { if (!tempRoot) throw new Error('tempRoot not initialized'); @@ -381,5 +383,5 @@ export class ProfileService { const firstLine = withSnippet.snippet.split('\n')[0].trim(); 
expect(firstLine).toMatch(/^\/\//); } - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); }); diff --git a/tests/search-snippets.test.ts b/tests/search-snippets.test.ts index 4b387ed..01aa59d 100644 --- a/tests/search-snippets.test.ts +++ b/tests/search-snippets.test.ts @@ -11,6 +11,8 @@ vi.mock('../src/core/reranker.js', () => ({ isAmbiguous: vi.fn(() => false) })); +const SLOW_WINDOWS_TEST_TIMEOUT_MS = 60000; + describe('Search Snippets with Scope Headers', () => { let tempRoot: string | null = null; @@ -98,7 +100,7 @@ export const VERSION = '1.0.0'; config: { skipEmbedding: true } }); await indexer.index(); - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); afterEach(async () => { if (tempRoot) { @@ -106,7 +108,7 @@ export const VERSION = '1.0.0'; tempRoot = null; } delete process.env.CODEBASE_ROOT; - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); it('returns snippets when includeSnippets=true', async () => { if (!tempRoot) throw new Error('tempRoot not initialized'); @@ -136,7 +138,7 @@ export const VERSION = '1.0.0'; const withSnippets = parsed.results.filter((r: any) => r.snippet); expect(withSnippets.length).toBeGreaterThan(0); - }, 30000); + }, SLOW_WINDOWS_TEST_TIMEOUT_MS); it('scope header is a comment line starting with //', async () => { if (!tempRoot) throw new Error('tempRoot not initialized');