diff --git a/scientific-bounty-provenance-guard/README.md b/scientific-bounty-provenance-guard/README.md new file mode 100644 index 00000000..49013e2b --- /dev/null +++ b/scientific-bounty-provenance-guard/README.md @@ -0,0 +1,41 @@ +# Scientific Bounty Provenance Guard + +This module adds a solver-side provenance and duplicate-submission guard for a +scientific bounty marketplace. It is intentionally separate from intake, +scoring, arbitration, escrow, IP transfer, export-control, and accessibility +workflows already represented by other issue #18 submissions. + +## Purpose + +Scientific bounty sponsors need a lightweight gate before payout review begins: + +- verify each submission declares artifact hashes and dataset/license provenance; +- identify exact artifact reuse across different solver teams; +- flag near-duplicate abstracts that may indicate derivative or recycled work; +- produce deterministic release, revise, or hold recommendations for reviewers. + +The guard does not process payments, store private data, call external APIs, or +make legal conclusions. It only prepares a reproducible reviewer packet. + +## Usage + +```bash +npm test +npm run demo +``` + +## Decision Model + +- `release`: submission has complete provenance and no duplicate signal. +- `revise`: submission can proceed after missing hash/license/repro notes are fixed. +- `hold`: duplicate hashes or near-duplicate scientific rationale require review + before payout, IP transfer, or sponsor acceptance. + +## Files + +- `src/index.js`: guard implementation. +- `test/provenanceGuard.test.js`: deterministic Node test suite. +- `examples/submissions.json`: sample challenge packet. +- `artifacts/reviewer-report.md`: reviewer-facing demo report. +- `artifacts/summary.svg` and `artifacts/summary.png`: visual summary artifacts. +- `artifacts/demo.mp4`: short demo video for bounty review. 
diff --git a/scientific-bounty-provenance-guard/artifacts/demo.mp4 b/scientific-bounty-provenance-guard/artifacts/demo.mp4 new file mode 100644 index 00000000..39a3abd6 Binary files /dev/null and b/scientific-bounty-provenance-guard/artifacts/demo.mp4 differ diff --git a/scientific-bounty-provenance-guard/artifacts/reviewer-report.md b/scientific-bounty-provenance-guard/artifacts/reviewer-report.md new file mode 100644 index 00000000..84b5872f --- /dev/null +++ b/scientific-bounty-provenance-guard/artifacts/reviewer-report.md @@ -0,0 +1,30 @@ +# Provenance Review Report + +Challenge: sci-bounty-demo-001 +Generated: 2026-06-05T23:00:00Z + +## Summary + +- Release: 0 +- Revise: 1 +- Hold: 2 + +## Decisions + +### sub-clean-rainfall + +- Team: atlas-lab +- Decision: hold +- Reasons: duplicate or near-duplicate signal requires reviewer triage + +### sub-copied-rainfall + +- Team: mirror-lab +- Decision: hold +- Reasons: duplicate or near-duplicate signal requires reviewer triage + +### sub-incomplete-biomarker + +- Team: cell-lab +- Decision: revise +- Reasons: missing reproducibility note; missing sha256 for biomarkers/rankings.csv; missing license for biomarkers/rankings.csv diff --git a/scientific-bounty-provenance-guard/artifacts/summary.png b/scientific-bounty-provenance-guard/artifacts/summary.png new file mode 100644 index 00000000..da9a3eb0 Binary files /dev/null and b/scientific-bounty-provenance-guard/artifacts/summary.png differ diff --git a/scientific-bounty-provenance-guard/artifacts/summary.svg b/scientific-bounty-provenance-guard/artifacts/summary.svg new file mode 100644 index 00000000..c0c60087 --- /dev/null +++ b/scientific-bounty-provenance-guard/artifacts/summary.svg @@ -0,0 +1,20 @@ + + + + Scientific Bounty Provenance Guard + Pre-payout duplicate and evidence provenance triage for solver submissions + + + Release + 0 clean + + Revise + 1 incomplete + + Hold + 2 duplicate + + Guard checks + artifact sha256, dataset license, reproducibility note, 
cross-team hash reuse, near-duplicate rationale + No payment processing, private data, credentials, external APIs, or legal conclusions. + diff --git a/scientific-bounty-provenance-guard/examples/submissions.json b/scientific-bounty-provenance-guard/examples/submissions.json new file mode 100644 index 00000000..4efa0291 --- /dev/null +++ b/scientific-bounty-provenance-guard/examples/submissions.json @@ -0,0 +1,47 @@ +{ + "challengeId": "sci-bounty-demo-001", + "generatedAt": "2026-06-05T23:00:00Z", + "submissions": [ + { + "id": "sub-clean-rainfall", + "teamId": "atlas-lab", + "title": "Regional rainfall model", + "abstract": "Forecast regional rainfall with an open climate dataset and documented metrics.", + "reproducibilityNote": "Run notebook.ipynb with seed 42 and compare metrics.json.", + "artifacts": [ + { + "path": "rainfall/model-card.md", + "sha256": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "license": "CC-BY-4.0", + "dataset": "open-climate-demo" + } + ] + }, + { + "id": "sub-copied-rainfall", + "teamId": "mirror-lab", + "title": "Regional rainfall model", + "abstract": "Forecast regional rainfall with an open climate dataset and documented metrics.", + "reproducibilityNote": "Run notebook.ipynb with seed 42 and compare metrics.json.", + "artifacts": [ + { + "path": "copy/model-card.md", + "sha256": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "license": "CC-BY-4.0", + "dataset": "open-climate-demo" + } + ] + }, + { + "id": "sub-incomplete-biomarker", + "teamId": "cell-lab", + "title": "Biomarker screen", + "abstract": "Rank candidate biomarkers for a single-cell RNA sequencing challenge.", + "artifacts": [ + { + "path": "biomarkers/rankings.csv" + } + ] + } + ] +} diff --git a/scientific-bounty-provenance-guard/package.json b/scientific-bounty-provenance-guard/package.json new file mode 100644 index 00000000..191040fb --- /dev/null +++ b/scientific-bounty-provenance-guard/package.json @@ -0,0 +1,10 @@ 
import { createHash } from "node:crypto";

/**
 * Minimum Jaccard similarity between two submissions' title+abstract token
 * sets at which the pair is placed on hold for reviewer triage.
 */
const HOLD_SIMILARITY_THRESHOLD = 0.82;

/**
 * Lower-cases `value` and collapses every run of characters outside
 * `[a-z0-9]` into a single space, trimming the ends.
 *
 * @param {unknown} value - Text to normalize; `null`/`undefined` become "".
 * @returns {string} Space-separated lower-case ASCII alphanumeric words.
 */
export function normalizeText(value) {
  // The replace already collapses whitespace runs (whitespace is outside
  // [a-z0-9]), so no separate whitespace pass is needed after trimming.
  return String(value ?? "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, " ")
    .trim();
}

/**
 * Splits normalized text into its set of distinct word tokens.
 *
 * @param {unknown} value - Text to tokenize.
 * @returns {Set<string>} Distinct tokens; empty when no alphanumerics remain.
 */
export function tokenize(value) {
  const normalized = normalizeText(value);
  return new Set(normalized ? normalized.split(" ") : []);
}

/**
 * Jaccard index |A ∩ B| / |A ∪ B| of two token sets.
 * Two empty sets are defined as identical (1).
 *
 * @param {Set<string>} a
 * @param {Set<string>} b
 * @returns {number} Similarity in [0, 1].
 */
function jaccardOfTokenSets(a, b) {
  if (a.size === 0 && b.size === 0) {
    return 1;
  }

  let intersection = 0;
  for (const token of a) {
    if (b.has(token)) {
      intersection += 1;
    }
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|; avoids materializing the union set.
  return intersection / (a.size + b.size - intersection);
}

/**
 * Token-set Jaccard similarity of two pieces of text.
 *
 * @param {unknown} left
 * @param {unknown} right
 * @returns {number} Similarity in [0, 1]; 1 when both texts have no tokens.
 */
export function jaccardSimilarity(left, right) {
  return jaccardOfTokenSets(tokenize(left), tokenize(right));
}

/**
 * Returns the submission's artifact list, or an empty list when the field is
 * missing or not an array (e.g. a string would otherwise be iterated by
 * character).
 */
function listArtifacts(submission) {
  const artifacts = submission?.artifacts;
  return Array.isArray(artifacts) ? artifacts : [];
}

/**
 * Produces a stable sha256 fingerprint of an artifact's declared metadata
 * (path, sha256, license, dataset). Missing fields hash as empty strings.
 *
 * @param {{path?: string, sha256?: string, license?: string, dataset?: string}} artifact
 * @returns {string} 64-character lower-case hex digest.
 */
export function hashArtifactDescriptor(artifact) {
  const descriptor = JSON.stringify({
    path: artifact?.path ?? "",
    sha256: artifact?.sha256 ?? "",
    license: artifact?.license ?? "",
    dataset: artifact?.dataset ?? ""
  });

  return createHash("sha256").update(descriptor).digest("hex");
}

/**
 * Lists provenance gaps for one submission, in report order: reproducibility
 * note first, then per-artifact sha256/license gaps, then the no-artifact case.
 *
 * @returns {string[]} Human-readable issue descriptions (empty when complete).
 */
function findArtifactIssues(submission) {
  const issues = [];
  const artifacts = listArtifacts(submission);

  if (!submission?.reproducibilityNote) {
    issues.push("missing reproducibility note");
  }

  for (const artifact of artifacts) {
    const label = artifact?.path ?? "artifact";

    if (!artifact?.sha256) {
      issues.push(`missing sha256 for ${label}`);
    }

    if (!artifact?.license) {
      issues.push(`missing license for ${label}`);
    }
  }

  if (artifacts.length === 0) {
    issues.push("no artifacts declared");
  }

  return issues;
}

/**
 * Collects the submission's declared artifact digests, trimmed and
 * lower-cased so the same hex digest matches regardless of letter case.
 */
function collectArtifactHashes(submission) {
  const hashes = new Set();

  for (const artifact of listArtifacts(submission)) {
    if (!artifact?.sha256) {
      continue;
    }

    const hash = String(artifact.sha256).trim().toLowerCase();
    if (hash) {
      hashes.add(hash);
    }
  }

  return hashes;
}

/**
 * Precomputes everything the pairwise comparison needs for one submission so
 * the O(n^2) loop does not rebuild hash and token sets for every pair.
 */
function buildComparisonProfile(submission) {
  // Only join fields that are present: interpolating a missing title or
  // abstract would inject the literal token "undefined" into the text.
  const text = [submission?.title, submission?.abstract]
    .filter((part) => part != null)
    .join(" ");

  return {
    teamId: submission?.teamId,
    hashes: collectArtifactHashes(submission),
    tokens: tokenize(text)
  };
}

/**
 * Compares every cross-team pair of submissions and records a signal when
 * they share an artifact digest or their title+abstract similarity reaches
 * HOLD_SIMILARITY_THRESHOLD.
 *
 * @param {object[]} submissions
 * @returns {{signals: object[], heldIndexes: Set<number>}} Signals (with
 *   submission ids for reporting) and the indexes of flagged submissions.
 */
function findDuplicateSignals(submissions) {
  const profiles = submissions.map(buildComparisonProfile);
  const signals = [];
  const heldIndexes = new Set();

  for (let i = 0; i < profiles.length; i += 1) {
    for (let j = i + 1; j < profiles.length; j += 1) {
      const left = profiles[i];
      const right = profiles[j];

      // Reuse inside one team is expected. A missing teamId proves nothing,
      // so two submissions that both omit it are still compared.
      if (left.teamId != null && left.teamId === right.teamId) {
        continue;
      }

      const sharedHashes = [...left.hashes].filter((hash) => right.hashes.has(hash));

      // Absence of text is not evidence of duplication: only score pairs
      // where both sides actually have comparable tokens.
      const bothHaveText = left.tokens.size > 0 && right.tokens.size > 0;
      const similarity = bothHaveText ? jaccardOfTokenSets(left.tokens, right.tokens) : 0;

      if (sharedHashes.length > 0 || similarity >= HOLD_SIMILARITY_THRESHOLD) {
        signals.push({
          leftId: submissions[i]?.id,
          rightId: submissions[j]?.id,
          sharedHashes,
          similarity: Number(similarity.toFixed(3))
        });
        heldIndexes.add(i).add(j);
      }
    }
  }

  return { signals, heldIndexes };
}

/**
 * Evaluates a challenge packet and assigns each submission a deterministic
 * `release`, `revise`, or `hold` decision. `hold` takes precedence over
 * `revise`; a held submission still lists its provenance issues.
 *
 * @param {{challengeId?: string, generatedAt?: string, submissions?: object[]}} packet
 * @returns {{challengeId: (string|undefined), generatedAt: string,
 *   duplicateSignals: object[], decisions: object[],
 *   summary: {release: number, revise: number, hold: number}}}
 */
export function evaluateSubmissionProvenance(packet) {
  const submissions = Array.isArray(packet?.submissions) ? packet.submissions : [];
  const { signals: duplicateSignals, heldIndexes } = findDuplicateSignals(submissions);
  const summary = { release: 0, revise: 0, hold: 0 };

  // Held submissions are tracked by position, not id, so a missing or
  // colliding id cannot drag unrelated submissions into a hold.
  const decisions = submissions.map((submission, index) => {
    const issues = findArtifactIssues(submission);

    let decision = "release";
    let reasons = ["provenance complete and no duplicate signal found"];

    if (heldIndexes.has(index)) {
      decision = "hold";
      reasons = ["duplicate or near-duplicate signal requires reviewer triage", ...issues];
    } else if (issues.length > 0) {
      decision = "revise";
      reasons = issues;
    }

    summary[decision] += 1;

    return {
      submissionId: submission?.id,
      teamId: submission?.teamId,
      decision,
      reasons,
      artifactDescriptorHashes: listArtifacts(submission).map(hashArtifactDescriptor)
    };
  });

  return {
    challengeId: packet?.challengeId,
    generatedAt: packet?.generatedAt ?? "demo",
    duplicateSignals,
    decisions,
    summary
  };
}

/**
 * Renders an evaluation result as a Markdown reviewer report.
 *
 * @param {ReturnType<typeof evaluateSubmissionProvenance>} result
 * @returns {string} Markdown text ending with a single trailing newline.
 */
export function renderReviewerReport(result) {
  const header = [
    "# Provenance Review Report",
    "",
    `Challenge: ${result.challengeId}`,
    `Generated: ${result.generatedAt}`,
    "",
    "## Summary",
    "",
    `- Release: ${result.summary.release}`,
    `- Revise: ${result.summary.revise}`,
    `- Hold: ${result.summary.hold}`,
    "",
    "## Decisions"
  ];

  const sections = result.decisions.flatMap((decision) => [
    "",
    `### ${decision.submissionId}`,
    "",
    `- Team: ${decision.teamId}`,
    `- Decision: ${decision.decision}`,
    `- Reasons: ${decision.reasons.join("; ")}`
  ]);

  return `${[...header, ...sections].join("\n")}\n`;
}
"node:test"; +import assert from "node:assert/strict"; +import { + evaluateSubmissionProvenance, + jaccardSimilarity, + renderReviewerReport +} from "../src/index.js"; + +const completeArtifact = { + path: "results/model-card.md", + sha256: "a".repeat(64), + license: "CC-BY-4.0", + dataset: "open-climate-demo" +}; + +test("complete unique submissions are released", () => { + const result = evaluateSubmissionProvenance({ + challengeId: "challenge-clean", + submissions: [ + { + id: "sub-1", + teamId: "team-a", + title: "Regional rainfall model", + abstract: "Forecast regional rainfall using a documented open dataset.", + reproducibilityNote: "Run notebook.ipynb with seed 42.", + artifacts: [completeArtifact] + } + ] + }); + + assert.equal(result.summary.release, 1); + assert.equal(result.decisions[0].decision, "release"); +}); + +test("missing artifact provenance requests revision", () => { + const result = evaluateSubmissionProvenance({ + challengeId: "challenge-revise", + submissions: [ + { + id: "sub-2", + teamId: "team-b", + title: "Biomarker screen", + abstract: "Rank candidate biomarkers for single-cell analysis.", + artifacts: [{ path: "rankings.csv" }] + } + ] + }); + + assert.equal(result.summary.revise, 1); + assert.equal(result.decisions[0].decision, "revise"); + assert.match(result.decisions[0].reasons.join(" "), /missing sha256/); + assert.match(result.decisions[0].reasons.join(" "), /missing license/); +}); + +test("shared artifact hashes across teams put both submissions on hold", () => { + const result = evaluateSubmissionProvenance({ + challengeId: "challenge-duplicate-hash", + submissions: [ + { + id: "sub-3", + teamId: "team-c", + title: "Noise-reduction prototype", + abstract: "Prototype for quantum noise reduction with artifact evidence.", + reproducibilityNote: "Run demo.js.", + artifacts: [completeArtifact] + }, + { + id: "sub-4", + teamId: "team-d", + title: "Independent quantum denoising prototype", + abstract: "Separate writeup but reuses 
the same artifact hash.", + reproducibilityNote: "Run demo.js.", + artifacts: [{ ...completeArtifact, path: "copied/model-card.md" }] + } + ] + }); + + assert.equal(result.summary.hold, 2); + assert.equal(result.duplicateSignals.length, 1); + assert.deepEqual(result.duplicateSignals[0].sharedHashes, [completeArtifact.sha256]); +}); + +test("near-duplicate scientific rationale is held for reviewer triage", () => { + const similarity = jaccardSimilarity( + "Open climate benchmark with regional rainfall model and transparent metrics", + "Open climate benchmark with regional rainfall model and transparent metrics" + ); + + const result = evaluateSubmissionProvenance({ + challengeId: "challenge-near-duplicate", + submissions: [ + { + id: "sub-5", + teamId: "team-e", + title: "Open climate benchmark", + abstract: "Regional rainfall model with transparent benchmark metrics.", + reproducibilityNote: "Run notebook.", + artifacts: [{ ...completeArtifact, sha256: "b".repeat(64) }] + }, + { + id: "sub-6", + teamId: "team-f", + title: "Open climate benchmark", + abstract: "Regional rainfall model with transparent benchmark metrics.", + reproducibilityNote: "Run notebook.", + artifacts: [{ ...completeArtifact, sha256: "c".repeat(64) }] + } + ] + }); + + assert.equal(similarity, 1); + assert.equal(result.summary.hold, 2); +}); + +test("reviewer report renders deterministic summary", () => { + const result = evaluateSubmissionProvenance({ + challengeId: "challenge-report", + generatedAt: "2026-06-05T23:00:00Z", + submissions: [ + { + id: "sub-7", + teamId: "team-g", + title: "Clean submission", + abstract: "Complete scientific packet.", + reproducibilityNote: "Run npm test.", + artifacts: [completeArtifact] + } + ] + }); + const report = renderReviewerReport(result); + + assert.match(report, /Challenge: challenge-report/); + assert.match(report, /Release: 1/); + assert.match(report, /Decision: release/); +});