Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions scientific-bounty-provenance-guard/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Scientific Bounty Provenance Guard

This module adds a solver-side provenance and duplicate-submission guard for a
scientific bounty marketplace. It is intentionally separate from intake,
scoring, arbitration, escrow, IP transfer, export-control, and accessibility
workflows already represented by other issue #18 submissions.

## Purpose

Scientific bounty sponsors need a lightweight gate before payout review begins:

- verify each submission declares artifact hashes and dataset/license provenance;
- identify exact artifact reuse across different solver teams;
- flag near-duplicate abstracts that may indicate derivative or recycled work;
- produce deterministic release, revise, or hold recommendations for reviewers.

The guard does not process payments, store private data, call external APIs, or
make legal conclusions. It only prepares a reproducible reviewer packet.

## Usage

```bash
npm test
npm run demo
```

## Decision Model

- `release`: submission has complete provenance and no duplicate signal.
- `revise`: no duplicate signal, but the submission must first supply what is missing: declared artifacts, artifact hashes, licenses, or the reproducibility note.
- `hold`: an artifact hash shared with another team, or a near-duplicate title and
abstract, requires reviewer triage before payout, IP transfer, or sponsor acceptance.

## Files

- `src/index.js`: guard implementation.
- `test/provenanceGuard.test.js`: deterministic Node test suite.
- `examples/submissions.json`: sample challenge packet.
- `artifacts/reviewer-report.md`: reviewer-facing demo report.
- `artifacts/summary.svg` and `artifacts/summary.png`: visual summary artifacts.
- `artifacts/demo.mp4`: short demo video for bounty review.
Binary file not shown.
30 changes: 30 additions & 0 deletions scientific-bounty-provenance-guard/artifacts/reviewer-report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Provenance Review Report

Challenge: sci-bounty-demo-001
Generated: 2026-06-05T23:00:00Z

## Summary

- Release: 0
- Revise: 1
- Hold: 2

## Decisions

### sub-clean-rainfall

- Team: atlas-lab
- Decision: hold
- Reasons: duplicate or near-duplicate signal requires reviewer triage

### sub-copied-rainfall

- Team: mirror-lab
- Decision: hold
- Reasons: duplicate or near-duplicate signal requires reviewer triage

### sub-incomplete-biomarker

- Team: cell-lab
- Decision: revise
- Reasons: missing reproducibility note; missing sha256 for biomarkers/rankings.csv; missing license for biomarkers/rankings.csv
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
20 changes: 20 additions & 0 deletions scientific-bounty-provenance-guard/artifacts/summary.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
47 changes: 47 additions & 0 deletions scientific-bounty-provenance-guard/examples/submissions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"challengeId": "sci-bounty-demo-001",
"generatedAt": "2026-06-05T23:00:00Z",
"submissions": [
{
"id": "sub-clean-rainfall",
"teamId": "atlas-lab",
"title": "Regional rainfall model",
"abstract": "Forecast regional rainfall with an open climate dataset and documented metrics.",
"reproducibilityNote": "Run notebook.ipynb with seed 42 and compare metrics.json.",
"artifacts": [
{
"path": "rainfall/model-card.md",
"sha256": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
"license": "CC-BY-4.0",
"dataset": "open-climate-demo"
}
]
},
{
"id": "sub-copied-rainfall",
"teamId": "mirror-lab",
"title": "Regional rainfall model",
"abstract": "Forecast regional rainfall with an open climate dataset and documented metrics.",
"reproducibilityNote": "Run notebook.ipynb with seed 42 and compare metrics.json.",
"artifacts": [
{
"path": "copy/model-card.md",
"sha256": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
"license": "CC-BY-4.0",
"dataset": "open-climate-demo"
}
]
},
{
"id": "sub-incomplete-biomarker",
"teamId": "cell-lab",
"title": "Biomarker screen",
"abstract": "Rank candidate biomarkers for a single-cell RNA sequencing challenge.",
"artifacts": [
{
"path": "biomarkers/rankings.csv"
}
]
}
]
}
10 changes: 10 additions & 0 deletions scientific-bounty-provenance-guard/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"name": "scientific-bounty-provenance-guard",
"version": "1.0.0",
"private": true,
"type": "module",
"scripts": {
"test": "node --test test/*.test.js",
"demo": "node scripts/demo.js"
}
}
9 changes: 9 additions & 0 deletions scientific-bounty-provenance-guard/scripts/demo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { readFile } from "node:fs/promises";
import { evaluateSubmissionProvenance, renderReviewerReport } from "../src/index.js";

// Resolve the sample packet relative to this script rather than the current
// working directory, then parse it.
const packetUrl = new URL("../examples/submissions.json", import.meta.url);
const packetJson = await readFile(packetUrl, "utf8");
const evaluation = evaluateSubmissionProvenance(JSON.parse(packetJson));

// Print the Markdown reviewer report to stdout.
console.log(renderReviewerReport(evaluation));
176 changes: 176 additions & 0 deletions scientific-bounty-provenance-guard/src/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import { createHash } from "node:crypto";

// Minimum Jaccard similarity between two submissions' combined title and
// abstract text at which the pair is reported as a duplicate signal. The
// comparison is inclusive (>=), and a reported pair puts both submissions on hold.
const HOLD_SIMILARITY_THRESHOLD = 0.82;

/**
 * Reduces free text to a canonical form for token comparison: lower-cased
 * words separated by single spaces, with punctuation and symbols removed.
 *
 * Letters, combining marks, and digits from any script are kept. An
 * ASCII-only filter would erase accented, Greek, or CJK text entirely, which
 * makes unrelated non-English abstracts normalize to the same empty string.
 *
 * @param {unknown} value - Text to normalize; `null`/`undefined` are treated
 *   as the empty string and other values are converted with `String()`.
 * @returns {string} The normalized text, or `""` when nothing comparable remains.
 */
export function normalizeText(value) {
  return String(value ?? "")
    // NFKC folds composed/decomposed and compatibility forms (e.g. "e" + U+0301
    // vs "é", full-width digits) so visually identical text compares equal.
    .normalize("NFKC")
    .toLowerCase()
    // Every run of characters that is not a letter, mark, or number becomes one
    // space; whitespace is part of that run, so no second collapse is needed.
    .replace(/[^\p{L}\p{M}\p{N}]+/gu, " ")
    .trim();
}

/**
 * Splits text into its set of distinct normalized words.
 *
 * @param {unknown} value - Text to tokenize; it is passed through `normalizeText` first.
 * @returns {Set<string>} The unique tokens, or an empty set when the
 *   normalized text is empty.
 */
export function tokenize(value) {
  const text = normalizeText(value);

  // Splitting "" would yield [""], i.e. a bogus one-token set, so bail out early.
  if (text === "") {
    return new Set();
  }

  return new Set(text.split(" "));
}

/**
 * Computes the Jaccard similarity of two texts over their token sets:
 * the number of shared tokens divided by the number of distinct tokens overall.
 *
 * @param {unknown} left - First text.
 * @param {unknown} right - Second text.
 * @returns {number} A value from 0 (no shared tokens) to 1 (identical token
 *   sets). Two texts that both tokenize to nothing return 1.
 */
export function jaccardSimilarity(left, right) {
  const leftTokens = tokenize(left);
  const rightTokens = tokenize(right);

  // Both sets empty: avoid 0 / 0 and report the sets as identical.
  if (leftTokens.size + rightTokens.size === 0) {
    return 1;
  }

  let sharedCount = 0;
  for (const token of leftTokens) {
    if (rightTokens.has(token)) {
      sharedCount += 1;
    }
  }

  // |A ∪ B| = |A| + |B| − |A ∩ B|
  const unionSize = leftTokens.size + rightTokens.size - sharedCount;
  return sharedCount / unionSize;
}

/**
 * Produces a SHA-256 fingerprint of an artifact's declared metadata
 * (path, sha256, license, dataset), not of the artifact's contents.
 *
 * @param {object} artifact - Artifact entry from a submission.
 * @returns {string} Lower-case hex digest of the JSON-serialized descriptor.
 */
export function hashArtifactDescriptor(artifact) {
  // Field order is fixed so the serialized JSON, and therefore the digest, is stable.
  const descriptor = {};
  for (const field of ["path", "sha256", "license", "dataset"]) {
    // Missing and null fields both serialize as "".
    descriptor[field] = artifact[field] ?? "";
  }

  const hasher = createHash("sha256");
  hasher.update(JSON.stringify(descriptor));
  return hasher.digest("hex");
}

/**
 * Lists the provenance gaps in a single submission.
 *
 * Issues are reported in a fixed order: the reproducibility note first, then
 * hash and license gaps per artifact in declaration order, then the
 * "no artifacts declared" notice.
 *
 * @param {object} submission - Submission entry from the challenge packet.
 * @returns {string[]} Human-readable issue descriptions; empty when complete.
 */
function findArtifactIssues(submission) {
  const declared = submission.artifacts ?? [];
  const problems = [];

  if (!submission.reproducibilityNote) {
    problems.push("missing reproducibility note");
  }

  for (const { path, sha256, license } of declared) {
    // Artifacts without a path are referred to by a generic label.
    const label = path ?? "artifact";

    if (!sha256) {
      problems.push(`missing sha256 for ${label}`);
    }

    if (!license) {
      problems.push(`missing license for ${label}`);
    }
  }

  if (declared.length === 0) {
    problems.push("no artifacts declared");
  }

  return problems;
}

/**
 * Collects a submission's declared artifact digests in canonical form.
 *
 * Hex digests are case-insensitive, so they are trimmed and lower-cased;
 * otherwise the same file declared as "ABC…" and "abc…" would not match.
 */
function collectArtifactHashes(submission) {
  const hashes = (submission.artifacts ?? [])
    .map((artifact) => String(artifact.sha256 ?? "").trim().toLowerCase())
    .filter(Boolean);
  return new Set(hashes);
}

/**
 * Joins the title and abstract that are actually present into one text.
 *
 * Absent fields are skipped rather than interpolated, because a template
 * literal would turn them into the literal word "undefined".
 */
function describeSubmission(submission) {
  return [submission.title, submission.abstract]
    .filter((part) => part != null)
    .join(" ");
}

/**
 * Compares every pair of submissions and reports the pairs that look duplicated.
 *
 * A pair is reported when the two submissions share at least one artifact
 * sha256, or when their title + abstract text reaches
 * `HOLD_SIMILARITY_THRESHOLD` under `jaccardSimilarity`.
 *
 * @param {object[]} submissions - Submission entries from the challenge packet.
 * @returns {{leftId: *, rightId: *, sharedHashes: string[], similarity: number}[]}
 *   One signal per flagged pair, in input order. `sharedHashes` holds the
 *   canonical (trimmed, lower-case) digests; `similarity` is rounded to three
 *   decimals and is 0 when either side has no comparable text.
 */
function findDuplicateSignals(submissions) {
  // Derive the comparison data once per submission instead of once per pair.
  const profiles = submissions.map((submission) => {
    const text = describeSubmission(submission);
    return {
      id: submission.id,
      teamId: submission.teamId,
      hashes: collectArtifactHashes(submission),
      text,
      // Two texts with no tokens score 1 in jaccardSimilarity; that is an
      // absence of evidence, not a match, so such texts are never compared.
      hasComparableText: tokenize(text).size > 0
    };
  });

  const signals = [];

  for (let i = 0; i < profiles.length; i += 1) {
    for (let j = i + 1; j < profiles.length; j += 1) {
      const left = profiles[i];
      const right = profiles[j];

      // Only a known, identical team id exempts a pair. Two submissions that
      // both omit teamId are not assumed to come from the same team.
      if (left.teamId != null && left.teamId === right.teamId) {
        continue;
      }

      const sharedHashes = [...left.hashes].filter((hash) => right.hashes.has(hash));
      const similarity =
        left.hasComparableText && right.hasComparableText
          ? jaccardSimilarity(left.text, right.text)
          : 0;

      if (sharedHashes.length > 0 || similarity >= HOLD_SIMILARITY_THRESHOLD) {
        signals.push({
          leftId: left.id,
          rightId: right.id,
          sharedHashes,
          similarity: Number(similarity.toFixed(3))
        });
      }
    }
  }

  return signals;
}

/**
 * Evaluates every submission in a challenge packet and assigns each one a
 * `hold`, `revise`, or `release` decision.
 *
 * Precedence: a submission named in any duplicate signal is held (its
 * provenance issues are appended to the reasons); otherwise any provenance
 * issue means revise; otherwise it is released.
 *
 * @param {object} packet - Challenge packet with `challengeId`, optional
 *   `generatedAt`, and optional `submissions`.
 * @returns {object} `challengeId`, `generatedAt` (falls back to `"demo"`),
 *   `duplicateSignals`, per-submission `decisions`, and a `summary` count of
 *   each decision type.
 */
export function evaluateSubmissionProvenance(packet) {
  const entries = packet.submissions ?? [];
  const duplicateSignals = findDuplicateSignals(entries);

  // Both sides of every duplicate pair are put on hold.
  const flaggedIds = new Set();
  for (const { leftId, rightId } of duplicateSignals) {
    flaggedIds.add(leftId);
    flaggedIds.add(rightId);
  }

  const decisions = entries.map((submission) => {
    const issues = findArtifactIssues(submission);
    const artifactDescriptorHashes = (submission.artifacts ?? []).map(hashArtifactDescriptor);

    let decision;
    let reasons;
    if (flaggedIds.has(submission.id)) {
      decision = "hold";
      reasons = ["duplicate or near-duplicate signal requires reviewer triage", ...issues];
    } else if (issues.length > 0) {
      decision = "revise";
      reasons = issues;
    } else {
      decision = "release";
      reasons = ["provenance complete and no duplicate signal found"];
    }

    return {
      submissionId: submission.id,
      teamId: submission.teamId,
      decision,
      reasons,
      artifactDescriptorHashes
    };
  });

  // Tally decisions in a single pass; every decision is one of these three keys.
  const summary = { release: 0, revise: 0, hold: 0 };
  for (const { decision } of decisions) {
    summary[decision] += 1;
  }

  return {
    challengeId: packet.challengeId,
    generatedAt: packet.generatedAt ?? "demo",
    duplicateSignals,
    decisions,
    summary
  };
}

/**
 * Renders an evaluation result as a Markdown report for reviewers.
 *
 * @param {object} result - Object with `challengeId`, `generatedAt`,
 *   `summary` (`release`/`revise`/`hold` counts), and `decisions`.
 * @returns {string} Markdown text ending with a single trailing newline:
 *   a header, the summary counts, then one section per decision in order.
 */
export function renderReviewerReport(result) {
  const { challengeId, generatedAt, summary, decisions } = result;

  const header = [
    "# Provenance Review Report",
    "",
    `Challenge: ${challengeId}`,
    `Generated: ${generatedAt}`,
    "",
    "## Summary",
    "",
    `- Release: ${summary.release}`,
    `- Revise: ${summary.revise}`,
    `- Hold: ${summary.hold}`,
    "",
    "## Decisions"
  ];

  // Each decision contributes a blank separator line followed by its section.
  const sections = [...decisions].flatMap((entry) => [
    "",
    `### ${entry.submissionId}`,
    "",
    `- Team: ${entry.teamId}`,
    `- Decision: ${entry.decision}`,
    `- Reasons: ${entry.reasons.join("; ")}`
  ]);

  return `${[...header, ...sections].join("\n")}\n`;
}
Loading