Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions reviewer-calibration-assistant/demo/generate-demo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { mkdirSync, writeFileSync } from "node:fs";
import { evaluateReviewerCalibration, renderMarkdownReport } from "../src/calibration.js";

// Demo driver: builds a synthetic three-reviewer packet, evaluates it, and
// writes the JSON and Markdown reports into reports/.

/** Pairs a claim id with the evidence ids a reviewer attached to it (possibly none). */
const claimCheck = (claimId, ...evidenceIds) => ({ claimId, evidenceIds });

/** Packs the four rubric scores into an object with a fixed key order. */
const rubric = (clarity, rigor, novelty, reproducibility) => ({
  clarity,
  rigor,
  novelty,
  reproducibility
});

/** Serializes a value as 2-space-indented JSON. */
const prettyJson = (value) => JSON.stringify(value, null, 2);

const statReview = {
  reviewerId: "rev-stat",
  template: "neuroscience",
  recommendation: "major_revision",
  scores: rubric(3, 2, 4, 2),
  claimChecks: [
    claimCheck("claim-primary-endpoint", "fig-2", "table-1"),
    claimCheck("claim-causal-language", "methods-4"),
    claimCheck("claim-replication")
  ]
};

const domainReview = {
  reviewerId: "rev-domain",
  template: "neuroscience",
  recommendation: "minor_revision",
  scores: rubric(4, 4, 4, 3),
  claimChecks: [
    claimCheck("claim-primary-endpoint", "fig-2"),
    claimCheck("claim-causal-language"),
    claimCheck("claim-replication", "supp-1")
  ]
};

// This reviewer's template differs from the packet's domainTemplate below.
const methodsReview = {
  reviewerId: "rev-methods",
  template: "clinical_trials",
  recommendation: "reject",
  scores: rubric(2, 1, 3, 1),
  claimChecks: [
    claimCheck("claim-primary-endpoint", "table-1"),
    claimCheck("claim-causal-language"),
    claimCheck("claim-replication")
  ]
};

const packet = {
  projectId: "proj-neuro-118",
  manuscriptId: "ms-gapfinder-204",
  domainTemplate: "neuroscience",
  reviewMode: "public",
  reviews: [statReview, domainReview, methodsReview]
};

// The output directory is created before evaluation, then both reports are written.
mkdirSync("reports", { recursive: true });
const result = evaluateReviewerCalibration(packet);
writeFileSync("reports/reviewer-calibration-report.json", `${prettyJson(result)}\n`);
const markdown = renderMarkdownReport(result);
writeFileSync("reports/reviewer-calibration-report.md", markdown);
// Echo the same JSON to stdout so the run is visible in the terminal.
console.log(prettyJson(result));
46 changes: 46 additions & 0 deletions reviewer-calibration-assistant/demo/write-svg.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import { readFileSync, writeFileSync } from "node:fs";

// Renders a 1280x720 SVG summary card from the calibration report JSON in
// reports/ and writes it next to that report.

// Characters that carry markup meaning in XML text and attribute values.
const XML_ESCAPES = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&apos;"
};

/**
 * Escapes a value for safe interpolation into SVG/XML text content.
 *
 * @param {unknown} value - Any report field; coerced with String() first.
 * @returns {string} The value with &, <, >, " and ' replaced by XML entities.
 */
function escapeXml(value) {
  return String(value).replace(/[&<>"']/g, (char) => XML_ESCAPES[char]);
}

const result = JSON.parse(readFileSync("reports/reviewer-calibration-report.json", "utf8"));

// Only the first five blockers are listed, one row every 34px starting at
// y=402; the "Blockers" tile below still shows the full count.
const blockerLines = result.blockers.slice(0, 5).map((blocker, index) =>
  `<text x="72" y="${402 + index * 34}" class="small">${escapeXml(blocker)}</text>`
).join("\n");

// Report strings are escaped before interpolation so that a value containing
// "&" or "<" cannot produce a malformed SVG document.
const svg = `<svg xmlns="http://www.w3.org/2000/svg" width="1280" height="720" viewBox="0 0 1280 720">
<rect width="1280" height="720" fill="#f7f8fb"/>
<rect x="48" y="48" width="1184" height="624" rx="10" fill="#ffffff" stroke="#d8dde8"/>
<text x="72" y="116" class="title">Reviewer Calibration Assistant</text>
<text x="72" y="158" class="subtitle">AI-Powered Research Assistant Suite evidence artifact</text>

<rect x="72" y="210" width="250" height="118" rx="8" fill="#eef6ff"/>
<text x="96" y="254" class="label">Decision</text>
<text x="96" y="296" class="metric">${escapeXml(result.decision)}</text>

<rect x="356" y="210" width="250" height="118" rx="8" fill="#f2fbf4"/>
<text x="380" y="254" class="label">Agreement</text>
<text x="380" y="296" class="metric">${escapeXml(result.pairwiseAgreement)}</text>

<rect x="640" y="210" width="250" height="118" rx="8" fill="#fff8eb"/>
<text x="664" y="254" class="label">Consensus</text>
<text x="664" y="296" class="metric">${escapeXml(result.consensusAction)}</text>

<rect x="924" y="210" width="250" height="118" rx="8" fill="#fff1f1"/>
<text x="948" y="254" class="label">Blockers</text>
<text x="948" y="296" class="metric">${result.blockers.length}</text>

<text x="72" y="374" class="section">Calibration blockers</text>
${blockerLines}
<text x="72" y="614" class="footer">Synthetic data only. No private manuscripts, external AI APIs, credentials, or payment systems.</text>
<style>
text { font-family: Arial, Helvetica, sans-serif; fill: #1f2937; }
.title { font-size: 42px; font-weight: 700; }
.subtitle { font-size: 22px; fill: #5b6472; }
.label { font-size: 20px; fill: #5b6472; }
.metric { font-size: 24px; font-weight: 700; }
.section { font-size: 26px; font-weight: 700; }
.small { font-size: 22px; }
.footer { font-size: 18px; fill: #6b7280; }
</style>
</svg>
`;

writeFileSync("reports/reviewer-calibration-summary.svg", svg);
12 changes: 12 additions & 0 deletions reviewer-calibration-assistant/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"name": "reviewer-calibration-assistant",
"version": "1.0.0",
"description": "Deterministic reviewer calibration and inter-rater agreement assistant for SCIBASE AI review workflows.",
"type": "module",
"scripts": {
"test": "node test/calibration.test.js",
"demo": "node demo/generate-demo.js",
"demo:video": "npm run demo && node demo/write-svg.js && qlmanage -t -s 1280 -o reports reports/reviewer-calibration-summary.svg >/dev/null && ffmpeg -y -loop 1 -framerate 30 -t 8 -i reports/reviewer-calibration-summary.svg.png -vf format=yuv420p reports/reviewer-calibration-demo.mp4"
},
"license": "MIT"
}
36 changes: 36 additions & 0 deletions reviewer-calibration-assistant/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Reviewer Calibration Assistant

This module adds a deterministic reviewer calibration and inter-rater agreement assistant for the SCIBASE AI-Powered Research Assistant Suite.

## Scope

The assistant evaluates review packets before AI-generated peer-review summaries, reputation updates, or author-facing recommendations are released. It focuses on a distinct quality gate not covered by the existing broad assistant, reproducibility checker, research-gap finder, structured-abstract, model-assumption, dependency, external-validity, or evidence-binder slices.

It checks:

- pairwise reviewer agreement across clarity, rigor, novelty, reproducibility, and recommendation;
- recommendation consensus ratio;
- severe score spread by scoring dimension;
- domain-template drift;
- evidence-anchor coverage for reviewer claim checks;
- duplicate reviewer records;
- whether disputed packets should remain double-blind during calibration.

## Local Validation

```sh
npm test
npm run demo
npm run demo:video
```

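The demo commands write reviewer-ready evidence artifacts to `reports/` (`npm run demo` produces the JSON and Markdown reports; `npm run demo:video` additionally produces the SVG and MP4):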

- `reviewer-calibration-report.json`
- `reviewer-calibration-report.md`
- `reviewer-calibration-summary.svg`
- `reviewer-calibration-demo.mp4`

## Boundaries

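The implementation has no npm dependencies and uses synthetic data only. It does not call external AI APIs, ingest private manuscripts, access credentials, or integrate with payment systems. The optional `npm run demo:video` step additionally shells out to `qlmanage` (macOS Quick Look) and `ffmpeg`, which must be installed separately.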
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
"projectId": "proj-neuro-118",
"manuscriptId": "ms-gapfinder-204",
"reviewerCount": 3,
"reviewMode": "public",
"consensusAction": "major_revision",
"consensusRatio": 0.333,
"pairwiseAgreement": 0.667,
"pairwiseDetails": [
{
"reviewers": [
"rev-stat",
"rev-domain"
],
"agreement": 0.75
},
{
"reviewers": [
"rev-stat",
"rev-methods"
],
"agreement": 0.75
},
{
"reviewers": [
"rev-domain",
"rev-methods"
],
"agreement": 0.5
}
],
"scoreDivergence": [
{
"field": "clarity",
"spread": 2
},
{
"field": "rigor",
"spread": 3
},
{
"field": "reproducibility",
"spread": 2
}
],
"templateDrift": [
{
"reviewerId": "rev-methods",
"expectedTemplate": "neuroscience",
"actualTemplate": "clinical_trials"
}
],
"evidenceGaps": [
{
"reviewerId": "rev-stat",
"coverage": 0.667
},
{
"reviewerId": "rev-domain",
"coverage": 0.667
},
{
"reviewerId": "rev-methods",
"coverage": 0.333
}
],
"blindModeRisk": {
"reviewMode": "public",
"reason": "disputed review packet should remain double-blind until calibration is complete"
},
"duplicateReviewers": [],
"blockers": [
"low_inter_rater_agreement",
"weak_recommendation_consensus",
"severe_score_divergence",
"domain_template_drift",
"insufficient_evidence_coverage",
"blind_mode_escalation_required"
],
"decision": "calibrate_before_release",
"nextActions": [
"Run reviewer calibration on divergent scoring dimensions before author-facing release.",
"Request adjudication because recommendation consensus is below threshold.",
"Ask reviewers to justify high-spread score dimensions with evidence anchors.",
"Normalize all reviews to the configured domain template.",
"Require evidence anchors for unsupported claim checks.",
"Keep disputed packets double-blind until calibration is resolved."
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Reviewer Calibration Assistant Report

| Field | Value |
| --- | --- |
| Project | proj-neuro-118 |
| Manuscript | ms-gapfinder-204 |
| Reviewers | 3 |
| Decision | calibrate_before_release |
| Pairwise agreement | 0.667 |
| Consensus action | major_revision (0.333) |

## Blockers
- low_inter_rater_agreement
- weak_recommendation_consensus
- severe_score_divergence
- domain_template_drift
- insufficient_evidence_coverage
- blind_mode_escalation_required

## Required Actions
- Run reviewer calibration on divergent scoring dimensions before author-facing release.
- Request adjudication because recommendation consensus is below threshold.
- Ask reviewers to justify high-spread score dimensions with evidence anchors.
- Normalize all reviews to the configured domain template.
- Require evidence anchors for unsupported claim checks.
- Keep disputed packets double-blind until calibration is resolved.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading