diff --git a/reviewer-calibration-assistant/demo/generate-demo.js b/reviewer-calibration-assistant/demo/generate-demo.js
new file mode 100644
index 00000000..2dd5116c
--- /dev/null
+++ b/reviewer-calibration-assistant/demo/generate-demo.js
@@ -0,0 +1,51 @@
+import { mkdirSync, writeFileSync } from "node:fs";
+import { evaluateReviewerCalibration, renderMarkdownReport } from "../src/calibration.js";
+
+/** Builds one claim check; every argument after the claim id is an evidence anchor. */
+const claim = (claimId, ...evidenceIds) => ({ claimId, evidenceIds });
+
+/** Builds one synthetic review; scores are listed as clarity, rigor, novelty, reproducibility. */
+function review(reviewerId, template, recommendation, scoreList, claimChecks) {
+  const [clarity, rigor, novelty, reproducibility] = scoreList;
+  return {
+    reviewerId,
+    template,
+    recommendation,
+    scores: { clarity, rigor, novelty, reproducibility },
+    claimChecks
+  };
+}
+
+// Synthetic three-reviewer packet: the recommendations all differ and one
+// reviewer used a template other than the packet's domain template.
+const packet = {
+  projectId: "proj-neuro-118",
+  manuscriptId: "ms-gapfinder-204",
+  domainTemplate: "neuroscience",
+  reviewMode: "public",
+  reviews: [
+    review("rev-stat", "neuroscience", "major_revision", [3, 2, 4, 2], [
+      claim("claim-primary-endpoint", "fig-2", "table-1"),
+      claim("claim-causal-language", "methods-4"),
+      claim("claim-replication")
+    ]),
+    review("rev-domain", "neuroscience", "minor_revision", [4, 4, 4, 3], [
+      claim("claim-primary-endpoint", "fig-2"),
+      claim("claim-causal-language"),
+      claim("claim-replication", "supp-1")
+    ]),
+    review("rev-methods", "clinical_trials", "reject", [2, 1, 3, 1], [
+      claim("claim-primary-endpoint", "table-1"),
+      claim("claim-causal-language"),
+      claim("claim-replication")
+    ])
+  ]
+};
+
+// Create the output directory first, then evaluate once and reuse the result for every artifact.
+mkdirSync("reports", { recursive: true });
+const result = evaluateReviewerCalibration(packet);
+const serialized = JSON.stringify(result, null, 2);
+writeFileSync("reports/reviewer-calibration-report.json", `${serialized}\n`);
+writeFileSync("reports/reviewer-calibration-report.md", renderMarkdownReport(result));
+console.log(serialized);
diff --git a/reviewer-calibration-assistant/demo/write-svg.js b/reviewer-calibration-assistant/demo/write-svg.js
new file mode 100644
index 00000000..8f2f04f1
--- /dev/null
+++ b/reviewer-calibration-assistant/demo/write-svg.js
@@ -0,0 +1,46 @@
+import { readFileSync, writeFileSync } from "node:fs";
+
+const result = JSON.parse(readFileSync("reports/reviewer-calibration-report.json", "utf8"));
+const blockerLines = result.blockers.slice(0, 5).map((blocker, index) =>
+ `${blocker}`
+).join("\n");
+
+const svg = `
+`;
+
+writeFileSync("reports/reviewer-calibration-summary.svg", svg);
diff --git a/reviewer-calibration-assistant/package.json b/reviewer-calibration-assistant/package.json
new file mode 100644
index 00000000..29bcab62
--- /dev/null
+++ b/reviewer-calibration-assistant/package.json
@@ -0,0 +1,12 @@
+{
+ "name": "reviewer-calibration-assistant",
+ "version": "1.0.0",
+ "description": "Deterministic reviewer calibration and inter-rater agreement assistant for SCIBASE AI review workflows.",
+ "type": "module",
+ "scripts": {
+ "test": "node test/calibration.test.js",
+ "demo": "node demo/generate-demo.js",
+ "demo:video": "npm run demo && node demo/write-svg.js && qlmanage -t -s 1280 -o reports reports/reviewer-calibration-summary.svg >/dev/null && ffmpeg -y -loop 1 -framerate 30 -t 8 -i reports/reviewer-calibration-summary.svg.png -vf format=yuv420p reports/reviewer-calibration-demo.mp4"
+ },
+ "license": "MIT"
+}
diff --git a/reviewer-calibration-assistant/readme.md b/reviewer-calibration-assistant/readme.md
new file mode 100644
index 00000000..951b21ee
--- /dev/null
+++ b/reviewer-calibration-assistant/readme.md
@@ -0,0 +1,36 @@
+# Reviewer Calibration Assistant
+
+This module adds a deterministic reviewer calibration and inter-rater agreement assistant for the SCIBASE AI-Powered Research Assistant Suite.
+
+## Scope
+
+The assistant evaluates review packets before AI-generated peer-review summaries, reputation updates, or author-facing recommendations are released. It focuses on a distinct quality gate not covered by the existing broad assistant, reproducibility checker, research-gap finder, structured-abstract, model-assumption, dependency, external-validity, or evidence-binder slices.
+
+It checks:
+
+- pairwise reviewer agreement across clarity, rigor, novelty, reproducibility, and recommendation;
+- recommendation consensus ratio;
+- severe score spread by scoring dimension;
+- domain-template drift;
+- evidence-anchor coverage for reviewer claim checks;
+- duplicate reviewer records;
+- whether disputed packets should remain double-blind during calibration.
+
+## Local Validation
+
+```sh
+npm test
+npm run demo
+npm run demo:video
+```
+
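+The demo commands write reviewer-ready evidence artifacts to `reports/`. The `.svg` and `.mp4` files are produced only by `npm run demo:video`, which shells out to macOS `qlmanage` and to `ffmpeg`: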
+
+- `reviewer-calibration-report.json`
+- `reviewer-calibration-report.md`
+- `reviewer-calibration-summary.svg`
+- `reviewer-calibration-demo.mp4`
+
+## Boundaries
+
+The implementation has no npm dependencies and uses synthetic data only. It does not call external AI APIs, ingest private manuscripts, access credentials, or integrate with payment systems.
diff --git a/reviewer-calibration-assistant/reports/reviewer-calibration-demo.mp4 b/reviewer-calibration-assistant/reports/reviewer-calibration-demo.mp4
new file mode 100644
index 00000000..21c9a222
Binary files /dev/null and b/reviewer-calibration-assistant/reports/reviewer-calibration-demo.mp4 differ
diff --git a/reviewer-calibration-assistant/reports/reviewer-calibration-report.json b/reviewer-calibration-assistant/reports/reviewer-calibration-report.json
new file mode 100644
index 00000000..f69dc99f
--- /dev/null
+++ b/reviewer-calibration-assistant/reports/reviewer-calibration-report.json
@@ -0,0 +1,89 @@
+{
+ "projectId": "proj-neuro-118",
+ "manuscriptId": "ms-gapfinder-204",
+ "reviewerCount": 3,
+ "reviewMode": "public",
+ "consensusAction": "major_revision",
+ "consensusRatio": 0.333,
+ "pairwiseAgreement": 0.667,
+ "pairwiseDetails": [
+ {
+ "reviewers": [
+ "rev-stat",
+ "rev-domain"
+ ],
+ "agreement": 0.75
+ },
+ {
+ "reviewers": [
+ "rev-stat",
+ "rev-methods"
+ ],
+ "agreement": 0.75
+ },
+ {
+ "reviewers": [
+ "rev-domain",
+ "rev-methods"
+ ],
+ "agreement": 0.5
+ }
+ ],
+ "scoreDivergence": [
+ {
+ "field": "clarity",
+ "spread": 2
+ },
+ {
+ "field": "rigor",
+ "spread": 3
+ },
+ {
+ "field": "reproducibility",
+ "spread": 2
+ }
+ ],
+ "templateDrift": [
+ {
+ "reviewerId": "rev-methods",
+ "expectedTemplate": "neuroscience",
+ "actualTemplate": "clinical_trials"
+ }
+ ],
+ "evidenceGaps": [
+ {
+ "reviewerId": "rev-stat",
+ "coverage": 0.667
+ },
+ {
+ "reviewerId": "rev-domain",
+ "coverage": 0.667
+ },
+ {
+ "reviewerId": "rev-methods",
+ "coverage": 0.333
+ }
+ ],
+ "blindModeRisk": {
+ "reviewMode": "public",
+ "reason": "disputed review packet should remain double-blind until calibration is complete"
+ },
+ "duplicateReviewers": [],
+ "blockers": [
+ "low_inter_rater_agreement",
+ "weak_recommendation_consensus",
+ "severe_score_divergence",
+ "domain_template_drift",
+ "insufficient_evidence_coverage",
+ "blind_mode_escalation_required"
+ ],
+ "decision": "calibrate_before_release",
+ "nextActions": [
+ "Run reviewer calibration on divergent scoring dimensions before author-facing release.",
+ "Request adjudication because recommendation consensus is below threshold.",
+ "Ask reviewers to justify high-spread score dimensions with evidence anchors.",
+ "Normalize all reviews to the configured domain template.",
+ "Require evidence anchors for unsupported claim checks.",
+ "Keep disputed packets double-blind until calibration is resolved."
+ ]
+}
diff --git a/reviewer-calibration-assistant/reports/reviewer-calibration-report.md b/reviewer-calibration-assistant/reports/reviewer-calibration-report.md
new file mode 100644
index 00000000..e1eb1a3d
--- /dev/null
+++ b/reviewer-calibration-assistant/reports/reviewer-calibration-report.md
@@ -0,0 +1,26 @@
+# Reviewer Calibration Assistant Report
+
+| Field | Value |
+| --- | --- |
+| Project | proj-neuro-118 |
+| Manuscript | ms-gapfinder-204 |
+| Reviewers | 3 |
+| Decision | calibrate_before_release |
+| Pairwise agreement | 0.667 |
+| Consensus action | major_revision (0.333) |
+
+## Blockers
+- low_inter_rater_agreement
+- weak_recommendation_consensus
+- severe_score_divergence
+- domain_template_drift
+- insufficient_evidence_coverage
+- blind_mode_escalation_required
+
+## Required Actions
+- Run reviewer calibration on divergent scoring dimensions before author-facing release.
+- Request adjudication because recommendation consensus is below threshold.
+- Ask reviewers to justify high-spread score dimensions with evidence anchors.
+- Normalize all reviews to the configured domain template.
+- Require evidence anchors for unsupported claim checks.
+- Keep disputed packets double-blind until calibration is resolved.
diff --git a/reviewer-calibration-assistant/reports/reviewer-calibration-summary.svg b/reviewer-calibration-assistant/reports/reviewer-calibration-summary.svg
new file mode 100644
index 00000000..c590c0c5
--- /dev/null
+++ b/reviewer-calibration-assistant/reports/reviewer-calibration-summary.svg
@@ -0,0 +1,40 @@
+
diff --git a/reviewer-calibration-assistant/src/calibration.js b/reviewer-calibration-assistant/src/calibration.js
new file mode 100644
index 00000000..559637f1
--- /dev/null
+++ b/reviewer-calibration-assistant/src/calibration.js
@@ -0,0 +1,291 @@
+/**
+ * Reviewer calibration gate: measures inter-rater agreement for a review
+ * packet and lists the blockers to clear before a review summary is released.
+ */
+
+/** Default gate thresholds; the second argument of evaluateReviewerCalibration overrides them. */
+const DEFAULT_THRESHOLDS = Object.freeze({
+  minimumPairwiseAgreement: 0.68,
+  severeScoreSpread: 2,
+  actionConsensusRatio: 0.67,
+  requiredEvidenceCoverage: 0.75,
+  requireBlindModeForDisputedReviews: true
+});
+
+/**
+ * Ordinal rank of each known recommendation. A Map is used instead of a plain
+ * object so that a recommendation such as "constructor" cannot resolve to an
+ * inherited Object.prototype member and inject NaN into the agreement math.
+ */
+const ACTION_RANK = new Map([
+  ["accept", 4],
+  ["minor_revision", 3],
+  ["major_revision", 2],
+  ["reject", 1]
+]);
+
+/** Score dimensions compared across reviewers. */
+const SCORE_FIELDS = ["clarity", "rigor", "novelty", "reproducibility"];
+
+/** Every per-dimension distance is divided by this before being averaged. */
+const DISTANCE_SCALE = 4;
+
+/** Remediation text for each blocker code, surfaced through nextActions. */
+const NEXT_ACTIONS = Object.freeze({
+  duplicate_reviewer_records: "Remove duplicate reviewer entries before reputation or review-summary updates.",
+  low_inter_rater_agreement: "Run reviewer calibration on divergent scoring dimensions before author-facing release.",
+  weak_recommendation_consensus: "Request adjudication because recommendation consensus is below threshold.",
+  severe_score_divergence: "Ask reviewers to justify high-spread score dimensions with evidence anchors.",
+  domain_template_drift: "Normalize all reviews to the configured domain template.",
+  insufficient_evidence_coverage: "Require evidence anchors for unsupported claim checks.",
+  blind_mode_escalation_required: "Keep disputed packets double-blind until calibration is resolved."
+});
+
+/** Restricts value to the inclusive range [min, max]. */
+function clamp(value, min, max) {
+  return Math.max(min, Math.min(max, value));
+}
+
+/** Rounds to three decimals, the precision used for every reported ratio. */
+function round3(value) {
+  return Number(value.toFixed(3));
+}
+
+/**
+ * Returns [value, count] for the most frequent entry of values.
+ * Equal counts are ordered by localeCompare on the stringified values, so a
+ * tie resolves to whichever value sorts first.
+ */
+function mode(values) {
+  const counts = new Map();
+  for (const value of values) counts.set(value, (counts.get(value) ?? 0) + 1);
+  return [...counts.entries()].sort(
+    (a, b) => b[1] - a[1] || String(a[0]).localeCompare(String(b[0]))
+  )[0];
+}
+
+/** Arithmetic mean; an empty list averages to 0. */
+function average(values) {
+  return values.length ? values.reduce((sum, value) => sum + value, 0) / values.length : 0;
+}
+
+/**
+ * Coerces one raw score to a finite number. Missing and non-numeric scores
+ * both become 0: a NaN here would make every threshold comparison false and
+ * let the packet pass the gate unnoticed.
+ */
+function toScore(value) {
+  const numeric = Number(value ?? 0);
+  return Number.isFinite(numeric) ? numeric : 0;
+}
+
+/** Difference between the highest and lowest score given for field. */
+function scoreSpread(reviews, field) {
+  const scores = reviews.map((review) => toScore(review.scores?.[field]));
+  return Math.max(...scores) - Math.min(...scores);
+}
+
+/** Numeric vector for one review: its SCORE_FIELDS values, then its recommendation rank (0 if unknown). */
+function reviewVector(review) {
+  const scoreValues = SCORE_FIELDS.map((field) => toScore(review.scores?.[field]));
+  return [...scoreValues, ACTION_RANK.get(review.recommendation) ?? 0];
+}
+
+/** Agreement in [0, 1] between two reviews: 1 minus the mean scaled distance between their vectors. */
+function pairAgreement(left, right) {
+  const rightVector = reviewVector(right);
+  const distances = reviewVector(left).map(
+    (value, index) => Math.abs(value - rightVector[index]) / DISTANCE_SCALE
+  );
+  return clamp(1 - average(distances), 0, 1);
+}
+
+/** Unrounded agreement for every unordered pair of reviews, in input order. */
+function buildPairs(reviews) {
+  const pairs = [];
+  for (let i = 0; i < reviews.length; i += 1) {
+    for (let j = i + 1; j < reviews.length; j += 1) {
+      pairs.push({
+        reviewers: [reviews[i].reviewerId, reviews[j].reviewerId],
+        agreement: pairAgreement(reviews[i], reviews[j])
+      });
+    }
+  }
+  return pairs;
+}
+
+/** Fraction of a review's claim checks that cite at least one evidence id; 0 when it has none. */
+function evidenceCoverage(review) {
+  const claims = Array.isArray(review.claimChecks) ? review.claimChecks : [];
+  if (claims.length === 0) return 0;
+  const supported = claims.filter((claim) => claim?.evidenceIds?.length > 0).length;
+  return supported / claims.length;
+}
+
+/** Lists the reviews whose template differs from the packet's domainTemplate. */
+function detectDomainTemplateDrift(packet) {
+  const expected = packet.domainTemplate;
+  return packet.reviews
+    .filter((review) => review.template !== expected)
+    .map((review) => ({
+      reviewerId: review.reviewerId,
+      expectedTemplate: expected,
+      actualTemplate: review.template || "missing"
+    }));
+}
+
+/** Lists the reviews whose evidence coverage is below thresholds.requiredEvidenceCoverage. */
+function detectEvidenceGaps(packet, thresholds) {
+  return packet.reviews
+    .map((review) => ({ reviewerId: review.reviewerId, coverage: evidenceCoverage(review) }))
+    // Filter on the unrounded value so coverage just under the threshold is not rounded up to it.
+    .filter((gap) => gap.coverage < thresholds.requiredEvidenceCoverage)
+    .map((gap) => ({ ...gap, coverage: round3(gap.coverage) }));
+}
+
+/** Lists the score fields whose spread reaches thresholds.severeScoreSpread. */
+function detectScoreDivergence(reviews, thresholds) {
+  return SCORE_FIELDS
+    .map((field) => ({ field, spread: scoreSpread(reviews, field) }))
+    .filter((item) => item.spread >= thresholds.severeScoreSpread);
+}
+
+/**
+ * Returns a risk record when a disputed packet (any severe score divergence, or
+ * agreement below the minimum) is not in double_blind mode; otherwise null.
+ */
+function detectBlindModeRisk(packet, thresholds, scoreDivergence, pairwiseAgreement) {
+  if (!thresholds.requireBlindModeForDisputedReviews) return null;
+  if (packet.reviewMode === "double_blind") return null;
+  if (scoreDivergence.length === 0 && pairwiseAgreement >= thresholds.minimumPairwiseAgreement) return null;
+  return {
+    reviewMode: packet.reviewMode || "missing",
+    reason: "disputed review packet should remain double-blind until calibration is complete"
+  };
+}
+
+/** Reviewer ids that occur more than once, in order of their first repeat. */
+function findDuplicateReviewerIds(reviews) {
+  const seen = new Set();
+  const duplicates = new Set();
+  for (const { reviewerId } of reviews) {
+    if (seen.has(reviewerId)) duplicates.add(reviewerId);
+    else seen.add(reviewerId);
+  }
+  return [...duplicates];
+}
+
+/** Merges overrides into the defaults, skipping undefined values so they cannot disable a check. */
+function resolveThresholds(customThresholds) {
+  const overrides = Object.entries(customThresholds ?? {}).filter(([, value]) => value !== undefined);
+  return { ...DEFAULT_THRESHOLDS, ...Object.fromEntries(overrides) };
+}
+
+/**
+ * Evaluates a review packet against the calibration gate.
+ *
+ * @param {object} packet - Packet with a reviews array of at least two records, plus the
+ *   projectId, manuscriptId, domainTemplate and reviewMode fields read below.
+ * @param {object} [customThresholds] - Overrides for any DEFAULT_THRESHOLDS key.
+ * @returns {object} Metrics, per-check findings, blockers, decision and nextActions.
+ * @throws {Error} If fewer than two reviews are supplied or a review record is null/undefined.
+ */
+export function evaluateReviewerCalibration(packet, customThresholds = {}) {
+  const thresholds = resolveThresholds(customThresholds);
+  if (!packet || !Array.isArray(packet.reviews) || packet.reviews.length < 2) {
+    throw new Error("Reviewer calibration requires at least two review records.");
+  }
+  if (packet.reviews.some((review) => review == null)) {
+    throw new Error("Reviewer calibration requires every review record to be an object.");
+  }
+
+  const { reviews } = packet;
+  const duplicateReviewers = findDuplicateReviewerIds(reviews);
+  const rawPairs = buildPairs(reviews);
+  // Average the unrounded pair values and round once, so rounding error does not compound.
+  const pairwiseAgreement = round3(average(rawPairs.map((pair) => pair.agreement)));
+  const pairwiseDetails = rawPairs.map((pair) => ({ ...pair, agreement: round3(pair.agreement) }));
+  const [consensusAction, consensusCount] = mode(reviews.map((review) => review.recommendation));
+  const consensusRatio = round3(consensusCount / reviews.length);
+  const scoreDivergence = detectScoreDivergence(reviews, thresholds);
+  const templateDrift = detectDomainTemplateDrift(packet);
+  const evidenceGaps = detectEvidenceGaps(packet, thresholds);
+  const blindModeRisk = detectBlindModeRisk(packet, thresholds, scoreDivergence, pairwiseAgreement);
+
+  const blockers = [];
+  if (duplicateReviewers.length) blockers.push("duplicate_reviewer_records");
+  if (pairwiseAgreement < thresholds.minimumPairwiseAgreement) blockers.push("low_inter_rater_agreement");
+  if (consensusRatio < thresholds.actionConsensusRatio) blockers.push("weak_recommendation_consensus");
+  if (scoreDivergence.length) blockers.push("severe_score_divergence");
+  if (templateDrift.length) blockers.push("domain_template_drift");
+  if (evidenceGaps.length) blockers.push("insufficient_evidence_coverage");
+  if (blindModeRisk) blockers.push("blind_mode_escalation_required");
+
+  return {
+    projectId: packet.projectId,
+    manuscriptId: packet.manuscriptId,
+    reviewerCount: reviews.length,
+    reviewMode: packet.reviewMode,
+    consensusAction,
+    consensusRatio,
+    pairwiseAgreement,
+    pairwiseDetails,
+    scoreDivergence,
+    templateDrift,
+    evidenceGaps,
+    blindModeRisk,
+    duplicateReviewers,
+    blockers,
+    decision: blockers.length ? "calibrate_before_release" : "release_review_summary",
+    nextActions: buildNextActions(blockers)
+  };
+}
+
+/** Maps each blocker code to its remediation text, preserving order. */
+function buildNextActions(blockers) {
+  return blockers.map((blocker) => NEXT_ACTIONS[blocker]);
+}
+
+/** Makes a value safe for a Markdown table cell: escapes pipes and flattens line breaks. */
+function escapeTableCell(value) {
+  return String(value).replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
+}
+
+/**
+ * Renders an evaluateReviewerCalibration result as a Markdown report.
+ *
+ * @param {object} result - Object returned by evaluateReviewerCalibration.
+ * @returns {string} Markdown with a summary table, a blockers list and a required-actions list.
+ */
+export function renderMarkdownReport(result) {
+  const rows = [
+    ["Project", result.projectId],
+    ["Manuscript", result.manuscriptId],
+    ["Reviewers", result.reviewerCount],
+    ["Decision", result.decision],
+    ["Pairwise agreement", result.pairwiseAgreement],
+    ["Consensus action", `${result.consensusAction} (${result.consensusRatio})`]
+  ];
+  const summary = rows.map(([label, value]) => `| ${label} | ${escapeTableCell(value)} |`).join("\n");
+  const blockers = result.blockers.length
+    ? result.blockers.map((blocker) => `- ${blocker}`).join("\n")
+    : "- none";
+  const actions = result.nextActions.length
+    ? result.nextActions.map((action) => `- ${action}`).join("\n")
+    : "- Review packet can be released.";
+
+  return [
+    "# Reviewer Calibration Assistant Report",
+    "",
+    "| Field | Value |",
+    "| --- | --- |",
+    summary,
+    "",
+    "## Blockers",
+    blockers,
+    "",
+    "## Required Actions",
+    actions,
+    ""
+  ].join("\n");
+}
diff --git a/reviewer-calibration-assistant/test/calibration.test.js b/reviewer-calibration-assistant/test/calibration.test.js
new file mode 100644
index 00000000..a121b7bd
--- /dev/null
+++ b/reviewer-calibration-assistant/test/calibration.test.js
@@ -0,0 +1,120 @@
+import assert from "node:assert/strict";
+import { evaluateReviewerCalibration, renderMarkdownReport } from "../src/calibration.js";
+
+// Three reviewers who agree closely: expected to clear every gate.
+const basePacket = {
+  projectId: "proj-crispr-042",
+  manuscriptId: "ms-review-17",
+  domainTemplate: "clinical_trials",
+  reviewMode: "double_blind",
+  reviews: [
+    {
+      reviewerId: "rev-a",
+      template: "clinical_trials",
+      recommendation: "major_revision",
+      scores: { clarity: 3, rigor: 2, novelty: 3, reproducibility: 2 },
+      claimChecks: [
+        { claimId: "c1", evidenceIds: ["e1"] },
+        { claimId: "c2", evidenceIds: ["e2"] }
+      ]
+    },
+    {
+      reviewerId: "rev-b",
+      template: "clinical_trials",
+      recommendation: "major_revision",
+      scores: { clarity: 3, rigor: 2, novelty: 3, reproducibility: 2 },
+      claimChecks: [
+        { claimId: "c1", evidenceIds: ["e1"] },
+        { claimId: "c2", evidenceIds: ["e2"] }
+      ]
+    },
+    {
+      reviewerId: "rev-c",
+      template: "clinical_trials",
+      recommendation: "major_revision",
+      scores: { clarity: 4, rigor: 3, novelty: 3, reproducibility: 3 },
+      claimChecks: [
+        { claimId: "c1", evidenceIds: ["e1"] },
+        { claimId: "c2", evidenceIds: ["e2"] }
+      ]
+    }
+  ]
+};
+
+// Same packet with public review mode, split recommendations, template drift and missing evidence.
+const riskyPacket = {
+  ...basePacket,
+  reviewMode: "public",
+  reviews: [
+    basePacket.reviews[0],
+    {
+      reviewerId: "rev-b",
+      template: "molecular_biology",
+      recommendation: "accept",
+      scores: { clarity: 5, rigor: 5, novelty: 5, reproducibility: 5 },
+      claimChecks: [
+        { claimId: "c1", evidenceIds: [] },
+        { claimId: "c2", evidenceIds: [] }
+      ]
+    },
+    {
+      reviewerId: "rev-c",
+      template: "clinical_trials",
+      recommendation: "reject",
+      scores: { clarity: 1, rigor: 1, novelty: 2, reproducibility: 1 },
+      claimChecks: [
+        { claimId: "c1", evidenceIds: ["e1"] },
+        { claimId: "c2", evidenceIds: [] }
+      ]
+    }
+  ]
+};
+
+// A clean packet releases with no blockers and nothing left to do.
+const ready = evaluateReviewerCalibration(basePacket);
+assert.equal(ready.decision, "release_review_summary");
+assert.equal(ready.blockers.length, 0);
+assert.equal(ready.consensusAction, "major_revision");
+assert.ok(ready.pairwiseAgreement >= 0.8);
+assert.equal(ready.consensusRatio, 1);
+assert.deepEqual(ready.nextActions, []);
+
+// A disputed packet is held back, with one next action per blocker.
+const risky = evaluateReviewerCalibration(riskyPacket);
+assert.equal(risky.decision, "calibrate_before_release");
+assert.ok(risky.blockers.includes("low_inter_rater_agreement"));
+assert.ok(risky.blockers.includes("weak_recommendation_consensus"));
+assert.ok(risky.blockers.includes("severe_score_divergence"));
+assert.ok(risky.blockers.includes("domain_template_drift"));
+assert.ok(risky.blockers.includes("insufficient_evidence_coverage"));
+assert.ok(risky.blockers.includes("blind_mode_escalation_required"));
+assert.equal(risky.templateDrift[0].reviewerId, "rev-b");
+assert.equal(risky.evidenceGaps.length, 2);
+assert.equal(risky.consensusRatio, 0.333);
+assert.equal(risky.nextActions.length, risky.blockers.length);
+
+// Repeated reviewer ids are reported and block release on their own.
+const duplicated = evaluateReviewerCalibration({
+  ...basePacket,
+  reviews: [basePacket.reviews[0], basePacket.reviews[0], basePacket.reviews[2]]
+});
+assert.deepEqual(duplicated.duplicateReviewers, ["rev-a"]);
+assert.deepEqual(duplicated.blockers, ["duplicate_reviewer_records"]);
+
+// Custom thresholds override the defaults.
+const strict = evaluateReviewerCalibration(basePacket, { minimumPairwiseAgreement: 0.95 });
+assert.deepEqual(strict.blockers, ["low_inter_rater_agreement"]);
+
+// Fewer than two reviews is rejected.
+assert.throws(() => evaluateReviewerCalibration({ reviews: [basePacket.reviews[0]] }), /at least two/);
+
+// Markdown rendering for a blocked result and for a clean one.
+const report = renderMarkdownReport(risky);
+assert.match(report, /Reviewer Calibration Assistant Report/);
+assert.match(report, /calibrate_before_release/);
+assert.match(report, /low_inter_rater_agreement/);
+const readyReport = renderMarkdownReport(ready);
+assert.match(readyReport, /- none/);
+assert.match(readyReport, /Review packet can be released\./);
+
+console.log("reviewer calibration assistant tests passed");