From a7fe3e7c34074e3c8074cae587e1d107b9fafe92 Mon Sep 17 00:00:00 2001
From: Rainer M Krug <Rainer@krugs.de>
Date: Fri, 29 May 2026 09:09:41 +0200
Subject: [PATCH 1/7] chore: move repo to openalexPro org
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- DESCRIPTION: URL, BugReports, Additional_repositories → openalexPro org
- _pkgdown.yml: site URL → openalexpro.github.io
- README.md: all badge/link URLs updated to openalexPro org
- CI workflows: extra-repositories → openalexpro.r-universe.dev

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/workflows/R-CMD-check.yaml   |  2 +-
 .github/workflows/pkgdown.yaml       |  2 +-
 .github/workflows/test-coverage.yaml |  2 +-
 DESCRIPTION                          |  6 +++---
 README.md                            | 10 +++++-----
 _pkgdown.yml                         |  2 +-
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 2218a05..85da591 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -39,7 +39,7 @@ jobs:
           r-version: ${{ matrix.config.r }}
           http-user-agent: ${{ matrix.config.http-user-agent }}
           use-public-rspm: true
-          extra-repositories: https://rkrug.r-universe.dev
+          extra-repositories: https://openalexpro.r-universe.dev
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index 812598c..c93ca22 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -30,7 +30,7 @@ jobs:
       - uses: r-lib/actions/setup-r@v2
         with:
           use-public-rspm: true
-          extra-repositories: https://rkrug.r-universe.dev
+          extra-repositories: https://openalexpro.r-universe.dev
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index 449d07e..fb3cb9e 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -21,7 +21,7 @@ jobs:
       - uses: r-lib/actions/setup-r@v2
         with:
           use-public-rspm: true
-          extra-repositories: https://rkrug.r-universe.dev
+          extra-repositories: https://openalexpro.r-universe.dev
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
diff --git a/DESCRIPTION b/DESCRIPTION
index 3c76b81..723fedd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,8 +8,8 @@ Description: Utilities to convert an OpenAlex parquet/Arrow corpus into
     other formats. Implemented are at the moment CSL JSON, BibTeX, BibLaTeX,
     Markdown, LaTeX, HTML, or PDF via Pandoc. Uses DuckDB over Arrow for
     efficient chunked CSL JSON conversion. 
-URL: https://github.com/rkrug/openalexConvert, https://rkrug.github.io/openalexConvert/
-BugReports: https://github.com/rkrug/openalexConvert/issues
+URL: https://github.com/openalexPro/openalexConvert, https://openalexpro.github.io/openalexConvert/
+BugReports: https://github.com/openalexPro/openalexConvert/issues
 License: GPL (>= 2)
 Depends:
     R (>= 4.1.0)
@@ -26,7 +26,7 @@ Suggests:
     knitr,
     quarto,
     testthat (>= 3.0.0)
-Additional_repositories: https://rkrug.r-universe.dev
+Additional_repositories: https://openalexpro.r-universe.dev
 Encoding: UTF-8
 RoxygenNote: 7.3.3
 VignetteBuilder: quarto
diff --git a/README.md b/README.md
index 0402583..6f5028d 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # openalexConvert
 
 Convert an [OpenAlex](https://openalex.org) parquet corpus (produced by
-[openalexPro](https://github.com/rkrug/openalexPro)) into bibliography formats via
+[openalexPro](https://github.com/openalexPro/openalexPro)) into bibliography formats via
 [Pandoc](https://pandoc.org): CSL JSON, BibTeX, BibLaTeX, Markdown, LaTeX, HTML, or PDF.
 
 ## Installation
@@ -9,7 +9,7 @@ Convert an [OpenAlex](https://openalex.org) parquet corpus (produced by
 ```r
 install.packages(
   "openalexConvert",
-  repos = c("https://rkrug.r-universe.dev", "https://cloud.r-project.org")
+  repos = c("https://openalexpro.r-universe.dev", "https://cloud.r-project.org")
 )
 ```
 
@@ -37,9 +37,9 @@ csljson_convert_pandoc(
 
 ## Documentation
 
-Full documentation and vignettes: <https://rkrug.github.io/openalexConvert/>
+Full documentation and vignettes: <https://openalexpro.github.io/openalexConvert/>
 
 ## Related packages
 
-- [openalexPro](https://github.com/rkrug/openalexPro) — API access and parquet output
-- [openalexSnowball](https://github.com/rkrug/openalexSnowball) — snowball citation search
+- [openalexPro](https://github.com/openalexPro/openalexPro) — API access and parquet output
+- [openalexSnowball](https://github.com/openalexPro/openalexSnowball) — snowball citation search
diff --git a/_pkgdown.yml b/_pkgdown.yml
index ce87aa0..1c70402 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -1,4 +1,4 @@
-url: https://rkrug.github.io/openalexConvert/
+url: https://openalexpro.github.io/openalexConvert/
 template:
   bootstrap: 5
 

From 698a77be2458d00f37701ef462f048b1ca471a19 Mon Sep 17 00:00:00 2001
From: Rainer M Krug <Rainer@krugs.de>
Date: Fri, 29 May 2026 09:13:52 +0200
Subject: [PATCH 2/7] fix(ci): use rkrug.r-universe.dev until
 openalexpro.r-universe.dev is live

openalexpro.r-universe.dev does not exist yet (packages.json needs to
reach main first). Temporarily keep rkrug.r-universe.dev as the install
source; will switch once the org universe is built.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/workflows/R-CMD-check.yaml   | 2 +-
 .github/workflows/pkgdown.yaml       | 2 +-
 .github/workflows/test-coverage.yaml | 2 +-
 DESCRIPTION                          | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 85da591..2218a05 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -39,7 +39,7 @@ jobs:
           r-version: ${{ matrix.config.r }}
           http-user-agent: ${{ matrix.config.http-user-agent }}
           use-public-rspm: true
-          extra-repositories: https://openalexpro.r-universe.dev
+          extra-repositories: https://rkrug.r-universe.dev
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index c93ca22..812598c 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -30,7 +30,7 @@ jobs:
       - uses: r-lib/actions/setup-r@v2
         with:
           use-public-rspm: true
-          extra-repositories: https://openalexpro.r-universe.dev
+          extra-repositories: https://rkrug.r-universe.dev
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index fb3cb9e..449d07e 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -21,7 +21,7 @@ jobs:
       - uses: r-lib/actions/setup-r@v2
         with:
           use-public-rspm: true
-          extra-repositories: https://openalexpro.r-universe.dev
+          extra-repositories: https://rkrug.r-universe.dev
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
diff --git a/DESCRIPTION b/DESCRIPTION
index 723fedd..0cecf5e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -26,7 +26,7 @@ Suggests:
     knitr,
     quarto,
     testthat (>= 3.0.0)
-Additional_repositories: https://openalexpro.r-universe.dev
+Additional_repositories: https://rkrug.r-universe.dev
 Encoding: UTF-8
 RoxygenNote: 7.3.3
 VignetteBuilder: quarto

From 21b9cb39e0bc871f794bfd0f7557e89b6215caa8 Mon Sep 17 00:00:00 2001
From: Rainer M Krug <Rainer@krugs.de>
Date: Fri, 29 May 2026 09:25:40 +0200
Subject: [PATCH 3/7] fix(ci): install openalexPro from GitHub; point repos to
 openalexpro.r-universe.dev

rkrug.r-universe.dev dropped openalexPro after the org transfer.
Use github::openalexPro/openalexPro in CI extra-packages as bridge
until openalexpro.r-universe.dev finishes its first build.
Additional_repositories updated to openalexpro.r-universe.dev.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/workflows/R-CMD-check.yaml   | 4 ++--
 .github/workflows/pkgdown.yaml       | 4 ++--
 .github/workflows/test-coverage.yaml | 4 ++--
 DESCRIPTION                          | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 2218a05..bec4e8a 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -39,11 +39,11 @@ jobs:
           r-version: ${{ matrix.config.r }}
           http-user-agent: ${{ matrix.config.http-user-agent }}
           use-public-rspm: true
-          extra-repositories: https://rkrug.r-universe.dev
+          
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::rcmdcheck
+          extra-packages: any::rcmdcheck, github::openalexPro/openalexPro
           needs: check
 
       - uses: r-lib/actions/check-r-package@v2
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index 812598c..da3a478 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -30,11 +30,11 @@ jobs:
       - uses: r-lib/actions/setup-r@v2
         with:
           use-public-rspm: true
-          extra-repositories: https://rkrug.r-universe.dev
+          
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::pkgdown, local::.
+          extra-packages: any::pkgdown, github::openalexPro/openalexPro, local::.
           needs: website
 
       - name: Build pkgnet report (site asset)
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index 449d07e..285290d 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -21,11 +21,11 @@ jobs:
       - uses: r-lib/actions/setup-r@v2
         with:
           use-public-rspm: true
-          extra-repositories: https://rkrug.r-universe.dev
+          
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::covr, any::xml2
+          extra-packages: any::covr, any::xml2, github::openalexPro/openalexPro
           needs: coverage
 
       - name: Test coverage
diff --git a/DESCRIPTION b/DESCRIPTION
index 0cecf5e..723fedd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -26,7 +26,7 @@ Suggests:
     knitr,
     quarto,
     testthat (>= 3.0.0)
-Additional_repositories: https://rkrug.r-universe.dev
+Additional_repositories: https://openalexpro.r-universe.dev
 Encoding: UTF-8
 RoxygenNote: 7.3.3
 VignetteBuilder: quarto

From 2939bc8291a9a12091df5ece43d6221f0aa2ef6d Mon Sep 17 00:00:00 2001
From: Rainer M Krug <Rainer@krugs.de>
Date: Fri, 29 May 2026 18:47:56 +0200
Subject: [PATCH 4/7] chore: add Zenodo DOI 10.5281/zenodo.20448988

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 DESCRIPTION | 2 +-
 README.md   | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 476ae08..e9299f8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,7 +8,7 @@ Description: Utilities to convert an OpenAlex parquet/Arrow corpus into
     other formats. Implemented are at the moment CSL JSON, BibTeX, BibLaTeX,
     Markdown, LaTeX, HTML, or PDF via Pandoc. Uses DuckDB over Arrow for
     efficient chunked CSL JSON conversion. 
-URL: https://github.com/rkrug/openalexConvert, https://rkrug.github.io/openalexConvert/
+URL: https://github.com/rkrug/openalexConvert, https://rkrug.github.io/openalexConvert/, https://doi.org/10.5281/zenodo.20448988
 BugReports: https://github.com/rkrug/openalexConvert/issues
 License: GPL (>= 2)
 Depends:
diff --git a/README.md b/README.md
index 0402583..20e8aa2 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.20448988.svg)](https://doi.org/10.5281/zenodo.20448988)
+
 # openalexConvert
 
 Convert an [OpenAlex](https://openalex.org) parquet corpus (produced by

From 84b84915b64dad03d4d3adcb8e8a873263d55956 Mon Sep 17 00:00:00 2001
From: Rainer M Krug <Rainer@krugs.de>
Date: Tue, 2 Jun 2026 17:05:34 +0200
Subject: [PATCH 5/7] Add CLAUDE.md

---
 .Rbuildignore |   1 +
 CLAUDE.md     | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 166 insertions(+)
 create mode 100644 CLAUDE.md

diff --git a/.Rbuildignore b/.Rbuildignore
index a98127c..1604cc9 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -7,3 +7,4 @@
 ^.github$
 ^\.github$
 ^vignettes$
+^(\.claude|CLAUDE\.md)$
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..995a6cc
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,165 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## What this package is
+
+`openalexConvert` is an R package that converts an OpenAlex parquet/Arrow corpus
+(as produced by the sibling package [openalexPro](https://github.com/openalexPro/openalexPro))
+into bibliography formats. It is part of the `openalexPro` ecosystem (alongside
+`openalexSnowball`) and is distributed via r-universe (`https://openalexpro.r-universe.dev`),
+not CRAN. Status is alpha (see the startup message in `R/zzz.R`).
+
+## Commands
+
+All commands assume the package root and use R's standard tooling.
+
+```r
+# Run the full test suite
+devtools::test()
+
+# Run a single test file
+testthat::test_file("tests/testthat/test-001-corpus_csl_pandoc.R")
+
+# Regenerate man/*.Rd and NAMESPACE from roxygen2 comments (required after
+# changing @export tags or @param docs — NAMESPACE is generated, never hand-edit)
+devtools::document()
+
+# Full R CMD check
+devtools::check()
+
+# Render the pkgdown site locally
+pkgdown::build_site()
+```
+
+CI (`.github/workflows/`) runs `R-CMD-check` on macOS/Windows/Ubuntu ×
+devel/release/oldrel-1, plus `test-coverage` and `pkgdown`. Because `openalexPro`
+lives on r-universe, `Additional_repositories` in `DESCRIPTION` must list it or
+dependency resolution fails in CI.
+
+## Architecture
+
+The package is a thin pipeline with two stages. Each exported function is one
+self-contained file in `R/`.
+
+1. **Corpus → CSL JSON** ([R/corpus_to_csljson.R](R/corpus_to_csljson.R)):
+   `corpus_to_csljson()` reads an Arrow Dataset/Table/data.frame, registers it
+   with DuckDB (`duckdb_register_arrow`), and processes it in chunks of
+   `chunk_size` rows via `LIMIT/OFFSET`. Output is one `chunk_N.json` file per
+   chunk, each a CSL JSON array. This is the memory-efficient core — large
+   corpora never load fully into R.
+
+2. **CSL JSON → everything else** ([R/csljson_convert_pandoc.R](R/csljson_convert_pandoc.R)):
+   `csljson_convert_pandoc()` shells out to Pandoc (via `rmarkdown::pandoc_convert`)
+   to produce bibtex, biblatex, docx, markdown, latex, html, or pdf. It dispatches
+   on whether the input is a directory of chunks or a single file, crossed with
+   whether `to` is a bibliography format (`bibtex`/`biblatex`, direct conversion)
+   or a formatted-document format (renders via citeproc with a generated
+   `nocite: "@*"` markdown stub).
+
+`corpus_export_via_pandoc()` ([R/corpus_export_via_pandoc.R](R/corpus_export_via_pandoc.R))
+is a convenience wrapper chaining stage 1 → stage 2 through a temp dir.
+
+`csljson_to_zotero_upload()` ([R/csljson_to_zotero_upload.R](R/csljson_to_zotero_upload.R))
+is independent of the above: it POSTs CSL JSON files to the Zotero Web API
+(`/groups/{id}/items`) using `httr2`, with `progressr` progress. API key comes
+from `ZOTERO_API_KEY`.
+
+### Key design points in the corpus→CSL mapping
+
+#### Schema resilience (`.build_select_sql`)
+
+This is the central concern and the function most likely to need edits. OpenAlex
+parquet schemas vary across openalexPro versions and across record sources, so the
+mapping never assumes a column exists. The function:
+
+- Reads the live column set via `SELECT * FROM src LIMIT 0` (in `corpus_to_csljson`)
+  and passes `cols` in. Every field expression is chosen with a `has(name)` guard.
+- Falls back to a typed null (`CAST(NULL AS VARCHAR/INTEGER/BOOLEAN)`) for missing
+  scalar columns, or an empty list `[]` for missing list columns (issns, authors,
+  orcids, keywords). The downstream R code relies on these placeholders existing,
+  so the `SELECT` always emits the *same column names* regardless of input schema.
+- Handles **two venue layouts** that coexist in the wild: the legacy `host_venue`
+  struct and the newer `primary_location.source` struct. When both are present it
+  `COALESCE`s them (host_venue first); when only one is present it uses that. This
+  applies to venue name, venue type, publisher, `issn_l`, and `issn`. If you touch
+  venue handling, keep all five expressions consistent.
+- Uses DuckDB struct/list navigation directly in SQL: dotted paths like
+  `primary_location.source.display_name`, and `list_transform(authorships, x -> ...)`
+  to pluck author display names and ORCIDs into parallel arrays.
+- `url` is a `COALESCE` priority chain: `doi_url` → `open_access.oa_url` →
+  `primary_location.landing_page_url` → `id`.
+- ISBN is deliberately hard-coded to `CAST(NULL AS VARCHAR)` — the nested path is
+  unreliable, so ISBN is effectively disabled at the SQL layer (see the comment).
+
+**To add a mapped field you must touch two places**, and they share assumptions:
+add the `has()`-guarded expression with an `AS <name>` alias in `.build_select_sql()`,
+then read `rec$<name>` in `.map_record_to_csl()`. List-typed columns come back as
+list-columns, so they are accessed as `rec$<name>[[1]]` (note the double bracket).
+
+#### Row → CSL item (`.map_record_to_csl`)
+
+Converts one SQL result row to a CSL item list. Notable behaviors:
+
+- `%||%` is a custom coalesce defined here (not rlang's): it treats `NULL`,
+  length-0, `NA`, **and empty string** all as missing. Used pervasively for scalars.
+- Authors: names are split by `.split_name()` (handles `"Family, Given"` and
+  space-separated, taking the last token as family). ORCIDs ride along in a parallel
+  array, matched by index.
+- `issued` date-parts: prefers `publication_date` (split on `-` into year/month/day),
+  falling back to `year` alone.
+- An aggregated `note` field encodes OA status and citation count
+  (`OA:true; OA_status:gold; Citations:42`) since CSL has no native fields for these.
+- Keywords (from `concepts`) are collapsed to a single `"; "`-joined string.
+- ISBN is only emitted for `book`/`chapter`/`report` types — but since the SQL
+  always nulls ISBN, this branch is currently dead unless the SQL is changed too.
+
+#### CSL type inference (`.infer_csl_type`)
+
+Maps OpenAlex `type`/venue-type strings to CSL types via regex (e.g.
+`journal-article` → `article-journal`, `book-chapter` → `chapter`,
+`posted-content`/`preprint` → `manuscript`). It then applies signal-based overrides:
+presence of ISBN nudges toward `book`, ISSN toward `article-journal`, and a
+container + volume/issue toward `article-journal`. Default when nothing matches is
+`article-journal`.
+
+#### Sanitization (`.sanitize_csl_item`)
+
+Runs recursively over the assembled item before serialization: drops `NULL` and
+scalar `NA`, normalizes character data to UTF-8 (`iconv`), strips control chars and
+collapses whitespace, caps `abstract` at **700 chars**, and cleans author sublists
+(removes `NA`/`"NA"` ORCIDs, blanks `NA` given/family). The 700-char abstract cap
+here is separate from the 10000-char cap in the Pandoc stage (below).
+
+#### DOI normalization (`.normalize_doi`)
+
+Delegates to `openalexPro::extract_doi(..., normalize = TRUE, what = "doi")` to get
+a bare DOI, with a regex strip of the resolver prefix as fallback if that call
+errors. Do **not** reimplement DOI parsing here — a past bug (see `NEWS.md`) came
+from a local regex that missed lowercase suffixes; the fix was to delegate.
+
+#### Pandoc abstract guard (`.normalize_json_for_pandoc`)
+
+Before bibtex/biblatex/formatted conversion, this re-serializes the JSON (for
+consistent encoding) and, for directory/chunk conversion, drops any abstract longer
+than **10000 chars** — long abstracts can stall pandoc/LaTeX. Single-file bibtex
+conversion calls it with `drop_long_abstracts = FALSE`. The returned `$sanitized`
+flag is surfaced in the verbose `[sanitized]` log marker.
+
+## Tests
+
+`tests/testthat/test-001-corpus_csl_pandoc.R` runs the full pipeline against
+parquet fixtures in `tests/fixtures/corpus/` and compares output against golden
+fixtures (`corpus_csl/`, `corpus_bibtex/`, `corpus_biblatex/`, `corpus_docs/`).
+
+Important: exact-text comparison of Pandoc output is **Pandoc-version sensitive**,
+so those assertions are guarded by `skip_on_ci()`. JSON-structure comparison (the
+stage-1 output) is exact and always runs. Most tests `skip_if_not(rmarkdown::pandoc_available())`.
+If you change the CSL mapping, the golden JSON fixtures in `tests/fixtures/corpus_csl/`
+must be regenerated to match.
+
+## Docs / vignettes
+
+Vignettes are Quarto (`.qmd`, `VignetteBuilder: quarto`) in `vignettes/`. The
+`vignettes/` dir is in `.Rbuildignore` (excluded from the tarball to avoid R CMD
+check warnings). `pkgnet_report.qmd` is a generated package-network report.

From 52f4e923b013558b084aaf01eab92e12508ab713 Mon Sep 17 00:00:00 2001
From: Rainer M Krug <Rainer@krugs.de>
Date: Tue, 2 Jun 2026 17:18:51 +0200
Subject: [PATCH 6/7] test: raise coverage to 86% and fix broken
 corpus_export_via_pandoc()

Add tests for the one-shot export wrapper, Pandoc HTML/single-file
rendering, conversion error paths, and the Zotero uploader (validation
plus mocked success/failure/network-error paths). Package coverage rises
from ~70% to ~86%.

Writing the wrapper tests surfaced two bugs that left
corpus_export_via_pandoc() non-functional:
- it called corpus_to_csljson() with positional args that misaligned with
  the project_dir-first signature; now uses named args
- it pre-created the temp CSL dir, which corpus_to_csljson() then refused
  to overwrite; the path is now only reserved, not created

Also reflow over-long lines in the R source for lintr/goodpractice.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 R/corpus_export_via_pandoc.R             |   8 +-
 R/corpus_to_csljson.R                    |   7 +-
 R/csljson_convert_pandoc.R               | 102 ++++++++++++---
 R/csljson_to_zotero_upload.R             |   3 +-
 tests/testthat/test-002-export-formats.R | 160 +++++++++++++++++++++++
 tests/testthat/test-003-zotero-upload.R  | 112 ++++++++++++++++
 6 files changed, 369 insertions(+), 23 deletions(-)
 create mode 100644 tests/testthat/test-002-export-formats.R
 create mode 100644 tests/testthat/test-003-zotero-upload.R

diff --git a/R/corpus_export_via_pandoc.R b/R/corpus_export_via_pandoc.R
index 7d49d44..6586f97 100644
--- a/R/corpus_export_via_pandoc.R
+++ b/R/corpus_export_via_pandoc.R
@@ -8,7 +8,8 @@
 #' @param to Target format passed to Pandoc (e.g., `"bibtex"`, `"biblatex"`).
 #' @param csl_tmp Optional path for a temporary CSL JSON directory. If `NULL`, a
 #'   temporary directory is used and removed afterwards.
-#' @param ... Additional arguments passed to `corpus_to_csljson()` (e.g., `chunk_size`).
+#' @param ... Additional arguments passed to `corpus_to_csljson()`
+#'   (e.g., `chunk_size`).
 #'
 #' @return Invisibly returns `normalizePath(output)`.
 #'
@@ -23,11 +24,12 @@ corpus_export_via_pandoc <- function(
   to <- match.arg(to)
   remove_tmp <- FALSE
   if (is.null(csl_tmp)) {
+    # `corpus_to_csljson()` creates the directory itself and errors if it
+    # already exists, so only reserve the path here.
     csl_tmp <- tempfile(pattern = "csljson_")
-    dir.create(csl_tmp, recursive = TRUE, showWarnings = FALSE)
     remove_tmp <- TRUE
   }
-  corpus_to_csljson(corpus, csl_tmp, ...)
+  corpus_to_csljson(corpus = corpus, output = csl_tmp, ...)
   on.exit(
     if (remove_tmp) {
       try(unlink(csl_tmp, recursive = TRUE, force = TRUE), silent = TRUE)
diff --git a/R/corpus_to_csljson.R b/R/corpus_to_csljson.R
index 92c0a50..f0b8bfc 100644
--- a/R/corpus_to_csljson.R
+++ b/R/corpus_to_csljson.R
@@ -427,7 +427,12 @@ corpus_to_csljson <- function(
   }
   doi_raw <- as.character(doi_raw)
   tryCatch(
-    openalexPro::extract_doi(doi_raw, non_doi_value = "", normalize = TRUE, what = "doi"),
+    openalexPro::extract_doi(
+      doi_raw,
+      non_doi_value = "",
+      normalize = TRUE,
+      what = "doi"
+    ),
     error = function(e) sub("^(?i)https?://(dx\\.)?doi\\.org/", "", doi_raw)
   )
 }
diff --git a/R/csljson_convert_pandoc.R b/R/csljson_convert_pandoc.R
index 4a5dcf9..3783247 100644
--- a/R/csljson_convert_pandoc.R
+++ b/R/csljson_convert_pandoc.R
@@ -20,15 +20,17 @@
 #' @param to One of `"biblatex"`, `"bibtex"`, `"docx"`, `"markdown"`,
 #'   `"latex"`, `"html"`, or `"pdf"`.
 #' @param from Source format; defaults to "csljson".
-#' @param overwrite Logical; overwrite existing output file(s). Defaults to FALSE.
+#' @param overwrite Logical; overwrite existing output file(s). Defaults to
+#'   FALSE.
 #' @param verbose Print progress messages.
 #' @param references_csl Optional path to a CSL style file (e.g., apa.csl). If
 #'   NULL, Pandoc's default style is used.
 #' @param pdf_engine LaTeX engine used when `to = "pdf"`. Common values are
 #'   `"xelatex"` (default, good Unicode support), `"lualatex"`, or
 #'   `"pdflatex"`. Passed to Pandoc as `--pdf-engine`.
-#' @param pdf_mainfont Main text font name for PDF output (used with XeLaTeX/LuaLaTeX).
-#'   Sets Pandoc variable `mainfont` (e.g., `-V mainfont=Source Serif Pro`).
+#' @param pdf_mainfont Main text font name for PDF output (used with
+#'   XeLaTeX/LuaLaTeX). Sets Pandoc variable `mainfont` (e.g.,
+#'   `-V mainfont=Source Serif Pro`).
 #' @param pdf_sansfont Sans‑serif font name for PDF output. Sets Pandoc
 #'   variable `sansfont`.
 #' @param pdf_monofont Monospace font name for PDF output. Sets Pandoc
@@ -51,8 +53,8 @@
 #' - `pdf_cjk_mainfont`, `pdf_cjk_options` → `-V CJKmainfont=...`,
 #'   `-V CJKoptions=...`
 #'
-#' Use these to ensure Unicode coverage and consistent typography, especially for
-#' multilingual bibliographies.
+#' Use these to ensure Unicode coverage and consistent typography, especially
+#' for multilingual bibliographies.
 #'
 #' @md
 #'
@@ -155,7 +157,10 @@ csljson_convert_pandoc <- function(
 .check_pandoc_ready <- function() {
   if (!requireNamespace("rmarkdown", quietly = TRUE)) {
     stop(
-      "Package 'rmarkdown' is required for Pandoc conversion. Please install it."
+      paste(
+        "Package 'rmarkdown' is required for Pandoc conversion.",
+        "Please install it."
+      )
     )
   }
   if (!rmarkdown::pandoc_available()) {
@@ -170,7 +175,11 @@ csljson_convert_pandoc <- function(
 #' - Optionally removes very long abstracts (> cap) to avoid pandoc/LaTeX stalls
 #' Returns a list(path, sanitized_flag). Path may be a temp file.
 #' @noRd
-.normalize_json_for_pandoc <- function(path, drop_long_abstracts = TRUE, cap = 10000) {
+.normalize_json_for_pandoc <- function(
+  path,
+  drop_long_abstracts = TRUE,
+  cap = 10000
+) {
   in_use <- normalizePath(path, mustWork = TRUE)
   tmp_in <- tempfile(fileext = ".json")
   sanitized <- FALSE
@@ -180,7 +189,11 @@ csljson_convert_pandoc <- function(
       if (!is.null(j) && length(j) > 0 && is.null(names(j))) {
         for (kk in seq_along(j)) {
           it <- j[[kk]]
-          if (is.list(it) && !is.null(it$abstract) && is.character(it$abstract)) {
+          if (
+            is.list(it) &&
+              !is.null(it$abstract) &&
+              is.character(it$abstract)
+          ) {
             ab <- it$abstract
             if (length(ab) == 1L && nchar(ab, allowNA = FALSE) > cap) {
               it$abstract <- NULL
@@ -278,7 +291,11 @@ csljson_convert_pandoc <- function(
 #' @noRd
 .convert_dir_bib <- function(csljson_dir, output_dir, to, overwrite, verbose) {
   in_dir <- normalizePath(csljson_dir, mustWork = TRUE)
-  chunk_files <- list.files(in_dir, pattern = "^chunk_\\d+\\.json$", full.names = TRUE)
+  chunk_files <- list.files(
+    in_dir,
+    pattern = "^chunk_\\d+\\.json$",
+    full.names = TRUE
+  )
   if (!length(chunk_files)) {
     stop("No chunk_*.json files found in ", csljson_dir)
   }
@@ -290,10 +307,20 @@ csljson_convert_pandoc <- function(
     base <- sub("\\.json$", "", basename(in_f))
     out_f <- file.path(out_dir, paste0(base, ext))
     if (file.exists(out_f)) {
-      if (!overwrite) stop("Output file exists: ", out_f, ". Set overwrite = TRUE to replace.")
+      if (!overwrite) {
+        stop(
+          "Output file exists: ",
+          out_f,
+          ". Set overwrite = TRUE to replace."
+        )
+      }
       unlink(out_f)
     }
-    norm <- .normalize_json_for_pandoc(in_f, drop_long_abstracts = TRUE, cap = 10000)
+    norm <- .normalize_json_for_pandoc(
+      in_f,
+      drop_long_abstracts = TRUE,
+      cap = 10000
+    )
     if (isTRUE(verbose)) {
       message(
         "Converting with pandoc: ", basename(in_f), " -> ", basename(out_f),
@@ -328,7 +355,11 @@ csljson_convert_pandoc <- function(
   pdf_cjk_options
 ) {
   in_dir <- normalizePath(csljson_dir, mustWork = TRUE)
-  chunk_files <- list.files(in_dir, pattern = "^chunk_\\d+\\.json$", full.names = TRUE)
+  chunk_files <- list.files(
+    in_dir,
+    pattern = "^chunk_\\d+\\.json$",
+    full.names = TRUE
+  )
   if (!length(chunk_files)) {
     stop("No chunk_*.json files found in ", csljson_dir)
   }
@@ -345,14 +376,33 @@ csljson_convert_pandoc <- function(
   )
   md <- .write_refs_md()
   out_dir <- .ensure_dir(output_dir)
-  ext <- switch(to, docx = ".docx", markdown = ".md", latex = ".tex", html = ".html", pdf = ".pdf")
+  ext <- switch(
+    to,
+    docx = ".docx",
+    markdown = ".md",
+    latex = ".tex",
+    html = ".html",
+    pdf = ".pdf"
+  )
   refs_out <- file.path(out_dir, paste0("references", ext))
   if (file.exists(refs_out)) {
-    if (!overwrite) stop("Output file exists: ", refs_out, ". Set overwrite = TRUE to replace.")
+    if (!overwrite) {
+      stop(
+        "Output file exists: ",
+        refs_out,
+        ". Set overwrite = TRUE to replace."
+      )
+    }
     unlink(refs_out)
   }
   if (isTRUE(verbose)) {
-    message("Rendering formatted references: ", basename(refs_out), " (", to, ")")
+    message(
+      "Rendering formatted references: ",
+      basename(refs_out),
+      " (",
+      to,
+      ")"
+    )
   }
   rmarkdown::pandoc_convert(
     input = md,
@@ -388,7 +438,10 @@ csljson_convert_pandoc <- function(
   }
   if (isTRUE(verbose)) {
     message(
-      "Converting with pandoc: ", basename(input_file), " -> ", basename(out_file),
+      "Converting with pandoc: ",
+      basename(input_file),
+      " -> ",
+      basename(out_file),
       " (", to, ")"
     )
   }
@@ -432,7 +485,14 @@ csljson_convert_pandoc <- function(
   md <- .write_refs_md()
   refs_out <- output
   if (identical(tools::file_ext(refs_out), "")) {
-    ext <- switch(to, docx = ".docx", markdown = ".md", latex = ".tex", html = ".html", pdf = ".pdf")
+    ext <- switch(
+      to,
+      docx = ".docx",
+      markdown = ".md",
+      latex = ".tex",
+      html = ".html",
+      pdf = ".pdf"
+    )
     refs_out <- paste0(refs_out, ext)
   }
   rd <- dirname(refs_out)
@@ -445,7 +505,13 @@ csljson_convert_pandoc <- function(
     unlink(refs_out)
   }
   if (isTRUE(verbose)) {
-    message("Rendering formatted references: ", basename(refs_out), " (", to, ")")
+    message(
+      "Rendering formatted references: ",
+      basename(refs_out),
+      " (",
+      to,
+      ")"
+    )
   }
   rmarkdown::pandoc_convert(
     input = md,
diff --git a/R/csljson_to_zotero_upload.R b/R/csljson_to_zotero_upload.R
index 1067b5e..5f1913e 100644
--- a/R/csljson_to_zotero_upload.R
+++ b/R/csljson_to_zotero_upload.R
@@ -23,7 +23,8 @@
 #'   \item{file}{Path to the CSL-JSON file.}
 #'   \item{status_code}{HTTP status code returned by the Zotero API.}
 #'   \item{ok}{Logical; `TRUE` if `status_code` is in 200–299.}
-#'   \item{message}{Character; short message or error text (possibly truncated).}
+#'   \item{message}{Character; short message or error text (possibly
+#'     truncated).}
 #' }
 #' Invisibly returns this data.frame.
 #'
diff --git a/tests/testthat/test-002-export-formats.R b/tests/testthat/test-002-export-formats.R
new file mode 100644
index 0000000..dc59f4f
--- /dev/null
+++ b/tests/testthat/test-002-export-formats.R
@@ -0,0 +1,160 @@
+test_that("corpus_export_via_pandoc one-shot wrapper produces a .bib", {
+  skip_if_not_installed("rmarkdown")
+  skip_if_not(rmarkdown::pandoc_available(), "pandoc not available")
+
+  input_dir <- testthat::test_path("..", "fixtures", "corpus")
+  skip_if_not(dir.exists(input_dir), "fixtures corpus not available")
+
+  out_dir <- tempfile("export_")
+  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
+  on.exit(unlink(out_dir, recursive = TRUE, force = TRUE), add = TRUE)
+  out_bib <- file.path(out_dir, "corpus.bib")
+
+  res <- corpus_export_via_pandoc(
+    corpus = input_dir,
+    output = out_bib,
+    to = "bibtex",
+    chunk_size = 100
+  )
+  expect_true(file.exists(res))
+  expect_true(file.info(res)$size > 0)
+})
+
+test_that("corpus_export_via_pandoc honours an explicit csl_tmp dir", {
+  skip_if_not_installed("rmarkdown")
+  skip_if_not(rmarkdown::pandoc_available(), "pandoc not available")
+
+  input_dir <- testthat::test_path("..", "fixtures", "corpus")
+  skip_if_not(dir.exists(input_dir), "fixtures corpus not available")
+
+  out_dir <- tempfile("export_")
+  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
+  on.exit(unlink(out_dir, recursive = TRUE, force = TRUE), add = TRUE)
+  csl_tmp <- file.path(out_dir, "csl")
+
+  res <- corpus_export_via_pandoc(
+    corpus = input_dir,
+    output = file.path(out_dir, "out"),
+    to = "biblatex",
+    csl_tmp = csl_tmp,
+    chunk_size = 100
+  )
+  expect_true(file.exists(res))
+  # When csl_tmp is supplied it is not removed afterwards.
+  expect_true(dir.exists(csl_tmp))
+  expect_true(length(list.files(csl_tmp, pattern = "chunk_\\d+\\.json$")) >= 1)
+})
+
+test_that("csljson_convert_pandoc renders html for a directory of chunks", {
+  skip_if_not_installed("rmarkdown")
+  skip_if_not(rmarkdown::pandoc_available(), "pandoc not available")
+
+  input_dir <- testthat::test_path("..", "fixtures", "corpus")
+  skip_if_not(dir.exists(input_dir), "fixtures corpus not available")
+
+  csl_dir <- tempfile("csljson_")
+  dir.create(csl_dir, recursive = TRUE, showWarnings = FALSE)
+  on.exit(unlink(csl_dir, recursive = TRUE, force = TRUE), add = TRUE)
+  corpus_to_csljson(
+    corpus = input_dir,
+    output = csl_dir,
+    chunk_size = 100,
+    overwrite = TRUE,
+    verbose = FALSE
+  )
+
+  out_html_dir <- tempfile("html_")
+  on.exit(unlink(out_html_dir, recursive = TRUE, force = TRUE), add = TRUE)
+  html_path <- csljson_convert_pandoc(
+    csl_dir,
+    out_html_dir,
+    to = "html",
+    overwrite = TRUE,
+    verbose = FALSE
+  )
+  expect_true(file.exists(html_path))
+  expect_identical(basename(html_path), "references.html")
+  expect_true(file.info(html_path)$size > 0)
+})
+
+test_that("csljson_convert_pandoc renders a single file to markdown", {
+  skip_if_not_installed("rmarkdown")
+  skip_if_not(rmarkdown::pandoc_available(), "pandoc not available")
+
+  input_dir <- testthat::test_path("..", "fixtures", "corpus")
+  skip_if_not(dir.exists(input_dir), "fixtures corpus not available")
+
+  csl_dir <- tempfile("csljson_")
+  dir.create(csl_dir, recursive = TRUE, showWarnings = FALSE)
+  on.exit(unlink(csl_dir, recursive = TRUE, force = TRUE), add = TRUE)
+  corpus_to_csljson(
+    corpus = input_dir,
+    output = csl_dir,
+    chunk_size = 100,
+    overwrite = TRUE,
+    verbose = FALSE
+  )
+  chunk1 <- file.path(csl_dir, "chunk_1.json")
+
+  out_base <- tempfile("refs_")
+  on.exit(unlink(paste0(out_base, ".md"), force = TRUE), add = TRUE)
+  # No extension supplied -> the function appends ".md".
+  md_path <- csljson_convert_pandoc(
+    chunk1,
+    out_base,
+    to = "markdown",
+    overwrite = TRUE,
+    verbose = FALSE
+  )
+  expect_true(file.exists(md_path))
+  expect_identical(tools::file_ext(md_path), "md")
+  md_txt <- readLines(md_path, warn = FALSE, encoding = "UTF-8")
+  expect_false(any(grepl("^:{3,}", md_txt)))
+})
+
+test_that("csljson_convert_pandoc errors on missing input or existing out", {
+  skip_if_not_installed("rmarkdown")
+  skip_if_not(rmarkdown::pandoc_available(), "pandoc not available")
+
+  expect_error(
+    csljson_convert_pandoc("does-not-exist.json", tempfile(), to = "bibtex"),
+    "does not exist"
+  )
+
+  input_dir <- testthat::test_path("..", "fixtures", "corpus")
+  skip_if_not(dir.exists(input_dir), "fixtures corpus not available")
+  csl_dir <- tempfile("csljson_")
+  dir.create(csl_dir, recursive = TRUE, showWarnings = FALSE)
+  on.exit(unlink(csl_dir, recursive = TRUE, force = TRUE), add = TRUE)
+  corpus_to_csljson(
+    corpus = input_dir,
+    output = csl_dir,
+    chunk_size = 100,
+    overwrite = TRUE,
+    verbose = FALSE
+  )
+
+  out_dir <- tempfile("bib_")
+  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
+  on.exit(unlink(out_dir, recursive = TRUE, force = TRUE), add = TRUE)
+  csljson_convert_pandoc(csl_dir, out_dir, to = "bibtex", verbose = FALSE)
+  # Second run without overwrite must refuse to clobber.
+  expect_error(
+    csljson_convert_pandoc(csl_dir, out_dir, to = "bibtex", verbose = FALSE),
+    "exists"
+  )
+})
+
+test_that("corpus_to_csljson rejects missing args and existing output", {
+  expect_error(corpus_to_csljson(corpus = NULL), "`corpus` must be provided")
+
+  input_dir <- testthat::test_path("..", "fixtures", "corpus")
+  skip_if_not(dir.exists(input_dir), "fixtures corpus not available")
+  out_dir <- tempfile("csljson_")
+  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
+  on.exit(unlink(out_dir, recursive = TRUE, force = TRUE), add = TRUE)
+  expect_error(
+    corpus_to_csljson(corpus = input_dir, output = out_dir, overwrite = FALSE),
+    "exists"
+  )
+})
diff --git a/tests/testthat/test-003-zotero-upload.R b/tests/testthat/test-003-zotero-upload.R
new file mode 100644
index 0000000..7f133b4
--- /dev/null
+++ b/tests/testthat/test-003-zotero-upload.R
@@ -0,0 +1,112 @@
+test_that("csljson_to_zotero_upload errors on empty api key", {
+  expect_error(
+    csljson_to_zotero_upload(
+      files = "x.json",
+      group_id = "123",
+      api_key = ""
+    ),
+    "API key is empty"
+  )
+})
+
+test_that("csljson_to_zotero_upload errors when no files are supplied", {
+  empty_dir <- tempfile("empty_")
+  dir.create(empty_dir, recursive = TRUE, showWarnings = FALSE)
+  on.exit(unlink(empty_dir, recursive = TRUE, force = TRUE), add = TRUE)
+  expect_error(
+    csljson_to_zotero_upload(
+      files = empty_dir,
+      group_id = "123",
+      api_key = "dummy"
+    )
+  )
+})
+
+test_that("csljson_to_zotero_upload records missing files without network", {
+  res <- csljson_to_zotero_upload(
+    files = c("nope-1.json", "nope-2.json"),
+    group_id = "123",
+    api_key = "dummy",
+    pause = 0
+  )
+  expect_s3_class(res, "data.frame")
+  expect_equal(nrow(res), 2L)
+  expect_false(any(res$ok))
+  expect_true(all(is.na(res$status_code)))
+  expect_true(all(res$message == "File does not exist"))
+})
+
+test_that("csljson_to_zotero_upload success path (mocked httr2)", {
+  skip_if_not_installed("httr2")
+
+  d <- tempfile("csl_")
+  dir.create(d, recursive = TRUE, showWarnings = FALSE)
+  on.exit(unlink(d, recursive = TRUE, force = TRUE), add = TRUE)
+  writeLines('[{"type":"article","title":"a"}]', file.path(d, "a.json"))
+  writeLines('[{"type":"article","title":"b"}]', file.path(d, "b.json"))
+
+  testthat::local_mocked_bindings(
+    req_perform = function(req, ...) structure(list(), class = "fake_resp"),
+    resp_status = function(resp, ...) 200L,
+    .package = "httr2"
+  )
+
+  res <- csljson_to_zotero_upload(
+    files = d,
+    group_id = "123",
+    api_key = "dummy",
+    pause = 0
+  )
+  expect_equal(nrow(res), 2L)
+  expect_true(all(res$ok))
+  expect_true(all(res$status_code == 200L))
+})
+
+test_that("csljson_to_zotero_upload failure path captures body (mocked)", {
+  skip_if_not_installed("httr2")
+
+  f <- tempfile(fileext = ".json")
+  on.exit(unlink(f, force = TRUE), add = TRUE)
+  writeLines('[{"type":"article","title":"a"}]', f)
+
+  testthat::local_mocked_bindings(
+    req_perform = function(req, ...) structure(list(), class = "fake_resp"),
+    resp_status = function(resp, ...) 400L,
+    resp_body_string = function(resp, ...) "Bad Request: invalid item",
+    .package = "httr2"
+  )
+
+  res <- csljson_to_zotero_upload(
+    files = f,
+    group_id = "123",
+    api_key = "dummy",
+    pause = 0
+  )
+  expect_equal(nrow(res), 1L)
+  expect_false(res$ok)
+  expect_equal(res$status_code, 400L)
+  expect_match(res$message, "Bad Request")
+})
+
+test_that("csljson_to_zotero_upload captures request errors (mocked)", {
+  skip_if_not_installed("httr2")
+
+  f <- tempfile(fileext = ".json")
+  on.exit(unlink(f, force = TRUE), add = TRUE)
+  writeLines('[{"type":"article","title":"a"}]', f)
+
+  testthat::local_mocked_bindings(
+    req_perform = function(req, ...) stop("connection refused"),
+    .package = "httr2"
+  )
+
+  res <- csljson_to_zotero_upload(
+    files = f,
+    group_id = "123",
+    api_key = "dummy",
+    pause = 0
+  )
+  expect_false(res$ok)
+  expect_true(is.na(res$status_code))
+  expect_match(res$message, "connection refused")
+})

From 688b89300e5ade6d3849b5d8196f3364bd5a96f9 Mon Sep 17 00:00:00 2001
From: Rainer M Krug <Rainer@krugs.de>
Date: Tue, 2 Jun 2026 17:30:49 +0200
Subject: [PATCH 7/7] fix: corpus_export_via_pandoc() now produces a single
 file, not a directory

The wrapper passed the CSL JSON *directory* to csljson_convert_pandoc(),
which for directory input treats `output` as a directory and writes
chunk_*.bib into it. So `corpus_export_via_pandoc(output = "corpus.bib")`
created a *directory* named corpus.bib rather than the single file its
docs promise. The test only caught this on Windows, where a directory
reports size 0 (vs. non-zero on macOS/Linux).

Merge the chunked CSL JSON into a single array first, then run the
single-file conversion, and return the actual created path (the previous
`normalizePath(output)` was wrong when an extension was appended).

Tests now assert the result is a file, not a directory, on all platforms.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 NAMESPACE                                |  2 ++
 R/corpus_export_via_pandoc.R             | 29 +++++++++++++++++++++---
 man/corpus_export_via_pandoc.Rd          |  5 ++--
 man/csljson_convert_pandoc.Rd            | 12 ++++++----
 man/csljson_to_zotero_upload.Rd          |  3 ++-
 tests/testthat/test-002-export-formats.R |  6 +++++
 6 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index fe33ef2..aadaf67 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,5 +15,7 @@ importFrom(httr2,req_perform)
 importFrom(httr2,request)
 importFrom(httr2,resp_body_string)
 importFrom(httr2,resp_status)
+importFrom(jsonlite,read_json)
+importFrom(jsonlite,write_json)
 importFrom(progressr,progressor)
 importFrom(progressr,with_progress)
diff --git a/R/corpus_export_via_pandoc.R b/R/corpus_export_via_pandoc.R
index 6586f97..a14df2e 100644
--- a/R/corpus_export_via_pandoc.R
+++ b/R/corpus_export_via_pandoc.R
@@ -11,7 +11,9 @@
 #' @param ... Additional arguments passed to `corpus_to_csljson()`
 #'   (e.g., `chunk_size`).
 #'
-#' @return Invisibly returns `normalizePath(output)`.
+#' @return Invisibly returns the normalized path to the created file.
+#'
+#' @importFrom jsonlite read_json write_json
 #'
 #' @export
 corpus_export_via_pandoc <- function(
@@ -36,6 +38,27 @@ corpus_export_via_pandoc <- function(
     },
     add = TRUE
   )
-  csljson_convert_pandoc(csl_tmp, output, to = to)
-  invisible(normalizePath(output))
+
+  # Merge the chunked CSL JSON into a single array before conversion so that
+  # `output` is a single file (e.g. `corpus.bib`) rather than a directory of
+  # per-chunk files (which is what passing the directory to
+  # `csljson_convert_pandoc()` would produce).
+  chunk_files <- sort(list.files(
+    csl_tmp,
+    pattern = "^chunk_\\d+\\.json$",
+    full.names = TRUE
+  ))
+  if (!length(chunk_files)) {
+    stop("No CSL JSON chunks were produced from `corpus`.")
+  }
+  items <- unlist(
+    lapply(chunk_files, jsonlite::read_json),
+    recursive = FALSE
+  )
+  combined <- tempfile(fileext = ".json")
+  on.exit(try(unlink(combined, force = TRUE), silent = TRUE), add = TRUE)
+  jsonlite::write_json(items, combined, auto_unbox = TRUE, pretty = FALSE)
+
+  out_path <- csljson_convert_pandoc(combined, output, to = to)
+  invisible(out_path)
 }
diff --git a/man/corpus_export_via_pandoc.Rd b/man/corpus_export_via_pandoc.Rd
index 556e624..376475f 100644
--- a/man/corpus_export_via_pandoc.Rd
+++ b/man/corpus_export_via_pandoc.Rd
@@ -22,10 +22,11 @@ corpus_export_via_pandoc(
 \item{csl_tmp}{Optional path for a temporary CSL JSON directory. If `NULL`, a
 temporary directory is used and removed afterwards.}
 
-\item{...}{Additional arguments passed to `corpus_to_csljson()` (e.g., `chunk_size`).}
+\item{...}{Additional arguments passed to `corpus_to_csljson()`
+(e.g., `chunk_size`).}
 }
 \value{
-Invisibly returns `normalizePath(output)`.
+Invisibly returns the normalized path to the created file.
 }
 \description{
 Convenience wrapper that maps a corpus to CSL JSON, then converts it to the
diff --git a/man/csljson_convert_pandoc.Rd b/man/csljson_convert_pandoc.Rd
index f261d9a..a1bee37 100644
--- a/man/csljson_convert_pandoc.Rd
+++ b/man/csljson_convert_pandoc.Rd
@@ -35,7 +35,8 @@ output directory (dir input; file will be \verb{references.<ext>} within).}
 
 \item{from}{Source format; defaults to "csljson".}
 
-\item{overwrite}{Logical; overwrite existing output file(s). Defaults to FALSE.}
+\item{overwrite}{Logical; overwrite existing output file(s). Defaults to
+FALSE.}
 
 \item{verbose}{Print progress messages.}
 
@@ -46,8 +47,9 @@ NULL, Pandoc's default style is used.}
 \code{"xelatex"} (default, good Unicode support), \code{"lualatex"}, or
 \code{"pdflatex"}. Passed to Pandoc as \code{--pdf-engine}.}
 
-\item{pdf_mainfont}{Main text font name for PDF output (used with XeLaTeX/LuaLaTeX).
-Sets Pandoc variable \code{mainfont} (e.g., \verb{-V mainfont=Source Serif Pro}).}
+\item{pdf_mainfont}{Main text font name for PDF output (used with
+XeLaTeX/LuaLaTeX). Sets Pandoc variable \code{mainfont} (e.g.,
+\verb{-V mainfont=Source Serif Pro}).}
 
 \item{pdf_sansfont}{Sans‑serif font name for PDF output. Sets Pandoc
 variable \code{sansfont}.}
@@ -91,6 +93,6 @@ Pandoc command line flags and variables as follows:
 \verb{-V CJKoptions=...}
 }
 
-Use these to ensure Unicode coverage and consistent typography, especially for
-multilingual bibliographies.
+Use these to ensure Unicode coverage and consistent typography, especially
+for multilingual bibliographies.
 }
diff --git a/man/csljson_to_zotero_upload.Rd b/man/csljson_to_zotero_upload.Rd
index c328523..5bfedc3 100644
--- a/man/csljson_to_zotero_upload.Rd
+++ b/man/csljson_to_zotero_upload.Rd
@@ -30,7 +30,8 @@ A data.frame with one row per file and columns:
 \item{file}{Path to the CSL-JSON file.}
 \item{status_code}{HTTP status code returned by the Zotero API.}
 \item{ok}{Logical; \code{TRUE} if \code{status_code} is in 200–299.}
-\item{message}{Character; short message or error text (possibly truncated).}
+\item{message}{Character; short message or error text (possibly
+truncated).}
 }
 Invisibly returns this data.frame.
 }
diff --git a/tests/testthat/test-002-export-formats.R b/tests/testthat/test-002-export-formats.R
index dc59f4f..4da2f35 100644
--- a/tests/testthat/test-002-export-formats.R
+++ b/tests/testthat/test-002-export-formats.R
@@ -16,6 +16,8 @@ test_that("corpus_export_via_pandoc one-shot wrapper produces a .bib", {
     to = "bibtex",
     chunk_size = 100
   )
+  # Must be a single file, not a directory of per-chunk .bib files.
+  expect_false(dir.exists(res))
   expect_true(file.exists(res))
   expect_true(file.info(res)$size > 0)
 })
@@ -39,7 +41,11 @@ test_that("corpus_export_via_pandoc honours an explicit csl_tmp dir", {
     csl_tmp = csl_tmp,
     chunk_size = 100
   )
+  # Output extension is added when missing; result is a single file.
+  expect_false(dir.exists(res))
   expect_true(file.exists(res))
+  expect_identical(tools::file_ext(res), "bib")
+  expect_true(file.info(res)$size > 0)
   # When csl_tmp is supplied it is not removed afterwards.
   expect_true(dir.exists(csl_tmp))
   expect_true(length(list.files(csl_tmp, pattern = "chunk_\\d+\\.json$")) >= 1)