From 65acf6a33f305a8bb1eaed311f467364474fe7b8 Mon Sep 17 00:00:00 2001 From: Eryk Kullikowski Date: Fri, 5 Dec 2025 15:12:41 +0100 Subject: [PATCH 01/14] feat: add configurable file upload options and related tests --- CHANGELOG.md | 5 ++ src/files/index.ts | 61 +++++++++++++++++-- src/files/infra/clients/DirectUploadClient.ts | 32 +++++++--- test/unit/files/DirectUploadClient.test.ts | 58 ++++++++++++++++-- test/unit/files/FilesConfig.test.ts | 59 ++++++++++++++++++ 5 files changed, 199 insertions(+), 16 deletions(-) create mode 100644 test/unit/files/FilesConfig.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 56926bf8..5c40e84a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,10 +13,15 @@ This changelog follows the principles of [Keep a Changelog](https://keepachangel - New Use Case: [Create a Dataset Template](./docs/useCases.md#create-a-dataset-template) under Collections. - New Use Case: [Update Terms of Access](./docs/useCases.md#update-terms-of-access). +- Files: Added `FilesConfig` class for configuring file upload behavior at runtime, including: + - `useS3Tagging`: Option to disable S3 object tagging (`x-amz-tagging` header) for S3-compatible storage that doesn't support tagging. Default: `true`. + - `maxMultipartRetries`: Configurable maximum retries for multipart upload parts. Default: `5`. + - `fileUploadTimeoutMs`: Configurable timeout for file upload operations. Default: `60000`. ### Changed - Add pagination query parameters to Dataset Version Summeries and File Version Summaries use cases +- Files: `DirectUploadClient` constructor now accepts a `DirectUploadClientConfig` object instead of a plain number for `maxMultipartRetries`. ### Fixed diff --git a/src/files/index.ts b/src/files/index.ts index a9d38386..485b2238 100644 --- a/src/files/index.ts +++ b/src/files/index.ts @@ -9,7 +9,7 @@ import { GetFile } from './domain/useCases/GetFile' import { GetFileCitation } from './domain/useCases/GetFileCitation' import { GetFileAndDataset } from './domain/useCases/GetFileAndDataset' import { UploadFile } from './domain/useCases/UploadFile' -import { DirectUploadClient } from './infra/clients/DirectUploadClient' +import { DirectUploadClient, DirectUploadClientConfig } from './infra/clients/DirectUploadClient' import { AddUploadedFilesToDataset } from './domain/useCases/AddUploadedFilesToDataset' import { DeleteFile } from './domain/useCases/DeleteFile' import { ReplaceFile } from './domain/useCases/ReplaceFile' @@ -20,8 +20,40 @@ import { UpdateFileCategories } from './domain/useCases/UpdateFileCategories' import { GetFileVersionSummaries } from './domain/useCases/GetFileVersionSummaries' import { IsFileDeleted } from './domain/useCases/IsFileDeleted' +/** + * Configuration for file upload operations. + * Use FilesConfig.init() to configure upload behavior before using uploadFile. + */ +class FilesConfig { + private static uploadConfig: DirectUploadClientConfig = {} + + /** + * Initialize file upload configuration. + * @param config - Configuration options for file uploads + * @param config.useS3Tagging - Whether to include S3 tagging header (x-amz-tagging: dv-state=temp). + * Set to false if your S3 implementation doesn't support object tagging. Default: true + * @param config.maxMultipartRetries - Maximum number of retries for multipart upload parts. Default: 5 + * @param config.fileUploadTimeoutMs - Timeout in milliseconds for file upload operations. Default: 60000 + */ + static init(config: DirectUploadClientConfig) { + this.uploadConfig = config + } + + static getConfig(): DirectUploadClientConfig { + return this.uploadConfig + } +} + const filesRepository = new FilesRepository() -const directUploadClient = new DirectUploadClient(filesRepository) +// DirectUploadClient is created lazily to allow configuration before first use +let directUploadClientInstance: DirectUploadClient | null = null + +const getDirectUploadClient = (): DirectUploadClient => { + if (!directUploadClientInstance) { + directUploadClientInstance = new DirectUploadClient(filesRepository, FilesConfig.getConfig()) + } + return directUploadClientInstance +} const getDatasetFiles = new GetDatasetFiles(filesRepository) const getDatasetFileCounts = new GetDatasetFileCounts(filesRepository) @@ -32,7 +64,6 @@ const getDatasetFilesTotalDownloadSize = new GetDatasetFilesTotalDownloadSize(fi const getFile = new GetFile(filesRepository) const getFileAndDataset = new GetFileAndDataset(filesRepository) const getFileCitation = new GetFileCitation(filesRepository) -const uploadFile = new UploadFile(directUploadClient) const addUploadedFilesToDataset = new AddUploadedFilesToDataset(filesRepository) const deleteFile = new DeleteFile(filesRepository) const replaceFile = new ReplaceFile(filesRepository) @@ -43,6 +74,27 @@ const updateFileCategories = new UpdateFileCategories(filesRepository) const getFileVersionSummaries = new GetFileVersionSummaries(filesRepository) const isFileDeleted = new IsFileDeleted(filesRepository) +// uploadFile is created lazily to respect FilesConfig settings +let uploadFileInstance: UploadFile | null = null + +/** + * Uploads a file to remote storage and returns the storage identifier. + * Respects FilesConfig settings (call FilesConfig.init() before first upload if you need custom config). + */ +const uploadFile = { + execute: ( + datasetId: number | string, + file: File, + progress: (now: number) => void, + abortController: AbortController + ): Promise => { + if (!uploadFileInstance) { + uploadFileInstance = new UploadFile(getDirectUploadClient()) + } + return uploadFileInstance.execute(datasetId, file, progress, abortController) + } +} + export { getDatasetFiles, getFileDownloadCount, @@ -62,7 +114,8 @@ export { updateFileCategories, replaceFile, getFileVersionSummaries, - isFileDeleted + isFileDeleted, + FilesConfig } export { FileModel as File, FileEmbargo, FileChecksum } from './domain/models/FileModel' diff --git a/src/files/infra/clients/DirectUploadClient.ts b/src/files/infra/clients/DirectUploadClient.ts index 8dfd1a9b..c2120d73 100644 --- a/src/files/infra/clients/DirectUploadClient.ts +++ b/src/files/infra/clients/DirectUploadClient.ts @@ -15,15 +15,27 @@ import { MultipartAbortError } from './errors/MultipartAbortError' import { FileUploadCancelError } from './errors/FileUploadCancelError' import { ApiConstants } from '../../../core/infra/repositories/ApiConstants' +export interface DirectUploadClientConfig { + /** Maximum number of retries for multipart upload parts. Default: 5 */ + maxMultipartRetries?: number + /** Whether to include S3 tagging header (x-amz-tagging: dv-state=temp). Default: true + * Set to false if your S3 implementation doesn't support object tagging. */ + useS3Tagging?: boolean + /** Timeout in milliseconds for file upload operations. Default: 60000 */ + fileUploadTimeoutMs?: number +} + export class DirectUploadClient implements IDirectUploadClient { private filesRepository: IFilesRepository private maxMultipartRetries: number + private useS3Tagging: boolean + private readonly fileUploadTimeoutMs: number - private readonly fileUploadTimeoutMs: number = 60_000 - - constructor(filesRepository: IFilesRepository, maxMultipartRetries = 5) { + constructor(filesRepository: IFilesRepository, config: DirectUploadClientConfig = {}) { this.filesRepository = filesRepository - this.maxMultipartRetries = maxMultipartRetries + this.maxMultipartRetries = config.maxMultipartRetries ?? 5 + this.useS3Tagging = config.useS3Tagging ?? true + this.fileUploadTimeoutMs = config.fileUploadTimeoutMs ?? 60_000 } public async uploadFile( @@ -59,11 +71,15 @@ export class DirectUploadClient implements IDirectUploadClient { ): Promise { try { const arrayBuffer = await file.arrayBuffer() + const headers: Record = { + 'Content-Type': 'application/octet-stream' + } + // Only add S3 tagging header if enabled (some S3 implementations don't support it) + if (this.useS3Tagging) { + headers['x-amz-tagging'] = 'dv-state=temp' + } await axios.put(destination.urls[0], arrayBuffer, { - headers: { - 'Content-Type': 'application/octet-stream', - 'x-amz-tagging': 'dv-state=temp' - }, + headers, timeout: this.fileUploadTimeoutMs, signal: abortController.signal, onUploadProgress: (progressEvent) => diff --git a/test/unit/files/DirectUploadClient.test.ts b/test/unit/files/DirectUploadClient.test.ts index 38f1921a..c0646cca 100644 --- a/test/unit/files/DirectUploadClient.test.ts +++ b/test/unit/files/DirectUploadClient.test.ts @@ -87,6 +87,56 @@ describe('uploadFile', () => { expect(actual).toEqual(testDestination.storageId) }) + + test('should include S3 tagging header by default', async () => { + const filesRepositoryStub: IFilesRepository = {} as IFilesRepository + const testDestination: FileUploadDestination = createSingleFileUploadDestinationModel() + filesRepositoryStub.getFileUploadDestination = jest.fn().mockResolvedValue(testDestination) + + const axiosPutSpy = jest.spyOn(axios, 'put').mockResolvedValue(undefined) + + const sut = new DirectUploadClient(filesRepositoryStub) + + const progressMock = jest.fn() + const abortController = new AbortController() + + await sut.uploadFile(1, testFile, progressMock, abortController) + + expect(axiosPutSpy).toHaveBeenCalledWith( + testDestination.urls[0], + expect.anything(), + expect.objectContaining({ + headers: expect.objectContaining({ + 'x-amz-tagging': 'dv-state=temp' + }) + }) + ) + }) + + test('should not include S3 tagging header when useS3Tagging is false', async () => { + const filesRepositoryStub: IFilesRepository = {} as IFilesRepository + const testDestination: FileUploadDestination = createSingleFileUploadDestinationModel() + filesRepositoryStub.getFileUploadDestination = jest.fn().mockResolvedValue(testDestination) + + const axiosPutSpy = jest.spyOn(axios, 'put').mockResolvedValue(undefined) + + const sut = new DirectUploadClient(filesRepositoryStub, { useS3Tagging: false }) + + const progressMock = jest.fn() + const abortController = new AbortController() + + await sut.uploadFile(1, testFile, progressMock, abortController) + + expect(axiosPutSpy).toHaveBeenCalledWith( + testDestination.urls[0], + expect.anything(), + expect.objectContaining({ + headers: expect.not.objectContaining({ + 'x-amz-tagging': expect.anything() + }) + }) + ) + }) }) describe('Multiple parts file', () => { @@ -113,7 +163,7 @@ describe('uploadFile', () => { jest.spyOn(axios, 'delete').mockResolvedValue(undefined) jest.spyOn(axios, 'put').mockRejectedValue('error') - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) const progressMock = jest.fn() const abortController = new AbortController() @@ -143,7 +193,7 @@ describe('uploadFile', () => { const progressMock = jest.fn() const abortController = new AbortController() - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) await expect(sut.uploadFile(1, testFile, progressMock, abortController)).rejects.toThrow( MultipartAbortError @@ -165,7 +215,7 @@ describe('uploadFile', () => { const progressMock = jest.fn() const abortController = new AbortController() - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) await expect(sut.uploadFile(1, testFile, progressMock, abortController)).rejects.toThrow( MultipartCompletionError ) @@ -187,7 +237,7 @@ describe('uploadFile', () => { .mockResolvedValueOnce(successfulPartResponse) .mockResolvedValueOnce(undefined) - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) const progressMock = jest.fn() const abortController = new AbortController() diff --git a/test/unit/files/FilesConfig.test.ts b/test/unit/files/FilesConfig.test.ts new file mode 100644 index 00000000..c2ae7487 --- /dev/null +++ b/test/unit/files/FilesConfig.test.ts @@ -0,0 +1,59 @@ +import { FilesConfig } from '../../../src/files' + +describe('FilesConfig', () => { + beforeEach(() => { + // Reset config before each test + FilesConfig.init({}) + }) + + describe('init', () => { + test('should set useS3Tagging configuration', () => { + FilesConfig.init({ useS3Tagging: false }) + + const config = FilesConfig.getConfig() + expect(config.useS3Tagging).toBe(false) + }) + + test('should set maxMultipartRetries configuration', () => { + FilesConfig.init({ maxMultipartRetries: 10 }) + + const config = FilesConfig.getConfig() + expect(config.maxMultipartRetries).toBe(10) + }) + + test('should set fileUploadTimeoutMs configuration', () => { + FilesConfig.init({ fileUploadTimeoutMs: 120000 }) + + const config = FilesConfig.getConfig() + expect(config.fileUploadTimeoutMs).toBe(120000) + }) + + test('should set multiple configuration options', () => { + FilesConfig.init({ + useS3Tagging: false, + maxMultipartRetries: 3, + fileUploadTimeoutMs: 30000 + }) + + const config = FilesConfig.getConfig() + expect(config.useS3Tagging).toBe(false) + expect(config.maxMultipartRetries).toBe(3) + expect(config.fileUploadTimeoutMs).toBe(30000) + }) + }) + + describe('getConfig', () => { + test('should return empty config by default', () => { + const config = FilesConfig.getConfig() + expect(config).toEqual({}) + }) + + test('should return previously set config', () => { + const expectedConfig = { useS3Tagging: true, maxMultipartRetries: 5 } + FilesConfig.init(expectedConfig) + + const config = FilesConfig.getConfig() + expect(config).toEqual(expectedConfig) + }) + }) +}) From 3c2aaf81d4115dd1b288828f9c451f2b0ecae161 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Mon, 4 May 2026 13:37:52 +0200 Subject: [PATCH 02/14] Remove FilesConfig/useS3Tagging; drive tagging from server response The x-amz-tagging header value is now taken from destination.tagging, which is populated by the server when tagging is enabled. This removes the client-side FilesConfig/useS3Tagging flag that duplicated the backend DISABLE_S3_TAGGING JVM setting and would drift silently. - FileUploadDestination: add tagging?: string - fileUploadDestinationsTransformers: pass tagging through both paths - DirectUploadClient: use destination.tagging as header value; remove useS3Tagging field and config option - files/index.ts: remove FilesConfig class and lazy-init pattern; uploadFile is now a plain UploadFile instance - package.json: scope prettier/eslint scripts to ./src to avoid permission errors scanning test/environment/docker-dev-volumes --- .prettierignore | 1 + package.json | 6 +- .../domain/models/FileUploadDestination.ts | 1 + src/files/index.ts | 61 ++----------------- src/files/infra/clients/DirectUploadClient.ts | 10 +-- .../fileUploadDestinationsTransformers.ts | 8 ++- 6 files changed, 17 insertions(+), 70 deletions(-) create mode 100644 .prettierignore diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..ebda3570 --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +test/environment/docker-dev-volumes/ diff --git a/package.json b/package.json index 06a0f896..f258477a 100644 --- a/package.json +++ b/package.json @@ -16,10 +16,10 @@ "test:coverage": "jest --coverage -c jest.config.ts", "test:coverage:check": "jest --coverage --ci --config jest.config.ts", "lint": "npm run lint:eslint && npm run lint:prettier", - "lint:fix": "eslint --fix --ext .ts ./src --ignore-path .gitignore .", - "lint:eslint": "eslint --ignore-path .gitignore .", + "lint:fix": "eslint --fix --ext .ts ./src --ignore-path .gitignore ./src", + "lint:eslint": "eslint --ignore-path .gitignore ./src", "lint:prettier": "prettier --check '**/*.(yml|json|md)'", - "format": "prettier --write './**/*.{js,ts,md,json,yml,md}' --config ./.prettierrc", + "format": "prettier --write './src/**/*.{js,ts,md,json,yml,md}' --config ./.prettierrc", "typecheck": "tsc --noEmit", "prepare": "husky" }, diff --git a/src/files/domain/models/FileUploadDestination.ts b/src/files/domain/models/FileUploadDestination.ts index 4bb42c2a..4d6edf13 100644 --- a/src/files/domain/models/FileUploadDestination.ts +++ b/src/files/domain/models/FileUploadDestination.ts @@ -4,4 +4,5 @@ export interface FileUploadDestination { partSize: number abortEndpoint?: string completeEndpoint?: string + tagging?: string } diff --git a/src/files/index.ts b/src/files/index.ts index 485b2238..adfde16f 100644 --- a/src/files/index.ts +++ b/src/files/index.ts @@ -9,7 +9,7 @@ import { GetFile } from './domain/useCases/GetFile' import { GetFileCitation } from './domain/useCases/GetFileCitation' import { GetFileAndDataset } from './domain/useCases/GetFileAndDataset' import { UploadFile } from './domain/useCases/UploadFile' -import { DirectUploadClient, DirectUploadClientConfig } from './infra/clients/DirectUploadClient' +import { DirectUploadClient } from './infra/clients/DirectUploadClient' import { AddUploadedFilesToDataset } from './domain/useCases/AddUploadedFilesToDataset' import { DeleteFile } from './domain/useCases/DeleteFile' import { ReplaceFile } from './domain/useCases/ReplaceFile' @@ -20,40 +20,8 @@ import { UpdateFileCategories } from './domain/useCases/UpdateFileCategories' import { GetFileVersionSummaries } from './domain/useCases/GetFileVersionSummaries' import { IsFileDeleted } from './domain/useCases/IsFileDeleted' -/** - * Configuration for file upload operations. - * Use FilesConfig.init() to configure upload behavior before using uploadFile. - */ -class FilesConfig { - private static uploadConfig: DirectUploadClientConfig = {} - - /** - * Initialize file upload configuration. - * @param config - Configuration options for file uploads - * @param config.useS3Tagging - Whether to include S3 tagging header (x-amz-tagging: dv-state=temp). - * Set to false if your S3 implementation doesn't support object tagging. Default: true - * @param config.maxMultipartRetries - Maximum number of retries for multipart upload parts. Default: 5 - * @param config.fileUploadTimeoutMs - Timeout in milliseconds for file upload operations. Default: 60000 - */ - static init(config: DirectUploadClientConfig) { - this.uploadConfig = config - } - - static getConfig(): DirectUploadClientConfig { - return this.uploadConfig - } -} - const filesRepository = new FilesRepository() -// DirectUploadClient is created lazily to allow configuration before first use -let directUploadClientInstance: DirectUploadClient | null = null - -const getDirectUploadClient = (): DirectUploadClient => { - if (!directUploadClientInstance) { - directUploadClientInstance = new DirectUploadClient(filesRepository, FilesConfig.getConfig()) - } - return directUploadClientInstance -} +const directUploadClient = new DirectUploadClient(filesRepository) const getDatasetFiles = new GetDatasetFiles(filesRepository) const getDatasetFileCounts = new GetDatasetFileCounts(filesRepository) @@ -73,27 +41,7 @@ const updateFileTabularTags = new UpdateFileTabularTags(filesRepository) const updateFileCategories = new UpdateFileCategories(filesRepository) const getFileVersionSummaries = new GetFileVersionSummaries(filesRepository) const isFileDeleted = new IsFileDeleted(filesRepository) - -// uploadFile is created lazily to respect FilesConfig settings -let uploadFileInstance: UploadFile | null = null - -/** - * Uploads a file to remote storage and returns the storage identifier. - * Respects FilesConfig settings (call FilesConfig.init() before first upload if you need custom config). - */ -const uploadFile = { - execute: ( - datasetId: number | string, - file: File, - progress: (now: number) => void, - abortController: AbortController - ): Promise => { - if (!uploadFileInstance) { - uploadFileInstance = new UploadFile(getDirectUploadClient()) - } - return uploadFileInstance.execute(datasetId, file, progress, abortController) - } -} +const uploadFile = new UploadFile(directUploadClient) export { getDatasetFiles, @@ -114,8 +62,7 @@ export { updateFileCategories, replaceFile, getFileVersionSummaries, - isFileDeleted, - FilesConfig + isFileDeleted } export { FileModel as File, FileEmbargo, FileChecksum } from './domain/models/FileModel' diff --git a/src/files/infra/clients/DirectUploadClient.ts b/src/files/infra/clients/DirectUploadClient.ts index c2120d73..4e867da8 100644 --- a/src/files/infra/clients/DirectUploadClient.ts +++ b/src/files/infra/clients/DirectUploadClient.ts @@ -18,9 +18,6 @@ import { ApiConstants } from '../../../core/infra/repositories/ApiConstants' export interface DirectUploadClientConfig { /** Maximum number of retries for multipart upload parts. Default: 5 */ maxMultipartRetries?: number - /** Whether to include S3 tagging header (x-amz-tagging: dv-state=temp). Default: true - * Set to false if your S3 implementation doesn't support object tagging. */ - useS3Tagging?: boolean /** Timeout in milliseconds for file upload operations. Default: 60000 */ fileUploadTimeoutMs?: number } @@ -28,13 +25,11 @@ export interface DirectUploadClientConfig { export class DirectUploadClient implements IDirectUploadClient { private filesRepository: IFilesRepository private maxMultipartRetries: number - private useS3Tagging: boolean private readonly fileUploadTimeoutMs: number constructor(filesRepository: IFilesRepository, config: DirectUploadClientConfig = {}) { this.filesRepository = filesRepository this.maxMultipartRetries = config.maxMultipartRetries ?? 5 - this.useS3Tagging = config.useS3Tagging ?? true this.fileUploadTimeoutMs = config.fileUploadTimeoutMs ?? 60_000 } @@ -74,9 +69,8 @@ export class DirectUploadClient implements IDirectUploadClient { const headers: Record = { 'Content-Type': 'application/octet-stream' } - // Only add S3 tagging header if enabled (some S3 implementations don't support it) - if (this.useS3Tagging) { - headers['x-amz-tagging'] = 'dv-state=temp' + if (destination.tagging !== undefined) { + headers['x-amz-tagging'] = destination.tagging } await axios.put(destination.urls[0], arrayBuffer, { headers, diff --git a/src/files/infra/repositories/transformers/fileUploadDestinationsTransformers.ts b/src/files/infra/repositories/transformers/fileUploadDestinationsTransformers.ts index 55a35757..f1a195dc 100644 --- a/src/files/infra/repositories/transformers/fileUploadDestinationsTransformers.ts +++ b/src/files/infra/repositories/transformers/fileUploadDestinationsTransformers.ts @@ -5,6 +5,7 @@ export interface FileSingleUploadDestinationPayload { url: string partSize: number storageIdentifier: string + tagging?: string } export interface FileMultipartUploadDestinationPayload { @@ -13,6 +14,7 @@ export interface FileMultipartUploadDestinationPayload { storageIdentifier: string complete?: string abort?: string + tagging?: string } export const transformUploadDestinationsResponseToUploadDestination = ( @@ -24,7 +26,8 @@ export const transformUploadDestinationsResponseToUploadDestination = ( return { urls: [fileUploadDestinationsPayload.url], partSize: fileUploadDestinationsPayload.partSize, - storageId: fileUploadDestinationsPayload.storageIdentifier + storageId: fileUploadDestinationsPayload.storageIdentifier, + tagging: fileUploadDestinationsPayload.tagging } } else { return transformMultipartUploadDestinationsPayloadToMultipartUploadDestinationModel( @@ -45,6 +48,7 @@ export const transformMultipartUploadDestinationsPayloadToMultipartUploadDestina partSize: fileUploadDestinationsPayload.partSize, storageId: fileUploadDestinationsPayload.storageIdentifier, abortEndpoint: fileUploadDestinationsPayload.abort?.substring(4), - completeEndpoint: fileUploadDestinationsPayload.complete?.substring(4) + completeEndpoint: fileUploadDestinationsPayload.complete?.substring(4), + tagging: fileUploadDestinationsPayload.tagging } } From 4fbcffe683930205da4ca30c5808d97574bcfa62 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Mon, 4 May 2026 19:42:21 +0200 Subject: [PATCH 03/14] Polish configurable upload PR --- .gitignore | 3 +- .prettierignore | 3 +- CHANGELOG.md | 6 +- package.json | 8 +-- src/files/infra/clients/DirectUploadClient.ts | 9 +++ .../GetMyDataCollectionItems.test.ts | 2 +- .../GetDatasetAvailableDatasetTypes.test.ts | 2 +- test/unit/files/DirectUploadClient.test.ts | 34 +++++++++-- test/unit/files/FilesConfig.test.ts | 59 ------------------- test/unit/files/FilesRepository.test.ts | 42 +++++++++++++ 10 files changed, 93 insertions(+), 75 deletions(-) delete mode 100644 test/unit/files/FilesConfig.test.ts diff --git a/.gitignore b/.gitignore index e8782206..c1927a44 100644 --- a/.gitignore +++ b/.gitignore @@ -9,10 +9,11 @@ node_modules # unit tests coverage +test/environment/docker-dev-volumes # macOS .DS_Store # ignore npm lock package-json.lock -.npmrc \ No newline at end of file +.npmrc diff --git a/.prettierignore b/.prettierignore index ebda3570..44a5b55f 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1 +1,2 @@ -test/environment/docker-dev-volumes/ +test/environment/docker-dev-volumes +test/environment/docker-dev-volumes/** diff --git a/CHANGELOG.md b/CHANGELOG.md index c7e242b2..7877f2f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,10 +28,8 @@ This changelog follows the principles of [Keep a Changelog](https://keepachangel - Templates: Added `setTemplateAsDefault` use case and repository method to support Dataverse endpoint `POST /dataverses/{id}/template/default/{templateId}`. - Templates: Added `unsetTemplateAsDefault` use case and repository method to support Dataverse endpoint `DELETE /dataverses/{id}/template/default`. - New Use Case: [Update Terms of Access](./docs/useCases.md#update-terms-of-access). -- Files: Added `FilesConfig` class for configuring file upload behavior at runtime, including: - - `useS3Tagging`: Option to disable S3 object tagging (`x-amz-tagging` header) for S3-compatible storage that doesn't support tagging. Default: `true`. - - `maxMultipartRetries`: Configurable maximum retries for multipart upload parts. Default: `5`. - - `fileUploadTimeoutMs`: Configurable timeout for file upload operations. Default: `60000`. +- Files: Direct uploads now forward the `tagging` value returned by the upload destination response as the `x-amz-tagging` header for single-part uploads. +- Files: Added a `DirectUploadClientConfig` object for configuring multipart upload retries and upload timeout. - Guestbooks: Added use cases and repository support for guestbook creation, listing, and enabling/disabling. - Guestbooks: Added dataset-level guestbook assignment and removal support via `assignDatasetGuestbook` (`PUT /api/datasets/{identifier}/guestbook`) and `removeDatasetGuestbook` (`DELETE /api/datasets/{identifier}/guestbook`). - Datasets/Guestbooks: Added `guestbookId` in `getDataset` responses. diff --git a/package.json b/package.json index f258477a..832cc8ce 100644 --- a/package.json +++ b/package.json @@ -16,10 +16,10 @@ "test:coverage": "jest --coverage -c jest.config.ts", "test:coverage:check": "jest --coverage --ci --config jest.config.ts", "lint": "npm run lint:eslint && npm run lint:prettier", - "lint:fix": "eslint --fix --ext .ts ./src --ignore-path .gitignore ./src", - "lint:eslint": "eslint --ignore-path .gitignore ./src", - "lint:prettier": "prettier --check '**/*.(yml|json|md)'", - "format": "prettier --write './src/**/*.{js,ts,md,json,yml,md}' --config ./.prettierrc", + "lint:fix": "eslint --fix --ext .ts --ignore-path .gitignore .", + "lint:eslint": "eslint --ext .ts --ignore-path .gitignore .", + "lint:prettier": "prettier --check '*.{yml,json,md}' 'docs/**/*.{yml,json,md}' 'test/environment/docker-compose.yml'", + "format": "prettier --write './src/**/*.{js,ts,md,json,yml}' '*.{yml,json,md}' 'docs/**/*.{yml,json,md}' 'test/environment/docker-compose.yml' --config ./.prettierrc", "typecheck": "tsc --noEmit", "prepare": "husky" }, diff --git a/src/files/infra/clients/DirectUploadClient.ts b/src/files/infra/clients/DirectUploadClient.ts index 4e867da8..ab03bc6e 100644 --- a/src/files/infra/clients/DirectUploadClient.ts +++ b/src/files/infra/clients/DirectUploadClient.ts @@ -99,12 +99,17 @@ export class DirectUploadClient implements IDirectUploadClient { const eTags: Record = {} const maxRetries = this.maxMultipartRetries const limitConcurrency = pLimit(1) + let uploadFailed = false const uploadPart = async ( destinationUrl: string, index: number, retries = 0 ): Promise => { + if (uploadFailed) { + return + } + const offset = index * partMaxSize const partSize = Math.min(partMaxSize, file.size - offset) const fileSlice = file.slice(offset, offset + partSize) @@ -125,6 +130,8 @@ export class DirectUploadClient implements IDirectUploadClient { eTags[`${index + 1}`] = eTag } catch (error) { if (axios.isCancel(error)) { + uploadFailed = true + limitConcurrency.clearQueue() await this.abortMultipartUpload(file.name, datasetId, destination.abortEndpoint as string) throw new FileUploadCancelError(file.name, datasetId) } @@ -133,6 +140,8 @@ export class DirectUploadClient implements IDirectUploadClient { await new Promise((resolve) => setTimeout(resolve, backoffDelay)) await uploadPart(destinationUrl, index, retries + 1) } else { + uploadFailed = true + limitConcurrency.clearQueue() await this.abortMultipartUpload(file.name, datasetId, destination.abortEndpoint as string) const errorMessage = diff --git a/test/functional/collections/GetMyDataCollectionItems.test.ts b/test/functional/collections/GetMyDataCollectionItems.test.ts index 5a9c43ec..bac06677 100644 --- a/test/functional/collections/GetMyDataCollectionItems.test.ts +++ b/test/functional/collections/GetMyDataCollectionItems.test.ts @@ -76,7 +76,7 @@ describe('execute', () => { [PublicationStatus.Deaccessioned], undefined, undefined, - undefined + testCollectionAlias ) throw new Error('Use case should throw an error') } catch (error) { diff --git a/test/functional/datasets/GetDatasetAvailableDatasetTypes.test.ts b/test/functional/datasets/GetDatasetAvailableDatasetTypes.test.ts index 6f03c714..17a98a42 100644 --- a/test/functional/datasets/GetDatasetAvailableDatasetTypes.test.ts +++ b/test/functional/datasets/GetDatasetAvailableDatasetTypes.test.ts @@ -25,7 +25,7 @@ describe('getDatasetAvailableDatasetTypes', () => { displayName: 'Dataset' } ] - expect(actualDatasetTypes).toEqual(expectedDatasetTypes) + expect(actualDatasetTypes).toEqual(expect.arrayContaining(expectedDatasetTypes)) }) }) }) diff --git a/test/unit/files/DirectUploadClient.test.ts b/test/unit/files/DirectUploadClient.test.ts index c0646cca..f4e54137 100644 --- a/test/unit/files/DirectUploadClient.test.ts +++ b/test/unit/files/DirectUploadClient.test.ts @@ -88,9 +88,12 @@ describe('uploadFile', () => { expect(actual).toEqual(testDestination.storageId) }) - test('should include S3 tagging header by default', async () => { + test('should include S3 tagging header when upload destination provides tagging', async () => { const filesRepositoryStub: IFilesRepository = {} as IFilesRepository - const testDestination: FileUploadDestination = createSingleFileUploadDestinationModel() + const testDestination: FileUploadDestination = { + ...createSingleFileUploadDestinationModel(), + tagging: 'dv-state=temp' + } filesRepositoryStub.getFileUploadDestination = jest.fn().mockResolvedValue(testDestination) const axiosPutSpy = jest.spyOn(axios, 'put').mockResolvedValue(undefined) @@ -113,14 +116,14 @@ describe('uploadFile', () => { ) }) - test('should not include S3 tagging header when useS3Tagging is false', async () => { + test('should not include S3 tagging header when upload destination omits tagging', async () => { const filesRepositoryStub: IFilesRepository = {} as IFilesRepository const testDestination: FileUploadDestination = createSingleFileUploadDestinationModel() filesRepositoryStub.getFileUploadDestination = jest.fn().mockResolvedValue(testDestination) const axiosPutSpy = jest.spyOn(axios, 'put').mockResolvedValue(undefined) - const sut = new DirectUploadClient(filesRepositoryStub, { useS3Tagging: false }) + const sut = new DirectUploadClient(filesRepositoryStub) const progressMock = jest.fn() const abortController = new AbortController() @@ -137,6 +140,29 @@ describe('uploadFile', () => { }) ) }) + + test('should use configured file upload timeout', async () => { + const filesRepositoryStub: IFilesRepository = {} as IFilesRepository + const testDestination: FileUploadDestination = createSingleFileUploadDestinationModel() + filesRepositoryStub.getFileUploadDestination = jest.fn().mockResolvedValue(testDestination) + + const axiosPutSpy = jest.spyOn(axios, 'put').mockResolvedValue(undefined) + + const sut = new DirectUploadClient(filesRepositoryStub, { fileUploadTimeoutMs: 30_000 }) + + const progressMock = jest.fn() + const abortController = new AbortController() + + await sut.uploadFile(1, testFile, progressMock, abortController) + + expect(axiosPutSpy).toHaveBeenCalledWith( + testDestination.urls[0], + expect.anything(), + expect.objectContaining({ + timeout: 30_000 + }) + ) + }) }) describe('Multiple parts file', () => { diff --git a/test/unit/files/FilesConfig.test.ts b/test/unit/files/FilesConfig.test.ts deleted file mode 100644 index c2ae7487..00000000 --- a/test/unit/files/FilesConfig.test.ts +++ /dev/null @@ -1,59 +0,0 @@ -import { FilesConfig } from '../../../src/files' - -describe('FilesConfig', () => { - beforeEach(() => { - // Reset config before each test - FilesConfig.init({}) - }) - - describe('init', () => { - test('should set useS3Tagging configuration', () => { - FilesConfig.init({ useS3Tagging: false }) - - const config = FilesConfig.getConfig() - expect(config.useS3Tagging).toBe(false) - }) - - test('should set maxMultipartRetries configuration', () => { - FilesConfig.init({ maxMultipartRetries: 10 }) - - const config = FilesConfig.getConfig() - expect(config.maxMultipartRetries).toBe(10) - }) - - test('should set fileUploadTimeoutMs configuration', () => { - FilesConfig.init({ fileUploadTimeoutMs: 120000 }) - - const config = FilesConfig.getConfig() - expect(config.fileUploadTimeoutMs).toBe(120000) - }) - - test('should set multiple configuration options', () => { - FilesConfig.init({ - useS3Tagging: false, - maxMultipartRetries: 3, - fileUploadTimeoutMs: 30000 - }) - - const config = FilesConfig.getConfig() - expect(config.useS3Tagging).toBe(false) - expect(config.maxMultipartRetries).toBe(3) - expect(config.fileUploadTimeoutMs).toBe(30000) - }) - }) - - describe('getConfig', () => { - test('should return empty config by default', () => { - const config = FilesConfig.getConfig() - expect(config).toEqual({}) - }) - - test('should return previously set config', () => { - const expectedConfig = { useS3Tagging: true, maxMultipartRetries: 5 } - FilesConfig.init(expectedConfig) - - const config = FilesConfig.getConfig() - expect(config).toEqual(expectedConfig) - }) - }) -}) diff --git a/test/unit/files/FilesRepository.test.ts b/test/unit/files/FilesRepository.test.ts index 4a9df202..d10f50b3 100644 --- a/test/unit/files/FilesRepository.test.ts +++ b/test/unit/files/FilesRepository.test.ts @@ -148,6 +148,48 @@ describe('FilesRepository', () => { expect(actual).toEqual(testMultipleFileUploadDestination) }) + test('should return destination with tagging when single response includes tagging', async () => { + const tagging = 'dv-state=temp' + jest.spyOn(axios, 'get').mockResolvedValue({ + data: { + status: 'OK', + data: { + ...createSingleFileUploadDestinationPayload(), + tagging + } + } + }) + jest.spyOn(fs, 'statSync').mockReturnValue({ size: testFileSize } as fs.Stats) + + const actual = await sut.getFileUploadDestination(testDatasetId, singlepartFile) + + expect(actual).toEqual({ + ...testSingleFileUploadDestination, + tagging + }) + }) + + test('should return destination with tagging when multipart response includes tagging', async () => { + const tagging = 'dv-state=temp' + jest.spyOn(axios, 'get').mockResolvedValue({ + data: { + status: 'OK', + data: { + ...createMultipartFileUploadDestinationPayload(), + tagging + } + } + }) + jest.spyOn(fs, 'statSync').mockReturnValue({ size: testFileSize } as fs.Stats) + + const actual = await sut.getFileUploadDestination(testDatasetId, multipartFile) + + expect(actual).toEqual({ + ...testMultipleFileUploadDestination, + tagging + }) + }) + test('should return error on repository read error', async () => { jest.spyOn(axios, 'get').mockRejectedValue(TestConstants.TEST_ERROR_RESPONSE) jest.spyOn(fs, 'statSync').mockReturnValue({ size: testFileSize } as fs.Stats) From c60e2b70b658f4604774ac142f7048b8411524f3 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Mon, 4 May 2026 19:46:44 +0200 Subject: [PATCH 04/14] Trim upload PR scope --- .gitignore | 3 +-- .prettierignore | 2 -- package.json | 8 ++++---- src/files/index.ts | 2 +- .../collections/GetMyDataCollectionItems.test.ts | 2 +- .../datasets/GetDatasetAvailableDatasetTypes.test.ts | 2 +- 6 files changed, 8 insertions(+), 11 deletions(-) delete mode 100644 .prettierignore diff --git a/.gitignore b/.gitignore index c1927a44..e8782206 100644 --- a/.gitignore +++ b/.gitignore @@ -9,11 +9,10 @@ node_modules # unit tests coverage -test/environment/docker-dev-volumes # macOS .DS_Store # ignore npm lock package-json.lock -.npmrc +.npmrc \ No newline at end of file diff --git a/.prettierignore b/.prettierignore deleted file mode 100644 index 44a5b55f..00000000 --- a/.prettierignore +++ /dev/null @@ -1,2 +0,0 @@ -test/environment/docker-dev-volumes -test/environment/docker-dev-volumes/** diff --git a/package.json b/package.json index 832cc8ce..06a0f896 100644 --- a/package.json +++ b/package.json @@ -16,10 +16,10 @@ "test:coverage": "jest --coverage -c jest.config.ts", "test:coverage:check": "jest --coverage --ci --config jest.config.ts", "lint": "npm run lint:eslint && npm run lint:prettier", - "lint:fix": "eslint --fix --ext .ts --ignore-path .gitignore .", - "lint:eslint": "eslint --ext .ts --ignore-path .gitignore .", - "lint:prettier": "prettier --check '*.{yml,json,md}' 'docs/**/*.{yml,json,md}' 'test/environment/docker-compose.yml'", - "format": "prettier --write './src/**/*.{js,ts,md,json,yml}' '*.{yml,json,md}' 'docs/**/*.{yml,json,md}' 'test/environment/docker-compose.yml' --config ./.prettierrc", + "lint:fix": "eslint --fix --ext .ts ./src --ignore-path .gitignore .", + "lint:eslint": "eslint --ignore-path .gitignore .", + "lint:prettier": "prettier --check '**/*.(yml|json|md)'", + "format": "prettier --write './**/*.{js,ts,md,json,yml,md}' --config ./.prettierrc", "typecheck": "tsc --noEmit", "prepare": "husky" }, diff --git a/src/files/index.ts b/src/files/index.ts index adfde16f..a9d38386 100644 --- a/src/files/index.ts +++ b/src/files/index.ts @@ -32,6 +32,7 @@ const getDatasetFilesTotalDownloadSize = new GetDatasetFilesTotalDownloadSize(fi const getFile = new GetFile(filesRepository) const getFileAndDataset = new GetFileAndDataset(filesRepository) const getFileCitation = new GetFileCitation(filesRepository) +const uploadFile = new UploadFile(directUploadClient) const addUploadedFilesToDataset = new AddUploadedFilesToDataset(filesRepository) const deleteFile = new DeleteFile(filesRepository) const replaceFile = new ReplaceFile(filesRepository) @@ -41,7 +42,6 @@ const updateFileTabularTags = new UpdateFileTabularTags(filesRepository) const updateFileCategories = new UpdateFileCategories(filesRepository) const getFileVersionSummaries = new GetFileVersionSummaries(filesRepository) const isFileDeleted = new IsFileDeleted(filesRepository) -const uploadFile = new UploadFile(directUploadClient) export { getDatasetFiles, diff --git a/test/functional/collections/GetMyDataCollectionItems.test.ts b/test/functional/collections/GetMyDataCollectionItems.test.ts index bac06677..5a9c43ec 100644 --- a/test/functional/collections/GetMyDataCollectionItems.test.ts +++ b/test/functional/collections/GetMyDataCollectionItems.test.ts @@ -76,7 +76,7 @@ describe('execute', () => { [PublicationStatus.Deaccessioned], undefined, undefined, - testCollectionAlias + undefined ) throw new Error('Use case should throw an error') } catch (error) { diff --git a/test/functional/datasets/GetDatasetAvailableDatasetTypes.test.ts b/test/functional/datasets/GetDatasetAvailableDatasetTypes.test.ts index 17a98a42..6f03c714 100644 --- a/test/functional/datasets/GetDatasetAvailableDatasetTypes.test.ts +++ b/test/functional/datasets/GetDatasetAvailableDatasetTypes.test.ts @@ -25,7 +25,7 @@ describe('getDatasetAvailableDatasetTypes', () => { displayName: 'Dataset' } ] - expect(actualDatasetTypes).toEqual(expect.arrayContaining(expectedDatasetTypes)) + expect(actualDatasetTypes).toEqual(expectedDatasetTypes) }) }) }) From 8c1e158f9fb0eba12f7dcc97c30e31e30f02d668 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Mon, 4 May 2026 19:48:10 +0200 Subject: [PATCH 05/14] typo fix --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7877f2f1..0b521c4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,7 +40,7 @@ This changelog follows the principles of [Keep a Changelog](https://keepachangel ### Changed -- Add pagination query parameters to Dataset Version Summeries and File Version Summaries use cases. +- Add pagination query parameters to Dataset Version Summaries and File Version Summaries use cases. - Templates: Rename `CreateDatasetTemplateDTO` to `CreateTemplateDTO`. - Templates: Rename `createDatasetTemplate` repository method to `createTemplate`. - Templates: Rename `getDatasetTemplates` repository method to `getTemplatesByCollectionId`. From 0dccf9934f0854bff942ce91ab57137b0d9ebdb5 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Mon, 4 May 2026 19:58:17 +0200 Subject: [PATCH 06/14] Isolate set default template functional test --- .../templates/SetTemplateAsDefault.test.ts | 47 ++++++++++++++----- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/test/functional/templates/SetTemplateAsDefault.test.ts b/test/functional/templates/SetTemplateAsDefault.test.ts index 65458940..40006305 100644 --- a/test/functional/templates/SetTemplateAsDefault.test.ts +++ b/test/functional/templates/SetTemplateAsDefault.test.ts @@ -9,10 +9,12 @@ import { import { CreateTemplateDTO } from '../../../src/templates/domain/dtos/CreateTemplateDTO' import { MetadataFieldTypeClass } from '../../../src/metadataBlocks/domain/models/MetadataBlock' import { deleteDatasetTemplateViaApi } from '../../testHelpers/datasets/datasetTemplatesHelper' +import { + createCollectionViaApi, + deleteCollectionViaApi +} from '../../testHelpers/collections/collectionHelper' describe('SetTemplateAsDefault.execute', () => { - const collectionIdOrAlias = ':root' - beforeEach(async () => { ApiConfig.init( TestConstants.TEST_API_URL, @@ -23,6 +25,7 @@ describe('SetTemplateAsDefault.execute', () => { test('should set the default template for a collection', async () => { const templateName = `TestDefaultTemplate-${Date.now()}` + const collectionIdOrAlias = `setTemplateDefault${Date.now()}` const templateDto: CreateTemplateDTO = { name: templateName, isDefault: false, @@ -55,21 +58,39 @@ describe('SetTemplateAsDefault.execute', () => { ] } - await createTemplate.execute(templateDto, collectionIdOrAlias) - const templatesAfterCreate = await getTemplatesByCollectionId.execute(collectionIdOrAlias) - const createdTemplate = templatesAfterCreate.find((template) => template.name === templateName) + let createdTemplateId: number | undefined + let collectionCreated = false - if (!createdTemplate) { - throw new Error('Created template was not found in collection templates.') - } + try { + await createCollectionViaApi(collectionIdOrAlias) + collectionCreated = true + await createTemplate.execute(templateDto, collectionIdOrAlias) + const templatesAfterCreate = await getTemplatesByCollectionId.execute(collectionIdOrAlias) + const createdTemplate = templatesAfterCreate.find( + (template) => template.name === templateName + ) - await setTemplateAsDefault.execute(createdTemplate.id, collectionIdOrAlias) + if (!createdTemplate) { + throw new Error('Created template was not found in collection templates.') + } - const templatesAfterSet = await getTemplatesByCollectionId.execute(collectionIdOrAlias) - const updatedTemplate = templatesAfterSet.find((template) => template.id === createdTemplate.id) + createdTemplateId = createdTemplate.id - expect(updatedTemplate?.isDefault).toBe(true) + await setTemplateAsDefault.execute(createdTemplate.id, collectionIdOrAlias) - await deleteDatasetTemplateViaApi(createdTemplate.id) + const templatesAfterSet = await getTemplatesByCollectionId.execute(collectionIdOrAlias) + const updatedTemplate = templatesAfterSet.find( + (template) => template.id === createdTemplate.id + ) + + expect(updatedTemplate?.isDefault).toBe(true) + } finally { + if (createdTemplateId !== undefined) { + await deleteDatasetTemplateViaApi(createdTemplateId) + } + if (collectionCreated) { + await deleteCollectionViaApi(collectionIdOrAlias) + } + } }) }) From f29ad91a4d63f382a6c8c10c00ce98f32bfd66ca Mon Sep 17 00:00:00 2001 From: ErykKul Date: Mon, 4 May 2026 20:31:20 +0200 Subject: [PATCH 07/14] Revert "Isolate set default template functional test" This reverts commit 5a9f204774235ae5eff5c35749001c25c9bba595. --- .../templates/SetTemplateAsDefault.test.ts | 47 +++++-------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/test/functional/templates/SetTemplateAsDefault.test.ts b/test/functional/templates/SetTemplateAsDefault.test.ts index 40006305..65458940 100644 --- a/test/functional/templates/SetTemplateAsDefault.test.ts +++ b/test/functional/templates/SetTemplateAsDefault.test.ts @@ -9,12 +9,10 @@ import { import { CreateTemplateDTO } from '../../../src/templates/domain/dtos/CreateTemplateDTO' import { MetadataFieldTypeClass } from '../../../src/metadataBlocks/domain/models/MetadataBlock' import { deleteDatasetTemplateViaApi } from '../../testHelpers/datasets/datasetTemplatesHelper' -import { - createCollectionViaApi, - deleteCollectionViaApi -} from '../../testHelpers/collections/collectionHelper' describe('SetTemplateAsDefault.execute', () => { + const collectionIdOrAlias = ':root' + beforeEach(async () => { ApiConfig.init( TestConstants.TEST_API_URL, @@ -25,7 +23,6 @@ describe('SetTemplateAsDefault.execute', () => { test('should set the default template for a collection', async () => { const templateName = `TestDefaultTemplate-${Date.now()}` - const collectionIdOrAlias = `setTemplateDefault${Date.now()}` const templateDto: CreateTemplateDTO = { name: templateName, isDefault: false, @@ -58,39 +55,21 @@ describe('SetTemplateAsDefault.execute', () => { ] } - let createdTemplateId: number | undefined - let collectionCreated = false - - try { - await createCollectionViaApi(collectionIdOrAlias) - collectionCreated = true - await createTemplate.execute(templateDto, collectionIdOrAlias) - const templatesAfterCreate = await getTemplatesByCollectionId.execute(collectionIdOrAlias) - const createdTemplate = templatesAfterCreate.find( - (template) => template.name === templateName - ) + await createTemplate.execute(templateDto, collectionIdOrAlias) + const templatesAfterCreate = await getTemplatesByCollectionId.execute(collectionIdOrAlias) + const createdTemplate = templatesAfterCreate.find((template) => template.name === templateName) - if (!createdTemplate) { - throw new Error('Created template was not found in collection templates.') - } + if (!createdTemplate) { + throw new Error('Created template was not found in collection templates.') + } - createdTemplateId = createdTemplate.id + await setTemplateAsDefault.execute(createdTemplate.id, collectionIdOrAlias) - await setTemplateAsDefault.execute(createdTemplate.id, collectionIdOrAlias) + const templatesAfterSet = await getTemplatesByCollectionId.execute(collectionIdOrAlias) + const updatedTemplate = templatesAfterSet.find((template) => template.id === createdTemplate.id) - const templatesAfterSet = await getTemplatesByCollectionId.execute(collectionIdOrAlias) - const updatedTemplate = templatesAfterSet.find( - (template) => template.id === createdTemplate.id - ) + expect(updatedTemplate?.isDefault).toBe(true) - expect(updatedTemplate?.isDefault).toBe(true) - } finally { - if (createdTemplateId !== undefined) { - await deleteDatasetTemplateViaApi(createdTemplateId) - } - if (collectionCreated) { - await deleteCollectionViaApi(collectionIdOrAlias) - } - } + await deleteDatasetTemplateViaApi(createdTemplate.id) }) }) From 07bf0826df29930a5b5bb3d0551e4ae869e7fa20 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Tue, 5 May 2026 11:55:54 +0200 Subject: [PATCH 08/14] Scope format and lint scripts to ./src MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `npm run format` and `npm run lint:eslint` traverse the whole repo by default, which fails on systems where `test/environment/docker-dev-volumes` contains directories owned by container users (e.g. solr/data) and is not readable by the developer's UID. The pre-commit hook then aborts. Narrowing the globs to `./src` keeps the formatters and linters running on what we actually care about — application source — and lets the pre-commit hook succeed regardless of how container volumes are provisioned. --- package.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index 06a0f896..99e80ef7 100644 --- a/package.json +++ b/package.json @@ -16,10 +16,10 @@ "test:coverage": "jest --coverage -c jest.config.ts", "test:coverage:check": "jest --coverage --ci --config jest.config.ts", "lint": "npm run lint:eslint && npm run lint:prettier", - "lint:fix": "eslint --fix --ext .ts ./src --ignore-path .gitignore .", - "lint:eslint": "eslint --ignore-path .gitignore .", + "lint:fix": "eslint --fix --ext .ts --ignore-path .gitignore ./src", + "lint:eslint": "eslint --ignore-path .gitignore ./src", "lint:prettier": "prettier --check '**/*.(yml|json|md)'", - "format": "prettier --write './**/*.{js,ts,md,json,yml,md}' --config ./.prettierrc", + "format": "prettier --write './src/**/*.{js,ts,md,json,yml,md}' --config ./.prettierrc", "typecheck": "tsc --noEmit", "prepare": "husky" }, From 571d62542017697002b99cf3c98f1f4d01ce2b28 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Tue, 5 May 2026 11:56:29 +0200 Subject: [PATCH 09/14] Re-export DataverseApiAuthMechanism from public core surface Adds DataverseApiAuthMechanism to the existing core/index.ts re-export alongside ApiConfig so consumers don't have to deep-import it from `@iqss/dataverse-client-javascript/dist/core/infra/repositories/ApiConfig`. This is the SDK side of a small two-line change agreed with the dataverse-frontend reusable-components track: once a prerelease ships this export, the standalone uploader can import the enum from the package's public surface. Until then, consumers can keep the deep import. Non-breaking additive change. --- src/core/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/index.ts b/src/core/index.ts index e7cb65f8..55dc5bc7 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -1,5 +1,5 @@ export { ReadError } from './domain/repositories/ReadError' export { WriteError } from './domain/repositories/WriteError' -export { ApiConfig } from './infra/repositories/ApiConfig' +export { ApiConfig, DataverseApiAuthMechanism } from './infra/repositories/ApiConfig' export { DvObjectOwnerNode, DvObjectType } from './domain/models/DvObjectOwnerNode' export { PublicationStatus } from './domain/models/PublicationStatus' From 397d662834355cf9ba352b2fd968cbda7f96099a Mon Sep 17 00:00:00 2001 From: ErykKul Date: Tue, 5 May 2026 11:58:15 +0200 Subject: [PATCH 10/14] Add tree node listing SDK helpers (#6691) New use cases backing the paginated dataset version tree endpoint: GET /api/datasets/{id}/versions/{versionId}/tree - listDatasetTreeNode: single-page lookup. Accepts path, limit, cursor, include (all/folders/files), order (NameAZ/NameZA), includeDeaccessioned, originals. - iterateDatasetTreeNode: async generator that walks the cursor chain so callers can consume one folder's children without driving pagination by hand. Wire format mirrors the backend response 1:1 (folder items carry optional `counts`, file items add id/size/contentType/access/ checksum/downloadUrl). Order/include parsing falls back to defaults on unknown values for forward-compat. Includes Jest unit tests for the use cases and the transformer. --- CHANGELOG.md | 3 + src/datasets/domain/models/FileTreeNode.ts | 37 ++++++ src/datasets/domain/models/FileTreePage.ts | 22 ++++ .../repositories/IDatasetsRepository.ts | 14 +++ .../domain/useCases/IterateDatasetTreeNode.ts | 30 +++++ .../domain/useCases/ListDatasetTreeNode.ts | 20 +++ src/datasets/index.ts | 18 ++- .../infra/repositories/DatasetsRepository.ts | 37 +++++- .../transformers/fileTreeTransformers.ts | 104 ++++++++++++++++ .../datasets/IterateDatasetTreeNode.test.ts | 76 ++++++++++++ .../unit/datasets/ListDatasetTreeNode.test.ts | 54 ++++++++ .../datasets/fileTreeTransformers.test.ts | 117 ++++++++++++++++++ 12 files changed, 530 insertions(+), 2 deletions(-) create mode 100644 src/datasets/domain/models/FileTreeNode.ts create mode 100644 src/datasets/domain/models/FileTreePage.ts create mode 100644 src/datasets/domain/useCases/IterateDatasetTreeNode.ts create mode 100644 src/datasets/domain/useCases/ListDatasetTreeNode.ts create mode 100644 src/datasets/infra/repositories/transformers/fileTreeTransformers.ts create mode 100644 test/unit/datasets/IterateDatasetTreeNode.test.ts create mode 100644 test/unit/datasets/ListDatasetTreeNode.test.ts create mode 100644 test/unit/datasets/fileTreeTransformers.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b521c4c..4a1470ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ This changelog follows the principles of [Keep a Changelog](https://keepachangel ### Added +- Datasets: `listDatasetTreeNode` use case and repository method backing `GET /datasets/{id}/versions/{versionId}/tree` for paginated, lazy listing of folders/files inside a dataset version. Returns `FileTreePage` with folder-first ordering, opaque keyset cursors, and per-file `downloadUrl`. +- Datasets: `iterateDatasetTreeNode` async generator that walks the cursor chain so callers can consume one folder's children without driving pagination by hand. + ### Changed ### Fixed diff --git a/src/datasets/domain/models/FileTreeNode.ts b/src/datasets/domain/models/FileTreeNode.ts new file mode 100644 index 00000000..0f7a379a --- /dev/null +++ b/src/datasets/domain/models/FileTreeNode.ts @@ -0,0 +1,37 @@ +export enum FileTreeNodeType { + FOLDER = 'folder', + FILE = 'file' +} + +export interface FileTreeFolderNode { + type: FileTreeNodeType.FOLDER + name: string + path: string + counts?: { + files: number + folders: number + } +} + +export interface FileTreeFileNode { + type: FileTreeNodeType.FILE + id: number + name: string + path: string + size: number + contentType?: string + access?: 'public' | 'restricted' | 'embargoed' + checksum?: { + type: string + value: string + } + downloadUrl: string +} + +export type FileTreeNode = FileTreeFolderNode | FileTreeFileNode + +export const isFileTreeFolderNode = (node: FileTreeNode): node is FileTreeFolderNode => + node.type === FileTreeNodeType.FOLDER + +export const isFileTreeFileNode = (node: FileTreeNode): node is FileTreeFileNode => + node.type === FileTreeNodeType.FILE diff --git a/src/datasets/domain/models/FileTreePage.ts b/src/datasets/domain/models/FileTreePage.ts new file mode 100644 index 00000000..77c139e3 --- /dev/null +++ b/src/datasets/domain/models/FileTreePage.ts @@ -0,0 +1,22 @@ +import { FileTreeNode } from './FileTreeNode' + +export enum FileTreeInclude { + ALL = 'all', + FOLDERS = 'folders', + FILES = 'files' +} + +export enum FileTreeOrder { + NAME_AZ = 'NameAZ', + NAME_ZA = 'NameZA' +} + +export interface FileTreePage { + path: string + items: FileTreeNode[] + nextCursor: string | null + limit: number + order: FileTreeOrder + include: FileTreeInclude + approximateCount?: number +} diff --git a/src/datasets/domain/repositories/IDatasetsRepository.ts b/src/datasets/domain/repositories/IDatasetsRepository.ts index 02c0d2c3..3c0d0098 100644 --- a/src/datasets/domain/repositories/IDatasetsRepository.ts +++ b/src/datasets/domain/repositories/IDatasetsRepository.ts @@ -18,6 +18,19 @@ import { DatasetLicenseUpdateRequest } from '../dtos/DatasetLicenseUpdateRequest import { DatasetTypeDTO } from '../dtos/DatasetTypeDTO' import { StorageDriver } from '../models/StorageDriver' import { DatasetUploadLimits } from '../models/DatasetUploadLimits' +import { FileTreePage, FileTreeInclude, FileTreeOrder } from '../models/FileTreePage' + +export interface ListDatasetTreeNodeParams { + datasetId: number | string + datasetVersionId?: string + path?: string + limit?: number + cursor?: string + include?: FileTreeInclude + order?: FileTreeOrder + includeDeaccessioned?: boolean + originals?: boolean +} export interface IDatasetsRepository { getDataset( @@ -104,4 +117,5 @@ export interface IDatasetsRepository { ): Promise getDatasetStorageDriver(datasetId: number | string): Promise getDatasetUploadLimits(datasetId: number | string): Promise + listDatasetTreeNode(params: ListDatasetTreeNodeParams): Promise } diff --git a/src/datasets/domain/useCases/IterateDatasetTreeNode.ts b/src/datasets/domain/useCases/IterateDatasetTreeNode.ts new file mode 100644 index 00000000..2c5c986b --- /dev/null +++ b/src/datasets/domain/useCases/IterateDatasetTreeNode.ts @@ -0,0 +1,30 @@ +import { IDatasetsRepository, ListDatasetTreeNodeParams } from '../repositories/IDatasetsRepository' +import { FileTreeNode } from '../models/FileTreeNode' + +/** + * Async generator that exhaustively iterates the immediate children of the + * given path inside a dataset version, transparently following the + * `nextCursor` chain. + * + * Use this when you need every direct child of a folder; it does NOT recurse + * into subfolders — that is the caller's responsibility (e.g. pre-download + * enumeration walks the tree by re-invoking this iterator with each folder + * path it discovers). + */ +export class IterateDatasetTreeNode { + constructor(private readonly datasetsRepository: IDatasetsRepository) {} + + async *execute(params: ListDatasetTreeNodeParams): AsyncGenerator { + let cursor = params.cursor + do { + const page = await this.datasetsRepository.listDatasetTreeNode({ + ...params, + cursor + }) + for (const item of page.items) { + yield item + } + cursor = page.nextCursor ?? undefined + } while (cursor) + } +} diff --git a/src/datasets/domain/useCases/ListDatasetTreeNode.ts b/src/datasets/domain/useCases/ListDatasetTreeNode.ts new file mode 100644 index 00000000..787b881c --- /dev/null +++ b/src/datasets/domain/useCases/ListDatasetTreeNode.ts @@ -0,0 +1,20 @@ +import { UseCase } from '../../../core/domain/useCases/UseCase' +import { IDatasetsRepository, ListDatasetTreeNodeParams } from '../repositories/IDatasetsRepository' +import { FileTreePage } from '../models/FileTreePage' + +export class ListDatasetTreeNode implements UseCase { + constructor(private readonly datasetsRepository: IDatasetsRepository) {} + + /** + * Lists the immediate children of the given folder path inside a dataset + * version, returning a single page of folders and files. + * + * Folders are returned first, then files. Both are sorted by name. Use the + * returned `nextCursor` to keep paging the same folder. The cursor is + * opaque to callers and is server-validated; an invalid cursor yields a 400 + * from the API. + */ + async execute(params: ListDatasetTreeNodeParams): Promise { + return this.datasetsRepository.listDatasetTreeNode(params) + } +} diff --git a/src/datasets/index.ts b/src/datasets/index.ts index a129467f..dd75f628 100644 --- a/src/datasets/index.ts +++ b/src/datasets/index.ts @@ -35,6 +35,8 @@ import { UpdateTermsOfAccess } from './domain/useCases/UpdateTermsOfAccess' import { UpdateDatasetLicense } from './domain/useCases/UpdateDatasetLicense' import { GetDatasetStorageDriver } from './domain/useCases/GetDatasetStorageDriver' import { GetDatasetUploadLimits } from './domain/useCases/GetDatasetUploadLimits' +import { ListDatasetTreeNode } from './domain/useCases/ListDatasetTreeNode' +import { IterateDatasetTreeNode } from './domain/useCases/IterateDatasetTreeNode' const datasetsRepository = new DatasetsRepository() @@ -86,6 +88,8 @@ const updateTermsOfAccess = new UpdateTermsOfAccess(datasetsRepository) const updateDatasetLicense = new UpdateDatasetLicense(datasetsRepository) const getDatasetStorageDriver = new GetDatasetStorageDriver(datasetsRepository) const getDatasetUploadLimits = new GetDatasetUploadLimits(datasetsRepository) +const listDatasetTreeNode = new ListDatasetTreeNode(datasetsRepository) +const iterateDatasetTreeNode = new IterateDatasetTreeNode(datasetsRepository) export { getDataset, @@ -118,7 +122,9 @@ export { deleteDatasetType, updateDatasetLicense, getDatasetStorageDriver, - getDatasetUploadLimits + getDatasetUploadLimits, + listDatasetTreeNode, + iterateDatasetTreeNode } export { DatasetNotNumberedVersion } from './domain/models/DatasetNotNumberedVersion' export { DatasetUserPermissions } from './domain/models/DatasetUserPermissions' @@ -159,3 +165,13 @@ export { DatasetType } from './domain/models/DatasetType' export { DatasetTypeDTO } from './domain/dtos/DatasetTypeDTO' export { StorageDriver } from './domain/models/StorageDriver' export { DatasetUploadLimits } from './domain/models/DatasetUploadLimits' +export { + FileTreeNode, + FileTreeFolderNode, + FileTreeFileNode, + FileTreeNodeType, + isFileTreeFolderNode, + isFileTreeFileNode +} from './domain/models/FileTreeNode' +export { FileTreePage, FileTreeInclude, FileTreeOrder } from './domain/models/FileTreePage' +export { ListDatasetTreeNodeParams } from './domain/repositories/IDatasetsRepository' diff --git a/src/datasets/infra/repositories/DatasetsRepository.ts b/src/datasets/infra/repositories/DatasetsRepository.ts index 5bac498d..f83f0a22 100644 --- a/src/datasets/infra/repositories/DatasetsRepository.ts +++ b/src/datasets/infra/repositories/DatasetsRepository.ts @@ -1,5 +1,11 @@ import { ApiRepository } from '../../../core/infra/repositories/ApiRepository' -import { IDatasetsRepository } from '../../domain/repositories/IDatasetsRepository' +import { + IDatasetsRepository, + ListDatasetTreeNodeParams +} from '../../domain/repositories/IDatasetsRepository' +import { DatasetNotNumberedVersion } from '../../domain/models/DatasetNotNumberedVersion' +import { FileTreeInclude, FileTreeOrder, FileTreePage } from '../../domain/models/FileTreePage' +import { transformTreeResponseToFileTreePage } from './transformers/fileTreeTransformers' import { Dataset, VersionUpdateType } from '../../domain/models/Dataset' import { transformVersionResponseToDataset, @@ -523,4 +529,33 @@ export class DatasetsRepository extends ApiRepository implements IDatasetsReposi throw error }) } + + public async listDatasetTreeNode(params: ListDatasetTreeNodeParams): Promise { + const versionId = params.datasetVersionId ?? DatasetNotNumberedVersion.LATEST + const queryParams: Record = {} + if (params.path !== undefined) queryParams.path = params.path + if (params.limit !== undefined) queryParams.limit = params.limit + if (params.cursor !== undefined) queryParams.cursor = params.cursor + queryParams.include = params.include ?? FileTreeInclude.ALL + queryParams.order = params.order ?? FileTreeOrder.NAME_AZ + if (params.includeDeaccessioned !== undefined) { + queryParams.includeDeaccessioned = params.includeDeaccessioned + } + if (params.originals !== undefined) { + queryParams.originals = params.originals + } + return this.doGet( + this.buildApiEndpoint( + this.datasetsResourceName, + `versions/${versionId}/tree`, + params.datasetId + ), + true, + queryParams + ) + .then((response) => transformTreeResponseToFileTreePage(response)) + .catch((error) => { + throw error + }) + } } diff --git a/src/datasets/infra/repositories/transformers/fileTreeTransformers.ts b/src/datasets/infra/repositories/transformers/fileTreeTransformers.ts new file mode 100644 index 00000000..fa01c88a --- /dev/null +++ b/src/datasets/infra/repositories/transformers/fileTreeTransformers.ts @@ -0,0 +1,104 @@ +import { AxiosResponse } from 'axios' +import { FileTreeInclude, FileTreeOrder, FileTreePage } from '../../../domain/models/FileTreePage' +import { + FileTreeFileNode, + FileTreeFolderNode, + FileTreeNode, + FileTreeNodeType +} from '../../../domain/models/FileTreeNode' + +interface FolderItemPayload { + type: 'folder' + name: string + path: string + counts?: { files: number; folders: number } +} + +interface FileItemPayload { + type: 'file' + id: number + name: string + path: string + size: number + contentType?: string + access?: 'public' | 'restricted' | 'embargoed' + checksum?: { type: string; value: string } + downloadUrl: string +} + +type ItemPayload = FolderItemPayload | FileItemPayload + +interface TreeResponsePayload { + path: string + items: ItemPayload[] + nextCursor: string | null + limit: number + order: string + include: string + approximateCount?: number +} + +const ALLOWED_ORDERS: FileTreeOrder[] = [FileTreeOrder.NAME_AZ, FileTreeOrder.NAME_ZA] +const ALLOWED_INCLUDES: FileTreeInclude[] = [ + FileTreeInclude.ALL, + FileTreeInclude.FOLDERS, + FileTreeInclude.FILES +] + +export const transformTreeResponseToFileTreePage = (response: AxiosResponse): FileTreePage => { + const payload = unwrap(response.data) + return { + path: payload.path, + items: payload.items.map(transformItem), + nextCursor: payload.nextCursor, + limit: payload.limit, + order: parseOrder(payload.order), + include: parseInclude(payload.include), + approximateCount: payload.approximateCount + } +} + +const transformItem = (item: ItemPayload): FileTreeNode => { + if (item.type === 'folder') { + return transformFolder(item) + } + return transformFile(item) +} + +const transformFolder = (item: FolderItemPayload): FileTreeFolderNode => ({ + type: FileTreeNodeType.FOLDER, + name: item.name, + path: item.path, + counts: item.counts +}) + +const transformFile = (item: FileItemPayload): FileTreeFileNode => ({ + type: FileTreeNodeType.FILE, + id: item.id, + name: item.name, + path: item.path, + size: item.size, + contentType: item.contentType, + access: item.access, + checksum: item.checksum, + downloadUrl: item.downloadUrl +}) + +const parseOrder = (value: string): FileTreeOrder => { + return (ALLOWED_ORDERS as string[]).includes(value) + ? (value as FileTreeOrder) + : FileTreeOrder.NAME_AZ +} + +const parseInclude = (value: string): FileTreeInclude => { + return (ALLOWED_INCLUDES as string[]).includes(value) + ? (value as FileTreeInclude) + : FileTreeInclude.ALL +} + +const unwrap = (value: { data: T } | T): T => { + if (value && typeof value === 'object' && 'data' in (value as Record)) { + return (value as { data: T }).data + } + return value as T +} diff --git a/test/unit/datasets/IterateDatasetTreeNode.test.ts b/test/unit/datasets/IterateDatasetTreeNode.test.ts new file mode 100644 index 00000000..2aab04ba --- /dev/null +++ b/test/unit/datasets/IterateDatasetTreeNode.test.ts @@ -0,0 +1,76 @@ +import { IterateDatasetTreeNode } from '../../../src/datasets/domain/useCases/IterateDatasetTreeNode' +import { + IDatasetsRepository, + ListDatasetTreeNodeParams +} from '../../../src/datasets/domain/repositories/IDatasetsRepository' +import { + FileTreeInclude, + FileTreeOrder, + FileTreePage +} from '../../../src/datasets/domain/models/FileTreePage' +import { FileTreeNodeType } from '../../../src/datasets/domain/models/FileTreeNode' + +const page = (overrides: Partial): FileTreePage => ({ + path: '', + items: [], + nextCursor: null, + limit: 100, + order: FileTreeOrder.NAME_AZ, + include: FileTreeInclude.ALL, + ...overrides +}) + +describe('IterateDatasetTreeNode (unit)', () => { + test('iterates a single page', async () => { + const file = { + type: FileTreeNodeType.FILE, + id: 1, + name: 'a.txt', + path: 'a.txt', + size: 100, + downloadUrl: '/api/access/datafile/1' + } + const repo: IDatasetsRepository = {} as IDatasetsRepository + repo.listDatasetTreeNode = jest.fn().mockResolvedValue(page({ items: [file] })) + + const sut = new IterateDatasetTreeNode(repo) + const collected: typeof file[] = [] + for await (const node of sut.execute({ datasetId: 1 })) { + collected.push(node as typeof file) + } + expect(collected.map((n) => n.id)).toEqual([1]) + }) + + test('walks the cursor chain until exhausted', async () => { + const fileFor = (id: number) => ({ + type: FileTreeNodeType.FILE, + id, + name: `f${id}.txt`, + path: `f${id}.txt`, + size: 100, + downloadUrl: `/api/access/datafile/${id}` + }) + const pages: FileTreePage[] = [ + page({ items: [fileFor(1), fileFor(2)], nextCursor: 'c2' }), + page({ items: [fileFor(3)], nextCursor: 'c3' }), + page({ items: [fileFor(4)] }) + ] + const calls: ListDatasetTreeNodeParams[] = [] + const repo: IDatasetsRepository = {} as IDatasetsRepository + repo.listDatasetTreeNode = jest.fn().mockImplementation((params: ListDatasetTreeNodeParams) => { + calls.push(params) + const idx = calls.length - 1 + return Promise.resolve(pages[idx]) + }) + + const sut = new IterateDatasetTreeNode(repo) + const ids: number[] = [] + for await (const node of sut.execute({ datasetId: 1 })) { + if (node.type === FileTreeNodeType.FILE) { + ids.push(node.id) + } + } + expect(ids).toEqual([1, 2, 3, 4]) + expect(calls.map((c) => c.cursor)).toEqual([undefined, 'c2', 'c3']) + }) +}) diff --git a/test/unit/datasets/ListDatasetTreeNode.test.ts b/test/unit/datasets/ListDatasetTreeNode.test.ts new file mode 100644 index 00000000..2b37f4df --- /dev/null +++ b/test/unit/datasets/ListDatasetTreeNode.test.ts @@ -0,0 +1,54 @@ +import { ListDatasetTreeNode } from '../../../src/datasets/domain/useCases/ListDatasetTreeNode' +import { IDatasetsRepository } from '../../../src/datasets/domain/repositories/IDatasetsRepository' +import { + FileTreeInclude, + FileTreeOrder, + FileTreePage +} from '../../../src/datasets/domain/models/FileTreePage' +import { FileTreeNodeType } from '../../../src/datasets/domain/models/FileTreeNode' +import { ReadError } from '../../../src/core/domain/repositories/ReadError' + +describe('ListDatasetTreeNode (unit)', () => { + const testPage: FileTreePage = { + path: 'data', + items: [ + { + type: FileTreeNodeType.FOLDER, + name: 'sub', + path: 'data/sub', + counts: { files: 1, folders: 0 } + }, + { + type: FileTreeNodeType.FILE, + id: 7, + name: 'a.txt', + path: 'data/a.txt', + size: 1024, + downloadUrl: '/api/access/datafile/7' + } + ], + nextCursor: null, + limit: 100, + order: FileTreeOrder.NAME_AZ, + include: FileTreeInclude.ALL, + approximateCount: 2 + } + + test('returns the page produced by the repository', async () => { + const repo: IDatasetsRepository = {} as IDatasetsRepository + repo.listDatasetTreeNode = jest.fn().mockResolvedValue(testPage) + + const sut = new ListDatasetTreeNode(repo) + const result = await sut.execute({ datasetId: 1, path: 'data' }) + expect(result).toEqual(testPage) + expect(repo.listDatasetTreeNode).toHaveBeenCalledWith({ datasetId: 1, path: 'data' }) + }) + + test('propagates ReadError', async () => { + const repo: IDatasetsRepository = {} as IDatasetsRepository + repo.listDatasetTreeNode = jest.fn().mockRejectedValue(new ReadError('[400] bad cursor')) + + const sut = new ListDatasetTreeNode(repo) + await expect(sut.execute({ datasetId: 1 })).rejects.toThrow(ReadError) + }) +}) diff --git a/test/unit/datasets/fileTreeTransformers.test.ts b/test/unit/datasets/fileTreeTransformers.test.ts new file mode 100644 index 00000000..65d8b02c --- /dev/null +++ b/test/unit/datasets/fileTreeTransformers.test.ts @@ -0,0 +1,117 @@ +import { AxiosResponse } from 'axios' +import { transformTreeResponseToFileTreePage } from '../../../src/datasets/infra/repositories/transformers/fileTreeTransformers' +import { + FileTreeInclude, + FileTreeOrder +} from '../../../src/datasets/domain/models/FileTreePage' +import { + FileTreeNodeType, + isFileTreeFolderNode, + isFileTreeFileNode +} from '../../../src/datasets/domain/models/FileTreeNode' + +const buildResponse = (data: unknown): AxiosResponse => + ({ + data: { data }, + status: 200, + statusText: 'OK', + headers: {}, + config: {} as never + }) as AxiosResponse + +describe('transformTreeResponseToFileTreePage', () => { + test('maps folder and file payloads to typed FileTreeNodes', () => { + const response = buildResponse({ + path: 'data', + items: [ + { type: 'folder', name: 'raw', path: 'data/raw', counts: { files: 3, folders: 0 } }, + { + type: 'file', + id: 42, + name: 'a.csv', + path: 'data/a.csv', + size: 1024, + contentType: 'text/csv', + access: 'public', + checksum: { type: 'MD5', value: 'abc' }, + downloadUrl: '/api/access/datafile/42' + } + ], + nextCursor: 'eyJ', + limit: 100, + order: 'NameAZ', + include: 'all', + approximateCount: 2 + }) + + const page = transformTreeResponseToFileTreePage(response) + expect(page.path).toBe('data') + expect(page.items).toHaveLength(2) + expect(page.nextCursor).toBe('eyJ') + expect(page.limit).toBe(100) + expect(page.order).toBe(FileTreeOrder.NAME_AZ) + expect(page.include).toBe(FileTreeInclude.ALL) + expect(page.approximateCount).toBe(2) + + const folder = page.items[0] + if (!isFileTreeFolderNode(folder)) { + throw new Error('expected folder') + } + expect(folder.name).toBe('raw') + expect(folder.counts).toEqual({ files: 3, folders: 0 }) + + const file = page.items[1] + if (!isFileTreeFileNode(file)) { + throw new Error('expected file') + } + expect(file.id).toBe(42) + expect(file.size).toBe(1024) + expect(file.access).toBe('public') + expect(file.checksum).toEqual({ type: 'MD5', value: 'abc' }) + }) + + test('falls back to defaults when order/include are unrecognized', () => { + const response = buildResponse({ + path: '', + items: [], + nextCursor: null, + limit: 100, + order: 'WhateverElse', + include: 'something' + }) + const page = transformTreeResponseToFileTreePage(response) + expect(page.order).toBe(FileTreeOrder.NAME_AZ) + expect(page.include).toBe(FileTreeInclude.ALL) + }) + + test('handles undecorated payload (no .data envelope)', () => { + const response = { + data: { + path: 'docs', + items: [ + { + type: 'file', + id: 1, + name: 'README.md', + path: 'docs/README.md', + size: 200, + downloadUrl: '/api/access/datafile/1' + } + ], + nextCursor: null, + limit: 100, + order: 'NameZA', + include: 'files' + }, + status: 200, + statusText: 'OK', + headers: {}, + config: {} as never + } as AxiosResponse + + const page = transformTreeResponseToFileTreePage(response) + expect(page.order).toBe(FileTreeOrder.NAME_ZA) + expect(page.include).toBe(FileTreeInclude.FILES) + expect(page.items[0].type).toBe(FileTreeNodeType.FILE) + }) +}) From 0df68ec635912887dc1350490319ee3406f1cbd0 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Tue, 5 May 2026 12:38:58 +0200 Subject: [PATCH 11/14] test: follow IQSS/dataverse#12182 storage-driver endpoint move PR #12182 merged on dataverse develop and moved the per-collection storage-driver endpoint: OLD: PUT /api/admin/dataverse/{alias}/storageDriver NEW: PUT /api/dataverses/{alias}/storageDriver The CI integration tests on PR #403 now run against a Dataverse container that includes the move, so setStorageDriverViaApi was hitting the old admin path and getting 404, which cascaded into every dataset/file test that depends on the directUploadTestCollection having LocalStack as its storage driver. Fix: update setStorageDriverViaApi to use the new public endpoint. The endpoint still requires X-Dataverse-Key for write operations (superuser only), so authentication is unchanged. --- test/testHelpers/collections/collectionHelper.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/testHelpers/collections/collectionHelper.ts b/test/testHelpers/collections/collectionHelper.ts index b19b668f..6d274deb 100644 --- a/test/testHelpers/collections/collectionHelper.ts +++ b/test/testHelpers/collections/collectionHelper.ts @@ -134,7 +134,7 @@ export async function setStorageDriverViaApi( ): Promise { try { return await axios.put( - `${TestConstants.TEST_API_URL}/admin/dataverse/${collectionAlias}/storageDriver`, + `${TestConstants.TEST_API_URL}/dataverses/${collectionAlias}/storageDriver`, driverLabel, { headers: { 'Content-Type': 'text/plain', 'X-Dataverse-Key': process.env.TEST_API_KEY } From 17822cbe9ed138a5a28b573f782a4a2fc8fb4aab Mon Sep 17 00:00:00 2001 From: ErykKul Date: Tue, 5 May 2026 15:55:54 +0200 Subject: [PATCH 12/14] Document tree-node use cases and DataverseApiAuthMechanism re-export - docs/useCases.md: add 'List a Folder of a Dataset Version (Tree View)' and 'Iterate a Folder of a Dataset Version (Tree View)' under Datasets read use cases, with example calls and notes on cursor / ETag / ordering. Adds matching TOC entries. - CHANGELOG.md (Unreleased): add a one-line note about re-exporting DataverseApiAuthMechanism from the public surface so the standalone reusable-component bundles can import it without a deep path. --- CHANGELOG.md | 1 + docs/useCases.md | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a1470ee..c19c30cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ This changelog follows the principles of [Keep a Changelog](https://keepachangel - Datasets: `listDatasetTreeNode` use case and repository method backing `GET /datasets/{id}/versions/{versionId}/tree` for paginated, lazy listing of folders/files inside a dataset version. Returns `FileTreePage` with folder-first ordering, opaque keyset cursors, and per-file `downloadUrl`. - Datasets: `iterateDatasetTreeNode` async generator that walks the cursor chain so callers can consume one folder's children without driving pagination by hand. +- Core: re-export `DataverseApiAuthMechanism` from the public surface so consumers of the standalone reusable bundles (e.g. `dv-tree-view`, `dv-uploader`) can import it without reaching into `core/...`. ### Changed diff --git a/docs/useCases.md b/docs/useCases.md index 4fb7a6f1..87949126 100644 --- a/docs/useCases.md +++ b/docs/useCases.md @@ -53,6 +53,8 @@ The different use cases currently available in the package are classified below, - [Get Dataset Available Dataset Types](#get-dataset-available-dataset-types) - [Get Dataset Available Dataset Type](#get-dataset-available-dataset-type) - [Get Dataset Upload Limits](#get-dataset-upload-limits) + - [List a Folder of a Dataset Version (Tree View)](#list-a-folder-of-a-dataset-version-tree-view) + - [Iterate a Folder of a Dataset Version (Tree View)](#iterate-a-folder-of-a-dataset-version-tree-view) - [Datasets write use cases](#datasets-write-use-cases) - [Create a Dataset](#create-a-dataset) - [Update a Dataset](#update-a-dataset) @@ -1619,6 +1621,79 @@ _See [use case](../src/datasets/domain/useCases/GetDatasetUploadLimits.ts) imple If the backend does not define any quota limits for the dataset, the returned object can be empty (`{}`). +#### List a Folder of a Dataset Version (Tree View) + +Returns a [FileTreePage](../src/datasets/domain/models/FileTreePage.ts) for the immediate children (folders and files) inside a folder of a dataset version, intended for lazy tree-view UIs that fetch each folder's children on demand. + +Folders come first, then files. Both are name-sorted (case-insensitive); files break ties on data file id for stability. The page carries an opaque `nextCursor` token; clients echo it back to fetch the next page and never construct one themselves. + +##### Example call: + +```typescript +import { listDatasetTreeNode, FileTreePage } from '@iqss/dataverse-client-javascript' + +/* ... */ + +const datasetId = 'doi:10.77777/FK2/AAAAAA' + +listDatasetTreeNode + .execute({ + datasetId, + datasetVersionId: '1.0', + path: 'data/raw', + limit: 100 + }) + .then((page: FileTreePage) => { + /* ... */ + }) + +/* ... */ +``` + +_See [use case](../src/datasets/domain/useCases/ListDatasetTreeNode.ts) implementation_. + +`datasetId` can be a numeric id or a persistent identifier string. `datasetVersionId` is optional and defaults to `DatasetNotNumberedVersion.LATEST`. + +Other optional parameters: `cursor` (opaque, from a previous response), `include` (`'all' | 'folders' | 'files'`, default `'all'`), `order` (`'NameAZ' | 'NameZA'`, default `'NameAZ'`), `includeDeaccessioned` (default `false`), and `originals` (when `true`, the per-file `downloadUrl` carries `?format=original`). + +For published, non-deaccessioned versions the underlying API emits `ETag` + `Cache-Control: public, immutable` headers. Drafts and deaccessioned versions don't. + +#### Iterate a Folder of a Dataset Version (Tree View) + +Returns an async generator over [FileTreeNode](../src/datasets/domain/models/FileTreeNode.ts) values for one folder, walking the cursor chain so callers can consume the children without driving pagination by hand. + +##### Example call: + +```typescript +import { + iterateDatasetTreeNode, + FileTreeNode, + isFileTreeFileNode +} from '@iqss/dataverse-client-javascript' + +/* ... */ + +const datasetId = 'doi:10.77777/FK2/AAAAAA' + +for await (const node of iterateDatasetTreeNode.execute({ + datasetId, + datasetVersionId: '1.0', + path: 'data/raw' +})) { + if (isFileTreeFileNode(node)) { + /* ... */ + } else { + /* node is a folder ... */ + } +} + +/* ... */ +``` + +_See [use case](../src/datasets/domain/useCases/IterateDatasetTreeNode.ts) implementation_. + +The generator stops after yielding everything in the requested folder; it does **not** descend into subfolders. Pass each subfolder's `path` back through `iterateDatasetTreeNode` if you want a recursive walk. + ## Files ### Files read use cases From bb19183df208de8db64217d73ad1f266d87575a4 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Thu, 7 May 2026 18:21:56 +0200 Subject: [PATCH 13/14] Re-export DirectUploadClientConfig from public files index --- src/files/index.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/files/index.ts b/src/files/index.ts index a9d38386..e14bee75 100644 --- a/src/files/index.ts +++ b/src/files/index.ts @@ -100,3 +100,7 @@ export { FileMetadataChange, FileVersionSummarySubset } from './domain/models/FileVersionSummaryInfo' +// Re-export the direct-upload client config so consumers can construct +// their own `DirectUploadClient` with custom timeouts / retry counts +// without reaching into the SDK's `infra/` private path. +export { DirectUploadClientConfig } from './infra/clients/DirectUploadClient' From cccbc5a64586adfd7a58a42b5cabad7fc89a03d1 Mon Sep 17 00:00:00 2001 From: ErykKul Date: Thu, 7 May 2026 19:24:33 +0200 Subject: [PATCH 14/14] Restore lint/format coverage for tests --- package.json | 6 +++--- test/unit/datasets/IterateDatasetTreeNode.test.ts | 2 +- test/unit/datasets/fileTreeTransformers.test.ts | 7 ++----- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/package.json b/package.json index 99e80ef7..b252bcf8 100644 --- a/package.json +++ b/package.json @@ -16,10 +16,10 @@ "test:coverage": "jest --coverage -c jest.config.ts", "test:coverage:check": "jest --coverage --ci --config jest.config.ts", "lint": "npm run lint:eslint && npm run lint:prettier", - "lint:fix": "eslint --fix --ext .ts --ignore-path .gitignore ./src", - "lint:eslint": "eslint --ignore-path .gitignore ./src", + "lint:fix": "eslint --fix --ext .ts --ignore-path .gitignore ./src ./test/unit ./test/integration ./test/functional", + "lint:eslint": "eslint --ignore-path .gitignore ./src ./test/unit ./test/integration ./test/functional", "lint:prettier": "prettier --check '**/*.(yml|json|md)'", - "format": "prettier --write './src/**/*.{js,ts,md,json,yml,md}' --config ./.prettierrc", + "format": "prettier --write './src/**/*.{js,ts,md,json,yml,md}' './test/{unit,integration,functional}/**/*.{js,ts,json}' --config ./.prettierrc", "typecheck": "tsc --noEmit", "prepare": "husky" }, diff --git a/test/unit/datasets/IterateDatasetTreeNode.test.ts b/test/unit/datasets/IterateDatasetTreeNode.test.ts index 2aab04ba..c1d328ec 100644 --- a/test/unit/datasets/IterateDatasetTreeNode.test.ts +++ b/test/unit/datasets/IterateDatasetTreeNode.test.ts @@ -34,7 +34,7 @@ describe('IterateDatasetTreeNode (unit)', () => { repo.listDatasetTreeNode = jest.fn().mockResolvedValue(page({ items: [file] })) const sut = new IterateDatasetTreeNode(repo) - const collected: typeof file[] = [] + const collected: (typeof file)[] = [] for await (const node of sut.execute({ datasetId: 1 })) { collected.push(node as typeof file) } diff --git a/test/unit/datasets/fileTreeTransformers.test.ts b/test/unit/datasets/fileTreeTransformers.test.ts index 65d8b02c..658212e0 100644 --- a/test/unit/datasets/fileTreeTransformers.test.ts +++ b/test/unit/datasets/fileTreeTransformers.test.ts @@ -1,9 +1,6 @@ import { AxiosResponse } from 'axios' import { transformTreeResponseToFileTreePage } from '../../../src/datasets/infra/repositories/transformers/fileTreeTransformers' -import { - FileTreeInclude, - FileTreeOrder -} from '../../../src/datasets/domain/models/FileTreePage' +import { FileTreeInclude, FileTreeOrder } from '../../../src/datasets/domain/models/FileTreePage' import { FileTreeNodeType, isFileTreeFolderNode, @@ -17,7 +14,7 @@ const buildResponse = (data: unknown): AxiosResponse => statusText: 'OK', headers: {}, config: {} as never - }) as AxiosResponse + } as AxiosResponse) describe('transformTreeResponseToFileTreePage', () => { test('maps folder and file payloads to typed FileTreeNodes', () => {