diff --git a/CHANGELOG.md b/CHANGELOG.md index d042fa65..c19c30cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ This changelog follows the principles of [Keep a Changelog](https://keepachangel ### Added +- Datasets: `listDatasetTreeNode` use case and repository method backing `GET /datasets/{id}/versions/{versionId}/tree` for paginated, lazy listing of folders/files inside a dataset version. Returns `FileTreePage` with folder-first ordering, opaque keyset cursors, and per-file `downloadUrl`. +- Datasets: `iterateDatasetTreeNode` async generator that walks the cursor chain so callers can consume one folder's children without driving pagination by hand. +- Core: re-export `DataverseApiAuthMechanism` from the public surface so consumers of the standalone reusable bundles (e.g. `dv-tree-view`, `dv-uploader`) can import it without reaching into `core/...`. + ### Changed ### Fixed @@ -28,6 +32,8 @@ This changelog follows the principles of [Keep a Changelog](https://keepachangel - Templates: Added `setTemplateAsDefault` use case and repository method to support Dataverse endpoint `POST /dataverses/{id}/template/default/{templateId}`. - Templates: Added `unsetTemplateAsDefault` use case and repository method to support Dataverse endpoint `DELETE /dataverses/{id}/template/default`. - New Use Case: [Update Terms of Access](./docs/useCases.md#update-terms-of-access). +- Files: Direct uploads now forward the `tagging` value returned by the upload destination response as the `x-amz-tagging` header for single-part uploads. +- Files: Added a `DirectUploadClientConfig` object for configuring multipart upload retries and upload timeout. - Guestbooks: Added use cases and repository support for guestbook creation, listing, and enabling/disabling. - Guestbooks: Added dataset-level guestbook assignment and removal support via `assignDatasetGuestbook` (`PUT /api/datasets/{identifier}/guestbook`) and `removeDatasetGuestbook` (`DELETE /api/datasets/{identifier}/guestbook`). - Datasets/Guestbooks: Added `guestbookId` in `getDataset` responses. @@ -38,10 +44,11 @@ This changelog follows the principles of [Keep a Changelog](https://keepachangel ### Changed -- Add pagination query parameters to Dataset Version Summeries and File Version Summaries use cases. +- Add pagination query parameters to Dataset Version Summaries and File Version Summaries use cases. - Templates: Rename `CreateDatasetTemplateDTO` to `CreateTemplateDTO`. - Templates: Rename `createDatasetTemplate` repository method to `createTemplate`. - Templates: Rename `getDatasetTemplates` repository method to `getTemplatesByCollectionId`. +- Files: `DirectUploadClient` constructor now accepts a `DirectUploadClientConfig` object instead of a plain number for `maxMultipartRetries`. 
### Fixed diff --git a/docs/useCases.md b/docs/useCases.md index 4fb7a6f1..87949126 100644 --- a/docs/useCases.md +++ b/docs/useCases.md @@ -53,6 +53,8 @@ The different use cases currently available in the package are classified below, - [Get Dataset Available Dataset Types](#get-dataset-available-dataset-types) - [Get Dataset Available Dataset Type](#get-dataset-available-dataset-type) - [Get Dataset Upload Limits](#get-dataset-upload-limits) + - [List a Folder of a Dataset Version (Tree View)](#list-a-folder-of-a-dataset-version-tree-view) + - [Iterate a Folder of a Dataset Version (Tree View)](#iterate-a-folder-of-a-dataset-version-tree-view) - [Datasets write use cases](#datasets-write-use-cases) - [Create a Dataset](#create-a-dataset) - [Update a Dataset](#update-a-dataset) @@ -1619,6 +1621,79 @@ _See [use case](../src/datasets/domain/useCases/GetDatasetUploadLimits.ts) imple If the backend does not define any quota limits for the dataset, the returned object can be empty (`{}`). +#### List a Folder of a Dataset Version (Tree View) + +Returns a [FileTreePage](../src/datasets/domain/models/FileTreePage.ts) for the immediate children (folders and files) inside a folder of a dataset version, intended for lazy tree-view UIs that fetch each folder's children on demand. + +Folders come first, then files. Both are name-sorted (case-insensitive); files break ties on data file id for stability. The page carries an opaque `nextCursor` token; clients echo it back to fetch the next page and never construct one themselves. + +##### Example call: + +```typescript +import { listDatasetTreeNode, FileTreePage } from '@iqss/dataverse-client-javascript' + +/* ... */ + +const datasetId = 'doi:10.77777/FK2/AAAAAA' + +listDatasetTreeNode + .execute({ + datasetId, + datasetVersionId: '1.0', + path: 'data/raw', + limit: 100 + }) + .then((page: FileTreePage) => { + /* ... */ + }) + +/* ... */ +``` + +_See [use case](../src/datasets/domain/useCases/ListDatasetTreeNode.ts) implementation_. + +`datasetId` can be a numeric id or a persistent identifier string. `datasetVersionId` is optional and defaults to `DatasetNotNumberedVersion.LATEST`. + +Other optional parameters: `cursor` (opaque, from a previous response), `include` (`'all' | 'folders' | 'files'`, default `'all'`), `order` (`'NameAZ' | 'NameZA'`, default `'NameAZ'`), `includeDeaccessioned` (default `false`), and `originals` (when `true`, the per-file `downloadUrl` carries `?format=original`). + +For published, non-deaccessioned versions the underlying API emits `ETag` + `Cache-Control: public, immutable` headers. Drafts and deaccessioned versions don't. + +#### Iterate a Folder of a Dataset Version (Tree View) + +Returns an async generator over [FileTreeNode](../src/datasets/domain/models/FileTreeNode.ts) values for one folder, walking the cursor chain so callers can consume the children without driving pagination by hand. + +##### Example call: + +```typescript +import { + iterateDatasetTreeNode, + FileTreeNode, + isFileTreeFileNode +} from '@iqss/dataverse-client-javascript' + +/* ... */ + +const datasetId = 'doi:10.77777/FK2/AAAAAA' + +for await (const node of iterateDatasetTreeNode.execute({ + datasetId, + datasetVersionId: '1.0', + path: 'data/raw' +})) { + if (isFileTreeFileNode(node)) { + /* ... */ + } else { + /* node is a folder ... */ + } +} + +/* ... */ +``` + +_See [use case](../src/datasets/domain/useCases/IterateDatasetTreeNode.ts) implementation_. 
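+
+Because the iterator only yields the requested folder's direct children (see the note below), a recursive walk re-invokes it for each subfolder it encounters. A minimal sketch of that pattern, using only the exports shown above (the `collectFilesRecursively` helper name is illustrative, not part of the package):
+
+```typescript
+import {
+  iterateDatasetTreeNode,
+  FileTreeFileNode,
+  isFileTreeFolderNode
+} from '@iqss/dataverse-client-javascript'
+
+// Illustrative helper (not part of the SDK): depth-first collection of every
+// file under `path`, re-invoking the iterator once per discovered subfolder.
+async function collectFilesRecursively(
+  datasetId: number | string,
+  path?: string
+): Promise<FileTreeFileNode[]> {
+  const files: FileTreeFileNode[] = []
+  for await (const node of iterateDatasetTreeNode.execute({ datasetId, path })) {
+    if (isFileTreeFolderNode(node)) {
+      files.push(...(await collectFilesRecursively(datasetId, node.path)))
+    } else {
+      files.push(node)
+    }
+  }
+  return files
+}
+```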
+ +The generator stops after yielding everything in the requested folder; it does **not** descend into subfolders. Pass each subfolder's `path` back through `iterateDatasetTreeNode` if you want a recursive walk. + ## Files ### Files read use cases diff --git a/package.json b/package.json index 06a0f896..b252bcf8 100644 --- a/package.json +++ b/package.json @@ -16,10 +16,10 @@ "test:coverage": "jest --coverage -c jest.config.ts", "test:coverage:check": "jest --coverage --ci --config jest.config.ts", "lint": "npm run lint:eslint && npm run lint:prettier", - "lint:fix": "eslint --fix --ext .ts ./src --ignore-path .gitignore .", - "lint:eslint": "eslint --ignore-path .gitignore .", + "lint:fix": "eslint --fix --ext .ts --ignore-path .gitignore ./src ./test/unit ./test/integration ./test/functional", + "lint:eslint": "eslint --ignore-path .gitignore ./src ./test/unit ./test/integration ./test/functional", "lint:prettier": "prettier --check '**/*.(yml|json|md)'", - "format": "prettier --write './**/*.{js,ts,md,json,yml,md}' --config ./.prettierrc", + "format": "prettier --write './src/**/*.{js,ts,md,json,yml,md}' './test/{unit,integration,functional}/**/*.{js,ts,json}' --config ./.prettierrc", "typecheck": "tsc --noEmit", "prepare": "husky" }, diff --git a/src/core/index.ts b/src/core/index.ts index e7cb65f8..55dc5bc7 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -1,5 +1,5 @@ export { ReadError } from './domain/repositories/ReadError' export { WriteError } from './domain/repositories/WriteError' -export { ApiConfig } from './infra/repositories/ApiConfig' +export { ApiConfig, DataverseApiAuthMechanism } from './infra/repositories/ApiConfig' export { DvObjectOwnerNode, DvObjectType } from './domain/models/DvObjectOwnerNode' export { PublicationStatus } from './domain/models/PublicationStatus' diff --git a/src/datasets/domain/models/FileTreeNode.ts b/src/datasets/domain/models/FileTreeNode.ts new file mode 100644 index 00000000..0f7a379a --- /dev/null +++ b/src/datasets/domain/models/FileTreeNode.ts @@ -0,0 +1,37 @@ +export enum FileTreeNodeType { + FOLDER = 'folder', + FILE = 'file' +} + +export interface FileTreeFolderNode { + type: FileTreeNodeType.FOLDER + name: string + path: string + counts?: { + files: number + folders: number + } +} + +export interface FileTreeFileNode { + type: FileTreeNodeType.FILE + id: number + name: string + path: string + size: number + contentType?: string + access?: 'public' | 'restricted' | 'embargoed' + checksum?: { + type: string + value: string + } + downloadUrl: string +} + +export type FileTreeNode = FileTreeFolderNode | FileTreeFileNode + +export const isFileTreeFolderNode = (node: FileTreeNode): node is FileTreeFolderNode => + node.type === FileTreeNodeType.FOLDER + +export const isFileTreeFileNode = (node: FileTreeNode): node is FileTreeFileNode => + node.type === FileTreeNodeType.FILE diff --git a/src/datasets/domain/models/FileTreePage.ts b/src/datasets/domain/models/FileTreePage.ts new file mode 100644 index 00000000..77c139e3 --- /dev/null +++ b/src/datasets/domain/models/FileTreePage.ts @@ -0,0 +1,22 @@ +import { FileTreeNode } from './FileTreeNode' + +export enum FileTreeInclude { + ALL = 'all', + FOLDERS = 'folders', + FILES = 'files' +} + +export enum FileTreeOrder { + NAME_AZ = 'NameAZ', + NAME_ZA = 'NameZA' +} + +export interface FileTreePage { + path: string + items: FileTreeNode[] + nextCursor: string | null + limit: number + order: FileTreeOrder + include: FileTreeInclude + approximateCount?: number +} diff --git 
a/src/datasets/domain/repositories/IDatasetsRepository.ts b/src/datasets/domain/repositories/IDatasetsRepository.ts index 02c0d2c3..3c0d0098 100644 --- a/src/datasets/domain/repositories/IDatasetsRepository.ts +++ b/src/datasets/domain/repositories/IDatasetsRepository.ts @@ -18,6 +18,19 @@ import { DatasetLicenseUpdateRequest } from '../dtos/DatasetLicenseUpdateRequest import { DatasetTypeDTO } from '../dtos/DatasetTypeDTO' import { StorageDriver } from '../models/StorageDriver' import { DatasetUploadLimits } from '../models/DatasetUploadLimits' +import { FileTreePage, FileTreeInclude, FileTreeOrder } from '../models/FileTreePage' + +export interface ListDatasetTreeNodeParams { + datasetId: number | string + datasetVersionId?: string + path?: string + limit?: number + cursor?: string + include?: FileTreeInclude + order?: FileTreeOrder + includeDeaccessioned?: boolean + originals?: boolean +} export interface IDatasetsRepository { getDataset( @@ -104,4 +117,5 @@ export interface IDatasetsRepository { ): Promise<void> getDatasetStorageDriver(datasetId: number | string): Promise<StorageDriver> getDatasetUploadLimits(datasetId: number | string): Promise<DatasetUploadLimits> + listDatasetTreeNode(params: ListDatasetTreeNodeParams): Promise<FileTreePage> } diff --git a/src/datasets/domain/useCases/IterateDatasetTreeNode.ts b/src/datasets/domain/useCases/IterateDatasetTreeNode.ts new file mode 100644 index 00000000..2c5c986b --- /dev/null +++ b/src/datasets/domain/useCases/IterateDatasetTreeNode.ts @@ -0,0 +1,30 @@ +import { IDatasetsRepository, ListDatasetTreeNodeParams } from '../repositories/IDatasetsRepository' +import { FileTreeNode } from '../models/FileTreeNode' + +/** + * Async generator that exhaustively iterates the immediate children of the + * given path inside a dataset version, transparently following the + * `nextCursor` chain. + * + * Use this when you need every direct child of a folder; it does NOT recurse + * into subfolders — that is the caller's responsibility (e.g. pre-download + * enumeration walks the tree by re-invoking this iterator with each folder + * path it discovers). + */ +export class IterateDatasetTreeNode { + constructor(private readonly datasetsRepository: IDatasetsRepository) {} + + async *execute(params: ListDatasetTreeNodeParams): AsyncGenerator<FileTreeNode> { + let cursor = params.cursor + do { + const page = await this.datasetsRepository.listDatasetTreeNode({ + ...params, + cursor + }) + for (const item of page.items) { + yield item + } + cursor = page.nextCursor ?? undefined + } while (cursor) + } +} diff --git a/src/datasets/domain/useCases/ListDatasetTreeNode.ts b/src/datasets/domain/useCases/ListDatasetTreeNode.ts new file mode 100644 index 00000000..787b881c --- /dev/null +++ b/src/datasets/domain/useCases/ListDatasetTreeNode.ts @@ -0,0 +1,20 @@ +import { UseCase } from '../../../core/domain/useCases/UseCase' +import { IDatasetsRepository, ListDatasetTreeNodeParams } from '../repositories/IDatasetsRepository' +import { FileTreePage } from '../models/FileTreePage' + +export class ListDatasetTreeNode implements UseCase<FileTreePage> { + constructor(private readonly datasetsRepository: IDatasetsRepository) {} + + /** + * Lists the immediate children of the given folder path inside a dataset + * version, returning a single page of folders and files. + * + * Folders are returned first, then files. Both are sorted by name. Use the + * returned `nextCursor` to keep paging the same folder. The cursor is + * opaque to callers and is server-validated; an invalid cursor yields a 400 + * from the API.
+ */ + async execute(params: ListDatasetTreeNodeParams): Promise<FileTreePage> { + return this.datasetsRepository.listDatasetTreeNode(params) + } +} diff --git a/src/datasets/index.ts b/src/datasets/index.ts index a129467f..dd75f628 100644 --- a/src/datasets/index.ts +++ b/src/datasets/index.ts @@ -35,6 +35,8 @@ import { UpdateTermsOfAccess } from './domain/useCases/UpdateTermsOfAccess' import { UpdateDatasetLicense } from './domain/useCases/UpdateDatasetLicense' import { GetDatasetStorageDriver } from './domain/useCases/GetDatasetStorageDriver' import { GetDatasetUploadLimits } from './domain/useCases/GetDatasetUploadLimits' +import { ListDatasetTreeNode } from './domain/useCases/ListDatasetTreeNode' +import { IterateDatasetTreeNode } from './domain/useCases/IterateDatasetTreeNode' const datasetsRepository = new DatasetsRepository() @@ -86,6 +88,8 @@ const updateTermsOfAccess = new UpdateTermsOfAccess(datasetsRepository) const updateDatasetLicense = new UpdateDatasetLicense(datasetsRepository) const getDatasetStorageDriver = new GetDatasetStorageDriver(datasetsRepository) const getDatasetUploadLimits = new GetDatasetUploadLimits(datasetsRepository) +const listDatasetTreeNode = new ListDatasetTreeNode(datasetsRepository) +const iterateDatasetTreeNode = new IterateDatasetTreeNode(datasetsRepository) export { getDataset, @@ -118,7 +122,9 @@ export { deleteDatasetType, updateDatasetLicense, getDatasetStorageDriver, - getDatasetUploadLimits + getDatasetUploadLimits, + listDatasetTreeNode, + iterateDatasetTreeNode } export { DatasetNotNumberedVersion } from './domain/models/DatasetNotNumberedVersion' export { DatasetUserPermissions } from './domain/models/DatasetUserPermissions' @@ -159,3 +165,13 @@ export { DatasetType } from './domain/models/DatasetType' export { DatasetTypeDTO } from './domain/dtos/DatasetTypeDTO' export { StorageDriver } from './domain/models/StorageDriver' export { DatasetUploadLimits } from './domain/models/DatasetUploadLimits' +export { + FileTreeNode, + FileTreeFolderNode, + FileTreeFileNode, + FileTreeNodeType, + isFileTreeFolderNode, + isFileTreeFileNode +} from './domain/models/FileTreeNode' +export { FileTreePage, FileTreeInclude, FileTreeOrder } from './domain/models/FileTreePage' +export { ListDatasetTreeNodeParams } from './domain/repositories/IDatasetsRepository' diff --git a/src/datasets/infra/repositories/DatasetsRepository.ts b/src/datasets/infra/repositories/DatasetsRepository.ts index 5bac498d..f83f0a22 100644 --- a/src/datasets/infra/repositories/DatasetsRepository.ts +++ b/src/datasets/infra/repositories/DatasetsRepository.ts @@ -1,5 +1,11 @@ import { ApiRepository } from '../../../core/infra/repositories/ApiRepository' -import { IDatasetsRepository } from '../../domain/repositories/IDatasetsRepository' +import { + IDatasetsRepository, + ListDatasetTreeNodeParams +} from '../../domain/repositories/IDatasetsRepository' +import { DatasetNotNumberedVersion } from '../../domain/models/DatasetNotNumberedVersion' +import { FileTreeInclude, FileTreeOrder, FileTreePage } from '../../domain/models/FileTreePage' +import { transformTreeResponseToFileTreePage } from './transformers/fileTreeTransformers' import { Dataset, VersionUpdateType } from '../../domain/models/Dataset' import { transformVersionResponseToDataset, @@ -523,4 +529,33 @@ export class DatasetsRepository extends ApiRepository implements IDatasetsReposi throw error }) } + + public async listDatasetTreeNode(params: ListDatasetTreeNodeParams): Promise<FileTreePage> { + const versionId = params.datasetVersionId ??
DatasetNotNumberedVersion.LATEST + const queryParams: Record<string, string | number | boolean> = {} + if (params.path !== undefined) queryParams.path = params.path + if (params.limit !== undefined) queryParams.limit = params.limit + if (params.cursor !== undefined) queryParams.cursor = params.cursor + queryParams.include = params.include ?? FileTreeInclude.ALL + queryParams.order = params.order ?? FileTreeOrder.NAME_AZ + if (params.includeDeaccessioned !== undefined) { + queryParams.includeDeaccessioned = params.includeDeaccessioned + } + if (params.originals !== undefined) { + queryParams.originals = params.originals + } + return this.doGet( + this.buildApiEndpoint( + this.datasetsResourceName, + `versions/${versionId}/tree`, + params.datasetId + ), + true, + queryParams + ) + .then((response) => transformTreeResponseToFileTreePage(response)) + .catch((error) => { + throw error + }) + } } diff --git a/src/datasets/infra/repositories/transformers/fileTreeTransformers.ts b/src/datasets/infra/repositories/transformers/fileTreeTransformers.ts new file mode 100644 index 00000000..fa01c88a --- /dev/null +++ b/src/datasets/infra/repositories/transformers/fileTreeTransformers.ts @@ -0,0 +1,104 @@ +import { AxiosResponse } from 'axios' +import { FileTreeInclude, FileTreeOrder, FileTreePage } from '../../../domain/models/FileTreePage' +import { + FileTreeFileNode, + FileTreeFolderNode, + FileTreeNode, + FileTreeNodeType +} from '../../../domain/models/FileTreeNode' + +interface FolderItemPayload { + type: 'folder' + name: string + path: string + counts?: { files: number; folders: number } +} + +interface FileItemPayload { + type: 'file' + id: number + name: string + path: string + size: number + contentType?: string + access?: 'public' | 'restricted' | 'embargoed' + checksum?: { type: string; value: string } + downloadUrl: string +} + +type ItemPayload = FolderItemPayload | FileItemPayload + +interface TreeResponsePayload { + path: string + items: ItemPayload[] + nextCursor: string | null + limit: number + order: string + include: string + approximateCount?: number +} + +const ALLOWED_ORDERS: FileTreeOrder[] = [FileTreeOrder.NAME_AZ, FileTreeOrder.NAME_ZA] +const ALLOWED_INCLUDES: FileTreeInclude[] = [ + FileTreeInclude.ALL, + FileTreeInclude.FOLDERS, + FileTreeInclude.FILES ] + +export const transformTreeResponseToFileTreePage = (response: AxiosResponse): FileTreePage => { + const payload = unwrap<TreeResponsePayload>(response.data) + return { + path: payload.path, + items: payload.items.map(transformItem), + nextCursor: payload.nextCursor, + limit: payload.limit, + order: parseOrder(payload.order), + include: parseInclude(payload.include), + approximateCount: payload.approximateCount + } +} + +const transformItem = (item: ItemPayload): FileTreeNode => { + if (item.type === 'folder') { + return transformFolder(item) + } + return transformFile(item) +} + +const transformFolder = (item: FolderItemPayload): FileTreeFolderNode => ({ + type: FileTreeNodeType.FOLDER, + name: item.name, + path: item.path, + counts: item.counts +}) + +const transformFile = (item: FileItemPayload): FileTreeFileNode => ({ + type: FileTreeNodeType.FILE, + id: item.id, + name: item.name, + path: item.path, + size: item.size, + contentType: item.contentType, + access: item.access, + checksum: item.checksum, + downloadUrl: item.downloadUrl +}) + +const parseOrder = (value: string): FileTreeOrder => { + return (ALLOWED_ORDERS as string[]).includes(value) + ?
(value as FileTreeOrder) : FileTreeOrder.NAME_AZ } + +const parseInclude = (value: string): FileTreeInclude => { + return (ALLOWED_INCLUDES as string[]).includes(value) + ? (value as FileTreeInclude) + : FileTreeInclude.ALL +} + +const unwrap = <T>(value: { data: T } | T): T => { + if (value && typeof value === 'object' && 'data' in (value as Record<string, unknown>)) { + return (value as { data: T }).data + } + return value as T +} diff --git a/src/files/domain/models/FileUploadDestination.ts b/src/files/domain/models/FileUploadDestination.ts index 4bb42c2a..4d6edf13 100644 --- a/src/files/domain/models/FileUploadDestination.ts +++ b/src/files/domain/models/FileUploadDestination.ts @@ -4,4 +4,5 @@ export interface FileUploadDestination { partSize: number abortEndpoint?: string completeEndpoint?: string + tagging?: string } diff --git a/src/files/index.ts b/src/files/index.ts index a9d38386..e14bee75 100644 --- a/src/files/index.ts +++ b/src/files/index.ts @@ -100,3 +100,7 @@ export { FileMetadataChange, FileVersionSummarySubset } from './domain/models/FileVersionSummaryInfo' +// Re-export the direct-upload client config so consumers can construct +// their own `DirectUploadClient` with custom timeouts / retry counts +// without reaching into the SDK's `infra/` private path. +export { DirectUploadClientConfig } from './infra/clients/DirectUploadClient' diff --git a/src/files/infra/clients/DirectUploadClient.ts b/src/files/infra/clients/DirectUploadClient.ts index 8dfd1a9b..ab03bc6e 100644 --- a/src/files/infra/clients/DirectUploadClient.ts +++ b/src/files/infra/clients/DirectUploadClient.ts @@ -15,15 +15,22 @@ import { MultipartAbortError } from './errors/MultipartAbortError' import { FileUploadCancelError } from './errors/FileUploadCancelError' import { ApiConstants } from '../../../core/infra/repositories/ApiConstants' +export interface DirectUploadClientConfig { + /** Maximum number of retries for multipart upload parts. Default: 5 */ + maxMultipartRetries?: number + /** Timeout in milliseconds for file upload operations. Default: 60000 */ + fileUploadTimeoutMs?: number +} + export class DirectUploadClient implements IDirectUploadClient { private filesRepository: IFilesRepository private maxMultipartRetries: number + private readonly fileUploadTimeoutMs: number - private readonly fileUploadTimeoutMs: number = 60_000 - - constructor(filesRepository: IFilesRepository, maxMultipartRetries = 5) { + constructor(filesRepository: IFilesRepository, config: DirectUploadClientConfig = {}) { this.filesRepository = filesRepository - this.maxMultipartRetries = maxMultipartRetries + this.maxMultipartRetries = config.maxMultipartRetries ?? 5 + this.fileUploadTimeoutMs = config.fileUploadTimeoutMs ??
60_000 } public async uploadFile( @@ -59,11 +66,14 @@ export class DirectUploadClient implements IDirectUploadClient { ): Promise<void> { try { const arrayBuffer = await file.arrayBuffer() + const headers: Record<string, string> = { + 'Content-Type': 'application/octet-stream' + } + if (destination.tagging !== undefined) { + headers['x-amz-tagging'] = destination.tagging + } await axios.put(destination.urls[0], arrayBuffer, { - headers: { - 'Content-Type': 'application/octet-stream', - 'x-amz-tagging': 'dv-state=temp' - }, + headers, timeout: this.fileUploadTimeoutMs, signal: abortController.signal, onUploadProgress: (progressEvent) => @@ -89,12 +99,17 @@ const eTags: Record<string, string> = {} const maxRetries = this.maxMultipartRetries const limitConcurrency = pLimit(1) + let uploadFailed = false const uploadPart = async ( destinationUrl: string, index: number, retries = 0 ): Promise<void> => { + if (uploadFailed) { + return + } + const offset = index * partMaxSize const partSize = Math.min(partMaxSize, file.size - offset) const fileSlice = file.slice(offset, offset + partSize) @@ -115,6 +130,8 @@ eTags[`${index + 1}`] = eTag } catch (error) { if (axios.isCancel(error)) { + uploadFailed = true + limitConcurrency.clearQueue() await this.abortMultipartUpload(file.name, datasetId, destination.abortEndpoint as string) throw new FileUploadCancelError(file.name, datasetId) } @@ -123,6 +140,8 @@ await new Promise((resolve) => setTimeout(resolve, backoffDelay)) await uploadPart(destinationUrl, index, retries + 1) } else { + uploadFailed = true + limitConcurrency.clearQueue() await this.abortMultipartUpload(file.name, datasetId, destination.abortEndpoint as string) const errorMessage = diff --git a/src/files/infra/repositories/transformers/fileUploadDestinationsTransformers.ts b/src/files/infra/repositories/transformers/fileUploadDestinationsTransformers.ts index 55a35757..f1a195dc 100644 --- a/src/files/infra/repositories/transformers/fileUploadDestinationsTransformers.ts +++ b/src/files/infra/repositories/transformers/fileUploadDestinationsTransformers.ts @@ -5,6 +5,7 @@ export interface FileSingleUploadDestinationPayload { url: string partSize: number storageIdentifier: string + tagging?: string } export interface FileMultipartUploadDestinationPayload { @@ -13,6 +14,7 @@ storageIdentifier: string complete?: string abort?: string + tagging?: string } export const transformUploadDestinationsResponseToUploadDestination = ( @@ -24,7 +26,8 @@ return { urls: [fileUploadDestinationsPayload.url], partSize: fileUploadDestinationsPayload.partSize, - storageId: fileUploadDestinationsPayload.storageIdentifier + storageId: fileUploadDestinationsPayload.storageIdentifier, + tagging: fileUploadDestinationsPayload.tagging } } else { return transformMultipartUploadDestinationsPayloadToMultipartUploadDestinationModel( @@ -45,6 +48,7 @@ export const transformMultipartUploadDestinationsPayloadToMultipartUploadDestina partSize: fileUploadDestinationsPayload.partSize, storageId: fileUploadDestinationsPayload.storageIdentifier, abortEndpoint: fileUploadDestinationsPayload.abort?.substring(4), - completeEndpoint: fileUploadDestinationsPayload.complete?.substring(4) + completeEndpoint: fileUploadDestinationsPayload.complete?.substring(4), +
tagging: fileUploadDestinationsPayload.tagging } } diff --git a/test/testHelpers/collections/collectionHelper.ts b/test/testHelpers/collections/collectionHelper.ts index b19b668f..6d274deb 100644 --- a/test/testHelpers/collections/collectionHelper.ts +++ b/test/testHelpers/collections/collectionHelper.ts @@ -134,7 +134,7 @@ export async function setStorageDriverViaApi( ): Promise<AxiosResponse> { try { return await axios.put( - `${TestConstants.TEST_API_URL}/admin/dataverse/${collectionAlias}/storageDriver`, + `${TestConstants.TEST_API_URL}/dataverses/${collectionAlias}/storageDriver`, driverLabel, { headers: { 'Content-Type': 'text/plain', 'X-Dataverse-Key': process.env.TEST_API_KEY } diff --git a/test/unit/datasets/IterateDatasetTreeNode.test.ts b/test/unit/datasets/IterateDatasetTreeNode.test.ts new file mode 100644 index 00000000..c1d328ec --- /dev/null +++ b/test/unit/datasets/IterateDatasetTreeNode.test.ts @@ -0,0 +1,76 @@ +import { IterateDatasetTreeNode } from '../../../src/datasets/domain/useCases/IterateDatasetTreeNode' +import { + IDatasetsRepository, + ListDatasetTreeNodeParams +} from '../../../src/datasets/domain/repositories/IDatasetsRepository' +import { + FileTreeInclude, + FileTreeOrder, + FileTreePage +} from '../../../src/datasets/domain/models/FileTreePage' +import { FileTreeFileNode, FileTreeNodeType } from '../../../src/datasets/domain/models/FileTreeNode' + +const page = (overrides: Partial<FileTreePage>): FileTreePage => ({ + path: '', + items: [], + nextCursor: null, + limit: 100, + order: FileTreeOrder.NAME_AZ, + include: FileTreeInclude.ALL, + ...overrides +}) + +describe('IterateDatasetTreeNode (unit)', () => { + test('iterates a single page', async () => { + const file: FileTreeFileNode = { + type: FileTreeNodeType.FILE, + id: 1, + name: 'a.txt', + path: 'a.txt', + size: 100, + downloadUrl: '/api/access/datafile/1' + } + const repo: IDatasetsRepository = {} as IDatasetsRepository + repo.listDatasetTreeNode = jest.fn().mockResolvedValue(page({ items: [file] })) + + const sut = new IterateDatasetTreeNode(repo) + const collected: (typeof file)[] = [] + for await (const node of sut.execute({ datasetId: 1 })) { + collected.push(node as typeof file) + } + expect(collected.map((n) => n.id)).toEqual([1]) + }) + + test('walks the cursor chain until exhausted', async () => { + const fileFor = (id: number): FileTreeFileNode => ({ + type: FileTreeNodeType.FILE, + id, + name: `f${id}.txt`, + path: `f${id}.txt`, + size: 100, + downloadUrl: `/api/access/datafile/${id}` + }) + const pages: FileTreePage[] = [ + page({ items: [fileFor(1), fileFor(2)], nextCursor: 'c2' }), + page({ items: [fileFor(3)], nextCursor: 'c3' }), + page({ items: [fileFor(4)] }) + ] + const calls: ListDatasetTreeNodeParams[] = [] + const repo: IDatasetsRepository = {} as IDatasetsRepository + repo.listDatasetTreeNode = jest.fn().mockImplementation((params: ListDatasetTreeNodeParams) => { + calls.push(params) + const idx = calls.length - 1 + return Promise.resolve(pages[idx]) + }) + + const sut = new IterateDatasetTreeNode(repo) + const ids: number[] = [] + for await (const node of sut.execute({ datasetId: 1 })) { + if (node.type === FileTreeNodeType.FILE) { + ids.push(node.id) + } + } + expect(ids).toEqual([1, 2, 3, 4]) + expect(calls.map((c) => c.cursor)).toEqual([undefined, 'c2', 'c3']) + }) +}) diff --git a/test/unit/datasets/ListDatasetTreeNode.test.ts b/test/unit/datasets/ListDatasetTreeNode.test.ts new file mode 100644 index 00000000..2b37f4df --- /dev/null +++ b/test/unit/datasets/ListDatasetTreeNode.test.ts @@ -0,0 +1,54 @@ +import { ListDatasetTreeNode } from
'../../../src/datasets/domain/useCases/ListDatasetTreeNode' +import { IDatasetsRepository } from '../../../src/datasets/domain/repositories/IDatasetsRepository' +import { + FileTreeInclude, + FileTreeOrder, + FileTreePage +} from '../../../src/datasets/domain/models/FileTreePage' +import { FileTreeNodeType } from '../../../src/datasets/domain/models/FileTreeNode' +import { ReadError } from '../../../src/core/domain/repositories/ReadError' + +describe('ListDatasetTreeNode (unit)', () => { + const testPage: FileTreePage = { + path: 'data', + items: [ + { + type: FileTreeNodeType.FOLDER, + name: 'sub', + path: 'data/sub', + counts: { files: 1, folders: 0 } + }, + { + type: FileTreeNodeType.FILE, + id: 7, + name: 'a.txt', + path: 'data/a.txt', + size: 1024, + downloadUrl: '/api/access/datafile/7' + } + ], + nextCursor: null, + limit: 100, + order: FileTreeOrder.NAME_AZ, + include: FileTreeInclude.ALL, + approximateCount: 2 + } + + test('returns the page produced by the repository', async () => { + const repo: IDatasetsRepository = {} as IDatasetsRepository + repo.listDatasetTreeNode = jest.fn().mockResolvedValue(testPage) + + const sut = new ListDatasetTreeNode(repo) + const result = await sut.execute({ datasetId: 1, path: 'data' }) + expect(result).toEqual(testPage) + expect(repo.listDatasetTreeNode).toHaveBeenCalledWith({ datasetId: 1, path: 'data' }) + }) + + test('propagates ReadError', async () => { + const repo: IDatasetsRepository = {} as IDatasetsRepository + repo.listDatasetTreeNode = jest.fn().mockRejectedValue(new ReadError('[400] bad cursor')) + + const sut = new ListDatasetTreeNode(repo) + await expect(sut.execute({ datasetId: 1 })).rejects.toThrow(ReadError) + }) +}) diff --git a/test/unit/datasets/fileTreeTransformers.test.ts b/test/unit/datasets/fileTreeTransformers.test.ts new file mode 100644 index 00000000..658212e0 --- /dev/null +++ b/test/unit/datasets/fileTreeTransformers.test.ts @@ -0,0 +1,114 @@ +import { AxiosResponse } from 'axios' +import { transformTreeResponseToFileTreePage } from '../../../src/datasets/infra/repositories/transformers/fileTreeTransformers' +import { FileTreeInclude, FileTreeOrder } from '../../../src/datasets/domain/models/FileTreePage' +import { + FileTreeNodeType, + isFileTreeFolderNode, + isFileTreeFileNode +} from '../../../src/datasets/domain/models/FileTreeNode' + +const buildResponse = (data: unknown): AxiosResponse => + ({ + data: { data }, + status: 200, + statusText: 'OK', + headers: {}, + config: {} as never + } as AxiosResponse) + +describe('transformTreeResponseToFileTreePage', () => { + test('maps folder and file payloads to typed FileTreeNodes', () => { + const response = buildResponse({ + path: 'data', + items: [ + { type: 'folder', name: 'raw', path: 'data/raw', counts: { files: 3, folders: 0 } }, + { + type: 'file', + id: 42, + name: 'a.csv', + path: 'data/a.csv', + size: 1024, + contentType: 'text/csv', + access: 'public', + checksum: { type: 'MD5', value: 'abc' }, + downloadUrl: '/api/access/datafile/42' + } + ], + nextCursor: 'eyJ', + limit: 100, + order: 'NameAZ', + include: 'all', + approximateCount: 2 + }) + + const page = transformTreeResponseToFileTreePage(response) + expect(page.path).toBe('data') + expect(page.items).toHaveLength(2) + expect(page.nextCursor).toBe('eyJ') + expect(page.limit).toBe(100) + expect(page.order).toBe(FileTreeOrder.NAME_AZ) + expect(page.include).toBe(FileTreeInclude.ALL) + expect(page.approximateCount).toBe(2) + + const folder = page.items[0] + if (!isFileTreeFolderNode(folder)) { + throw new 
Error('expected folder') + } + expect(folder.name).toBe('raw') + expect(folder.counts).toEqual({ files: 3, folders: 0 }) + + const file = page.items[1] + if (!isFileTreeFileNode(file)) { + throw new Error('expected file') + } + expect(file.id).toBe(42) + expect(file.size).toBe(1024) + expect(file.access).toBe('public') + expect(file.checksum).toEqual({ type: 'MD5', value: 'abc' }) + }) + + test('falls back to defaults when order/include are unrecognized', () => { + const response = buildResponse({ + path: '', + items: [], + nextCursor: null, + limit: 100, + order: 'WhateverElse', + include: 'something' + }) + const page = transformTreeResponseToFileTreePage(response) + expect(page.order).toBe(FileTreeOrder.NAME_AZ) + expect(page.include).toBe(FileTreeInclude.ALL) + }) + + test('handles undecorated payload (no .data envelope)', () => { + const response = { + data: { + path: 'docs', + items: [ + { + type: 'file', + id: 1, + name: 'README.md', + path: 'docs/README.md', + size: 200, + downloadUrl: '/api/access/datafile/1' + } + ], + nextCursor: null, + limit: 100, + order: 'NameZA', + include: 'files' + }, + status: 200, + statusText: 'OK', + headers: {}, + config: {} as never + } as AxiosResponse + + const page = transformTreeResponseToFileTreePage(response) + expect(page.order).toBe(FileTreeOrder.NAME_ZA) + expect(page.include).toBe(FileTreeInclude.FILES) + expect(page.items[0].type).toBe(FileTreeNodeType.FILE) + }) +}) diff --git a/test/unit/files/DirectUploadClient.test.ts b/test/unit/files/DirectUploadClient.test.ts index 38f1921a..f4e54137 100644 --- a/test/unit/files/DirectUploadClient.test.ts +++ b/test/unit/files/DirectUploadClient.test.ts @@ -87,6 +87,82 @@ describe('uploadFile', () => { expect(actual).toEqual(testDestination.storageId) }) + + test('should include S3 tagging header when upload destination provides tagging', async () => { + const filesRepositoryStub: IFilesRepository = {} as IFilesRepository + const testDestination: FileUploadDestination = { + ...createSingleFileUploadDestinationModel(), + tagging: 'dv-state=temp' + } + filesRepositoryStub.getFileUploadDestination = jest.fn().mockResolvedValue(testDestination) + + const axiosPutSpy = jest.spyOn(axios, 'put').mockResolvedValue(undefined) + + const sut = new DirectUploadClient(filesRepositoryStub) + + const progressMock = jest.fn() + const abortController = new AbortController() + + await sut.uploadFile(1, testFile, progressMock, abortController) + + expect(axiosPutSpy).toHaveBeenCalledWith( + testDestination.urls[0], + expect.anything(), + expect.objectContaining({ + headers: expect.objectContaining({ + 'x-amz-tagging': 'dv-state=temp' + }) + }) + ) + }) + + test('should not include S3 tagging header when upload destination omits tagging', async () => { + const filesRepositoryStub: IFilesRepository = {} as IFilesRepository + const testDestination: FileUploadDestination = createSingleFileUploadDestinationModel() + filesRepositoryStub.getFileUploadDestination = jest.fn().mockResolvedValue(testDestination) + + const axiosPutSpy = jest.spyOn(axios, 'put').mockResolvedValue(undefined) + + const sut = new DirectUploadClient(filesRepositoryStub) + + const progressMock = jest.fn() + const abortController = new AbortController() + + await sut.uploadFile(1, testFile, progressMock, abortController) + + expect(axiosPutSpy).toHaveBeenCalledWith( + testDestination.urls[0], + expect.anything(), + expect.objectContaining({ + headers: expect.not.objectContaining({ + 'x-amz-tagging': expect.anything() + }) + }) + ) + }) + + test('should 
use configured file upload timeout', async () => { + const filesRepositoryStub: IFilesRepository = {} as IFilesRepository + const testDestination: FileUploadDestination = createSingleFileUploadDestinationModel() + filesRepositoryStub.getFileUploadDestination = jest.fn().mockResolvedValue(testDestination) + + const axiosPutSpy = jest.spyOn(axios, 'put').mockResolvedValue(undefined) + + const sut = new DirectUploadClient(filesRepositoryStub, { fileUploadTimeoutMs: 30_000 }) + + const progressMock = jest.fn() + const abortController = new AbortController() + + await sut.uploadFile(1, testFile, progressMock, abortController) + + expect(axiosPutSpy).toHaveBeenCalledWith( + testDestination.urls[0], + expect.anything(), + expect.objectContaining({ + timeout: 30_000 + }) + ) + }) }) describe('Multiple parts file', () => { @@ -113,7 +189,7 @@ describe('uploadFile', () => { jest.spyOn(axios, 'delete').mockResolvedValue(undefined) jest.spyOn(axios, 'put').mockRejectedValue('error') - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) const progressMock = jest.fn() const abortController = new AbortController() @@ -143,7 +219,7 @@ describe('uploadFile', () => { const progressMock = jest.fn() const abortController = new AbortController() - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) await expect(sut.uploadFile(1, testFile, progressMock, abortController)).rejects.toThrow( MultipartAbortError @@ -165,7 +241,7 @@ describe('uploadFile', () => { const progressMock = jest.fn() const abortController = new AbortController() - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) await expect(sut.uploadFile(1, testFile, progressMock, abortController)).rejects.toThrow( MultipartCompletionError ) @@ -187,7 +263,7 @@ describe('uploadFile', () => { .mockResolvedValueOnce(successfulPartResponse) .mockResolvedValueOnce(undefined) - const sut = new DirectUploadClient(filesRepositoryStub, 1) + const sut = new DirectUploadClient(filesRepositoryStub, { maxMultipartRetries: 1 }) const progressMock = jest.fn() const abortController = new AbortController() diff --git a/test/unit/files/FilesRepository.test.ts b/test/unit/files/FilesRepository.test.ts index 4a9df202..d10f50b3 100644 --- a/test/unit/files/FilesRepository.test.ts +++ b/test/unit/files/FilesRepository.test.ts @@ -148,6 +148,48 @@ describe('FilesRepository', () => { expect(actual).toEqual(testMultipleFileUploadDestination) }) + test('should return destination with tagging when single response includes tagging', async () => { + const tagging = 'dv-state=temp' + jest.spyOn(axios, 'get').mockResolvedValue({ + data: { + status: 'OK', + data: { + ...createSingleFileUploadDestinationPayload(), + tagging + } + } + }) + jest.spyOn(fs, 'statSync').mockReturnValue({ size: testFileSize } as fs.Stats) + + const actual = await sut.getFileUploadDestination(testDatasetId, singlepartFile) + + expect(actual).toEqual({ + ...testSingleFileUploadDestination, + tagging + }) + }) + + test('should return destination with tagging when multipart response includes tagging', async () => { + const tagging = 'dv-state=temp' + jest.spyOn(axios, 'get').mockResolvedValue({ + data: { + status: 'OK', + data: { + ...createMultipartFileUploadDestinationPayload(), + tagging + } + } + }) + jest.spyOn(fs, 
'statSync').mockReturnValue({ size: testFileSize } as fs.Stats) + + const actual = await sut.getFileUploadDestination(testDatasetId, multipartFile) + + expect(actual).toEqual({ + ...testMultipleFileUploadDestination, + tagging + }) + }) + test('should return error on repository read error', async () => { jest.spyOn(axios, 'get').mockRejectedValue(TestConstants.TEST_ERROR_RESPONSE) jest.spyOn(fs, 'statSync').mockReturnValue({ size: testFileSize } as fs.Stats)
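
For reference, a minimal consumer-side sketch of the new `DirectUploadClientConfig` introduced in this PR. It assumes `DirectUploadClient` itself is reachable from the package root (the re-export comment in `files/index.ts` above implies it is), and `filesRepository` is a placeholder for whatever `IFilesRepository` instance the caller already has:

```typescript
import { DirectUploadClient, DirectUploadClientConfig } from '@iqss/dataverse-client-javascript'

// Placeholder for the caller's existing IFilesRepository instance.
declare const filesRepository: ConstructorParameters<typeof DirectUploadClient>[0]

// The second constructor argument was previously a plain number
// (maxMultipartRetries); it is now an options object with optional fields.
const config: DirectUploadClientConfig = {
  maxMultipartRetries: 3, // defaults to 5 when omitted
  fileUploadTimeoutMs: 30_000 // defaults to 60_000 when omitted
}

const client = new DirectUploadClient(filesRepository, config)
```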