Skip to content

Commit 18a5181

Browse files
committed
Add collection Atom feed endpoint
1 parent 78ff82e commit 18a5181

5 files changed

Lines changed: 393 additions & 4 deletions

File tree

src/collection-api/routes/feed.js

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
import express from 'express';
2+
import { js2xml } from 'xml-js';
3+
4+
import { getCollection } from '../../archivist/collection/index.js';
5+
import { COMMIT_MESSAGE_PREFIXES } from '../../archivist/recorder/repositories/git/dataMapper.js';
6+
import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js';
7+
8+
import versionsRepository, { storageConfig } from './versionsRepository.js';
9+
10+
/** Authority segment shared by every `tag:` URI built in this module. */
const TAG_AUTHORITY = 'opentermsarchive.org,2026';

/** Name written in the feed-level `<author><name>` element. */
const FEED_AUTHOR_NAME = 'OTA-Bot';

/** Number of versions requested from the versions repository for one feed. */
const DEFAULT_LIMIT = 100;

/** Labels used as `term` of the record-type category of an entry. */
const RECORD_TYPES = {
  firstRecord: 'First record',
  technicalUpgrade: 'Technical upgrade',
  change: 'Change',
};

/** `scheme` URIs of the three categories attached to every entry. */
const SCHEMES = Object.fromEntries(
  [
    [ 'service', 'service' ],
    [ 'termsType', 'terms-type' ],
    [ 'recordType', 'record-type' ],
  ].map(([ key, slug ]) => [ key, `tag:${TAG_AUTHORITY}:scheme:${slug}` ]),
);
25+
26+
/**
 * Builds the absolute URL under which the current router is mounted.
 *
 * @param {object} req - Request exposing `protocol`, `baseUrl` and `get(headerName)`.
 * @returns {string} `<protocol>://<host header><baseUrl>`
 */
function buildAbsoluteBaseUrl(req) {
  const { protocol, baseUrl } = req;
  const host = req.get('host');

  return `${protocol}://${host}${baseUrl}`;
}
29+
30+
/**
 * Picks the record-type label of a version.
 * A first record takes precedence over a technical upgrade; anything else is a change.
 *
 * @param {object} version - Version exposing `isFirstRecord` and `isTechnicalUpgrade`.
 * @returns {string} One of the `RECORD_TYPES` labels.
 */
function classifyRecordType(version) {
  const key = (version.isFirstRecord && 'firstRecord')
    || (version.isTechnicalUpgrade && 'technicalUpgrade')
    || 'change';

  return RECORD_TYPES[key];
}
36+
37+
/**
 * Builds the entry title as `<commit message prefix> <serviceId> <termsType>`.
 * The prefix is chosen with the same precedence as the record type:
 * first record, then technical upgrade, then plain update.
 *
 * @param {object} version - Version exposing `isFirstRecord`, `isTechnicalUpgrade`, `serviceId` and `termsType`.
 * @returns {string} The entry title.
 */
function buildEntryTitle(version) {
  const { isFirstRecord, isTechnicalUpgrade, serviceId, termsType } = version;
  let prefixKey = 'update';

  if (isFirstRecord) {
    prefixKey = 'startTracking';
  } else if (isTechnicalUpgrade) {
    prefixKey = 'technicalUpgrade';
  }

  return `${COMMIT_MESSAGE_PREFIXES[prefixKey]} ${serviceId} ${termsType}`;
}
45+
46+
/**
 * Builds the URL of the version endpoint for the given version:
 * `<baseUrl>/version/<serviceId>/<termsType>/<fetch date>`, each segment URI-encoded.
 *
 * @param {string} baseUrl - Absolute base URL, without trailing slash.
 * @param {object} version - Version exposing `serviceId`, `termsType` and `fetchDate`.
 * @returns {string} The link to the version.
 */
function buildVersionLink(baseUrl, version) {
  const fetchDate = toISODateWithoutMilliseconds(version.fetchDate);
  const pathSegments = [ version.serviceId, version.termsType, fetchDate ].map(segment => encodeURIComponent(segment));

  return `${baseUrl}/version/${pathSegments.join('/')}`;
}
53+
54+
/**
 * Builds the `tag:` URI identifying an entry from the collection id,
 * the configured versions storage type and the version id.
 *
 * @param {object} collection - Collection whose optional `metadata.id` is used.
 * @param {object} version - Version exposing `id`.
 * @returns {string} The entry identifier.
 */
function buildEntryId(collection, version) {
  const collectionId = collection.metadata?.id;
  const storageType = storageConfig.type;
  const versionId = version.id;

  return `tag:${TAG_AUTHORITY}:version:${collectionId}:${storageType}:${versionId}`;
}
57+
58+
/**
 * Builds the compact xml-js representation of one Atom `<entry>`.
 *
 * @param {object} collection - Collection the version belongs to (used for the entry id).
 * @param {string} baseUrl - Absolute base URL used to build the alternate link.
 * @param {object} version - Version exposing `serviceId`, `termsType`, `fetchDate` (a Date) and the record-type flags.
 * @returns {object} Entry object with `id`, `link`, `title`, `updated` and three `category` elements.
 */
function buildEntry(collection, baseUrl, version) {
  const toCategory = (term, scheme) => ({ _attributes: { term, scheme } });

  const id = { _text: buildEntryId(collection, version) };
  const link = {
    _attributes: {
      rel: 'alternate',
      type: 'text/html',
      href: buildVersionLink(baseUrl, version),
    },
  };
  const title = { _text: buildEntryTitle(version) };
  const updated = { _text: version.fetchDate.toISOString() };
  const category = [
    toCategory(version.serviceId, SCHEMES.service),
    toCategory(version.termsType, SCHEMES.termsType),
    toCategory(classifyRecordType(version), SCHEMES.recordType),
  ];

  return { id, link, title, updated, category };
}
75+
76+
/**
 * Builds the compact xml-js document (XML declaration + `<feed>`) of the Atom feed.
 *
 * @param {object} params
 * @param {object} params.collection - Collection whose optional metadata (`name`, `tagline`, `logo`) fills the feed header.
 * @param {string} params.selfHref - URL of the feed itself, used for the `self` link.
 * @param {string} params.feedId - Identifier of the feed.
 * @param {object[]} params.versions - Versions to list; the first one provides the feed `updated` date.
 * @param {string} params.baseUrl - Absolute base URL used for entry links.
 * @returns {object} Document ready to be serialized.
 */
function buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }) {
  const { metadata } = collection;
  // Without any version, the feed is stamped with the current time
  const updatedAt = versions.length === 0 ? new Date() : versions[0].fetchDate;
  // The logo element is emitted only when the collection declares one
  const optionalLogo = metadata?.logo ? { logo: { _text: metadata.logo } } : {};

  const feed = {
    _attributes: { xmlns: 'http://www.w3.org/2005/Atom' },
    title: { _text: metadata?.name || '' },
    subtitle: { _text: metadata?.tagline || '' },
    id: { _text: feedId },
    updated: { _text: updatedAt.toISOString() },
    link: { _attributes: { rel: 'self', href: selfHref } },
    author: { name: { _text: FEED_AUTHOR_NAME } },
    ...optionalLogo,
    entry: versions.map(version => buildEntry(collection, baseUrl, version)),
  };

  return {
    _declaration: { _attributes: { version: '1.0', encoding: 'utf-8' } },
    feed,
  };
}
100+
101+
/**
 * Sends an XML payload as a successful Atom response.
 *
 * @param {object} res - Response exposing `set`, `status` and `send`.
 * @param {string} xml - Serialized feed.
 */
function sendAtom(res, xml) {
  const headerName = 'Content-Type';
  const headerValue = 'application/atom+xml; charset=utf-8';

  res.set(headerName, headerValue);

  const okResponse = res.status(200);

  okResponse.send(xml);
}
105+
106+
/**
 * Escapes the characters that are not allowed verbatim in an XML attribute value.
 * Ampersands that already start a character or entity reference (such as the
 * `&quot;` produced for double quotes) are left untouched, which keeps the
 * function idempotent.
 *
 * @param {string} value - Attribute value handed over by the serializer.
 * @returns {string} The value with bare `&` and `<` escaped.
 */
function escapeAttributeValue(value) {
  return String(value)
    .replace(/&(?!(?:amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);)/g, '&amp;')
    .replace(/</g, '&lt;');
}

/**
 * Serializes a compact xml-js document to an indented XML string.
 *
 * Attribute values (category terms hold free-form service ids and terms types)
 * go through `escapeAttributeValue`: xml-js is understood to escape only double
 * quotes there, so a bare `&` or `<` would otherwise yield malformed XML.
 *
 * @param {object} document - Compact xml-js document.
 * @returns {string} The XML string.
 */
function render(document) {
  return js2xml(document, {
    compact: true,
    spaces: 2,
    attributeValueFn: escapeAttributeValue,
  });
}
109+
110+
/**
 * @returns {express.Router} The router instance
 * @swagger
 * tags:
 *   name: Feeds
 *   description: Atom feeds of version changes
 */
export default function feedRouter() {
  const router = express.Router();

  // Loads the collection and its most recent versions, then answers with the serialized Atom feed
  async function serveCollectionFeed(req, res) {
    const collection = await getCollection();
    const baseUrl = buildAbsoluteBaseUrl(req);
    const versions = await versionsRepository.findRecent(DEFAULT_LIMIT);

    const document = buildFeedDocument({
      collection,
      selfHref: `${baseUrl}/feed`,
      feedId: `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}`,
      versions,
      baseUrl,
    });
    const xml = render(document);

    sendAtom(res, xml);
  }

  /**
   * @swagger
   * /feed:
   *   get:
   *     summary: Atom feed of the latest version changes across the whole collection.
   *     tags: [Feeds]
   *     produces:
   *       - application/atom+xml
   *     responses:
   *       200:
   *         description: An Atom 1.0 feed listing the latest version records, newest first.
   *         content:
   *           application/atom+xml:
   *             schema:
   *               type: string
   */
  router.get('/feed', serveCollectionFeed);

  return router;
}
Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
import { expect } from 'chai';
2+
import config from 'config';
3+
import supertest from 'supertest';
4+
5+
import { getCollection } from '../../archivist/collection/index.js';
6+
import RepositoryFactory from '../../archivist/recorder/repositories/factory.js';
7+
import Version from '../../archivist/recorder/version.js';
8+
import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js';
9+
import app from '../server.js';
10+
11+
const basePath = config.get('@opentermsarchive/engine.collection-api.basePath');
12+
const request = supertest(app);
13+
const storageConfig = config.get('@opentermsarchive/engine.recorder.versions.storage');
14+
15+
/**
 * Returns the inner text of the first attribute-less `<tag>…</tag>` element found in `xml`.
 *
 * @param {string} xml - XML text to search.
 * @param {string} tag - Element name, inserted as-is in the search pattern.
 * @returns {string|null} The inner text, or `null` when no such element exists.
 */
function extractTag(xml, tag) {
  const pattern = new RegExp(`<${tag}>([\\s\\S]*?)</${tag}>`);

  return xml.match(pattern)?.[1] ?? null;
}
20+
21+
describe('Feed API', () => {
22+
describe('GET /feed', () => {
  let feedResponse;
  let expectedMetadata;

  // Reads the inner text of the first matching element of the response body
  const feedTag = tag => extractTag(feedResponse.text, tag);

  before(async () => {
    const collection = await getCollection();

    expectedMetadata = collection.metadata;
    feedResponse = await request.get(`${basePath}/v1/feed`);
  });

  it('responds with 200 status code', () => {
    expect(feedResponse.status).to.equal(200);
  });

  it('responds with Content-Type application/atom+xml', () => {
    const contentType = feedResponse.headers['content-type'];

    expect(contentType).to.match(/^application\/atom\+xml/);
  });

  it('is a valid Atom feed root', () => {
    const xml = feedResponse.text;

    expect(xml).to.match(/^<\?xml version="1\.0"/);
    expect(xml).to.include('<feed');
    expect(xml).to.include('xmlns="http://www.w3.org/2005/Atom"');
  });

  describe('feed-level metadata', () => {
    it('has a title matching the collection name', () => {
      expect(feedTag('title')).to.equal(expectedMetadata.name);
    });

    it('has a subtitle matching the collection tagline', () => {
      expect(feedTag('subtitle')).to.equal(expectedMetadata.tagline);
    });

    it('has a tag URI id based on the collection id', () => {
      const expectedId = `tag:opentermsarchive.org,2026:feed:${expectedMetadata.id}`;

      expect(feedTag('id')).to.equal(expectedId);
    });

    it('has an updated element with a valid ISO 8601 datetime', () => {
      const updatedText = feedTag('updated');

      expect(updatedText).to.be.a('string');
      expect(new Date(updatedText).toString()).to.not.equal('Invalid Date');
    });

    it('has a self link pointing to the feed endpoint', () => {
      const selfLinkPattern = /<link[^>]*rel="self"[^>]*href="([^"]+)"/;
      const selfLink = feedResponse.text.match(selfLinkPattern);

      expect(selfLink).to.not.be.null;
      expect(selfLink[1]).to.match(new RegExp(`${basePath}/v1/feed$`));
    });

    it('has an author named OTA-Bot', () => {
      expect(feedResponse.text).to.match(/<author>[\s\S]*<name>OTA-Bot<\/name>[\s\S]*<\/author>/);
    });

    it('has a logo matching the collection logo', () => {
      expect(feedTag('logo')).to.equal(expectedMetadata.logo);
    });
  });
});
81+
82+
describe('GET /feed — entries', () => {
  const FETCH_DATE_FIRST = new Date('2023-01-01T12:00:00Z');
  const FETCH_DATE_CHANGE = new Date('2023-06-15T08:30:00Z');
  const FETCH_DATE_SECOND_SERVICE_FIRST = new Date('2024-01-01T00:00:00Z');
  const FETCH_DATE_UPGRADE = new Date('2024-02-10T16:45:00Z'); // most recent fixture, expected as first entry

  let response;
  let repository;
  let savedVersions;
  let collection;

  // Returns the raw attribute string of every self-closing <category/> of an entry
  function extractCategoryAttributes(entryXml) {
    return [...entryXml.matchAll(/<category([^/]*)\/>/g)].map(match => match[1]);
  }

  // Returns the value of the given attribute for every category of an entry
  function extractCategoryValues(entryXml, attributeName) {
    const attributePattern = new RegExp(`${attributeName}="([^"]+)"`);

    return extractCategoryAttributes(entryXml).map(attributes => attributes.match(attributePattern)[1]);
  }

  before(async function () {
    this.timeout(5000);
    collection = await getCollection();
    repository = RepositoryFactory.create(storageConfig);
    await repository.initialize();

    // Versions are saved one after the other, in chronological order per service
    const firstRecord = await repository.save(new Version({
      serviceId: 'service-1',
      termsType: 'Terms of Service',
      content: 'first content',
      fetchDate: FETCH_DATE_FIRST,
      snapshotIds: ['snapshot_1'],
    }));

    const changeRecord = await repository.save(new Version({
      serviceId: 'service-1',
      termsType: 'Terms of Service',
      content: 'changed content',
      fetchDate: FETCH_DATE_CHANGE,
      snapshotIds: ['snapshot_2'],
    }));

    // First record of the second service; the technical upgrade below is applied on top of it
    const secondServiceFirstRecord = await repository.save(new Version({
      serviceId: 'service-2',
      termsType: 'Privacy Policy',
      content: 'initial privacy',
      fetchDate: FETCH_DATE_SECOND_SERVICE_FIRST,
      snapshotIds: ['snapshot_3'],
    }));

    const technicalUpgradeRecord = await repository.save(new Version({
      serviceId: 'service-2',
      termsType: 'Privacy Policy',
      content: 'upgraded privacy',
      fetchDate: FETCH_DATE_UPGRADE,
      snapshotIds: ['snapshot_4'],
      isTechnicalUpgrade: true,
    }));

    savedVersions = { firstRecord, changeRecord, secondServiceFirstRecord, technicalUpgradeRecord };
    response = await request.get(`${basePath}/v1/feed`);
  });

  after(() => repository.removeAll());

  it('orders entries newest-first', () => {
    const updates = [...response.text.matchAll(/<entry>[\s\S]*?<updated>([^<]+)<\/updated>[\s\S]*?<\/entry>/g)].map(match => match[1]);

    // ISO 8601 UTC timestamps sort chronologically when compared as strings
    expect(updates).to.deep.equal([...updates].sort().reverse());
  });

  describe('entry metadata', () => {
    let firstEntry;

    before(() => {
      firstEntry = response.text.match(/<entry>[\s\S]*?<\/entry>/)[0];
    });

    it('has an id tag URI including storage type and record id', () => {
      // The collection id comes from the collection itself rather than a hard-coded value
      const expected = `tag:opentermsarchive.org,2026:version:${collection.metadata.id}:${storageConfig.type}:${savedVersions.technicalUpgradeRecord.id}`;

      expect(firstEntry).to.include(`<id>${expected}</id>`);
    });

    it('has an alternate link to the version API endpoint', () => {
      const href = firstEntry.match(/<link[^>]*rel="alternate"[^>]*href="([^"]+)"/)[1];
      const expectedPathFragment = `/version/${encodeURIComponent('service-2')}/${encodeURIComponent('Privacy Policy')}/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE_UPGRADE))}`;

      expect(href).to.include(expectedPathFragment);
    });

    it('has a type="text/html" on the alternate link', () => {
      expect(firstEntry).to.match(/<link[^>]*rel="alternate"[^>]*type="text\/html"/);
    });

    it('has a title reconstructed from commit prefix + serviceId + termsType', () => {
      const title = firstEntry.match(/<title[^>]*>([\s\S]*?)<\/title>/)[1];

      expect(title).to.include('Apply technical or declaration upgrade on');
      expect(title).to.include('service-2');
      expect(title).to.include('Privacy Policy');
    });

    it('has an updated element matching the fetch date', () => {
      const updated = firstEntry.match(/<updated>([^<]+)<\/updated>/)[1];

      expect(new Date(updated).toISOString()).to.equal(FETCH_DATE_UPGRADE.toISOString());
    });

    it('has three categories with the expected schemes', () => {
      expect(extractCategoryAttributes(firstEntry)).to.have.length(3);

      const schemes = extractCategoryValues(firstEntry, 'scheme');

      expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:service');
      expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:terms-type');
      expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:record-type');
    });

    it('has category terms for service, terms type and record type', () => {
      const terms = extractCategoryValues(firstEntry, 'term');

      expect(terms).to.include('service-2');
      expect(terms).to.include('Privacy Policy');
      expect(terms).to.include('Technical upgrade');
    });
  });

  describe('record-type classification', () => {
    // Finds the <entry> whose id ends with the given record id; undefined when absent
    function findEntryById(xml, recordId) {
      const match = [...xml.matchAll(/<entry>[\s\S]*?<\/entry>/g)].find(entry => entry[0].includes(`:${recordId}</id>`));

      return match && match[0];
    }

    it('classifies a first record as "First record"', () => {
      const entry = findEntryById(response.text, savedVersions.secondServiceFirstRecord.id);

      expect(entry).to.not.be.undefined;
      expect(entry).to.match(/term="First record"/);
    });

    it('classifies a content change as "Change"', () => {
      const entry = findEntryById(response.text, savedVersions.changeRecord.id);

      expect(entry).to.not.be.undefined;
      expect(entry).to.match(/term="Change"/);
    });

    it('classifies a technical upgrade as "Technical upgrade"', () => {
      const entry = findEntryById(response.text, savedVersions.technicalUpgradeRecord.id);

      expect(entry).to.not.be.undefined;
      expect(entry).to.match(/term="Technical upgrade"/);
    });
  });
});
231+
});

src/collection-api/routes/index.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { getCollection } from '../../archivist/collection/index.js';
55
import * as Services from '../../archivist/services/index.js';
66

77
import docsRouter from './docs.js';
8+
import feedRouter from './feed.js';
89
import metadataRouter from './metadata.js';
910
import servicesRouter from './services.js';
1011
import versionsRouter from './versions.js';
@@ -37,6 +38,7 @@ export default async function apiRouter(basePath) {
3738
router.use(await metadataRouter(collection, services));
3839
router.use(servicesRouter(services));
3940
router.use(versionsRouter);
41+
router.use(feedRouter());
4042

4143
return router;
4244
}

0 commit comments

Comments
 (0)