OpenTermsArchive
diff --git a/src/collection-api/routes/feed.js b/src/collection-api/routes/feed.js
Lines changed: 149 additions & 0 deletions
diff --git a/src/collection-api/routes/feed.test.js b/src/collection-api/routes/feed.test.js
Lines changed: 231 additions & 0 deletions
diff --git a/src/collection-api/routes/index.js b/src/collection-api/routes/index.js
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,149 @@
+import express from 'express';
+import { js2xml } from 'xml-js';
+
+import { getCollection } from '../../archivist/collection/index.js';
+import { COMMIT_MESSAGE_PREFIXES } from '../../archivist/recorder/repositories/git/dataMapper.js';
+import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js';
+
+import versionsRepository, { storageConfig } from './versionsRepository.js';
+
+// Authority part shared by every `tag:` URI built in this module.
+const TAG_AUTHORITY = 'opentermsarchive.org,2026';
+// Name emitted in the feed-level <author> element.
+const FEED_AUTHOR_NAME = 'OTA-Bot';
+// Number of versions requested from the repository for one feed response.
+const DEFAULT_LIMIT = 100;
+
+// Labels used as the `term` of the record-type category; frozen so that no code can alter the shared table.
+const RECORD_TYPES = Object.freeze({
+  firstRecord: 'First record',
+  technicalUpgrade: 'Technical upgrade',
+  change: 'Change',
+});
+
+// Category scheme identifiers, one per category attached to an entry; frozen for the same reason.
+const SCHEMES = Object.freeze({
+  service: `tag:${TAG_AUTHORITY}:scheme:service`,
+  termsType: `tag:${TAG_AUTHORITY}:scheme:terms-type`,
+  recordType: `tag:${TAG_AUTHORITY}:scheme:record-type`,
+});
+
+/**
+ * Rebuilds the absolute URL prefix under which the current router is mounted.
+ * @param   {express.Request} req The incoming request
+ * @returns {string}              The `<protocol>://<host><baseUrl>` prefix
+ */
+function buildAbsoluteBaseUrl(req) {
+  const { protocol, baseUrl } = req;
+  const host = req.get('host');
+
+  return `${protocol}://${host}${baseUrl}`;
+}
+
+/**
+ * Picks the record-type label of a version; the first-record flag takes precedence over the technical-upgrade flag.
+ * @param   {object} version The version record to classify
+ * @returns {string}         One of the `RECORD_TYPES` labels
+ */
+function classifyRecordType(version) {
+  if (version.isFirstRecord) {
+    return RECORD_TYPES.firstRecord;
+  }
+
+  return version.isTechnicalUpgrade ? RECORD_TYPES.technicalUpgrade : RECORD_TYPES.change;
+}
+
+/**
+ * Builds the entry title as `<commit message prefix> <serviceId> <termsType>`.
+ * @param   {object} version The version record described by the entry
+ * @returns {string}         The entry title
+ */
+function buildEntryTitle(version) {
+  let prefixKey = 'update';
+
+  if (version.isFirstRecord) {
+    prefixKey = 'startTracking';
+  } else if (version.isTechnicalUpgrade) {
+    prefixKey = 'technicalUpgrade';
+  }
+
+  return `${COMMIT_MESSAGE_PREFIXES[prefixKey]} ${version.serviceId} ${version.termsType}`;
+}
+
+/**
+ * Builds the URL of the version endpoint for a given record, URI-encoding each path segment.
+ * @param   {string} baseUrl Absolute URL prefix of the API
+ * @param   {object} version The version record to link to
+ * @returns {string}         `<baseUrl>/version/<serviceId>/<termsType>/<fetchDate>`
+ */
+function buildVersionLink(baseUrl, version) {
+  const pathSegments = [
+    version.serviceId,
+    version.termsType,
+    toISODateWithoutMilliseconds(version.fetchDate),
+  ].map(segment => encodeURIComponent(segment));
+
+  return `${baseUrl}/version/${pathSegments.join('/')}`;
+}
+
+/**
+ * Builds the `tag:` URI identifying an entry from the collection id, the storage type and the record id.
+ * @param   {object} collection The collection; its `metadata.id` is read when metadata is present
+ * @param   {object} version    The version record described by the entry
+ * @returns {string}            The entry identifier
+ */
+function buildEntryId(collection, version) {
+  const collectionId = collection.metadata?.id;
+  const specific = `version:${collectionId}:${storageConfig.type}:${version.id}`;
+
+  return `tag:${TAG_AUTHORITY}:${specific}`;
+}
+
+/**
+ * Builds the compact `xml-js` representation of one Atom entry.
+ * @param   {object} collection The collection the version belongs to
+ * @param   {string} baseUrl    Absolute URL prefix of the API
+ * @param   {object} version    The version record to describe
+ * @returns {object}            The entry element: id, alternate link, title, updated and three categories
+ */
+function buildEntry(collection, baseUrl, version) {
+  const asText = value => ({ _text: value });
+  const asCategory = (term, scheme) => ({ _attributes: { term, scheme } });
+
+  return {
+    id: asText(buildEntryId(collection, version)),
+    link: {
+      _attributes: {
+        rel: 'alternate',
+        type: 'text/html',
+        href: buildVersionLink(baseUrl, version),
+      },
+    },
+    title: asText(buildEntryTitle(version)),
+    updated: asText(version.fetchDate.toISOString()),
+    category: [
+      asCategory(version.serviceId, SCHEMES.service),
+      asCategory(version.termsType, SCHEMES.termsType),
+      asCategory(classifyRecordType(version), SCHEMES.recordType),
+    ],
+  };
+}
+
+/**
+ * Builds the compact `xml-js` representation of the whole Atom document.
+ * @param   {object}   params            The feed parameters
+ * @param   {object}   params.collection The collection whose metadata feeds the title, subtitle and logo
+ * @param   {string}   params.selfHref   URL of the feed itself, emitted as the `self` link
+ * @param   {string}   params.feedId     Identifier of the feed
+ * @param   {object[]} params.versions   Version records to list; the first one provides the feed `updated` date
+ * @param   {string}   params.baseUrl    Absolute URL prefix of the API, used for entry links
+ * @returns {object}                     The XML declaration and the `feed` element
+ */
+function buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl }) {
+  const { metadata } = collection;
+  // With no version to report, the feed is stamped with the current time.
+  const updatedAt = versions.length === 0 ? new Date() : versions[0].fetchDate;
+
+  const feed = {
+    _attributes: { xmlns: 'http://www.w3.org/2005/Atom' },
+    title: { _text: metadata?.name || '' },
+    subtitle: { _text: metadata?.tagline || '' },
+    id: { _text: feedId },
+    updated: { _text: updatedAt.toISOString() },
+    link: { _attributes: { rel: 'self', href: selfHref } },
+    author: { name: { _text: FEED_AUTHOR_NAME } },
+    // The logo element is only present when the collection declares one.
+    ...(metadata?.logo ? { logo: { _text: metadata.logo } } : {}),
+    entry: versions.map(version => buildEntry(collection, baseUrl, version)),
+  };
+
+  return {
+    _declaration: { _attributes: { version: '1.0', encoding: 'utf-8' } },
+    feed,
+  };
+}
+
+/**
+ * Sends an XML payload as a successful Atom response.
+ * @param {express.Response} res The response to write to
+ * @param {string}           xml The serialized feed
+ */
+function sendAtom(res, xml) {
+  res
+    .set('Content-Type', 'application/atom+xml; charset=utf-8')
+    .status(200)
+    .send(xml);
+}
+
+/**
+ * Serializes a compact `xml-js` document to XML indented with two spaces.
+ * @param   {object} document The compact document to serialize
+ * @returns {string}          The XML text
+ */
+function render(document) {
+  const serializationOptions = { compact: true, spaces: 2 };
+
+  return js2xml(document, serializationOptions);
+}
+
+/**
+ * Creates the router exposing the collection-wide Atom feed.
+ * @returns {express.Router} The router instance
+ * @swagger
+ * tags:
+ *   name: Feeds
+ *   description: Atom feeds of version changes
+ */
+export default function feedRouter() {
+  const router = express.Router();
+
+  /**
+   * @swagger
+   * /feed:
+   *   get:
+   *     summary: Atom feed of the latest version changes across the whole collection.
+   *     tags: [Feeds]
+   *     produces:
+   *       - application/atom+xml
+   *     responses:
+   *       200:
+   *         description: An Atom 1.0 feed listing the latest version records, newest first.
+   *         content:
+   *           application/atom+xml:
+   *             schema:
+   *               type: string
+   */
+  router.get('/feed', async (req, res, next) => {
+    try {
+      const collection = await getCollection();
+      const baseUrl = buildAbsoluteBaseUrl(req);
+      const selfHref = `${baseUrl}/feed`;
+      const feedId = `tag:${TAG_AUTHORITY}:feed:${collection.metadata?.id}`;
+
+      const versions = await versionsRepository.findRecent(DEFAULT_LIMIT);
+      const document = buildFeedDocument({ collection, selfHref, feedId, versions, baseUrl });
+
+      sendAtom(res, render(document));
+    } catch (error) {
+      // Forward the failure to the application error middleware: Express 4 does not catch rejections of async handlers on its own.
+      next(error);
+    }
+  });
+
+  return router;
+}
@@ -0,0 +1,231 @@
+import { expect } from 'chai';
+import config from 'config';
+import supertest from 'supertest';
+
+import { getCollection } from '../../archivist/collection/index.js';
+import RepositoryFactory from '../../archivist/recorder/repositories/factory.js';
+import Version from '../../archivist/recorder/version.js';
+import { toISODateWithoutMilliseconds } from '../../archivist/utils/date.js';
+import app from '../server.js';
+
+// Engine configuration read by the tests: mount path of the collection API and storage settings of the versions repository.
+const basePath = config.get('@opentermsarchive/engine.collection-api.basePath');
+const storageConfig = config.get('@opentermsarchive/engine.recorder.versions.storage');
+// HTTP client bound to the application under test.
+const request = supertest(app);
+
+/**
+ * Returns the text between the first attribute-less `<tag>` and its closing tag.
+ * @param   {string}      xml The XML text to search
+ * @param   {string}      tag The element name, inserted as-is into the search pattern
+ * @returns {string|null}     The element content, or `null` when the element is absent
+ */
+function extractTag(xml, tag) {
+  const pattern = new RegExp(`<${tag}>([\\s\\S]*?)</${tag}>`);
+  const [ , content = null ] = xml.match(pattern) ?? [];
+
+  return content;
+}
+
+describe('Feed API', () => {
+  describe('GET /feed', () => {
+    let collection;
+    let response;
+
+    // Content of the first <tagName> element found in the response body.
+    const firstTag = tagName => extractTag(response.text, tagName);
+
+    // The feed is requested once; every test below inspects that single response.
+    before(async () => {
+      collection = await getCollection();
+      response = await request.get(`${basePath}/v1/feed`);
+    });
+
+    it('responds with 200 status code', () => {
+      const { status } = response;
+
+      expect(status).to.equal(200);
+    });
+
+    it('responds with Content-Type application/atom+xml', () => {
+      const contentType = response.headers['content-type'];
+
+      expect(contentType).to.match(/^application\/atom\+xml/);
+    });
+
+    it('is a valid Atom feed root', () => {
+      const { text } = response;
+
+      expect(text).to.match(/^<\?xml version="1\.0"/);
+
+      [ '<feed', 'xmlns="http://www.w3.org/2005/Atom"' ].forEach(fragment => {
+        expect(text).to.include(fragment);
+      });
+    });
+
+    describe('feed-level metadata', () => {
+      it('has a title matching the collection name', () => {
+        const { name } = collection.metadata;
+
+        expect(firstTag('title')).to.equal(name);
+      });
+
+      it('has a subtitle matching the collection tagline', () => {
+        const { tagline } = collection.metadata;
+
+        expect(firstTag('subtitle')).to.equal(tagline);
+      });
+
+      it('has a tag URI id based on the collection id', () => {
+        const expectedId = `tag:opentermsarchive.org,2026:feed:${collection.metadata.id}`;
+
+        expect(firstTag('id')).to.equal(expectedId);
+      });
+
+      it('has an updated element with a valid ISO 8601 datetime', () => {
+        const updatedText = firstTag('updated');
+
+        expect(updatedText).to.be.a('string');
+
+        const parsed = new Date(updatedText);
+
+        expect(parsed.toString()).to.not.equal('Invalid Date');
+      });
+
+      it('has a self link pointing to the feed endpoint', () => {
+        const selfLink = response.text.match(/<link[^>]*rel="self"[^>]*href="([^"]+)"/);
+
+        expect(selfLink).to.not.be.null;
+
+        const [ , href ] = selfLink;
+
+        expect(href).to.match(new RegExp(`${basePath}/v1/feed$`));
+      });
+
+      it('has an author named OTA-Bot', () => {
+        const authorPattern = /<author>[\s\S]*<name>OTA-Bot<\/name>[\s\S]*<\/author>/;
+
+        expect(response.text).to.match(authorPattern);
+      });
+
+      it('has a logo matching the collection logo', () => {
+        const { logo } = collection.metadata;
+
+        expect(firstTag('logo')).to.equal(logo);
+      });
+    });
+  });
+
+  describe('GET /feed — entries', () => {
+    const FETCH_DATE_FIRST = new Date('2023-01-01T12:00:00Z');
+    const FETCH_DATE_CHANGE = new Date('2023-06-15T08:30:00Z');
+    const FETCH_DATE_UPGRADE = new Date('2024-02-10T16:45:00Z');
+
+    let response;
+    let repository;
+    let collection;
+    let savedVersions;
+
+    // Seeds two services (service-1: first record then change; service-2: first record then technical upgrade),
+    // then requests the feed once; every test below inspects that single response.
+    before(async function () {
+      this.timeout(5000);
+      collection = await getCollection();
+      repository = RepositoryFactory.create(storageConfig);
+      await repository.initialize();
+
+      const firstRecord = await repository.save(new Version({
+        serviceId: 'service-1',
+        termsType: 'Terms of Service',
+        content: 'first content',
+        fetchDate: FETCH_DATE_FIRST,
+        snapshotIds: ['snapshot_1'],
+      }));
+
+      const changeRecord = await repository.save(new Version({
+        serviceId: 'service-1',
+        termsType: 'Terms of Service',
+        content: 'changed content',
+        fetchDate: FETCH_DATE_CHANGE,
+        snapshotIds: ['snapshot_2'],
+      }));
+
+      // First record of the second service; the technical upgrade saved right after builds on it.
+      const secondServiceFirstRecord = await repository.save(new Version({
+        serviceId: 'service-2',
+        termsType: 'Privacy Policy',
+        content: 'initial privacy',
+        fetchDate: new Date('2024-01-01T00:00:00Z'),
+        snapshotIds: ['snapshot_3'],
+      }));
+
+      const technicalUpgradeRecord = await repository.save(new Version({
+        serviceId: 'service-2',
+        termsType: 'Privacy Policy',
+        content: 'upgraded privacy',
+        fetchDate: FETCH_DATE_UPGRADE,
+        snapshotIds: ['snapshot_4'],
+        isTechnicalUpgrade: true,
+      }));
+
+      savedVersions = { firstRecord, changeRecord, secondServiceFirstRecord, technicalUpgradeRecord };
+      response = await request.get(`${basePath}/v1/feed`);
+    });
+
+    after(() => repository.removeAll());
+
+    it('orders entries newest-first', () => {
+      const updates = [...response.text.matchAll(/<entry>[\s\S]*?<updated>([^<]+)<\/updated>[\s\S]*?<\/entry>/g)].map(match => match[1]);
+
+      // Guard against a vacuous pass: an empty list is trivially sorted.
+      expect(updates).to.not.be.empty;
+      expect(updates).to.deep.equal([...updates].sort().reverse());
+    });
+
+    describe('entry metadata', () => {
+      let firstEntry;
+
+      // The first entry is expected to be the technical upgrade, which has the most recent fetch date of the fixtures.
+      before(() => {
+        firstEntry = response.text.match(/<entry>[\s\S]*?<\/entry>/)[0];
+      });
+
+      it('has an id tag URI including storage type and record id', () => {
+        // Read the collection id from the collection itself, as the feed-level id test does, instead of hard-coding it.
+        const collectionId = collection.metadata.id;
+        const expected = `tag:opentermsarchive.org,2026:version:${collectionId}:${storageConfig.type}:${savedVersions.technicalUpgradeRecord.id}`;
+
+        expect(firstEntry).to.include(`<id>${expected}</id>`);
+      });
+
+      it('has an alternate link to the version API endpoint', () => {
+        const href = firstEntry.match(/<link[^>]*rel="alternate"[^>]*href="([^"]+)"/)[1];
+        const expectedPathFragment = `/version/${encodeURIComponent('service-2')}/${encodeURIComponent('Privacy Policy')}/${encodeURIComponent(toISODateWithoutMilliseconds(FETCH_DATE_UPGRADE))}`;
+
+        expect(href).to.include(expectedPathFragment);
+      });
+
+      it('has a type="text/html" on the alternate link', () => {
+        expect(firstEntry).to.match(/<link[^>]*rel="alternate"[^>]*type="text\/html"/);
+      });
+
+      it('has a title reconstructed from commit prefix + serviceId + termsType', () => {
+        const title = firstEntry.match(/<title[^>]*>([\s\S]*?)<\/title>/)[1];
+
+        expect(title).to.include('Apply technical or declaration upgrade on');
+        expect(title).to.include('service-2');
+        expect(title).to.include('Privacy Policy');
+      });
+
+      it('has an updated element matching the fetch date', () => {
+        const updated = firstEntry.match(/<updated>([^<]+)<\/updated>/)[1];
+
+        expect(new Date(updated).toISOString()).to.equal(FETCH_DATE_UPGRADE.toISOString());
+      });
+
+      it('has three categories with the expected schemes', () => {
+        const categories = [...firstEntry.matchAll(/<category([^/]*)\/>/g)].map(match => match[1]);
+
+        expect(categories).to.have.length(3);
+
+        const schemes = categories.map(attrs => attrs.match(/scheme="([^"]+)"/)[1]);
+
+        expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:service');
+        expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:terms-type');
+        expect(schemes).to.include('tag:opentermsarchive.org,2026:scheme:record-type');
+      });
+
+      it('has category terms for service, terms type and record type', () => {
+        const categories = [...firstEntry.matchAll(/<category([^/]*)\/>/g)].map(match => match[1]);
+        const terms = categories.map(attrs => attrs.match(/term="([^"]+)"/)[1]);
+
+        expect(terms).to.include('service-2');
+        expect(terms).to.include('Privacy Policy');
+        expect(terms).to.include('Technical upgrade');
+      });
+    });
+
+    describe('record-type classification', () => {
+      // Returns the XML of the entry whose <id> ends with the given record id, or undefined when there is none.
+      function findEntryById(xml, recordId) {
+        const match = [...xml.matchAll(/<entry>[\s\S]*?<\/entry>/g)].find(entry => entry[0].includes(`:${recordId}</id>`));
+
+        return match && match[0];
+      }
+
+      it('classifies a first record as "First record"', () => {
+        const entry = findEntryById(response.text, savedVersions.secondServiceFirstRecord.id);
+
+        expect(entry).to.not.be.undefined;
+        expect(entry).to.match(/term="First record"/);
+      });
+
+      it('classifies a content change as "Change"', () => {
+        const entry = findEntryById(response.text, savedVersions.changeRecord.id);
+
+        expect(entry).to.not.be.undefined;
+        expect(entry).to.match(/term="Change"/);
+      });
+
+      it('classifies a technical upgrade as "Technical upgrade"', () => {
+        const entry = findEntryById(response.text, savedVersions.technicalUpgradeRecord.id);
+
+        expect(entry).to.not.be.undefined;
+        expect(entry).to.match(/term="Technical upgrade"/);
+      });
+    });
+  });
+});
@@ -5,6 +5,7 @@ import { getCollection } from '../../archivist/collection/index.js';
 import * as Services from '../../archivist/services/index.js';
 
 import docsRouter from './docs.js';
+import feedRouter from './feed.js';
 import metadataRouter from './metadata.js';
 import servicesRouter from './services.js';
 import versionsRouter from './versions.js';
@@ -37,6 +38,7 @@ export default async function apiRouter(basePath) {
   router.use(await metadataRouter(collection, services));
   router.use(servicesRouter(services));
   router.use(versionsRouter);
+  router.use(feedRouter());
 
   return router;
 }