From 0b0a7f1adb8dd150efc67686b99964d966e1cc0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadeusz=20So=C5=9Bnierz?= Date: Wed, 20 Apr 2022 18:13:06 +0200 Subject: [PATCH] Add RSS/Atom feed support --- config.sample.yml | 4 + package.json | 1 + src/Bridge.ts | 27 +++++- src/Config/Config.ts | 9 ++ src/ConnectionManager.ts | 16 +++- src/Connections/FeedConnection.ts | 68 ++++++++++++++ src/Connections/SetupConnection.ts | 21 +++++ src/Connections/index.ts | 3 +- src/feeds/FeedReader.ts | 144 +++++++++++++++++++++++++++++ yarn.lock | 28 +++++- 10 files changed, 315 insertions(+), 6 deletions(-) create mode 100644 src/Connections/FeedConnection.ts create mode 100644 src/feeds/FeedReader.ts diff --git a/config.sample.yml b/config.sample.yml index 3634604d..ea3157ad 100644 --- a/config.sample.yml +++ b/config.sample.yml @@ -80,6 +80,10 @@ generic: userIdPrefix: _webhooks_ allowJsTransformationFunctions: false waitForComplete: false +feeds: + # (Optional) Support for RSS/Atom feeds. + enabled: false + pollIntervalSeconds: 600 provisioning: # (Optional) Provisioning API for integration managers # diff --git a/package.json b/package.json index d9fb6224..fbd3338b 100644 --- a/package.json +++ b/package.json @@ -56,6 +56,7 @@ "node-emoji": "^1.11.0", "prom-client": "^14.0.1", "reflect-metadata": "^0.1.13", + "rss-parser": "^3.12.0", "source-map-support": "^0.5.21", "string-argv": "^0.3.1", "uuid": "^8.3.2", diff --git a/src/Bridge.ts b/src/Bridge.ts index c1a225d5..a972e170 100644 --- a/src/Bridge.ts +++ b/src/Bridge.ts @@ -10,7 +10,7 @@ import { GetIssueResponse, GetIssueOpts } from "./Gitlab/Types" import { GithubInstance } from "./Github/GithubInstance"; import { IBridgeStorageProvider } from "./Stores/StorageProvider"; import { IConnection, GitHubDiscussionSpace, GitHubDiscussionConnection, GitHubUserSpace, JiraProjectConnection, GitLabRepoConnection, - GitHubIssueConnection, GitHubProjectConnection, GitHubRepoConnection, GitLabIssueConnection, FigmaFileConnection } from 
"./Connections"; + GitHubIssueConnection, GitHubProjectConnection, GitHubRepoConnection, GitLabIssueConnection, FigmaFileConnection, FeedConnection } from "./Connections"; import { IGitLabWebhookIssueStateEvent, IGitLabWebhookMREvent, IGitLabWebhookNoteEvent, IGitLabWebhookPushEvent, IGitLabWebhookReleaseEvent, IGitLabWebhookTagPushEvent, IGitLabWebhookWikiPageEvent } from "./Gitlab/WebhookTypes"; import { JiraIssueEvent, JiraIssueUpdatedEvent } from "./Jira/WebhookTypes"; import { JiraOAuthResult } from "./Jira/Types"; @@ -42,6 +42,7 @@ import { JiraOAuthRequestCloud, JiraOAuthRequestOnPrem, JiraOAuthRequestResult } import { CLOUD_INSTANCE } from "./Jira/Client"; import { GenericWebhookEvent, GenericWebhookEventResult } from "./generic/types"; import { SetupWidget } from "./Widgets/SetupWidget"; +import { FeedEntry, FeedError, FeedReader } from "./feeds/FeedReader"; const log = new LogWrapper("Bridge"); export class Bridge { @@ -137,6 +138,16 @@ export class Bridge { await this.tokenStore.load(); const connManager = this.connectionManager = new ConnectionManager(this.as, this.config, this.tokenStore, this.commentProcessor, this.messageClient, this.storage, this.github); + + if (this.config.feeds?.enabled) { + new FeedReader( + this.config.feeds, + this.connectionManager, + this.queue, + this.as.botClient, + ); + } + if (this.config.provisioning) { const routers = []; @@ -196,6 +207,7 @@ export class Bridge { this.queue.subscribe("gitlab.*"); this.queue.subscribe("jira.*"); this.queue.subscribe("figma.*"); + this.queue.subscribe("feed.*"); const validateRepoIssue = (data: GitHubWebhookTypes.IssuesEvent|GitHubWebhookTypes.IssueCommentEvent) => { if (!data.repository || !data.issue) { @@ -591,6 +603,17 @@ export class Bridge { (c, data) => c.handleNewComment(data.payload), ) + this.bindHandlerToQueue( + "feed.entry", + (data) => connManager.getConnectionsForFeedUrl(data.feed.url), + (c, data) => c.handleFeedEntry(data), + ); + this.bindHandlerToQueue( + "feed.error", 
+ (data) => connManager.getConnectionsForFeedUrl(data.url), + (c, data) => c.handleFeedError(data), + ); + // Set the name and avatar of the bot if (this.config.bot) { // Ensure we are registered before we set a profile @@ -1147,4 +1170,4 @@ export class Bridge { log.debug(`Set up ${roomId} as an admin room for ${adminRoom.userId}`); return adminRoom; } -} \ No newline at end of file +} diff --git a/src/Config/Config.ts b/src/Config/Config.ts index 199837b3..874da132 100644 --- a/src/Config/Config.ts +++ b/src/Config/Config.ts @@ -184,6 +184,11 @@ export class BridgeConfigGitLab { } } +export interface BridgeConfigFeeds { + enabled: boolean; + pollIntervalSeconds: number; +} + export interface BridgeConfigFigma { publicUrl: string; overrideUserId?: string; @@ -323,6 +328,7 @@ export interface BridgeConfigRoot { bot?: BridgeConfigBot; bridge: BridgeConfigBridge; figma?: BridgeConfigFigma; + feeds?: BridgeConfigFeeds; generic?: BridgeGenericWebhooksConfigYAML; github?: BridgeConfigGitHub; gitlab?: BridgeConfigGitLabYAML; @@ -362,6 +368,8 @@ export class BridgeConfig { public readonly generic?: BridgeConfigGenericWebhooks; @configKey("Configure this to enable Figma support", true) public readonly figma?: BridgeConfigFigma; + @configKey("Configure this to enable RSS/Atom feed support", true) + public readonly feeds?: BridgeConfigFeeds; @configKey("Define profile information for the bot user", true) public readonly bot?: BridgeConfigBot; @configKey("EXPERIMENTAL support for complimentary widgets", true) @@ -395,6 +403,7 @@ export class BridgeConfig { this.figma = configData.figma; this.jira = configData.jira && new BridgeConfigJira(configData.jira); this.generic = configData.generic && new BridgeConfigGenericWebhooks(configData.generic); + this.feeds = configData.feeds; this.provisioning = configData.provisioning; this.passFile = configData.passFile; this.bot = configData.bot; diff --git a/src/ConnectionManager.ts b/src/ConnectionManager.ts index 3b688fb8..271a4a5d 
100644 --- a/src/ConnectionManager.ts +++ b/src/ConnectionManager.ts @@ -18,7 +18,7 @@ import { GetConnectionTypeResponseItem } from "./provisioning/api"; import { ApiError, ErrCode } from "./api"; import { UserTokenStore } from "./UserTokenStore"; import {v4 as uuid} from "uuid"; -import { FigmaFileConnection } from "./Connections/FigmaFileConnection"; +import { FigmaFileConnection, FeedConnection } from "./Connections"; import { IBridgeStorageProvider } from "./Stores/StorageProvider"; import Metrics from "./Metrics"; import EventEmitter from "events"; @@ -128,7 +128,7 @@ export class ConnectionManager extends EventEmitter { throw new ApiError(`Connection type not known`); } - private assertStateAllowed(state: StateEvent, serviceType: "github"|"gitlab"|"jira"|"figma"|"webhooks") { + private assertStateAllowed(state: StateEvent, serviceType: "github"|"gitlab"|"jira"|"figma"|"webhooks"|"feed") { if (state.sender === this.as.botUserId) { return; } @@ -248,6 +248,14 @@ export class ConnectionManager extends EventEmitter { return new FigmaFileConnection(roomId, state.stateKey, state.content, this.config.figma, this.as, this.storage); } + if (FeedConnection.EventTypes.includes(state.type)) { + if (!this.config.feeds?.enabled) { + throw Error('RSS/Atom feeds are not configured'); + } + this.assertStateAllowed(state, "feed"); + return new FeedConnection(roomId, state.stateKey, state.content, this.config.feeds, this.as, this.storage); + } + if (GenericHookConnection.EventTypes.includes(state.type) && this.config.generic?.enabled) { if (!this.config.generic) { throw Error('Generic webhooks are not configured'); @@ -377,6 +385,10 @@ export class ConnectionManager extends EventEmitter { public getForFigmaFile(fileKey: string, instanceName: string): FigmaFileConnection[] { return this.connections.filter((c) => (c instanceof FigmaFileConnection && (c.fileId === fileKey || c.instanceName === instanceName))) as FigmaFileConnection[]; } + + public getConnectionsForFeedUrl(url: 
import {Appservice} from "matrix-bot-sdk";
import { IConnection, IConnectionState } from ".";
import { BridgeConfigFeeds } from "../Config/Config";
import { FeedEntry, FeedError} from "../feeds/FeedReader";
import LogWrapper from "../LogWrapper";
import { GetConnectionsResponseItem } from "../provisioning/api";
import { IBridgeStorageProvider } from "../Stores/StorageProvider";
import { BaseConnection } from "./BaseConnection";
import markdown from "markdown-it";

const log = new LogWrapper("FeedConnection");
const md = new markdown();

/** Content of the room state event that binds a feed to a room. */
export interface FeedConnectionState extends IConnectionState {
    /** URL of the RSS/Atom feed this connection relays. */
    url: string;
}

/**
 * Relays one RSS/Atom feed into one Matrix room: every new entry becomes a
 * notice, and a failing feed is reported once until it delivers again.
 */
export class FeedConnection extends BaseConnection implements IConnection {
    static readonly CanonicalEventType = "uk.half-shot.matrix-hookshot.feed";
    static readonly EventTypes = [ FeedConnection.CanonicalEventType ];

    // True once an error notice has been posted; reset by the next entry, so a
    // feed that keeps failing yields a single notice instead of one per poll.
    private hasError = false;

    /** The feed URL taken from the connection's state content. */
    public get feedUrl(): string {
        return this.state.url;
    }

    constructor(
        roomId: string,
        stateKey: string,
        private state: FeedConnectionState,
        private readonly config: BridgeConfigFeeds,
        private readonly as: Appservice,
        private readonly storage: IBridgeStorageProvider
    ) {
        super(roomId, stateKey, FeedConnection.CanonicalEventType);
        log.info(`FeedConnection created for ${roomId}, ${JSON.stringify(state)}`);
    }

    /** This connection never reacts to state events. */
    public isInterestedInStateEvent(eventType: string, stateKey: string): boolean {
        return false;
    }

    /**
     * Post a notice linking to a newly published entry and clear the error
     * latch, since the feed is evidently reachable again.
     *
     * @param entry The entry queued by the feed reader.
     */
    public async handleFeedEntry(entry: FeedEntry): Promise<void> {
        this.hasError = false;
        const message = `New post in ${entry.feed.title}: [${entry.title}](${entry.link})`;
        await this.as.botIntent.sendEvent(this.roomId, {
            msgtype: 'm.notice',
            format: "org.matrix.custom.html",
            formatted_body: md.renderInline(message),
            body: message,
        });
    }

    /**
     * Post a plain-text notice about a fetch failure, unless one has already
     * been posted since the last successfully delivered entry.
     *
     * @param error The failure queued by the feed reader.
     */
    public async handleFeedError(error: FeedError): Promise<void> {
        if (this.hasError) {
            return;
        }
        await this.as.botIntent.sendEvent(this.roomId, {
            msgtype: 'm.notice',
            // No `format` here: it only has meaning alongside `formatted_body`,
            // and "m.text" is a msgtype, not a message format.
            body: `Error fetching ${this.feedUrl}: ${error.cause.message}`
        });
        // Latch only after the notice went out, so a failed send is retried
        // on the next error.
        this.hasError = true;
    }

    toString(): string {
        return `FeedConnection ${this.state.url}`;
    }
}
/**
 * `feed <url>` command: bridge an RSS/Atom feed into this room by writing a
 * feed connection state event whose state key and content are the URL.
 *
 * @param userId The user who issued the command.
 * @param url The feed URL supplied by the user; must be an http(s) URL.
 * @throws CommandError if feeds are disabled in the config or the URL is not
 *         a well-formed http(s) URL.
 */
@botCommand("feed", { help: "Bridge an RSS/Atom feed to the room.", requiredArgs: ["url"], includeUserId: true, category: "feed"})
public async onFeed(userId: string, url: string) {
    if (!this.config.feeds?.enabled) {
        throw new CommandError("not-configured", "The bridge is not configured to support feeds.");
    }

    // Check the "feed" service rather than "figma": a user's right to set up
    // feeds must not depend on their Figma permissions.
    await this.checkUserPermissions(userId, "feed", FeedConnection.CanonicalEventType);

    let protocol: string;
    try {
        protocol = new URL(url).protocol;
        // TODO: fetch and check content-type?
    } catch {
        throw new CommandError("Invalid URL", `${url} doesn't look like a valid feed URL`);
    }
    // `new URL` accepts any scheme (file:, javascript:, ...); the bridge will
    // later fetch this URL itself, so only web URLs are acceptable.
    if (protocol !== "http:" && protocol !== "https:") {
        throw new CommandError("Invalid URL", `${url} doesn't look like a valid feed URL`);
    }

    await this.as.botClient.sendStateEvent(this.roomId, FeedConnection.CanonicalEventType, url, {url});
    return this.as.botClient.sendHtmlNotice(this.roomId, md.renderInline(`Room configured to bridge \`${url}\``));
}
import { MatrixClient } from "matrix-bot-sdk";
import { BridgeConfigFeeds } from "../Config/Config";
import { ConnectionManager } from "../ConnectionManager";
import { FeedConnection } from "../Connections";
import LogWrapper from "../LogWrapper";
import { MessageQueue } from "../MessageQueue";

import axios from "axios";
import Parser from "rss-parser";

const log = new LogWrapper("FeedReader");

/** Queued as `feed.error` when a feed cannot be fetched or parsed. */
export class FeedError extends Error {
    constructor(
        public url: string,
        public cause: Error,
    ) {
        super(`Error fetching feed ${url}: ${cause.message}`);
    }
}

/** Payload of a `feed.entry` queue event: one new item of one feed. */
export interface FeedEntry {
    feed: {
        title: string,
        url: string,
    },
    title: string,
    link: string,
}

/** Shape of the persisted account data: feed URL -> GUIDs already announced. */
interface AccountData {
    [url: string]: string[],
}

/** Remove anything that looks like an HTML/XML tag from `input`. */
function stripHtml(input: string): string {
    return input.replace(/<[^>]*?>/g, '');
}

/**
 * Periodically fetches every feed URL that has a FeedConnection, pushes a
 * `feed.entry` event for each item not announced before and a `feed.error`
 * event for each feed that fails, and persists the announced GUIDs in the
 * bot's account data so they survive restarts.
 */
export class FeedReader {
    // One element per connection, so the same URL can appear several times.
    private observedFeedUrls: string[];
    private seenEntries: Map<string, string[]> = new Map();
    private readonly parser = new Parser();
    static readonly seenEntriesEventType = "uk.half-shot.matrix-hookshot.feed.reader.seenEntries";

    constructor(
        private config: BridgeConfigFeeds,
        private connectionManager: ConnectionManager,
        private queue: MessageQueue,
        private matrixClient: MatrixClient,
    ) {
        const feedConnections = this.connectionManager.getAllConnectionsOfType(FeedConnection);
        this.observedFeedUrls = feedConnections.map(c => c.feedUrl);
        connectionManager.on('new-connection', c => {
            if (c instanceof FeedConnection) {
                log.info('New connection tracked:', c.feedUrl);
                this.observedFeedUrls.push(c.feedUrl);
            }
        });

        log.info('Loaded feed URLs:', this.observedFeedUrls);

        // Polling starts only once the seen list is loaded: with an empty list
        // every existing entry of every feed would be announced again.
        this.loadSeenEntries()
            .then(() => this.pollFeeds())
            .catch((err: unknown) => {
                log.error(`Failed to load seen feed entries, not polling feeds: ${err instanceof Error ? err.message : String(err)}`);
            });
    }

    /** Fill `seenEntries` from account data; a 404 means nothing was stored yet. */
    private async loadSeenEntries(): Promise<void> {
        let accountData: AccountData;
        try {
            accountData = await this.matrixClient.getAccountData(FeedReader.seenEntriesEventType);
        } catch (err: unknown) {
            if ((err as { statusCode?: number } | undefined)?.statusCode !== 404) {
                throw err;
            }
            accountData = {};
        }
        for (const [url, guids] of Object.entries(accountData)) {
            this.seenEntries.set(url, guids);
        }
    }

    /** Write the whole `seenEntries` map back to account data. */
    private async saveSeenEntries(): Promise<void> {
        const accountData: AccountData = {};
        for (const [url, guids] of this.seenEntries.entries()) {
            accountData[url] = guids;
        }
        await this.matrixClient.setAccountData(FeedReader.seenEntriesEventType, accountData);
    }

    /**
     * Run one polling round over all observed feeds, persist the seen list if
     * any feed produced new entries, and schedule the next round. The next
     * round is scheduled even when this one fails, so a transient error (for
     * example a failed account data write) cannot stop polling for good.
     */
    private async pollFeeds(): Promise<void> {
        try {
            // Several rooms may bridge the same feed; fetch each URL once so its
            // entries are queued once (the queue consumer fans out per URL).
            const urls = [...new Set(this.observedFeedUrls)];
            log.debug(`Checking for updates in ${urls.length} RSS/Atom feeds`);

            const changed = await Promise.all(urls.map((url) => this.pollFeed(url)));
            if (changed.includes(true)) {
                await this.saveSeenEntries();
            }
        } catch (err: unknown) {
            log.error(`Failed to poll feeds: ${err instanceof Error ? err.message : String(err)}`);
        } finally {
            setTimeout(() => {
                void this.pollFeeds();
            }, this.config.pollIntervalSeconds * 1000);
        }
    }

    /**
     * Fetch and parse one feed, queue a `feed.entry` event per unseen item and
     * update the in-memory seen list. Failures are queued as `feed.error`
     * instead of being thrown.
     *
     * @returns true if the seen list for `url` was changed.
     */
    private async pollFeed(url: string): Promise<boolean> {
        try {
            const res = await axios.get(url);
            const feed = await this.parser.parseString(res.data);
            const seenGuids = this.seenEntries.get(url) || [];
            const seenGuidsSet = new Set(seenGuids);
            const newGuids: string[] = [];
            // Title and link are optional in feeds; fall back rather than fail.
            const feedTitle = stripHtml(feed.title || url);
            let hasNewEntries = false;

            log.debug(`Found ${feed.items.length} entries in ${url}`);
            for (const item of feed.items) {
                const guid = item.guid || item.id || item.link || item.title;
                if (!guid) {
                    log.error(`Could not determine guid for entry in ${url}, skipping`);
                    continue;
                }
                newGuids.push(guid);
                if (seenGuidsSet.has(guid)) {
                    log.debug('Skipping already seen entry', guid);
                    continue;
                }
                const entry: FeedEntry = {
                    feed: { title: feedTitle, url },
                    title: stripHtml(item.title || guid),
                    link: item.link || feed.link || url,
                };

                log.debug('New entry:', entry);
                hasNewEntries = true;

                this.queue.push({ eventName: 'feed.entry', sender: 'FeedReader', data: entry });
            }

            if (hasNewEntries) {
                // Some RSS feeds can return a very small number of items then bounce
                // back to their "normal" size, so we cannot just clobber the recent GUID list per request or else we'll
                // forget what we sent and resend it. Instead, we'll keep 2x the max number of items that we've ever
                // seen from this feed, up to a max of 10,000.
                // Adapted from https://github.com/matrix-org/go-neb/blob/babb74fa729882d7265ff507b09080e732d060ae/services/rssbot/rssbot.go#L304
                const maxGuids = Math.min(Math.max(2 * newGuids.length, seenGuids.length), 10_000);
                const newSeenItems = Array.from(new Set([ ...newGuids, ...seenGuids ]).values()).slice(0, maxGuids);
                this.seenEntries.set(url, newSeenItems);
            }
            return hasNewEntries;
        } catch (err: unknown) {
            const cause = err instanceof Error ? err : new Error(String(err));
            const error = new FeedError(url, cause);
            log.error(error.message);
            this.queue.push({ eventName: 'feed.error', sender: 'FeedReader', data: error });
            // The seen list for this feed was not touched.
            return false;
        }
    }
}
sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw== + selderee@^0.6.0: version "0.6.0" resolved "https://registry.yarnpkg.com/selderee/-/selderee-0.6.0.tgz#f3bee66cfebcb6f33df98e4a1df77388b42a96f7" @@ -7087,6 +7100,19 @@ ws@^7.3.0: resolved "https://registry.yarnpkg.com/ws/-/ws-7.5.6.tgz#e59fc509fb15ddfb65487ee9765c5a51dec5fe7b" integrity sha512-6GLgCqo2cy2A2rjCNFlxQS6ZljG/coZfZXclldI8FB/1G3CCI36Zd8xy2HrFVACi8tfk5XrgLQEk+P0Tnz9UcA== +xml2js@^0.4.19: + version "0.4.23" + resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.4.23.tgz#a0c69516752421eb2ac758ee4d4ccf58843eac66" + integrity sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug== + dependencies: + sax ">=0.6.0" + xmlbuilder "~11.0.0" + +xmlbuilder@~11.0.0: + version "11.0.1" + resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-11.0.1.tgz#be9bae1c8a046e76b31127726347d0ad7002beb3" + integrity sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA== + xtend@^4.0.0: version "4.0.2" resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54"