From 6c4bbc715038279c4777233c7db327951050ee28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadeusz=20So=C5=9Bnierz?= Date: Fri, 24 Mar 2023 15:22:10 +0100 Subject: [PATCH] When setting up a FeedConnection, check if the feed loads and parses rather than just checking the Content-Type (#684) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * When setting up a FeedConnection, check if the feed loads and parses rather than just checking the Content-Type * Don't create a new Parser each time we fetch a feed * Create 684.bugfix --------- Co-authored-by: Tadeusz SoĊ›nierz Co-authored-by: Will Hunt --- changelog.d/684.bugfix | 1 + src/Connections/FeedConnection.ts | 18 +++-------- src/feeds/FeedReader.ts | 54 +++++++++++++++++++++---------- 3 files changed, 43 insertions(+), 30 deletions(-) create mode 100644 changelog.d/684.bugfix diff --git a/changelog.d/684.bugfix b/changelog.d/684.bugfix new file mode 100644 index 00000000..2c712807 --- /dev/null +++ b/changelog.d/684.bugfix @@ -0,0 +1 @@ +Don't check Content-Type of RSS feeds when adding a new connection, instead just check if the feed is valid. diff --git a/src/Connections/FeedConnection.ts b/src/Connections/FeedConnection.ts index 336e0948..5ff47502 100644 --- a/src/Connections/FeedConnection.ts +++ b/src/Connections/FeedConnection.ts @@ -2,7 +2,7 @@ import {Appservice, Intent, StateEvent} from "matrix-bot-sdk"; import { IConnection, IConnectionState, InstantiateConnectionOpts } from "."; import { ApiError, ErrCode } from "../api"; import { BridgeConfigFeeds } from "../Config/Config"; -import { FeedEntry, FeedError} from "../feeds/FeedReader"; +import { FeedEntry, FeedError, FeedReader} from "../feeds/FeedReader"; import { Logger } from "matrix-appservice-bridge"; import { IBridgeStorageProvider } from "../Stores/StorageProvider"; import { BaseConnection } from "./BaseConnection"; @@ -37,6 +37,7 @@ export interface FeedConnectionSecrets { export type FeedResponseItem = GetConnectionsResponseItem; const MAX_LAST_RESULT_ITEMS = 5; +const VALIDATION_FETCH_TIMEOUT_MS = 5000; @Connection export class FeedConnection extends BaseConnection implements IConnection { @@ -57,20 +58,11 @@ export class FeedConnection extends BaseConnection implements IConnection { } catch (ex) { throw new ApiError("Feed URL doesn't appear valid", ErrCode.BadValue); } - let res; + try { - res = await axios.head(url).catch(() => axios.get(url)); + await FeedReader.fetchFeed(url, {}, VALIDATION_FETCH_TIMEOUT_MS); } catch (ex) { - throw new ApiError(`Could not read from URL: ${ex.message}`, ErrCode.BadValue); - } - const contentType = res.headers['content-type']; - // we're deliberately liberal here, since different things pop up in the wild - if (!contentType.match(/xml/)) { - throw new ApiError( - `Feed responded with a content type of "${contentType}", which doesn't look like an RSS/Atom feed`, - ErrCode.BadValue, - StatusCodes.UNSUPPORTED_MEDIA_TYPE - ); + throw new ApiError(`Could not read feed from URL: ${ex.message}`, ErrCode.BadValue); } } diff --git a/src/feeds/FeedReader.ts b/src/feeds/FeedReader.ts index 0d37bf44..6d9fdc84 100644 --- a/src/feeds/FeedReader.ts +++ b/src/feeds/FeedReader.ts @@ -6,7 +6,7 @@ import { Logger } from "matrix-appservice-bridge"; import { MessageQueue } from "../MessageQueue"; import Ajv from "ajv"; -import axios from "axios"; +import axios, { AxiosResponse } from "axios"; import Parser from "rss-parser"; import Metrics from "../Metrics"; import UserAgent from "../UserAgent"; @@ -88,8 +88,7 @@ function normalizeUrl(input: string): string { } export class FeedReader { - - private readonly parser = new Parser(); + private readonly parser = FeedReader.buildParser(); private connections: FeedConnection[]; // ts should notice that we do in fact initialize it in constructor, but it doesn't (in this version) @@ -185,6 +184,28 @@ export class FeedReader { await this.matrixClient.setAccountData(FeedReader.seenEntriesEventType, accountData); } + private static buildParser(): Parser { + return new Parser(); + } + + public static async fetchFeed( + url: string, + headers: any, + timeoutMs: number, + parser: Parser = FeedReader.buildParser(), + ): Promise<{ response: AxiosResponse, feed: Parser.Output }> { + const response = await axios.get(url, { + headers: { + 'User-Agent': UserAgent, + ...headers, + }, + // We don't want to wait forever for the feed. + timeout: timeoutMs, + }); + const feed = await parser.parseString(response.data); + return { response, feed }; + } + private async pollFeeds(): Promise { log.debug(`Checking for updates in ${this.observedFeedUrls.size} RSS/Atom feeds`); @@ -196,28 +217,24 @@ export class FeedReader { const fetchKey = randomUUID(); const { etag, lastModified } = this.cacheTimes.get(url) || {}; try { - const res = await axios.get(url, { - headers: { - 'User-Agent': UserAgent, + const { response, feed } = await FeedReader.fetchFeed( + url, + { ...(lastModified && { 'If-Modified-Since': lastModified}), ...(etag && { 'If-None-Match': etag}), }, // We don't want to wait forever for the feed. - timeout: this.config.pollTimeoutSeconds * 1000, - }); - // Clear any HTTP failures - this.feedsFailingHttp.delete(url); + this.config.pollTimeoutSeconds * 1000, + this.parser, + ); // Store any entity tags/cache times. - if (res.headers.ETag) { - this.cacheTimes.set(url, { etag: res.headers.ETag}); - } else if (res.headers['Last-Modified']) { - this.cacheTimes.set(url, { lastModified: res.headers['Last-Modified'] }); + if (response.headers.ETag) { + this.cacheTimes.set(url, { etag: response.headers.ETag}); + } else if (response.headers['Last-Modified']) { + this.cacheTimes.set(url, { lastModified: response.headers['Last-Modified'] }); } - const feed = await this.parser.parseString(res.data); - this.feedsFailingParsing.delete(url); - let initialSync = false; let seenGuids = this.seenEntries.get(url); if (!seenGuids) { @@ -272,6 +289,9 @@ export class FeedReader { this.seenEntries.set(url, newSeenItems); } this.queue.push({ eventName: 'feed.success', sender: 'FeedReader', data: { url: url } }); + // Clear any feed failures + this.feedsFailingHttp.delete(url); + this.feedsFailingParsing.delete(url); } catch (err: unknown) { if (axios.isAxiosError(err)) { // No new feed items, skip.