mirror of
https://github.com/matrix-org/matrix-hookshot.git
synced 2025-03-10 21:19:13 +00:00
When setting up a FeedConnection, check if the feed loads and parses rather than just checking the Content-Type (#684)
* When setting up a FeedConnection, check if the feed loads and parses rather than just checking the Content-Type * Don't create a new Parser each time we fetch a feed * Create 684.bugfix --------- Co-authored-by: Tadeusz Sośnierz <tadeusz@sosnierz.com> Co-authored-by: Will Hunt <will@half-shot.uk>
This commit is contained in:
parent
5ccd64acd8
commit
6c4bbc7150
1
changelog.d/684.bugfix
Normal file
1
changelog.d/684.bugfix
Normal file
@ -0,0 +1 @@
|
||||
Don't check Content-Type of RSS feeds when adding a new connection, instead just check if the feed is valid.
|
@ -2,7 +2,7 @@ import {Appservice, Intent, StateEvent} from "matrix-bot-sdk";
|
||||
import { IConnection, IConnectionState, InstantiateConnectionOpts } from ".";
|
||||
import { ApiError, ErrCode } from "../api";
|
||||
import { BridgeConfigFeeds } from "../Config/Config";
|
||||
import { FeedEntry, FeedError} from "../feeds/FeedReader";
|
||||
import { FeedEntry, FeedError, FeedReader} from "../feeds/FeedReader";
|
||||
import { Logger } from "matrix-appservice-bridge";
|
||||
import { IBridgeStorageProvider } from "../Stores/StorageProvider";
|
||||
import { BaseConnection } from "./BaseConnection";
|
||||
@ -37,6 +37,7 @@ export interface FeedConnectionSecrets {
|
||||
export type FeedResponseItem = GetConnectionsResponseItem<FeedConnectionState, FeedConnectionSecrets>;
|
||||
|
||||
const MAX_LAST_RESULT_ITEMS = 5;
|
||||
const VALIDATION_FETCH_TIMEOUT_MS = 5000;
|
||||
|
||||
@Connection
|
||||
export class FeedConnection extends BaseConnection implements IConnection {
|
||||
@ -57,20 +58,11 @@ export class FeedConnection extends BaseConnection implements IConnection {
|
||||
} catch (ex) {
|
||||
throw new ApiError("Feed URL doesn't appear valid", ErrCode.BadValue);
|
||||
}
|
||||
let res;
|
||||
|
||||
try {
|
||||
res = await axios.head(url).catch(() => axios.get(url));
|
||||
await FeedReader.fetchFeed(url, {}, VALIDATION_FETCH_TIMEOUT_MS);
|
||||
} catch (ex) {
|
||||
throw new ApiError(`Could not read from URL: ${ex.message}`, ErrCode.BadValue);
|
||||
}
|
||||
const contentType = res.headers['content-type'];
|
||||
// we're deliberately liberal here, since different things pop up in the wild
|
||||
if (!contentType.match(/xml/)) {
|
||||
throw new ApiError(
|
||||
`Feed responded with a content type of "${contentType}", which doesn't look like an RSS/Atom feed`,
|
||||
ErrCode.BadValue,
|
||||
StatusCodes.UNSUPPORTED_MEDIA_TYPE
|
||||
);
|
||||
throw new ApiError(`Could not read feed from URL: ${ex.message}`, ErrCode.BadValue);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,7 @@ import { Logger } from "matrix-appservice-bridge";
|
||||
import { MessageQueue } from "../MessageQueue";
|
||||
|
||||
import Ajv from "ajv";
|
||||
import axios from "axios";
|
||||
import axios, { AxiosResponse } from "axios";
|
||||
import Parser from "rss-parser";
|
||||
import Metrics from "../Metrics";
|
||||
import UserAgent from "../UserAgent";
|
||||
@ -88,8 +88,7 @@ function normalizeUrl(input: string): string {
|
||||
}
|
||||
|
||||
export class FeedReader {
|
||||
|
||||
private readonly parser = new Parser();
|
||||
private readonly parser = FeedReader.buildParser();
|
||||
|
||||
private connections: FeedConnection[];
|
||||
// ts should notice that we do in fact initialize it in constructor, but it doesn't (in this version)
|
||||
@ -185,6 +184,28 @@ export class FeedReader {
|
||||
await this.matrixClient.setAccountData(FeedReader.seenEntriesEventType, accountData);
|
||||
}
|
||||
|
||||
private static buildParser(): Parser {
|
||||
return new Parser();
|
||||
}
|
||||
|
||||
public static async fetchFeed(
|
||||
url: string,
|
||||
headers: any,
|
||||
timeoutMs: number,
|
||||
parser: Parser = FeedReader.buildParser(),
|
||||
): Promise<{ response: AxiosResponse<any, any>, feed: Parser.Output<any> }> {
|
||||
const response = await axios.get(url, {
|
||||
headers: {
|
||||
'User-Agent': UserAgent,
|
||||
...headers,
|
||||
},
|
||||
// We don't want to wait forever for the feed.
|
||||
timeout: timeoutMs,
|
||||
});
|
||||
const feed = await parser.parseString(response.data);
|
||||
return { response, feed };
|
||||
}
|
||||
|
||||
private async pollFeeds(): Promise<void> {
|
||||
log.debug(`Checking for updates in ${this.observedFeedUrls.size} RSS/Atom feeds`);
|
||||
|
||||
@ -196,28 +217,24 @@ export class FeedReader {
|
||||
const fetchKey = randomUUID();
|
||||
const { etag, lastModified } = this.cacheTimes.get(url) || {};
|
||||
try {
|
||||
const res = await axios.get(url, {
|
||||
headers: {
|
||||
'User-Agent': UserAgent,
|
||||
const { response, feed } = await FeedReader.fetchFeed(
|
||||
url,
|
||||
{
|
||||
...(lastModified && { 'If-Modified-Since': lastModified}),
|
||||
...(etag && { 'If-None-Match': etag}),
|
||||
},
|
||||
// We don't want to wait forever for the feed.
|
||||
timeout: this.config.pollTimeoutSeconds * 1000,
|
||||
});
|
||||
// Clear any HTTP failures
|
||||
this.feedsFailingHttp.delete(url);
|
||||
this.config.pollTimeoutSeconds * 1000,
|
||||
this.parser,
|
||||
);
|
||||
|
||||
// Store any entity tags/cache times.
|
||||
if (res.headers.ETag) {
|
||||
this.cacheTimes.set(url, { etag: res.headers.ETag});
|
||||
} else if (res.headers['Last-Modified']) {
|
||||
this.cacheTimes.set(url, { lastModified: res.headers['Last-Modified'] });
|
||||
if (response.headers.ETag) {
|
||||
this.cacheTimes.set(url, { etag: response.headers.ETag});
|
||||
} else if (response.headers['Last-Modified']) {
|
||||
this.cacheTimes.set(url, { lastModified: response.headers['Last-Modified'] });
|
||||
}
|
||||
|
||||
const feed = await this.parser.parseString(res.data);
|
||||
this.feedsFailingParsing.delete(url);
|
||||
|
||||
let initialSync = false;
|
||||
let seenGuids = this.seenEntries.get(url);
|
||||
if (!seenGuids) {
|
||||
@ -272,6 +289,9 @@ export class FeedReader {
|
||||
this.seenEntries.set(url, newSeenItems);
|
||||
}
|
||||
this.queue.push<FeedSuccess>({ eventName: 'feed.success', sender: 'FeedReader', data: { url: url } });
|
||||
// Clear any feed failures
|
||||
this.feedsFailingHttp.delete(url);
|
||||
this.feedsFailingParsing.delete(url);
|
||||
} catch (err: unknown) {
|
||||
if (axios.isAxiosError(err)) {
|
||||
// No new feed items, skip.
|
||||
|
Loading…
x
Reference in New Issue
Block a user