mirror of
https://github.com/matrix-org/matrix-hookshot.git
synced 2025-03-10 21:19:13 +00:00
When setting up a FeedConnection, check if the feed loads and parses rather than just checking the Content-Type (#684)
* When setting up a FeedConnection, check if the feed loads and parses rather than just checking the Content-Type
* Don't create a new Parser each time we fetch a feed
* Create 684.bugfix

---------

Co-authored-by: Tadeusz Sośnierz <tadeusz@sosnierz.com>
Co-authored-by: Will Hunt <will@half-shot.uk>
This commit is contained in:
parent
5ccd64acd8
commit
6c4bbc7150
1
changelog.d/684.bugfix
Normal file
1
changelog.d/684.bugfix
Normal file
@ -0,0 +1 @@
|
|||||||
|
Don't check Content-Type of RSS feeds when adding a new connection, instead just check if the feed is valid.
|
@ -2,7 +2,7 @@ import {Appservice, Intent, StateEvent} from "matrix-bot-sdk";
|
|||||||
import { IConnection, IConnectionState, InstantiateConnectionOpts } from ".";
|
import { IConnection, IConnectionState, InstantiateConnectionOpts } from ".";
|
||||||
import { ApiError, ErrCode } from "../api";
|
import { ApiError, ErrCode } from "../api";
|
||||||
import { BridgeConfigFeeds } from "../Config/Config";
|
import { BridgeConfigFeeds } from "../Config/Config";
|
||||||
import { FeedEntry, FeedError} from "../feeds/FeedReader";
|
import { FeedEntry, FeedError, FeedReader} from "../feeds/FeedReader";
|
||||||
import { Logger } from "matrix-appservice-bridge";
|
import { Logger } from "matrix-appservice-bridge";
|
||||||
import { IBridgeStorageProvider } from "../Stores/StorageProvider";
|
import { IBridgeStorageProvider } from "../Stores/StorageProvider";
|
||||||
import { BaseConnection } from "./BaseConnection";
|
import { BaseConnection } from "./BaseConnection";
|
||||||
@ -37,6 +37,7 @@ export interface FeedConnectionSecrets {
|
|||||||
export type FeedResponseItem = GetConnectionsResponseItem<FeedConnectionState, FeedConnectionSecrets>;
|
export type FeedResponseItem = GetConnectionsResponseItem<FeedConnectionState, FeedConnectionSecrets>;
|
||||||
|
|
||||||
const MAX_LAST_RESULT_ITEMS = 5;
|
const MAX_LAST_RESULT_ITEMS = 5;
|
||||||
|
const VALIDATION_FETCH_TIMEOUT_MS = 5000;
|
||||||
|
|
||||||
@Connection
|
@Connection
|
||||||
export class FeedConnection extends BaseConnection implements IConnection {
|
export class FeedConnection extends BaseConnection implements IConnection {
|
||||||
@ -57,20 +58,11 @@ export class FeedConnection extends BaseConnection implements IConnection {
|
|||||||
} catch (ex) {
|
} catch (ex) {
|
||||||
throw new ApiError("Feed URL doesn't appear valid", ErrCode.BadValue);
|
throw new ApiError("Feed URL doesn't appear valid", ErrCode.BadValue);
|
||||||
}
|
}
|
||||||
let res;
|
|
||||||
try {
|
try {
|
||||||
res = await axios.head(url).catch(() => axios.get(url));
|
await FeedReader.fetchFeed(url, {}, VALIDATION_FETCH_TIMEOUT_MS);
|
||||||
} catch (ex) {
|
} catch (ex) {
|
||||||
throw new ApiError(`Could not read from URL: ${ex.message}`, ErrCode.BadValue);
|
throw new ApiError(`Could not read feed from URL: ${ex.message}`, ErrCode.BadValue);
|
||||||
}
|
|
||||||
const contentType = res.headers['content-type'];
|
|
||||||
// we're deliberately liberal here, since different things pop up in the wild
|
|
||||||
if (!contentType.match(/xml/)) {
|
|
||||||
throw new ApiError(
|
|
||||||
`Feed responded with a content type of "${contentType}", which doesn't look like an RSS/Atom feed`,
|
|
||||||
ErrCode.BadValue,
|
|
||||||
StatusCodes.UNSUPPORTED_MEDIA_TYPE
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ import { Logger } from "matrix-appservice-bridge";
|
|||||||
import { MessageQueue } from "../MessageQueue";
|
import { MessageQueue } from "../MessageQueue";
|
||||||
|
|
||||||
import Ajv from "ajv";
|
import Ajv from "ajv";
|
||||||
import axios from "axios";
|
import axios, { AxiosResponse } from "axios";
|
||||||
import Parser from "rss-parser";
|
import Parser from "rss-parser";
|
||||||
import Metrics from "../Metrics";
|
import Metrics from "../Metrics";
|
||||||
import UserAgent from "../UserAgent";
|
import UserAgent from "../UserAgent";
|
||||||
@ -88,8 +88,7 @@ function normalizeUrl(input: string): string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export class FeedReader {
|
export class FeedReader {
|
||||||
|
private readonly parser = FeedReader.buildParser();
|
||||||
private readonly parser = new Parser();
|
|
||||||
|
|
||||||
private connections: FeedConnection[];
|
private connections: FeedConnection[];
|
||||||
// ts should notice that we do in fact initialize it in constructor, but it doesn't (in this version)
|
// ts should notice that we do in fact initialize it in constructor, but it doesn't (in this version)
|
||||||
@ -185,6 +184,28 @@ export class FeedReader {
|
|||||||
await this.matrixClient.setAccountData(FeedReader.seenEntriesEventType, accountData);
|
await this.matrixClient.setAccountData(FeedReader.seenEntriesEventType, accountData);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static buildParser(): Parser {
|
||||||
|
return new Parser();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static async fetchFeed(
|
||||||
|
url: string,
|
||||||
|
headers: any,
|
||||||
|
timeoutMs: number,
|
||||||
|
parser: Parser = FeedReader.buildParser(),
|
||||||
|
): Promise<{ response: AxiosResponse<any, any>, feed: Parser.Output<any> }> {
|
||||||
|
const response = await axios.get(url, {
|
||||||
|
headers: {
|
||||||
|
'User-Agent': UserAgent,
|
||||||
|
...headers,
|
||||||
|
},
|
||||||
|
// We don't want to wait forever for the feed.
|
||||||
|
timeout: timeoutMs,
|
||||||
|
});
|
||||||
|
const feed = await parser.parseString(response.data);
|
||||||
|
return { response, feed };
|
||||||
|
}
|
||||||
|
|
||||||
private async pollFeeds(): Promise<void> {
|
private async pollFeeds(): Promise<void> {
|
||||||
log.debug(`Checking for updates in ${this.observedFeedUrls.size} RSS/Atom feeds`);
|
log.debug(`Checking for updates in ${this.observedFeedUrls.size} RSS/Atom feeds`);
|
||||||
|
|
||||||
@ -196,28 +217,24 @@ export class FeedReader {
|
|||||||
const fetchKey = randomUUID();
|
const fetchKey = randomUUID();
|
||||||
const { etag, lastModified } = this.cacheTimes.get(url) || {};
|
const { etag, lastModified } = this.cacheTimes.get(url) || {};
|
||||||
try {
|
try {
|
||||||
const res = await axios.get(url, {
|
const { response, feed } = await FeedReader.fetchFeed(
|
||||||
headers: {
|
url,
|
||||||
'User-Agent': UserAgent,
|
{
|
||||||
...(lastModified && { 'If-Modified-Since': lastModified}),
|
...(lastModified && { 'If-Modified-Since': lastModified}),
|
||||||
...(etag && { 'If-None-Match': etag}),
|
...(etag && { 'If-None-Match': etag}),
|
||||||
},
|
},
|
||||||
// We don't want to wait forever for the feed.
|
// We don't want to wait forever for the feed.
|
||||||
timeout: this.config.pollTimeoutSeconds * 1000,
|
this.config.pollTimeoutSeconds * 1000,
|
||||||
});
|
this.parser,
|
||||||
// Clear any HTTP failures
|
);
|
||||||
this.feedsFailingHttp.delete(url);
|
|
||||||
|
|
||||||
// Store any entity tags/cache times.
|
// Store any entity tags/cache times.
|
||||||
if (res.headers.ETag) {
|
if (response.headers.ETag) {
|
||||||
this.cacheTimes.set(url, { etag: res.headers.ETag});
|
this.cacheTimes.set(url, { etag: response.headers.ETag});
|
||||||
} else if (res.headers['Last-Modified']) {
|
} else if (response.headers['Last-Modified']) {
|
||||||
this.cacheTimes.set(url, { lastModified: res.headers['Last-Modified'] });
|
this.cacheTimes.set(url, { lastModified: response.headers['Last-Modified'] });
|
||||||
}
|
}
|
||||||
|
|
||||||
const feed = await this.parser.parseString(res.data);
|
|
||||||
this.feedsFailingParsing.delete(url);
|
|
||||||
|
|
||||||
let initialSync = false;
|
let initialSync = false;
|
||||||
let seenGuids = this.seenEntries.get(url);
|
let seenGuids = this.seenEntries.get(url);
|
||||||
if (!seenGuids) {
|
if (!seenGuids) {
|
||||||
@ -272,6 +289,9 @@ export class FeedReader {
|
|||||||
this.seenEntries.set(url, newSeenItems);
|
this.seenEntries.set(url, newSeenItems);
|
||||||
}
|
}
|
||||||
this.queue.push<FeedSuccess>({ eventName: 'feed.success', sender: 'FeedReader', data: { url: url } });
|
this.queue.push<FeedSuccess>({ eventName: 'feed.success', sender: 'FeedReader', data: { url: url } });
|
||||||
|
// Clear any feed failures
|
||||||
|
this.feedsFailingHttp.delete(url);
|
||||||
|
this.feedsFailingParsing.delete(url);
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
if (axios.isAxiosError(err)) {
|
if (axios.isAxiosError(err)) {
|
||||||
// No new feed items, skip.
|
// No new feed items, skip.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user