mirror of
https://github.com/matrix-org/matrix-hookshot.git
synced 2025-03-10 21:19:13 +00:00
Staggered RSS feed polling (#685)
* Ensure we poll feeds equally * Ensure we poll at least once * Create 685.misc * Tidy up
This commit is contained in:
parent
507b014bd5
commit
44eea7f7c3
1
changelog.d/685.misc
Normal file
1
changelog.d/685.misc
Normal file
@ -0,0 +1 @@
|
|||||||
|
Stagger RSS feed polling over the interval period, rather than attempting to poll all feeds at once. Should reduce memory / CPU spikes.
|
@ -6,11 +6,9 @@ import { FeedEntry, FeedError, FeedReader} from "../feeds/FeedReader";
|
|||||||
import { Logger } from "matrix-appservice-bridge";
|
import { Logger } from "matrix-appservice-bridge";
|
||||||
import { IBridgeStorageProvider } from "../Stores/StorageProvider";
|
import { IBridgeStorageProvider } from "../Stores/StorageProvider";
|
||||||
import { BaseConnection } from "./BaseConnection";
|
import { BaseConnection } from "./BaseConnection";
|
||||||
import axios from "axios";
|
|
||||||
import markdown from "markdown-it";
|
import markdown from "markdown-it";
|
||||||
import { Connection, ProvisionConnectionOpts } from "./IConnection";
|
import { Connection, ProvisionConnectionOpts } from "./IConnection";
|
||||||
import { GetConnectionsResponseItem } from "../provisioning/api";
|
import { GetConnectionsResponseItem } from "../provisioning/api";
|
||||||
import { StatusCodes } from "http-status-codes";
|
|
||||||
const log = new Logger("FeedConnection");
|
const log = new Logger("FeedConnection");
|
||||||
const md = new markdown();
|
const md = new markdown();
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
import { MatrixClient } from "matrix-bot-sdk";
|
import { MatrixClient, MatrixError } from "matrix-bot-sdk";
|
||||||
import { BridgeConfigFeeds } from "../Config/Config";
|
import { BridgeConfigFeeds } from "../Config/Config";
|
||||||
import { ConnectionManager } from "../ConnectionManager";
|
import { ConnectionManager } from "../ConnectionManager";
|
||||||
import { FeedConnection } from "../Connections";
|
import { FeedConnection } from "../Connections";
|
||||||
@ -88,12 +88,29 @@ function normalizeUrl(input: string): string {
|
|||||||
return url.toString();
|
return url.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Shuffle an array in place using a Fisher–Yates pass driven by `Math.random()`.
 *
 * Walks from the last index down to 1, swapping each element with one at a
 * randomly chosen index in `[0, i]`.
 *
 * @param array The array to shuffle. It is mutated.
 * @returns The same array instance that was passed in, now shuffled.
 */
function shuffle<T>(array: T[]): T[] {
|
||||||
|
for (let i = array.length - 1; i > 0; i--) {
|
||||||
|
const j = Math.floor(Math.random() * (i + 1));
|
||||||
|
[array[i], array[j]] = [array[j], array[i]];
|
||||||
|
}
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Item type used for the parsed feed output (`Parser.Output<FeedItem>`).
 * Every field is optional.
 */
interface FeedItem {
|
||||||
|
title?: string;
|
||||||
|
link?: string;
|
||||||
|
id?: string;
|
||||||
|
}
|
||||||
|
|
||||||
export class FeedReader {
|
export class FeedReader {
|
||||||
private readonly parser = FeedReader.buildParser();
|
private readonly parser = FeedReader.buildParser();
|
||||||
|
|
||||||
private connections: FeedConnection[];
|
private connections: FeedConnection[];
|
||||||
// ts should notice that we do in fact initialize it in constructor, but it doesn't (in this version)
|
// ts should notice that we do in fact initialize it in constructor, but it doesn't (in this version)
|
||||||
private observedFeedUrls: Set<string> = new Set();
|
private observedFeedUrls: Set<string> = new Set();
|
||||||
|
|
||||||
|
private feedQueue: string[] = [];
|
||||||
|
|
||||||
private seenEntries: Map<string, string[]> = new Map();
|
private seenEntries: Map<string, string[]> = new Map();
|
||||||
// A set of last modified times for each url.
|
// A set of last modified times for each url.
|
||||||
private cacheTimes: Map<string, { etag?: string, lastModified?: string}> = new Map();
|
private cacheTimes: Map<string, { etag?: string, lastModified?: string}> = new Map();
|
||||||
@ -107,6 +124,10 @@ export class FeedReader {
|
|||||||
private shouldRun = true;
|
private shouldRun = true;
|
||||||
private timeout?: NodeJS.Timeout;
|
private timeout?: NodeJS.Timeout;
|
||||||
|
|
||||||
|
/**
 * Time in milliseconds allotted to each feed: the configured poll interval
 * (`pollIntervalSeconds`, converted to ms) divided by the number of feeds
 * currently in `feedQueue`. An empty queue is treated as length 1 so the
 * division never happens by zero.
 */
get sleepingInterval() {
|
||||||
|
return (this.config.pollIntervalSeconds * 1000) / (this.feedQueue.length || 1);
|
||||||
|
}
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly config: BridgeConfigFeeds,
|
private readonly config: BridgeConfigFeeds,
|
||||||
private readonly connectionManager: ConnectionManager,
|
private readonly connectionManager: ConnectionManager,
|
||||||
@ -152,20 +173,22 @@ export class FeedReader {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.observedFeedUrls = new Set(normalizedUrls);
|
this.observedFeedUrls = new Set(normalizedUrls);
|
||||||
|
this.feedQueue = shuffle([...this.observedFeedUrls.values()]);
|
||||||
|
|
||||||
Metrics.feedsCount.set(this.observedFeedUrls.size);
|
Metrics.feedsCount.set(this.observedFeedUrls.size);
|
||||||
}
|
}
|
||||||
|
|
||||||
private async loadSeenEntries(): Promise<void> {
|
private async loadSeenEntries(): Promise<void> {
|
||||||
try {
|
try {
|
||||||
const accountData = await this.matrixClient.getAccountData<any>(FeedReader.seenEntriesEventType).catch((err: any) => {
|
const accountData = await this.matrixClient.getAccountData<AccountData>(FeedReader.seenEntriesEventType).catch((err: MatrixError|unknown) => {
|
||||||
if (err.statusCode === 404) {
|
if (err instanceof MatrixError && err.statusCode === 404) {
|
||||||
return {};
|
return {} as AccountData;
|
||||||
} else {
|
} else {
|
||||||
throw err;
|
throw err;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
if (!validateAccountData(accountData)) {
|
if (!validateAccountData(accountData)) {
|
||||||
const errors = validateAccountData.errors!.map(e => `${e.instancePath} ${e.message}`);
|
const errors = validateAccountData.errors?.map(e => `${e.instancePath} ${e.message}`) || ['No error reported'];
|
||||||
throw new Error(`Invalid account data: ${errors.join(', ')}`);
|
throw new Error(`Invalid account data: ${errors.join(', ')}`);
|
||||||
}
|
}
|
||||||
for (const url in accountData) {
|
for (const url in accountData) {
|
||||||
@ -191,10 +214,10 @@ export class FeedReader {
|
|||||||
|
|
||||||
public static async fetchFeed(
|
public static async fetchFeed(
|
||||||
url: string,
|
url: string,
|
||||||
headers: any,
|
headers: Record<string, string>,
|
||||||
timeoutMs: number,
|
timeoutMs: number,
|
||||||
parser: Parser = FeedReader.buildParser(),
|
parser: Parser = FeedReader.buildParser(),
|
||||||
): Promise<{ response: AxiosResponse<any, any>, feed: Parser.Output<any> }> {
|
): Promise<{ response: AxiosResponse, feed: Parser.Output<FeedItem> }> {
|
||||||
const response = await axios.get(url, {
|
const response = await axios.get(url, {
|
||||||
headers: {
|
headers: {
|
||||||
'User-Agent': UserAgent,
|
'User-Agent': UserAgent,
|
||||||
@ -207,16 +230,19 @@ export class FeedReader {
|
|||||||
return { response, feed };
|
return { response, feed };
|
||||||
}
|
}
|
||||||
|
|
||||||
private async pollFeeds(): Promise<void> {
|
/**
|
||||||
log.debug(`Checking for updates in ${this.observedFeedUrls.size} RSS/Atom feeds`);
|
* Poll a given feed URL for data, pushing any entries found into the message queue.
|
||||||
|
* We also check the `cacheTimes` cache to see if the feed has recent entries that we can
|
||||||
|
* filter out.
|
||||||
|
*
|
||||||
|
* @param url The URL to be polled.
|
||||||
|
* @returns A boolean that returns if we saw any changes on the feed since the last poll time.
|
||||||
|
*/
|
||||||
|
private async pollFeed(url: string): Promise<boolean> {
|
||||||
let seenEntriesChanged = false;
|
let seenEntriesChanged = false;
|
||||||
|
|
||||||
const fetchingStarted = Date.now();
|
|
||||||
|
|
||||||
for (const url of this.observedFeedUrls.values()) {
|
|
||||||
const fetchKey = randomUUID();
|
const fetchKey = randomUUID();
|
||||||
const { etag, lastModified } = this.cacheTimes.get(url) || {};
|
const { etag, lastModified } = this.cacheTimes.get(url) || {};
|
||||||
|
log.debug(`Checking for updates in ${url} (${etag ?? lastModified})`);
|
||||||
try {
|
try {
|
||||||
const { response, feed } = await FeedReader.fetchFeed(
|
const { response, feed } = await FeedReader.fetchFeed(
|
||||||
url,
|
url,
|
||||||
@ -250,6 +276,7 @@ export class FeedReader {
|
|||||||
const seenGuidsSet = new Set(seenGuids);
|
const seenGuidsSet = new Set(seenGuids);
|
||||||
const newGuids = [];
|
const newGuids = [];
|
||||||
log.debug(`Found ${feed.items.length} entries in ${url}`);
|
log.debug(`Found ${feed.items.length} entries in ${url}`);
|
||||||
|
|
||||||
for (const item of feed.items) {
|
for (const item of feed.items) {
|
||||||
// Find the first guid-like that looks like a string.
|
// Find the first guid-like that looks like a string.
|
||||||
// Some feeds have a nasty habit of leading a empty tag there, making us parse it as garbage.
|
// Some feeds have a nasty habit of leading a empty tag there, making us parse it as garbage.
|
||||||
@ -304,7 +331,7 @@ export class FeedReader {
|
|||||||
if (axios.isAxiosError(err)) {
|
if (axios.isAxiosError(err)) {
|
||||||
// No new feed items, skip.
|
// No new feed items, skip.
|
||||||
if (err.response?.status === StatusCodes.NOT_MODIFIED) {
|
if (err.response?.status === StatusCodes.NOT_MODIFIED) {
|
||||||
continue;
|
return false;
|
||||||
}
|
}
|
||||||
this.feedsFailingHttp.add(url);
|
this.feedsFailingHttp.add(url);
|
||||||
} else {
|
} else {
|
||||||
@ -314,26 +341,37 @@ export class FeedReader {
|
|||||||
const feedError = new FeedError(url.toString(), error, fetchKey);
|
const feedError = new FeedError(url.toString(), error, fetchKey);
|
||||||
log.error("Unable to read feed:", feedError.message);
|
log.error("Unable to read feed:", feedError.message);
|
||||||
this.queue.push<FeedError>({ eventName: 'feed.error', sender: 'FeedReader', data: feedError});
|
this.queue.push<FeedError>({ eventName: 'feed.error', sender: 'FeedReader', data: feedError});
|
||||||
|
} finally {
|
||||||
|
this.feedQueue.push(url);
|
||||||
|
}
|
||||||
|
return seenEntriesChanged;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async pollFeeds(): Promise<void> {
|
||||||
|
log.debug(`Checking for updates in ${this.observedFeedUrls.size} RSS/Atom feeds`);
|
||||||
|
|
||||||
|
const fetchingStarted = Date.now();
|
||||||
|
|
||||||
|
const [ url ] = this.feedQueue.splice(0, 1);
|
||||||
|
|
||||||
|
if (url) {
|
||||||
|
if (await this.pollFeed(url)) {
|
||||||
|
await this.saveSeenEntries();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Metrics.feedsFailing.set({ reason: "http" }, this.feedsFailingHttp.size );
|
Metrics.feedsFailing.set({ reason: "http" }, this.feedsFailingHttp.size );
|
||||||
Metrics.feedsFailing.set({ reason: "parsing" }, this.feedsFailingParsing.size);
|
Metrics.feedsFailing.set({ reason: "parsing" }, this.feedsFailingParsing.size);
|
||||||
|
|
||||||
if (seenEntriesChanged) await this.saveSeenEntries();
|
|
||||||
|
|
||||||
const elapsed = Date.now() - fetchingStarted;
|
const elapsed = Date.now() - fetchingStarted;
|
||||||
Metrics.feedFetchMs.set(elapsed);
|
Metrics.feedFetchMs.set(elapsed);
|
||||||
|
|
||||||
let sleepFor: number;
|
// Sleep for whatever is left of this feed's slot; clamp at zero so an overrunning poll
// reschedules immediately instead of producing a negative delay.
const sleepFor = Math.max(this.sleepingInterval - elapsed, 0);
|
||||||
if (elapsed > this.config.pollIntervalSeconds * 1000) {
|
|
||||||
log.warn(`It took us longer to update the feeds than the configured pool interval (${elapsed / 1000}s)`);
|
|
||||||
sleepFor = 0;
|
|
||||||
} else {
|
|
||||||
sleepFor = this.config.pollIntervalSeconds * 1000 - elapsed;
|
|
||||||
log.debug(`Feed fetching took ${elapsed / 1000}s, sleeping for ${sleepFor / 1000}s`);
|
log.debug(`Feed fetching took ${elapsed / 1000}s, sleeping for ${sleepFor / 1000}s`);
|
||||||
}
|
|
||||||
|
|
||||||
|
if (elapsed > this.sleepingInterval) {
|
||||||
|
log.warn(`It took us longer to update the feeds than the configured pool interval`);
|
||||||
|
}
|
||||||
|
|
||||||
this.timeout = setTimeout(() => {
|
this.timeout = setTimeout(() => {
|
||||||
if (!this.shouldRun) {
|
if (!this.shouldRun) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user