Add a new metric for recently failing feeds.

This commit is contained in:
Half-Shot 2024-11-29 15:17:05 +00:00
parent 9aa9a4dea3
commit c6d15adf7f
3 changed files with 17 additions and 12 deletions

View File

@ -36,6 +36,7 @@ Below is the generated list of Prometheus metrics for Hookshot.
| hookshot_feeds_count | Number of RSS feeds that hookshot is subscribed to | | | hookshot_feeds_count | Number of RSS feeds that hookshot is subscribed to | |
| hookshot_feeds_fetch_ms | Time taken for hookshot to fetch all feeds | | | hookshot_feeds_fetch_ms | Time taken for hookshot to fetch all feeds | |
| hookshot_feeds_failing | Number of RSS feeds that hookshot is failing to read | reason | | hookshot_feeds_failing | Number of RSS feeds that hookshot is failing to read | reason |
| hookshot_feeds_failing_recent | Number of RSS feeds that hookshot is failing to read that have begun to fail recently | reason |
## matrix ## matrix
| Metric | Help | Labels | | Metric | Help | Labels |
|--------|------|--------| |--------|------|--------|
@ -43,12 +44,6 @@ Below is the generated list of Prometheus metrics for Hookshot.
| matrix_api_calls_failed | Number of Matrix client API calls which failed | method | | matrix_api_calls_failed | Number of Matrix client API calls which failed | method |
| matrix_appservice_events | Number of events sent over the AS API | | | matrix_appservice_events | Number of events sent over the AS API | |
| matrix_appservice_decryption_failed | Number of events sent over the AS API that failed to decrypt | | | matrix_appservice_decryption_failed | Number of events sent over the AS API that failed to decrypt | |
## feed
| Metric | Help | Labels |
|--------|------|--------|
| feed_count | (Deprecated) Number of RSS feeds that hookshot is subscribed to | |
| feed_fetch_ms | (Deprecated) Time taken for hookshot to fetch all feeds | |
| feed_failing | (Deprecated) Number of RSS feeds that hookshot is failing to read | reason |
## process ## process
| Metric | Help | Labels | | Metric | Help | Labels |
|--------|------|--------| |--------|------|--------|

View File

@ -27,6 +27,7 @@ export class Metrics {
public readonly feedsCount; public readonly feedsCount;
public readonly feedFetchMs; public readonly feedFetchMs;
public readonly feedsFailing; public readonly feedsFailing;
public readonly feedsFailingRecent;
constructor(private registry: Registry = register) { constructor(private registry: Registry = register) {
@ -52,6 +53,7 @@ export class Metrics {
this.feedsCount = new Gauge({ name: "hookshot_feeds_count", help: "Number of RSS feeds that hookshot is subscribed to", labelNames: [], registers: [this.registry]}); this.feedsCount = new Gauge({ name: "hookshot_feeds_count", help: "Number of RSS feeds that hookshot is subscribed to", labelNames: [], registers: [this.registry]});
this.feedFetchMs = new Gauge({ name: "hookshot_feeds_fetch_ms", help: "Time taken for hookshot to fetch all feeds", labelNames: [], registers: [this.registry]}); this.feedFetchMs = new Gauge({ name: "hookshot_feeds_fetch_ms", help: "Time taken for hookshot to fetch all feeds", labelNames: [], registers: [this.registry]});
this.feedsFailing = new Gauge({ name: "hookshot_feeds_failing", help: "Number of RSS feeds that hookshot is failing to read", labelNames: ["reason"], registers: [this.registry]}); this.feedsFailing = new Gauge({ name: "hookshot_feeds_failing", help: "Number of RSS feeds that hookshot is failing to read", labelNames: ["reason"], registers: [this.registry]});
this.feedsFailingRecent = new Gauge({ name: "hookshot_feeds_failing_recent", help: "Number of RSS feeds that hookshot is failing to read that have begun to fail recently", labelNames: ["reason"], registers: [this.registry]});
collectDefaultMetrics({ collectDefaultMetrics({
register: this.registry, register: this.registry,

View File

@ -17,6 +17,12 @@ const BACKOFF_TIME_MAX_MS = 24 * 60 * 60 * 1000;
const BACKOFF_POW = 1.05; const BACKOFF_POW = 1.05;
const BACKOFF_TIME_MS = 5 * 1000; const BACKOFF_TIME_MS = 5 * 1000;
/**
 * Failure-count threshold for the `hookshot_feeds_failing_recent` metric.
 *
 * If a feed has failed this many times or more, we consider it effectively
 * dead: while we might still retry it, it is no longer counted in the
 * "recent" failing metric. It is still counted in `hookshot_feeds_failing`,
 * which reports every failing feed regardless of its failure count.
 *
 * Failure counts are tracked separately per failure reason (HTTP vs parsing).
 */
const FEEDS_FAILING_METRIC_MAX_DORMANT = 25;
export class FeedError extends Error { export class FeedError extends Error {
constructor( constructor(
public url: string, public url: string,
@ -88,9 +94,9 @@ export class FeedReader {
// A set of last modified times for each url. // A set of last modified times for each url.
private cacheTimes: Map<string, { etag?: string, lastModified?: string}> = new Map(); private cacheTimes: Map<string, { etag?: string, lastModified?: string}> = new Map();
// Reason failures to url map. // Reason failures to url map. // url -> fail count.
private feedsFailingHttp = new Set(); private feedsFailingHttp = new Map<string, number>();
private feedsFailingParsing = new Set(); private feedsFailingParsing = new Map<string, number>();
static readonly seenEntriesEventType = "uk.half-shot.matrix-hookshot.feed.reader.seenEntries"; static readonly seenEntriesEventType = "uk.half-shot.matrix-hookshot.feed.reader.seenEntries";
@ -282,9 +288,9 @@ export class FeedReader {
} catch (err: unknown) { } catch (err: unknown) {
// TODO: Proper Rust Type error. // TODO: Proper Rust Type error.
if ((err as Error).message.includes('Failed to fetch feed due to HTTP')) { if ((err as Error).message.includes('Failed to fetch feed due to HTTP')) {
this.feedsFailingHttp.add(url); this.feedsFailingHttp.set(url, (this.feedsFailingHttp.get(url) ?? 0) + 1);
} else { } else {
this.feedsFailingParsing.add(url); this.feedsFailingParsing.set(url, (this.feedsFailingParsing.get(url) ?? 0) + 1);
} }
const backoffDuration = this.feedQueue.backoff(url); const backoffDuration = this.feedQueue.backoff(url);
const error = err instanceof Error ? err : new Error(`Unknown error ${err}`); const error = err instanceof Error ? err : new Error(`Unknown error ${err}`);
@ -301,8 +307,10 @@ export class FeedReader {
public async pollFeeds(workerId: number): Promise<void> { public async pollFeeds(workerId: number): Promise<void> {
// Update on each iteration // Update on each iteration
Metrics.feedsFailing.set({ reason: "http" }, this.feedsFailingHttp.size ); Metrics.feedsFailing.set({ reason: "http" }, this.feedsFailingHttp.size);
Metrics.feedsFailing.set({ reason: "parsing" }, this.feedsFailingParsing.size); Metrics.feedsFailing.set({ reason: "parsing" }, this.feedsFailingParsing.size);
Metrics.feedsFailingRecent.set({ reason: "http" }, [...this.feedsFailingHttp.values()].filter(v => v < FEEDS_FAILING_METRIC_MAX_DORMANT).length);
Metrics.feedsFailingRecent.set({ reason: "parsing" }, [...this.feedsFailingParsing.values()].filter(v => v < FEEDS_FAILING_METRIC_MAX_DORMANT).length);
log.debug(`Checking for updates in ${this.feedQueue.length()} RSS/Atom feeds (worker: ${workerId})`); log.debug(`Checking for updates in ${this.feedQueue.length()} RSS/Atom feeds (worker: ${workerId})`);