mirror of
https://github.com/matrix-org/matrix-hookshot.git
synced 2025-03-10 13:17:08 +00:00
Make sure we're not treating garbage data in feed item as guids (#689)
* Make sure we're not treating garbage data in feed item as guids. Some of these would parse as `{ '$': { isPermaLink: 'false' } }` or similar, which would then surprise us very much when assuming that it's a string stored in accountData. * Changelog * Exclude empty strings when looking for viable guids. Co-authored-by: Christian Paul <christianp@matrix.org> * Hash seen feed entry guids for storing and checking. This saves a little bit of space in the common case, and prevents bloating the storage were someone to feed us obnoxiously long guids. --------- Co-authored-by: Tadeusz Sośnierz <tadeusz@sosnierz.com> Co-authored-by: Christian Paul <christianp@matrix.org>
This commit is contained in:
parent
74a577a97f
commit
507b014bd5
1
changelog.d/687.bugfix
Normal file
1
changelog.d/687.bugfix
Normal file
@ -0,0 +1 @@
|
||||
Make sure we're not treating garbage data in feed items as guids.
|
@ -12,6 +12,7 @@ import Metrics from "../Metrics";
|
||||
import UserAgent from "../UserAgent";
|
||||
import { randomUUID } from "crypto";
|
||||
import { StatusCodes } from "http-status-codes";
|
||||
import { FormatUtil } from "../FormatUtil";
|
||||
|
||||
const log = new Logger("FeedReader");
|
||||
|
||||
@ -242,22 +243,29 @@ export class FeedReader {
|
||||
seenGuids = [];
|
||||
seenEntriesChanged = true; // to ensure we only treat it as an initialSync once
|
||||
}
|
||||
|
||||
// migrate legacy, cleartext guids to their md5-hashed counterparts
|
||||
seenGuids = seenGuids.map(guid => guid.startsWith('md5:') ? guid : this.hashGuid(guid));
|
||||
|
||||
const seenGuidsSet = new Set(seenGuids);
|
||||
const newGuids = [];
|
||||
log.debug(`Found ${feed.items.length} entries in ${url}`);
|
||||
for (const item of feed.items) {
|
||||
const guid = item.guid || item.id || item.link || item.title;
|
||||
// Find the first guid-like that looks like a string.
|
||||
// Some feeds have a nasty habit of leaving an empty tag there, making us parse it as garbage.
|
||||
const guid = [item.guid, item.id, item.link, item.title].find(id => typeof id === 'string' && id);
|
||||
if (!guid) {
|
||||
log.error(`Could not determine guid for entry in ${url}, skipping`);
|
||||
continue;
|
||||
}
|
||||
newGuids.push(guid);
|
||||
const hashedGuid = this.hashGuid(guid);
|
||||
newGuids.push(hashedGuid);
|
||||
|
||||
if (initialSync) {
|
||||
log.debug(`Skipping entry ${guid} since we're performing an initial sync`);
|
||||
continue;
|
||||
}
|
||||
if (seenGuidsSet.has(guid)) {
|
||||
if (seenGuidsSet.has(hashedGuid)) {
|
||||
log.debug('Skipping already seen entry', guid);
|
||||
continue;
|
||||
}
|
||||
@ -334,4 +342,8 @@ export class FeedReader {
|
||||
void this.pollFeeds();
|
||||
}, sleepFor);
|
||||
}
|
||||
|
||||
/**
 * Build the stored form of a feed entry guid.
 *
 * The returned value always starts with `md5:`; the guid-migration code in
 * this class uses that prefix (`guid.startsWith('md5:')`) to tell
 * already-hashed guids apart from legacy cleartext ones.
 *
 * @param guid The feed entry identifier to hash.
 * @returns The result of `FormatUtil.hashId(guid)` prefixed with `md5:`.
 */
private hashGuid(guid: string): string {
|
||||
// NOTE(review): the `md5:` prefix suggests hashId yields an MD5 digest — confirm against FormatUtil.
return `md5:${FormatUtil.hashId(guid)}`;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user