Make the feed parser less strict on HTML data. (#670)

* Less strict parser

* changelog

* Instantiate parser once
This commit is contained in:
Will Hunt 2023-03-22 20:17:28 +00:00 committed by GitHub
parent 735ae2d673
commit 61f25fae36
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 1 deletions

1
changelog.d/670.bugfix Normal file
View File

@ -0,0 +1 @@
Relax the Feed parser so that it allows RSS feeds with HTML elements in them.

View File

@ -88,6 +88,16 @@ function normalizeUrl(input: string): string {
}
export class FeedReader {
// Parser instance created once and reused for parsing, instead of constructing a new Parser for each parse.
private readonly parser = new Parser({
xml2js: {
// Turn off strict XML parsing so that feeds containing HTML bodies (e.g. value-less attributes) are accepted.
strict: false,
// Without this the parsed result is keyed as `res.FEED` rather than `res.feed`, which breaks the parser.
normalizeTags: true,
}
});
private connections: FeedConnection[];
// ts should notice that we do in fact initialize it in constructor, but it doesn't (in this version)
private observedFeedUrls: Set<string> = new Set();
@ -197,7 +207,7 @@ export class FeedReader {
this.cacheTimes.set(url, { lastModified: res.headers['Last-Modified'] });
}
const feed = await (new Parser()).parseString(res.data);
const feed = await this.parser.parseString(res.data);
let initialSync = false;
let seenGuids = this.seenEntries.get(url);
if (!seenGuids) {