mirror of
https://github.com/matrix-org/matrix-hookshot.git
synced 2025-03-10 13:17:08 +00:00
Don't process feed/item titles that aren't actually strings (#708)
* Don't process feed/item titles that aren't actually strings Another case of empty-tag with attributes, these would occasionally parse into non-string objects and make us crash when trying to stripHtml on them. * Changelog * Add tests for not-quite-empty <title> tags in feeds --------- Co-authored-by: Tadeusz Sośnierz <tadeusz@sosnierz.com>
This commit is contained in:
parent
a634cdeed8
commit
bc57dbbc83
1
changelog.d/708.bugfix
Normal file
1
changelog.d/708.bugfix
Normal file
@ -0,0 +1 @@
|
||||
Fix mishandling of empty feed/item title tags.
|
@ -68,6 +68,11 @@ interface AccountData {
|
||||
[url: string]: string[],
|
||||
}
|
||||
|
||||
interface AccountDataStore {
|
||||
getAccountData<T>(type: string): Promise<T>;
|
||||
setAccountData<T>(type: string, data: T): Promise<void>;
|
||||
}
|
||||
|
||||
const accountDataSchema = {
|
||||
type: 'object',
|
||||
patternProperties: {
|
||||
@ -81,6 +86,10 @@ const accountDataSchema = {
|
||||
const ajv = new Ajv();
|
||||
const validateAccountData = ajv.compile<AccountData>(accountDataSchema);
|
||||
|
||||
function isNonEmptyString(input: any): input is string {
|
||||
return input && typeof input === 'string';
|
||||
}
|
||||
|
||||
function stripHtml(input: string): string {
|
||||
return input.replace(/<[^>]*?>/g, '');
|
||||
}
|
||||
@ -123,8 +132,9 @@ export class FeedReader {
|
||||
headers: Record<string, string>,
|
||||
timeoutMs: number,
|
||||
parser: Parser = FeedReader.buildParser(),
|
||||
httpClient = axios,
|
||||
): Promise<{ response: AxiosResponse, feed: Parser.Output<FeedItem> }> {
|
||||
const response = await axios.get(url, {
|
||||
const response = await httpClient.get(url, {
|
||||
headers: {
|
||||
'User-Agent': UserAgent,
|
||||
...headers,
|
||||
@ -188,7 +198,8 @@ export class FeedReader {
|
||||
private readonly config: BridgeConfigFeeds,
|
||||
private readonly connectionManager: ConnectionManager,
|
||||
private readonly queue: MessageQueue,
|
||||
private readonly matrixClient: MatrixClient,
|
||||
private readonly accountDataStore: AccountDataStore,
|
||||
private readonly httpClient = axios,
|
||||
) {
|
||||
this.connections = this.connectionManager.getAllConnectionsOfType(FeedConnection);
|
||||
this.calculateFeedUrls();
|
||||
@ -237,7 +248,7 @@ export class FeedReader {
|
||||
|
||||
private async loadSeenEntries(): Promise<void> {
|
||||
try {
|
||||
const accountData = await this.matrixClient.getAccountData<AccountData>(FeedReader.seenEntriesEventType).catch((err: MatrixError|unknown) => {
|
||||
const accountData = await this.accountDataStore.getAccountData<AccountData>(FeedReader.seenEntriesEventType).catch((err: MatrixError|unknown) => {
|
||||
if (err instanceof MatrixError && err.statusCode === 404) {
|
||||
return {} as AccountData;
|
||||
} else {
|
||||
@ -262,7 +273,7 @@ export class FeedReader {
|
||||
for (const [url, guids] of this.seenEntries.entries()) {
|
||||
accountData[url.toString()] = guids;
|
||||
}
|
||||
await this.matrixClient.setAccountData(FeedReader.seenEntriesEventType, accountData);
|
||||
await this.accountDataStore.setAccountData(FeedReader.seenEntriesEventType, accountData);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -288,6 +299,7 @@ export class FeedReader {
|
||||
// We don't want to wait forever for the feed.
|
||||
this.config.pollTimeoutSeconds * 1000,
|
||||
this.parser,
|
||||
this.httpClient,
|
||||
);
|
||||
|
||||
// Store any entity tags/cache times.
|
||||
@ -315,7 +327,7 @@ export class FeedReader {
|
||||
for (const item of feed.items) {
|
||||
// Find the first guid-like that looks like a string.
|
||||
// Some feeds have a nasty habit of leading a empty tag there, making us parse it as garbage.
|
||||
const guid = [item.guid, item.id, item.link, item.title].find(id => typeof id === 'string' && id);
|
||||
const guid = [item.guid, item.id, item.link, item.title].find(isNonEmptyString);
|
||||
if (!guid) {
|
||||
log.error(`Could not determine guid for entry in ${url}, skipping`);
|
||||
continue;
|
||||
@ -334,10 +346,10 @@ export class FeedReader {
|
||||
|
||||
const entry = {
|
||||
feed: {
|
||||
title: feed.title ? stripHtml(feed.title) : null,
|
||||
title: isNonEmptyString(feed.title) ? stripHtml(feed.title) : null,
|
||||
url: url,
|
||||
},
|
||||
title: item.title ? stripHtml(item.title) : null,
|
||||
title: isNonEmptyString(item.title) ? stripHtml(item.title) : null,
|
||||
pubdate: item.pubDate ?? null,
|
||||
summary: item.summary ?? null,
|
||||
author: item.creator ?? null,
|
||||
|
89
tests/FeedReader.spec.ts
Normal file
89
tests/FeedReader.spec.ts
Normal file
@ -0,0 +1,89 @@
|
||||
import { AxiosResponse, AxiosStatic } from "axios";
|
||||
import { expect } from "chai";
|
||||
import EventEmitter from "events";
|
||||
import { BridgeConfigFeeds } from "../src/Config/Config";
|
||||
import { ConnectionManager } from "../src/ConnectionManager";
|
||||
import { IConnection } from "../src/Connections";
|
||||
import { FeedReader } from "../src/feeds/FeedReader";
|
||||
import { MessageQueue, MessageQueueMessage } from "../src/MessageQueue";
|
||||
|
||||
class MockConnectionManager extends EventEmitter {
|
||||
constructor(
|
||||
public connections: IConnection[]
|
||||
) {
|
||||
super();
|
||||
}
|
||||
|
||||
getAllConnectionsOfType(type: unknown) {
|
||||
return this.connections;
|
||||
}
|
||||
}
|
||||
|
||||
class MockMessageQueue extends EventEmitter implements MessageQueue {
|
||||
subscribe(eventGlob: string): void {
|
||||
this.emit('subscribed', eventGlob);
|
||||
}
|
||||
|
||||
unsubscribe(eventGlob: string): void {
|
||||
this.emit('unsubscribed', eventGlob);
|
||||
}
|
||||
|
||||
async push(data: MessageQueueMessage<unknown>, single?: boolean): Promise<void> {
|
||||
this.emit('pushed', data, single);
|
||||
}
|
||||
|
||||
async pushWait<T, X>(data: MessageQueueMessage<T>, timeout?: number, single?: boolean): Promise<X> {
|
||||
throw new Error('Not yet implemented');
|
||||
}
|
||||
}
|
||||
|
||||
class MockHttpClient {
|
||||
constructor(public response: AxiosResponse) {}
|
||||
|
||||
get(): Promise<AxiosResponse> {
|
||||
return Promise.resolve(this.response);
|
||||
}
|
||||
}
|
||||
|
||||
describe("FeedReader", () => {
|
||||
it("should correctly handle empty titles", async () => {
|
||||
const config = new BridgeConfigFeeds({
|
||||
enabled: true,
|
||||
pollIntervalSeconds: 1,
|
||||
pollTimeoutSeconds: 1,
|
||||
});
|
||||
const cm = new MockConnectionManager([{ feedUrl: 'http://test/' } as unknown as IConnection]) as unknown as ConnectionManager
|
||||
const mq = new MockMessageQueue();
|
||||
|
||||
const feedContents = `
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||
<channel><title type='text'></title><description>test feed</description><link>http://test/</link>
|
||||
<pubDate>Wed, 12 Apr 2023 09:53:00 GMT</pubDate>
|
||||
<item>
|
||||
<title type='text'></title><description>test item</description>
|
||||
<link>http://example.com/test/1681293180</link>
|
||||
<guid isPermaLink="true">http://example.com/test/1681293180</guid>
|
||||
<pubDate>Wed, 12 Apr 2023 09:53:00 GMT</pubDate>
|
||||
</item>
|
||||
</channel></rss>
|
||||
`;
|
||||
|
||||
const feedReader = new FeedReader(
|
||||
config, cm, mq,
|
||||
{
|
||||
getAccountData: <T>() => Promise.resolve({ 'http://test/': [] } as unknown as T),
|
||||
setAccountData: <T>() => Promise.resolve(),
|
||||
},
|
||||
new MockHttpClient({ headers: {}, data: feedContents } as AxiosResponse) as unknown as AxiosStatic,
|
||||
);
|
||||
|
||||
const event: any = await new Promise((resolve) => {
|
||||
mq.on('pushed', (data, _) => { resolve(data); feedReader.stop() });
|
||||
});
|
||||
|
||||
expect(event.eventName).to.equal('feed.entry');
|
||||
expect(event.data.feed.title).to.equal(null);
|
||||
expect(event.data.title).to.equal(null);
|
||||
});
|
||||
});
|
Loading…
x
Reference in New Issue
Block a user