mirror of
https://github.com/matrix-org/matrix-hookshot.git
synced 2025-03-10 21:19:13 +00:00
Don't process feed/item titles that aren't actually strings (#708)
* Don't process feed/item titles that aren't actually strings Another case of empty-tag with attributes, these would occasionally parse into non-string objects and make us crash when trying to stripHtml on them. * Changelog * Add tests for not-quite-empty <title> tags in feeds --------- Co-authored-by: Tadeusz Sośnierz <tadeusz@sosnierz.com>
This commit is contained in:
parent
a634cdeed8
commit
bc57dbbc83
1
changelog.d/708.bugfix
Normal file
1
changelog.d/708.bugfix
Normal file
@ -0,0 +1 @@
|
|||||||
|
Fix mishandling of empty feed/item title tags.
|
@ -68,6 +68,11 @@ interface AccountData {
|
|||||||
[url: string]: string[],
|
[url: string]: string[],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface AccountDataStore {
|
||||||
|
getAccountData<T>(type: string): Promise<T>;
|
||||||
|
setAccountData<T>(type: string, data: T): Promise<void>;
|
||||||
|
}
|
||||||
|
|
||||||
const accountDataSchema = {
|
const accountDataSchema = {
|
||||||
type: 'object',
|
type: 'object',
|
||||||
patternProperties: {
|
patternProperties: {
|
||||||
@ -81,6 +86,10 @@ const accountDataSchema = {
|
|||||||
const ajv = new Ajv();
|
const ajv = new Ajv();
|
||||||
const validateAccountData = ajv.compile<AccountData>(accountDataSchema);
|
const validateAccountData = ajv.compile<AccountData>(accountDataSchema);
|
||||||
|
|
||||||
|
function isNonEmptyString(input: any): input is string {
|
||||||
|
return input && typeof input === 'string';
|
||||||
|
}
|
||||||
|
|
||||||
function stripHtml(input: string): string {
|
function stripHtml(input: string): string {
|
||||||
return input.replace(/<[^>]*?>/g, '');
|
return input.replace(/<[^>]*?>/g, '');
|
||||||
}
|
}
|
||||||
@ -123,8 +132,9 @@ export class FeedReader {
|
|||||||
headers: Record<string, string>,
|
headers: Record<string, string>,
|
||||||
timeoutMs: number,
|
timeoutMs: number,
|
||||||
parser: Parser = FeedReader.buildParser(),
|
parser: Parser = FeedReader.buildParser(),
|
||||||
|
httpClient = axios,
|
||||||
): Promise<{ response: AxiosResponse, feed: Parser.Output<FeedItem> }> {
|
): Promise<{ response: AxiosResponse, feed: Parser.Output<FeedItem> }> {
|
||||||
const response = await axios.get(url, {
|
const response = await httpClient.get(url, {
|
||||||
headers: {
|
headers: {
|
||||||
'User-Agent': UserAgent,
|
'User-Agent': UserAgent,
|
||||||
...headers,
|
...headers,
|
||||||
@ -188,7 +198,8 @@ export class FeedReader {
|
|||||||
private readonly config: BridgeConfigFeeds,
|
private readonly config: BridgeConfigFeeds,
|
||||||
private readonly connectionManager: ConnectionManager,
|
private readonly connectionManager: ConnectionManager,
|
||||||
private readonly queue: MessageQueue,
|
private readonly queue: MessageQueue,
|
||||||
private readonly matrixClient: MatrixClient,
|
private readonly accountDataStore: AccountDataStore,
|
||||||
|
private readonly httpClient = axios,
|
||||||
) {
|
) {
|
||||||
this.connections = this.connectionManager.getAllConnectionsOfType(FeedConnection);
|
this.connections = this.connectionManager.getAllConnectionsOfType(FeedConnection);
|
||||||
this.calculateFeedUrls();
|
this.calculateFeedUrls();
|
||||||
@ -237,7 +248,7 @@ export class FeedReader {
|
|||||||
|
|
||||||
private async loadSeenEntries(): Promise<void> {
|
private async loadSeenEntries(): Promise<void> {
|
||||||
try {
|
try {
|
||||||
const accountData = await this.matrixClient.getAccountData<AccountData>(FeedReader.seenEntriesEventType).catch((err: MatrixError|unknown) => {
|
const accountData = await this.accountDataStore.getAccountData<AccountData>(FeedReader.seenEntriesEventType).catch((err: MatrixError|unknown) => {
|
||||||
if (err instanceof MatrixError && err.statusCode === 404) {
|
if (err instanceof MatrixError && err.statusCode === 404) {
|
||||||
return {} as AccountData;
|
return {} as AccountData;
|
||||||
} else {
|
} else {
|
||||||
@ -262,7 +273,7 @@ export class FeedReader {
|
|||||||
for (const [url, guids] of this.seenEntries.entries()) {
|
for (const [url, guids] of this.seenEntries.entries()) {
|
||||||
accountData[url.toString()] = guids;
|
accountData[url.toString()] = guids;
|
||||||
}
|
}
|
||||||
await this.matrixClient.setAccountData(FeedReader.seenEntriesEventType, accountData);
|
await this.accountDataStore.setAccountData(FeedReader.seenEntriesEventType, accountData);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -288,6 +299,7 @@ export class FeedReader {
|
|||||||
// We don't want to wait forever for the feed.
|
// We don't want to wait forever for the feed.
|
||||||
this.config.pollTimeoutSeconds * 1000,
|
this.config.pollTimeoutSeconds * 1000,
|
||||||
this.parser,
|
this.parser,
|
||||||
|
this.httpClient,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Store any entity tags/cache times.
|
// Store any entity tags/cache times.
|
||||||
@ -315,7 +327,7 @@ export class FeedReader {
|
|||||||
for (const item of feed.items) {
|
for (const item of feed.items) {
|
||||||
// Find the first guid-like that looks like a string.
|
// Find the first guid-like that looks like a string.
|
||||||
// Some feeds have a nasty habit of leading a empty tag there, making us parse it as garbage.
|
// Some feeds have a nasty habit of leading a empty tag there, making us parse it as garbage.
|
||||||
const guid = [item.guid, item.id, item.link, item.title].find(id => typeof id === 'string' && id);
|
const guid = [item.guid, item.id, item.link, item.title].find(isNonEmptyString);
|
||||||
if (!guid) {
|
if (!guid) {
|
||||||
log.error(`Could not determine guid for entry in ${url}, skipping`);
|
log.error(`Could not determine guid for entry in ${url}, skipping`);
|
||||||
continue;
|
continue;
|
||||||
@ -334,10 +346,10 @@ export class FeedReader {
|
|||||||
|
|
||||||
const entry = {
|
const entry = {
|
||||||
feed: {
|
feed: {
|
||||||
title: feed.title ? stripHtml(feed.title) : null,
|
title: isNonEmptyString(feed.title) ? stripHtml(feed.title) : null,
|
||||||
url: url,
|
url: url,
|
||||||
},
|
},
|
||||||
title: item.title ? stripHtml(item.title) : null,
|
title: isNonEmptyString(item.title) ? stripHtml(item.title) : null,
|
||||||
pubdate: item.pubDate ?? null,
|
pubdate: item.pubDate ?? null,
|
||||||
summary: item.summary ?? null,
|
summary: item.summary ?? null,
|
||||||
author: item.creator ?? null,
|
author: item.creator ?? null,
|
||||||
|
89
tests/FeedReader.spec.ts
Normal file
89
tests/FeedReader.spec.ts
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
import { AxiosResponse, AxiosStatic } from "axios";
|
||||||
|
import { expect } from "chai";
|
||||||
|
import EventEmitter from "events";
|
||||||
|
import { BridgeConfigFeeds } from "../src/Config/Config";
|
||||||
|
import { ConnectionManager } from "../src/ConnectionManager";
|
||||||
|
import { IConnection } from "../src/Connections";
|
||||||
|
import { FeedReader } from "../src/feeds/FeedReader";
|
||||||
|
import { MessageQueue, MessageQueueMessage } from "../src/MessageQueue";
|
||||||
|
|
||||||
|
class MockConnectionManager extends EventEmitter {
|
||||||
|
constructor(
|
||||||
|
public connections: IConnection[]
|
||||||
|
) {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
getAllConnectionsOfType(type: unknown) {
|
||||||
|
return this.connections;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class MockMessageQueue extends EventEmitter implements MessageQueue {
|
||||||
|
subscribe(eventGlob: string): void {
|
||||||
|
this.emit('subscribed', eventGlob);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsubscribe(eventGlob: string): void {
|
||||||
|
this.emit('unsubscribed', eventGlob);
|
||||||
|
}
|
||||||
|
|
||||||
|
async push(data: MessageQueueMessage<unknown>, single?: boolean): Promise<void> {
|
||||||
|
this.emit('pushed', data, single);
|
||||||
|
}
|
||||||
|
|
||||||
|
async pushWait<T, X>(data: MessageQueueMessage<T>, timeout?: number, single?: boolean): Promise<X> {
|
||||||
|
throw new Error('Not yet implemented');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class MockHttpClient {
|
||||||
|
constructor(public response: AxiosResponse) {}
|
||||||
|
|
||||||
|
get(): Promise<AxiosResponse> {
|
||||||
|
return Promise.resolve(this.response);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("FeedReader", () => {
|
||||||
|
it("should correctly handle empty titles", async () => {
|
||||||
|
const config = new BridgeConfigFeeds({
|
||||||
|
enabled: true,
|
||||||
|
pollIntervalSeconds: 1,
|
||||||
|
pollTimeoutSeconds: 1,
|
||||||
|
});
|
||||||
|
const cm = new MockConnectionManager([{ feedUrl: 'http://test/' } as unknown as IConnection]) as unknown as ConnectionManager
|
||||||
|
const mq = new MockMessageQueue();
|
||||||
|
|
||||||
|
const feedContents = `
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||||
|
<channel><title type='text'></title><description>test feed</description><link>http://test/</link>
|
||||||
|
<pubDate>Wed, 12 Apr 2023 09:53:00 GMT</pubDate>
|
||||||
|
<item>
|
||||||
|
<title type='text'></title><description>test item</description>
|
||||||
|
<link>http://example.com/test/1681293180</link>
|
||||||
|
<guid isPermaLink="true">http://example.com/test/1681293180</guid>
|
||||||
|
<pubDate>Wed, 12 Apr 2023 09:53:00 GMT</pubDate>
|
||||||
|
</item>
|
||||||
|
</channel></rss>
|
||||||
|
`;
|
||||||
|
|
||||||
|
const feedReader = new FeedReader(
|
||||||
|
config, cm, mq,
|
||||||
|
{
|
||||||
|
getAccountData: <T>() => Promise.resolve({ 'http://test/': [] } as unknown as T),
|
||||||
|
setAccountData: <T>() => Promise.resolve(),
|
||||||
|
},
|
||||||
|
new MockHttpClient({ headers: {}, data: feedContents } as AxiosResponse) as unknown as AxiosStatic,
|
||||||
|
);
|
||||||
|
|
||||||
|
const event: any = await new Promise((resolve) => {
|
||||||
|
mq.on('pushed', (data, _) => { resolve(data); feedReader.stop() });
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(event.eventName).to.equal('feed.entry');
|
||||||
|
expect(event.data.feed.title).to.equal(null);
|
||||||
|
expect(event.data.title).to.equal(null);
|
||||||
|
});
|
||||||
|
});
|
Loading…
x
Reference in New Issue
Block a user