mirror of
https://github.com/matrix-org/matrix-hookshot.git
synced 2025-03-10 21:19:13 +00:00
Fall back to parsing feeds as Atom format if RSS format fails. (#721)
* Support atom feeds in rust parser
* Add an apply linting command
* Add changelog
* Fixup
* Add tests for atom feeds + remove redundant code
* Remove unused rss-parser
* Tests for all formats.
* Move hashing logic into rust to save cross-context calls
* lint my rust
* Use a String::from
* Ensure guids are not repeated
This commit is contained in:
parent
ba5b28ab92
commit
43176adf7a
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -279,6 +279,7 @@ dependencies = [
|
|||||||
name = "matrix-hookshot"
|
name = "matrix-hookshot"
|
||||||
version = "1.8.1"
|
version = "1.8.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"atom_syndication",
|
||||||
"contrast",
|
"contrast",
|
||||||
"hex",
|
"hex",
|
||||||
"md-5",
|
"md-5",
|
||||||
|
@ -18,6 +18,7 @@ rgb = "0"
|
|||||||
md-5 = "0.8.0"
|
md-5 = "0.8.0"
|
||||||
hex = "0.4.3"
|
hex = "0.4.3"
|
||||||
rss = "2.0.3"
|
rss = "2.0.3"
|
||||||
|
atom_syndication = "0.12"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
napi-build = "1"
|
napi-build = "1"
|
||||||
|
1
changelog.d/721.bugfix
Normal file
1
changelog.d/721.bugfix
Normal file
@ -0,0 +1 @@
|
|||||||
|
Switch to using Rust for parsing RSS feeds.
|
@ -36,6 +36,7 @@
|
|||||||
"lint": "yarn run lint:js && yarn run lint:rs",
|
"lint": "yarn run lint:js && yarn run lint:rs",
|
||||||
"lint:js": "eslint -c .eslintrc.js 'src/**/*.ts' 'tests/**/*.ts' 'web/**/*.ts' 'web/**/*.tsx'",
|
"lint:js": "eslint -c .eslintrc.js 'src/**/*.ts' 'tests/**/*.ts' 'web/**/*.ts' 'web/**/*.tsx'",
|
||||||
"lint:rs": "cargo fmt --all -- --check",
|
"lint:rs": "cargo fmt --all -- --check",
|
||||||
|
"lint:rs:apply": "cargo fmt --all",
|
||||||
"generate-default-config": "ts-node src/Config/Defaults.ts --config > config.sample.yml",
|
"generate-default-config": "ts-node src/Config/Defaults.ts --config > config.sample.yml",
|
||||||
"validate-config": "ts-node src/Config/Config.ts"
|
"validate-config": "ts-node src/Config/Config.ts"
|
||||||
},
|
},
|
||||||
@ -64,7 +65,6 @@
|
|||||||
"p-queue": "^6.6.2",
|
"p-queue": "^6.6.2",
|
||||||
"prom-client": "^14.0.1",
|
"prom-client": "^14.0.1",
|
||||||
"reflect-metadata": "^0.1.13",
|
"reflect-metadata": "^0.1.13",
|
||||||
"rss-parser": "^3.12.0",
|
|
||||||
"source-map-support": "^0.5.21",
|
"source-map-support": "^0.5.21",
|
||||||
"string-argv": "^0.3.1",
|
"string-argv": "^0.3.1",
|
||||||
"tiny-typed-emitter": "^2.1.0",
|
"tiny-typed-emitter": "^2.1.0",
|
||||||
|
@ -7,13 +7,12 @@ import { MessageQueue } from "../MessageQueue";
|
|||||||
|
|
||||||
import Ajv from "ajv";
|
import Ajv from "ajv";
|
||||||
import axios, { AxiosResponse } from "axios";
|
import axios, { AxiosResponse } from "axios";
|
||||||
import Parser from "rss-parser";
|
|
||||||
import Metrics from "../Metrics";
|
import Metrics from "../Metrics";
|
||||||
import UserAgent from "../UserAgent";
|
import UserAgent from "../UserAgent";
|
||||||
import { randomUUID } from "crypto";
|
import { randomUUID } from "crypto";
|
||||||
import { StatusCodes } from "http-status-codes";
|
import { StatusCodes } from "http-status-codes";
|
||||||
import { FormatUtil } from "../FormatUtil";
|
import { FormatUtil } from "../FormatUtil";
|
||||||
import { FeedItem, parseRSSFeed } from "../libRs";
|
import { JsRssChannel, parseFeed } from "../libRs";
|
||||||
|
|
||||||
const log = new Logger("FeedReader");
|
const log = new Logger("FeedReader");
|
||||||
|
|
||||||
@ -111,10 +110,6 @@ function shuffle<T>(array: T[]): T[] {
|
|||||||
|
|
||||||
|
|
||||||
export class FeedReader {
|
export class FeedReader {
|
||||||
private static buildParser(): Parser {
|
|
||||||
return new Parser();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read a feed URL and parse it into a set of items.
|
* Read a feed URL and parse it into a set of items.
|
||||||
* @param url The feed URL.
|
* @param url The feed URL.
|
||||||
@ -128,7 +123,7 @@ export class FeedReader {
|
|||||||
headers: Record<string, string>,
|
headers: Record<string, string>,
|
||||||
timeoutMs: number,
|
timeoutMs: number,
|
||||||
httpClient = axios,
|
httpClient = axios,
|
||||||
): Promise<{ response: AxiosResponse, feed: Parser.Output<FeedItem> }> {
|
): Promise<{ response: AxiosResponse, feed: JsRssChannel }> {
|
||||||
const response = await httpClient.get(url, {
|
const response = await httpClient.get(url, {
|
||||||
headers: {
|
headers: {
|
||||||
'User-Agent': UserAgent,
|
'User-Agent': UserAgent,
|
||||||
@ -141,35 +136,10 @@ export class FeedReader {
|
|||||||
if (typeof response.data !== "string") {
|
if (typeof response.data !== "string") {
|
||||||
throw Error('Unexpected response type');
|
throw Error('Unexpected response type');
|
||||||
}
|
}
|
||||||
const feed = parseRSSFeed(response.data);
|
const feed = parseFeed(response.data);
|
||||||
return { response, feed };
|
return { response, feed };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Attempt to parse a link from a feed item.
|
|
||||||
* @param item A feed item.
|
|
||||||
* @returns Return either a link to the item, or null.
|
|
||||||
*/
|
|
||||||
private static parseLinkFromItem(item: FeedItem) {
|
|
||||||
if (item.link) {
|
|
||||||
return item.link;
|
|
||||||
}
|
|
||||||
if (item.id && item.idIsPermalink) {
|
|
||||||
try {
|
|
||||||
// The feed librray doesn't give us attributes (needs isPermaLink), so we're not really sure if this a URL or not.
|
|
||||||
// Parse it and see.
|
|
||||||
// https://validator.w3.org/feed/docs/rss2.html#ltguidgtSubelementOfLtitemgt
|
|
||||||
const url = new URL(item.id);
|
|
||||||
return url.toString();
|
|
||||||
} catch (ex) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private readonly parser = FeedReader.buildParser();
|
|
||||||
|
|
||||||
private connections: FeedConnection[];
|
private connections: FeedConnection[];
|
||||||
// ts should notice that we do in fact initialize it in constructor, but it doesn't (in this version)
|
// ts should notice that we do in fact initialize it in constructor, but it doesn't (in this version)
|
||||||
private observedFeedUrls: Set<string> = new Set();
|
private observedFeedUrls: Set<string> = new Set();
|
||||||
@ -283,7 +253,7 @@ export class FeedReader {
|
|||||||
* @param url The URL to be polled.
|
* @param url The URL to be polled.
|
||||||
* @returns A boolean that returns if we saw any changes on the feed since the last poll time.
|
* @returns A boolean that returns if we saw any changes on the feed since the last poll time.
|
||||||
*/
|
*/
|
||||||
private async pollFeed(url: string): Promise<boolean> {
|
public async pollFeed(url: string): Promise<boolean> {
|
||||||
let seenEntriesChanged = false;
|
let seenEntriesChanged = false;
|
||||||
const fetchKey = randomUUID();
|
const fetchKey = randomUUID();
|
||||||
const { etag, lastModified } = this.cacheTimes.get(url) || {};
|
const { etag, lastModified } = this.cacheTimes.get(url) || {};
|
||||||
@ -317,7 +287,6 @@ export class FeedReader {
|
|||||||
|
|
||||||
// migrate legacy, cleartext guids to their md5-hashed counterparts
|
// migrate legacy, cleartext guids to their md5-hashed counterparts
|
||||||
seenGuids = seenGuids.map(guid => guid.startsWith('md5:') ? guid : this.hashGuid(guid));
|
seenGuids = seenGuids.map(guid => guid.startsWith('md5:') ? guid : this.hashGuid(guid));
|
||||||
|
|
||||||
const seenGuidsSet = new Set(seenGuids);
|
const seenGuidsSet = new Set(seenGuids);
|
||||||
const newGuids = [];
|
const newGuids = [];
|
||||||
log.debug(`Found ${feed.items.length} entries in ${url}`);
|
log.debug(`Found ${feed.items.length} entries in ${url}`);
|
||||||
@ -325,33 +294,31 @@ export class FeedReader {
|
|||||||
for (const item of feed.items) {
|
for (const item of feed.items) {
|
||||||
// Find the first guid-like that looks like a string.
|
// Find the first guid-like that looks like a string.
|
||||||
// Some feeds have a nasty habit of leading a empty tag there, making us parse it as garbage.
|
// Some feeds have a nasty habit of leading a empty tag there, making us parse it as garbage.
|
||||||
const guid = [item.id, item.link, item.title].find(isNonEmptyString);
|
if (!item.hashId) {
|
||||||
if (!guid) {
|
|
||||||
log.error(`Could not determine guid for entry in ${url}, skipping`);
|
log.error(`Could not determine guid for entry in ${url}, skipping`);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const hashedGuid = this.hashGuid(guid);
|
const hashId = `md5:${item.hashId}`;
|
||||||
newGuids.push(hashedGuid);
|
newGuids.push(hashId);
|
||||||
|
|
||||||
if (initialSync) {
|
if (initialSync) {
|
||||||
log.debug(`Skipping entry ${guid} since we're performing an initial sync`);
|
log.debug(`Skipping entry ${item.id ?? hashId} since we're performing an initial sync`);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (seenGuidsSet.has(hashedGuid)) {
|
if (seenGuidsSet.has(hashId)) {
|
||||||
log.debug('Skipping already seen entry', guid);
|
log.debug('Skipping already seen entry', item.id ?? hashId);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const entry = {
|
const entry = {
|
||||||
feed: {
|
feed: {
|
||||||
title: isNonEmptyString(feed.title) ? stripHtml(feed.title) : null,
|
title: isNonEmptyString(feed.title) ? stripHtml(feed.title) : null,
|
||||||
url: url,
|
url: url,
|
||||||
},
|
},
|
||||||
title: isNonEmptyString(item.title) ? stripHtml(item.title) : null,
|
title: isNonEmptyString(item.title) ? stripHtml(item.title) : null,
|
||||||
pubdate: item.pubDate ?? null,
|
pubdate: item.pubdate ?? null,
|
||||||
summary: item.summary ?? null,
|
summary: item.summary ?? null,
|
||||||
author: item.author ?? null,
|
author: item.author ?? null,
|
||||||
link: FeedReader.parseLinkFromItem(item),
|
link: item.link ?? null,
|
||||||
fetchKey
|
fetchKey
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -395,7 +362,10 @@ export class FeedReader {
|
|||||||
return seenEntriesChanged;
|
return seenEntriesChanged;
|
||||||
}
|
}
|
||||||
|
|
||||||
private async pollFeeds(): Promise<void> {
|
/**
|
||||||
|
* Start polling all the feeds.
|
||||||
|
*/
|
||||||
|
public async pollFeeds(): Promise<void> {
|
||||||
log.debug(`Checking for updates in ${this.observedFeedUrls.size} RSS/Atom feeds`);
|
log.debug(`Checking for updates in ${this.observedFeedUrls.size} RSS/Atom feeds`);
|
||||||
|
|
||||||
const fetchingStarted = Date.now();
|
const fetchingStarted = Date.now();
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use atom_syndication::{Error as AtomError, Feed, Person};
|
||||||
use napi::bindgen_prelude::{Error as JsError, Status};
|
use napi::bindgen_prelude::{Error as JsError, Status};
|
||||||
use rss::{Channel, Error as RssError};
|
use rss::{Channel, Error as RssError};
|
||||||
|
|
||||||
|
use crate::format_util::hash_id;
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Deserialize)]
|
#[derive(Serialize, Debug, Deserialize)]
|
||||||
#[napi(object)]
|
#[napi(object)]
|
||||||
pub struct FeedItem {
|
pub struct FeedItem {
|
||||||
@ -13,6 +16,7 @@ pub struct FeedItem {
|
|||||||
pub pubdate: Option<String>,
|
pub pubdate: Option<String>,
|
||||||
pub summary: Option<String>,
|
pub summary: Option<String>,
|
||||||
pub author: Option<String>,
|
pub author: Option<String>,
|
||||||
|
pub hash_id: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Deserialize)]
|
#[derive(Serialize, Debug, Deserialize)]
|
||||||
@ -21,45 +25,128 @@ pub struct JsRssChannel {
|
|||||||
pub title: String,
|
pub title: String,
|
||||||
pub items: Vec<FeedItem>,
|
pub items: Vec<FeedItem>,
|
||||||
}
|
}
|
||||||
#[napi(js_name = "parseRSSFeed")]
|
|
||||||
pub fn js_parse_rss_feed(xml: String) -> Result<JsRssChannel, JsError> {
|
|
||||||
fn map_item_value(original: &str) -> String {
|
|
||||||
original.to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
Channel::from_str(&xml)
|
fn parse_channel_to_js_result(channel: &Channel) -> JsRssChannel {
|
||||||
.map(|channel| JsRssChannel {
|
JsRssChannel {
|
||||||
title: channel.title().to_string(),
|
title: channel.title().to_string(),
|
||||||
items: channel
|
items: channel
|
||||||
.items()
|
.items()
|
||||||
.iter()
|
.iter()
|
||||||
.map(|item| FeedItem {
|
.map(|item: &rss::Item| FeedItem {
|
||||||
title: item.title().map(map_item_value),
|
title: item.title().map(String::from),
|
||||||
link: item.link().map(map_item_value),
|
link: item.link().and_then(|v| Some(v.to_string())).or_else(|| {
|
||||||
id: item.guid().map(|f| f.value().to_string()),
|
item.guid()
|
||||||
id_is_permalink: item.guid().map_or(false, |f| f.is_permalink()),
|
.and_then(|i| i.permalink.then(|| i.value.to_string()))
|
||||||
pubdate: item.pub_date().map(map_item_value),
|
}),
|
||||||
summary: item.description().map(map_item_value),
|
id: item.guid().map(|f| f.value().to_string()),
|
||||||
author: item.author().map(map_item_value),
|
id_is_permalink: item.guid().map_or(false, |f| f.is_permalink()),
|
||||||
})
|
pubdate: item.pub_date().map(String::from),
|
||||||
.collect(),
|
summary: item.description().map(String::from),
|
||||||
})
|
author: item.author().map(String::from),
|
||||||
.map_err(|op| match op {
|
hash_id: item
|
||||||
RssError::Utf8(err) => JsError::new(
|
.guid
|
||||||
Status::Unknown,
|
.clone()
|
||||||
format!("An error while converting bytes to UTF8. {}'", err).to_string(),
|
.and_then(|f| Some(f.value))
|
||||||
),
|
.or(item.link.clone())
|
||||||
RssError::Xml(err) => JsError::new(
|
.or(item.title.clone())
|
||||||
Status::Unknown,
|
.and_then(|f| hash_id(f).ok()),
|
||||||
format!("XML parsing error. {}", err).to_string(),
|
})
|
||||||
),
|
.collect(),
|
||||||
RssError::InvalidStartTag => JsError::new(
|
}
|
||||||
Status::Unknown,
|
}
|
||||||
format!("The input didn't begin with an opening <rss> tag.").to_string(),
|
|
||||||
),
|
fn parse_feed_to_js_result(feed: &Feed) -> JsRssChannel {
|
||||||
err => JsError::new(
|
fn authors_to_string(persons: &[Person]) -> Option<String> {
|
||||||
Status::Unknown,
|
if persons.len() == 0 {
|
||||||
format!("Unknown error trying to parse feed parse feed '{}'", err).to_string(),
|
return None;
|
||||||
),
|
}
|
||||||
})
|
let mut outs = Vec::<String>::new();
|
||||||
|
for person in persons {
|
||||||
|
let email = person
|
||||||
|
.email
|
||||||
|
.clone()
|
||||||
|
.map_or_else(|| String::new(), |v| format!("<{}>", v));
|
||||||
|
let uri = person
|
||||||
|
.uri
|
||||||
|
.clone()
|
||||||
|
.map_or_else(|| String::new(), |v| format!("<{}>", v));
|
||||||
|
outs.push(format!("{}{}{}", person.name, email, uri))
|
||||||
|
}
|
||||||
|
Some(outs.join(", "))
|
||||||
|
}
|
||||||
|
JsRssChannel {
|
||||||
|
title: feed.title().to_string(),
|
||||||
|
items: feed
|
||||||
|
.entries()
|
||||||
|
.iter()
|
||||||
|
.map(|item| FeedItem {
|
||||||
|
title: Some(item.title().value.clone()),
|
||||||
|
link: item.links().first().map(|f| f.href.clone()),
|
||||||
|
id: Some(item.id.clone()),
|
||||||
|
// No equivalent
|
||||||
|
id_is_permalink: false,
|
||||||
|
pubdate: item
|
||||||
|
.published
|
||||||
|
.or(Some(item.updated))
|
||||||
|
.map(|date| date.to_rfc2822()),
|
||||||
|
summary: item.summary().map(|v| v.value.clone()),
|
||||||
|
author: authors_to_string(item.authors()),
|
||||||
|
hash_id: hash_id(item.id.clone()).ok(),
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[napi(js_name = "parseFeed")]
|
||||||
|
pub fn js_parse_feed(xml: String) -> Result<JsRssChannel, JsError> {
|
||||||
|
match Channel::from_str(&xml) {
|
||||||
|
Ok(channel) => Ok(parse_channel_to_js_result(&channel)),
|
||||||
|
Err(RssError::InvalidStartTag) =>
|
||||||
|
// If the tag is wrong, parse again as a feed.
|
||||||
|
{
|
||||||
|
match Feed::from_str(&xml) {
|
||||||
|
Ok(feed) => Ok(parse_feed_to_js_result(&feed)),
|
||||||
|
Err(AtomError::Eof) => Err(JsError::new(
|
||||||
|
Status::Unknown,
|
||||||
|
format!("Unexpected end of input.").to_string(),
|
||||||
|
)),
|
||||||
|
Err(AtomError::InvalidStartTag) => Err(JsError::new(
|
||||||
|
Status::Unknown,
|
||||||
|
format!("An error while converting bytes to UTF8.").to_string(),
|
||||||
|
)),
|
||||||
|
Err(AtomError::WrongAttribute { attribute, value }) => Err(JsError::new(
|
||||||
|
Status::Unknown,
|
||||||
|
format!(
|
||||||
|
"The attribute '{}' had the wrong value '{}'",
|
||||||
|
attribute, value
|
||||||
|
)
|
||||||
|
.to_string(),
|
||||||
|
)),
|
||||||
|
Err(AtomError::WrongDatetime(value)) => Err(JsError::new(
|
||||||
|
Status::Unknown,
|
||||||
|
format!("The format of the datetime ('{}') was wrong.", value).to_string(),
|
||||||
|
)),
|
||||||
|
Err(AtomError::Xml(err)) => Err(JsError::new(
|
||||||
|
Status::Unknown,
|
||||||
|
format!("XML parsing error . {}'", err).to_string(),
|
||||||
|
)),
|
||||||
|
Err(err) => Err(JsError::new(
|
||||||
|
Status::Unknown,
|
||||||
|
format!("Unknown error trying to parse feed parse feed '{}'", err).to_string(),
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(RssError::Utf8(err)) => Err(JsError::new(
|
||||||
|
Status::Unknown,
|
||||||
|
format!("An error while converting bytes to UTF8. {}'", err).to_string(),
|
||||||
|
)),
|
||||||
|
Err(RssError::Xml(err)) => Err(JsError::new(
|
||||||
|
Status::Unknown,
|
||||||
|
format!("XML parsing error. {}", err).to_string(),
|
||||||
|
)),
|
||||||
|
Err(RssError::Eof) => Err(JsError::new(
|
||||||
|
Status::Unknown,
|
||||||
|
format!("Unexpected end of input").to_string(),
|
||||||
|
)),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ import EventEmitter from "events";
|
|||||||
import { BridgeConfigFeeds } from "../src/Config/Config";
|
import { BridgeConfigFeeds } from "../src/Config/Config";
|
||||||
import { ConnectionManager } from "../src/ConnectionManager";
|
import { ConnectionManager } from "../src/ConnectionManager";
|
||||||
import { IConnection } from "../src/Connections";
|
import { IConnection } from "../src/Connections";
|
||||||
import { FeedReader } from "../src/feeds/FeedReader";
|
import { FeedEntry, FeedReader } from "../src/feeds/FeedReader";
|
||||||
import { MessageQueue, MessageQueueMessage } from "../src/MessageQueue";
|
import { MessageQueue, MessageQueueMessage } from "../src/MessageQueue";
|
||||||
|
|
||||||
class MockConnectionManager extends EventEmitter {
|
class MockConnectionManager extends EventEmitter {
|
||||||
@ -45,17 +45,31 @@ class MockHttpClient {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const FEED_URL = 'http://test/';
|
||||||
|
|
||||||
|
function constructFeedReader(feedResponse: () => {headers: Record<string,string>, data: string}) {
|
||||||
|
const config = new BridgeConfigFeeds({
|
||||||
|
enabled: true,
|
||||||
|
pollIntervalSeconds: 1,
|
||||||
|
pollTimeoutSeconds: 1,
|
||||||
|
});
|
||||||
|
const cm = new MockConnectionManager([{ feedUrl: FEED_URL } as unknown as IConnection]) as unknown as ConnectionManager
|
||||||
|
const mq = new MockMessageQueue();
|
||||||
|
const feedReader = new FeedReader(
|
||||||
|
config, cm, mq,
|
||||||
|
{
|
||||||
|
getAccountData: <T>() => Promise.resolve({ [FEED_URL]: [] } as unknown as T),
|
||||||
|
setAccountData: () => Promise.resolve(),
|
||||||
|
},
|
||||||
|
new MockHttpClient({ ...feedResponse() } as AxiosResponse) as unknown as AxiosStatic,
|
||||||
|
);
|
||||||
|
return {config, cm, mq, feedReader};
|
||||||
|
}
|
||||||
|
|
||||||
describe("FeedReader", () => {
|
describe("FeedReader", () => {
|
||||||
it("should correctly handle empty titles", async () => {
|
it("should correctly handle empty titles", async () => {
|
||||||
const config = new BridgeConfigFeeds({
|
const { mq, feedReader} = constructFeedReader(() => ({
|
||||||
enabled: true,
|
headers: {}, data: `
|
||||||
pollIntervalSeconds: 1,
|
|
||||||
pollTimeoutSeconds: 1,
|
|
||||||
});
|
|
||||||
const cm = new MockConnectionManager([{ feedUrl: 'http://test/' } as unknown as IConnection]) as unknown as ConnectionManager
|
|
||||||
const mq = new MockMessageQueue();
|
|
||||||
|
|
||||||
const feedContents = `
|
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||||
<channel><title type='text'></title><description>test feed</description><link>http://test/</link>
|
<channel><title type='text'></title><description>test feed</description><link>http://test/</link>
|
||||||
@ -67,16 +81,8 @@ describe("FeedReader", () => {
|
|||||||
<pubDate>Wed, 12 Apr 2023 09:53:00 GMT</pubDate>
|
<pubDate>Wed, 12 Apr 2023 09:53:00 GMT</pubDate>
|
||||||
</item>
|
</item>
|
||||||
</channel></rss>
|
</channel></rss>
|
||||||
`;
|
`
|
||||||
|
}));
|
||||||
const feedReader = new FeedReader(
|
|
||||||
config, cm, mq,
|
|
||||||
{
|
|
||||||
getAccountData: <T>() => Promise.resolve({ 'http://test/': [] } as unknown as T),
|
|
||||||
setAccountData: () => Promise.resolve(),
|
|
||||||
},
|
|
||||||
new MockHttpClient({ headers: {}, data: feedContents } as AxiosResponse) as unknown as AxiosStatic,
|
|
||||||
);
|
|
||||||
|
|
||||||
const event: any = await new Promise((resolve) => {
|
const event: any = await new Promise((resolve) => {
|
||||||
mq.on('pushed', (data) => { resolve(data); feedReader.stop() });
|
mq.on('pushed', (data) => { resolve(data); feedReader.stop() });
|
||||||
@ -86,4 +92,147 @@ describe("FeedReader", () => {
|
|||||||
expect(event.data.feed.title).to.equal(null);
|
expect(event.data.feed.title).to.equal(null);
|
||||||
expect(event.data.title).to.equal(null);
|
expect(event.data.title).to.equal(null);
|
||||||
});
|
});
|
||||||
|
it("should handle RSS 2.0 feeds", async () => {
|
||||||
|
const { mq, feedReader} = constructFeedReader(() => ({
|
||||||
|
headers: {}, data: `
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<rss version="2.0">
|
||||||
|
<channel>
|
||||||
|
<title>RSS Title</title>
|
||||||
|
<description>This is an example of an RSS feed</description>
|
||||||
|
<link>http://www.example.com/main.html</link>
|
||||||
|
<copyright>2020 Example.com All rights reserved</copyright>
|
||||||
|
<lastBuildDate>Mon, 6 Sep 2010 00:01:00 +0000</lastBuildDate>
|
||||||
|
<pubDate>Sun, 6 Sep 2009 16:20:00 +0000</pubDate>
|
||||||
|
<ttl>1800</ttl>
|
||||||
|
<item>
|
||||||
|
<title>Example entry</title>
|
||||||
|
<author>John Doe</author>
|
||||||
|
<description>Here is some text containing an interesting description.</description>
|
||||||
|
<link>http://www.example.com/blog/post/1</link>
|
||||||
|
<guid isPermaLink="false">7bd204c6-1655-4c27-aeee-53f933c5395f</guid>
|
||||||
|
<pubDate>Sun, 6 Sep 2009 16:20:00 +0000</pubDate>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
||||||
|
`
|
||||||
|
}));
|
||||||
|
|
||||||
|
const event: MessageQueueMessage<FeedEntry> = await new Promise((resolve) => {
|
||||||
|
mq.on('pushed', (data) => { resolve(data); feedReader.stop() });
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(event.eventName).to.equal('feed.entry');
|
||||||
|
expect(event.data.feed.title).to.equal('RSS Title');
|
||||||
|
expect(event.data.author).to.equal('John Doe');
|
||||||
|
expect(event.data.title).to.equal('Example entry');
|
||||||
|
expect(event.data.summary).to.equal('Here is some text containing an interesting description.');
|
||||||
|
expect(event.data.link).to.equal('http://www.example.com/blog/post/1');
|
||||||
|
expect(event.data.pubdate).to.equal('Sun, 6 Sep 2009 16:20:00 +0000');
|
||||||
|
});
|
||||||
|
it("should handle RSS feeds with a permalink url", async () => {
|
||||||
|
const { mq, feedReader} = constructFeedReader(() => ({
|
||||||
|
headers: {}, data: `
|
||||||
|
<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
<rss version="2.0">
|
||||||
|
<channel>
|
||||||
|
<title>RSS Title</title>
|
||||||
|
<description>This is an example of an RSS feed</description>
|
||||||
|
<link>http://www.example.com/main.html</link>
|
||||||
|
<copyright>2020 Example.com All rights reserved</copyright>
|
||||||
|
<lastBuildDate>Mon, 6 Sep 2010 00:01:00 +0000</lastBuildDate>
|
||||||
|
<pubDate>Sun, 6 Sep 2009 16:20:00 +0000</pubDate>
|
||||||
|
<ttl>1800</ttl>
|
||||||
|
<item>
|
||||||
|
<title>Example entry</title>
|
||||||
|
<author>John Doe</author>
|
||||||
|
<description>Here is some text containing an interesting description.</description>
|
||||||
|
<guid isPermaLink="true">http://www.example.com/blog/post/1</guid>
|
||||||
|
<pubDate>Sun, 6 Sep 2009 16:20:00 +0000</pubDate>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
||||||
|
`
|
||||||
|
}));
|
||||||
|
|
||||||
|
const event: MessageQueueMessage<FeedEntry> = await new Promise((resolve) => {
|
||||||
|
mq.on('pushed', (data) => { resolve(data); feedReader.stop() });
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(event.eventName).to.equal('feed.entry');
|
||||||
|
expect(event.data.feed.title).to.equal('RSS Title');
|
||||||
|
expect(event.data.author).to.equal('John Doe');
|
||||||
|
expect(event.data.title).to.equal('Example entry');
|
||||||
|
expect(event.data.summary).to.equal('Here is some text containing an interesting description.');
|
||||||
|
expect(event.data.link).to.equal('http://www.example.com/blog/post/1');
|
||||||
|
expect(event.data.pubdate).to.equal('Sun, 6 Sep 2009 16:20:00 +0000');
|
||||||
|
});
|
||||||
|
it("should handle Atom feeds", async () => {
|
||||||
|
const { mq, feedReader} = constructFeedReader(() => ({
|
||||||
|
headers: {}, data: `
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link href="http://example.org/"/>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<author>
|
||||||
|
<name>John Doe</name>
|
||||||
|
</author>
|
||||||
|
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||||
|
|
||||||
|
<entry>
|
||||||
|
<author>
|
||||||
|
<name>John Doe</name>
|
||||||
|
</author>
|
||||||
|
<title>Atom-Powered Robots Run Amok</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<summary>Some text.</summary>
|
||||||
|
</entry>
|
||||||
|
|
||||||
|
</feed>
|
||||||
|
`
|
||||||
|
}));
|
||||||
|
|
||||||
|
const event: MessageQueueMessage<FeedEntry> = await new Promise((resolve) => {
|
||||||
|
mq.on('pushed', (data) => { resolve(data); feedReader.stop() });
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(event.eventName).to.equal('feed.entry');
|
||||||
|
expect(event.data.feed.title).to.equal('Example Feed');
|
||||||
|
expect(event.data.title).to.equal('Atom-Powered Robots Run Amok');
|
||||||
|
expect(event.data.author).to.equal('John Doe');
|
||||||
|
expect(event.data.summary).to.equal('Some text.');
|
||||||
|
expect(event.data.link).to.equal('http://example.org/2003/12/13/atom03');
|
||||||
|
expect(event.data.pubdate).to.equal('Sat, 13 Dec 2003 18:30:02 +0000');
|
||||||
|
});
|
||||||
|
it("should not duplicate feed entries", async () => {
|
||||||
|
const { mq, feedReader} = constructFeedReader(() => ({
|
||||||
|
headers: {}, data: `
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<entry>
|
||||||
|
<author>
|
||||||
|
<name>John Doe</name>
|
||||||
|
</author>
|
||||||
|
<title>Atom-Powered Robots Run Amok</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<summary>Some text.</summary>
|
||||||
|
</entry>
|
||||||
|
</feed>
|
||||||
|
`
|
||||||
|
}));
|
||||||
|
|
||||||
|
const events: MessageQueueMessage<FeedEntry>[] = [];
|
||||||
|
mq.on('pushed', (data) => { if (data.eventName === 'feed.entry') {events.push(data);} });
|
||||||
|
await feedReader.pollFeed(FEED_URL);
|
||||||
|
await feedReader.pollFeed(FEED_URL);
|
||||||
|
await feedReader.pollFeed(FEED_URL);
|
||||||
|
feedReader.stop();
|
||||||
|
expect(events).to.have.lengthOf(1);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
18
yarn.lock
18
yarn.lock
@ -2661,7 +2661,7 @@ enquirer@^2.3.5:
|
|||||||
dependencies:
|
dependencies:
|
||||||
ansi-colors "^4.1.1"
|
ansi-colors "^4.1.1"
|
||||||
|
|
||||||
entities@^2.0.0, entities@^2.0.3:
|
entities@^2.0.0:
|
||||||
version "2.2.0"
|
version "2.2.0"
|
||||||
resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55"
|
resolved "https://registry.yarnpkg.com/entities/-/entities-2.2.0.tgz#098dc90ebb83d8dffa089d55256b351d34c4da55"
|
||||||
integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==
|
integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==
|
||||||
@ -5369,14 +5369,6 @@ rollup@^3.10.0:
|
|||||||
optionalDependencies:
|
optionalDependencies:
|
||||||
fsevents "~2.3.2"
|
fsevents "~2.3.2"
|
||||||
|
|
||||||
rss-parser@^3.12.0:
|
|
||||||
version "3.12.0"
|
|
||||||
resolved "https://registry.yarnpkg.com/rss-parser/-/rss-parser-3.12.0.tgz#b8888699ea46304a74363fbd8144671b2997984c"
|
|
||||||
integrity sha512-aqD3E8iavcCdkhVxNDIdg1nkBI17jgqF+9OqPS1orwNaOgySdpvq6B+DoONLhzjzwV8mWg37sb60e4bmLK117A==
|
|
||||||
dependencies:
|
|
||||||
entities "^2.0.3"
|
|
||||||
xml2js "^0.4.19"
|
|
||||||
|
|
||||||
run-parallel@^1.1.9:
|
run-parallel@^1.1.9:
|
||||||
version "1.2.0"
|
version "1.2.0"
|
||||||
resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee"
|
resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee"
|
||||||
@ -6218,14 +6210,6 @@ write-file-atomic@^3.0.0:
|
|||||||
signal-exit "^3.0.2"
|
signal-exit "^3.0.2"
|
||||||
typedarray-to-buffer "^3.1.5"
|
typedarray-to-buffer "^3.1.5"
|
||||||
|
|
||||||
xml2js@^0.4.19:
|
|
||||||
version "0.4.23"
|
|
||||||
resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.4.23.tgz#a0c69516752421eb2ac758ee4d4ccf58843eac66"
|
|
||||||
integrity sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==
|
|
||||||
dependencies:
|
|
||||||
sax ">=0.6.0"
|
|
||||||
xmlbuilder "~11.0.0"
|
|
||||||
|
|
||||||
xml2js@^0.5.0:
|
xml2js@^0.5.0:
|
||||||
version "0.5.0"
|
version "0.5.0"
|
||||||
resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.5.0.tgz#d9440631fbb2ed800203fad106f2724f62c493b7"
|
resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.5.0.tgz#d9440631fbb2ed800203fad106f2724f62c493b7"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user