Skip to content

Commit

Permalink
Performance improvements to Feeds (#786)
Browse files Browse the repository at this point in the history
* Various smaller changes

* Drop account data entirely

* Use max feed items

* Commit known working improvements

* Better status handlingh

* changelog

* Update changelog

* Add a note on Redis.

* Add proper HTTP tests

* Linty lint

* Tweaks

* New metrics woah

* Tweaks
  • Loading branch information
Half-Shot authored Jun 28, 2023
1 parent 2173a8c commit 3217b9e
Show file tree
Hide file tree
Showing 16 changed files with 1,087 additions and 265 deletions.
757 changes: 747 additions & 10 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2021"
crate-type = ["cdylib"]

[dependencies]
napi = {version="2", features=["serde-json"]}
napi = {version="2", features=["serde-json", "async"]}
napi-derive = "2"
url = "2"
serde_json = "1"
Expand All @@ -20,6 +20,7 @@ hex = "0.4.3"
rss = "2.0.3"
atom_syndication = "0.12"
ruma = { version = "0.8.2", features = ["events", "unstable-sanitize"] }
reqwest = "0.11"

[build-dependencies]
napi-build = "1"
5 changes: 5 additions & 0 deletions changelog.d/786.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Refactor Hookshot to use Redis for caching of feed information, massively improving memory usage.

Please note that this is a behavioural change: Hookshots configured to use in-memory caching (not Redis),
will no longer bridge any RSS entries it may have missed during downtime, and will instead perform an initial
sync (not reporting any entries) instead.
2 changes: 2 additions & 0 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ Below is the generated list of Prometheus metrics for Hookshot.
| nodejs_eventloop_lag_p50_seconds | The 50th percentile of the recorded event loop delays. | |
| nodejs_eventloop_lag_p90_seconds | The 90th percentile of the recorded event loop delays. | |
| nodejs_eventloop_lag_p99_seconds | The 99th percentile of the recorded event loop delays. | |
| nodejs_active_resources | Number of active resources that are currently keeping the event loop alive, grouped by async resource type. | type |
| nodejs_active_resources_total | Total number of active resources. | |
| nodejs_active_handles | Number of active libuv handles grouped by handle type. Every handle type is C++ class name. | type |
| nodejs_active_handles_total | Total number of active handles. | |
| nodejs_active_requests | Number of active libuv requests grouped by request type. Every request type is C++ class name. | type |
Expand Down
3 changes: 3 additions & 0 deletions docs/setup/feeds.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ Each feed will only be checked once, regardless of the number of rooms to which

No entries will be bridged upon the “initial sync” -- all entries that exist at the moment of setup will be considered to be already seen.

Please note that Hookshot **must** be configued with Redis to retain seen entries between restarts. By default, Hookshot will
run an "initial sync" on each startup and will not process any entries from feeds from before the first sync.

## Usage

### Adding new feeds
Expand Down
6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"node-emoji": "^1.11.0",
"nyc": "^15.1.0",
"p-queue": "^6.6.2",
"prom-client": "^14.0.1",
"prom-client": "^14.2.0",
"reflect-metadata": "^0.1.13",
"source-map-support": "^0.5.21",
"string-argv": "^0.3.1",
Expand All @@ -76,7 +76,7 @@
},
"devDependencies": {
"@codemirror/lang-javascript": "^6.0.2",
"@napi-rs/cli": "^2.2.0",
"@napi-rs/cli": "^2.13.2",
"@preact/preset-vite": "^2.2.0",
"@tsconfig/node18": "^2.0.0",
"@types/ajv": "^1.0.0",
Expand Down Expand Up @@ -105,7 +105,7 @@
"rimraf": "^3.0.2",
"sass": "^1.51.0",
"ts-node": "^10.9.1",
"typescript": "^5.0.4",
"typescript": "^5.1.3",
"vite": "^4.1.5",
"vite-svg-loader": "^4.0.0"
}
Expand Down
3 changes: 1 addition & 2 deletions src/Bridge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -776,8 +776,7 @@ export class Bridge {
this.config.feeds,
this.connectionManager,
this.queue,
// Use default bot when storing account data
this.as.botClient,
this.storage,
);
}

Expand Down
12 changes: 8 additions & 4 deletions src/Connections/FeedConnection.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import {Intent, StateEvent} from "matrix-bot-sdk";
import { IConnection, IConnectionState, InstantiateConnectionOpts } from ".";
import { ApiError, ErrCode } from "../api";
import { FeedEntry, FeedError, FeedReader} from "../feeds/FeedReader";
import { FeedEntry, FeedError} from "../feeds/FeedReader";
import { Logger } from "matrix-appservice-bridge";
import { BaseConnection } from "./BaseConnection";
import markdown from "markdown-it";
import { Connection, ProvisionConnectionOpts } from "./IConnection";
import { GetConnectionsResponseItem } from "../provisioning/api";
import { sanitizeHtml } from "../libRs";
import { readFeed, sanitizeHtml } from "../libRs";
import UserAgent from "../UserAgent";
const log = new Logger("FeedConnection");
const md = new markdown({
html: true,
Expand Down Expand Up @@ -38,7 +39,7 @@ export interface FeedConnectionSecrets {
export type FeedResponseItem = GetConnectionsResponseItem<FeedConnectionState, FeedConnectionSecrets>;

const MAX_LAST_RESULT_ITEMS = 5;
const VALIDATION_FETCH_TIMEOUT_MS = 5000;
const VALIDATION_FETCH_TIMEOUT_S = 5;
const MAX_SUMMARY_LENGTH = 512;
const MAX_TEMPLATE_LENGTH = 1024;

Expand Down Expand Up @@ -68,7 +69,10 @@ export class FeedConnection extends BaseConnection implements IConnection {
}

try {
await FeedReader.fetchFeed(url, {}, VALIDATION_FETCH_TIMEOUT_MS);
await readFeed(url, {
userAgent: UserAgent,
pollTimeoutSeconds: VALIDATION_FETCH_TIMEOUT_S,
});
} catch (ex) {
throw new ApiError(`Could not read feed from URL: ${ex.message}`, ErrCode.BadValue);
}
Expand Down
61 changes: 43 additions & 18 deletions src/Metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,58 @@ const log = new Logger("Metrics");
export class Metrics {
public readonly expressRouter = Router();

public readonly webhooksHttpRequest = new Counter({ name: "hookshot_webhooks_http_request", help: "Number of requests made to the hookshot webhooks handler", labelNames: ["path", "method"], registers: [this.registry]});
public readonly provisioningHttpRequest = new Counter({ name: "hookshot_provisioning_http_request", help: "Number of requests made to the hookshot webhooks handler", labelNames: ["path", "method"], registers: [this.registry]});
public readonly webhooksHttpRequest;
public readonly provisioningHttpRequest;

public readonly messageQueuePushes = new Counter({ name: "hookshot_queue_event_pushes", help: "Number of events pushed through the queue", labelNames: ["event"], registers: [this.registry]});
public readonly connectionsEventFailed = new Counter({ name: "hookshot_connection_event_failed", help: "The number of events that failed to process", labelNames: ["event", "connectionId"], registers: [this.registry]});
public readonly connections = new Gauge({ name: "hookshot_connections", help: "The number of active hookshot connections", labelNames: ["service"], registers: [this.registry]});
public readonly messageQueuePushes;
public readonly connectionsEventFailed;
public readonly connections;

public readonly notificationsPush = new Counter({ name: "hookshot_notifications_push", help: "Number of notifications pushed", labelNames: ["service"], registers: [this.registry]});
public readonly notificationsServiceUp = new Gauge({ name: "hookshot_notifications_service_up", help: "Is the notification service up or down", labelNames: ["service"], registers: [this.registry]});
public readonly notificationsWatchers = new Gauge({ name: "hookshot_notifications_watchers", help: "Number of notifications watchers running", labelNames: ["service"], registers: [this.registry]});
public readonly notificationsPush;
public readonly notificationsServiceUp;
public readonly notificationsWatchers;

private readonly matrixApiCalls = new Counter({ name: "matrix_api_calls", help: "The number of Matrix client API calls made", labelNames: ["method"], registers: [this.registry]});
private readonly matrixApiCallsFailed = new Counter({ name: "matrix_api_calls_failed", help: "The number of Matrix client API calls which failed", labelNames: ["method"], registers: [this.registry]});
private readonly matrixApiCalls;
private readonly matrixApiCallsFailed;

public readonly matrixAppserviceEvents = new Counter({ name: "matrix_appservice_events", help: "The number of events sent over the AS API", labelNames: [], registers: [this.registry]});
public readonly matrixAppserviceDecryptionFailed = new Counter({ name: "matrix_appservice_decryption_failed", help: "The number of events sent over the AS API that failed to decrypt", registers: [this.registry]});
public readonly matrixAppserviceEvents;
public readonly matrixAppserviceDecryptionFailed;

public readonly feedsCount = new Gauge({ name: "hookshot_feeds_count", help: "The number of RSS feeds that hookshot is subscribed to", labelNames: [], registers: [this.registry]});
public readonly feedFetchMs = new Gauge({ name: "hookshot_feeds_fetch_ms", help: "The time taken for hookshot to fetch all feeds", labelNames: [], registers: [this.registry]});
public readonly feedsFailing = new Gauge({ name: "hookshot_feeds_failing", help: "The number of RSS feeds that hookshot is failing to read", labelNames: ["reason"], registers: [this.registry]});
public readonly feedsCountDeprecated = new Gauge({ name: "feed_count", help: "(Deprecated) The number of RSS feeds that hookshot is subscribed to", labelNames: [], registers: [this.registry]});
public readonly feedsFetchMsDeprecated = new Gauge({ name: "feed_fetch_ms", help: "(Deprecated) The time taken for hookshot to fetch all feeds", labelNames: [], registers: [this.registry]});
public readonly feedsFailingDeprecated = new Gauge({ name: "feed_failing", help: "(Deprecated) The number of RSS feeds that hookshot is failing to read", labelNames: ["reason"], registers: [this.registry]});
public readonly feedsCount;
public readonly feedFetchMs;
public readonly feedsFailing;
public readonly feedsCountDeprecated;
public readonly feedsFetchMsDeprecated;
public readonly feedsFailingDeprecated;


constructor(private registry: Registry = register) {
this.expressRouter.get('/metrics', this.metricsFunc.bind(this));

this.webhooksHttpRequest = new Counter({ name: "hookshot_webhooks_http_request", help: "Number of requests made to the hookshot webhooks handler", labelNames: ["path", "method"], registers: [this.registry]});
this.provisioningHttpRequest = new Counter({ name: "hookshot_provisioning_http_request", help: "Number of requests made to the hookshot webhooks handler", labelNames: ["path", "method"], registers: [this.registry]});

this.messageQueuePushes = new Counter({ name: "hookshot_queue_event_pushes", help: "Number of events pushed through the queue", labelNames: ["event"], registers: [this.registry]});
this.connectionsEventFailed = new Counter({ name: "hookshot_connection_event_failed", help: "The number of events that failed to process", labelNames: ["event", "connectionId"], registers: [this.registry]});
this.connections = new Gauge({ name: "hookshot_connections", help: "The number of active hookshot connections", labelNames: ["service"], registers: [this.registry]});

this.notificationsPush = new Counter({ name: "hookshot_notifications_push", help: "Number of notifications pushed", labelNames: ["service"], registers: [this.registry]});
this.notificationsServiceUp = new Gauge({ name: "hookshot_notifications_service_up", help: "Is the notification service up or down", labelNames: ["service"], registers: [this.registry]});
this.notificationsWatchers = new Gauge({ name: "hookshot_notifications_watchers", help: "Number of notifications watchers running", labelNames: ["service"], registers: [this.registry]});

this.matrixApiCalls = new Counter({ name: "matrix_api_calls", help: "The number of Matrix client API calls made", labelNames: ["method"], registers: [this.registry]});
this.matrixApiCallsFailed = new Counter({ name: "matrix_api_calls_failed", help: "The number of Matrix client API calls which failed", labelNames: ["method"], registers: [this.registry]});

this.matrixAppserviceEvents = new Counter({ name: "matrix_appservice_events", help: "The number of events sent over the AS API", labelNames: [], registers: [this.registry]});
this.matrixAppserviceDecryptionFailed = new Counter({ name: "matrix_appservice_decryption_failed", help: "The number of events sent over the AS API that failed to decrypt", registers: [this.registry]});

this.feedsCount = new Gauge({ name: "hookshot_feeds_count", help: "The number of RSS feeds that hookshot is subscribed to", labelNames: [], registers: [this.registry]});
this.feedFetchMs = new Gauge({ name: "hookshot_feeds_fetch_ms", help: "The time taken for hookshot to fetch all feeds", labelNames: [], registers: [this.registry]});
this.feedsFailing = new Gauge({ name: "hookshot_feeds_failing", help: "The number of RSS feeds that hookshot is failing to read", labelNames: ["reason"], registers: [this.registry]});
this.feedsCountDeprecated = new Gauge({ name: "feed_count", help: "(Deprecated) The number of RSS feeds that hookshot is subscribed to", labelNames: [], registers: [this.registry]});
this.feedsFetchMsDeprecated = new Gauge({ name: "feed_fetch_ms", help: "(Deprecated) The time taken for hookshot to fetch all feeds", labelNames: [], registers: [this.registry]});
this.feedsFailingDeprecated = new Gauge({ name: "feed_failing", help: "(Deprecated) The number of RSS feeds that hookshot is failing to read", labelNames: ["reason"], registers: [this.registry]});

collectDefaultMetrics({
register: this.registry,
})
Expand Down
24 changes: 23 additions & 1 deletion src/Stores/MemoryStorageProvider.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { MemoryStorageProvider as MSP } from "matrix-bot-sdk";
import { IBridgeStorageProvider } from "./StorageProvider";
import { IBridgeStorageProvider, MAX_FEED_ITEMS } from "./StorageProvider";
import { IssuesGetResponseData } from "../github/Types";
import { ProvisionSession } from "matrix-appservice-bridge";
import QuickLRU from "@alloc/quick-lru";
Expand All @@ -11,10 +11,32 @@ export class MemoryStorageProvider extends MSP implements IBridgeStorageProvider
private figmaCommentIds: Map<string, string> = new Map();
private widgetSessions: Map<string, ProvisionSession> = new Map();
private storedFiles = new QuickLRU<string, string>({ maxSize: 128 });
private feedGuids = new Map<string, Array<string>>();

constructor() {
super();
}

async storeFeedGuids(url: string, ...guids: string[]): Promise<void> {
let set = this.feedGuids.get(url);
if (!set) {
set = []
this.feedGuids.set(url, set);
}
set.unshift(...guids);
while (set.length > MAX_FEED_ITEMS) {
set.pop();
}
}

async hasSeenFeed(url: string): Promise<boolean> {
return this.feedGuids.has(url);
}

async hasSeenFeedGuid(url: string, guid: string): Promise<boolean> {
return this.feedGuids.get(url)?.includes(guid) ?? false;
}

public async setGithubIssue(repo: string, issueNumber: string, data: IssuesGetResponseData, scope = "") {
this.issues.set(`${scope}${repo}/${issueNumber}`, data);
}
Expand Down
21 changes: 20 additions & 1 deletion src/Stores/RedisStorageProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { IssuesGetResponseData } from "../github/Types";
import { Redis, default as redis } from "ioredis";
import { Logger } from "matrix-appservice-bridge";

import { IBridgeStorageProvider } from "./StorageProvider";
import { IBridgeStorageProvider, MAX_FEED_ITEMS } from "./StorageProvider";
import { IFilterInfo, IStorageProvider } from "matrix-bot-sdk";
import { ProvisionSession } from "matrix-appservice-bridge";

Expand All @@ -25,6 +25,10 @@ const ISSUES_LAST_COMMENT_EXPIRE_AFTER = 14 * 24 * 60 * 60; // 7 days
const WIDGET_TOKENS = "widgets.tokens.";
const WIDGET_USER_TOKENS = "widgets.user-tokens.";

const FEED_GUIDS = "feeds.guids.";



const log = new Logger("RedisASProvider");

export class RedisStorageContextualProvider implements IStorageProvider {
Expand Down Expand Up @@ -61,6 +65,7 @@ export class RedisStorageContextualProvider implements IStorageProvider {

}


export class RedisStorageProvider extends RedisStorageContextualProvider implements IBridgeStorageProvider {
constructor(host: string, port: number, contextSuffix = '') {
super(new redis(port, host), contextSuffix);
Expand Down Expand Up @@ -198,4 +203,18 @@ export class RedisStorageProvider extends RedisStorageContextualProvider impleme
public async setStoredTempFile(key: string, value: string) {
await this.redis.set(STORED_FILES_KEY + key, value);
}

public async storeFeedGuids(url: string, ...guid: string[]): Promise<void> {
const feedKey = `${FEED_GUIDS}${url}`;
await this.redis.lpush(feedKey, ...guid);
await this.redis.ltrim(feedKey, 0, MAX_FEED_ITEMS);
}

public async hasSeenFeed(url: string): Promise<boolean> {
return (await this.redis.exists(`${FEED_GUIDS}${url}`)) === 1;
}

public async hasSeenFeedGuid(url: string, guid: string): Promise<boolean> {
return (await this.redis.lpos(`${FEED_GUIDS}${url}`, guid)) != null;
}
}
10 changes: 10 additions & 0 deletions src/Stores/StorageProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@ import { ProvisioningStore } from "matrix-appservice-bridge";
import { IAppserviceStorageProvider, IStorageProvider } from "matrix-bot-sdk";
import { IssuesGetResponseData } from "../github/Types";

// Some RSS feeds can return a very small number of items then bounce
// back to their "normal" size, so we cannot just clobber the recent GUID list per request or else we'll
// forget what we sent and resend it. Instead, we'll keep 2x the max number of items that we've ever
// seen from this feed, up to a max of 10,000.
// Adopted from https://github.com/matrix-org/go-neb/blob/babb74fa729882d7265ff507b09080e732d060ae/services/rssbot/rssbot.go#L304
export const MAX_FEED_ITEMS = 10_000;

export interface IBridgeStorageProvider extends IAppserviceStorageProvider, IStorageProvider, ProvisioningStore {
connect?(): Promise<void>;
disconnect?(): Promise<void>;
Expand All @@ -15,4 +22,7 @@ export interface IBridgeStorageProvider extends IAppserviceStorageProvider, ISto
getFigmaCommentEventId(roomId: string, figmaCommentId: string): Promise<string|null>;
getStoredTempFile(key: string): Promise<string|null>;
setStoredTempFile(key: string, value: string): Promise<void>;
storeFeedGuids(url: string, ...guid: string[]): Promise<void>;
hasSeenFeed(url: string, ...guid: string[]): Promise<boolean>;
hasSeenFeedGuid(url: string, guid: string): Promise<boolean>;
}
Loading

0 comments on commit 3217b9e

Please sign in to comment.