330 lines
11 KiB
Go
Raw Permalink Normal View History

2021-05-14 16:49:33 +01:00
package main
import (
"flag"
2022-04-01 13:44:09 +01:00
"fmt"
"log"
"net/http"
_ "net/http/pprof"
"os"
"os/signal"
2023-09-04 09:38:51 +02:00
"runtime/debug"
"strconv"
"strings"
"syscall"
"time"
2023-04-04 16:07:27 +01:00
"github.com/getsentry/sentry-go"
sentryhttp "github.com/getsentry/sentry-go/http"
"github.com/pressly/goose/v3"
add extensions for typing and receipts; bugfixes and additional perf improvements Features: - Add `typing` extension. - Add `receipts` extension. - Add comprehensive prometheus `/metrics` activated via `SYNCV3_PROM`. - Add `SYNCV3_PPROF` support. - Add `by_notification_level` sort order. - Add `include_old_rooms` support. - Add support for `$ME` and `$LAZY`. - Add correct filtering when `*,*` is used as `required_state`. - Add `num_live` to each room response to indicate how many timeline entries are live. Bug fixes: - Use a stricter comparison function on ranges: fixes an issue whereby UTs fail on go1.19 due to change in sorting algorithm. - Send back an `errcode` on HTTP errors (e.g expired sessions). - Remove `unsigned.txn_id` on insertion into the DB. Otherwise other users would see other users txn IDs :( - Improve range delta algorithm: previously it didn't handle cases like `[0,20] -> [20,30]` and would panic. - Send HTTP 400 for invalid range requests. - Don't publish no-op unread counts which just adds extra noise. - Fix leaking DB connections which could eventually consume all available connections. - Ensure we always unblock WaitUntilInitialSync even on invalid access tokens. Other code relies on WaitUntilInitialSync() actually returning at _some_ point e.g on startup we have N workers which bound the number of concurrent pollers made at any one time, we need to not just hog a worker forever. Improvements: - Greatly improve startup times of sync3 handlers by improving `JoinedRoomsTracker`: a modest amount of data would take ~28s to create the handler, now it takes 4s. - Massively improve initial initial v3 sync times, by refactoring `JoinedRoomsTracker`, from ~47s to <1s. - Add `SlidingSyncUntil...` in tests to reduce races. - Tweak the API shape of JoinedUsersForRoom to reduce state block processing time for large rooms from 63s to 39s. - Add trace task for initial syncs. - Include the proxy version in UA strings. 
- HTTP errors now wait 1s before returning to stop clients tight-looping on error. - Pending event buffer is now 2000. - Index the room ID first to cull the most events when returning timeline entries. Speeds up `SelectLatestEventsBetween` by a factor of 8. - Remove cancelled `m.room_key_requests` from the to-device inbox. Cuts down the amount of events in the inbox by ~94% for very large (20k+) inboxes, ~50% for moderate sized (200 events) inboxes. Adds book-keeping to remember the unacked to-device position for each client.
2022-12-14 18:53:55 +00:00
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/rs/zerolog"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
2023-09-04 09:38:51 +02:00
syncv3 "github.com/matrix-org/sliding-sync"
"github.com/matrix-org/sliding-sync/internal"
"github.com/matrix-org/sliding-sync/sync2"
2021-05-14 16:49:33 +01:00
)
2022-04-01 13:44:09 +01:00
// GitCommit is the abbreviated VCS revision of this build. init overwrites it
// with the first gitRevLen characters of the "vcs.revision" build setting when
// that setting is present in the embedded build info.
var GitCommit string

// version is the release version of the proxy. It is printed at startup and
// propagated to sync2.ProxyVersion, syncv3.Version and the Sentry release.
const version = "0.99.19"

// flags parses the command-line arguments of the "migrate" subcommand. It
// exits the process on a parse error (flag.ExitOnError).
var flags = flag.NewFlagSet("goose", flag.ExitOnError)
// Names of the environment variables that configure the proxy. main reads
// each of them via os.Getenv at startup.
const (
	// Required fields
	EnvServer = "SYNCV3_SERVER"
	EnvDB     = "SYNCV3_DB"
	EnvSecret = "SYNCV3_SECRET"

	// Optional fields
	EnvBindAddr               = "SYNCV3_BINDADDR"
	EnvTLSCert                = "SYNCV3_TLS_CERT"
	EnvTLSKey                 = "SYNCV3_TLS_KEY"
	EnvPPROF                  = "SYNCV3_PPROF"
	EnvPrometheus             = "SYNCV3_PROM"
	EnvDebug                  = "SYNCV3_DEBUG"
	EnvOTLP                   = "SYNCV3_OTLP_URL"
	EnvOTLPUsername           = "SYNCV3_OTLP_USERNAME"
	EnvOTLPPassword           = "SYNCV3_OTLP_PASSWORD"
	EnvSentryDsn              = "SYNCV3_SENTRY_DSN"
	EnvLogLevel               = "SYNCV3_LOG_LEVEL"
	EnvMaxConns               = "SYNCV3_MAX_DB_CONN"
	EnvIdleTimeoutSecs        = "SYNCV3_DB_IDLE_TIMEOUT_SECS"
	EnvHTTPTimeoutSecs        = "SYNCV3_HTTP_TIMEOUT_SECS"
	EnvHTTPInitialTimeoutSecs = "SYNCV3_HTTP_INITIAL_TIMEOUT_SECS"
)
var helpMsg = fmt.Sprintf(`
Environment var
2023-11-16 14:07:23 +00:00
%s Required. The destination homeserver to talk to (CS API HTTPS URL) e.g 'https://matrix-client.matrix.org' (Supports unix socket: /path/to/socket)
%s Required. The postgres connection string: https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING
%s Required. A secret to use to encrypt access tokens. Must remain the same for the lifetime of the database.
2023-11-16 14:07:23 +00:00
%s Default: 0.0.0.0:8008. The interface and port to listen on. (Supports unix socket: /path/to/socket)
%s Default: unset. Path to a certificate file to serve to HTTPS clients. Specifying this enables TLS on the bound address.
%s Default: unset. Path to a key file for the certificate. Must be provided along with the certificate file.
%s Default: unset. The bind addr for pprof debugging e.g ':6060'. If not set, does not listen.
%s Default: unset. The bind addr for Prometheus metrics, which will be accessible at /metrics at this address.
%s Default: unset. The OTLP HTTP URL to send spans to e.g https://localhost:4318 - if unset does not send OTLP traces.
%s Default: unset. The OTLP username for Basic auth. If unset, does not send an Authorization header.
%s Default: unset. The OTLP password for Basic auth. If unset, does not send an Authorization header.
2023-04-04 18:17:33 +01:00
%s Default: unset. The Sentry DSN to report events to e.g https://sliding-sync@sentry.example.com/123 - if unset does not send sentry events.
%s Default: info. The level of verbosity for messages logged. Available values are trace, debug, info, warn, error and fatal
%s Default: unset. Max database connections to use when communicating with postgres. Unset or 0 means no limit.
2023-09-15 11:12:47 +01:00
%s Default: 3600. The maximum amount of time a database connection may be idle, in seconds. 0 means no limit.
2023-09-19 11:48:49 +02:00
%s Default: 300. The timeout in seconds for normal HTTP requests.
%s Default: 1800. The timeout in seconds for initial sync requests.
`, EnvServer, EnvDB, EnvSecret, EnvBindAddr, EnvTLSCert, EnvTLSKey, EnvPPROF, EnvPrometheus, EnvOTLP, EnvOTLPUsername, EnvOTLPPassword,
2023-09-19 11:48:49 +02:00
EnvSentryDsn, EnvLogLevel, EnvMaxConns, EnvIdleTimeoutSecs, EnvHTTPTimeoutSecs, EnvHTTPInitialTimeoutSecs)
// defaulting returns in when it is non-empty and falls back to dft otherwise.
func defaulting(in, dft string) string {
	if in != "" {
		return in
	}
	return dft
}
2021-05-14 16:49:33 +01:00
func main() {
2022-04-01 13:44:09 +01:00
fmt.Printf("Sync v3 [%s] (%s)\n", version, GitCommit)
add extensions for typing and receipts; bugfixes and additional perf improvements Features: - Add `typing` extension. - Add `receipts` extension. - Add comprehensive prometheus `/metrics` activated via `SYNCV3_PROM`. - Add `SYNCV3_PPROF` support. - Add `by_notification_level` sort order. - Add `include_old_rooms` support. - Add support for `$ME` and `$LAZY`. - Add correct filtering when `*,*` is used as `required_state`. - Add `num_live` to each room response to indicate how many timeline entries are live. Bug fixes: - Use a stricter comparison function on ranges: fixes an issue whereby UTs fail on go1.19 due to change in sorting algorithm. - Send back an `errcode` on HTTP errors (e.g expired sessions). - Remove `unsigned.txn_id` on insertion into the DB. Otherwise other users would see other users txn IDs :( - Improve range delta algorithm: previously it didn't handle cases like `[0,20] -> [20,30]` and would panic. - Send HTTP 400 for invalid range requests. - Don't publish no-op unread counts which just adds extra noise. - Fix leaking DB connections which could eventually consume all available connections. - Ensure we always unblock WaitUntilInitialSync even on invalid access tokens. Other code relies on WaitUntilInitialSync() actually returning at _some_ point e.g on startup we have N workers which bound the number of concurrent pollers made at any one time, we need to not just hog a worker forever. Improvements: - Greatly improve startup times of sync3 handlers by improving `JoinedRoomsTracker`: a modest amount of data would take ~28s to create the handler, now it takes 4s. - Massively improve initial initial v3 sync times, by refactoring `JoinedRoomsTracker`, from ~47s to <1s. - Add `SlidingSyncUntil...` in tests to reduce races. - Tweak the API shape of JoinedUsersForRoom to reduce state block processing time for large rooms from 63s to 39s. - Add trace task for initial syncs. - Include the proxy version in UA strings. 
- HTTP errors now wait 1s before returning to stop clients tight-looping on error. - Pending event buffer is now 2000. - Index the room ID first to cull the most events when returning timeline entries. Speeds up `SelectLatestEventsBetween` by a factor of 8. - Remove cancelled `m.room_key_requests` from the to-device inbox. Cuts down the amount of events in the inbox by ~94% for very large (20k+) inboxes, ~50% for moderate sized (200 events) inboxes. Adds book-keeping to remember the unacked to-device position for each client.
2022-12-14 18:53:55 +00:00
sync2.ProxyVersion = version
2022-08-16 14:23:05 +01:00
syncv3.Version = fmt.Sprintf("%s (%s)", version, GitCommit)
2023-07-28 15:37:25 +02:00
if len(os.Args) > 1 && os.Args[1] == "migrate" {
executeMigrations()
return
}
add extensions for typing and receipts; bugfixes and additional perf improvements Features: - Add `typing` extension. - Add `receipts` extension. - Add comprehensive prometheus `/metrics` activated via `SYNCV3_PROM`. - Add `SYNCV3_PPROF` support. - Add `by_notification_level` sort order. - Add `include_old_rooms` support. - Add support for `$ME` and `$LAZY`. - Add correct filtering when `*,*` is used as `required_state`. - Add `num_live` to each room response to indicate how many timeline entries are live. Bug fixes: - Use a stricter comparison function on ranges: fixes an issue whereby UTs fail on go1.19 due to change in sorting algorithm. - Send back an `errcode` on HTTP errors (e.g expired sessions). - Remove `unsigned.txn_id` on insertion into the DB. Otherwise other users would see other users txn IDs :( - Improve range delta algorithm: previously it didn't handle cases like `[0,20] -> [20,30]` and would panic. - Send HTTP 400 for invalid range requests. - Don't publish no-op unread counts which just adds extra noise. - Fix leaking DB connections which could eventually consume all available connections. - Ensure we always unblock WaitUntilInitialSync even on invalid access tokens. Other code relies on WaitUntilInitialSync() actually returning at _some_ point e.g on startup we have N workers which bound the number of concurrent pollers made at any one time, we need to not just hog a worker forever. Improvements: - Greatly improve startup times of sync3 handlers by improving `JoinedRoomsTracker`: a modest amount of data would take ~28s to create the handler, now it takes 4s. - Massively improve initial initial v3 sync times, by refactoring `JoinedRoomsTracker`, from ~47s to <1s. - Add `SlidingSyncUntil...` in tests to reduce races. - Tweak the API shape of JoinedUsersForRoom to reduce state block processing time for large rooms from 63s to 39s. - Add trace task for initial syncs. - Include the proxy version in UA strings. 
- HTTP errors now wait 1s before returning to stop clients tight-looping on error. - Pending event buffer is now 2000. - Index the room ID first to cull the most events when returning timeline entries. Speeds up `SelectLatestEventsBetween` by a factor of 8. - Remove cancelled `m.room_key_requests` from the to-device inbox. Cuts down the amount of events in the inbox by ~94% for very large (20k+) inboxes, ~50% for moderate sized (200 events) inboxes. Adds book-keeping to remember the unacked to-device position for each client.
2022-12-14 18:53:55 +00:00
args := map[string]string{
2023-09-19 11:48:49 +02:00
EnvServer: os.Getenv(EnvServer),
EnvDB: os.Getenv(EnvDB),
EnvSecret: os.Getenv(EnvSecret),
EnvBindAddr: defaulting(os.Getenv(EnvBindAddr), "0.0.0.0:8008"),
EnvTLSCert: os.Getenv(EnvTLSCert),
EnvTLSKey: os.Getenv(EnvTLSKey),
EnvPPROF: os.Getenv(EnvPPROF),
EnvPrometheus: os.Getenv(EnvPrometheus),
EnvDebug: os.Getenv(EnvDebug),
EnvOTLP: os.Getenv(EnvOTLP),
EnvOTLPUsername: os.Getenv(EnvOTLPUsername),
EnvOTLPPassword: os.Getenv(EnvOTLPPassword),
EnvSentryDsn: os.Getenv(EnvSentryDsn),
EnvLogLevel: os.Getenv(EnvLogLevel),
EnvMaxConns: defaulting(os.Getenv(EnvMaxConns), "0"),
EnvIdleTimeoutSecs: defaulting(os.Getenv(EnvIdleTimeoutSecs), "3600"),
2023-09-19 15:58:22 +02:00
EnvHTTPTimeoutSecs: defaulting(os.Getenv(EnvHTTPTimeoutSecs), "300"),
2023-09-19 11:48:49 +02:00
EnvHTTPInitialTimeoutSecs: defaulting(os.Getenv(EnvHTTPInitialTimeoutSecs), "1800"),
2021-05-14 16:49:33 +01:00
}
add extensions for typing and receipts; bugfixes and additional perf improvements Features: - Add `typing` extension. - Add `receipts` extension. - Add comprehensive prometheus `/metrics` activated via `SYNCV3_PROM`. - Add `SYNCV3_PPROF` support. - Add `by_notification_level` sort order. - Add `include_old_rooms` support. - Add support for `$ME` and `$LAZY`. - Add correct filtering when `*,*` is used as `required_state`. - Add `num_live` to each room response to indicate how many timeline entries are live. Bug fixes: - Use a stricter comparison function on ranges: fixes an issue whereby UTs fail on go1.19 due to change in sorting algorithm. - Send back an `errcode` on HTTP errors (e.g expired sessions). - Remove `unsigned.txn_id` on insertion into the DB. Otherwise other users would see other users txn IDs :( - Improve range delta algorithm: previously it didn't handle cases like `[0,20] -> [20,30]` and would panic. - Send HTTP 400 for invalid range requests. - Don't publish no-op unread counts which just adds extra noise. - Fix leaking DB connections which could eventually consume all available connections. - Ensure we always unblock WaitUntilInitialSync even on invalid access tokens. Other code relies on WaitUntilInitialSync() actually returning at _some_ point e.g on startup we have N workers which bound the number of concurrent pollers made at any one time, we need to not just hog a worker forever. Improvements: - Greatly improve startup times of sync3 handlers by improving `JoinedRoomsTracker`: a modest amount of data would take ~28s to create the handler, now it takes 4s. - Massively improve initial initial v3 sync times, by refactoring `JoinedRoomsTracker`, from ~47s to <1s. - Add `SlidingSyncUntil...` in tests to reduce races. - Tweak the API shape of JoinedUsersForRoom to reduce state block processing time for large rooms from 63s to 39s. - Add trace task for initial syncs. - Include the proxy version in UA strings. 
- HTTP errors now wait 1s before returning to stop clients tight-looping on error. - Pending event buffer is now 2000. - Index the room ID first to cull the most events when returning timeline entries. Speeds up `SelectLatestEventsBetween` by a factor of 8. - Remove cancelled `m.room_key_requests` from the to-device inbox. Cuts down the amount of events in the inbox by ~94% for very large (20k+) inboxes, ~50% for moderate sized (200 events) inboxes. Adds book-keeping to remember the unacked to-device position for each client.
2022-12-14 18:53:55 +00:00
requiredEnvVars := []string{EnvServer, EnvDB, EnvSecret, EnvBindAddr}
for _, requiredEnvVar := range requiredEnvVars {
if args[requiredEnvVar] == "" {
fmt.Print(helpMsg)
fmt.Printf("\n%s is not set", requiredEnvVar)
fmt.Printf("\n%s must be set\n", strings.Join(requiredEnvVars, ", "))
os.Exit(1)
}
}
if (args[EnvTLSCert] != "" || args[EnvTLSKey] != "") && (args[EnvTLSCert] == "" || args[EnvTLSKey] == "") {
fmt.Print(helpMsg)
fmt.Printf("\nboth %s and %s must be set together\n", EnvTLSCert, EnvTLSKey)
os.Exit(1)
}
add extensions for typing and receipts; bugfixes and additional perf improvements Features: - Add `typing` extension. - Add `receipts` extension. - Add comprehensive prometheus `/metrics` activated via `SYNCV3_PROM`. - Add `SYNCV3_PPROF` support. - Add `by_notification_level` sort order. - Add `include_old_rooms` support. - Add support for `$ME` and `$LAZY`. - Add correct filtering when `*,*` is used as `required_state`. - Add `num_live` to each room response to indicate how many timeline entries are live. Bug fixes: - Use a stricter comparison function on ranges: fixes an issue whereby UTs fail on go1.19 due to change in sorting algorithm. - Send back an `errcode` on HTTP errors (e.g expired sessions). - Remove `unsigned.txn_id` on insertion into the DB. Otherwise other users would see other users txn IDs :( - Improve range delta algorithm: previously it didn't handle cases like `[0,20] -> [20,30]` and would panic. - Send HTTP 400 for invalid range requests. - Don't publish no-op unread counts which just adds extra noise. - Fix leaking DB connections which could eventually consume all available connections. - Ensure we always unblock WaitUntilInitialSync even on invalid access tokens. Other code relies on WaitUntilInitialSync() actually returning at _some_ point e.g on startup we have N workers which bound the number of concurrent pollers made at any one time, we need to not just hog a worker forever. Improvements: - Greatly improve startup times of sync3 handlers by improving `JoinedRoomsTracker`: a modest amount of data would take ~28s to create the handler, now it takes 4s. - Massively improve initial initial v3 sync times, by refactoring `JoinedRoomsTracker`, from ~47s to <1s. - Add `SlidingSyncUntil...` in tests to reduce races. - Tweak the API shape of JoinedUsersForRoom to reduce state block processing time for large rooms from 63s to 39s. - Add trace task for initial syncs. - Include the proxy version in UA strings. 
- HTTP errors now wait 1s before returning to stop clients tight-looping on error. - Pending event buffer is now 2000. - Index the room ID first to cull the most events when returning timeline entries. Speeds up `SelectLatestEventsBetween` by a factor of 8. - Remove cancelled `m.room_key_requests` from the to-device inbox. Cuts down the amount of events in the inbox by ~94% for very large (20k+) inboxes, ~50% for moderate sized (200 events) inboxes. Adds book-keeping to remember the unacked to-device position for each client.
2022-12-14 18:53:55 +00:00
// pprof
if args[EnvPPROF] != "" {
go func() {
fmt.Printf("Starting pprof listener on %s\n", args[EnvPPROF])
if err := http.ListenAndServe(args[EnvPPROF], nil); err != nil {
panic(err)
}
}()
}
if args[EnvPrometheus] != "" {
go func() {
fmt.Printf("Starting prometheus listener on %s\n", args[EnvPrometheus])
http.Handle("/metrics", promhttp.Handler())
if err := http.ListenAndServe(args[EnvPrometheus], nil); err != nil {
panic(err)
}
}()
}
if args[EnvOTLP] != "" {
fmt.Printf("Configuring OTLP collector...\n")
if err := internal.ConfigureOTLP(args[EnvOTLP], args[EnvOTLPUsername], args[EnvOTLPPassword], syncv3.Version); err != nil {
panic(err)
}
}
add extensions for typing and receipts; bugfixes and additional perf improvements Features: - Add `typing` extension. - Add `receipts` extension. - Add comprehensive prometheus `/metrics` activated via `SYNCV3_PROM`. - Add `SYNCV3_PPROF` support. - Add `by_notification_level` sort order. - Add `include_old_rooms` support. - Add support for `$ME` and `$LAZY`. - Add correct filtering when `*,*` is used as `required_state`. - Add `num_live` to each room response to indicate how many timeline entries are live. Bug fixes: - Use a stricter comparison function on ranges: fixes an issue whereby UTs fail on go1.19 due to change in sorting algorithm. - Send back an `errcode` on HTTP errors (e.g expired sessions). - Remove `unsigned.txn_id` on insertion into the DB. Otherwise other users would see other users txn IDs :( - Improve range delta algorithm: previously it didn't handle cases like `[0,20] -> [20,30]` and would panic. - Send HTTP 400 for invalid range requests. - Don't publish no-op unread counts which just adds extra noise. - Fix leaking DB connections which could eventually consume all available connections. - Ensure we always unblock WaitUntilInitialSync even on invalid access tokens. Other code relies on WaitUntilInitialSync() actually returning at _some_ point e.g on startup we have N workers which bound the number of concurrent pollers made at any one time, we need to not just hog a worker forever. Improvements: - Greatly improve startup times of sync3 handlers by improving `JoinedRoomsTracker`: a modest amount of data would take ~28s to create the handler, now it takes 4s. - Massively improve initial initial v3 sync times, by refactoring `JoinedRoomsTracker`, from ~47s to <1s. - Add `SlidingSyncUntil...` in tests to reduce races. - Tweak the API shape of JoinedUsersForRoom to reduce state block processing time for large rooms from 63s to 39s. - Add trace task for initial syncs. - Include the proxy version in UA strings. 
- HTTP errors now wait 1s before returning to stop clients tight-looping on error. - Pending event buffer is now 2000. - Index the room ID first to cull the most events when returning timeline entries. Speeds up `SelectLatestEventsBetween` by a factor of 8. - Remove cancelled `m.room_key_requests` from the to-device inbox. Cuts down the amount of events in the inbox by ~94% for very large (20k+) inboxes, ~50% for moderate sized (200 events) inboxes. Adds book-keeping to remember the unacked to-device position for each client.
2022-12-14 18:53:55 +00:00
2023-04-05 17:54:25 +01:00
// Initialise sentry. We do this in a separate block to the sentry code below,
2023-04-05 18:40:23 +01:00
// because we want to configure logging before the call to syncv3.Setup---which may
// want to log to sentry itself.
2023-04-04 16:07:27 +01:00
if args[EnvSentryDsn] != "" {
fmt.Printf("Configuring Sentry reporter...\n")
err := sentry.Init(sentry.ClientOptions{
2023-04-12 19:22:42 +01:00
Dsn: args[EnvSentryDsn],
Release: version,
Dist: GitCommit,
2023-04-04 16:07:27 +01:00
})
if err != nil {
panic(err)
}
}
2023-07-12 17:42:04 +01:00
fmt.Printf("Debug=%v LogLevel=%v MaxConns=%v\n", args[EnvDebug] == "1", args[EnvLogLevel], args[EnvMaxConns])
if args[EnvDebug] == "1" {
zerolog.SetGlobalLevel(zerolog.TraceLevel)
} else {
switch strings.ToLower(args[EnvLogLevel]) {
case "trace":
zerolog.SetGlobalLevel(zerolog.TraceLevel)
case "debug":
zerolog.SetGlobalLevel(zerolog.DebugLevel)
case "info":
zerolog.SetGlobalLevel(zerolog.InfoLevel)
case "warn":
zerolog.SetGlobalLevel(zerolog.WarnLevel)
case "err", "error":
zerolog.SetGlobalLevel(zerolog.ErrorLevel)
case "fatal":
zerolog.SetGlobalLevel(zerolog.FatalLevel)
default:
zerolog.SetGlobalLevel(zerolog.InfoLevel)
}
}
maxConnsInt, err := strconv.Atoi(args[EnvMaxConns])
if err != nil {
panic("invalid value for " + EnvMaxConns + ": " + args[EnvMaxConns])
}
2023-09-15 11:12:47 +01:00
idleTimeSecs, err := strconv.Atoi(args[EnvIdleTimeoutSecs])
if err != nil {
panic("invalid value for " + EnvIdleTimeoutSecs + ": " + args[EnvIdleTimeoutSecs])
}
2023-09-19 11:48:49 +02:00
httpTimeoutSecs, err := strconv.Atoi(args[EnvHTTPTimeoutSecs])
if err != nil {
panic("invalid value for " + EnvHTTPTimeoutSecs + ": " + args[EnvHTTPTimeoutSecs])
}
httpLongTimeoutSecs, err := strconv.Atoi(args[EnvHTTPInitialTimeoutSecs])
if err != nil {
panic("invalid value for " + EnvHTTPInitialTimeoutSecs + ": " + args[EnvHTTPInitialTimeoutSecs])
}
2023-04-05 18:40:23 +01:00
h2, h3 := syncv3.Setup(args[EnvServer], args[EnvDB], args[EnvSecret], syncv3.Opts{
AddPrometheusMetrics: args[EnvPrometheus] != "",
DBMaxConns: maxConnsInt,
2023-09-15 11:12:47 +01:00
DBConnMaxIdleTime: time.Duration(idleTimeSecs) * time.Second,
MaxTransactionIDDelay: time.Second,
2023-09-19 11:48:49 +02:00
HTTPTimeout: time.Duration(httpTimeoutSecs) * time.Second,
HTTPLongTimeout: time.Duration(httpLongTimeoutSecs) * time.Second,
2023-04-05 18:40:23 +01:00
})
add extensions for typing and receipts; bugfixes and additional perf improvements Features: - Add `typing` extension. - Add `receipts` extension. - Add comprehensive prometheus `/metrics` activated via `SYNCV3_PROM`. - Add `SYNCV3_PPROF` support. - Add `by_notification_level` sort order. - Add `include_old_rooms` support. - Add support for `$ME` and `$LAZY`. - Add correct filtering when `*,*` is used as `required_state`. - Add `num_live` to each room response to indicate how many timeline entries are live. Bug fixes: - Use a stricter comparison function on ranges: fixes an issue whereby UTs fail on go1.19 due to change in sorting algorithm. - Send back an `errcode` on HTTP errors (e.g expired sessions). - Remove `unsigned.txn_id` on insertion into the DB. Otherwise other users would see other users txn IDs :( - Improve range delta algorithm: previously it didn't handle cases like `[0,20] -> [20,30]` and would panic. - Send HTTP 400 for invalid range requests. - Don't publish no-op unread counts which just adds extra noise. - Fix leaking DB connections which could eventually consume all available connections. - Ensure we always unblock WaitUntilInitialSync even on invalid access tokens. Other code relies on WaitUntilInitialSync() actually returning at _some_ point e.g on startup we have N workers which bound the number of concurrent pollers made at any one time, we need to not just hog a worker forever. Improvements: - Greatly improve startup times of sync3 handlers by improving `JoinedRoomsTracker`: a modest amount of data would take ~28s to create the handler, now it takes 4s. - Massively improve initial initial v3 sync times, by refactoring `JoinedRoomsTracker`, from ~47s to <1s. - Add `SlidingSyncUntil...` in tests to reduce races. - Tweak the API shape of JoinedUsersForRoom to reduce state block processing time for large rooms from 63s to 39s. - Add trace task for initial syncs. - Include the proxy version in UA strings. 
- HTTP errors now wait 1s before returning to stop clients tight-looping on error. - Pending event buffer is now 2000. - Index the room ID first to cull the most events when returning timeline entries. Speeds up `SelectLatestEventsBetween` by a factor of 8. - Remove cancelled `m.room_key_requests` from the to-device inbox. Cuts down the amount of events in the inbox by ~94% for very large (20k+) inboxes, ~50% for moderate sized (200 events) inboxes. Adds book-keeping to remember the unacked to-device position for each client.
2022-12-14 18:53:55 +00:00
go h2.StartV2Pollers()
2024-04-22 08:59:23 +01:00
go h2.Store.Cleaner(time.Hour)
if args[EnvOTLP] != "" {
h3 = otelhttp.NewHandler(h3, "Sync")
}
2023-04-04 16:07:27 +01:00
2023-04-05 18:40:23 +01:00
// Install the Sentry middleware, if configured.
if args[EnvSentryDsn] != "" {
sentryHandler := sentryhttp.New(sentryhttp.Options{
Repanic: true,
})
h3 = sentryHandler.Handle(h3)
}
syncv3.RunSyncV3Server(h3, args[EnvBindAddr], args[EnvServer], args[EnvTLSCert], args[EnvTLSKey])
WaitForShutdown(args[EnvSentryDsn] != "")
}
// WaitForShutdown blocks until the process receives a SIGINT or SIGTERM signal
// (see `man 7 signal`). It performs any last cleanup tasks and then exits.
func WaitForShutdown(sentryInUse bool) {
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
select {
case <-sigs:
}
signal.Reset(syscall.SIGINT, syscall.SIGTERM)
fmt.Printf("Shutdown signal received...")
if sentryInUse {
fmt.Printf("Flushing sentry events...")
if !sentry.Flush(time.Second * 5) {
fmt.Printf("Failed to flush all Sentry events!")
}
}
fmt.Printf("Exiting now")
2021-05-14 16:49:33 +01:00
}
// executeMigrations implements the "migrate" subcommand. It requires the
// database connection string to be set in the environment, takes the goose
// command and its arguments from the command line (everything after
// "migrate"), and runs that command against the migrations embedded in
// syncv3.EmbedMigrations.
//
// The process exits with a non-zero status when the connection string is
// missing, the database cannot be opened or closed, or the goose command
// fails. When no goose command is given, usage is printed and the function
// returns.
func executeMigrations() {
	envArgs := map[string]string{
		EnvDB: os.Getenv(EnvDB),
	}
	requiredEnvVars := []string{EnvDB}
	for _, requiredEnvVar := range requiredEnvVars {
		if envArgs[requiredEnvVar] == "" {
			fmt.Print(helpMsg)
			fmt.Printf("\n%s is not set", requiredEnvVar)
			fmt.Printf("\n%s must be set\n", strings.Join(requiredEnvVars, ", "))
			os.Exit(1)
		}
	}

	// flags uses flag.ExitOnError, so a parse failure terminates the process
	// and the returned error needs no separate handling.
	flags.Parse(os.Args[1:])
	args := flags.Args()
	// args[0] is the "migrate" subcommand itself; the goose command follows.
	if len(args) < 2 {
		flags.Usage()
		return
	}
	command := args[1]
	arguments := args[2:]

	db, err := goose.OpenDBWithDriver("postgres", envArgs[EnvDB])
	if err != nil {
		log.Fatalf("goose: failed to open DB: %v\n", err)
	}

	goose.SetBaseFS(syncv3.EmbedMigrations)
	runErr := goose.Run(command, db, "state/migrations", arguments...)

	// Close explicitly rather than via defer: log.Fatalf calls os.Exit, which
	// skips deferred functions and would leave the connection open on failure.
	closeErr := db.Close()
	if runErr != nil {
		if closeErr != nil {
			log.Printf("goose: failed to close DB: %v\n", closeErr)
		}
		log.Fatalf("goose %v: %v", command, runErr)
	}
	if closeErr != nil {
		log.Fatalf("goose: failed to close DB: %v\n", closeErr)
	}
}
2023-09-04 09:38:51 +02:00
const gitRevLen = 7 // 7 matches the displayed characters on github.com
func init() {
// Try to get the revision sliding-sync was build from.
// If we can't, e.g. sliding-sync wasn't built (go run) or no VCS version is present,
// we just use the provided version above.
info, ok := debug.ReadBuildInfo()
if !ok {
return
}
for _, setting := range info.Settings {
if setting.Key == "vcs.revision" {
revLen := len(setting.Value)
if revLen >= gitRevLen {
GitCommit = setting.Value[:gitRevLen]
} else {
2023-09-04 09:46:32 +02:00
GitCommit = setting.Value[:revLen]
2023-09-04 09:38:51 +02:00
}
break
}
}
}