package state

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"strings"
	"time"

	"github.com/getsentry/sentry-go"
	"github.com/jmoiron/sqlx"
	"github.com/lib/pq"
	"github.com/rs/zerolog"
	"github.com/tidwall/gjson"
	"golang.org/x/exp/slices"

	"github.com/matrix-org/sliding-sync/internal"
	"github.com/matrix-org/sliding-sync/sqlutil"
	"github.com/matrix-org/sliding-sync/sync2"
)

var logger = zerolog.New(os.Stdout).With().Timestamp().Logger().Output(zerolog.ConsoleWriter{
	Out:        os.Stderr,
	TimeFormat: "15:04:05",
})

// Max number of parameters in a single SQL command
const MaxPostgresParameters = 65535
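// Illustrative only (not used elsewhere in this file): with, say, 5 bind parameters
// per inserted row, one statement can carry at most MaxPostgresParameters/5 = 13107
// rows, so a bulk write of n rows has to be split into roughly n/13107 statements:
//
//	rowsPerChunk := MaxPostgresParameters / paramsPerRow
//	numChunks := (len(rows) + rowsPerChunk - 1) / rowsPerChunk
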
// StartupSnapshot represents a snapshot of startup data for the sliding sync HTTP API instances
type StartupSnapshot struct {
	GlobalMetadata   map[string]internal.RoomMetadata // room_id -> metadata
	AllJoinedMembers map[string][]string              // room_id -> [user_id]
}

type LatestEvents struct {
	Timeline  []json.RawMessage
	PrevBatch string
	LatestNID int64
}
// DiscardIgnoredMessages modifies the struct in-place, replacing the Timeline with
// a copy that has all ignored events omitted. The order of the timeline is preserved.
func (e *LatestEvents) DiscardIgnoredMessages(shouldIgnore func(sender string) bool) {
	// A little bit sad to be effectively doing a copy here---most of the time there
	// won't be any messages to ignore (and the timeline is likely short). But that copy
	// is unlikely to be a bottleneck.
	newTimeline := make([]json.RawMessage, 0, len(e.Timeline))
	for _, ev := range e.Timeline {
		parsed := gjson.ParseBytes(ev)
		if parsed.Get("state_key").Exists() || !shouldIgnore(parsed.Get("sender").Str) {
			newTimeline = append(newTimeline, ev)
		}
	}
	e.Timeline = newTimeline
}
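// For example, a caller holding a set of ignored user IDs (hypothetical values; the
// real predicate is supplied by the caller) might strip ignored senders like so:
//
//	ignored := map[string]struct{}{"@spammer:example.org": {}}
//	latest.DiscardIgnoredMessages(func(sender string) bool {
//		_, ok := ignored[sender]
//		return ok
//	})
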
type Storage struct {
	Accumulator       *Accumulator
	EventsTable       *EventTable
	ToDeviceTable     *ToDeviceTable
	UnreadTable       *UnreadTable
	AccountDataTable  *AccountDataTable
	InvitesTable      *InvitesTable
	TransactionsTable *TransactionsTable
	DeviceDataTable   *DeviceDataTable
	ReceiptTable      *ReceiptTable
	DB                *sqlx.DB
	MaxTimelineLimit  int

	shutdownCh chan struct{}
	shutdown   bool
}

func NewStorage(postgresURI string) *Storage {
	db, err := sqlx.Open("postgres", postgresURI)
	if err != nil {
		sentry.CaptureException(err)
		// TODO: if we panic(), will sentry have a chance to flush the event?
		logger.Panic().Err(err).Str("uri", postgresURI).Msg("failed to open SQL DB")
	}
	return NewStorageWithDB(db, false)
}

func NewStorageWithDB(db *sqlx.DB, addPrometheusMetrics bool) *Storage {
	acc := &Accumulator{
		db:            db,
		roomsTable:    NewRoomsTable(db),
		eventsTable:   NewEventTable(db),
		snapshotTable: NewSnapshotsTable(db),
		spacesTable:   NewSpacesTable(db),
		invitesTable:  NewInvitesTable(db),
		entityName:    "server",
	}

	return &Storage{
		Accumulator:       acc,
		ToDeviceTable:     NewToDeviceTable(db),
		UnreadTable:       NewUnreadTable(db),
		EventsTable:       acc.eventsTable,
		AccountDataTable:  NewAccountDataTable(db),
		InvitesTable:      acc.invitesTable,
		TransactionsTable: NewTransactionsTable(db),
		DeviceDataTable:   NewDeviceDataTable(db),
		ReceiptTable:      NewReceiptTable(db),
		DB:                db,
		MaxTimelineLimit:  50,
		shutdownCh:        make(chan struct{}),
	}
}
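// Minimal usage sketch (the connection string is an assumption, not a value this
// package defines):
//
//	store := NewStorage("user=postgres dbname=syncv3 sslmode=disable")
//	latestNID, err := store.LatestEventNID()
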
func (s *Storage) LatestEventNID() (int64, error) {
	return s.Accumulator.eventsTable.SelectHighestNID()
}

func (s *Storage) AccountData(userID, roomID string, eventTypes []string) (data []AccountData, err error) {
	err = sqlutil.WithTransaction(s.Accumulator.db, func(txn *sqlx.Tx) error {
		data, err = s.AccountDataTable.Select(txn, userID, eventTypes, roomID)
		return err
	})
	return
}

func (s *Storage) RoomAccountDatasWithType(userID, eventType string) (data []AccountData, err error) {
	err = sqlutil.WithTransaction(s.Accumulator.db, func(txn *sqlx.Tx) error {
		data, err = s.AccountDataTable.SelectWithType(txn, userID, eventType)
		return err
	})
	return
}

// Pull out all account data for this user. If roomIDs is empty, global account data is returned.
// If roomIDs is non-empty, all account data for these rooms is extracted.
func (s *Storage) AccountDatas(userID string, roomIDs ...string) (datas []AccountData, err error) {
	err = sqlutil.WithTransaction(s.Accumulator.db, func(txn *sqlx.Tx) error {
		datas, err = s.AccountDataTable.SelectMany(txn, userID, roomIDs...)
		return err
	})
	return
}

func (s *Storage) InsertAccountData(userID, roomID string, events []json.RawMessage) (data []AccountData, err error) {
	data = make([]AccountData, len(events))
	for i := range events {
		data[i] = AccountData{
			UserID: userID,
			RoomID: roomID,
			Data:   events[i],
			Type:   gjson.ParseBytes(events[i]).Get("type").Str,
		}
	}
	err = sqlutil.WithTransaction(s.Accumulator.db, func(txn *sqlx.Tx) error {
		data, err = s.AccountDataTable.Insert(txn, data)
		return err
	})
	return data, err
}
// Prepare a snapshot of the database for calling snapshot functions.
func (s *Storage) PrepareSnapshot(txn *sqlx.Tx) (tableName string, err error) {
	// create a temporary table with all the membership nids for the current snapshots for all rooms.
	// A temporary table will be deleted when the postgres session ends (this process quits).
	// We insert these into a temporary table to let the query planner make better decisions. In practice,
	// if we instead nest this SELECT as a subselect, we see very poor query times for large tables as
	// each event NID is queried using a btree index, rather than doing a seq scan, as this query will pull
	// out ~50% of the rows in syncv3_events.
	tempTableName := "temp_snapshot"
	_, err = txn.Exec(
		`SELECT UNNEST(membership_events) AS membership_nid INTO TEMP ` + tempTableName + ` FROM syncv3_snapshots
		JOIN syncv3_rooms ON syncv3_snapshots.snapshot_id = syncv3_rooms.current_snapshot_id`,
	)
	return tempTableName, err
}
// GlobalSnapshot snapshots the entire database for the purposes of initialising
// a sliding sync instance. It will atomically grab metadata for all rooms and all joined members
// in a single transaction.
func (s *Storage) GlobalSnapshot() (ss StartupSnapshot, err error) {
	err = sqlutil.WithTransaction(s.Accumulator.db, func(txn *sqlx.Tx) error {
		tempTableName, err := s.PrepareSnapshot(txn)
		if err != nil {
			err = fmt.Errorf("GlobalSnapshot: failed to call PrepareSnapshot: %w", err)
			sentry.CaptureException(err)
			return err
		}
		var metadata map[string]internal.RoomMetadata
		ss.AllJoinedMembers, metadata, err = s.AllJoinedMembers(txn, tempTableName)
		if err != nil {
			err = fmt.Errorf("GlobalSnapshot: failed to call AllJoinedMembers: %w", err)
			sentry.CaptureException(err)
			return err
		}
		err = s.MetadataForAllRooms(txn, tempTableName, metadata)
		if err != nil {
			err = fmt.Errorf("GlobalSnapshot: failed to call MetadataForAllRooms: %w", err)
			sentry.CaptureException(err)
			return err
		}
		ss.GlobalMetadata = metadata
		return err
	})
	return
}
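// A sliding sync API instance might bootstrap itself roughly like this (sketch only;
// the in-memory state wiring is elided and not defined in this file):
//
//	snapshot, err := store.GlobalSnapshot()
//	if err != nil { /* handle startup failure */ }
//	for roomID, members := range snapshot.AllJoinedMembers {
//		// seed in-memory state from snapshot.GlobalMetadata[roomID] and members
//		_ = roomID
//		_ = members
//	}
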
// MetadataForAllRooms fills in the given metadata map for all rooms: latest event timestamps, name,
// canonical alias, avatar, encryption, room type, predecessor/upgrade links and space children.
// Requires a prepared snapshot in order to be called.
func (s *Storage) MetadataForAllRooms(txn *sqlx.Tx, tempTableName string, result map[string]internal.RoomMetadata) error {
	loadMetadata := func(roomID string) internal.RoomMetadata {
		metadata, ok := result[roomID]
		if !ok {
			metadata = *internal.NewRoomMetadata(roomID)
		}
		return metadata
	}

	// work out latest timestamps
	events, err := s.Accumulator.eventsTable.selectLatestEventByTypeInAllRooms(txn)
	if err != nil {
		return err
	}
	for _, ev := range events {
		metadata := loadMetadata(ev.RoomID)
		// For a given room, we'll see many events (one for each event type in the
		// room's state). We need to pick the largest of these events' timestamps here.
		ts := gjson.ParseBytes(ev.JSON).Get("origin_server_ts").Uint()
		if ts > metadata.LastMessageTimestamp {
			metadata.LastMessageTimestamp = ts
		}
		parsed := gjson.ParseBytes(ev.JSON)
		eventMetadata := internal.EventMetadata{
			NID:       ev.NID,
			Timestamp: parsed.Get("origin_server_ts").Uint(),
		}
		metadata.LatestEventsByType[parsed.Get("type").Str] = eventMetadata
		// it's possible the latest event is in a brand new room not caught by the first SELECT for joined
		// rooms, e.g. when you're invited to a room, so we need to make sure to set the metadata again here.
		// TODO: is the comment above still relevant now that we explicitly call NewRoomMetadata above
		// when handling invites?
		metadata.RoomID = ev.RoomID
		result[ev.RoomID] = metadata
	}
	// Select the name / canonical alias / avatar for all rooms
	roomIDToStateEvents, err := s.currentNotMembershipStateEventsInAllRooms(txn, []string{
		"m.room.name", "m.room.canonical_alias", "m.room.avatar",
	})
	if err != nil {
		return fmt.Errorf("failed to load state events for all rooms: %s", err)
	}
	for roomID, stateEvents := range roomIDToStateEvents {
		metadata := loadMetadata(roomID)
		for _, ev := range stateEvents {
			if ev.Type == "m.room.name" && ev.StateKey == "" {
				metadata.NameEvent = gjson.ParseBytes(ev.JSON).Get("content.name").Str
			} else if ev.Type == "m.room.canonical_alias" && ev.StateKey == "" {
				metadata.CanonicalAlias = gjson.ParseBytes(ev.JSON).Get("content.alias").Str
			} else if ev.Type == "m.room.avatar" && ev.StateKey == "" {
				metadata.AvatarEvent = gjson.ParseBytes(ev.JSON).Get("content.url").Str
			}
		}
		result[roomID] = metadata
	}
	roomInfos, err := s.Accumulator.roomsTable.SelectRoomInfos(txn)
	if err != nil {
		return fmt.Errorf("failed to select room infos: %s", err)
	}
	var spaceRoomIDs []string
	for _, info := range roomInfos {
		metadata := loadMetadata(info.ID)
		metadata.Encrypted = info.IsEncrypted
		metadata.UpgradedRoomID = info.UpgradedRoomID
		metadata.PredecessorRoomID = info.PredecessorRoomID
		metadata.RoomType = info.Type
		result[info.ID] = metadata
		if metadata.IsSpace() {
			spaceRoomIDs = append(spaceRoomIDs, info.ID)
		}
	}
	// select space children
	spaceRoomToRelations, err := s.Accumulator.spacesTable.SelectChildren(txn, spaceRoomIDs)
	if err != nil {
		return fmt.Errorf("failed to select space children: %s", err)
	}
	for roomID, relations := range spaceRoomToRelations {
		if _, exists := result[roomID]; !exists {
			// This can happen when you join a space (which populates the spaces table) and then leave it:
			// there are then no joined members in the space, so result doesn't include the room. In this case,
			// we don't want to have a stub metadata with just the space children, so skip it.
			continue
		}
		metadata := loadMetadata(roomID)
		metadata.ChildSpaceRooms = make(map[string]struct{}, len(relations))
		for _, r := range relations {
			// For now we only honour child state events, but we store all the mappings just in case.
			if r.Relation == RelationMSpaceChild {
				metadata.ChildSpaceRooms[r.Child] = struct{}{}
			}
		}
		result[roomID] = metadata
	}
	return nil
}
// ResetMetadataState updates the given metadata in-place to reflect the current state
// of the room. This is only safe to call from the subscriber goroutine; it is not safe
// to call from the connection goroutines.
// TODO: could have this create a new RoomMetadata and get the caller to assign it.
func (s *Storage) ResetMetadataState(metadata *internal.RoomMetadata) error {
	var events []Event
	err := s.DB.Select(&events, `
	WITH snapshot(events, membership_events) AS (
		SELECT events, membership_events
		FROM syncv3_snapshots
		JOIN syncv3_rooms ON snapshot_id = current_snapshot_id
		WHERE syncv3_rooms.room_id = $1
	)
	SELECT event_id, event_type, state_key, event, membership
	FROM syncv3_events JOIN snapshot ON (
		event_nid = ANY (ARRAY_CAT(events, membership_events))
	)
	WHERE (event_type IN ('m.room.name', 'm.room.avatar', 'm.room.canonical_alias', 'm.room.encryption') AND state_key = '')
		OR (event_type = 'm.room.member' AND membership IN ('join', '_join', 'invite', '_invite'))
	ORDER BY event_nid ASC
	;`, metadata.RoomID)
	if err != nil {
		return fmt.Errorf("ResetMetadataState[%s]: %w", metadata.RoomID, err)
	}
	heroMemberships := circularSlice[*Event]{max: 6}
	metadata.JoinCount = 0
	metadata.InviteCount = 0
	metadata.ChildSpaceRooms = make(map[string]struct{})
	for i, ev := range events {
		switch ev.Type {
		case "m.room.name":
			metadata.NameEvent = gjson.GetBytes(ev.JSON, "content.name").Str
		case "m.room.avatar":
			metadata.AvatarEvent = gjson.GetBytes(ev.JSON, "content.url").Str
		case "m.room.canonical_alias":
			metadata.CanonicalAlias = gjson.GetBytes(ev.JSON, "content.alias").Str
		case "m.room.encryption":
			metadata.Encrypted = true
		case "m.room.member":
			heroMemberships.append(&events[i])
			switch ev.Membership {
			case "join":
				fallthrough
			case "_join":
				metadata.JoinCount++
			case "invite":
				fallthrough
			case "_invite":
				metadata.InviteCount++
			}
		case "m.space.child":
			metadata.ChildSpaceRooms[ev.StateKey] = struct{}{}
		}
	}
	metadata.Heroes = make([]internal.Hero, 0, len(heroMemberships.vals))
	for _, ev := range heroMemberships.vals {
		parsed := gjson.ParseBytes(ev.JSON)
		hero := internal.Hero{
			ID:     ev.StateKey,
			Name:   parsed.Get("content.displayname").Str,
			Avatar: parsed.Get("content.avatar_url").Str,
		}
		metadata.Heroes = append(metadata.Heroes, hero)
	}
	// For now, don't bother reloading PredecessorID and UpgradedRoomID.
	// These shouldn't be changing during a room's lifetime in normal operation.
	// We haven't updated LatestEventsByType because that's not part of the timeline.
	return nil
}
// FetchMemberships looks up the latest snapshot for the given room and determines the
// latest membership events in the room. Returns
//   - the list of joined members,
//   - the list of invited members, and then
//   - the list of all other memberships. (This is called "leaves", but includes bans. It
//     also includes knocks, but the proxy doesn't support those.)
//
// Each list's members are arranged in no particular order.
//
// TODO: there is a very similar query in ResetMetadataState which also selects event
// rows for memberships. It is a shame to have to do this twice---can we query
// once and pass the data around?
func (s *Storage) FetchMemberships(roomID string) (joins, invites, leaves []string, err error) {
	var events []Event
	err = s.DB.Select(&events, `
	WITH snapshot(membership_nids) AS (
		SELECT membership_events
		FROM syncv3_snapshots
		JOIN syncv3_rooms ON snapshot_id = current_snapshot_id
		WHERE syncv3_rooms.room_id = $1
	)
	SELECT state_key, membership
	FROM syncv3_events JOIN snapshot ON (
		event_nid = ANY (membership_nids)
	)
	`, roomID)
	if err != nil {
		return nil, nil, nil, err
	}
	joins = make([]string, 0, len(events))
	invites = make([]string, 0, len(events))
	leaves = make([]string, 0, len(events))
	for _, e := range events {
		switch e.Membership {
		case "_join":
			fallthrough
		case "join":
			joins = append(joins, e.StateKey)
		case "_invite":
			fallthrough
		case "invite":
			invites = append(invites, e.StateKey)
		default:
			leaves = append(leaves, e.StateKey)
		}
	}
	return
}
// Returns all current NOT MEMBERSHIP state events matching the event types given in all rooms. Returns a map of
// room ID to events in that room.
func (s *Storage) currentNotMembershipStateEventsInAllRooms(txn *sqlx.Tx, eventTypes []string) (map[string][]Event, error) {
	query, args, err := sqlx.In(
		`SELECT syncv3_events.room_id, syncv3_events.event_type, syncv3_events.state_key, syncv3_events.event FROM syncv3_events
		WHERE syncv3_events.event_type IN (?)
		AND syncv3_events.event_nid IN (
			SELECT UNNEST(events) FROM syncv3_snapshots WHERE syncv3_snapshots.snapshot_id IN (SELECT current_snapshot_id FROM syncv3_rooms)
		)`,
		eventTypes,
	)
	if err != nil {
		return nil, err
	}
	rows, err := txn.Query(txn.Rebind(query), args...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	result := make(map[string][]Event)
	for rows.Next() {
		var ev Event
		if err := rows.Scan(&ev.RoomID, &ev.Type, &ev.StateKey, &ev.JSON); err != nil {
			return nil, err
		}
		result[ev.RoomID] = append(result[ev.RoomID], ev)
	}
	return result, nil
}
func (s *Storage) Accumulate(userID, roomID string, timeline sync2.TimelineResponse) (result AccumulateResult, err error) {
	if len(timeline.Events) == 0 {
		return AccumulateResult{}, nil
	}
	err = sqlutil.WithTransaction(s.Accumulator.db, func(txn *sqlx.Tx) error {
		result, err = s.Accumulator.Accumulate(txn, userID, roomID, timeline)
		return err
	})
	return result, err
}
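// Illustrative call site (sketch only): a poller hands each room's v2 timeline chunk
// straight to Accumulate. Only the Events field is shown; roomTimelineEvents is an
// assumed []json.RawMessage taken from a v2 /sync response.
//
//	res, err := store.Accumulate(userID, roomID, sync2.TimelineResponse{
//		Events: roomTimelineEvents,
//	})
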
func (s *Storage) Initialise(roomID string, state []json.RawMessage) (InitialiseResult, error) {
	return s.Accumulator.Initialise(roomID, state)
}
// EventNIDs fetches the raw JSON form of events given a slice of eventNIDs. The events
// are returned in ascending NID order; the order of eventNIDs is ignored.
func (s *Storage) EventNIDs(eventNIDs []int64) ([]json.RawMessage, error) {
	// TODO: this selects a bunch of rows from the DB, but we only use the raw JSON
	// itself.
	events, err := s.EventsTable.SelectByNIDs(nil, true, eventNIDs)
	if err != nil {
		return nil, err
	}
	e := make([]json.RawMessage, len(events))
	for i := range events {
		e[i] = events[i].JSON
	}
	return e, nil
}

func (s *Storage) StateSnapshot(snapID int64) (state []json.RawMessage, err error) {
	err = sqlutil.WithTransaction(s.Accumulator.db, func(txn *sqlx.Tx) error {
		snapshotRow, err := s.Accumulator.snapshotTable.Select(txn, snapID)
		if err != nil {
			return err
		}
		events, err := s.Accumulator.eventsTable.SelectByNIDs(txn, true, append(snapshotRow.MembershipEvents, snapshotRow.OtherEvents...))
		if err != nil {
			return fmt.Errorf("failed to select state snapshot %v: %s", snapID, err)
		}
		state = make([]json.RawMessage, len(events))
		for i := range events {
			state[i] = events[i].JSON
		}
		return nil
	})
	return
}
// Look up room state after the given event position and no further. eventTypesToStateKeys is a map of event type to a list of state keys for that event type.
// If the list of state keys is empty then all events matching that event type will be returned. If the map is empty entirely, then all room state
// will be returned.
func (s *Storage) RoomStateAfterEventPosition(ctx context.Context, roomIDs []string, pos int64, eventTypesToStateKeys map[string][]string) (roomToEvents map[string][]Event, err error) {
	_, span := internal.StartSpan(ctx, "RoomStateAfterEventPosition")
	defer span.End()
	roomToEvents = make(map[string][]Event, len(roomIDs))
	roomIndex := make(map[string]int, len(roomIDs))
	err = sqlutil.WithTransaction(s.Accumulator.db, func(txn *sqlx.Tx) error {
		// we have 2 ways to pull the latest events:
		//  - superfast rooms table (which races as it can be updated before the new state hits the dispatcher)
		//  - slower events table query
		// we will try to fulfill as many rooms as possible with the rooms table, only using the slower events table
		// query if we can prove we have races. We can prove this because the latest NIDs will be > pos, meaning the
		// database state is ahead of the in-memory state (which is normal as we update the DB first). This should
		// happen infrequently though, so we will warn about this behaviour.
		roomToLatestNIDs, err := s.Accumulator.roomsTable.LatestNIDs(txn, roomIDs)
		if err != nil {
			return err
		}
		fastNIDs := make([]int64, 0, len(roomToLatestNIDs))
		var slowRooms []string
		for roomID, latestNID := range roomToLatestNIDs {
			if latestNID > pos {
				slowRooms = append(slowRooms, roomID)
			} else {
				fastNIDs = append(fastNIDs, latestNID)
			}
		}
		latestEvents, err := s.Accumulator.eventsTable.SelectByNIDs(txn, true, fastNIDs)
		if err != nil {
			return fmt.Errorf("failed to select latest nids in rooms %v: %s", roomIDs, err)
		}
		if len(slowRooms) > 0 {
			logger.Warn().Int("slow_rooms", len(slowRooms)).Msg("RoomStateAfterEventPosition: pos value provided is far behind the database copy, performance degraded")
			latestSlowEvents, err := s.Accumulator.eventsTable.LatestEventInRooms(txn, slowRooms, pos)
			if err != nil {
				return err
			}
			latestEvents = append(latestEvents, latestSlowEvents...)
		}
		for i, ev := range latestEvents {
			roomIndex[ev.RoomID] = i
			if ev.BeforeStateSnapshotID == 0 {
				// if there is no before snapshot then this last event NID is _part of_ the initial state,
				// ergo the state after this == the current state and we can safely ignore the lastEventNID
				ev.BeforeStateSnapshotID = 0
				ev.BeforeStateSnapshotID, err = s.Accumulator.roomsTable.CurrentAfterSnapshotID(txn, ev.RoomID)
				if err != nil {
					return err
				}
				latestEvents[i] = ev
			}
		}

		if len(eventTypesToStateKeys) == 0 {
			for _, ev := range latestEvents {
				snapshotRow, err := s.Accumulator.snapshotTable.Select(txn, ev.BeforeStateSnapshotID)
				if err != nil {
					return err
				}
				allStateEventNIDs := append(snapshotRow.MembershipEvents, snapshotRow.OtherEvents...)
				// we need to roll forward if this event is state
				if gjson.ParseBytes(ev.JSON).Get("state_key").Exists() {
					if ev.ReplacesNID != 0 {
						// we determined at insert time of this event that this event replaces a nid in the snapshot.
						// find it and replace it
						for j := range allStateEventNIDs {
							if allStateEventNIDs[j] == ev.ReplacesNID {
								allStateEventNIDs[j] = ev.NID
								break
							}
						}
					} else {
						// the event is still state, but it doesn't replace anything, so just add it onto the snapshot,
						// but only if we haven't already
						alreadyExists := false
						for _, nid := range allStateEventNIDs {
							if nid == ev.NID {
								alreadyExists = true
								break
							}
						}
						if !alreadyExists {
							allStateEventNIDs = append(allStateEventNIDs, ev.NID)
						}
					}
				}
				events, err := s.Accumulator.eventsTable.SelectByNIDs(txn, true, allStateEventNIDs)
				if err != nil {
					return fmt.Errorf("failed to select state snapshot %v for room %v: %s", ev.BeforeStateSnapshotID, ev.RoomID, err)
				}
				roomToEvents[ev.RoomID] = events
			}
		} else {
			// do an optimised query to pull out only the event types and state keys we care about.
			var args []interface{} // event type, state key, event type, state key, ....
			snapIDs := make([]int64, len(latestEvents))
			for i := range latestEvents {
				snapIDs[i] = latestEvents[i].BeforeStateSnapshotID
			}
			args = append(args, pq.Int64Array(snapIDs))
			var wheres []string
			hasMembershipFilter := false
			hasOtherFilter := false
			for evType, skeys := range eventTypesToStateKeys {
				if evType == "m.room.member" {
					hasMembershipFilter = true
				} else {
					hasOtherFilter = true
				}
				for _, skey := range skeys {
					args = append(args, evType, skey)
					wheres = append(wheres, "(syncv3_events.event_type = ? AND syncv3_events.state_key = ?)")
				}
				if len(skeys) == 0 {
					args = append(args, evType)
					wheres = append(wheres, "syncv3_events.event_type = ?")
				}
			}
			// figure out which state events to look at - if there is no m.room.member filter we can be super fast
			nidcols := "array_cat(events, membership_events)"
			if hasMembershipFilter && !hasOtherFilter {
				nidcols = "membership_events"
			} else if !hasMembershipFilter && hasOtherFilter {
				nidcols = "events"
			}
			// It's not possible for there to be no membership filter and no other filter, else we wouldn't be
			// executing this code. It is possible to have both, in which case neither `if` branch above executes.
			// Similar to CurrentStateEventsInAllRooms.
			// We're using a CTE here, since unnesting the nids is quite expensive. Using the array as-is
			// and using ANY() instead performs quite well (e.g. 86k membership events and 130ms execution time, vs
			// the previous query with unnest which took 2.5s)
			query, args, err := sqlx.In(
				`
				WITH nids AS (
					SELECT `+nidcols+` AS allNids FROM syncv3_snapshots WHERE syncv3_snapshots.snapshot_id = ANY(?)
				)
				SELECT syncv3_events.event_nid, syncv3_events.room_id, syncv3_events.event_type, syncv3_events.state_key, syncv3_events.event
				FROM syncv3_events, nids
				WHERE (`+strings.Join(wheres, " OR ")+`) AND syncv3_events.event_nid = ANY(nids.allNids)
				ORDER BY syncv3_events.event_nid ASC`,
				args...,
			)
			if err != nil {
				return fmt.Errorf("failed to form sql query: %s", err)
			}
			rows, err := txn.Query(txn.Rebind(query), args...)
			if err != nil {
				return fmt.Errorf("failed to execute query: %s", err)
			}
			defer rows.Close()
			for rows.Next() {
				var ev Event
				if err := rows.Scan(&ev.NID, &ev.RoomID, &ev.Type, &ev.StateKey, &ev.JSON); err != nil {
					return err
				}
				i := roomIndex[ev.RoomID]
				if latestEvents[i].ReplacesNID == ev.NID {
					// this event is replaced by the last event
					ev = latestEvents[i]
				}
				roomToEvents[ev.RoomID] = append(roomToEvents[ev.RoomID], ev)
			}
			// handle the most recent events which won't be in the snapshot but may need to be.
			// we handle the replace case but don't handle brand new state events
			for i := range latestEvents {
				if latestEvents[i].ReplacesNID == 0 {
					// check if we should include it
					for evType, stateKeys := range eventTypesToStateKeys {
						if evType != latestEvents[i].Type {
							continue
						}
						if len(stateKeys) == 0 {
							roomToEvents[latestEvents[i].RoomID] = append(roomToEvents[latestEvents[i].RoomID], latestEvents[i])
						} else {
							for _, skey := range stateKeys {
								if skey == latestEvents[i].StateKey {
									roomToEvents[latestEvents[i].RoomID] = append(roomToEvents[latestEvents[i].RoomID], latestEvents[i])
									break
								}
							}
						}
					}
				}
			}
		}
		return nil
	})
	return
}
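// For example (illustrative values only), to fetch just the room name, canonical alias
// and a specific user's membership at position pos:
//
//	state, err := store.RoomStateAfterEventPosition(ctx, roomIDs, pos, map[string][]string{
//		"m.room.name":            {""},
//		"m.room.canonical_alias": {""},
//		"m.room.member":          {"@alice:example.org"},
//	})
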
// LatestEventsInRooms returns the most recent events
//   - in the given rooms
//   - that the user has permission to see
//   - with NIDs <= `to`.
//
// Up to `limit` events are chosen per room. This limit is itself capped at MaxTimelineLimit.
func (s *Storage) LatestEventsInRooms(userID string, roomIDs []string, to int64, limit int) (map[string]*LatestEvents, error) {
	roomIDToRange, err := s.visibleEventNIDsBetweenForRooms(userID, roomIDs, 0, to)
	if err != nil {
		return nil, err
	}
	if s.MaxTimelineLimit != 0 && limit > s.MaxTimelineLimit {
		limit = s.MaxTimelineLimit
	}
	result := make(map[string]*LatestEvents, len(roomIDs))
	err = sqlutil.WithTransaction(s.Accumulator.db, func(txn *sqlx.Tx) error {
		for roomID, r := range roomIDToRange {
			var earliestEventNID int64
			var latestEventNID int64
			var roomEvents []json.RawMessage
			// the most recent event will be first
			events, err := s.EventsTable.SelectLatestEventsBetween(txn, roomID, r[0]-1, r[1], limit)
			if err != nil {
				return fmt.Errorf("room %s failed to SelectEventsBetween: %s", roomID, err)
			}
			for _, ev := range events {
				if latestEventNID == 0 { // set first time and never again
					latestEventNID = ev.NID
				}
				roomEvents = append(roomEvents, ev.JSON)
				earliestEventNID = ev.NID
				if len(roomEvents) >= limit {
					break
				}
			}
			// we want the most recent event to be last, so reverse the slice now in-place.
			slices.Reverse(roomEvents)
			latestEvents := LatestEvents{
				LatestNID: latestEventNID,
				Timeline:  roomEvents,
			}
			if earliestEventNID != 0 {
				// the oldest event needs a prev batch token, so find one now
				prevBatch, err := s.EventsTable.SelectClosestPrevBatch(txn, roomID, earliestEventNID)
				if err != nil {
					return fmt.Errorf("failed to select prev_batch for room %s : %s", roomID, err)
				}
				latestEvents.PrevBatch = prevBatch
			}
			result[roomID] = &latestEvents
		}
		return nil
	})
	return result, err
}
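// Sketch of typical use (values are illustrative): fetch up to 20 timeline events per
// room for a user, then read the prev_batch token for back-pagination of the oldest event.
//
//	latest, err := store.LatestEventsInRooms(userID, roomIDs, toNID, 20)
//	if err == nil {
//		for roomID, le := range latest {
//			_ = le.PrevBatch // pagination token for the oldest event in le.Timeline
//			_ = roomID
//		}
//	}
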
// Remove state snapshots which cannot be accessed by clients. The latest MaxTimelineLimit
// snapshots must be kept, +1 for the current state. This handles the worst case where all
// MaxTimelineLimit events are state events and hence each event makes a new snapshot. We can safely
// delete all snapshots older than this, as it's not possible to reach those snapshots: the proxy
// does not handle historical state (deferring to the homeserver for that).
func (s *Storage) RemoveInaccessibleStateSnapshots() error {
	numToKeep := s.MaxTimelineLimit + 1
	// Create a CTE which ranks each snapshot so we can figure out which snapshots to delete,
	// then execute the delete using the CTE.
	//
	// A per-room version of this query:
	//   WITH ranked_snapshots AS (
	//       SELECT
	//           snapshot_id,
	//           room_id,
	//           ROW_NUMBER() OVER (PARTITION BY room_id ORDER BY snapshot_id DESC) AS row_num
	//       FROM syncv3_snapshots
	//   )
	//   DELETE FROM syncv3_snapshots WHERE snapshot_id IN(
	//       SELECT snapshot_id FROM ranked_snapshots WHERE row_num > 51 AND room_id='!....'
	//   );
	awfulQuery := fmt.Sprintf(`WITH ranked_snapshots AS (
		SELECT
			snapshot_id,
			room_id,
			ROW_NUMBER() OVER (PARTITION BY room_id ORDER BY snapshot_id DESC) AS row_num
		FROM
			syncv3_snapshots
	)
	DELETE FROM syncv3_snapshots USING ranked_snapshots
	WHERE syncv3_snapshots.snapshot_id = ranked_snapshots.snapshot_id
	AND ranked_snapshots.row_num > %d;`, numToKeep)
	result, err := s.DB.Exec(awfulQuery)
	if err != nil {
		return fmt.Errorf("failed to RemoveInaccessibleStateSnapshots: Exec %s", err)
	}
	rowsAffected, err := result.RowsAffected()
	if err == nil {
		logger.Info().Int64("rows_affected", rowsAffected).Msg("RemoveInaccessibleStateSnapshots: deleted rows")
	}
	return nil
}
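// A sweep like this would typically be driven periodically rather than on every write.
// A hypothetical in-package caller (not defined in this file; the interval is an
// assumption) could run it from a ticker and stop on shutdown:
//
//	go func() {
//		t := time.NewTicker(time.Hour)
//		defer t.Stop()
//		for {
//			select {
//			case <-t.C:
//				_ = store.RemoveInaccessibleStateSnapshots()
//			case <-store.shutdownCh:
//				return
//			}
//		}
//	}()
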
func (s *Storage) GetClosestPrevBatch(roomID string, eventNID int64) (prevBatch string) {
	var err error
	sqlutil.WithTransaction(s.DB, func(txn *sqlx.Tx) error {
		// discard the error, we don't care if we fail as it's best effort
		prevBatch, err = s.EventsTable.SelectClosestPrevBatch(txn, roomID, eventNID)
		return err
	})
	return
}
// visibleEventNIDsBetweenForRooms determines which events a given user has permission to see.
// It accepts a nid range [from, to]. For each given room, it calculates the NID range
// [A1, B1] within [from, to] in which the user has permission to see events.
func (s *Storage) visibleEventNIDsBetweenForRooms(userID string, roomIDs []string, from, to int64) (map[string][2]int64, error) {
	// load *THESE* joined rooms for this user at from (inclusive)
	var membershipEvents []Event
	var err error
	if from != 0 {
		// if from==0 then this query will return nothing, so optimise it out
		membershipEvents, err = s.Accumulator.eventsTable.SelectEventsWithTypeStateKeyInRooms(roomIDs, "m.room.member", userID, 0, from)
		if err != nil {
			return nil, fmt.Errorf("VisibleEventNIDsBetweenForRooms.SelectEventsWithTypeStateKeyInRooms: %s", err)
		}
	}
	joinTimingsAtFromByRoomID, err := s.determineJoinedRoomsFromMemberships(membershipEvents)
	if err != nil {
		return nil, fmt.Errorf("failed to work out joined rooms for %s at pos %d: %s", userID, from, err)
	}
	// load membership deltas for *THESE* rooms for this user
	membershipEvents, err = s.Accumulator.eventsTable.SelectEventsWithTypeStateKeyInRooms(roomIDs, "m.room.member", userID, from, to)
	if err != nil {
		return nil, fmt.Errorf("failed to load membership events: %s", err)
	}
	return s.visibleEventNIDsWithData(joinTimingsAtFromByRoomID, membershipEvents, userID, from, to)
}
// Work out the NID ranges to pull events from for this user. Given a from and to event nid stream position,
// this function returns a map of room ID to a 2-element from|to position. These positions are
// all INCLUSIVE, and the client should be informed of these events at some point. For example:
//
//	                    Stream Positions
//	          1     2     3     4     5     6     7     8     9     10
//	Room A    Maj   E     E                       E
//	Room B                      E     Maj         E
//	Room C                                  E                 Mal   E     (a already joined to this room at position 0)
//
// E=message event, M=membership event, followed by user letter, followed by 'i' or 'j' or 'l' for invite|join|leave
//
//   - For Room A: from=1, to=10, returns {RoomA: [1,10]}  (tests events in joined room)
//   - For Room B: from=1, to=10, returns {RoomB: [5,10]}  (tests joining a room starts events)
//   - For Room C: from=1, to=10, returns {RoomC: [0,9]}   (tests leaving a room stops events)
//
// In cases where a user joins/leaves a room multiple times in the nid range, only the last range is returned.
// This is critical to ensure we don't skip out timeline events due to history visibility (which the proxy defers
// to the upstream HS for). See https://github.com/matrix-org/sliding-sync/issues/365 for what happens if we returned
// all ranges.
//
//	                    Stream Positions
//	          1     2     3     4     5     6     7     8     9     10    11    12    13    14    15
//	Room D    Maj   E           Mal         E           Maj   E     Mal         E
//	Room E                E           Mai   E     E                                   Maj   E     E
//
//   - For Room D: from=1, to=15 returns {RoomD: [8,10]}   (tests multi-join/leave)
//   - For Room E: from=1, to=15 returns {RoomE: [13,15]}  (tests invites)
func (s *Storage) VisibleEventNIDsBetween(userID string, from, to int64) (map[string][2]int64, error) {
	// load *ALL* joined rooms for this user at from (inclusive)
	joinTimingsAtFromByRoomID, err := s.JoinedRoomsAfterPosition(userID, from)
	if err != nil {
		return nil, fmt.Errorf("failed to work out joined rooms for %s at pos %d: %s", userID, from, err)
	}
	// load *ALL* membership deltas for all rooms for this user
	membershipEvents, err := s.Accumulator.eventsTable.SelectEventsWithTypeStateKey("m.room.member", userID, from, to)
	if err != nil {
		return nil, fmt.Errorf("failed to load membership events: %s", err)
	}

	return s.visibleEventNIDsWithData(joinTimingsAtFromByRoomID, membershipEvents, userID, from, to)
}
func (s *Storage) visibleEventNIDsWithData(joinTimingsAtFromByRoomID map[string]internal.EventMetadata, membershipEvents []Event, userID string, from, to int64) (map[string][2]int64, error) {
	// load membership events in order and bucket based on room ID
	roomIDToLogs := make(map[string][]membershipEvent)
	for _, ev := range membershipEvents {
		evJSON := gjson.ParseBytes(ev.JSON)
		roomIDToLogs[ev.RoomID] = append(roomIDToLogs[ev.RoomID], membershipEvent{
			Event:      ev,
			StateKey:   evJSON.Get("state_key").Str,
			Membership: evJSON.Get("content.membership").Str,
		})
	}
	// Performs the algorithm
	calculateVisibleEventNIDs := func(isJoined bool, fromIncl, toIncl int64, logs []membershipEvent) [2]int64 {
		// short circuit when there are no membership deltas
		if len(logs) == 0 {
			return [2]int64{
				fromIncl, toIncl, // TODO: is this actually valid? Surely omitting it is the right answer?
			}
		}
		var result [][2]int64
		var startIndex int64 = -1
		if isJoined {
			startIndex = fromIncl
		}
		for _, memEvent := range logs {
			// check for a valid transition (join->leave|ban or leave|invite->join) - we won't always get valid transitions
			// e.g logs will be there for things like leave->ban which we don't care about
			isValidTransition := false
			if isJoined && (memEvent.Membership == "leave" || memEvent.Membership == "ban") {
				isValidTransition = true
			} else if !isJoined && memEvent.Membership == "join" {
				isValidTransition = true
			} else if !isJoined && memEvent.Membership == "invite" {
				// short-circuit: invites are sent on their own and don't affect ranges
				result = append(result, [2]int64{memEvent.NID, memEvent.NID})
				continue
			}
			if !isValidTransition {
				continue
			}
			if isJoined {
				// transitioning to leave, we get all events up to and including the leave event
				result = append(result, [2]int64{startIndex, memEvent.NID})
				isJoined = false
			} else {
				// transitioning to joined, we will get the join and some more events in a bit
				startIndex = memEvent.NID
				isJoined = true
			}
		}
		// if we are still joined to the room at this point, grab all events up to toIncl
		if isJoined {
			result = append(result, [2]int64{startIndex, toIncl})
		}
		if len(result) == 0 {
			return [2]int64{}
		}
		// we only care about the LAST nid range, otherwise we can end up with gaps being returned in the
		// timeline. See https://github.com/matrix-org/sliding-sync/issues/365
		return result[len(result)-1]
	}
	// For each joined room, perform the algorithm and delete the logs afterwards
	result := make(map[string][2]int64)
	for joinedRoomID := range joinTimingsAtFromByRoomID {
		roomResult := calculateVisibleEventNIDs(true, from, to, roomIDToLogs[joinedRoomID])
		result[joinedRoomID] = roomResult
		delete(roomIDToLogs, joinedRoomID)
	}
	// Handle rooms which we are not joined to but have logs for
	for roomID, logs := range roomIDToLogs {
		roomResult := calculateVisibleEventNIDs(false, from, to, logs)
		result[roomID] = roomResult
	}
	return result, nil
}
func (s *Storage) RoomMembershipDelta(roomID string, from, to int64, limit int) (eventJSON []json.RawMessage, upTo int64, err error) {
	err = sqlutil.WithTransaction(s.Accumulator.db, func(txn *sqlx.Tx) error {
		nids, err := s.Accumulator.eventsTable.SelectEventNIDsWithTypeInRoom(txn, "m.room.member", limit, roomID, from, to)
		if err != nil {
			return err
		}
		if len(nids) == 0 {
			return nil
		}
		upTo = nids[len(nids)-1]
		events, err := s.Accumulator.eventsTable.SelectByNIDs(txn, true, nids)
		if err != nil {
			return err
		}
		eventJSON = make([]json.RawMessage, len(events))
		for i := range events {
			eventJSON[i] = events[i].JSON
		}
		return nil
	})
	return
}
// perf: improve startup speeds by using temp tables
//
// When the proxy is run with large DBs (10m+ events), the startup queries are very
// slow (around 30min to load the initial snapshot). After much EXPLAIN ANALYZEing,
// the cause is Postgres' query planner not making good decisions when the tables are
// that large. Specifically, the startup queries need to pull all joined members in
// all rooms, which ends up being nearly 50% of the entire events table of 10m rows.
// When this query is embedded in a subselect, the query planner assumes that the
// subselect will return only a few rows, and decides to pull those rows via an index.
// In this particular case, indexes are the wrong choice, as there are SO MANY rows
// that a Seq Scan is often more appropriate. Using an index (which is a btree) means
// doing log(n) operations _per row_, or O(0.5 * n * log(n)) overall assuming we pull
// 50% of the table of n rows. As n increases, this is increasingly the wrong call
// compared to a basic O(n) seq scan. When n=10m, a seq scan has a cost of 10m, but
// using indexes has a cost of 16.6m. Dumping the result of the subselect into a
// temporary table lets the query planner see how many rows are actually involved and
// choose the cheaper sequential scan, resulting in better performance. On large DBs,
// this decreases the startup time from 30m to ~5m.

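// A hedged sketch of the technique, as it might look from Go. The query below is an
// assumption for illustration only (the real temp table is prepared elsewhere, before
// AllJoinedMembers is called); the point is that the slow subselect is materialised
// into a temp table first, so the planner no longer mis-estimates it as a few rows.
//
//	tempTableName := "temp_membership_nids" // illustrative name
//	_, err := txn.Exec(`CREATE TEMP TABLE ` + tempTableName + ` AS
//		SELECT membership_nid FROM ( /* the previously-slow subselect */ ) AS sub`)
//	if err != nil {
//		return err
//	}
//	// AllJoinedMembers then joins this temp table against syncv3_events.
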
// Extract all rooms with joined members, and include the joined user list. Requires a prepared snapshot in order to be called.
// Populates the join/invite count and heroes for the returned metadata.
func (s *Storage) AllJoinedMembers(txn *sqlx.Tx, tempTableName string) (joinedMembers map[string][]string, metadata map[string]internal.RoomMetadata, err error) {
	// Select the most recent members for each room to serve as Heroes. The spec is ambiguous here:
	// "This should be the first 5 members of the room, ordered by stream ordering, which are joined or invited."
	// Unclear if this is the first 5 *most recent* (backwards) or forwards. For now we'll use the most recent
	// ones, and select 6 of them so we can always use 5 no matter who is requesting the room name.
	rows, err := txn.Query(
		`SELECT membership_nid, room_id, state_key, membership FROM ` + tempTableName + ` INNER JOIN syncv3_events
		on membership_nid = event_nid WHERE membership = 'join' OR membership = '_join' OR membership = 'invite' OR membership = '_invite' ORDER BY event_nid ASC`,
	)
	if err != nil {
		return nil, nil, err
	}
	defer rows.Close()
	joinedMembers = make(map[string][]string)
	inviteCounts := make(map[string]int)
	heroNIDs := make(map[string]*circularSlice[int64])
	var stateKey string
	var membership string
	var roomID string
	var nid int64
	for rows.Next() {
		if err := rows.Scan(&nid, &roomID, &stateKey, &membership); err != nil {
			return nil, nil, err
		}
		heroes := heroNIDs[roomID]
		if heroes == nil {
			heroes = &circularSlice[int64]{max: 6}
			heroNIDs[roomID] = heroes
		}
		switch membership {
		case "join":
			fallthrough
		case "_join":
			users := joinedMembers[roomID]
			users = append(users, stateKey)
			joinedMembers[roomID] = users
			heroes.append(nid)
		case "invite":
			fallthrough
		case "_invite":
			inviteCounts[roomID] = inviteCounts[roomID] + 1
			heroes.append(nid)
		}
	}
	// now select the membership events for the heroes
	var allHeroNIDs []int64
	for _, nids := range heroNIDs {
		allHeroNIDs = append(allHeroNIDs, nids.vals...)
	}
	heroEvents, err := s.EventsTable.SelectByNIDs(txn, true, allHeroNIDs)
	if err != nil {
		return nil, nil, err
	}
	heroes := make(map[string][]internal.Hero)
	// loop backwards so the most recent hero is first in the hero list
	for i := len(heroEvents) - 1; i >= 0; i-- {
		ev := heroEvents[i]
		evJSON := gjson.ParseBytes(ev.JSON)
		roomHeroes := heroes[ev.RoomID]
		roomHeroes = append(roomHeroes, internal.Hero{
			ID:     ev.StateKey,
			Name:   evJSON.Get("content.displayname").Str,
			Avatar: evJSON.Get("content.avatar_url").Str,
		})
		heroes[ev.RoomID] = roomHeroes
	}
	metadata = make(map[string]internal.RoomMetadata)
	for roomID, members := range joinedMembers {
		m := internal.NewRoomMetadata(roomID)
		m.JoinCount = len(members)
		m.InviteCount = inviteCounts[roomID]
		m.Heroes = heroes[roomID]
		metadata[roomID] = *m
	}
	return joinedMembers, metadata, nil
}

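// Illustrative shape of the results above (IDs invented for the example):
//
//	joinedMembers = map[string][]string{
//		"!abc:example.org": {"@alice:example.org", "@bob:example.org"},
//	}
//	// metadata["!abc:example.org"] then has JoinCount == 2, InviteCount equal to the
//	// number of pending invites, and Heroes holding up to 6 of the most recently
//	// changed members, most recent first.
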
// Cleaner runs every n: it cleans the transactions table using a boundary time of n
// ago (clamped to at least one hour ago) and removes inaccessible state snapshots.
// It stops when Teardown is called.
func (s *Storage) Cleaner(n time.Duration) {
Loop:
	for {
		select {
		case <-time.After(n):
			now := time.Now()
			boundaryTime := now.Add(-1 * n)
			if n < time.Hour {
				boundaryTime = now.Add(-1 * time.Hour)
			}
			logger.Info().Time("boundaryTime", boundaryTime).Msg("Cleaner running")
			err := s.TransactionsTable.Clean(boundaryTime)
			if err != nil {
				logger.Warn().Err(err).Msg("failed to clean txn ID table")
				sentry.CaptureException(err)
			}
			// we also want to clean up stale state snapshots which are inaccessible, to
			// keep the size of the syncv3_snapshots table low.
			if err = s.RemoveInaccessibleStateSnapshots(); err != nil {
				logger.Warn().Err(err).Msg("failed to remove inaccessible state snapshots")
				sentry.CaptureException(err)
			}
		case <-s.shutdownCh:
			break Loop
		}
	}
}

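// Illustrative usage sketch (assumed caller code, not part of this file):
//
//	store := NewStorage(postgresURI)
//	go store.Cleaner(time.Hour) // clean up in the background once an hour
//	defer store.Teardown()      // closes shutdownCh, which stops the Cleaner
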
// LatestEventNIDInRooms returns the latest event NID at or before highestNID for each
// of the given rooms.
func (s *Storage) LatestEventNIDInRooms(roomIDs []string, highestNID int64) (roomToNID map[string]int64, err error) {
	roomToNID = make(map[string]int64)
	err = sqlutil.WithTransaction(s.Accumulator.db, func(txn *sqlx.Tx) error {
		// Pull out the latest nids for all the rooms. If they are < highestNID then use them, else we need to query the
		// events table (slow) for the latest nid in this room which is < highestNID.
		fastRoomToLatestNIDs, err := s.Accumulator.roomsTable.LatestNIDs(txn, roomIDs)
		if err != nil {
			return err
		}
		var slowRooms []string
		for _, roomID := range roomIDs {
			nid := fastRoomToLatestNIDs[roomID]
			if nid > 0 && nid <= highestNID {
				roomToNID[roomID] = nid
			} else {
				// we need to do a slow query for this
				slowRooms = append(slowRooms, roomID)
			}
		}
		if len(slowRooms) == 0 {
			return nil // no work to do
		}
		logger.Warn().Int("slow_rooms", len(slowRooms)).Msg("LatestEventNIDInRooms: pos value provided is far behind the database copy, performance degraded")
		slowRoomToLatestNIDs, err := s.EventsTable.LatestEventNIDInRooms(txn, slowRooms, highestNID)
		if err != nil {
			return err
		}
		for roomID, nid := range slowRoomToLatestNIDs {
			roomToNID[roomID] = nid
		}
		return nil
	})
	return roomToNID, err
}

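// Illustrative usage sketch (caller-side code; names are assumptions):
//
//	roomToNID, err := store.LatestEventNIDInRooms([]string{"!a:example.org"}, pos)
//	if err != nil {
//		return err
//	}
//	// roomToNID["!a:example.org"] is the NID of the newest event in that room at or
//	// before pos, if such an event exists.
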
// Returns a map from joined room IDs to EventMetadata, which is nil iff a non-nil error
// is returned.
func (s *Storage) JoinedRoomsAfterPosition(userID string, pos int64) (
	joinTimingByRoomID map[string]internal.EventMetadata, err error,
) {
	// fetch all the membership events up to and including pos
	membershipEvents, err := s.Accumulator.eventsTable.SelectEventsWithTypeStateKey("m.room.member", userID, 0, pos)
	if err != nil {
		return nil, fmt.Errorf("JoinedRoomsAfterPosition.SelectEventsWithTypeStateKey: %s", err)
	}
	return s.determineJoinedRoomsFromMemberships(membershipEvents)
}

// determineJoinedRoomsFromMemberships scans a slice of membership events from multiple
// rooms, to determine which rooms a user is currently joined to. Those events MUST be
//   - sorted by ascending NIDs, and
//   - only memberships for the given user;
//
// neither of these preconditions is checked by this function.
//
// Returns a map from joined room IDs to EventMetadata, which is nil iff a non-nil error
// is returned.
func (s *Storage) determineJoinedRoomsFromMemberships(membershipEvents []Event) (
	joinTimingByRoomID map[string]internal.EventMetadata, err error,
) {
	joinTimingByRoomID = make(map[string]internal.EventMetadata, len(membershipEvents))
	for _, ev := range membershipEvents {
		parsed := gjson.ParseBytes(ev.JSON)
		membership := parsed.Get("content.membership").Str
		switch membership {
		// These are "join" and the only memberships that you can transition to after
		// a join: see e.g. the transition diagram in
		// https://spec.matrix.org/v1.7/client-server-api/#room-membership
		case "join":
			// Only remember a join NID if we are not joined to this room according to
			// the state before ev.
			if _, currentlyJoined := joinTimingByRoomID[ev.RoomID]; !currentlyJoined {
				joinTimingByRoomID[ev.RoomID] = internal.EventMetadata{
					NID:       ev.NID,
					Timestamp: parsed.Get("origin_server_ts").Uint(),
				}
			}
		case "ban":
			fallthrough
		case "leave":
			delete(joinTimingByRoomID, ev.RoomID)
		}
	}
	return joinTimingByRoomID, nil
}

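// Illustrative walkthrough of the logic above (NIDs and room ID invented): given this
// user's membership events for !r:example.org in ascending NID order:
//
//	NID 10: join, NID 20: leave, NID 30: join, NID 40: join (profile change)
//
// the returned map records the join at NID 30. The leave at NID 20 deletes the entry
// stored for NID 10, the re-join at NID 30 is stored, and the event at NID 40 is
// ignored because the user is already joined at that point.
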
// Teardown closes the shutdown channel (stopping the Cleaner, if it is running) and
// closes the database connection, panicking if the close fails.
func (s *Storage) Teardown() {
	if !s.shutdown {
		s.shutdown = true
		close(s.shutdownCh)
	}
	err := s.Accumulator.db.Close()
	if err != nil {
		panic("Storage.Teardown: " + err.Error())
	}
}

// circularSlice is a fixed-capacity ring buffer: appends fill `vals` up to `max`
// entries, after which further appends wrap around and overwrite the oldest entries.
// Mostly useful for lazily calculating heroes. The values returned aren't sorted.
type circularSlice[T any] struct {
	i    int
	vals []T
	max  int
}

func (s *circularSlice[T]) append(val T) {
	if len(s.vals) < s.max {
		// populate up to max
		s.vals = append(s.vals, val)
		s.i++
		return
	}
	// wraparound
	if s.i == s.max {
		s.i = 0
	}
	// replace this entry
	s.vals[s.i] = val
	s.i++
}

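// Illustrative example (worked by hand): appending 8 values to a circularSlice with
// max == 6 keeps only the most recent 6, overwriting the oldest in ring order:
//
//	cs := circularSlice[int64]{max: 6}
//	for i := int64(1); i <= 8; i++ {
//		cs.append(i)
//	}
//	// cs.vals is now [7 8 3 4 5 6]: 7 and 8 overwrote 1 and 2, and the values are
//	// not in chronological order.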