// sliding-sync/sync3/lists.go

package sync3
import (
"context"
"strings"
"github.com/matrix-org/sliding-sync/internal"
)
// OverwriteVal tells AssignList what to do when a list already exists at the
// requested key.
type OverwriteVal bool

var (
	// DoNotOverwrite makes AssignList hand back the existing list, if there is one.
	DoNotOverwrite = OverwriteVal(false)
	// Overwrite makes AssignList always build and store a fresh list.
	Overwrite = OverwriteVal(true)
)
// ListOp enumerates the operations that a room update can cause on a list.
type ListOp uint8

var (
	// ListOpAdd means the room is added to the list.
	ListOpAdd = ListOp(1)
	// ListOpDel means the room is removed from the list.
	ListOpDel = ListOp(2)
	// ListOpChange means the room stays in the list but may change position.
	ListOpChange = ListOp(3)
)
// RoomListDelta describes the operation a room update implies for one list.
type RoomListDelta struct {
	// ListKey is the key of the affected list.
	ListKey string
	// Op is the operation (add, delete or change) to apply for that list.
	Op ListOp
}
type RoomDelta struct {
bugfix: fix a bug with list ops when sorting with unread counts; fix a bug which could cause typing/receipts to not be live streamed Previously, we would not send unread count INCREASES to the client, as we would expect the actual event update to wake up the client conn. This was great because it meant the event+unread count arrived atomically on the client. This was implemented as "parse unread counts first, then events". However, this introduced a bug when there were >1 user in the same room. In this scenario, one poller may get the event first, which would go through to the client. The subsequent unread count update would then be dropped and not sent to the client. This would just be an unfortunate UI bug if it weren't for sorting by_notification_count and sorting by_notification_level. Both of these sort operations use the unread counts to determine room list ordering. This list would be updated on the server, but no list operation would be sent to the client, causing the room lists to de-sync, and resulting in incorrect DELETE/INSERT ops. This would manifest as duplicate rooms on the room list. In the process of fixing this, also fix a bug where typing notifications would not always be sent to the client - it would only do so when piggybacked due to incorrect type switches. Also fix another bug which prevented receipts from always being sent to the client. This was caused by the extensions handler not checking if the receipt extension had data to determine if it should return. This the interacted with an as-yet unfixed bug which cleared the extension on subequent updates, causing the receipt to be lost entirely. A fix for this will be inbound soon.
2023-02-07 13:34:26 +00:00
RoomNameChanged bool
RoomAvatarChanged bool
bugfix: fix a bug with list ops when sorting with unread counts; fix a bug which could cause typing/receipts to not be live streamed Previously, we would not send unread count INCREASES to the client, as we would expect the actual event update to wake up the client conn. This was great because it meant the event+unread count arrived atomically on the client. This was implemented as "parse unread counts first, then events". However, this introduced a bug when there were >1 user in the same room. In this scenario, one poller may get the event first, which would go through to the client. The subsequent unread count update would then be dropped and not sent to the client. This would just be an unfortunate UI bug if it weren't for sorting by_notification_count and sorting by_notification_level. Both of these sort operations use the unread counts to determine room list ordering. This list would be updated on the server, but no list operation would be sent to the client, causing the room lists to de-sync, and resulting in incorrect DELETE/INSERT ops. This would manifest as duplicate rooms on the room list. In the process of fixing this, also fix a bug where typing notifications would not always be sent to the client - it would only do so when piggybacked due to incorrect type switches. Also fix another bug which prevented receipts from always being sent to the client. This was caused by the extensions handler not checking if the receipt extension had data to determine if it should return. This the interacted with an as-yet unfixed bug which cleared the extension on subequent updates, causing the receipt to be lost entirely. A fix for this will be inbound soon.
2023-02-07 13:34:26 +00:00
JoinCountChanged bool
InviteCountChanged bool
NotificationCountChanged bool
HighlightCountChanged bool
Lists []RoomListDelta
}
// InternalRequestLists is a list of lists which matches each index position in the request
// JSON 'lists'. It contains all the internal metadata for rooms and controls access to, and
// updates of, said lists.
type InternalRequestLists struct {
	// allRooms holds the latest known metadata for every tracked room, keyed by room ID.
	allRooms map[string]*RoomConnMetadata
	// lists holds the filtered, sorted room list for each list key.
	lists map[string]*FilteredSortableRooms
}
// NewInternalRequestLists returns an InternalRequestLists that tracks no rooms and
// no lists yet. Both internal maps are allocated, so the result is ready for use.
func NewInternalRequestLists() *InternalRequestLists {
	reqLists := new(InternalRequestLists)
	reqLists.allRooms = make(map[string]*RoomConnMetadata, 10)
	reqLists.lists = make(map[string]*FilteredSortableRooms)
	return reqLists
}
2023-05-24 15:16:47 +01:00
func (s *InternalRequestLists) SetRoom(r RoomConnMetadata) (delta RoomDelta) {
existing, exists := s.allRooms[r.RoomID]
if exists {
bugfix: fix a bug with list ops when sorting with unread counts; fix a bug which could cause typing/receipts to not be live streamed Previously, we would not send unread count INCREASES to the client, as we would expect the actual event update to wake up the client conn. This was great because it meant the event+unread count arrived atomically on the client. This was implemented as "parse unread counts first, then events". However, this introduced a bug when there were >1 user in the same room. In this scenario, one poller may get the event first, which would go through to the client. The subsequent unread count update would then be dropped and not sent to the client. This would just be an unfortunate UI bug if it weren't for sorting by_notification_count and sorting by_notification_level. Both of these sort operations use the unread counts to determine room list ordering. This list would be updated on the server, but no list operation would be sent to the client, causing the room lists to de-sync, and resulting in incorrect DELETE/INSERT ops. This would manifest as duplicate rooms on the room list. In the process of fixing this, also fix a bug where typing notifications would not always be sent to the client - it would only do so when piggybacked due to incorrect type switches. Also fix another bug which prevented receipts from always being sent to the client. This was caused by the extensions handler not checking if the receipt extension had data to determine if it should return. This the interacted with an as-yet unfixed bug which cleared the extension on subequent updates, causing the receipt to be lost entirely. A fix for this will be inbound soon.
2023-02-07 13:34:26 +00:00
if existing.NotificationCount != r.NotificationCount {
delta.NotificationCountChanged = true
}
bugfix: fix a bug with list ops when sorting with unread counts; fix a bug which could cause typing/receipts to not be live streamed Previously, we would not send unread count INCREASES to the client, as we would expect the actual event update to wake up the client conn. This was great because it meant the event+unread count arrived atomically on the client. This was implemented as "parse unread counts first, then events". However, this introduced a bug when there were >1 user in the same room. In this scenario, one poller may get the event first, which would go through to the client. The subsequent unread count update would then be dropped and not sent to the client. This would just be an unfortunate UI bug if it weren't for sorting by_notification_count and sorting by_notification_level. Both of these sort operations use the unread counts to determine room list ordering. This list would be updated on the server, but no list operation would be sent to the client, causing the room lists to de-sync, and resulting in incorrect DELETE/INSERT ops. This would manifest as duplicate rooms on the room list. In the process of fixing this, also fix a bug where typing notifications would not always be sent to the client - it would only do so when piggybacked due to incorrect type switches. Also fix another bug which prevented receipts from always being sent to the client. This was caused by the extensions handler not checking if the receipt extension had data to determine if it should return. This the interacted with an as-yet unfixed bug which cleared the extension on subequent updates, causing the receipt to be lost entirely. A fix for this will be inbound soon.
2023-02-07 13:34:26 +00:00
if existing.HighlightCount != r.HighlightCount {
delta.HighlightCountChanged = true
}
delta.InviteCountChanged = !existing.SameInviteCount(&r.RoomMetadata)
delta.JoinCountChanged = !existing.SameJoinCount(&r.RoomMetadata)
delta.RoomNameChanged = !existing.SameRoomName(&r.RoomMetadata)
if delta.RoomNameChanged {
// update the canonical name to allow room name sorting to continue to work
2023-09-15 15:08:07 +02:00
roomName, _ := internal.CalculateRoomName(&r.RoomMetadata, 5)
r.CanonicalisedName = strings.ToLower(
2023-09-15 15:08:07 +02:00
strings.Trim(roomName, "#!():_@"),
)
2023-07-18 13:40:12 +01:00
} else {
// XXX: during TestConnectionTimeoutNotReset there is some situation where
// r.CanonicalisedName is the empty string. Looking at the SetRoom
// call in connstate_live.go, this is because the UserRoomMetadata on
// the RoomUpdate has an empty CanonicalisedName. Either
// a) that is expected, in which case we should _always_ write to
// r.CanonicalisedName here; or
// b) that is not expected, in which case... erm, I don't know what
// to conclude.
r.CanonicalisedName = existing.CanonicalisedName
}
delta.RoomAvatarChanged = !existing.SameRoomAvatar(&r)
if delta.RoomAvatarChanged {
r.ResolvedAvatarURL = internal.CalculateAvatar(&r.RoomMetadata, r.IsDM)
}
2023-05-24 15:16:47 +01:00
// Interpret the timestamp map on r as the changes we should apply atop the
// existing timestamps.
2023-05-25 15:17:21 +01:00
newTimestamps := r.LastInterestedEventTimestamps
2023-05-24 15:16:47 +01:00
r.LastInterestedEventTimestamps = make(map[string]uint64, len(s.lists))
for listKey := range s.lists {
2023-05-25 15:17:21 +01:00
newTs, bump := newTimestamps[listKey]
2023-05-24 15:16:47 +01:00
if bump {
r.LastInterestedEventTimestamps[listKey] = newTs
} else {
prevTs, hadPreviousTs := existing.LastInterestedEventTimestamps[listKey]
if hadPreviousTs {
r.LastInterestedEventTimestamps[listKey] = prevTs
} else {
// This can happen if the listKey is brand-new in this request.
r.LastInterestedEventTimestamps[listKey] = existing.LastMessageTimestamp
}
}
}
} else {
// set the canonical name to allow room name sorting to work
2023-09-15 15:08:07 +02:00
roomName, _ := internal.CalculateRoomName(&r.RoomMetadata, 5)
r.CanonicalisedName = strings.ToLower(
2023-09-15 15:08:07 +02:00
strings.Trim(roomName, "#!():_@"),
)
r.ResolvedAvatarURL = internal.CalculateAvatar(&r.RoomMetadata, r.IsDM)
// We'll automatically use the LastInterestedEventTimestamps provided by the
// caller, so that recency sorts work.
}
add extensions for typing and receipts; bugfixes and additional perf improvements Features: - Add `typing` extension. - Add `receipts` extension. - Add comprehensive prometheus `/metrics` activated via `SYNCV3_PROM`. - Add `SYNCV3_PPROF` support. - Add `by_notification_level` sort order. - Add `include_old_rooms` support. - Add support for `$ME` and `$LAZY`. - Add correct filtering when `*,*` is used as `required_state`. - Add `num_live` to each room response to indicate how many timeline entries are live. Bug fixes: - Use a stricter comparison function on ranges: fixes an issue whereby UTs fail on go1.19 due to change in sorting algorithm. - Send back an `errcode` on HTTP errors (e.g expired sessions). - Remove `unsigned.txn_id` on insertion into the DB. Otherwise other users would see other users txn IDs :( - Improve range delta algorithm: previously it didn't handle cases like `[0,20] -> [20,30]` and would panic. - Send HTTP 400 for invalid range requests. - Don't publish no-op unread counts which just adds extra noise. - Fix leaking DB connections which could eventually consume all available connections. - Ensure we always unblock WaitUntilInitialSync even on invalid access tokens. Other code relies on WaitUntilInitialSync() actually returning at _some_ point e.g on startup we have N workers which bound the number of concurrent pollers made at any one time, we need to not just hog a worker forever. Improvements: - Greatly improve startup times of sync3 handlers by improving `JoinedRoomsTracker`: a modest amount of data would take ~28s to create the handler, now it takes 4s. - Massively improve initial initial v3 sync times, by refactoring `JoinedRoomsTracker`, from ~47s to <1s. - Add `SlidingSyncUntil...` in tests to reduce races. - Tweak the API shape of JoinedUsersForRoom to reduce state block processing time for large rooms from 63s to 39s. - Add trace task for initial syncs. - Include the proxy version in UA strings. 
- HTTP errors now wait 1s before returning to stop clients tight-looping on error. - Pending event buffer is now 2000. - Index the room ID first to cull the most events when returning timeline entries. Speeds up `SelectLatestEventsBetween` by a factor of 8. - Remove cancelled `m.room_key_requests` from the to-device inbox. Cuts down the amount of events in the inbox by ~94% for very large (20k+) inboxes, ~50% for moderate sized (200 events) inboxes. Adds book-keeping to remember the unacked to-device position for each client.
2022-12-14 18:53:55 +00:00
// filter.Include may call on this room ID in the RoomFinder, so make sure it finds it.
s.allRooms[r.RoomID] = &r
add extensions for typing and receipts; bugfixes and additional perf improvements Features: - Add `typing` extension. - Add `receipts` extension. - Add comprehensive prometheus `/metrics` activated via `SYNCV3_PROM`. - Add `SYNCV3_PPROF` support. - Add `by_notification_level` sort order. - Add `include_old_rooms` support. - Add support for `$ME` and `$LAZY`. - Add correct filtering when `*,*` is used as `required_state`. - Add `num_live` to each room response to indicate how many timeline entries are live. Bug fixes: - Use a stricter comparison function on ranges: fixes an issue whereby UTs fail on go1.19 due to change in sorting algorithm. - Send back an `errcode` on HTTP errors (e.g expired sessions). - Remove `unsigned.txn_id` on insertion into the DB. Otherwise other users would see other users txn IDs :( - Improve range delta algorithm: previously it didn't handle cases like `[0,20] -> [20,30]` and would panic. - Send HTTP 400 for invalid range requests. - Don't publish no-op unread counts which just adds extra noise. - Fix leaking DB connections which could eventually consume all available connections. - Ensure we always unblock WaitUntilInitialSync even on invalid access tokens. Other code relies on WaitUntilInitialSync() actually returning at _some_ point e.g on startup we have N workers which bound the number of concurrent pollers made at any one time, we need to not just hog a worker forever. Improvements: - Greatly improve startup times of sync3 handlers by improving `JoinedRoomsTracker`: a modest amount of data would take ~28s to create the handler, now it takes 4s. - Massively improve initial initial v3 sync times, by refactoring `JoinedRoomsTracker`, from ~47s to <1s. - Add `SlidingSyncUntil...` in tests to reduce races. - Tweak the API shape of JoinedUsersForRoom to reduce state block processing time for large rooms from 63s to 39s. - Add trace task for initial syncs. - Include the proxy version in UA strings. 
- HTTP errors now wait 1s before returning to stop clients tight-looping on error. - Pending event buffer is now 2000. - Index the room ID first to cull the most events when returning timeline entries. Speeds up `SelectLatestEventsBetween` by a factor of 8. - Remove cancelled `m.room_key_requests` from the to-device inbox. Cuts down the amount of events in the inbox by ~94% for very large (20k+) inboxes, ~50% for moderate sized (200 events) inboxes. Adds book-keeping to remember the unacked to-device position for each client.
2022-12-14 18:53:55 +00:00
for listKey, list := range s.lists {
_, alreadyExists := list.roomIDToIndex[r.RoomID]
shouldExist := list.filter.Include(&r, s)
if shouldExist && r.HasLeft {
shouldExist = false
}
// weird nesting ensures we handle all 4 cases
if alreadyExists {
if shouldExist { // could be a change
delta.Lists = append(delta.Lists, RoomListDelta{
ListKey: listKey,
Op: ListOpChange,
})
} else { // removal
delta.Lists = append(delta.Lists, RoomListDelta{
ListKey: listKey,
Op: ListOpDel,
})
}
} else {
if shouldExist { // addition
delta.Lists = append(delta.Lists, RoomListDelta{
ListKey: listKey,
Op: ListOpAdd,
})
} // else it doesn't exist and it shouldn't exist, so do nothing e.g room isn't relevant to this list
}
}
return delta
}
// RemoveRoom forgets the stored metadata for roomID, e.g. after retiring an invite
// or leaving a room. Removing an unknown room ID is a no-op.
//
// Note that this only deletes the entry from the room map; the per-list state is
// not touched here (see the TODO below).
func (s *InternalRequestLists) RemoveRoom(roomID string) {
delete(s.allRooms, roomID)
// TODO: update lists?
}
func (s *InternalRequestLists) DeleteList(listKey string) {
2023-05-25 19:11:45 +01:00
delete(s.lists, listKey)
for _, room := range s.allRooms {
delete(room.LastInterestedEventTimestamps, listKey)
}
}
2023-05-25 19:11:45 +01:00
// ReadOnlyRoom looks up the stored RoomConnMetadata for roomID, yielding nil when
// the room is not tracked. The result is the shared pointer held internally, not a
// copy: callers may read through it but must never write.
func (s *InternalRequestLists) ReadOnlyRoom(roomID string) *RoomConnMetadata {
	if room, tracked := s.allRooms[roomID]; tracked {
		return room
	}
	return nil
}
2023-05-25 19:11:45 +01:00
// Get looks up the sorted room list stored under listKey, yielding nil when no
// such list exists. The result is the shared pointer held internally, not a copy:
// callers may read through it but must never write.
func (s *InternalRequestLists) Get(listKey string) *FilteredSortableRooms {
	if list, found := s.lists[listKey]; found {
		return list
	}
	return nil
}
// ListKeys returns a copy of the list keys currently tracked by this
// InternalRequestLists struct, in no particular order. Outside of test code, you
// probably don't want to call this---you probably have the set of list keys tracked
// elsewhere in the application.
//
// The returned slice contains exactly one entry per tracked list.
func (s *InternalRequestLists) ListKeys() []string {
	// Allocate with zero length and full capacity: the keys are appended below, so
	// a non-zero length would leave that many empty strings at the front.
	keys := make([]string, 0, len(s.lists))
	for listKey := range s.lists {
		keys = append(keys, listKey)
	}
	return keys
}
2023-03-29 16:11:27 +01:00
// ListsByVisibleRoomIDs builds a map from room IDs to a slice of list names. Keys are
// all room IDs that are currently visible in at least one sliding window. Values are
// the names of all lists (in no particular order) in which the given room ID is
// currently visible. The value slices are nonnil and contain at least one list name
// (possibly more).
//
2023-03-29 16:11:27 +01:00
// The returned map is a copy, i.e. is safe to modify by the caller.
func (s *InternalRequestLists) ListsByVisibleRoomIDs(muxedReqLists map[string]RequestList) map[string][]string {
listsByRoomIDs := make(map[string][]string, len(muxedReqLists))
// Loop over each list, and mark each room in its sliding window as being visible in this list.
2023-05-24 15:16:47 +01:00
for listKey, reqList := range muxedReqLists {
sortedRooms := s.lists[listKey].SortableRooms
2023-03-29 16:11:27 +01:00
if sortedRooms == nil {
continue
}
// If we've requested all rooms, every room is visible in this list---we don't
// have to worry about extracting room IDs in the sliding windows' ranges.
if reqList.SlowGetAllRooms != nil && *reqList.SlowGetAllRooms {
2023-03-29 16:11:27 +01:00
for _, roomID := range sortedRooms.RoomIDs() {
2023-05-24 15:16:47 +01:00
listsByRoomIDs[roomID] = append(listsByRoomIDs[roomID], listKey)
2023-03-29 16:11:27 +01:00
}
} else {
2023-03-29 16:11:27 +01:00
subslices := reqList.Ranges.SliceInto(sortedRooms)
for _, subslice := range subslices {
sortedRooms = subslice.(*SortableRooms)
2023-03-29 16:11:27 +01:00
for _, roomID := range sortedRooms.RoomIDs() {
2023-05-24 15:16:47 +01:00
listsByRoomIDs[roomID] = append(listsByRoomIDs[roomID], listKey)
2023-03-29 16:11:27 +01:00
}
}
}
}
2023-03-29 16:11:27 +01:00
return listsByRoomIDs
}
// Assign a new list at the given key. If Overwrite, any existing list is replaced. If DoNotOverwrite, the existing
// list is returned if one exists, else a new list is created. Returns the list and true if the list was overwritten.
func (s *InternalRequestLists) AssignList(ctx context.Context, listKey string, filters *RequestFilters, sort []string, shouldOverwrite OverwriteVal) (*FilteredSortableRooms, bool) {
if shouldOverwrite == DoNotOverwrite {
_, exists := s.lists[listKey]
if exists {
return s.lists[listKey], false
}
}
roomIDs := make([]string, len(s.allRooms))
i := 0
for roomID := range s.allRooms {
roomIDs[i] = roomID
i++
}
2023-05-24 15:20:49 +01:00
roomList := NewFilteredSortableRooms(s, listKey, roomIDs, filters)
if sort != nil {
2023-04-05 15:36:05 +01:00
err := roomList.Sort(sort)
if err != nil {
logger.Err(err).Strs("sort_by", sort).Msg("failed to sort")
internal.GetSentryHubFromContextOrDefault(ctx).CaptureException(err)
}
}
s.lists[listKey] = roomList
return roomList, true
}
// Count returns the count of total rooms in the list stored under listKey.
// It returns 0 if no list is tracked under that key.
func (s *InternalRequestLists) Count(listKey string) int {
	list, ok := s.lists[listKey]
	if !ok || list == nil {
		// Avoid calling Len through a nil pointer for an unknown list.
		return 0
	}
	return int(list.Len())
}
// Len reports how many lists are currently tracked.
func (s *InternalRequestLists) Len() int {
	numLists := len(s.lists)
	return numLists
}