Merge pull request #326 from matrix-org/kegan/give-up-poll

bugfix: give up polling if the /sync response keeps erroring for >50min
This commit is contained in:
kegsay 2023-10-03 15:56:54 +01:00 committed by GitHub
commit 4d8cbb2709
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 44 additions and 0 deletions

View File

@ -552,6 +552,14 @@ func (p *poller) poll(ctx context.Context, s *pollLoopState) error {
p.totalNumPolls.Inc()
}
if s.failCount > 0 {
if s.failCount > 1000 {
// 3s * 1000 = 3000s = 50 minutes
errMsg := "poller: access token has failed >1000 times to /sync, terminating loop"
p.logger.Warn().Msg(errMsg)
p.receiver.OnExpiredToken(ctx, hashToken(p.accessToken), p.userID, p.deviceID)
p.Terminate()
return fmt.Errorf(errMsg)
}
// don't backoff when doing v2 syncs because the response is only in the cache for a short
// period of time (on massive accounts on matrix.org) such that if you wait 2,4,8min between
// requests it might force the server to do the work all over again :(

View File

@ -553,6 +553,42 @@ func mustEqualSince(t *testing.T, gotSince, expectedSince string) {
}
}
func TestPollerGivesUpEventually(t *testing.T) {
deviceID := "FOOBAR"
hasPolledSuccessfully := make(chan struct{})
accumulator, client := newMocks(func(authHeader, since string) (*SyncResponse, int, error) {
return nil, 524, fmt.Errorf("gateway timeout")
})
timeSleep = func(d time.Duration) {
// actually sleep to make sure async actions can happen if any
time.Sleep(1 * time.Microsecond)
}
defer func() { // reset the value after the test runs
timeSleep = time.Sleep
}()
var wg sync.WaitGroup
wg.Add(1)
poller := newPoller(PollerID{UserID: "@alice:localhost", DeviceID: deviceID}, "Authorization: hello world", client, accumulator, zerolog.New(os.Stderr), false)
go func() {
defer wg.Done()
poller.Poll("")
}()
go func() {
poller.WaitUntilInitialSync()
close(hasPolledSuccessfully)
}()
wg.Wait()
select {
case <-hasPolledSuccessfully:
case <-time.After(100 * time.Millisecond):
break
}
// poller should be in the terminated state
if !poller.terminated.Load() {
t.Errorf("poller was not terminated")
}
}
// Tests that the poller backs off in 2, 4, 8, etc. second increments in response to a variety of errors
func TestPollerBackoff(t *testing.T) {
deviceID := "FOOBAR"