161f145176
* Add NATS JetStream support Update shopify/sarama * Fix addresses * Don't change Addresses in Defaults * Update saramajetstream * Add missing error check Keep typing events for at least one minute * Use all configured NATS addresses * Update saramajetstream * Try setting up with NATS * Make sure NATS uses own persistent directory (TODO: make this configurable) * Update go.mod/go.sum * Jetstream package * Various other refactoring * Build fixes * Config tweaks, make random jetstream storage path for CI * Disable interest policies * Try to sane default on jetstream base path * Try to use in-memory for CI * Restore storage/retention * Update nats.go dependency * Adapt changes to config * Remove unneeded TopicFor * Dep update * Revert "Remove unneeded TopicFor" This reverts commit f5a4e4a339b6f94ec215778dca22204adaa893d1. * Revert changes made to streams * Fix build problems * Update nats-server * Update go.mod/go.sum * Roomserver input API queuing using NATS * Fix topic naming * Prometheus metrics * More refactoring to remove saramajetstream * Add missing topic * Don't try to populate map that doesn't exist * Roomserver output topic * Update go.mod/go.sum * Message acknowledgements * Ack tweaks * Try to resume transaction re-sends * Try to resume transaction re-sends * Update to matrix-org/gomatrixserverlib@91dadfb * Remove internal.PartitionStorer from components that don't consume keychanges * Try to reduce re-allocations a bit in resolveConflictsV2 * Tweak delivery options on RS input * Publish send-to-device messages into correct JetStream subject * Async and sync roomserver input * Update dendrite-config.yaml * Remove roomserver tests for now (they need rewriting) * Remove roomserver test again (was merged back in) * Update documentation * Docker updates * More Docker updates * Update Docker readme again * Fix lint issues * Send final event in `processEvent` synchronously (since this might stop Sytest from being so upset) * Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that * Go 1.16 instead of Go 1.13 for upgrade tests and Complement * Revert "Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that" This reverts commit 368675283fc44501f227639811bdb16dd5deef8c. * Don't report any errors on `/send` to see what fun that creates * Fix panics on closed channel sends * Enforce state key matches sender * Do the same for leave * Various tweaks to make tests happier Squashed commit of the following: commit 13f9028e7a63662759ce7c55504a9d2423058668 Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 15:47:14 2022 +0000 Do the same for leave commit e6be7f05c349fafbdddfe818337a17a60c867be1 Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 15:33:42 2022 +0000 Enforce state key matches sender commit 85ede6d64bf10ce9b91cdd6d80f87350ee55242f Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 14:07:04 2022 +0000 Fix panics on closed channel sends commit 9755494a98bed62450f8001d8128e40481d27e15 Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 13:38:22 2022 +0000 Don't report any errors on `/send` to see what fun that creates commit 3bb4f87b5dd56882febb4db5621db484c8789b7c Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 13:00:26 2022 +0000 Revert "Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that" This reverts commit 368675283fc44501f227639811bdb16dd5deef8c. commit fe2673ed7be9559eaca134424e403a4faca100b0 Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 12:09:34 2022 +0000 Go 1.16 instead of Go 1.13 for upgrade tests and Complement commit 368675283fc44501f227639811bdb16dd5deef8c Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 11:51:45 2022 +0000 Don't report event rejection errors via `/send`, since apparently this is upsetting tests that don't expect that commit b028dfc08577bcf52e6cb498026e15fa5d46d07c Author: Neil Alexander <neilalexander@users.noreply.github.com> Date: Tue Jan 4 10:29:08 2022 +0000 Send final event in `processEvent` synchronously (since this might stop Sytest from being so upset) * Merge in NATS Server v2.6.6 and nats.go v1.13 into the in-process connection fork * Add `jetstream.WithJetStreamMessage` to make ack/nak-ing less messy, use process context in consumers * Fix consumer component name in federation API * Add comment explaining where streams are defined * Tweaks to roomserver input with comments * Finish that sentence that I apparently forgot to finish in INSTALL.md * Bump version number of config to 2 * Add comments around asynchronous sends to roomserver in processEventWithMissingState * More useful error message when the config version does not match * Set version in generate-config * Fix version in config.Defaults Co-authored-by: Neil Alexander <neilalexander@users.noreply.github.com>
280 lines
9.5 KiB
Go
280 lines
9.5 KiB
Go
// Copyright 2017 Vector Creations Ltd
|
|
// Copyright 2018 New Vector Ltd
|
|
// Copyright 2019-2020 The Matrix.org Foundation C.I.C.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package input
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/matrix-org/dendrite/internal/eventutil"
|
|
"github.com/matrix-org/dendrite/roomserver/api"
|
|
"github.com/matrix-org/dendrite/roomserver/internal/helpers"
|
|
"github.com/matrix-org/dendrite/roomserver/state"
|
|
"github.com/matrix-org/dendrite/roomserver/types"
|
|
"github.com/matrix-org/gomatrixserverlib"
|
|
"github.com/matrix-org/util"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
func init() {
|
|
prometheus.MustRegister(processRoomEventDuration)
|
|
}
|
|
|
|
var processRoomEventDuration = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Namespace: "dendrite",
|
|
Subsystem: "roomserver",
|
|
Name: "processroomevent_duration_millis",
|
|
Help: "How long it takes the roomserver to process an event",
|
|
Buckets: []float64{ // milliseconds
|
|
5, 10, 25, 50, 75, 100, 250, 500,
|
|
1000, 2000, 3000, 4000, 5000, 6000,
|
|
7000, 8000, 9000, 10000, 15000, 20000,
|
|
},
|
|
},
|
|
[]string{"room_id"},
|
|
)
|
|
|
|
// processRoomEvent can only be called once at a time
|
|
//
|
|
// TODO(#375): This should be rewritten to allow concurrent calls. The
|
|
// difficulty is in ensuring that we correctly annotate events with the correct
|
|
// state deltas when sending to kafka streams
|
|
// TODO: Break up function - we should probably do transaction ID checks before calling this.
|
|
// nolint:gocyclo
|
|
func (r *Inputer) processRoomEvent(
|
|
ctx context.Context,
|
|
input *api.InputRoomEvent,
|
|
) (err error) {
|
|
// Measure how long it takes to process this event.
|
|
started := time.Now()
|
|
defer func() {
|
|
timetaken := time.Since(started)
|
|
processRoomEventDuration.With(prometheus.Labels{
|
|
"room_id": input.Event.RoomID(),
|
|
}).Observe(float64(timetaken.Milliseconds()))
|
|
}()
|
|
|
|
// Parse and validate the event JSON
|
|
headered := input.Event
|
|
event := headered.Unwrap()
|
|
|
|
// if we have already got this event then do not process it again, if the input kind is an outlier.
|
|
// Outliers contain no extra information which may warrant a re-processing.
|
|
if input.Kind == api.KindOutlier {
|
|
evs, err2 := r.DB.EventsFromIDs(ctx, []string{event.EventID()})
|
|
if err2 == nil && len(evs) == 1 {
|
|
// check hash matches if we're on early room versions where the event ID was a random string
|
|
idFormat, err2 := headered.RoomVersion.EventIDFormat()
|
|
if err2 == nil {
|
|
switch idFormat {
|
|
case gomatrixserverlib.EventIDFormatV1:
|
|
if bytes.Equal(event.EventReference().EventSHA256, evs[0].EventReference().EventSHA256) {
|
|
util.GetLogger(ctx).WithField("event_id", event.EventID()).Infof("Already processed event; ignoring")
|
|
return nil
|
|
}
|
|
default:
|
|
util.GetLogger(ctx).WithField("event_id", event.EventID()).Infof("Already processed event; ignoring")
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check that the event passes authentication checks and work out
|
|
// the numeric IDs for the auth events.
|
|
isRejected := false
|
|
authEventNIDs, rejectionErr := helpers.CheckAuthEvents(ctx, r.DB, headered, input.AuthEventIDs)
|
|
if rejectionErr != nil {
|
|
logrus.WithError(rejectionErr).WithField("event_id", event.EventID()).WithField("auth_event_ids", input.AuthEventIDs).Error("helpers.CheckAuthEvents failed for event, rejecting event")
|
|
isRejected = true
|
|
}
|
|
|
|
var softfail bool
|
|
if input.Kind == api.KindNew {
|
|
// Check that the event passes authentication checks based on the
|
|
// current room state.
|
|
softfail, err = helpers.CheckForSoftFail(ctx, r.DB, headered, input.StateEventIDs)
|
|
if err != nil {
|
|
logrus.WithFields(logrus.Fields{
|
|
"event_id": event.EventID(),
|
|
"type": event.Type(),
|
|
"room": event.RoomID(),
|
|
}).WithError(err).Info("Error authing soft-failed event")
|
|
}
|
|
}
|
|
|
|
// Store the event.
|
|
_, _, stateAtEvent, redactionEvent, redactedEventID, err := r.DB.StoreEvent(ctx, event, authEventNIDs, isRejected)
|
|
if err != nil {
|
|
return fmt.Errorf("r.DB.StoreEvent: %w", err)
|
|
}
|
|
|
|
// if storing this event results in it being redacted then do so.
|
|
if !isRejected && redactedEventID == event.EventID() {
|
|
r, rerr := eventutil.RedactEvent(redactionEvent, event)
|
|
if rerr != nil {
|
|
return fmt.Errorf("eventutil.RedactEvent: %w", rerr)
|
|
}
|
|
event = r
|
|
}
|
|
|
|
// For outliers we can stop after we've stored the event itself as it
|
|
// doesn't have any associated state to store and we don't need to
|
|
// notify anyone about it.
|
|
if input.Kind == api.KindOutlier {
|
|
logrus.WithFields(logrus.Fields{
|
|
"event_id": event.EventID(),
|
|
"type": event.Type(),
|
|
"room": event.RoomID(),
|
|
"sender": event.Sender(),
|
|
}).Debug("Stored outlier")
|
|
return nil
|
|
}
|
|
|
|
roomInfo, err := r.DB.RoomInfo(ctx, event.RoomID())
|
|
if err != nil {
|
|
return fmt.Errorf("r.DB.RoomInfo: %w", err)
|
|
}
|
|
if roomInfo == nil {
|
|
return fmt.Errorf("r.DB.RoomInfo missing for room %s", event.RoomID())
|
|
}
|
|
|
|
if stateAtEvent.BeforeStateSnapshotNID == 0 {
|
|
// We haven't calculated a state for this event yet.
|
|
// Lets calculate one.
|
|
err = r.calculateAndSetState(ctx, input, *roomInfo, &stateAtEvent, event, isRejected)
|
|
if err != nil && input.Kind != api.KindOld {
|
|
return fmt.Errorf("r.calculateAndSetState: %w", err)
|
|
}
|
|
}
|
|
|
|
// We stop here if the event is rejected: We've stored it but won't update forward extremities or notify anyone about it.
|
|
if isRejected || softfail {
|
|
logrus.WithFields(logrus.Fields{
|
|
"event_id": event.EventID(),
|
|
"type": event.Type(),
|
|
"room": event.RoomID(),
|
|
"soft_fail": softfail,
|
|
"sender": event.Sender(),
|
|
}).Debug("Stored rejected event")
|
|
return rejectionErr
|
|
}
|
|
|
|
switch input.Kind {
|
|
case api.KindNew:
|
|
if err = r.updateLatestEvents(
|
|
ctx, // context
|
|
roomInfo, // room info for the room being updated
|
|
stateAtEvent, // state at event (below)
|
|
event, // event
|
|
input.SendAsServer, // send as server
|
|
input.TransactionID, // transaction ID
|
|
input.HasState, // rewrites state?
|
|
); err != nil {
|
|
return fmt.Errorf("r.updateLatestEvents: %w", err)
|
|
}
|
|
case api.KindOld:
|
|
err = r.WriteOutputEvents(event.RoomID(), []api.OutputEvent{
|
|
{
|
|
Type: api.OutputTypeOldRoomEvent,
|
|
OldRoomEvent: &api.OutputOldRoomEvent{
|
|
Event: headered,
|
|
},
|
|
},
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("r.WriteOutputEvents (old): %w", err)
|
|
}
|
|
}
|
|
|
|
// processing this event resulted in an event (which may not be the one we're processing)
|
|
// being redacted. We are guaranteed to have both sides (the redaction/redacted event),
|
|
// so notify downstream components to redact this event - they should have it if they've
|
|
// been tracking our output log.
|
|
if redactedEventID != "" {
|
|
err = r.WriteOutputEvents(event.RoomID(), []api.OutputEvent{
|
|
{
|
|
Type: api.OutputTypeRedactedEvent,
|
|
RedactedEvent: &api.OutputRedactedEvent{
|
|
RedactedEventID: redactedEventID,
|
|
RedactedBecause: redactionEvent.Headered(headered.RoomVersion),
|
|
},
|
|
},
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("r.WriteOutputEvents (redactions): %w", err)
|
|
}
|
|
}
|
|
|
|
// Update the extremities of the event graph for the room
|
|
return nil
|
|
}
|
|
|
|
func (r *Inputer) calculateAndSetState(
|
|
ctx context.Context,
|
|
input *api.InputRoomEvent,
|
|
roomInfo types.RoomInfo,
|
|
stateAtEvent *types.StateAtEvent,
|
|
event *gomatrixserverlib.Event,
|
|
isRejected bool,
|
|
) error {
|
|
var err error
|
|
roomState := state.NewStateResolution(r.DB, roomInfo)
|
|
|
|
if input.HasState && !isRejected {
|
|
// Check here if we think we're in the room already.
|
|
stateAtEvent.Overwrite = true
|
|
var joinEventNIDs []types.EventNID
|
|
// Request join memberships only for local users only.
|
|
if joinEventNIDs, err = r.DB.GetMembershipEventNIDsForRoom(ctx, roomInfo.RoomNID, true, true); err == nil {
|
|
// If we have no local users that are joined to the room then any state about
|
|
// the room that we have is quite possibly out of date. Therefore in that case
|
|
// we should overwrite it rather than merge it.
|
|
stateAtEvent.Overwrite = len(joinEventNIDs) == 0
|
|
}
|
|
|
|
// We've been told what the state at the event is so we don't need to calculate it.
|
|
// Check that those state events are in the database and store the state.
|
|
var entries []types.StateEntry
|
|
if entries, err = r.DB.StateEntriesForEventIDs(ctx, input.StateEventIDs); err != nil {
|
|
return fmt.Errorf("r.DB.StateEntriesForEventIDs: %w", err)
|
|
}
|
|
entries = types.DeduplicateStateEntries(entries)
|
|
|
|
if stateAtEvent.BeforeStateSnapshotNID, err = r.DB.AddState(ctx, roomInfo.RoomNID, nil, entries); err != nil {
|
|
return fmt.Errorf("r.DB.AddState: %w", err)
|
|
}
|
|
} else {
|
|
stateAtEvent.Overwrite = false
|
|
|
|
// We haven't been told what the state at the event is so we need to calculate it from the prev_events
|
|
if stateAtEvent.BeforeStateSnapshotNID, err = roomState.CalculateAndStoreStateBeforeEvent(ctx, event, isRejected); err != nil {
|
|
return fmt.Errorf("roomState.CalculateAndStoreStateBeforeEvent: %w", err)
|
|
}
|
|
}
|
|
|
|
err = r.DB.SetState(ctx, stateAtEvent.EventNID, stateAtEvent.BeforeStateSnapshotNID)
|
|
if err != nil {
|
|
return fmt.Errorf("r.DB.SetState: %w", err)
|
|
}
|
|
return nil
|
|
}
|