From 7a1aa04bbbc9f95442c8850ef61d1d58bb12df74 Mon Sep 17 00:00:00 2001 From: kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com> Date: Sun, 25 Sep 2022 12:09:41 +0100 Subject: [PATCH] [bugfix] update thread iterators to not use recursion (#851) * update thread iterators to not use recursion, rewrote both Signed-off-by: kim * fix endless descendant deref, don't error if fetching existing status Signed-off-by: kim * don't refetch remote ancestor statuses, improve descendant iter commenting Signed-off-by: kim * move collection page next logic so we capture first page of entities Signed-off-by: kim * improve log format argument quoting Signed-off-by: kim * improve code commenting of collection paging Signed-off-by: kim * only dereference announce's originating status if _not_ local. update DereferenceThread() signature. cleanup searchStatusByURI() Signed-off-by: kim Signed-off-by: kim --- internal/federation/dereference.go | 4 +- internal/federation/dereferencing/announce.go | 42 +- .../federation/dereferencing/dereferencer.go | 2 +- internal/federation/dereferencing/status.go | 3 +- internal/federation/dereferencing/thread.go | 409 ++++++++++-------- internal/federation/federator.go | 2 +- internal/processing/search.go | 40 +- 7 files changed, 299 insertions(+), 203 deletions(-) diff --git a/internal/federation/dereference.go b/internal/federation/dereference.go index 705cdbd19..6095a4d52 100644 --- a/internal/federation/dereference.go +++ b/internal/federation/dereference.go @@ -39,8 +39,8 @@ func (f *federator) EnrichRemoteStatus(ctx context.Context, username string, sta return f.dereferencer.EnrichRemoteStatus(ctx, username, status, includeParent) } -func (f *federator) DereferenceRemoteThread(ctx context.Context, username string, statusIRI *url.URL) error { - return f.dereferencer.DereferenceThread(ctx, username, statusIRI) +func (f *federator) DereferenceRemoteThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) { + f.dereferencer.DereferenceThread(ctx, username, statusIRI, status, statusable) } func (f *federator) GetRemoteInstance(ctx context.Context, username string, remoteInstanceURI *url.URL) (*gtsmodel.Instance, error) { diff --git a/internal/federation/dereferencing/announce.go b/internal/federation/dereferencing/announce.go index c740bb20a..144ddcb13 100644 --- a/internal/federation/dereferencing/announce.go +++ b/internal/federation/dereferencing/announce.go @@ -24,31 +24,50 @@ "fmt" "net/url" + "github.com/superseriousbusiness/gotosocial/internal/config" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) func (d *deref) DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error { - if announce.BoostOf == nil || announce.BoostOf.URI == "" { + if announce.BoostOf == nil { // we can't do anything unfortunately return errors.New("DereferenceAnnounce: no URI to dereference") } - boostedStatusURI, err := url.Parse(announce.BoostOf.URI) + // Parse the boosted status' URI + boostedURI, err := url.Parse(announce.BoostOf.URI) if err != nil { return fmt.Errorf("DereferenceAnnounce: couldn't parse boosted status URI %s: %s", announce.BoostOf.URI, err) } - if blocked, err := d.db.IsDomainBlocked(ctx, boostedStatusURI.Host); blocked || err != nil { - return fmt.Errorf("DereferenceAnnounce: domain %s is blocked", boostedStatusURI.Host) + + // Check whether the originating status is from a blocked host + if blocked, err := d.db.IsDomainBlocked(ctx, boostedURI.Host); blocked || err != nil { + return fmt.Errorf("DereferenceAnnounce: domain %s is blocked", boostedURI.Host) } - // dereference statuses in the thread of the boosted status - if err := d.DereferenceThread(ctx, requestingUsername, boostedStatusURI); err != nil { - return fmt.Errorf("DereferenceAnnounce: error dereferencing thread of boosted status: %s", err) - } + var boostedStatus *gtsmodel.Status - boostedStatus, _, err := d.GetRemoteStatus(ctx, requestingUsername, boostedStatusURI, false, true) - if err != nil { - return fmt.Errorf("DereferenceAnnounce: error dereferencing remote status with id %s: %s", announce.BoostOf.URI, err) + if boostedURI.Host == config.GetHost() { + // This is a local status, fetch from the database + status, err := d.db.GetStatusByURI(ctx, boostedURI.String()) + if err != nil { + return fmt.Errorf("DereferenceAnnounce: error fetching local status %q: %v", announce.BoostOf.URI, err) + } + + // Set boosted status + boostedStatus = status + } else { + // This is a boost of a remote status, we need to dereference it. + status, statusable, err := d.GetRemoteStatus(ctx, requestingUsername, boostedURI, true, true) + if err != nil { + return fmt.Errorf("DereferenceAnnounce: error dereferencing remote status with id %s: %s", announce.BoostOf.URI, err) + } + + // Dereference all statuses in the thread of the boosted status + d.DereferenceThread(ctx, requestingUsername, boostedURI, status, statusable) + + // Set boosted status + boostedStatus = status } announce.Content = boostedStatus.Content @@ -65,5 +84,6 @@ func (d *deref) DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Stat announce.Replyable = boostedStatus.Replyable announce.Likeable = boostedStatus.Likeable announce.BoostOf = boostedStatus + return nil } diff --git a/internal/federation/dereferencing/dereferencer.go b/internal/federation/dereferencing/dereferencer.go index 0fad2405e..331df3215 100644 --- a/internal/federation/dereferencing/dereferencer.go +++ b/internal/federation/dereferencing/dereferencer.go @@ -44,7 +44,7 @@ type Dereferencer interface { GetRemoteEmoji(ctx context.Context, requestingUsername string, remoteURL string, shortcode string, id string, emojiURI string, ai *media.AdditionalEmojiInfo) (*media.ProcessingEmoji, error) DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error - DereferenceThread(ctx context.Context, username string, statusIRI *url.URL) error + DereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) Handshaking(ctx context.Context, username string, remoteAccountID *url.URL) bool } diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index f3b7ee96e..645910d19 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -105,7 +105,7 @@ func (d *deref) GetRemoteStatus(ctx context.Context, username string, remoteStat return nil, nil, fmt.Errorf("GetRemoteStatus: error populating status fields: %s", err) } - if err := d.db.PutStatus(ctx, gtsStatus); err != nil { + if err := d.db.PutStatus(ctx, gtsStatus); err != nil && !errors.Is(err, db.ErrAlreadyExists) { return nil, nil, fmt.Errorf("GetRemoteStatus: error putting new status: %s", err) } @@ -441,7 +441,6 @@ func (d *deref) populateStatusEmojis(ctx context.Context, status *gtsmodel.Statu Disabled: e.Disabled, VisibleInPicker: e.VisibleInPicker, }) - if err != nil { log.Errorf("populateStatusEmojis: couldn't get remote emoji %s: %s", e.URI, err) continue diff --git a/internal/federation/dereferencing/thread.go b/internal/federation/dereferencing/thread.go index 7d7431110..d15f05964 100644 --- a/internal/federation/dereferencing/thread.go +++ b/internal/federation/dereferencing/thread.go @@ -24,218 +24,287 @@ "net/url" "codeberg.org/gruf/go-kv" + "github.com/superseriousbusiness/activity/streams/vocab" "github.com/superseriousbusiness/gotosocial/internal/ap" "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/uris" ) +// maxIter defines how many iterations of descendants or +// ancesters we are willing to follow before returning error. +const maxIter = 1000 + // DereferenceThread takes a statusable (something that has withReplies and withInReplyTo), // and dereferences statusables in the conversation. // // This process involves working up and down the chain of replies, and parsing through the collections of IDs // presented by remote instances as part of their replies collections, and will likely involve making several calls to // multiple different hosts. -func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL) error { +// +// This does not return error, as for robustness we do not want to error-out on a status because another further up / down has issues. +func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) { l := log.WithFields(kv.Fields{ - {"username", username}, - {"statusIRI", statusIRI}, + {"statusIRI", status.URI}, }...) - l.Trace("entering DereferenceThread") - // if it's our status we already have everything stashed so we can bail early - if statusIRI.Host == config.GetHost() { - l.Trace("iri belongs to us, bailing") - return nil + // Log function start + l.Trace("beginning") + + // Ensure that ancestors have been fully dereferenced + if err := d.dereferenceStatusAncestors(ctx, username, status); err != nil { + l.Errorf("error dereferencing status ancestors: %v", err) + // we don't return error, we have deref'd as much as we can } - // first make sure we have this status in our db - _, statusable, err := d.GetRemoteStatus(ctx, username, statusIRI, true, false) - if err != nil { - return fmt.Errorf("DereferenceThread: error getting initial status with id %s: %s", statusIRI.String(), err) + // Ensure that descendants have been fully dereferenced + if err := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil { + l.Errorf("error dereferencing status descendants: %v", err) + // we don't return error, we have deref'd as much as we can } - - // first iterate up through ancestors, dereferencing if necessary as we go - if err := d.iterateAncestors(ctx, username, *statusIRI); err != nil { - return fmt.Errorf("error iterating ancestors of status %s: %s", statusIRI.String(), err) - } - - // now iterate down through descendants, again dereferencing as we go - if err := d.iterateDescendants(ctx, username, *statusIRI, statusable); err != nil { - return fmt.Errorf("error iterating descendants of status %s: %s", statusIRI.String(), err) - } - - return nil } -// iterateAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way. -func (d *deref) iterateAncestors(ctx context.Context, username string, statusIRI url.URL) error { +// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way. +func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error { + // Take ref to original + ogIRI := status.URI + + // Start log entry with fields l := log.WithFields(kv.Fields{ - {"username", username}, - {"statusIRI", statusIRI}, + {"statusIRI", ogIRI}, }...) - l.Trace("entering iterateAncestors") - // if it's our status we don't need to dereference anything so we can immediately move up the chain - if statusIRI.Host == config.GetHost() { - l.Trace("iri belongs to us, moving up to next ancestor") - - // since this is our status, we know we can extract the id from the status path - _, id, err := uris.ParseStatusesPath(&statusIRI) - if err != nil { - return err - } - - status, err := d.db.GetStatusByID(ctx, id) - if err != nil { - return err - } + // Log function start + l.Trace("beginning") + for i := 0; i < maxIter; i++ { if status.InReplyToURI == "" { // status doesn't reply to anything return nil } - nextIRI, err := url.Parse(status.URI) + // Parse this status's replied IRI + replyIRI, err := url.Parse(status.InReplyToURI) if err != nil { - return err + return fmt.Errorf("invalid status InReplyToURI %q: %w", status.InReplyToURI, err) } - return d.iterateAncestors(ctx, username, *nextIRI) - } + if replyIRI.Host == config.GetHost() { + l.Tracef("following local status ancestors: %s", status.InReplyToURI) - // If we reach here, we're looking at a remote status - _, statusable, err := d.GetRemoteStatus(ctx, username, &statusIRI, true, false) - if err != nil { - l.Debugf("couldn't get remote status %s: %s; can't iterate any more ancestors", statusIRI.String(), err) - return nil - } - - inReplyTo := ap.ExtractInReplyToURI(statusable) - if inReplyTo == nil || inReplyTo.String() == "" { - // status doesn't reply to anything - return nil - } - - // now move up to the next ancestor - return d.iterateAncestors(ctx, username, *inReplyTo) -} - -func (d *deref) iterateDescendants(ctx context.Context, username string, statusIRI url.URL, statusable ap.Statusable) error { - l := log.WithFields(kv.Fields{ - - {"username", username}, - {"statusIRI", statusIRI}, - }...) - l.Trace("entering iterateDescendants") - - // if it's our status we already have descendants stashed so we can bail early - if statusIRI.Host == config.GetHost() { - l.Trace("iri belongs to us, bailing") - return nil - } - - replies := statusable.GetActivityStreamsReplies() - if replies == nil || !replies.IsActivityStreamsCollection() { - l.Trace("no replies, bailing") - return nil - } - - repliesCollection := replies.GetActivityStreamsCollection() - if repliesCollection == nil { - l.Trace("replies collection is nil, bailing") - return nil - } - - first := repliesCollection.GetActivityStreamsFirst() - if first == nil { - l.Trace("replies collection has no first, bailing") - return nil - } - - firstPage := first.GetActivityStreamsCollectionPage() - if firstPage == nil { - l.Trace("first has no collection page, bailing") - return nil - } - - firstPageNext := firstPage.GetActivityStreamsNext() - if firstPageNext == nil || !firstPageNext.IsIRI() { - l.Trace("next is not an iri, bailing") - return nil - } - - var foundReplies int - currentPageIRI := firstPageNext.GetIRI() - -pageLoop: - for { - l.Tracef("dereferencing page %s", currentPageIRI) - collectionPage, err := d.DereferenceCollectionPage(ctx, username, currentPageIRI) - if err != nil { - l.Debugf("couldn't get remote collection page %s: %s; breaking pageLoop", currentPageIRI, err) - break pageLoop - } - - pageItems := collectionPage.GetActivityStreamsItems() - if pageItems.Len() == 0 { - // no items on this page, which means we're done - break pageLoop - } - - // have a look through items and see what we can find - for iter := pageItems.Begin(); iter != pageItems.End(); iter = iter.Next() { - // We're looking for a url to feed to GetRemoteStatus. - // Each item can be either an IRI, or a Note. - // If a note, we grab the ID from it and call it, rather than parsing the note. - var itemURI *url.URL - switch { - case iter.IsIRI(): - // iri, easy - itemURI = iter.GetIRI() - case iter.IsActivityStreamsNote(): - // note, get the id from it to use as iri - note := iter.GetActivityStreamsNote() - noteID := note.GetJSONLDId() - if noteID != nil && noteID.IsIRI() { - itemURI = noteID.GetIRI() - } - default: - // if it's not an iri or a note, we don't know how to process it - continue + // This is our status, extract ID from path + _, id, err := uris.ParseStatusesPath(replyIRI) + if err != nil { + return fmt.Errorf("invalid local status IRI %q: %w", status.InReplyToURI, err) } - if itemURI.Host == config.GetHost() { - // skip if the reply is from us -- we already have it then - continue + // Fetch this status from the database + localStatus, err := d.db.GetStatusByID(ctx, id) + if err != nil { + return fmt.Errorf("error fetching local status %q: %w", id, err) } - // we can confidently say now that we found something - foundReplies++ + // Set the fetched status + status = localStatus - // get the remote statusable and put it in the db - _, statusable, err := d.GetRemoteStatus(ctx, username, itemURI, true, false) - if err == nil { - // now iterate descendants of *that* status - if err := d.iterateDescendants(ctx, username, *itemURI, statusable); err != nil { - continue - } - } - } - - nextPage := collectionPage.GetActivityStreamsNext() - if nextPage != nil && nextPage.IsIRI() { - nextPageIRI := nextPage.GetIRI() - l.Tracef("moving on to next page %s", nextPageIRI) - currentPageIRI = nextPageIRI } else { - l.Trace("no next page, bailing") - break pageLoop + l.Tracef("following remote status ancestors: %s", status.InReplyToURI) + + // Fetch the remote status found at this IRI + remoteStatus, _, err := d.GetRemoteStatus(ctx, username, replyIRI, false, false) + if err != nil { + return fmt.Errorf("error fetching remote status %q: %w", status.InReplyToURI, err) + } + + // Set the fetched status + status = remoteStatus } } - l.Debugf("foundReplies %d", foundReplies) - return nil + return fmt.Errorf("reached %d ancestor iterations for %q", maxIter, ogIRI) +} + +func (d *deref) dereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error { + // Take ref to original + ogIRI := statusIRI + + // Start log entry with fields + l := log.WithFields(kv.Fields{ + {"username", username}, + {"statusIRI", ogIRI}, + }...) + + // Log function start + l.Trace("beginning") + + // frame represents a single stack frame when iteratively + // dereferencing status descendants. where statusIRI and + // statusable are of the status whose children we are to + // descend, page is the current activity streams collection + // page of entities we are on (as we often push a frame to + // stack mid-paging), and item___ are entity iterators for + // this activity streams collection page. + type frame struct { + statusIRI *url.URL + statusable ap.Statusable + page ap.CollectionPageable + itemIter vocab.ActivityStreamsItemsPropertyIterator + iterLen int + iterIdx int + } + + var ( + // current is the current stack frame + current *frame + + // stack is a list of "shelved" descendand iterator + // frames. this is pushed to when a child status frame + // is found that we need to further iterate down, and + // popped from into 'current' when that child's tree + // of further descendants is exhausted. + stack = []*frame{ + { + // Starting input is first frame + statusIRI: statusIRI, + statusable: parent, + }, + } + + // popStack will remove and return the top frame + // from the stack, or nil if currently empty. + popStack = func() *frame { + if len(stack) == 0 { + return nil + } + + // Get frame index + idx := len(stack) - 1 + + // Pop last frame + frame := stack[idx] + stack = stack[:idx] + + return frame + } + ) + +stackLoop: + for i := 0; i < maxIter; i++ { + // Pop next frame, nil means we are at end + if current = popStack(); current == nil { + return nil + } + + if current.page == nil { + // This is a local status, no looping to do + if current.statusIRI.Host == config.GetHost() { + continue stackLoop + } + + l.Tracef("following remote status descendants: %s", current.statusIRI) + + // Look for an attached status replies (as collection) + replies := current.statusable.GetActivityStreamsReplies() + if replies == nil || !replies.IsActivityStreamsCollection() { + continue stackLoop + } + + // Get the status replies collection + collection := replies.GetActivityStreamsCollection() + + // Get the "first" property of the replies collection + first := collection.GetActivityStreamsFirst() + if first == nil || !first.IsActivityStreamsCollectionPage() { + continue stackLoop + } + + // Set the first activity stream collection page + current.page = first.GetActivityStreamsCollectionPage() + } + + for /* page loop */ { + if current.itemIter == nil { + // Check this page contains any items... + items := current.page.GetActivityStreamsItems() + if current.iterLen = items.Len(); current.iterLen == 0 { + continue stackLoop + } + + // Start off the item iterator + current.itemIter = items.Begin() + current.iterIdx = -1 + } + + itemLoop: + for current.iterIdx++; current.iterIdx < current.iterLen; current.iterIdx++ { + var itemIRI *url.URL + + // Get next item iterator object + current.itemIter = current.itemIter.Next() + + switch { + // Item is already an IRI + case current.itemIter.IsIRI(): + itemIRI = current.itemIter.GetIRI() + + // Item is a note, get the note ID IRI + case current.itemIter.IsActivityStreamsNote(): + note := current.itemIter.GetActivityStreamsNote() + if id := note.GetJSONLDId(); id != nil && id.IsIRI() { + itemIRI = id.GetIRI() + } + } + + if itemIRI == nil { + // Unusable iter object + continue itemLoop + } + + if itemIRI.Host == config.GetHost() { + // This child is one of ours, + continue itemLoop + } + + // Dereference the remote status and store in the database + _, statusable, err := d.GetRemoteStatus(ctx, username, itemIRI, true, false) + if err != nil { + l.Errorf("error dereferencing remote status %q: %s", itemIRI.String(), err) + continue itemLoop + } + + // Put current and next frame at top of stack + stack = append(stack, current, &frame{ + statusIRI: itemIRI, + statusable: statusable, + }) + } + + // Item iterator is done + current.itemIter = nil + + // Get the current page's "next" property + pageNext := current.page.GetActivityStreamsNext() + if pageNext == nil || !pageNext.IsIRI() { + continue stackLoop + } + + // Get the "next" page property IRI + pageNextIRI := pageNext.GetIRI() + + // Dereference this next collection page by its IRI + collectionPage, err := d.DereferenceCollectionPage(ctx, username, pageNextIRI) + if err != nil { + l.Errorf("error dereferencing remote collection page %q: %s", pageNextIRI.String(), err) + continue stackLoop + } + + // Set the updated collection page + current.page = collectionPage + } + } + + return fmt.Errorf("reached %d descendant iterations for %q", maxIter, ogIRI.String()) } diff --git a/internal/federation/federator.go b/internal/federation/federator.go index 2f0606338..0b09144c2 100644 --- a/internal/federation/federator.go +++ b/internal/federation/federator.go @@ -53,7 +53,7 @@ type Federator interface { // If something goes wrong during authentication, nil, false, and an error will be returned. AuthenticateFederatedRequest(ctx context.Context, username string) (*url.URL, gtserror.WithCode) - DereferenceRemoteThread(ctx context.Context, username string, statusURI *url.URL) error + DereferenceRemoteThread(ctx context.Context, username string, statusURI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error GetRemoteAccount(ctx context.Context, params dereferencing.GetRemoteAccountParams) (*gtsmodel.Account, error) diff --git a/internal/processing/search.go b/internal/processing/search.go index 3d7f3e56e..8bb2224a5 100644 --- a/internal/processing/search.go +++ b/internal/processing/search.go @@ -140,28 +140,36 @@ func (p *processor) SearchGet(ctx context.Context, authed *oauth.Auth, search *a } func (p *processor) searchStatusByURI(ctx context.Context, authed *oauth.Auth, uri *url.URL, resolve bool) (*gtsmodel.Status, error) { - l := log.WithFields(kv.Fields{ - {"uri", uri.String()}, - {"resolve", resolve}, - }...) + // Calculate URI string once + uriStr := uri.String() - if maybeStatus, err := p.db.GetStatusByURI(ctx, uri.String()); err == nil { - return maybeStatus, nil - } else if maybeStatus, err := p.db.GetStatusByURL(ctx, uri.String()); err == nil { - return maybeStatus, nil + // Look for status locally (by URI), we only accept "not found" errors. + status, err := p.db.GetStatusByURI(ctx, uriStr) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, fmt.Errorf("searchStatusByURI: error fetching status %q: %v", uriStr, err) + } else if err == nil { + return status, nil + } + + // Again, look for status locally (by URL), we only accept "not found" errors. + status, err = p.db.GetStatusByURL(ctx, uriStr) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, fmt.Errorf("searchStatusByURI: error fetching status %q: %v", uriStr, err) + } else if err == nil { + return status, nil } - // we don't have it locally so dereference it if we're allowed to if resolve { - status, _, err := p.federator.GetRemoteStatus(ctx, authed.Account.Username, uri, false, true) - if err == nil { - if err := p.federator.DereferenceRemoteThread(ctx, authed.Account.Username, uri); err != nil { - // try to deref the thread while we're here - l.Debugf("searchStatusByURI: error dereferencing remote thread: %s", err) - } - return status, nil + // This is a non-local status and we're allowed to resolve, so dereference it + status, statusable, err := p.federator.GetRemoteStatus(ctx, authed.Account.Username, uri, true, true) + if err != nil { + return nil, fmt.Errorf("searchStatusByURI: error fetching remote status %q: %v", uriStr, err) } + + // Attempt to dereference the status thread while we are here + p.federator.DereferenceRemoteThread(ctx, authed.Account.Username, uri, status, statusable) } + return nil, nil }