[bugfix] update thread iterators to not use recursion (#851)

* update thread iterators to not use recursion, rewrote both

Signed-off-by: kim <grufwub@gmail.com>

* fix endless descendant deref, don't error if fetching existing status

Signed-off-by: kim <grufwub@gmail.com>

* don't refetch remote ancestor statuses, improve descendant iter commenting

Signed-off-by: kim <grufwub@gmail.com>

* move collection page next logic so we capture first page of entities

Signed-off-by: kim <grufwub@gmail.com>

* improve log format argument quoting

Signed-off-by: kim <grufwub@gmail.com>

* improve code commenting of collection paging

Signed-off-by: kim <grufwub@gmail.com>

* only dereference announce's originating status if _not_ local. update DereferenceThread() signature. cleanup searchStatusByURI()

Signed-off-by: kim <grufwub@gmail.com>

Signed-off-by: kim <grufwub@gmail.com>
This commit is contained in:
kim 2022-09-25 12:09:41 +01:00 committed by GitHub
parent 78409f1985
commit 7a1aa04bbb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 299 additions and 203 deletions

View File

@ -39,8 +39,8 @@ func (f *federator) EnrichRemoteStatus(ctx context.Context, username string, sta
return f.dereferencer.EnrichRemoteStatus(ctx, username, status, includeParent)
}
func (f *federator) DereferenceRemoteThread(ctx context.Context, username string, statusIRI *url.URL) error {
return f.dereferencer.DereferenceThread(ctx, username, statusIRI)
func (f *federator) DereferenceRemoteThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) {
f.dereferencer.DereferenceThread(ctx, username, statusIRI, status, statusable)
}
func (f *federator) GetRemoteInstance(ctx context.Context, username string, remoteInstanceURI *url.URL) (*gtsmodel.Instance, error) {

View File

@ -24,31 +24,50 @@
"fmt"
"net/url"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
)
func (d *deref) DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error {
if announce.BoostOf == nil || announce.BoostOf.URI == "" {
if announce.BoostOf == nil {
// we can't do anything unfortunately
return errors.New("DereferenceAnnounce: no URI to dereference")
}
boostedStatusURI, err := url.Parse(announce.BoostOf.URI)
// Parse the boosted status' URI
boostedURI, err := url.Parse(announce.BoostOf.URI)
if err != nil {
return fmt.Errorf("DereferenceAnnounce: couldn't parse boosted status URI %s: %s", announce.BoostOf.URI, err)
}
if blocked, err := d.db.IsDomainBlocked(ctx, boostedStatusURI.Host); blocked || err != nil {
return fmt.Errorf("DereferenceAnnounce: domain %s is blocked", boostedStatusURI.Host)
// Check whether the originating status is from a blocked host
if blocked, err := d.db.IsDomainBlocked(ctx, boostedURI.Host); blocked || err != nil {
return fmt.Errorf("DereferenceAnnounce: domain %s is blocked", boostedURI.Host)
}
// dereference statuses in the thread of the boosted status
if err := d.DereferenceThread(ctx, requestingUsername, boostedStatusURI); err != nil {
return fmt.Errorf("DereferenceAnnounce: error dereferencing thread of boosted status: %s", err)
}
var boostedStatus *gtsmodel.Status
boostedStatus, _, err := d.GetRemoteStatus(ctx, requestingUsername, boostedStatusURI, false, true)
if err != nil {
return fmt.Errorf("DereferenceAnnounce: error dereferencing remote status with id %s: %s", announce.BoostOf.URI, err)
if boostedURI.Host == config.GetHost() {
// This is a local status, fetch from the database
status, err := d.db.GetStatusByURI(ctx, boostedURI.String())
if err != nil {
return fmt.Errorf("DereferenceAnnounce: error fetching local status %q: %v", announce.BoostOf.URI, err)
}
// Set boosted status
boostedStatus = status
} else {
// This is a boost of a remote status, we need to dereference it.
status, statusable, err := d.GetRemoteStatus(ctx, requestingUsername, boostedURI, true, true)
if err != nil {
return fmt.Errorf("DereferenceAnnounce: error dereferencing remote status with id %s: %s", announce.BoostOf.URI, err)
}
// Dereference all statuses in the thread of the boosted status
d.DereferenceThread(ctx, requestingUsername, boostedURI, status, statusable)
// Set boosted status
boostedStatus = status
}
announce.Content = boostedStatus.Content
@ -65,5 +84,6 @@ func (d *deref) DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Stat
announce.Replyable = boostedStatus.Replyable
announce.Likeable = boostedStatus.Likeable
announce.BoostOf = boostedStatus
return nil
}

View File

@ -44,7 +44,7 @@ type Dereferencer interface {
GetRemoteEmoji(ctx context.Context, requestingUsername string, remoteURL string, shortcode string, id string, emojiURI string, ai *media.AdditionalEmojiInfo) (*media.ProcessingEmoji, error)
DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error
DereferenceThread(ctx context.Context, username string, statusIRI *url.URL) error
DereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable)
Handshaking(ctx context.Context, username string, remoteAccountID *url.URL) bool
}

View File

@ -105,7 +105,7 @@ func (d *deref) GetRemoteStatus(ctx context.Context, username string, remoteStat
return nil, nil, fmt.Errorf("GetRemoteStatus: error populating status fields: %s", err)
}
if err := d.db.PutStatus(ctx, gtsStatus); err != nil {
if err := d.db.PutStatus(ctx, gtsStatus); err != nil && !errors.Is(err, db.ErrAlreadyExists) {
return nil, nil, fmt.Errorf("GetRemoteStatus: error putting new status: %s", err)
}
@ -441,7 +441,6 @@ func (d *deref) populateStatusEmojis(ctx context.Context, status *gtsmodel.Statu
Disabled: e.Disabled,
VisibleInPicker: e.VisibleInPicker,
})
if err != nil {
log.Errorf("populateStatusEmojis: couldn't get remote emoji %s: %s", e.URI, err)
continue

View File

@ -24,218 +24,287 @@
"net/url"
"codeberg.org/gruf/go-kv"
"github.com/superseriousbusiness/activity/streams/vocab"
"github.com/superseriousbusiness/gotosocial/internal/ap"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/uris"
)
// maxIter defines how many iterations of descendants or
// ancesters we are willing to follow before returning error.
const maxIter = 1000
// DereferenceThread takes a statusable (something that has withReplies and withInReplyTo),
// and dereferences statusables in the conversation.
//
// This process involves working up and down the chain of replies, and parsing through the collections of IDs
// presented by remote instances as part of their replies collections, and will likely involve making several calls to
// multiple different hosts.
func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL) error {
//
// This does not return error, as for robustness we do not want to error-out on a status because another further up / down has issues.
func (d *deref) DereferenceThread(ctx context.Context, username string, statusIRI *url.URL, status *gtsmodel.Status, statusable ap.Statusable) {
l := log.WithFields(kv.Fields{
{"username", username},
{"statusIRI", statusIRI},
{"statusIRI", status.URI},
}...)
l.Trace("entering DereferenceThread")
// if it's our status we already have everything stashed so we can bail early
if statusIRI.Host == config.GetHost() {
l.Trace("iri belongs to us, bailing")
return nil
// Log function start
l.Trace("beginning")
// Ensure that ancestors have been fully dereferenced
if err := d.dereferenceStatusAncestors(ctx, username, status); err != nil {
l.Errorf("error dereferencing status ancestors: %v", err)
// we don't return error, we have deref'd as much as we can
}
// first make sure we have this status in our db
_, statusable, err := d.GetRemoteStatus(ctx, username, statusIRI, true, false)
if err != nil {
return fmt.Errorf("DereferenceThread: error getting initial status with id %s: %s", statusIRI.String(), err)
// Ensure that descendants have been fully dereferenced
if err := d.dereferenceStatusDescendants(ctx, username, statusIRI, statusable); err != nil {
l.Errorf("error dereferencing status descendants: %v", err)
// we don't return error, we have deref'd as much as we can
}
// first iterate up through ancestors, dereferencing if necessary as we go
if err := d.iterateAncestors(ctx, username, *statusIRI); err != nil {
return fmt.Errorf("error iterating ancestors of status %s: %s", statusIRI.String(), err)
}
// now iterate down through descendants, again dereferencing as we go
if err := d.iterateDescendants(ctx, username, *statusIRI, statusable); err != nil {
return fmt.Errorf("error iterating descendants of status %s: %s", statusIRI.String(), err)
}
return nil
}
// iterateAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way.
func (d *deref) iterateAncestors(ctx context.Context, username string, statusIRI url.URL) error {
// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way.
func (d *deref) dereferenceStatusAncestors(ctx context.Context, username string, status *gtsmodel.Status) error {
// Take ref to original
ogIRI := status.URI
// Start log entry with fields
l := log.WithFields(kv.Fields{
{"username", username},
{"statusIRI", statusIRI},
{"statusIRI", ogIRI},
}...)
l.Trace("entering iterateAncestors")
// if it's our status we don't need to dereference anything so we can immediately move up the chain
if statusIRI.Host == config.GetHost() {
l.Trace("iri belongs to us, moving up to next ancestor")
// since this is our status, we know we can extract the id from the status path
_, id, err := uris.ParseStatusesPath(&statusIRI)
if err != nil {
return err
}
status, err := d.db.GetStatusByID(ctx, id)
if err != nil {
return err
}
// Log function start
l.Trace("beginning")
for i := 0; i < maxIter; i++ {
if status.InReplyToURI == "" {
// status doesn't reply to anything
return nil
}
nextIRI, err := url.Parse(status.URI)
// Parse this status's replied IRI
replyIRI, err := url.Parse(status.InReplyToURI)
if err != nil {
return err
return fmt.Errorf("invalid status InReplyToURI %q: %w", status.InReplyToURI, err)
}
return d.iterateAncestors(ctx, username, *nextIRI)
}
if replyIRI.Host == config.GetHost() {
l.Tracef("following local status ancestors: %s", status.InReplyToURI)
// If we reach here, we're looking at a remote status
_, statusable, err := d.GetRemoteStatus(ctx, username, &statusIRI, true, false)
if err != nil {
l.Debugf("couldn't get remote status %s: %s; can't iterate any more ancestors", statusIRI.String(), err)
return nil
}
inReplyTo := ap.ExtractInReplyToURI(statusable)
if inReplyTo == nil || inReplyTo.String() == "" {
// status doesn't reply to anything
return nil
}
// now move up to the next ancestor
return d.iterateAncestors(ctx, username, *inReplyTo)
}
func (d *deref) iterateDescendants(ctx context.Context, username string, statusIRI url.URL, statusable ap.Statusable) error {
l := log.WithFields(kv.Fields{
{"username", username},
{"statusIRI", statusIRI},
}...)
l.Trace("entering iterateDescendants")
// if it's our status we already have descendants stashed so we can bail early
if statusIRI.Host == config.GetHost() {
l.Trace("iri belongs to us, bailing")
return nil
}
replies := statusable.GetActivityStreamsReplies()
if replies == nil || !replies.IsActivityStreamsCollection() {
l.Trace("no replies, bailing")
return nil
}
repliesCollection := replies.GetActivityStreamsCollection()
if repliesCollection == nil {
l.Trace("replies collection is nil, bailing")
return nil
}
first := repliesCollection.GetActivityStreamsFirst()
if first == nil {
l.Trace("replies collection has no first, bailing")
return nil
}
firstPage := first.GetActivityStreamsCollectionPage()
if firstPage == nil {
l.Trace("first has no collection page, bailing")
return nil
}
firstPageNext := firstPage.GetActivityStreamsNext()
if firstPageNext == nil || !firstPageNext.IsIRI() {
l.Trace("next is not an iri, bailing")
return nil
}
var foundReplies int
currentPageIRI := firstPageNext.GetIRI()
pageLoop:
for {
l.Tracef("dereferencing page %s", currentPageIRI)
collectionPage, err := d.DereferenceCollectionPage(ctx, username, currentPageIRI)
if err != nil {
l.Debugf("couldn't get remote collection page %s: %s; breaking pageLoop", currentPageIRI, err)
break pageLoop
}
pageItems := collectionPage.GetActivityStreamsItems()
if pageItems.Len() == 0 {
// no items on this page, which means we're done
break pageLoop
}
// have a look through items and see what we can find
for iter := pageItems.Begin(); iter != pageItems.End(); iter = iter.Next() {
// We're looking for a url to feed to GetRemoteStatus.
// Each item can be either an IRI, or a Note.
// If a note, we grab the ID from it and call it, rather than parsing the note.
var itemURI *url.URL
switch {
case iter.IsIRI():
// iri, easy
itemURI = iter.GetIRI()
case iter.IsActivityStreamsNote():
// note, get the id from it to use as iri
note := iter.GetActivityStreamsNote()
noteID := note.GetJSONLDId()
if noteID != nil && noteID.IsIRI() {
itemURI = noteID.GetIRI()
}
default:
// if it's not an iri or a note, we don't know how to process it
continue
// This is our status, extract ID from path
_, id, err := uris.ParseStatusesPath(replyIRI)
if err != nil {
return fmt.Errorf("invalid local status IRI %q: %w", status.InReplyToURI, err)
}
if itemURI.Host == config.GetHost() {
// skip if the reply is from us -- we already have it then
continue
// Fetch this status from the database
localStatus, err := d.db.GetStatusByID(ctx, id)
if err != nil {
return fmt.Errorf("error fetching local status %q: %w", id, err)
}
// we can confidently say now that we found something
foundReplies++
// Set the fetched status
status = localStatus
// get the remote statusable and put it in the db
_, statusable, err := d.GetRemoteStatus(ctx, username, itemURI, true, false)
if err == nil {
// now iterate descendants of *that* status
if err := d.iterateDescendants(ctx, username, *itemURI, statusable); err != nil {
continue
}
}
}
nextPage := collectionPage.GetActivityStreamsNext()
if nextPage != nil && nextPage.IsIRI() {
nextPageIRI := nextPage.GetIRI()
l.Tracef("moving on to next page %s", nextPageIRI)
currentPageIRI = nextPageIRI
} else {
l.Trace("no next page, bailing")
break pageLoop
l.Tracef("following remote status ancestors: %s", status.InReplyToURI)
// Fetch the remote status found at this IRI
remoteStatus, _, err := d.GetRemoteStatus(ctx, username, replyIRI, false, false)
if err != nil {
return fmt.Errorf("error fetching remote status %q: %w", status.InReplyToURI, err)
}
// Set the fetched status
status = remoteStatus
}
}
l.Debugf("foundReplies %d", foundReplies)
return nil
return fmt.Errorf("reached %d ancestor iterations for %q", maxIter, ogIRI)
}
func (d *deref) dereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
// Take ref to original
ogIRI := statusIRI
// Start log entry with fields
l := log.WithFields(kv.Fields{
{"username", username},
{"statusIRI", ogIRI},
}...)
// Log function start
l.Trace("beginning")
// frame represents a single stack frame when iteratively
// dereferencing status descendants. where statusIRI and
// statusable are of the status whose children we are to
// descend, page is the current activity streams collection
// page of entities we are on (as we often push a frame to
// stack mid-paging), and item___ are entity iterators for
// this activity streams collection page.
type frame struct {
statusIRI *url.URL
statusable ap.Statusable
page ap.CollectionPageable
itemIter vocab.ActivityStreamsItemsPropertyIterator
iterLen int
iterIdx int
}
var (
// current is the current stack frame
current *frame
// stack is a list of "shelved" descendand iterator
// frames. this is pushed to when a child status frame
// is found that we need to further iterate down, and
// popped from into 'current' when that child's tree
// of further descendants is exhausted.
stack = []*frame{
{
// Starting input is first frame
statusIRI: statusIRI,
statusable: parent,
},
}
// popStack will remove and return the top frame
// from the stack, or nil if currently empty.
popStack = func() *frame {
if len(stack) == 0 {
return nil
}
// Get frame index
idx := len(stack) - 1
// Pop last frame
frame := stack[idx]
stack = stack[:idx]
return frame
}
)
stackLoop:
for i := 0; i < maxIter; i++ {
// Pop next frame, nil means we are at end
if current = popStack(); current == nil {
return nil
}
if current.page == nil {
// This is a local status, no looping to do
if current.statusIRI.Host == config.GetHost() {
continue stackLoop
}
l.Tracef("following remote status descendants: %s", current.statusIRI)
// Look for an attached status replies (as collection)
replies := current.statusable.GetActivityStreamsReplies()
if replies == nil || !replies.IsActivityStreamsCollection() {
continue stackLoop
}
// Get the status replies collection
collection := replies.GetActivityStreamsCollection()
// Get the "first" property of the replies collection
first := collection.GetActivityStreamsFirst()
if first == nil || !first.IsActivityStreamsCollectionPage() {
continue stackLoop
}
// Set the first activity stream collection page
current.page = first.GetActivityStreamsCollectionPage()
}
for /* page loop */ {
if current.itemIter == nil {
// Check this page contains any items...
items := current.page.GetActivityStreamsItems()
if current.iterLen = items.Len(); current.iterLen == 0 {
continue stackLoop
}
// Start off the item iterator
current.itemIter = items.Begin()
current.iterIdx = -1
}
itemLoop:
for current.iterIdx++; current.iterIdx < current.iterLen; current.iterIdx++ {
var itemIRI *url.URL
// Get next item iterator object
current.itemIter = current.itemIter.Next()
switch {
// Item is already an IRI
case current.itemIter.IsIRI():
itemIRI = current.itemIter.GetIRI()
// Item is a note, get the note ID IRI
case current.itemIter.IsActivityStreamsNote():
note := current.itemIter.GetActivityStreamsNote()
if id := note.GetJSONLDId(); id != nil && id.IsIRI() {
itemIRI = id.GetIRI()
}
}
if itemIRI == nil {
// Unusable iter object
continue itemLoop
}
if itemIRI.Host == config.GetHost() {
// This child is one of ours,
continue itemLoop
}
// Dereference the remote status and store in the database
_, statusable, err := d.GetRemoteStatus(ctx, username, itemIRI, true, false)
if err != nil {
l.Errorf("error dereferencing remote status %q: %s", itemIRI.String(), err)
continue itemLoop
}
// Put current and next frame at top of stack
stack = append(stack, current, &frame{
statusIRI: itemIRI,
statusable: statusable,
})
}
// Item iterator is done
current.itemIter = nil
// Get the current page's "next" property
pageNext := current.page.GetActivityStreamsNext()
if pageNext == nil || !pageNext.IsIRI() {
continue stackLoop
}
// Get the "next" page property IRI
pageNextIRI := pageNext.GetIRI()
// Dereference this next collection page by its IRI
collectionPage, err := d.DereferenceCollectionPage(ctx, username, pageNextIRI)
if err != nil {
l.Errorf("error dereferencing remote collection page %q: %s", pageNextIRI.String(), err)
continue stackLoop
}
// Set the updated collection page
current.page = collectionPage
}
}
return fmt.Errorf("reached %d descendant iterations for %q", maxIter, ogIRI.String())
}

View File

@ -53,7 +53,7 @@ type Federator interface {
// If something goes wrong during authentication, nil, false, and an error will be returned.
AuthenticateFederatedRequest(ctx context.Context, username string) (*url.URL, gtserror.WithCode)
DereferenceRemoteThread(ctx context.Context, username string, statusURI *url.URL) error
DereferenceRemoteThread(ctx context.Context, username string, statusURI *url.URL, status *gtsmodel.Status, statusable ap.Statusable)
DereferenceAnnounce(ctx context.Context, announce *gtsmodel.Status, requestingUsername string) error
GetRemoteAccount(ctx context.Context, params dereferencing.GetRemoteAccountParams) (*gtsmodel.Account, error)

View File

@ -140,28 +140,36 @@ func (p *processor) SearchGet(ctx context.Context, authed *oauth.Auth, search *a
}
func (p *processor) searchStatusByURI(ctx context.Context, authed *oauth.Auth, uri *url.URL, resolve bool) (*gtsmodel.Status, error) {
l := log.WithFields(kv.Fields{
{"uri", uri.String()},
{"resolve", resolve},
}...)
// Calculate URI string once
uriStr := uri.String()
if maybeStatus, err := p.db.GetStatusByURI(ctx, uri.String()); err == nil {
return maybeStatus, nil
} else if maybeStatus, err := p.db.GetStatusByURL(ctx, uri.String()); err == nil {
return maybeStatus, nil
// Look for status locally (by URI), we only accept "not found" errors.
status, err := p.db.GetStatusByURI(ctx, uriStr)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return nil, fmt.Errorf("searchStatusByURI: error fetching status %q: %v", uriStr, err)
} else if err == nil {
return status, nil
}
// Again, look for status locally (by URL), we only accept "not found" errors.
status, err = p.db.GetStatusByURL(ctx, uriStr)
if err != nil && !errors.Is(err, db.ErrNoEntries) {
return nil, fmt.Errorf("searchStatusByURI: error fetching status %q: %v", uriStr, err)
} else if err == nil {
return status, nil
}
// we don't have it locally so dereference it if we're allowed to
if resolve {
status, _, err := p.federator.GetRemoteStatus(ctx, authed.Account.Username, uri, false, true)
if err == nil {
if err := p.federator.DereferenceRemoteThread(ctx, authed.Account.Username, uri); err != nil {
// try to deref the thread while we're here
l.Debugf("searchStatusByURI: error dereferencing remote thread: %s", err)
}
return status, nil
// This is a non-local status and we're allowed to resolve, so dereference it
status, statusable, err := p.federator.GetRemoteStatus(ctx, authed.Account.Username, uri, true, true)
if err != nil {
return nil, fmt.Errorf("searchStatusByURI: error fetching remote status %q: %v", uriStr, err)
}
// Attempt to dereference the status thread while we are here
p.federator.DereferenceRemoteThread(ctx, authed.Account.Username, uri, status, statusable)
}
return nil, nil
}