2023-03-12 16:00:57 +01:00
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2021-08-10 13:32:39 +02:00
package dereferencing
import (
"context"
"encoding/json"
"errors"
2022-01-16 18:52:55 +01:00
"io"
2021-08-10 13:32:39 +02:00
"net/url"
2022-01-24 18:12:04 +01:00
"time"
2021-08-10 13:32:39 +02:00
2021-11-13 17:29:43 +01:00
"github.com/superseriousbusiness/activity/streams"
"github.com/superseriousbusiness/activity/streams/vocab"
2021-08-10 13:32:39 +02:00
"github.com/superseriousbusiness/gotosocial/internal/ap"
2022-08-20 22:47:19 +02:00
"github.com/superseriousbusiness/gotosocial/internal/config"
2022-06-11 11:01:34 +02:00
"github.com/superseriousbusiness/gotosocial/internal/db"
2023-05-31 10:39:54 +02:00
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
2023-05-28 14:08:35 +02:00
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
2021-08-10 13:32:39 +02:00
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
2022-07-19 10:47:55 +02:00
"github.com/superseriousbusiness/gotosocial/internal/log"
2022-01-09 18:41:22 +01:00
"github.com/superseriousbusiness/gotosocial/internal/media"
2023-02-09 09:27:07 +01:00
"github.com/superseriousbusiness/gotosocial/internal/transport"
2021-08-10 13:32:39 +02:00
)
2023-05-12 11:15:54 +02:00
// accountUpToDate reports whether the given account model can be used as-is.
// That is the case when the account cannot be updated (a local account, or an
// already-stored instance account), or when its `fetched_at` timestamp is less
// than six hours old.
func accountUpToDate(account *gtsmodel.Account) bool {
	switch {
	// Local accounts are never dereferenced.
	case account.IsLocal():
		return true

	// An instance account we already have stored needs no refresh.
	case !account.CreatedAt.IsZero() && account.IsInstance():
		return true
	}

	// Still fresh only while "now" precedes fetched_at + 6h.
	refetchAfter := account.FetchedAt.Add(6 * time.Hour)
	return time.Now().Before(refetchAfter)
}
2023-10-23 11:58:13 +02:00
// GetAccountByURI will attempt to fetch an accounts by its URI, first checking the database. In the case of a newly-met remote model, or a remote model
// whose last_fetched date is beyond a certain interval, the account will be dereferenced. In the case of dereferencing, some low-priority account information
// may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). An ActivityPub object indicates the account was dereferenced.
func ( d * Dereferencer ) GetAccountByURI ( ctx context . Context , requestUser string , uri * url . URL ) ( * gtsmodel . Account , ap . Accountable , error ) {
2023-05-12 11:15:54 +02:00
// Fetch and dereference account if necessary.
account , apubAcc , err := d . getAccountByURI ( ctx ,
requestUser ,
uri ,
)
if err != nil {
return nil , nil , err
}
if apubAcc != nil {
// This account was updated, enqueue re-dereference featured posts.
d . state . Workers . Federator . MustEnqueueCtx ( ctx , func ( ctx context . Context ) {
if err := d . dereferenceAccountFeatured ( ctx , requestUser , account ) ; err != nil {
log . Errorf ( ctx , "error fetching account featured collection: %v" , err )
}
} )
}
return account , apubAcc , nil
}
// getAccountByURI is a package internal form of .GetAccountByURI() that doesn't bother dereferencing featured posts on update.
2023-10-23 11:58:13 +02:00
func ( d * Dereferencer ) getAccountByURI ( ctx context . Context , requestUser string , uri * url . URL ) ( * gtsmodel . Account , ap . Accountable , error ) {
2023-02-03 21:03:05 +01:00
var (
account * gtsmodel . Account
uriStr = uri . String ( )
err error
)
2022-11-29 10:24:55 +01:00
2023-05-31 10:39:54 +02:00
// Search the database for existing account with URI.
account , err = d . state . DB . GetAccountByURI (
// request a barebones object, it may be in the
// db but with related models not yet dereferenced.
gtscontext . SetBarebones ( ctx ) ,
uriStr ,
)
2023-02-03 21:03:05 +01:00
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error checking database for account %s by uri: %w" , uriStr , err )
2023-02-03 21:03:05 +01:00
}
2022-11-29 10:24:55 +01:00
2023-02-03 21:03:05 +01:00
if account == nil {
2023-05-31 10:39:54 +02:00
// Else, search the database for existing by URL.
account , err = d . state . DB . GetAccountByURL (
gtscontext . SetBarebones ( ctx ) ,
uriStr ,
)
2023-02-03 21:03:05 +01:00
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error checking database for account %s by url: %w" , uriStr , err )
2022-11-29 10:24:55 +01:00
}
}
2023-02-03 21:03:05 +01:00
if account == nil {
// Ensure that this is isn't a search for a local account.
if uri . Host == config . GetHost ( ) || uri . Host == config . GetAccountDomain ( ) {
2023-06-22 21:46:36 +02:00
return nil , nil , gtserror . SetUnretrievable ( err ) // this will be db.ErrNoEntries
2022-08-20 22:47:19 +02:00
}
2023-02-03 21:03:05 +01:00
// Create and pass-through a new bare-bones model for dereferencing.
2023-10-31 12:12:22 +01:00
return d . enrichAccountSafely ( ctx , requestUser , uri , & gtsmodel . Account {
2023-02-03 21:03:05 +01:00
ID : id . NewULID ( ) ,
Domain : uri . Host ,
URI : uriStr ,
2023-05-12 11:15:54 +02:00
} , nil )
}
// Check whether needs update.
if accountUpToDate ( account ) {
2023-05-31 10:39:54 +02:00
// This is existing up-to-date account, ensure it is populated.
if err := d . state . DB . PopulateAccount ( ctx , account ) ; err != nil {
log . Errorf ( ctx , "error populating existing account: %v" , err )
}
2023-05-12 11:15:54 +02:00
return account , nil , nil
2022-06-11 11:01:34 +02:00
}
2023-05-12 11:15:54 +02:00
// Try to update existing account model.
2023-10-31 12:12:22 +01:00
latest , apubAcc , err := d . enrichAccountSafely ( ctx ,
2023-05-12 11:15:54 +02:00
requestUser ,
uri ,
account ,
nil ,
)
2022-06-11 11:01:34 +02:00
if err != nil {
2023-02-17 12:02:29 +01:00
log . Errorf ( ctx , "error enriching remote account: %v" , err )
2023-05-12 11:15:54 +02:00
// Fallback to existing.
return account , nil , nil
2022-06-11 11:01:34 +02:00
}
2023-05-12 11:15:54 +02:00
return latest , apubAcc , nil
2023-02-03 21:03:05 +01:00
}
2022-06-11 11:01:34 +02:00
2023-10-23 11:58:13 +02:00
// GetAccountByUsernameDomain will attempt to fetch an accounts by its username@domain, first checking the database. In the case of a newly-met remote model,
// or a remote model whose last_fetched date is beyond a certain interval, the account will be dereferenced. In the case of dereferencing, some low-priority
// account information may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). An ActivityPub object indicates the account was dereferenced.
func ( d * Dereferencer ) GetAccountByUsernameDomain ( ctx context . Context , requestUser string , username string , domain string ) ( * gtsmodel . Account , ap . Accountable , error ) {
2023-10-31 12:05:17 +01:00
account , apubAcc , err := d . getAccountByUsernameDomain (
ctx ,
requestUser ,
username ,
domain ,
)
if err != nil {
return nil , nil , err
}
if apubAcc != nil {
// This account was updated, enqueue re-dereference featured posts.
d . state . Workers . Federator . MustEnqueueCtx ( ctx , func ( ctx context . Context ) {
if err := d . dereferenceAccountFeatured ( ctx , requestUser , account ) ; err != nil {
log . Errorf ( ctx , "error fetching account featured collection: %v" , err )
}
} )
}
return account , apubAcc , nil
}
// getAccountByUsernameDomain is a package internal form
// of .GetAccountByUsernameDomain() that doesn't bother
// dereferencing featured posts.
func ( d * Dereferencer ) getAccountByUsernameDomain (
ctx context . Context ,
requestUser string ,
username string ,
domain string ,
) ( * gtsmodel . Account , ap . Accountable , error ) {
2023-02-03 21:03:05 +01:00
if domain == config . GetHost ( ) || domain == config . GetAccountDomain ( ) {
// We do local lookups using an empty domain,
// else it will fail the db search below.
domain = ""
}
2022-11-29 10:24:55 +01:00
2023-05-12 11:15:54 +02:00
// Search the database for existing account with USERNAME@DOMAIN.
2023-05-31 10:39:54 +02:00
account , err := d . state . DB . GetAccountByUsernameDomain (
// request a barebones object, it may be in the
// db but with related models not yet dereferenced.
gtscontext . SetBarebones ( ctx ) ,
username , domain ,
)
2023-02-03 21:03:05 +01:00
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error checking database for account %s@%s: %w" , username , domain , err )
2023-02-03 21:03:05 +01:00
}
2022-01-24 18:12:04 +01:00
2023-02-03 21:03:05 +01:00
if account == nil {
if domain == "" {
2023-05-12 11:15:54 +02:00
// failed local lookup, will be db.ErrNoEntries.
2023-06-22 21:46:36 +02:00
return nil , nil , gtserror . SetUnretrievable ( err )
2021-08-10 13:32:39 +02:00
}
2023-02-03 21:03:05 +01:00
// Create and pass-through a new bare-bones model for dereferencing.
2023-10-31 12:12:22 +01:00
account , apubAcc , err := d . enrichAccountSafely ( ctx , requestUser , nil , & gtsmodel . Account {
2023-02-03 21:03:05 +01:00
ID : id . NewULID ( ) ,
Username : username ,
Domain : domain ,
2023-05-12 11:15:54 +02:00
} , nil )
if err != nil {
return nil , nil , err
2023-05-09 12:16:10 +02:00
}
2023-05-12 11:15:54 +02:00
return account , apubAcc , nil
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// Try to update existing account model.
latest , apubAcc , err := d . RefreshAccount ( ctx ,
requestUser ,
account ,
nil ,
false ,
)
2023-02-03 21:03:05 +01:00
if err != nil {
2023-05-12 11:15:54 +02:00
// Fallback to existing.
return account , nil , nil //nolint
2022-06-11 11:01:34 +02:00
}
2023-05-31 10:39:54 +02:00
if apubAcc == nil {
// This is existing up-to-date account, ensure it is populated.
2023-10-31 12:12:22 +01:00
if err := d . state . DB . PopulateAccount ( ctx , latest ) ; err != nil {
2023-05-31 10:39:54 +02:00
log . Errorf ( ctx , "error populating existing account: %v" , err )
}
}
2023-05-12 11:15:54 +02:00
return latest , apubAcc , nil
2023-02-03 21:03:05 +01:00
}
2023-10-23 11:58:13 +02:00
// RefreshAccount updates the given account if remote and last_fetched is beyond fetch interval, or if force is set. An updated account model is returned,
// but in the case of dereferencing, some low-priority account information may be enqueued for asynchronous fetching, e.g. featured account statuses (pins).
// An ActivityPub object indicates the account was dereferenced (i.e. updated).
func ( d * Dereferencer ) RefreshAccount ( ctx context . Context , requestUser string , account * gtsmodel . Account , apubAcc ap . Accountable , force bool ) ( * gtsmodel . Account , ap . Accountable , error ) {
2023-05-12 11:15:54 +02:00
// Check whether needs update (and not forced).
if accountUpToDate ( account ) && ! force {
return account , nil , nil
2023-05-09 12:16:10 +02:00
}
2023-05-12 11:15:54 +02:00
// Parse the URI from account.
uri , err := url . Parse ( account . URI )
2023-05-09 12:16:10 +02:00
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "invalid account uri %q: %w" , account . URI , err )
2023-05-09 12:16:10 +02:00
}
2023-10-31 12:12:22 +01:00
// Try to update + deref passed account model.
latest , apubAcc , err := d . enrichAccountSafely ( ctx ,
2023-05-12 11:15:54 +02:00
requestUser ,
uri ,
account ,
apubAcc ,
2023-05-09 12:16:10 +02:00
)
if err != nil {
2023-05-12 11:15:54 +02:00
log . Errorf ( ctx , "error enriching remote account: %v" , err )
2023-10-31 12:12:22 +01:00
return nil , nil , gtserror . Newf ( "error enriching remote account: %w" , err )
2023-05-09 12:16:10 +02:00
}
2023-10-31 12:12:22 +01:00
if apubAcc != nil {
// This account was updated, enqueue re-dereference featured posts.
d . state . Workers . Federator . MustEnqueueCtx ( ctx , func ( ctx context . Context ) {
if err := d . dereferenceAccountFeatured ( ctx , requestUser , latest ) ; err != nil {
log . Errorf ( ctx , "error fetching account featured collection: %v" , err )
}
} )
}
2023-05-12 11:15:54 +02:00
return latest , apubAcc , nil
2023-02-03 21:03:05 +01:00
}
2023-10-23 11:58:13 +02:00
// RefreshAccountAsync enqueues the given account for an asychronous update fetching, if last_fetched is beyond fetch interval, or if forcc is set.
// This is a more optimized form of manually enqueueing .UpdateAccount() to the federation worker, since it only enqueues update if necessary.
func ( d * Dereferencer ) RefreshAccountAsync ( ctx context . Context , requestUser string , account * gtsmodel . Account , apubAcc ap . Accountable , force bool ) {
2023-05-12 11:15:54 +02:00
// Check whether needs update (and not forced).
if accountUpToDate ( account ) && ! force {
return
2022-11-29 10:24:55 +01:00
}
2023-05-12 11:15:54 +02:00
// Parse the URI from account.
uri , err := url . Parse ( account . URI )
if err != nil {
2023-05-28 14:08:35 +02:00
log . Errorf ( ctx , "invalid account uri %q: %v" , account . URI , err )
2023-05-12 11:15:54 +02:00
return
2022-06-11 11:01:34 +02:00
}
2023-05-12 11:15:54 +02:00
// Enqueue a worker function to enrich this account async.
d . state . Workers . Federator . MustEnqueueCtx ( ctx , func ( ctx context . Context ) {
2023-10-31 12:12:22 +01:00
latest , apubAcc , err := d . enrichAccountSafely ( ctx , requestUser , uri , account , apubAcc )
2023-05-12 11:15:54 +02:00
if err != nil {
log . Errorf ( ctx , "error enriching remote account: %v" , err )
return
}
2023-02-03 21:03:05 +01:00
2023-10-31 12:12:22 +01:00
if apubAcc != nil {
// This account was updated, enqueue re-dereference featured posts.
d . state . Workers . Federator . MustEnqueueCtx ( ctx , func ( ctx context . Context ) {
if err := d . dereferenceAccountFeatured ( ctx , requestUser , latest ) ; err != nil {
log . Errorf ( ctx , "error fetching account featured collection: %v" , err )
}
} )
2022-06-11 11:01:34 +02:00
}
2023-05-12 11:15:54 +02:00
} )
}
2022-06-11 11:01:34 +02:00
2023-10-31 12:12:22 +01:00
// enrichAccountSafely wraps enrichAccount() to perform
// it within the State{}.FedLocks mutexmap, which protects
// dereferencing actions with per-URI mutex locks.
//
// The lock key is account.URI, or a faux URI built from the
// account's domain and username when no URI is set yet.
//
// If enrichment fails with an error carrying a status code
// >= 400, an already-stored account has its fetched_at bumped
// to slow down re-attempts. If enrichment fails with
// db.ErrAlreadyExists, the account is instead loaded from the
// database by URI and returned with a nil ActivityPub object,
// since it was not this call that enriched it.
func (d *Dereferencer) enrichAccountSafely(
	ctx context.Context,
	requestUser string,
	uri *url.URL,
	account *gtsmodel.Account,
	apubAcc ap.Accountable,
) (*gtsmodel.Account, ap.Accountable, error) {
	// By default use account.URI
	// as the per-URI deref lock.
	uriStr := account.URI

	if uriStr == "" {
		// No URI is set yet, instead generate a faux-one from user+domain.
		uriStr = "https://" + account.Domain + "/user/" + account.Username
	}

	// Acquire per-URI deref lock, wrapping unlock
	// to safely defer in case of panic, while still
	// performing more granular unlocks when needed.
	unlock := d.state.FedLocks.Lock(uriStr)
	unlock = doOnce(unlock)
	defer unlock()

	// Perform account enrichment with passed vars.
	latest, apubAcc, err := d.enrichAccount(ctx,
		requestUser,
		uri,
		account,
		apubAcc,
	)

	// Only accounts we already have stored can have fetched_at
	// updated: enrichAccount() treats a zero CreatedAt as "no
	// local copy was found", so there is no row to update.
	if gtserror.StatusCode(err) >= 400 && !account.CreatedAt.IsZero() {
		// Update fetch-at to slow re-attempts.
		account.FetchedAt = time.Now()
		if dbErr := d.state.DB.UpdateAccount(ctx, account, "fetched_at"); dbErr != nil {
			// Best-effort only; the enrichment error is what gets returned.
			log.Errorf(ctx, "error updating fetched_at for account %s: %v", uriStr, dbErr)
		}
	}

	// Unlock now
	// we're done.
	unlock()

	if errors.Is(err, db.ErrAlreadyExists) {
		// Ensure AP model isn't set,
		// otherwise this indicates WE
		// enriched the account.
		apubAcc = nil

		// DATA RACE! We likely lost out to another goroutine
		// in a call to db.Put(Account). Look again in DB by URI.
		latest, err = d.state.DB.GetAccountByURI(ctx, account.URI)
		if err != nil {
			// Report the URI that was actually queried, which may
			// differ from the (possibly faux) lock key in uriStr.
			err = gtserror.Newf("error getting account %s from database after race: %w", account.URI, err)
		}
	}

	return latest, apubAcc, err
}
// enrichAccount will enrich the given account, whether a
// new barebones model, or existing model from the database.
// It handles necessary dereferencing, webfingering etc.
func ( d * Dereferencer ) enrichAccount (
ctx context . Context ,
requestUser string ,
uri * url . URL ,
account * gtsmodel . Account ,
apubAcc ap . Accountable ,
) ( * gtsmodel . Account , ap . Accountable , error ) {
2023-02-10 21:15:23 +01:00
// Pre-fetch a transport for requesting username, used by later deref procedures.
2023-05-12 11:15:54 +02:00
tsport , err := d . transportController . NewTransportForUsername ( ctx , requestUser )
2023-02-09 09:27:07 +01:00
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "couldn't create transport: %w" , err )
2023-02-09 09:27:07 +01:00
}
2023-02-03 21:03:05 +01:00
if account . Username != "" {
// A username was provided so we can attempt a webfinger, this ensures up-to-date accountdomain info.
2023-05-12 11:15:54 +02:00
accDomain , accURI , err := d . fingerRemoteAccount ( ctx , tsport , account . Username , account . Domain )
if err != nil {
if account . URI == "" {
2023-06-22 21:46:36 +02:00
// this is a new account (to us) with username@domain
// but failed webfinger, nothing more we can do.
err := gtserror . Newf ( "error webfingering account: %w" , err )
return nil , nil , gtserror . SetUnretrievable ( err )
2023-05-12 11:15:54 +02:00
}
2021-08-10 13:32:39 +02:00
2023-05-12 11:15:54 +02:00
// Simply log this error and move on, we already have an account URI.
2023-03-08 18:19:49 +01:00
log . Errorf ( ctx , "error webfingering[1] remote account %s@%s: %v" , account . Username , account . Domain , err )
2023-05-12 11:15:54 +02:00
}
2023-03-08 18:19:49 +01:00
2023-05-12 11:15:54 +02:00
if err == nil {
2023-02-09 10:34:44 +01:00
if account . Domain != accDomain {
2023-02-10 21:15:23 +01:00
// After webfinger, we now have correct account domain from which we can do a final DB check.
2023-05-12 11:15:54 +02:00
alreadyAccount , err := d . state . DB . GetAccountByUsernameDomain ( ctx , account . Username , accDomain )
2023-02-09 10:34:44 +01:00
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "db err looking for account again after webfinger: %w" , err )
2023-02-09 10:34:44 +01:00
}
2023-05-12 11:15:54 +02:00
if alreadyAccount != nil {
2023-02-10 21:15:23 +01:00
// Enrich existing account.
2023-02-09 10:34:44 +01:00
account = alreadyAccount
}
}
2023-02-03 21:03:05 +01:00
// Update account with latest info.
account . URI = accURI . String ( )
account . Domain = accDomain
uri = accURI
2021-08-10 13:32:39 +02:00
}
2023-02-03 21:03:05 +01:00
}
2021-08-10 13:32:39 +02:00
2023-02-03 21:03:05 +01:00
if uri == nil {
// No URI provided / found, must parse from account.
uri , err = url . Parse ( account . URI )
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "invalid uri %q: %w" , account . URI , err )
2021-08-10 13:32:39 +02:00
}
2023-02-03 21:03:05 +01:00
}
2021-08-10 13:32:39 +02:00
2023-02-10 21:15:23 +01:00
// Check whether this account URI is a blocked domain / subdomain.
2023-05-12 11:15:54 +02:00
if blocked , err := d . state . DB . IsDomainBlocked ( ctx , uri . Host ) ; err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error checking blocked domain: %w" , err )
2023-02-03 21:03:05 +01:00
} else if blocked {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "%s is blocked" , uri . Host )
2023-02-03 21:03:05 +01:00
}
2022-06-11 11:01:34 +02:00
2023-02-10 21:15:23 +01:00
// Mark deref+update handshake start.
2023-02-03 21:03:05 +01:00
d . startHandshake ( requestUser , uri )
defer d . stopHandshake ( requestUser , uri )
2022-01-24 13:12:17 +01:00
2023-05-12 11:15:54 +02:00
if apubAcc == nil {
// Dereference latest version of the account.
b , err := tsport . Dereference ( ctx , uri )
if err != nil {
2023-06-22 21:46:36 +02:00
err := gtserror . Newf ( "error deferencing %s: %w" , uri , err )
return nil , nil , gtserror . SetUnretrievable ( err )
2023-05-12 11:15:54 +02:00
}
2023-06-22 21:46:36 +02:00
// Attempt to resolve ActivityPub acc from data.
2023-05-12 11:15:54 +02:00
apubAcc , err = ap . ResolveAccountable ( ctx , b )
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error resolving accountable from data for account %s: %w" , uri , err )
2023-05-12 11:15:54 +02:00
}
2023-06-22 21:46:36 +02:00
}
2023-05-12 11:15:54 +02:00
2023-06-22 21:46:36 +02:00
// Convert the dereferenced AP account object to our GTS model.
2023-09-23 18:44:11 +02:00
latestAcc , err := d . converter . ASRepresentationToAccount ( ctx ,
2023-06-22 21:46:36 +02:00
apubAcc ,
account . Domain ,
)
if err != nil {
return nil , nil , gtserror . Newf ( "error converting accountable to gts model for account %s: %w" , uri , err )
2021-08-10 13:32:39 +02:00
}
2023-02-03 21:03:05 +01:00
if account . Username == "" {
// No username was provided, so no webfinger was attempted earlier.
//
2023-03-19 16:45:13 +01:00
// Now we have a username we can attempt again, to ensure up-to-date
// accountDomain info. For this final attempt we should use the domain
// of the ID of the dereffed account, rather than the URI we were given.
//
// This avoids cases where we were given a URI like
// https://example.org/@someone@somewhere.else and we've been redirected
// from example.org to somewhere.else: we want to take somewhere.else
// as the accountDomain then, not the example.org we were redirected from.
// Assume the host from the returned ActivityPub representation.
idProp := apubAcc . GetJSONLDId ( )
if idProp == nil || ! idProp . IsIRI ( ) {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . New ( "no id property found on person, or id was not an iri" )
2023-03-19 16:45:13 +01:00
}
2023-05-12 11:15:54 +02:00
// Get IRI host value.
2023-03-19 16:45:13 +01:00
accHost := idProp . GetIRI ( ) . Host
2023-03-08 18:19:49 +01:00
2023-05-12 11:15:54 +02:00
latestAcc . Domain , _ , err = d . fingerRemoteAccount ( ctx ,
tsport ,
latestAcc . Username ,
accHost ,
)
2023-03-19 16:45:13 +01:00
if err != nil {
// We still couldn't webfinger the account, so we're not certain
// what the accountDomain actually is. Still, we can make a solid
// guess that it's the Host of the ActivityPub URI of the account.
// If we're wrong, we can just try again in a couple days.
log . Errorf ( ctx , "error webfingering[2] remote account %s@%s: %v" , latestAcc . Username , accHost , err )
latestAcc . Domain = accHost
2022-09-23 21:27:35 +02:00
}
}
2023-02-03 21:03:05 +01:00
// Ensure ID is set and update fetch time.
latestAcc . ID = account . ID
latestAcc . FetchedAt = time . Now ( )
2023-06-22 21:46:36 +02:00
// Ensure the account's avatar media is populated, passing in existing to check for chages.
if err := d . fetchRemoteAccountAvatar ( ctx , tsport , account , latestAcc ) ; err != nil {
log . Errorf ( ctx , "error fetching remote avatar for account %s: %v" , uri , err )
2023-02-09 09:27:07 +01:00
}
2023-06-22 21:46:36 +02:00
// Ensure the account's avatar media is populated, passing in existing to check for chages.
if err := d . fetchRemoteAccountHeader ( ctx , tsport , account , latestAcc ) ; err != nil {
log . Errorf ( ctx , "error fetching remote header for account %s: %v" , uri , err )
2022-01-24 13:12:17 +01:00
}
2023-02-03 21:03:05 +01:00
// Fetch the latest remote account emoji IDs used in account display name/bio.
2023-03-01 18:52:44 +01:00
if _ , err = d . fetchRemoteAccountEmojis ( ctx , latestAcc , requestUser ) ; err != nil {
2023-02-17 12:02:29 +01:00
log . Errorf ( ctx , "error fetching remote emojis for account %s: %v" , uri , err )
2022-01-24 13:12:17 +01:00
}
2023-02-03 21:03:05 +01:00
if account . CreatedAt . IsZero ( ) {
// CreatedAt will be zero if no local copy was
// found in one of the GetAccountBy___() functions.
//
// Set time of creation from the last-fetched date.
latestAcc . CreatedAt = latestAcc . FetchedAt
latestAcc . UpdatedAt = latestAcc . FetchedAt
2023-03-03 09:34:34 +01:00
// This is new, put it in the database.
2023-05-12 11:15:54 +02:00
err := d . state . DB . PutAccount ( ctx , latestAcc )
2023-03-03 09:34:34 +01:00
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error putting in database: %w" , err )
2023-02-03 21:03:05 +01:00
}
} else {
// Set time of update from the last-fetched date.
latestAcc . UpdatedAt = latestAcc . FetchedAt
// Use existing account values.
latestAcc . CreatedAt = account . CreatedAt
latestAcc . Language = account . Language
// This is an existing account, update the model in the database.
2023-05-12 11:15:54 +02:00
if err := d . state . DB . UpdateAccount ( ctx , latestAcc ) ; err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error updating database: %w" , err )
2022-01-25 11:21:22 +01:00
}
2022-01-24 13:12:17 +01:00
}
2023-05-12 11:15:54 +02:00
return latestAcc , apubAcc , nil
2021-08-10 13:32:39 +02:00
}
2023-10-23 11:58:13 +02:00
func ( d * Dereferencer ) fetchRemoteAccountAvatar ( ctx context . Context , tsport transport . Transport , existing , latestAcc * gtsmodel . Account ) error {
2023-07-12 13:20:15 +02:00
if latestAcc . AvatarRemoteURL == "" {
// No avatar set on newest model, leave
// latest avatar attachment ID empty.
2023-06-22 21:46:36 +02:00
return nil
}
2023-07-12 13:20:15 +02:00
// By default we keep the previous media attachment ID. This will only
// be changed if and when we have the new media loaded into storage.
latestAcc . AvatarMediaAttachmentID = existing . AvatarMediaAttachmentID
2023-06-22 21:46:36 +02:00
2023-07-12 13:20:15 +02:00
// If we had a media attachment ID already, and the URL
// of the attachment hasn't changed from existing -> latest,
// then we may be able to just keep our existing attachment
// without having to make any remote calls.
if latestAcc . AvatarMediaAttachmentID != "" &&
existing . AvatarRemoteURL == latestAcc . AvatarRemoteURL {
// Ensure we have media attachment with the known ID.
2023-06-22 21:46:36 +02:00
media , err := d . state . DB . GetAttachmentByID ( ctx , existing . AvatarMediaAttachmentID )
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
return gtserror . Newf ( "error getting attachment %s: %w" , existing . AvatarMediaAttachmentID , err )
}
2023-07-12 13:20:15 +02:00
// Ensure attachment has correct properties.
if media != nil && media . RemoteURL == latestAcc . AvatarRemoteURL {
// We already have the most up-to-date
// media attachment, keep using it.
2023-06-22 21:46:36 +02:00
return nil
}
}
2023-07-12 13:20:15 +02:00
// If we reach here, we know we need to fetch the most
// up-to-date version of the attachment from remote.
2023-06-22 21:46:36 +02:00
// Parse and validate the newly provided media URL.
2023-07-12 13:20:15 +02:00
avatarURI , err := url . Parse ( latestAcc . AvatarRemoteURL )
2023-02-10 21:15:23 +01:00
if err != nil {
2023-07-12 13:20:15 +02:00
return gtserror . Newf ( "error parsing url %s: %w" , latestAcc . AvatarRemoteURL , err )
2023-02-10 21:15:23 +01:00
}
// Acquire lock for derefs map.
2023-10-31 12:12:22 +01:00
unlock := d . state . FedLocks . Lock ( latestAcc . AvatarRemoteURL )
unlock = doOnce ( unlock )
2023-02-10 21:15:23 +01:00
defer unlock ( )
2023-02-13 21:19:51 +01:00
// Look for an existing dereference in progress.
2023-07-12 13:20:15 +02:00
processing , ok := d . derefAvatars [ latestAcc . AvatarRemoteURL ]
2023-02-10 21:15:23 +01:00
2023-02-13 21:19:51 +01:00
if ! ok {
var err error
2023-02-10 21:15:23 +01:00
2023-02-13 21:19:51 +01:00
// Set the media data function to dereference avatar from URI.
data := func ( ctx context . Context ) ( io . ReadCloser , int64 , error ) {
return tsport . DereferenceMedia ( ctx , avatarURI )
}
// Create new media processing request from the media manager instance.
2023-07-12 13:20:15 +02:00
processing , err = d . mediaManager . PreProcessMedia ( ctx , data , latestAcc . ID , & media . AdditionalMediaInfo {
2023-02-13 21:19:51 +01:00
Avatar : func ( ) * bool { v := true ; return & v } ( ) ,
2023-07-12 13:20:15 +02:00
RemoteURL : & latestAcc . AvatarRemoteURL ,
2023-02-13 21:19:51 +01:00
} )
if err != nil {
2023-07-12 13:20:15 +02:00
return gtserror . Newf ( "error preprocessing media for attachment %s: %w" , latestAcc . AvatarRemoteURL , err )
2023-02-13 21:19:51 +01:00
}
2023-02-10 21:15:23 +01:00
2023-02-13 21:19:51 +01:00
// Store media in map to mark as processing.
2023-07-12 13:20:15 +02:00
d . derefAvatars [ latestAcc . AvatarRemoteURL ] = processing
2023-02-13 21:19:51 +01:00
defer func ( ) {
// On exit safely remove media from map.
2023-10-31 12:12:22 +01:00
unlock := d . state . FedLocks . Lock ( latestAcc . AvatarRemoteURL )
2023-07-12 13:20:15 +02:00
delete ( d . derefAvatars , latestAcc . AvatarRemoteURL )
2023-02-13 21:19:51 +01:00
unlock ( )
} ( )
}
2023-02-10 21:15:23 +01:00
// Unlock map.
unlock ( )
// Start media attachment loading (blocking call).
if _ , err := processing . LoadAttachment ( ctx ) ; err != nil {
2023-07-12 13:20:15 +02:00
return gtserror . Newf ( "error loading attachment %s: %w" , latestAcc . AvatarRemoteURL , err )
2023-02-10 21:15:23 +01:00
}
2023-06-22 21:46:36 +02:00
// Set the newly loaded avatar media attachment ID.
2023-07-12 13:20:15 +02:00
latestAcc . AvatarMediaAttachmentID = processing . AttachmentID ( )
2023-06-22 21:46:36 +02:00
return nil
2023-02-10 21:15:23 +01:00
}
2023-10-23 11:58:13 +02:00
func ( d * Dereferencer ) fetchRemoteAccountHeader ( ctx context . Context , tsport transport . Transport , existing , latestAcc * gtsmodel . Account ) error {
2023-07-12 13:20:15 +02:00
if latestAcc . HeaderRemoteURL == "" {
// No header set on newest model, leave
// latest header attachment ID empty.
2023-06-22 21:46:36 +02:00
return nil
}
2023-07-12 13:20:15 +02:00
// By default we keep the previous media attachment ID. This will only
// be changed if and when we have the new media loaded into storage.
latestAcc . HeaderMediaAttachmentID = existing . HeaderMediaAttachmentID
2023-06-22 21:46:36 +02:00
2023-07-12 13:20:15 +02:00
// If we had a media attachment ID already, and the URL
// of the attachment hasn't changed from existing -> latest,
// then we may be able to just keep our existing attachment
// without having to make any remote calls.
if latestAcc . HeaderMediaAttachmentID != "" &&
existing . HeaderRemoteURL == latestAcc . HeaderRemoteURL {
// Ensure we have media attachment with the known ID.
2023-06-22 21:46:36 +02:00
media , err := d . state . DB . GetAttachmentByID ( ctx , existing . HeaderMediaAttachmentID )
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
return gtserror . Newf ( "error getting attachment %s: %w" , existing . HeaderMediaAttachmentID , err )
}
2023-07-12 13:20:15 +02:00
// Ensure attachment has correct properties.
if media != nil && media . RemoteURL == latestAcc . HeaderRemoteURL {
// We already have the most up-to-date
// media attachment, keep using it.
2023-06-22 21:46:36 +02:00
return nil
}
}
2023-07-12 13:20:15 +02:00
// If we reach here, we know we need to fetch the most
// up-to-date version of the attachment from remote.
2023-06-22 21:46:36 +02:00
// Parse and validate the newly provided media URL.
2023-07-12 13:20:15 +02:00
headerURI , err := url . Parse ( latestAcc . HeaderRemoteURL )
2023-02-09 09:27:07 +01:00
if err != nil {
2023-07-12 13:20:15 +02:00
return gtserror . Newf ( "error parsing url %s: %w" , latestAcc . HeaderRemoteURL , err )
2021-08-10 13:32:39 +02:00
}
2023-02-10 21:15:23 +01:00
// Acquire lock for derefs map.
2023-10-31 12:12:22 +01:00
unlock := d . state . FedLocks . Lock ( latestAcc . HeaderRemoteURL )
unlock = doOnce ( unlock )
2023-02-10 21:15:23 +01:00
defer unlock ( )
2023-02-13 21:19:51 +01:00
// Look for an existing dereference in progress.
2023-07-12 13:20:15 +02:00
processing , ok := d . derefHeaders [ latestAcc . HeaderRemoteURL ]
2023-02-10 21:15:23 +01:00
2023-02-13 21:19:51 +01:00
if ! ok {
var err error
2022-11-11 20:27:37 +01:00
2023-06-22 21:46:36 +02:00
// Set the media data function to dereference avatar from URI.
2023-02-13 21:19:51 +01:00
data := func ( ctx context . Context ) ( io . ReadCloser , int64 , error ) {
return tsport . DereferenceMedia ( ctx , headerURI )
}
// Create new media processing request from the media manager instance.
2023-07-12 13:20:15 +02:00
processing , err = d . mediaManager . PreProcessMedia ( ctx , data , latestAcc . ID , & media . AdditionalMediaInfo {
2023-02-13 21:19:51 +01:00
Header : func ( ) * bool { v := true ; return & v } ( ) ,
2023-07-12 13:20:15 +02:00
RemoteURL : & latestAcc . HeaderRemoteURL ,
2023-02-13 21:19:51 +01:00
} )
if err != nil {
2023-07-12 13:20:15 +02:00
return gtserror . Newf ( "error preprocessing media for attachment %s: %w" , latestAcc . HeaderRemoteURL , err )
2023-02-13 21:19:51 +01:00
}
// Store media in map to mark as processing.
2023-07-12 13:20:15 +02:00
d . derefHeaders [ latestAcc . HeaderRemoteURL ] = processing
2022-01-24 18:12:04 +01:00
2023-02-13 21:19:51 +01:00
defer func ( ) {
// On exit safely remove media from map.
2023-10-31 12:12:22 +01:00
unlock := d . state . FedLocks . Lock ( latestAcc . HeaderRemoteURL )
2023-07-12 13:20:15 +02:00
delete ( d . derefHeaders , latestAcc . HeaderRemoteURL )
2023-02-13 21:19:51 +01:00
unlock ( )
} ( )
}
2023-02-10 21:15:23 +01:00
// Unlock map.
unlock ( )
// Start media attachment loading (blocking call).
if _ , err := processing . LoadAttachment ( ctx ) ; err != nil {
2023-07-12 13:20:15 +02:00
return gtserror . Newf ( "error loading attachment %s: %w" , latestAcc . HeaderRemoteURL , err )
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
2023-06-22 21:46:36 +02:00
// Set the newly loaded avatar media attachment ID.
2023-07-12 13:20:15 +02:00
latestAcc . HeaderMediaAttachmentID = processing . AttachmentID ( )
2023-06-22 21:46:36 +02:00
return nil
2021-08-10 13:32:39 +02:00
}
2022-01-24 13:12:17 +01:00
2023-10-23 11:58:13 +02:00
func ( d * Dereferencer ) fetchRemoteAccountEmojis ( ctx context . Context , targetAccount * gtsmodel . Account , requestingUsername string ) ( bool , error ) {
2022-09-26 11:56:01 +02:00
maybeEmojis := targetAccount . Emojis
maybeEmojiIDs := targetAccount . EmojiIDs
// It's possible that the account had emoji IDs set on it, but not Emojis
// themselves, depending on how it was fetched before being passed to us.
//
// If we only have IDs, fetch the emojis from the db. We know they're in
// there or else they wouldn't have IDs.
if len ( maybeEmojiIDs ) > len ( maybeEmojis ) {
2022-11-11 20:27:37 +01:00
maybeEmojis = make ( [ ] * gtsmodel . Emoji , 0 , len ( maybeEmojiIDs ) )
2022-09-26 11:56:01 +02:00
for _ , emojiID := range maybeEmojiIDs {
2023-05-12 11:15:54 +02:00
maybeEmoji , err := d . state . DB . GetEmojiByID ( ctx , emojiID )
2022-09-26 11:56:01 +02:00
if err != nil {
return false , err
}
maybeEmojis = append ( maybeEmojis , maybeEmoji )
}
}
// For all the maybe emojis we have, we either fetch them from the database
// (if we haven't already), or dereference them from the remote instance.
gotEmojis , err := d . populateEmojis ( ctx , maybeEmojis , requestingUsername )
if err != nil {
return false , err
}
// Extract the ID of each fetched or dereferenced emoji, so we can attach
// this to the account if necessary.
gotEmojiIDs := make ( [ ] string , 0 , len ( gotEmojis ) )
for _ , e := range gotEmojis {
gotEmojiIDs = append ( gotEmojiIDs , e . ID )
}
var (
changed = false // have the emojis for this account changed?
maybeLen = len ( maybeEmojis )
gotLen = len ( gotEmojis )
)
// if the length of everything is zero, this is simple:
// nothing has changed and there's nothing to do
if maybeLen == 0 && gotLen == 0 {
return changed , nil
}
// if the *amount* of emojis on the account has changed, then the got emojis
// are definitely different from the previous ones (if there were any) --
// the account has either more or fewer emojis set on it now, so take the
// discovered emojis as the new correct ones.
if maybeLen != gotLen {
changed = true
targetAccount . Emojis = gotEmojis
targetAccount . EmojiIDs = gotEmojiIDs
return changed , nil
}
// if the lengths are the same but not all of the slices are
// zero, something *might* have changed, so we have to check
// 1. did we have emojis before that we don't have now?
for _ , maybeEmoji := range maybeEmojis {
var stillPresent bool
for _ , gotEmoji := range gotEmojis {
if maybeEmoji . URI == gotEmoji . URI {
// the emoji we maybe had is still present now,
// so we can stop checking gotEmojis
stillPresent = true
break
}
}
if ! stillPresent {
// at least one maybeEmoji is no longer present in
// the got emojis, so we can stop checking now
changed = true
targetAccount . Emojis = gotEmojis
targetAccount . EmojiIDs = gotEmojiIDs
return changed , nil
}
}
// 2. do we have emojis now that we didn't have before?
for _ , gotEmoji := range gotEmojis {
var wasPresent bool
for _ , maybeEmoji := range maybeEmojis {
// check emoji IDs here as well, because unreferenced
// maybe emojis we didn't already have would not have
// had IDs set on them yet
if gotEmoji . URI == maybeEmoji . URI && gotEmoji . ID == maybeEmoji . ID {
// this got emoji was present already in the maybeEmoji,
// so we can stop checking through maybeEmojis
wasPresent = true
break
}
}
if ! wasPresent {
// at least one gotEmojis was not present in
// the maybeEmojis, so we can stop checking now
changed = true
targetAccount . Emojis = gotEmojis
targetAccount . EmojiIDs = gotEmojiIDs
return changed , nil
}
}
return changed , nil
}
2023-03-01 18:52:44 +01:00
2023-05-12 11:15:54 +02:00
// dereferenceAccountFeatured dereferences an account's featuredCollectionURI (if not empty). For each discovered status, this status will
// be dereferenced (if necessary) and marked as pinned (if necessary). Then, old pins will be removed if they're not included in new pins.
2023-10-23 11:58:13 +02:00
func ( d * Dereferencer ) dereferenceAccountFeatured ( ctx context . Context , requestUser string , account * gtsmodel . Account ) error {
2023-05-12 11:15:54 +02:00
uri , err := url . Parse ( account . FeaturedCollectionURI )
2023-03-01 18:52:44 +01:00
if err != nil {
return err
}
2023-05-12 11:15:54 +02:00
// Pre-fetch a transport for requesting username, used by later deref procedures.
tsport , err := d . transportController . NewTransportForUsername ( ctx , requestUser )
2023-03-01 18:52:44 +01:00
if err != nil {
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "couldn't create transport: %w" , err )
2023-03-01 18:52:44 +01:00
}
b , err := tsport . Dereference ( ctx , uri )
if err != nil {
return err
}
m := make ( map [ string ] interface { } )
if err := json . Unmarshal ( b , & m ) ; err != nil {
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "error unmarshalling bytes into json: %w" , err )
2023-03-01 18:52:44 +01:00
}
t , err := streams . ToType ( ctx , m )
if err != nil {
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "error resolving json into ap vocab type: %w" , err )
2023-03-01 18:52:44 +01:00
}
if t . GetTypeName ( ) != ap . ObjectOrderedCollection {
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "%s was not an OrderedCollection" , uri )
2023-03-01 18:52:44 +01:00
}
collection , ok := t . ( vocab . ActivityStreamsOrderedCollection )
if ! ok {
2023-05-28 14:08:35 +02:00
return gtserror . New ( "couldn't coerce OrderedCollection" )
2023-03-01 18:52:44 +01:00
}
items := collection . GetActivityStreamsOrderedItems ( )
if items == nil {
2023-05-28 14:08:35 +02:00
return gtserror . New ( "nil orderedItems" )
2023-03-01 18:52:44 +01:00
}
// Get previous pinned statuses (we'll need these later).
2023-05-12 11:15:54 +02:00
wasPinned , err := d . state . DB . GetAccountPinnedStatuses ( ctx , account . ID )
2023-03-01 18:52:44 +01:00
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "error getting account pinned statuses: %w" , err )
2023-03-01 18:52:44 +01:00
}
statusURIs := make ( [ ] * url . URL , 0 , items . Len ( ) )
for iter := items . Begin ( ) ; iter != items . End ( ) ; iter = iter . Next ( ) {
var statusURI * url . URL
switch {
case iter . IsActivityStreamsNote ( ) :
// We got a whole Note. Extract the URI.
if note := iter . GetActivityStreamsNote ( ) ; note != nil {
if id := note . GetJSONLDId ( ) ; id != nil {
statusURI = id . GetIRI ( )
}
}
case iter . IsActivityStreamsArticle ( ) :
// We got a whole Article. Extract the URI.
if article := iter . GetActivityStreamsArticle ( ) ; article != nil {
if id := article . GetJSONLDId ( ) ; id != nil {
statusURI = id . GetIRI ( )
}
}
default :
// Try to get just the URI.
statusURI = iter . GetIRI ( )
}
if statusURI == nil {
continue
}
if statusURI . Host != uri . Host {
// If this status doesn't share a host with its featured
// collection URI, we shouldn't trust it. Just move on.
continue
}
// Already append this status URI to our slice.
// We do this here so that even if we can't get
// the status in the next part for some reason,
// we still know it was *meant* to be pinned.
statusURIs = append ( statusURIs , statusURI )
2023-05-12 11:15:54 +02:00
status , _ , err := d . getStatusByURI ( ctx , requestUser , statusURI )
2023-03-01 18:52:44 +01:00
if err != nil {
2023-05-12 11:15:54 +02:00
// We couldn't get the status, bummer. Just log + move on, we can try later.
log . Errorf ( ctx , "error getting status from featured collection %s: %v" , statusURI , err )
2023-03-01 18:52:44 +01:00
continue
}
// If the status was already pinned, we don't need to do anything.
if ! status . PinnedAt . IsZero ( ) {
continue
}
2023-05-12 11:15:54 +02:00
if status . AccountID != account . ID {
2023-03-01 18:52:44 +01:00
// Someone's pinned a status that doesn't
// belong to them, this doesn't work for us.
continue
}
if status . BoostOfID != "" {
// Someone's pinned a boost. This also
// doesn't work for us.
continue
}
// All conditions are met for this status to
// be pinned, so we can finally update it.
status . PinnedAt = time . Now ( )
2023-05-12 11:15:54 +02:00
if err := d . state . DB . UpdateStatus ( ctx , status , "pinned_at" ) ; err != nil {
log . Errorf ( ctx , "error updating status in featured collection %s: %v" , status . URI , err )
continue
2023-03-01 18:52:44 +01:00
}
}
// Now that we know which statuses are pinned, we should
// *unpin* previous pinned statuses that aren't included.
outerLoop :
for _ , status := range wasPinned {
for _ , statusURI := range statusURIs {
if status . URI == statusURI . String ( ) {
// This status is included in most recent
// pinned uris. No need to keep checking.
continue outerLoop
}
}
// Status was pinned before, but is not included
// in most recent pinned uris, so unpin it now.
status . PinnedAt = time . Time { }
2023-05-12 11:15:54 +02:00
if err := d . state . DB . UpdateStatus ( ctx , status , "pinned_at" ) ; err != nil {
log . Errorf ( ctx , "error unpinning status %s: %v" , status . URI , err )
continue
2023-03-01 18:52:44 +01:00
}
}
return nil
}