gotosocial/internal/text/goldmark_extension.go

314 lines
8.7 KiB
Go
Raw Normal View History

// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package text
import (
"context"
"fmt"
"strings"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/regexes"
"github.com/superseriousbusiness/gotosocial/internal/util"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/text"
mdutil "github.com/yuin/goldmark/util"
)
// A goldmark extension that parses potential mentions and hashtags separately from regular
// text, so that they stay as one contiguous text fragment in the AST, and then renders
// them separately too, to avoid scanning normal text for mentions and tags.
// mention and hashtag fulfil the goldmark ast.Node interface.
type mention struct {
ast.BaseInline
Segment text.Segment
}
type hashtag struct {
ast.BaseInline
Segment text.Segment
}
type emoji struct {
ast.BaseInline
Segment text.Segment
}
var (
kindMention = ast.NewNodeKind("Mention")
kindHashtag = ast.NewNodeKind("Hashtag")
kindEmoji = ast.NewNodeKind("Emoji")
)
func (n *mention) Kind() ast.NodeKind {
return kindMention
}
func (n *hashtag) Kind() ast.NodeKind {
return kindHashtag
}
func (n *emoji) Kind() ast.NodeKind {
return kindEmoji
}
// Dump can be used for debugging.
func (n *mention) Dump(source []byte, level int) {
fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
}
func (n *hashtag) Dump(source []byte, level int) {
fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
}
func (n *emoji) Dump(source []byte, level int) {
fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
}
// newMention and newHashtag create a goldmark ast.Node from a goldmark text.Segment.
// The contained segment is used in rendering.
func newMention(s text.Segment) *mention {
return &mention{
BaseInline: ast.BaseInline{},
Segment: s,
}
}
func newHashtag(s text.Segment) *hashtag {
return &hashtag{
BaseInline: ast.BaseInline{},
Segment: s,
}
}
func newEmoji(s text.Segment) *emoji {
return &emoji{
BaseInline: ast.BaseInline{},
Segment: s,
}
}
// mentionParser and hashtagParser fulfil the goldmark parser.InlineParser interface.
type mentionParser struct{}
type hashtagParser struct{}
type emojiParser struct{}
func (p *mentionParser) Trigger() []byte {
return []byte{'@'}
}
func (p *hashtagParser) Trigger() []byte {
return []byte{'#'}
}
func (p *emojiParser) Trigger() []byte {
return []byte{':'}
}
func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
before := block.PrecendingCharacter()
line, segment := block.PeekLine()
if !util.IsMentionOrHashtagBoundary(before) {
return nil
}
// unideal for performance but makes use of existing regex
loc := regexes.MentionFinder.FindIndex(line)
switch {
case loc == nil:
fallthrough
case loc[0] != 0: // fail if not found at start
return nil
default:
block.Advance(loc[1])
return newMention(segment.WithStop(segment.Start + loc[1]))
}
}
func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
before := block.PrecendingCharacter()
line, segment := block.PeekLine()
s := string(line)
if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 {
return nil
}
for i, r := range s {
switch {
case r == '#' && i == 0:
// ignore initial #
continue
case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r):
// Fake hashtag, don't trust it
return nil
case util.IsMentionOrHashtagBoundary(r):
if i <= 1 {
// empty
return nil
}
// End of hashtag
block.Advance(i)
return newHashtag(segment.WithStop(segment.Start + i))
}
}
// If we don't find invalid characters before the end of the line then it's all hashtag, babey
block.Advance(segment.Len())
return newHashtag(segment)
}
func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
line, segment := block.PeekLine()
// unideal for performance but makes use of existing regex
loc := regexes.EmojiFinder.FindIndex(line)
switch {
case loc == nil:
fallthrough
case loc[0] != 0: // fail if not found at start
return nil
default:
block.Advance(loc[1])
return newEmoji(segment.WithStop(segment.Start + loc[1]))
}
}
// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the
// fields are used to report tags and mentions to the caller for use as metadata.
type customRenderer struct {
f *formatter
ctx context.Context
parseMention gtsmodel.ParseMentionFunc
accountID string
statusID string
emojiOnly bool
result *FormatResult
}
func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
reg.Register(kindMention, r.renderMention)
reg.Register(kindHashtag, r.renderHashtag)
reg.Register(kindEmoji, r.renderEmoji)
}
func (r *customRenderer) Extend(m goldmark.Markdown) {
// 1000 is set as the lowest priority, but it's arbitrary
m.Parser().AddOptions(parser.WithInlineParsers(
mdutil.Prioritized(&emojiParser{}, 1000),
))
if !r.emojiOnly {
m.Parser().AddOptions(parser.WithInlineParsers(
mdutil.Prioritized(&mentionParser{}, 1000),
mdutil.Prioritized(&hashtagParser{}, 1000),
))
}
m.Renderer().AddOptions(renderer.WithNodeRenderers(
mdutil.Prioritized(r, 1000),
))
}
// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.
func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
return ast.WalkSkipChildren, nil
}
n, ok := node.(*mention) // this function is only registered for kindMention
if !ok {
[performance] refactoring + add fave / follow / request / visibility caching (#1607) * refactor visibility checking, add caching for visibility * invalidate visibility cache items on account / status deletes * fix requester ID passed to visibility cache nil ptr * de-interface caches, fix home / public timeline caching + visibility * finish adding code comments for visibility filter * fix angry goconst linter warnings * actually finish adding filter visibility code comments for timeline functions * move home timeline status author check to after visibility * remove now-unused code * add more code comments * add TODO code comment, update printed cache start names * update printed cache names on stop * start adding separate follow(request) delete db functions, add specific visibility cache tests * add relationship type caching * fix getting local account follows / followed-bys, other small codebase improvements * simplify invalidation using cache hooks, add more GetAccountBy___() functions * fix boosting to return 404 if not boostable but no error (to not leak status ID) * remove dead code * improved placement of cache invalidation * update license headers * add example follow, follow-request config entries * add example visibility cache configuration to config file * use specific PutFollowRequest() instead of just Put() * add tests for all GetAccountBy() * add GetBlockBy() tests * update block to check primitive fields * update and finish adding Get{Account,Block,Follow,FollowRequest}By() tests * fix copy-pasted code * update envparsing test * whitespace * fix bun struct tag * add license header to gtscontext * fix old license header * improved error creation to not use fmt.Errorf() when not needed * fix various rebase conflicts, fix account test * remove commented-out code, fix-up mention caching * fix mention select bun statement * ensure mention target account populated, pass in context to customrenderer logging * remove more uncommented code, fix typeutil test * add statusfave database model caching * add status fave cache configuration * add status fave cache example config * woops, catch missed error. nice catch linter! * add back testrig panic on nil db * update example configuration to match defaults, slight tweak to cache configuration defaults * update envparsing test with new defaults * fetch followingget to use the follow target account * use accounnt.IsLocal() instead of empty domain check * use constants for the cache visibility type check * use bun.In() for notification type restriction in db query * include replies when fetching PublicTimeline() (to account for single-author threads in Visibility{}.StatusPublicTimelineable()) * use bun query building for nested select statements to ensure working with postgres * update public timeline future status checks to match visibility filter * same as previous, for home timeline * update public timeline tests to dynamically check for appropriate statuses * migrate accounts to allow unique constraint on public_key * provide minimal account with publicKey --------- Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tsmethurst <tobi.smethurst@protonmail.com>
2023-03-28 15:03:14 +02:00
log.Panic(r.ctx, "type assertion failed")
}
text := string(n.Segment.Value(source))
html := r.replaceMention(text)
// we don't have much recourse if this fails
if _, err := w.WriteString(html); err != nil {
[performance] refactoring + add fave / follow / request / visibility caching (#1607) * refactor visibility checking, add caching for visibility * invalidate visibility cache items on account / status deletes * fix requester ID passed to visibility cache nil ptr * de-interface caches, fix home / public timeline caching + visibility * finish adding code comments for visibility filter * fix angry goconst linter warnings * actually finish adding filter visibility code comments for timeline functions * move home timeline status author check to after visibility * remove now-unused code * add more code comments * add TODO code comment, update printed cache start names * update printed cache names on stop * start adding separate follow(request) delete db functions, add specific visibility cache tests * add relationship type caching * fix getting local account follows / followed-bys, other small codebase improvements * simplify invalidation using cache hooks, add more GetAccountBy___() functions * fix boosting to return 404 if not boostable but no error (to not leak status ID) * remove dead code * improved placement of cache invalidation * update license headers * add example follow, follow-request config entries * add example visibility cache configuration to config file * use specific PutFollowRequest() instead of just Put() * add tests for all GetAccountBy() * add GetBlockBy() tests * update block to check primitive fields * update and finish adding Get{Account,Block,Follow,FollowRequest}By() tests * fix copy-pasted code * update envparsing test * whitespace * fix bun struct tag * add license header to gtscontext * fix old license header * improved error creation to not use fmt.Errorf() when not needed * fix various rebase conflicts, fix account test * remove commented-out code, fix-up mention caching * fix mention select bun statement * ensure mention target account populated, pass in context to customrenderer logging * remove more uncommented code, fix typeutil test * add statusfave database model caching * add status fave cache configuration * add status fave cache example config * woops, catch missed error. nice catch linter! * add back testrig panic on nil db * update example configuration to match defaults, slight tweak to cache configuration defaults * update envparsing test with new defaults * fetch followingget to use the follow target account * use accounnt.IsLocal() instead of empty domain check * use constants for the cache visibility type check * use bun.In() for notification type restriction in db query * include replies when fetching PublicTimeline() (to account for single-author threads in Visibility{}.StatusPublicTimelineable()) * use bun query building for nested select statements to ensure working with postgres * update public timeline future status checks to match visibility filter * same as previous, for home timeline * update public timeline tests to dynamically check for appropriate statuses * migrate accounts to allow unique constraint on public_key * provide minimal account with publicKey --------- Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tsmethurst <tobi.smethurst@protonmail.com>
2023-03-28 15:03:14 +02:00
log.Errorf(r.ctx, "error writing HTML: %s", err)
}
return ast.WalkSkipChildren, nil
}
func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
return ast.WalkSkipChildren, nil
}
n, ok := node.(*hashtag) // this function is only registered for kindHashtag
if !ok {
[performance] refactoring + add fave / follow / request / visibility caching (#1607) * refactor visibility checking, add caching for visibility * invalidate visibility cache items on account / status deletes * fix requester ID passed to visibility cache nil ptr * de-interface caches, fix home / public timeline caching + visibility * finish adding code comments for visibility filter * fix angry goconst linter warnings * actually finish adding filter visibility code comments for timeline functions * move home timeline status author check to after visibility * remove now-unused code * add more code comments * add TODO code comment, update printed cache start names * update printed cache names on stop * start adding separate follow(request) delete db functions, add specific visibility cache tests * add relationship type caching * fix getting local account follows / followed-bys, other small codebase improvements * simplify invalidation using cache hooks, add more GetAccountBy___() functions * fix boosting to return 404 if not boostable but no error (to not leak status ID) * remove dead code * improved placement of cache invalidation * update license headers * add example follow, follow-request config entries * add example visibility cache configuration to config file * use specific PutFollowRequest() instead of just Put() * add tests for all GetAccountBy() * add GetBlockBy() tests * update block to check primitive fields * update and finish adding Get{Account,Block,Follow,FollowRequest}By() tests * fix copy-pasted code * update envparsing test * whitespace * fix bun struct tag * add license header to gtscontext * fix old license header * improved error creation to not use fmt.Errorf() when not needed * fix various rebase conflicts, fix account test * remove commented-out code, fix-up mention caching * fix mention select bun statement * ensure mention target account populated, pass in context to customrenderer logging * remove more uncommented code, fix typeutil test * add statusfave database model caching * add status fave cache configuration * add status fave cache example config * woops, catch missed error. nice catch linter! * add back testrig panic on nil db * update example configuration to match defaults, slight tweak to cache configuration defaults * update envparsing test with new defaults * fetch followingget to use the follow target account * use accounnt.IsLocal() instead of empty domain check * use constants for the cache visibility type check * use bun.In() for notification type restriction in db query * include replies when fetching PublicTimeline() (to account for single-author threads in Visibility{}.StatusPublicTimelineable()) * use bun query building for nested select statements to ensure working with postgres * update public timeline future status checks to match visibility filter * same as previous, for home timeline * update public timeline tests to dynamically check for appropriate statuses * migrate accounts to allow unique constraint on public_key * provide minimal account with publicKey --------- Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tsmethurst <tobi.smethurst@protonmail.com>
2023-03-28 15:03:14 +02:00
log.Panic(r.ctx, "type assertion failed")
}
text := string(n.Segment.Value(source))
html := r.replaceHashtag(text)
_, err := w.WriteString(html)
// we don't have much recourse if this fails
if err != nil {
[performance] refactoring + add fave / follow / request / visibility caching (#1607) * refactor visibility checking, add caching for visibility * invalidate visibility cache items on account / status deletes * fix requester ID passed to visibility cache nil ptr * de-interface caches, fix home / public timeline caching + visibility * finish adding code comments for visibility filter * fix angry goconst linter warnings * actually finish adding filter visibility code comments for timeline functions * move home timeline status author check to after visibility * remove now-unused code * add more code comments * add TODO code comment, update printed cache start names * update printed cache names on stop * start adding separate follow(request) delete db functions, add specific visibility cache tests * add relationship type caching * fix getting local account follows / followed-bys, other small codebase improvements * simplify invalidation using cache hooks, add more GetAccountBy___() functions * fix boosting to return 404 if not boostable but no error (to not leak status ID) * remove dead code * improved placement of cache invalidation * update license headers * add example follow, follow-request config entries * add example visibility cache configuration to config file * use specific PutFollowRequest() instead of just Put() * add tests for all GetAccountBy() * add GetBlockBy() tests * update block to check primitive fields * update and finish adding Get{Account,Block,Follow,FollowRequest}By() tests * fix copy-pasted code * update envparsing test * whitespace * fix bun struct tag * add license header to gtscontext * fix old license header * improved error creation to not use fmt.Errorf() when not needed * fix various rebase conflicts, fix account test * remove commented-out code, fix-up mention caching * fix mention select bun statement * ensure mention target account populated, pass in context to customrenderer logging * remove more uncommented code, fix typeutil test * add statusfave database model caching * add status fave cache configuration * add status fave cache example config * woops, catch missed error. nice catch linter! * add back testrig panic on nil db * update example configuration to match defaults, slight tweak to cache configuration defaults * update envparsing test with new defaults * fetch followingget to use the follow target account * use accounnt.IsLocal() instead of empty domain check * use constants for the cache visibility type check * use bun.In() for notification type restriction in db query * include replies when fetching PublicTimeline() (to account for single-author threads in Visibility{}.StatusPublicTimelineable()) * use bun query building for nested select statements to ensure working with postgres * update public timeline future status checks to match visibility filter * same as previous, for home timeline * update public timeline tests to dynamically check for appropriate statuses * migrate accounts to allow unique constraint on public_key * provide minimal account with publicKey --------- Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tsmethurst <tobi.smethurst@protonmail.com>
2023-03-28 15:03:14 +02:00
log.Errorf(r.ctx, "error writing HTML: %s", err)
}
return ast.WalkSkipChildren, nil
}
// renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata.
func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
if !entering {
return ast.WalkSkipChildren, nil
}
n, ok := node.(*emoji) // this function is only registered for kindEmoji
if !ok {
[performance] refactoring + add fave / follow / request / visibility caching (#1607) * refactor visibility checking, add caching for visibility * invalidate visibility cache items on account / status deletes * fix requester ID passed to visibility cache nil ptr * de-interface caches, fix home / public timeline caching + visibility * finish adding code comments for visibility filter * fix angry goconst linter warnings * actually finish adding filter visibility code comments for timeline functions * move home timeline status author check to after visibility * remove now-unused code * add more code comments * add TODO code comment, update printed cache start names * update printed cache names on stop * start adding separate follow(request) delete db functions, add specific visibility cache tests * add relationship type caching * fix getting local account follows / followed-bys, other small codebase improvements * simplify invalidation using cache hooks, add more GetAccountBy___() functions * fix boosting to return 404 if not boostable but no error (to not leak status ID) * remove dead code * improved placement of cache invalidation * update license headers * add example follow, follow-request config entries * add example visibility cache configuration to config file * use specific PutFollowRequest() instead of just Put() * add tests for all GetAccountBy() * add GetBlockBy() tests * update block to check primitive fields * update and finish adding Get{Account,Block,Follow,FollowRequest}By() tests * fix copy-pasted code * update envparsing test * whitespace * fix bun struct tag * add license header to gtscontext * fix old license header * improved error creation to not use fmt.Errorf() when not needed * fix various rebase conflicts, fix account test * remove commented-out code, fix-up mention caching * fix mention select bun statement * ensure mention target account populated, pass in context to customrenderer logging * remove more uncommented code, fix typeutil test * add statusfave database model caching * add status fave cache configuration * add status fave cache example config * woops, catch missed error. nice catch linter! * add back testrig panic on nil db * update example configuration to match defaults, slight tweak to cache configuration defaults * update envparsing test with new defaults * fetch followingget to use the follow target account * use accounnt.IsLocal() instead of empty domain check * use constants for the cache visibility type check * use bun.In() for notification type restriction in db query * include replies when fetching PublicTimeline() (to account for single-author threads in Visibility{}.StatusPublicTimelineable()) * use bun query building for nested select statements to ensure working with postgres * update public timeline future status checks to match visibility filter * same as previous, for home timeline * update public timeline tests to dynamically check for appropriate statuses * migrate accounts to allow unique constraint on public_key * provide minimal account with publicKey --------- Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tsmethurst <tobi.smethurst@protonmail.com>
2023-03-28 15:03:14 +02:00
log.Panic(r.ctx, "type assertion failed")
}
text := string(n.Segment.Value(source))
shortcode := text[1 : len(text)-1]
emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "")
if err != nil {
if err != db.ErrNoEntries {
log.Errorf(nil, "error getting local emoji with shortcode %s: %s", shortcode, err)
}
} else if *emoji.VisibleInPicker && !*emoji.Disabled {
listed := false
for _, e := range r.result.Emojis {
if e.Shortcode == emoji.Shortcode {
listed = true
break
}
}
if !listed {
r.result.Emojis = append(r.result.Emojis, emoji)
}
}
// we don't have much recourse if this fails
if _, err := w.WriteString(text); err != nil {
[performance] refactoring + add fave / follow / request / visibility caching (#1607) * refactor visibility checking, add caching for visibility * invalidate visibility cache items on account / status deletes * fix requester ID passed to visibility cache nil ptr * de-interface caches, fix home / public timeline caching + visibility * finish adding code comments for visibility filter * fix angry goconst linter warnings * actually finish adding filter visibility code comments for timeline functions * move home timeline status author check to after visibility * remove now-unused code * add more code comments * add TODO code comment, update printed cache start names * update printed cache names on stop * start adding separate follow(request) delete db functions, add specific visibility cache tests * add relationship type caching * fix getting local account follows / followed-bys, other small codebase improvements * simplify invalidation using cache hooks, add more GetAccountBy___() functions * fix boosting to return 404 if not boostable but no error (to not leak status ID) * remove dead code * improved placement of cache invalidation * update license headers * add example follow, follow-request config entries * add example visibility cache configuration to config file * use specific PutFollowRequest() instead of just Put() * add tests for all GetAccountBy() * add GetBlockBy() tests * update block to check primitive fields * update and finish adding Get{Account,Block,Follow,FollowRequest}By() tests * fix copy-pasted code * update envparsing test * whitespace * fix bun struct tag * add license header to gtscontext * fix old license header * improved error creation to not use fmt.Errorf() when not needed * fix various rebase conflicts, fix account test * remove commented-out code, fix-up mention caching * fix mention select bun statement * ensure mention target account populated, pass in context to customrenderer logging * remove more uncommented code, fix typeutil test * add statusfave database model caching * add status fave cache configuration * add status fave cache example config * woops, catch missed error. nice catch linter! * add back testrig panic on nil db * update example configuration to match defaults, slight tweak to cache configuration defaults * update envparsing test with new defaults * fetch followingget to use the follow target account * use accounnt.IsLocal() instead of empty domain check * use constants for the cache visibility type check * use bun.In() for notification type restriction in db query * include replies when fetching PublicTimeline() (to account for single-author threads in Visibility{}.StatusPublicTimelineable()) * use bun query building for nested select statements to ensure working with postgres * update public timeline future status checks to match visibility filter * same as previous, for home timeline * update public timeline tests to dynamically check for appropriate statuses * migrate accounts to allow unique constraint on public_key * provide minimal account with publicKey --------- Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: tsmethurst <tobi.smethurst@protonmail.com>
2023-03-28 15:03:14 +02:00
log.Errorf(r.ctx, "error writing HTML: %s", err)
}
return ast.WalkSkipChildren, nil
}