mirror of https://github.com/glanceapp/glance.git (synced 2025-06-21 02:18:22 +02:00)

Refactor RSS widget

commit 129441713b
parent f36527995e
@@ -2,7 +2,6 @@ package glance
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"html"
 	"html/template"
@@ -13,6 +12,7 @@ import (
 	"regexp"
 	"sort"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/mmcdole/gofeed"
@@ -26,11 +26,7 @@ var (
 	rssWidgetHorizontalCards2Template = mustParseTemplate("rss-horizontal-cards-2.html", "widget-base.html")
 )
 
-type cachedFeed struct {
-	LastModified time.Time
-	Etag         string
-	Items        rssFeedItemList
-}
+var feedParser = gofeed.NewParser()
 
 type rssWidget struct {
 	widgetBase `yaml:",inline"`
@@ -38,17 +34,20 @@ type rssWidget struct {
 	Style            string  `yaml:"style"`
 	ThumbnailHeight  float64 `yaml:"thumbnail-height"`
 	CardHeight       float64 `yaml:"card-height"`
-	Items            rssFeedItemList `yaml:"-"`
 	Limit            int     `yaml:"limit"`
 	CollapseAfter    int     `yaml:"collapse-after"`
 	SingleLineTitles bool    `yaml:"single-line-titles"`
 	PreserveOrder    bool    `yaml:"preserve-order"`
+
+	Items          rssFeedItemList `yaml:"-"`
 	NoItemsMessage string          `yaml:"-"`
-	CachedFeeds    map[string]cachedFeed `yaml:"-"`
+
+	feedCacheMutex sync.Mutex
+	cachedFeeds    map[string]*cachedRSSFeed `yaml:"-"`
 }
 
 func (widget *rssWidget) initialize() error {
-	widget.withTitle("RSS Feed").withCacheDuration(1 * time.Hour)
+	widget.withTitle("RSS Feed").withCacheDuration(2 * time.Hour)
 
 	if widget.Limit <= 0 {
 		widget.Limit = 25
@@ -73,46 +72,27 @@ func (widget *rssWidget) initialize() error {
 	}
 
 	widget.NoItemsMessage = "No items were returned from the feeds."
+	widget.cachedFeeds = make(map[string]*cachedRSSFeed)
 
 	return nil
 }
 
 func (widget *rssWidget) update(ctx context.Context) {
-	// Populate If-Modified-Since header and Etag
-	for i, req := range widget.FeedRequests {
-		if cachedFeed, ok := widget.CachedFeeds[req.URL]; ok {
-			widget.FeedRequests[i].IfModifiedSince = cachedFeed.LastModified
-			widget.FeedRequests[i].Etag = cachedFeed.Etag
-		}
-	}
-
-	allItems, feeds, err := fetchItemsFromRSSFeeds(widget.FeedRequests, widget.CachedFeeds)
+	items, err := widget.fetchItemsFromFeeds()
 
 	if !widget.canContinueUpdateAfterHandlingErr(err) {
 		return
 	}
 
 	if !widget.PreserveOrder {
-		allItems.sortByNewest()
+		items.sortByNewest()
 	}
 
-	if len(allItems) > widget.Limit {
-		allItems = allItems[:widget.Limit]
+	if len(items) > widget.Limit {
+		items = items[:widget.Limit]
 	}
 
-	widget.Items = allItems
-
-	cachedFeeds := make(map[string]cachedFeed)
-	for _, feed := range feeds {
-		if !feed.LastModified.IsZero() || feed.Etag != "" {
-			cachedFeeds[feed.URL] = cachedFeed{
-				LastModified: feed.LastModified,
-				Etag:         feed.Etag,
-				Items:        feed.Items,
-			}
-		}
-	}
-	widget.CachedFeeds = cachedFeeds
+	widget.Items = items
 }
 
 func (widget *rssWidget) Render() template.HTML {
@@ -131,6 +111,12 @@ func (widget *rssWidget) Render() template.HTML {
 	return widget.renderTemplate(widget, rssWidgetTemplate)
 }
 
+type cachedRSSFeed struct {
+	etag         string
+	lastModified string
+	items        []rssFeedItem
+}
+
 type rssFeedItem struct {
 	ChannelName string
 	ChannelURL  string
@@ -142,35 +128,6 @@ type rssFeedItem struct {
 	PublishedAt time.Time
 }
 
-// doesn't cover all cases but works the vast majority of the time
-var htmlTagsWithAttributesPattern = regexp.MustCompile(`<\/?[a-zA-Z0-9-]+ *(?:[a-zA-Z-]+=(?:"|').*?(?:"|') ?)* *\/?>`)
-
-func sanitizeFeedDescription(description string) string {
-	if description == "" {
-		return ""
-	}
-
-	description = strings.ReplaceAll(description, "\n", " ")
-	description = htmlTagsWithAttributesPattern.ReplaceAllString(description, "")
-	description = sequentialWhitespacePattern.ReplaceAllString(description, " ")
-	description = strings.TrimSpace(description)
-	description = html.UnescapeString(description)
-
-	return description
-}
-
-func shortenFeedDescriptionLen(description string, maxLen int) string {
-	description, _ = limitStringLength(description, 1000)
-	description = sanitizeFeedDescription(description)
-	description, limited := limitStringLength(description, maxLen)
-
-	if limited {
-		description += "…"
-	}
-
-	return description
-}
-
 type rssFeedRequest struct {
 	URL   string `yaml:"url"`
 	Title string `yaml:"title"`
@@ -180,19 +137,10 @@ type rssFeedRequest struct {
 	ItemLinkPrefix string            `yaml:"item-link-prefix"`
 	Headers        map[string]string `yaml:"headers"`
 	IsDetailed     bool              `yaml:"-"`
-	IfModifiedSince time.Time `yaml:"-"`
-	Etag            string    `yaml:"-"`
 }
 
 type rssFeedItemList []rssFeedItem
 
-type rssFeedResponse struct {
-	URL          string
-	Items        rssFeedItemList
-	LastModified time.Time
-	Etag         string
-}
-
 func (f rssFeedItemList) sortByNewest() rssFeedItemList {
 	sort.Slice(f, func(i, j int) bool {
 		return f[i].PublishedAt.After(f[j].PublishedAt)
@@ -201,69 +149,99 @@ func (f rssFeedItemList) sortByNewest() rssFeedItemList {
 	return f
 }
 
-var feedParser = gofeed.NewParser()
+func (widget *rssWidget) fetchItemsFromFeeds() (rssFeedItemList, error) {
+	requests := widget.FeedRequests
 
-func fetchItemsFromRSSFeedTask(request rssFeedRequest) (rssFeedResponse, error) {
-	feedResponse := rssFeedResponse{URL: request.URL}
+	job := newJob(widget.fetchItemsFromFeedTask, requests).withWorkers(30)
+	feeds, errs, err := workerPoolDo(job)
+	if err != nil {
+		return nil, fmt.Errorf("%w: %v", errNoContent, err)
+	}
+
+	failed := 0
+	entries := make(rssFeedItemList, 0, len(feeds)*10)
+	seen := make(map[string]struct{})
+
+	for i := range feeds {
+		if errs[i] != nil {
+			failed++
+			slog.Error("Failed to get RSS feed", "url", requests[i].URL, "error", errs[i])
+			continue
+		}
+
+		for _, item := range feeds[i] {
+			if _, exists := seen[item.Link]; exists {
+				continue
+			}
+			entries = append(entries, item)
+			seen[item.Link] = struct{}{}
+		}
+	}
+
+	if failed == len(requests) {
+		return nil, errNoContent
+	}
+
+	if failed > 0 {
+		return entries, fmt.Errorf("%w: missing %d RSS feeds", errPartialContent, failed)
+	}
+
+	return entries, nil
+}
+
+func (widget *rssWidget) fetchItemsFromFeedTask(request rssFeedRequest) ([]rssFeedItem, error) {
 	req, err := http.NewRequest("GET", request.URL, nil)
 	if err != nil {
-		return feedResponse, err
+		return nil, err
 	}
 
-	req.Header.Add("User-Agent", fmt.Sprintf("Glance v%s", buildVersion))
+	req.Header.Add("User-Agent", glanceUserAgentString)
+
+	widget.feedCacheMutex.Lock()
+	cache, isCached := widget.cachedFeeds[request.URL]
+	if isCached {
+		if cache.etag != "" {
+			req.Header.Add("If-None-Match", cache.etag)
+		}
+		if cache.lastModified != "" {
+			req.Header.Add("If-Modified-Since", cache.lastModified)
+		}
+	}
+	widget.feedCacheMutex.Unlock()
 
 	for key, value := range request.Headers {
-		req.Header.Add(key, value)
-	}
-
-	if !request.IfModifiedSince.IsZero() {
-		req.Header.Add("If-Modified-Since", request.IfModifiedSince.Format(http.TimeFormat))
-	}
-
-	if request.Etag != "" {
-		req.Header.Add("If-None-Match", request.Etag)
+		req.Header.Set(key, value)
 	}
 
 	resp, err := defaultHTTPClient.Do(req)
 	if err != nil {
-		return feedResponse, err
+		return nil, err
 	}
 	defer resp.Body.Close()
 
-	if resp.StatusCode == http.StatusNotModified {
-		return feedResponse, errNotModified
+	if resp.StatusCode == http.StatusNotModified && isCached {
+		return cache.items, nil
 	}
 
 	if resp.StatusCode != http.StatusOK {
-		return feedResponse, fmt.Errorf("unexpected status code %d from %s", resp.StatusCode, request.URL)
+		return nil, fmt.Errorf("unexpected status code %d from %s", resp.StatusCode, request.URL)
 	}
 
 	body, err := io.ReadAll(resp.Body)
 	if err != nil {
-		return feedResponse, err
+		return nil, err
 	}
 
 	feed, err := feedParser.ParseString(string(body))
 	if err != nil {
-		return feedResponse, err
+		return nil, err
 	}
 
 	if request.Limit > 0 && len(feed.Items) > request.Limit {
 		feed.Items = feed.Items[:request.Limit]
 	}
 
-	items := make([]rssFeedItem, 0, len(feed.Items))
-
-	if lastModified := resp.Header.Get("Last-Modified"); lastModified != "" {
-		if t, err := time.Parse(http.TimeFormat, lastModified); err == nil {
-			feedResponse.LastModified = t
-		}
-	}
-
-	if etag := resp.Header.Get("Etag"); etag != "" {
-		feedResponse.Etag = etag
-	}
+	items := make(rssFeedItemList, 0, len(feed.Items))
 
 	for i := range feed.Items {
 		item := feed.Items[i]
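
The new fetchItemsFromFeedTask above replaces the old per-request IfModifiedSince/Etag fields with standard HTTP revalidation: the stored ETag is replayed as If-None-Match, the stored Last-Modified value as If-Modified-Since, and a 304 response lets the cached items be reused without re-downloading or re-parsing the feed. A minimal standalone sketch of that conditional GET pattern, using only net/http and a placeholder feed URL rather than anything from Glance itself:

package main

import (
	"fmt"
	"io"
	"net/http"
)

// fetchWithRevalidation is a minimal sketch of the conditional GET pattern:
// etag and lastModified come from a previous response and may be empty.
func fetchWithRevalidation(url, etag, lastModified string) (body []byte, notModified bool, err error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, false, err
	}
	if etag != "" {
		req.Header.Add("If-None-Match", etag)
	}
	if lastModified != "" {
		req.Header.Add("If-Modified-Since", lastModified)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, false, err
	}
	defer resp.Body.Close()

	// 304 means the server confirmed the cached copy is still current.
	if resp.StatusCode == http.StatusNotModified {
		return nil, true, nil
	}
	if resp.StatusCode != http.StatusOK {
		return nil, false, fmt.Errorf("unexpected status code %d", resp.StatusCode)
	}

	body, err = io.ReadAll(resp.Body)
	return body, false, err
}

func main() {
	// First request: no validators yet, so expect a full body.
	body, _, err := fetchWithRevalidation("https://example.com/feed.xml", "", "")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println("fetched", len(body), "bytes")
}
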
@@ -352,8 +330,27 @@ func fetchItemsFromRSSFeedTask(request rssFeedRequest) (rssFeedResponse, error)
 		items = append(items, rssItem)
 	}
 
-	feedResponse.Items = items
-	return feedResponse, nil
+	if resp.Header.Get("ETag") != "" || resp.Header.Get("Last-Modified") != "" {
+		widget.feedCacheMutex.Lock()
+		widget.cachedFeeds[request.URL] = &cachedRSSFeed{
+			etag:         resp.Header.Get("ETag"),
+			lastModified: resp.Header.Get("Last-Modified"),
+			items:        items,
+		}
+		widget.feedCacheMutex.Unlock()
+	}
+
+	return items, nil
+}
+
+func findThumbnailInItemExtensions(item *gofeed.Item) string {
+	media, ok := item.Extensions["media"]
+
+	if !ok {
+		return ""
+	}
+
+	return recursiveFindThumbnailInExtensions(media)
 }
 
 func recursiveFindThumbnailInExtensions(extensions map[string][]gofeedext.Extension) string {
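
Every access to widget.cachedFeeds in the new code is wrapped in feedCacheMutex.Lock()/Unlock() because the fetch tasks run concurrently through the worker pool (withWorkers(30)) and would otherwise race on the shared map. A minimal sketch of that guarded-map pattern, with hypothetical names and a plain string standing in for the cached feed value:

package main

import (
	"fmt"
	"sync"
)

// feedCache is a toy stand-in for the widget's cachedFeeds map: a plain map
// guarded by a mutex so concurrent fetch tasks can share it safely.
type feedCache struct {
	mu    sync.Mutex
	items map[string]string
}

func (c *feedCache) get(url string) (string, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	v, ok := c.items[url]
	return v, ok
}

func (c *feedCache) set(url, value string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.items[url] = value
}

func main() {
	cache := &feedCache{items: make(map[string]string)}

	var wg sync.WaitGroup
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			url := fmt.Sprintf("https://example.com/feed-%d.xml", i)
			cache.set(url, "etag-value") // concurrent writes are serialized by the mutex
			if _, ok := cache.get(url); ok {
				fmt.Println("cached", url)
			}
		}(i)
	}
	wg.Wait()
}
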
@@ -376,48 +373,30 @@ func recursiveFindThumbnailInExtensions(extensions map[string][]gofeedext.Extens
 	return ""
 }
 
-func findThumbnailInItemExtensions(item *gofeed.Item) string {
-	media, ok := item.Extensions["media"]
+var htmlTagsWithAttributesPattern = regexp.MustCompile(`<\/?[a-zA-Z0-9-]+ *(?:[a-zA-Z-]+=(?:"|').*?(?:"|') ?)* *\/?>`)
 
-	if !ok {
+func sanitizeFeedDescription(description string) string {
+	if description == "" {
 		return ""
 	}
 
-	return recursiveFindThumbnailInExtensions(media)
+	description = strings.ReplaceAll(description, "\n", " ")
+	description = htmlTagsWithAttributesPattern.ReplaceAllString(description, "")
+	description = sequentialWhitespacePattern.ReplaceAllString(description, " ")
+	description = strings.TrimSpace(description)
+	description = html.UnescapeString(description)
+
+	return description
 }
 
-func fetchItemsFromRSSFeeds(requests []rssFeedRequest, cachedFeeds map[string]cachedFeed) (rssFeedItemList, []rssFeedResponse, error) {
-	job := newJob(fetchItemsFromRSSFeedTask, requests).withWorkers(30)
-	feeds, errs, err := workerPoolDo(job)
-	if err != nil {
-		return nil, nil, fmt.Errorf("%w: %v", errNoContent, err)
+func shortenFeedDescriptionLen(description string, maxLen int) string {
+	description, _ = limitStringLength(description, 1000)
+	description = sanitizeFeedDescription(description)
+	description, limited := limitStringLength(description, maxLen)
+
+	if limited {
+		description += "…"
 	}
 
-	failed := 0
-	notModified := 0
-
-	entries := make(rssFeedItemList, 0, len(feeds)*10)
-
-	for i := range feeds {
-		if errs[i] == nil {
-			entries = append(entries, feeds[i].Items...)
-		} else if errors.Is(errs[i], errNotModified) {
-			notModified++
-			entries = append(entries, cachedFeeds[feeds[i].URL].Items...)
-			slog.Debug("Feed not modified", "url", requests[i].URL, "debug", errs[i])
-		} else {
-			failed++
-			slog.Error("Failed to get RSS feed", "url", requests[i].URL, "error", errs[i])
-		}
-	}
-
-	if failed == len(requests) {
-		return nil, nil, errNoContent
-	}
-
-	if failed > 0 {
-		return entries, feeds, fmt.Errorf("%w: missing %d RSS feeds", errPartialContent, failed)
-	}
-
-	return entries, feeds, nil
+	return description
 }
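
sanitizeFeedDescription (moved below the thumbnail helpers in this commit, unchanged in behavior) flattens an item's HTML description: newlines become spaces, tags are stripped with htmlTagsWithAttributesPattern, whitespace runs are collapsed, and entities are unescaped. A self-contained sketch of the same pipeline; sequentialWhitespacePattern lives elsewhere in the package, so an assumed equivalent is declared locally here:

package main

import (
	"fmt"
	"html"
	"regexp"
	"strings"
)

// Same tag-stripping pattern as in the widget; it does not cover every edge
// case, but handles typical feed markup.
var htmlTagsWithAttributesPattern = regexp.MustCompile(`<\/?[a-zA-Z0-9-]+ *(?:[a-zA-Z-]+=(?:"|').*?(?:"|') ?)* *\/?>`)

// Assumed local equivalent of the package-level sequentialWhitespacePattern.
var sequentialWhitespacePattern = regexp.MustCompile(`\s+`)

func sanitizeFeedDescription(description string) string {
	if description == "" {
		return ""
	}

	description = strings.ReplaceAll(description, "\n", " ")
	description = htmlTagsWithAttributesPattern.ReplaceAllString(description, "")
	description = sequentialWhitespacePattern.ReplaceAllString(description, " ")
	description = strings.TrimSpace(description)
	description = html.UnescapeString(description)

	return description
}

func main() {
	raw := "<p>Hello &amp; welcome to\n<a href=\"https://example.com\">the feed</a></p>"
	fmt.Println(sanitizeFeedDescription(raw)) // Hello & welcome to the feed
}
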
@@ -19,7 +19,6 @@ import (
 var (
 	errNoContent      = errors.New("failed to retrieve any content")
 	errPartialContent = errors.New("failed to retrieve some of the content")
-	errNotModified    = errors.New("content not modified")
 )
 
 const defaultClientTimeout = 5 * time.Second
@@ -39,6 +38,7 @@ type requestDoer interface {
 	Do(*http.Request) (*http.Response, error)
 }
 
+var glanceUserAgentString = "Glance/" + buildVersion + " +https://github.com/glanceapp/glance"
 var userAgentPersistentVersion atomic.Int32
 
 func setBrowserUserAgentHeader(request *http.Request) {
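
glanceUserAgentString replaces the fmt.Sprintf("Glance v%s", buildVersion) call the RSS widget used to build its User-Agent header, giving feed requests one shared identifier that also points at the project URL. A small hedged sketch of how such a header ends up looking, with buildVersion stubbed locally since its real value comes from the build:

package main

import (
	"fmt"
	"net/http"
)

// Local stand-in for the package's buildVersion variable.
var buildVersion = "dev"

var glanceUserAgentString = "Glance/" + buildVersion + " +https://github.com/glanceapp/glance"

func main() {
	req, err := http.NewRequest("GET", "https://example.com/feed.xml", nil)
	if err != nil {
		panic(err)
	}
	// Every outgoing feed request identifies itself with the shared string.
	req.Header.Add("User-Agent", glanceUserAgentString)
	fmt.Println(req.Header.Get("User-Agent"))
}
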