diff --git a/internal/glance/widget-rss.go b/internal/glance/widget-rss.go index 1598371..add5796 100644 --- a/internal/glance/widget-rss.go +++ b/internal/glance/widget-rss.go @@ -12,6 +12,7 @@ import ( "regexp" "sort" "strings" + "sync" "time" "github.com/mmcdole/gofeed" @@ -25,22 +26,28 @@ var ( rssWidgetHorizontalCards2Template = mustParseTemplate("rss-horizontal-cards-2.html", "widget-base.html") ) +var feedParser = gofeed.NewParser() + type rssWidget struct { widgetBase `yaml:",inline"` FeedRequests []rssFeedRequest `yaml:"feeds"` Style string `yaml:"style"` ThumbnailHeight float64 `yaml:"thumbnail-height"` CardHeight float64 `yaml:"card-height"` - Items rssFeedItemList `yaml:"-"` Limit int `yaml:"limit"` CollapseAfter int `yaml:"collapse-after"` SingleLineTitles bool `yaml:"single-line-titles"` PreserveOrder bool `yaml:"preserve-order"` - NoItemsMessage string `yaml:"-"` + + Items rssFeedItemList `yaml:"-"` + NoItemsMessage string `yaml:"-"` + + feedCacheMutex sync.Mutex + cachedFeeds map[string]*cachedRSSFeed `yaml:"-"` } func (widget *rssWidget) initialize() error { - widget.withTitle("RSS Feed").withCacheDuration(1 * time.Hour) + widget.withTitle("RSS Feed").withCacheDuration(2 * time.Hour) if widget.Limit <= 0 { widget.Limit = 25 @@ -65,12 +72,13 @@ func (widget *rssWidget) initialize() error { } widget.NoItemsMessage = "No items were returned from the feeds." + widget.cachedFeeds = make(map[string]*cachedRSSFeed) return nil } func (widget *rssWidget) update(ctx context.Context) { - items, err := fetchItemsFromRSSFeeds(widget.FeedRequests) + items, err := widget.fetchItemsFromFeeds() if !widget.canContinueUpdateAfterHandlingErr(err) { return @@ -103,6 +111,12 @@ func (widget *rssWidget) Render() template.HTML { return widget.renderTemplate(widget, rssWidgetTemplate) } +type cachedRSSFeed struct { + etag string + lastModified string + items []rssFeedItem +} + type rssFeedItem struct { ChannelName string ChannelURL string @@ -114,35 +128,6 @@ type rssFeedItem struct { PublishedAt time.Time } -// doesn't cover all cases but works the vast majority of the time -var htmlTagsWithAttributesPattern = regexp.MustCompile(`<\/?[a-zA-Z0-9-]+ *(?:[a-zA-Z-]+=(?:"|').*?(?:"|') ?)* *\/?>`) - -func sanitizeFeedDescription(description string) string { - if description == "" { - return "" - } - - description = strings.ReplaceAll(description, "\n", " ") - description = htmlTagsWithAttributesPattern.ReplaceAllString(description, "") - description = sequentialWhitespacePattern.ReplaceAllString(description, " ") - description = strings.TrimSpace(description) - description = html.UnescapeString(description) - - return description -} - -func shortenFeedDescriptionLen(description string, maxLen int) string { - description, _ = limitStringLength(description, 1000) - description = sanitizeFeedDescription(description) - description, limited := limitStringLength(description, maxLen) - - if limited { - description += "…" - } - - return description -} - type rssFeedRequest struct { URL string `yaml:"url"` Title string `yaml:"title"` @@ -164,16 +149,68 @@ func (f rssFeedItemList) sortByNewest() rssFeedItemList { return f } -var feedParser = gofeed.NewParser() +func (widget *rssWidget) fetchItemsFromFeeds() (rssFeedItemList, error) { + requests := widget.FeedRequests -func fetchItemsFromRSSFeedTask(request rssFeedRequest) ([]rssFeedItem, error) { + job := newJob(widget.fetchItemsFromFeedTask, requests).withWorkers(30) + feeds, errs, err := workerPoolDo(job) + if err != nil { + return nil, fmt.Errorf("%w: %v", errNoContent, err) + } + + failed := 0 + entries := make(rssFeedItemList, 0, len(feeds)*10) + seen := make(map[string]struct{}) + + for i := range feeds { + if errs[i] != nil { + failed++ + slog.Error("Failed to get RSS feed", "url", requests[i].URL, "error", errs[i]) + continue + } + + for _, item := range feeds[i] { + if _, exists := seen[item.Link]; exists { + continue + } + entries = append(entries, item) + seen[item.Link] = struct{}{} + } + } + + if failed == len(requests) { + return nil, errNoContent + } + + if failed > 0 { + return entries, fmt.Errorf("%w: missing %d RSS feeds", errPartialContent, failed) + } + + return entries, nil +} + +func (widget *rssWidget) fetchItemsFromFeedTask(request rssFeedRequest) ([]rssFeedItem, error) { req, err := http.NewRequest("GET", request.URL, nil) if err != nil { return nil, err } + req.Header.Add("User-Agent", glanceUserAgentString) + + widget.feedCacheMutex.Lock() + cache, isCached := widget.cachedFeeds[request.URL] + if isCached { + if cache.etag != "" { + req.Header.Add("If-None-Match", cache.etag) + } + if cache.lastModified != "" { + req.Header.Add("If-Modified-Since", cache.lastModified) + } + } + widget.feedCacheMutex.Unlock() + for key, value := range request.Headers { - req.Header.Add(key, value) + req.Header.Set(key, value) } resp, err := defaultHTTPClient.Do(req) @@ -182,6 +219,10 @@ func fetchItemsFromRSSFeedTask(request rssFeedRequest) ([]rssFeedItem, error) { } defer resp.Body.Close() + if resp.StatusCode == http.StatusNotModified && isCached { + return cache.items, nil + } + if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("unexpected status code %d from %s", resp.StatusCode, request.URL) } @@ -289,9 +330,29 @@ func fetchItemsFromRSSFeedTask(request rssFeedRequest) ([]rssFeedItem, error) { items = append(items, rssItem) } + if resp.Header.Get("ETag") != "" || resp.Header.Get("Last-Modified") != "" { + widget.feedCacheMutex.Lock() + widget.cachedFeeds[request.URL] = &cachedRSSFeed{ + etag: resp.Header.Get("ETag"), + lastModified: resp.Header.Get("Last-Modified"), + items: items, + } + widget.feedCacheMutex.Unlock() + } + return items, nil } +func findThumbnailInItemExtensions(item *gofeed.Item) string { + media, ok := item.Extensions["media"] + + if !ok { + return "" + } + + return recursiveFindThumbnailInExtensions(media) +} + func recursiveFindThumbnailInExtensions(extensions map[string][]gofeedext.Extension) string { for _, exts := range extensions { for _, ext := range exts { @@ -312,50 +373,30 @@ func recursiveFindThumbnailInExtensions(extensions map[string][]gofeedext.Extens return "" } -func findThumbnailInItemExtensions(item *gofeed.Item) string { - media, ok := item.Extensions["media"] +var htmlTagsWithAttributesPattern = regexp.MustCompile(`<\/?[a-zA-Z0-9-]+ *(?:[a-zA-Z-]+=(?:"|').*?(?:"|') ?)* *\/?>`) - if !ok { +func sanitizeFeedDescription(description string) string { + if description == "" { return "" } - return recursiveFindThumbnailInExtensions(media) + description = strings.ReplaceAll(description, "\n", " ") + description = htmlTagsWithAttributesPattern.ReplaceAllString(description, "") + description = sequentialWhitespacePattern.ReplaceAllString(description, " ") + description = strings.TrimSpace(description) + description = html.UnescapeString(description) + + return description } -func fetchItemsFromRSSFeeds(requests []rssFeedRequest) (rssFeedItemList, error) { - job := newJob(fetchItemsFromRSSFeedTask, requests).withWorkers(30) - feeds, errs, err := workerPoolDo(job) - if err != nil { - return nil, fmt.Errorf("%w: %v", errNoContent, err) +func shortenFeedDescriptionLen(description string, maxLen int) string { + description, _ = limitStringLength(description, 1000) + description = sanitizeFeedDescription(description) + description, limited := limitStringLength(description, maxLen) + + if limited { + description += "…" } - failed := 0 - entries := make(rssFeedItemList, 0, len(feeds)*10) - seen := make(map[string]struct{}) - - for i := range feeds { - if errs[i] != nil { - failed++ - slog.Error("Failed to get RSS feed", "url", requests[i].URL, "error", errs[i]) - continue - } - - for _, item := range feeds[i] { - if _, exists := seen[item.Link]; exists { - continue - } - entries = append(entries, item) - seen[item.Link] = struct{}{} - } - } - - if failed == len(requests) { - return nil, errNoContent - } - - if failed > 0 { - return entries, fmt.Errorf("%w: missing %d RSS feeds", errPartialContent, failed) - } - - return entries, nil + return description } diff --git a/internal/glance/widget-utils.go b/internal/glance/widget-utils.go index 03b0155..fa2fad5 100644 --- a/internal/glance/widget-utils.go +++ b/internal/glance/widget-utils.go @@ -41,6 +41,7 @@ type requestDoer interface { Do(*http.Request) (*http.Response, error) } +var glanceUserAgentString = "Glance/" + buildVersion + " +https://github.com/glanceapp/glance" var userAgentPersistentVersion atomic.Int32 func getBrowserUserAgentHeader() string {