mirror of
https://github.com/glanceapp/glance.git
synced 2025-06-20 18:07:59 +02:00
Merge pull request #462 from charlesharries/rss-feed-conditional-requests
feat: Use conditional requests for RSS feeds
This commit is contained in:
commit
7d08eb312f
@ -12,6 +12,7 @@ import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/mmcdole/gofeed"
|
||||
@ -25,22 +26,28 @@ var (
|
||||
rssWidgetHorizontalCards2Template = mustParseTemplate("rss-horizontal-cards-2.html", "widget-base.html")
|
||||
)
|
||||
|
||||
var feedParser = gofeed.NewParser()
|
||||
|
||||
type rssWidget struct {
|
||||
widgetBase `yaml:",inline"`
|
||||
FeedRequests []rssFeedRequest `yaml:"feeds"`
|
||||
Style string `yaml:"style"`
|
||||
ThumbnailHeight float64 `yaml:"thumbnail-height"`
|
||||
CardHeight float64 `yaml:"card-height"`
|
||||
Items rssFeedItemList `yaml:"-"`
|
||||
Limit int `yaml:"limit"`
|
||||
CollapseAfter int `yaml:"collapse-after"`
|
||||
SingleLineTitles bool `yaml:"single-line-titles"`
|
||||
PreserveOrder bool `yaml:"preserve-order"`
|
||||
NoItemsMessage string `yaml:"-"`
|
||||
|
||||
Items rssFeedItemList `yaml:"-"`
|
||||
NoItemsMessage string `yaml:"-"`
|
||||
|
||||
feedCacheMutex sync.Mutex
|
||||
cachedFeeds map[string]*cachedRSSFeed `yaml:"-"`
|
||||
}
|
||||
|
||||
func (widget *rssWidget) initialize() error {
|
||||
widget.withTitle("RSS Feed").withCacheDuration(1 * time.Hour)
|
||||
widget.withTitle("RSS Feed").withCacheDuration(2 * time.Hour)
|
||||
|
||||
if widget.Limit <= 0 {
|
||||
widget.Limit = 25
|
||||
@ -65,12 +72,13 @@ func (widget *rssWidget) initialize() error {
|
||||
}
|
||||
|
||||
widget.NoItemsMessage = "No items were returned from the feeds."
|
||||
widget.cachedFeeds = make(map[string]*cachedRSSFeed)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (widget *rssWidget) update(ctx context.Context) {
|
||||
items, err := fetchItemsFromRSSFeeds(widget.FeedRequests)
|
||||
items, err := widget.fetchItemsFromFeeds()
|
||||
|
||||
if !widget.canContinueUpdateAfterHandlingErr(err) {
|
||||
return
|
||||
@ -103,6 +111,12 @@ func (widget *rssWidget) Render() template.HTML {
|
||||
return widget.renderTemplate(widget, rssWidgetTemplate)
|
||||
}
|
||||
|
||||
type cachedRSSFeed struct {
|
||||
etag string
|
||||
lastModified string
|
||||
items []rssFeedItem
|
||||
}
|
||||
|
||||
type rssFeedItem struct {
|
||||
ChannelName string
|
||||
ChannelURL string
|
||||
@ -114,35 +128,6 @@ type rssFeedItem struct {
|
||||
PublishedAt time.Time
|
||||
}
|
||||
|
||||
// doesn't cover all cases but works the vast majority of the time
|
||||
var htmlTagsWithAttributesPattern = regexp.MustCompile(`<\/?[a-zA-Z0-9-]+ *(?:[a-zA-Z-]+=(?:"|').*?(?:"|') ?)* *\/?>`)
|
||||
|
||||
func sanitizeFeedDescription(description string) string {
|
||||
if description == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
description = strings.ReplaceAll(description, "\n", " ")
|
||||
description = htmlTagsWithAttributesPattern.ReplaceAllString(description, "")
|
||||
description = sequentialWhitespacePattern.ReplaceAllString(description, " ")
|
||||
description = strings.TrimSpace(description)
|
||||
description = html.UnescapeString(description)
|
||||
|
||||
return description
|
||||
}
|
||||
|
||||
func shortenFeedDescriptionLen(description string, maxLen int) string {
|
||||
description, _ = limitStringLength(description, 1000)
|
||||
description = sanitizeFeedDescription(description)
|
||||
description, limited := limitStringLength(description, maxLen)
|
||||
|
||||
if limited {
|
||||
description += "…"
|
||||
}
|
||||
|
||||
return description
|
||||
}
|
||||
|
||||
type rssFeedRequest struct {
|
||||
URL string `yaml:"url"`
|
||||
Title string `yaml:"title"`
|
||||
@ -164,16 +149,68 @@ func (f rssFeedItemList) sortByNewest() rssFeedItemList {
|
||||
return f
|
||||
}
|
||||
|
||||
var feedParser = gofeed.NewParser()
|
||||
func (widget *rssWidget) fetchItemsFromFeeds() (rssFeedItemList, error) {
|
||||
requests := widget.FeedRequests
|
||||
|
||||
func fetchItemsFromRSSFeedTask(request rssFeedRequest) ([]rssFeedItem, error) {
|
||||
job := newJob(widget.fetchItemsFromFeedTask, requests).withWorkers(30)
|
||||
feeds, errs, err := workerPoolDo(job)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", errNoContent, err)
|
||||
}
|
||||
|
||||
failed := 0
|
||||
entries := make(rssFeedItemList, 0, len(feeds)*10)
|
||||
seen := make(map[string]struct{})
|
||||
|
||||
for i := range feeds {
|
||||
if errs[i] != nil {
|
||||
failed++
|
||||
slog.Error("Failed to get RSS feed", "url", requests[i].URL, "error", errs[i])
|
||||
continue
|
||||
}
|
||||
|
||||
for _, item := range feeds[i] {
|
||||
if _, exists := seen[item.Link]; exists {
|
||||
continue
|
||||
}
|
||||
entries = append(entries, item)
|
||||
seen[item.Link] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
if failed == len(requests) {
|
||||
return nil, errNoContent
|
||||
}
|
||||
|
||||
if failed > 0 {
|
||||
return entries, fmt.Errorf("%w: missing %d RSS feeds", errPartialContent, failed)
|
||||
}
|
||||
|
||||
return entries, nil
|
||||
}
|
||||
|
||||
func (widget *rssWidget) fetchItemsFromFeedTask(request rssFeedRequest) ([]rssFeedItem, error) {
|
||||
req, err := http.NewRequest("GET", request.URL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req.Header.Add("User-Agent", glanceUserAgentString)
|
||||
|
||||
widget.feedCacheMutex.Lock()
|
||||
cache, isCached := widget.cachedFeeds[request.URL]
|
||||
if isCached {
|
||||
if cache.etag != "" {
|
||||
req.Header.Add("If-None-Match", cache.etag)
|
||||
}
|
||||
if cache.lastModified != "" {
|
||||
req.Header.Add("If-Modified-Since", cache.lastModified)
|
||||
}
|
||||
}
|
||||
widget.feedCacheMutex.Unlock()
|
||||
|
||||
for key, value := range request.Headers {
|
||||
req.Header.Add(key, value)
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
|
||||
resp, err := defaultHTTPClient.Do(req)
|
||||
@ -182,6 +219,10 @@ func fetchItemsFromRSSFeedTask(request rssFeedRequest) ([]rssFeedItem, error) {
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusNotModified && isCached {
|
||||
return cache.items, nil
|
||||
}
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("unexpected status code %d from %s", resp.StatusCode, request.URL)
|
||||
}
|
||||
@ -289,9 +330,29 @@ func fetchItemsFromRSSFeedTask(request rssFeedRequest) ([]rssFeedItem, error) {
|
||||
items = append(items, rssItem)
|
||||
}
|
||||
|
||||
if resp.Header.Get("ETag") != "" || resp.Header.Get("Last-Modified") != "" {
|
||||
widget.feedCacheMutex.Lock()
|
||||
widget.cachedFeeds[request.URL] = &cachedRSSFeed{
|
||||
etag: resp.Header.Get("ETag"),
|
||||
lastModified: resp.Header.Get("Last-Modified"),
|
||||
items: items,
|
||||
}
|
||||
widget.feedCacheMutex.Unlock()
|
||||
}
|
||||
|
||||
return items, nil
|
||||
}
|
||||
|
||||
func findThumbnailInItemExtensions(item *gofeed.Item) string {
|
||||
media, ok := item.Extensions["media"]
|
||||
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
|
||||
return recursiveFindThumbnailInExtensions(media)
|
||||
}
|
||||
|
||||
func recursiveFindThumbnailInExtensions(extensions map[string][]gofeedext.Extension) string {
|
||||
for _, exts := range extensions {
|
||||
for _, ext := range exts {
|
||||
@ -312,50 +373,30 @@ func recursiveFindThumbnailInExtensions(extensions map[string][]gofeedext.Extens
|
||||
return ""
|
||||
}
|
||||
|
||||
func findThumbnailInItemExtensions(item *gofeed.Item) string {
|
||||
media, ok := item.Extensions["media"]
|
||||
var htmlTagsWithAttributesPattern = regexp.MustCompile(`<\/?[a-zA-Z0-9-]+ *(?:[a-zA-Z-]+=(?:"|').*?(?:"|') ?)* *\/?>`)
|
||||
|
||||
if !ok {
|
||||
func sanitizeFeedDescription(description string) string {
|
||||
if description == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
return recursiveFindThumbnailInExtensions(media)
|
||||
description = strings.ReplaceAll(description, "\n", " ")
|
||||
description = htmlTagsWithAttributesPattern.ReplaceAllString(description, "")
|
||||
description = sequentialWhitespacePattern.ReplaceAllString(description, " ")
|
||||
description = strings.TrimSpace(description)
|
||||
description = html.UnescapeString(description)
|
||||
|
||||
return description
|
||||
}
|
||||
|
||||
func fetchItemsFromRSSFeeds(requests []rssFeedRequest) (rssFeedItemList, error) {
|
||||
job := newJob(fetchItemsFromRSSFeedTask, requests).withWorkers(30)
|
||||
feeds, errs, err := workerPoolDo(job)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", errNoContent, err)
|
||||
func shortenFeedDescriptionLen(description string, maxLen int) string {
|
||||
description, _ = limitStringLength(description, 1000)
|
||||
description = sanitizeFeedDescription(description)
|
||||
description, limited := limitStringLength(description, maxLen)
|
||||
|
||||
if limited {
|
||||
description += "…"
|
||||
}
|
||||
|
||||
failed := 0
|
||||
entries := make(rssFeedItemList, 0, len(feeds)*10)
|
||||
seen := make(map[string]struct{})
|
||||
|
||||
for i := range feeds {
|
||||
if errs[i] != nil {
|
||||
failed++
|
||||
slog.Error("Failed to get RSS feed", "url", requests[i].URL, "error", errs[i])
|
||||
continue
|
||||
}
|
||||
|
||||
for _, item := range feeds[i] {
|
||||
if _, exists := seen[item.Link]; exists {
|
||||
continue
|
||||
}
|
||||
entries = append(entries, item)
|
||||
seen[item.Link] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
if failed == len(requests) {
|
||||
return nil, errNoContent
|
||||
}
|
||||
|
||||
if failed > 0 {
|
||||
return entries, fmt.Errorf("%w: missing %d RSS feeds", errPartialContent, failed)
|
||||
}
|
||||
|
||||
return entries, nil
|
||||
return description
|
||||
}
|
||||
|
@ -41,6 +41,7 @@ type requestDoer interface {
|
||||
Do(*http.Request) (*http.Response, error)
|
||||
}
|
||||
|
||||
var glanceUserAgentString = "Glance/" + buildVersion + " +https://github.com/glanceapp/glance"
|
||||
var userAgentPersistentVersion atomic.Int32
|
||||
|
||||
func getBrowserUserAgentHeader() string {
|
||||
|
Loading…
x
Reference in New Issue
Block a user