diff options
| author | AKP <abi@tdpain.net> | 2025-02-10 22:09:37 +0000 |
|---|---|---|
| committer | AKP <abi@tdpain.net> | 2025-02-10 22:12:34 +0000 |
| commit | b8181ee20e5743c2a4ab63694fc8d9840d751400 (patch) | |
| tree | 53c8a0758ad3fe6697f2c8b8de59a51199b21df5 | |
| parent | 30f4549463e758e7e93f1a8a86467c06b94fa134 (diff) | |
Use old time-based new item detection method when no known feed items exist in the database
This can happen after a database migration, or when a set of feeds is imported or first added.
| -rw-r--r-- | CHANGELOG.md | 2 | ||||
| -rw-r--r-- | walrss/internal/rss/processor.go | 80 |
2 files changed, 68 insertions, 14 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 05c5d1a..55e0d1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Unreleased +### Changed +* When no feed items are stored in the database for a particular feed, fall back to using the old time-based detection method ## 0.4.2 - 2025-02-09 ### Fixed diff --git a/walrss/internal/rss/processor.go b/walrss/internal/rss/processor.go index 8f1974f..7d1258a 100644 --- a/walrss/internal/rss/processor.go +++ b/walrss/internal/rss/processor.go @@ -120,8 +120,9 @@ func ProcessUserFeed(st *state.State, user *db.User, progressChannel chan string pf.Error = err reportProgress(progressChannel, "Failed to fetch: "+err.Error()) } else { - pf.Items, err = filterFeedContent( + ffcRes, err := filterFeedContent( st, + interval, rawFeed, f.ID, ) @@ -129,10 +130,12 @@ func ProcessUserFeed(st *state.State, user *db.User, progressChannel chan string return fmt.Errorf("filter for new feed items in %s: %w", f.ID, err) } + pf.Items = ffcRes.filtered + // add new items to DB cache { var newItems []*db.FeedItem - for _, i := range pf.Items { + for _, i := range ffcRes.new { newItems = append(newItems, &db.FeedItem{ FeedID: f.ID, ItemID: i.ID, @@ -253,7 +256,20 @@ type feedItem struct { PublishTime time.Time } -func filterFeedContent(st *state.State, feed *gofeed.Feed, feedID string) ([]*feedItem, error) { +type filterFeedContentResult struct { + // filtered are the items that should be shown to a user in an email + filtered []*feedItem + + // items are the items that have never been seen before and should be added to the database. + // + // items in new may only have the ID field set. 
+ new []*feedItem + + // filtered and new may differ when using time-based intervals and only a recently-published portion of the overall + // new set should be shown to the user. +} + +func filterFeedContent(st *state.State, interval time.Duration, feed *gofeed.Feed, feedID string) (*filterFeedContentResult, error) { knownItemsList, err := core.GetFeedItemsForFeed(st, feedID) if err != nil { return nil, fmt.Errorf("get known feed items: %w", err) @@ -264,24 +280,60 @@ func filterFeedContent(st *state.State, feed *gofeed.Feed, feedID string) ([]*fe knownItems[i.ItemID] = struct{}{} } - var o []*feedItem + res := new(filterFeedContentResult) + + if len(knownItemsList) == 0 { + // This might happen in the case of database migrations or when a new feed is added. + // In this instance, we'll fall back to the old behaviour of using a time interval to select new posts. + + // Intervals are in terms of days, never hours. + // This gets a date for the previous day/week and sets it to the start of that day. 
+ + selectPublishedSince := time.Now().UTC().Add(-interval) + selectPublishedSince = time.Date(selectPublishedSince.Year(), selectPublishedSince.Month(), selectPublishedSince.Day(), 0, 0, 0, 0, time.UTC) + + for _, item := range feed.Items { + res.new = append(res.new, &feedItem{ID: item.GUID}) - for _, item := range feed.Items { - if _, found := knownItems[item.GUID]; !found { if item.PublishedParsed == nil { - item.PublishedParsed = &time.Time{} + continue } - o = append(o, &feedItem{ - ID: item.GUID, - Title: strings.TrimSpace(item.Title), - URL: item.Link, - PublishTime: *item.PublishedParsed, - }) + if item.PublishedParsed.After(selectPublishedSince) || item.PublishedParsed.Equal(selectPublishedSince) { + if item.PublishedParsed == nil { + item.PublishedParsed = &time.Time{} + } + + res.filtered = append(res.filtered, &feedItem{ + ID: item.GUID, + Title: strings.TrimSpace(item.Title), + URL: item.Link, + PublishTime: *item.PublishedParsed, + }) + } + } + + } else { + for _, item := range feed.Items { + if _, found := knownItems[item.GUID]; found { + if item.PublishedParsed == nil { + item.PublishedParsed = &time.Time{} + } + + x := &feedItem{ + ID: item.GUID, + Title: strings.TrimSpace(item.Title), + URL: item.Link, + PublishTime: *item.PublishedParsed, + } + + res.new = append(res.new, x) + res.filtered = append(res.filtered, x) + } } } - return o, nil + return res, nil } func generateEmail(st *state.State, processedItems []*processedFeed, interval, timeToGenerate time.Duration) (plain, html []byte, err error) { |
