-rw-r--r--  CHANGELOG.md                                          |   2
-rw-r--r--  walrss/internal/db/20250209222240_lastfetched.up.sql  |   1
-rw-r--r--  walrss/internal/db/db.go                              |  12
-rw-r--r--  walrss/internal/rss/processor.go                      | 103
4 files changed, 62 insertions, 56 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c0db96..d7f26e3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,8 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ## Unreleased
+### Changed
+* Feed fetching will reuse cached content within an hour of a previous fetch without checking for a HTTP 304 (Not Modified) from the remote resource
 
 ## 0.4.0 - 2025-02-09
 ### Changed
diff --git a/walrss/internal/db/20250209222240_lastfetched.up.sql b/walrss/internal/db/20250209222240_lastfetched.up.sql
new file mode 100644
index 0000000..96c40c5
--- /dev/null
+++ b/walrss/internal/db/20250209222240_lastfetched.up.sql
@@ -0,0 +1 @@
+ALTER TABLE feeds ADD COLUMN last_fetched TEXT
\ No newline at end of file
diff --git a/walrss/internal/db/db.go b/walrss/internal/db/db.go
index 880644f..1426d79 100644
--- a/walrss/internal/db/db.go
+++ b/walrss/internal/db/db.go
@@ -7,6 +7,7 @@ import (
 	"github.com/uptrace/bun"
 	"github.com/uptrace/bun/dialect/sqlitedialect"
 	"strings"
+	"time"
 )
 
 func New(filename string) (*bun.DB, error) {
@@ -43,20 +44,21 @@ type Feed struct {
 	Name   string `bun:"name,notnull"`
 	UserID string `bun:"user_id,notnull"`
 
-	LastEtag      string `bun:"last_etag,nullzero"`
-	LastModified  string `bun:"last_modified,nullzero"`
-	CachedContent string `bun:"cached_content,nullzero"`
+	LastFetched   time.Time `bun:"last_fetched,nullzero"`
+	LastEtag      string    `bun:"last_etag,nullzero"`
+	LastModified  string    `bun:"last_modified,nullzero"`
+	CachedContent string    `bun:"cached_content,nullzero"`
 
 	User *User `bun:",rel:belongs-to,join:user_id=id"`
 }
 
-func (f *Feed) CacheWithEtag(etag, content string) {
+func (f *Feed) SetCacheWithEtag(etag, content string) {
 	f.LastModified = ""
 	f.LastEtag = etag
 	f.CachedContent = content
 }
 
-func (f *Feed) CacheWithLastModified(lastModified, content string) {
+func (f *Feed) SetCacheWithLastModified(lastModified, content string) {
 	f.LastEtag = ""
 	f.LastModified = lastModified
 	f.CachedContent = content
diff --git a/walrss/internal/rss/processor.go b/walrss/internal/rss/processor.go
index ce53d5d..f729bad 100644
--- a/walrss/internal/rss/processor.go
+++ b/walrss/internal/rss/processor.go
@@ -13,7 +13,6 @@ import (
 	"github.com/jordan-wright/email"
 	"github.com/matcornic/hermes"
 	"github.com/mmcdole/gofeed"
-	"github.com/patrickmn/go-cache"
 	"github.com/rs/zerolog/log"
 	"net/http"
 	"net/smtp"
@@ -162,69 +161,73 @@ func ProcessUserFeed(st *state.State, user *db.User, progressChannel chan string
 	return err
 }
 
-var (
-	feedCache     = cache.New(time.Minute*10, time.Minute*20)
-	feedFetchLock = new(sync.Mutex)
-)
+var feedFetchLock = new(sync.Mutex)
 
 func getFeedContent(st *state.State, f *db.Feed) (*gofeed.Feed, error) {
-	feedFetchLock.Lock()
+	feedFetchLock.Lock() // I would like to be able to get rid of this lock, however, in order to do so, a lot of the
+	// database infrastructure needs removing and rewriting to use proper transactions. So we'll leave it here for now.
 	defer feedFetchLock.Unlock()
 
-	if v, found := feedCache.Get(f.URL); found {
-		return v.(*gofeed.Feed), nil
-	}
-
 	buf := new(bytes.Buffer)
 
-	ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
-	defer cancel()
-
-	var notModified bool
-	headers := make(textproto.MIMEHeader)
+	// If a feed was cached in the last hour, Walrss will not re-query the remote server and will just use the cache.
+	hasCachedFeed := f.CachedContent != ""
+	cachedFeedIsFresh := !f.LastFetched.IsZero() && time.Now().UTC().Sub(f.LastFetched) < time.Hour
 
-	requestBuilder := requests.URL(f.URL).ToBytesBuffer(buf).UserAgent(getUserAgent(st)).CopyHeaders(headers)
+	if hasCachedFeed && cachedFeedIsFresh {
+		log.Debug().Msgf("%s using fresh cache (%v)", f.URL, f.LastFetched)
+		buf.WriteString(f.CachedContent)
+	} else {
+		ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
+		defer cancel()
 
-	if f.LastEtag != "" || f.LastModified != "" {
-		requestBuilder.AddValidator(
-			func(resp *http.Response) error {
-				if resp.StatusCode == http.StatusNotModified {
-					notModified = true
-					return nil
-				} else {
-					return requests.DefaultValidator(resp)
-				}
-			},
-		)
+		var notModified bool
+		headers := make(textproto.MIMEHeader)
+
+		requestBuilder := requests.URL(f.URL).ToBytesBuffer(buf).UserAgent(getUserAgent(st)).CopyHeaders(headers)
+
+		if f.LastEtag != "" || f.LastModified != "" {
+			requestBuilder.AddValidator(
+				func(resp *http.Response) error {
+					if resp.StatusCode == http.StatusNotModified {
+						notModified = true
+						return nil
+					} else {
+						return requests.DefaultValidator(resp)
+					}
+				},
+			)
+
+			if f.LastEtag != "" {
+				requestBuilder.Header("If-None-Match", f.LastEtag)
+			} else if f.LastModified != "" {
+				requestBuilder.Header("If-Modified-Since", f.LastModified)
+			}
 
-		if f.LastEtag != "" {
-			requestBuilder.Header("If-None-Match", f.LastEtag)
-		} else if f.LastModified != "" {
-			requestBuilder.Header("If-Modified-Since", f.LastModified)
+		} else {
+			requestBuilder.AddValidator(requests.DefaultValidator) // Since we're using CopyHeaders, we need to add the
+			// default validator back ourselves.
 		}
-	} else {
-		requestBuilder.AddValidator(requests.DefaultValidator) // Since we're using CopyHeaders, we need to add the
-		// default validator back ourselves.
-	}
 
+		if err := requestBuilder.Fetch(ctx); err != nil {
+			return nil, err
+		}
 
-	if err := requestBuilder.Fetch(ctx); err != nil {
-		return nil, err
-	}
+		f.LastFetched = time.Now().UTC()
 
-	if notModified {
-		log.Debug().Msgf("%s not modified", f.URL)
-		buf.WriteString(f.CachedContent)
-	} else if etag := headers.Get("ETag"); etag != "" {
-		log.Debug().Msgf("%s modified (ETag)", f.URL)
-		f.CacheWithEtag(etag, buf.String())
-		if err := core.UpdateFeed(st, f); err != nil {
-			return nil, fmt.Errorf("failed to cache ETag-ed response: %v", err)
+		if notModified {
+			log.Debug().Msgf("%s not modified", f.URL)
+			buf.WriteString(f.CachedContent)
+		} else if etag := headers.Get("ETag"); etag != "" {
+			log.Debug().Msgf("%s modified (ETag)", f.URL)
+			f.SetCacheWithEtag(etag, buf.String())
+		} else if lastModified := headers.Get("Last-Modified"); lastModified != "" {
+			log.Debug().Msgf("%s modified (Last-Modified)", f.URL)
+			f.SetCacheWithLastModified(lastModified, buf.String())
 		}
-	} else if lastModified := headers.Get("Last-Modified"); lastModified != "" {
-		log.Debug().Msgf("%s modified (Last-Modified)", f.URL)
-		f.CacheWithLastModified(lastModified, buf.String())
+
+		if err := core.UpdateFeed(st, f); err != nil {
-			return nil, fmt.Errorf("failed to cache Last-Modified enabled response: %v", err)
+			return nil, fmt.Errorf("update feed after fetch: %v", err)
 		}
 	}
 
@@ -233,8 +236,6 @@ func getFeedContent(st *state.State, f *db.Feed) (*gofeed.Feed, error) {
 		return nil, err
 	}
 
-	_ = feedCache.Add(f.URL, feed, cache.DefaultExpiration)
-
 	return feed, nil
 }
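
Note: the core of this change is the freshness check at the top of getFeedContent: a cached body is reused as long as it is non-empty and was fetched less than an hour ago, with no network traffic at all. A minimal standalone sketch of that decision follows; cachedFeed and useCache are hypothetical stand-ins for the relevant db.Feed fields and the in-function check, not names from this repository.

package main

import (
	"fmt"
	"time"
)

// cachedFeed is a hypothetical stand-in for the db.Feed fields the check
// cares about: the cached body and the new last_fetched column.
type cachedFeed struct {
	CachedContent string
	LastFetched   time.Time
}

// useCache mirrors the decision made at the top of getFeedContent: skip the
// HTTP request entirely when a cached body exists and is younger than maxAge.
func useCache(f cachedFeed, now time.Time, maxAge time.Duration) bool {
	hasCachedFeed := f.CachedContent != ""
	cachedFeedIsFresh := !f.LastFetched.IsZero() && now.Sub(f.LastFetched) < maxAge
	return hasCachedFeed && cachedFeedIsFresh
}

func main() {
	now := time.Now().UTC()
	fresh := cachedFeed{CachedContent: "<rss/>", LastFetched: now.Add(-30 * time.Minute)}
	stale := cachedFeed{CachedContent: "<rss/>", LastFetched: now.Add(-2 * time.Hour)}
	empty := cachedFeed{}

	fmt.Println(useCache(fresh, now, time.Hour)) // true: serve from cache, no network call
	fmt.Println(useCache(stale, now, time.Hour)) // false: fall through to the conditional GET
	fmt.Println(useCache(empty, now, time.Hour)) // false: nothing cached yet
}

Storing last_fetched in UTC and comparing with Sub, as the diff does, keeps the comparison stable regardless of the server's local timezone.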
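When the cache is older than an hour, the existing conditional-request path still runs: send If-None-Match or If-Modified-Since, and on an HTTP 304 reuse the stored body. Below is a rough standard-library sketch of that flow; the real code drives the carlmjohnson/requests builder shown in the diff rather than net/http directly, and fetchConditional and its parameters are assumed names for illustration only.

package main

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"time"
)

// fetchConditional performs a conditional GET: it sends the stored validator
// (ETag or Last-Modified value), and treats a 304 response as "the cached
// body is still valid".
func fetchConditional(ctx context.Context, url, etag, lastModified, cachedBody string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	if etag != "" {
		req.Header.Set("If-None-Match", etag)
	} else if lastModified != "" {
		req.Header.Set("If-Modified-Since", lastModified)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusNotModified {
		// 304: the remote copy has not changed, so reuse the cached body.
		return cachedBody, nil
	}
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("unexpected status: %s", resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	// A caller following the diff's approach would now store body together
	// with the new ETag or Last-Modified header and the fetch timestamp used
	// by the hour-long freshness check above.
	return string(body), nil
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	body, err := fetchConditional(ctx, "https://example.com/feed.xml", `"abc123"`, "", "<cached feed>")
	if err != nil {
		fmt.Println("fetch failed:", err)
		return
	}
	fmt.Println(len(body), "bytes of feed content")
}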
