aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: AKP <abi@tdpain.net> 2025-02-09 22:43:24 +0000
committer: AKP <abi@tdpain.net> 2025-02-09 22:46:06 +0000
commit: aed9fe5d5fddc2ce6293ced9751ab15715263df9 (patch)
tree: da262e226d4164543ce781aae812a3de9585fc1e
parent: 4dbbdadd90b11ff13d8888b5be8329e223f5cc40 (diff)
Cache feed data in database
-rw-r--r-- CHANGELOG.md 2
-rw-r--r-- walrss/internal/db/20250209222240_lastfetched.up.sql 1
-rw-r--r-- walrss/internal/db/db.go 12
-rw-r--r-- walrss/internal/rss/processor.go 103
4 files changed, 62 insertions, 56 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c0db96..d7f26e3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,8 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
+### Changed
+* Feed fetching will reuse cached content within an hour of a previous fetch without checking for an HTTP 304 (Not Modified) from the remote resource
## 0.4.0 - 2025-02-09
### Changed
diff --git a/walrss/internal/db/20250209222240_lastfetched.up.sql b/walrss/internal/db/20250209222240_lastfetched.up.sql
new file mode 100644
index 0000000..96c40c5
--- /dev/null
+++ b/walrss/internal/db/20250209222240_lastfetched.up.sql
@@ -0,0 +1 @@
+ALTER TABLE feeds ADD COLUMN last_fetched TEXT
\ No newline at end of file
diff --git a/walrss/internal/db/db.go b/walrss/internal/db/db.go
index 880644f..1426d79 100644
--- a/walrss/internal/db/db.go
+++ b/walrss/internal/db/db.go
@@ -7,6 +7,7 @@ import (
"github.com/uptrace/bun"
"github.com/uptrace/bun/dialect/sqlitedialect"
"strings"
+ "time"
)
func New(filename string) (*bun.DB, error) {
@@ -43,20 +44,21 @@ type Feed struct {
Name string `bun:"name,notnull"`
UserID string `bun:"user_id,notnull"`
- LastEtag string `bun:"last_etag,nullzero"`
- LastModified string `bun:"last_modified,nullzero"`
- CachedContent string `bun:"cached_content,nullzero"`
+ LastFetched time.Time `bun:"last_fetched,nullzero"`
+ LastEtag string `bun:"last_etag,nullzero"`
+ LastModified string `bun:"last_modified,nullzero"`
+ CachedContent string `bun:"cached_content,nullzero"`
User *User `bun:",rel:belongs-to,join:user_id=id"`
}
-func (f *Feed) CacheWithEtag(etag, content string) {
+func (f *Feed) SetCacheWithEtag(etag, content string) {
f.LastModified = ""
f.LastEtag = etag
f.CachedContent = content
}
-func (f *Feed) CacheWithLastModified(lastModified, content string) {
+func (f *Feed) SetCacheWithLastModified(lastModified, content string) {
f.LastEtag = ""
f.LastModified = lastModified
f.CachedContent = content
diff --git a/walrss/internal/rss/processor.go b/walrss/internal/rss/processor.go
index ce53d5d..f729bad 100644
--- a/walrss/internal/rss/processor.go
+++ b/walrss/internal/rss/processor.go
@@ -13,7 +13,6 @@ import (
"github.com/jordan-wright/email"
"github.com/matcornic/hermes"
"github.com/mmcdole/gofeed"
- "github.com/patrickmn/go-cache"
"github.com/rs/zerolog/log"
"net/http"
"net/smtp"
@@ -162,69 +161,73 @@ func ProcessUserFeed(st *state.State, user *db.User, progressChannel chan string
return err
}
-var (
- feedCache = cache.New(time.Minute*10, time.Minute*20)
- feedFetchLock = new(sync.Mutex)
-)
+var feedFetchLock = new(sync.Mutex)
func getFeedContent(st *state.State, f *db.Feed) (*gofeed.Feed, error) {
- feedFetchLock.Lock()
+ feedFetchLock.Lock() // I would like to be able to get rid of this lock, however, in order to do so, a lot of the
+ // database infrastructure needs removing and rewriting to use proper transactions. So we'll leave it here for now.
defer feedFetchLock.Unlock()
- if v, found := feedCache.Get(f.URL); found {
- return v.(*gofeed.Feed), nil
- }
-
buf := new(bytes.Buffer)
- ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
- defer cancel()
- var notModified bool
- headers := make(textproto.MIMEHeader)
+ // If a feed was cached in the last hour, Walrss will not re-query the remote server and will just use the cache.
+ hasCachedFeed := f.CachedContent != ""
+ cachedFeedIsFresh := !f.LastFetched.IsZero() && time.Now().UTC().Sub(f.LastFetched) < time.Hour
- requestBuilder := requests.URL(f.URL).ToBytesBuffer(buf).UserAgent(getUserAgent(st)).CopyHeaders(headers)
+ if hasCachedFeed && cachedFeedIsFresh {
+ log.Debug().Msgf("%s using fresh cache (%v)", f.URL, f.LastFetched)
+ buf.WriteString(f.CachedContent)
+ } else {
+ ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
+ defer cancel()
- if f.LastEtag != "" || f.LastModified != "" {
- requestBuilder.AddValidator(
- func(resp *http.Response) error {
- if resp.StatusCode == http.StatusNotModified {
- notModified = true
- return nil
- } else {
- return requests.DefaultValidator(resp)
- }
- },
- )
+ var notModified bool
+ headers := make(textproto.MIMEHeader)
+
+ requestBuilder := requests.URL(f.URL).ToBytesBuffer(buf).UserAgent(getUserAgent(st)).CopyHeaders(headers)
+
+ if f.LastEtag != "" || f.LastModified != "" {
+ requestBuilder.AddValidator(
+ func(resp *http.Response) error {
+ if resp.StatusCode == http.StatusNotModified {
+ notModified = true
+ return nil
+ } else {
+ return requests.DefaultValidator(resp)
+ }
+ },
+ )
+
+ if f.LastEtag != "" {
+ requestBuilder.Header("If-None-Match", f.LastEtag)
+ } else if f.LastModified != "" {
+ requestBuilder.Header("If-Modified-Since", f.LastModified)
+ }
- if f.LastEtag != "" {
- requestBuilder.Header("If-None-Match", f.LastEtag)
- } else if f.LastModified != "" {
- requestBuilder.Header("If-Modified-Since", f.LastModified)
+ } else {
+ requestBuilder.AddValidator(requests.DefaultValidator) // Since we're using CopyHeaders, we need to add the
+ // default validator back ourselves.
}
- } else {
- requestBuilder.AddValidator(requests.DefaultValidator) // Since we're using CopyHeaders, we need to add the
- // default validator back ourselves.
- }
+ if err := requestBuilder.Fetch(ctx); err != nil {
+ return nil, err
+ }
- if err := requestBuilder.Fetch(ctx); err != nil {
- return nil, err
- }
+ f.LastFetched = time.Now().UTC()
- if notModified {
- log.Debug().Msgf("%s not modified", f.URL)
- buf.WriteString(f.CachedContent)
- } else if etag := headers.Get("ETag"); etag != "" {
- log.Debug().Msgf("%s modified (ETag)", f.URL)
- f.CacheWithEtag(etag, buf.String())
- if err := core.UpdateFeed(st, f); err != nil {
- return nil, fmt.Errorf("failed to cache ETag-ed response: %v", err)
+ if notModified {
+ log.Debug().Msgf("%s not modified", f.URL)
+ buf.WriteString(f.CachedContent)
+ } else if etag := headers.Get("ETag"); etag != "" {
+ log.Debug().Msgf("%s modified (ETag)", f.URL)
+ f.SetCacheWithEtag(etag, buf.String())
+ } else if lastModified := headers.Get("Last-Modified"); lastModified != "" {
+ log.Debug().Msgf("%s modified (Last-Modified)", f.URL)
+ f.SetCacheWithLastModified(lastModified, buf.String())
}
- } else if lastModified := headers.Get("Last-Modified"); lastModified != "" {
- log.Debug().Msgf("%s modified (Last-Modified)", f.URL)
- f.CacheWithLastModified(lastModified, buf.String())
+
if err := core.UpdateFeed(st, f); err != nil {
- return nil, fmt.Errorf("failed to cache Last-Modified enabled response: %v", err)
+ return nil, fmt.Errorf("update feed after fetch: %v", err)
}
}
@@ -233,8 +236,6 @@ func getFeedContent(st *state.State, f *db.Feed) (*gofeed.Feed, error) {
return nil, err
}
- _ = feedCache.Add(f.URL, feed, cache.DefaultExpiration)
-
return feed, nil
}