From aecda64030ef1231244b8b2ab8be0174f6bfd992 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Mon, 20 Nov 2017 17:12:37 -0800 Subject: [PATCH] Make sure XML feeds are always encoded in UTF-8 --- locale/translations.go | 7 ++++--- locale/translations/fr_FR.json | 3 ++- reader/feed/handler.go | 23 +++++++++++++++++------ reader/http/client.go | 29 +++++++++++++++++------------ reader/http/response.go | 21 +++++++++++++++------ reader/icon/finder.go | 11 ++++++----- reader/subscription/finder.go | 9 +++++---- server/static/bin.go | 2 +- server/static/css.go | 2 +- server/static/js.go | 2 +- server/template/common.go | 2 +- server/template/views.go | 2 +- sql/sql.go | 2 +- 13 files changed, 72 insertions(+), 43 deletions(-) diff --git a/locale/translations.go b/locale/translations.go index 402afe58..298d0e02 100644 --- a/locale/translations.go +++ b/locale/translations.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-11-20 16:03:46.536096032 -0800 PST m=+0.030567687 +// 2017-11-20 17:09:36.257679981 -0800 PST m=+0.024050336 package locale @@ -129,12 +129,13 @@ var Translations = map[string]string{ "Unable to parse OPML file: %v": "Impossible de lire le fichier OPML : %v", "Unable to parse RSS feed: %v": "Impossible de lire ce flux RSS: %v", "Unable to parse Atom feed: %v": "Impossible de lire ce flux Atom: %v", - "Unable to parse JSON feed: %v": "Impossible de lire ce flux Json: %v" + "Unable to parse JSON feed: %v": "Impossible de lire ce flux Json: %v", + "Unable to normalize encoding: %v.": "Impossible de normaliser l'encodage : %v." } `, } var TranslationsChecksums = map[string]string{ "en_US": "6fe95384260941e8a5a3c695a655a932e0a8a6a572c1e45cb2b1ae8baa01b897", - "fr_FR": "e9b3753645cb83a338f48bdc24825e629d568ebd3a65a4be2978ff6b4f3bc380", + "fr_FR": "0ff93081d867ab27a190b5cbe6aaed65dbdcd80079ad667b515428a147cb20ee", } diff --git a/locale/translations/fr_FR.json b/locale/translations/fr_FR.json index f7536f68..40aa51af 100644 --- a/locale/translations/fr_FR.json +++ b/locale/translations/fr_FR.json @@ -113,5 +113,6 @@ "Unable to parse OPML file: %v": "Impossible de lire le fichier OPML : %v", "Unable to parse RSS feed: %v": "Impossible de lire ce flux RSS: %v", "Unable to parse Atom feed: %v": "Impossible de lire ce flux Atom: %v", - "Unable to parse JSON feed: %v": "Impossible de lire ce flux Json: %v" + "Unable to parse JSON feed: %v": "Impossible de lire ce flux Json: %v", + "Unable to normalize encoding: %v.": "Impossible de normaliser l'encodage : %v." } diff --git a/reader/feed/handler.go b/reader/feed/handler.go index 27ff126b..c046ad98 100644 --- a/reader/feed/handler.go +++ b/reader/feed/handler.go @@ -6,14 +6,15 @@ package feed import ( "fmt" + "log" + "time" + "github.com/miniflux/miniflux2/errors" "github.com/miniflux/miniflux2/helper" "github.com/miniflux/miniflux2/model" "github.com/miniflux/miniflux2/reader/http" "github.com/miniflux/miniflux2/reader/icon" "github.com/miniflux/miniflux2/storage" - "log" - "time" ) var ( @@ -21,6 +22,7 @@ var ( errServerFailure = "Unable to fetch feed (statusCode=%d)." errDuplicate = "This feed already exists (%s)." errNotFound = "Feed %d not found" + errEncoding = "Unable to normalize encoding: %v." ) // Handler contains all the logic to create and refresh feeds. @@ -32,7 +34,7 @@ type Handler struct { func (h *Handler) CreateFeed(userID, categoryID int64, url string) (*model.Feed, error) { defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:CreateFeed] feedUrl=%s", url)) - client := http.NewHttpClient(url) + client := http.NewClient(url) response, err := client.Get() if err != nil { return nil, errors.NewLocalizedError(errRequestFailed, err) @@ -46,7 +48,12 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string) (*model.Feed, return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL) } - subscription, err := parseFeed(response.Body) + body, err := response.NormalizeBodyEncoding() + if err != nil { + return nil, errors.NewLocalizedError(errEncoding, err) + } + + subscription, err := parseFeed(body) if err != nil { return nil, err } @@ -89,7 +96,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error { return errors.NewLocalizedError(errNotFound, feedID) } - client := http.NewHttpClientWithCacheHeaders(originalFeed.FeedURL, originalFeed.EtagHeader, originalFeed.LastModifiedHeader) + client := http.NewClientWithCacheHeaders(originalFeed.FeedURL, originalFeed.EtagHeader, originalFeed.LastModifiedHeader) response, err := client.Get() if err != nil { customErr := errors.NewLocalizedError(errRequestFailed, err) @@ -111,8 +118,12 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error { if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) { log.Printf("[Handler:RefreshFeed] Feed #%d has been modified\n", feedID) + body, err := response.NormalizeBodyEncoding() + if err != nil { + return errors.NewLocalizedError(errEncoding, err) + } - subscription, err := parseFeed(response.Body) + subscription, err := parseFeed(body) if err != nil { originalFeed.ParsingErrorCount++ originalFeed.ParsingErrorMsg = err.Error() diff --git a/reader/http/client.go b/reader/http/client.go index 745ff0db..edb3c86b 100644 --- a/reader/http/client.go +++ b/reader/http/client.go @@ -7,23 +7,26 @@ package http import ( "crypto/tls" "fmt" - "github.com/miniflux/miniflux2/helper" "log" "net/http" "net/url" "time" + + "github.com/miniflux/miniflux2/helper" ) -const HTTP_USER_AGENT = "Miniflux " +const userAgent = "Miniflux " -type HttpClient struct { +// Client is a HTTP Client :) +type Client struct { url string etagHeader string lastModifiedHeader string Insecure bool } -func (h *HttpClient) Get() (*ServerResponse, error) { +// Get execute a GET HTTP request. +func (h *Client) Get() (*Response, error) { defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[HttpClient:Get] url=%s", h.url)) u, _ := url.Parse(h.url) @@ -39,7 +42,7 @@ func (h *HttpClient) Get() (*ServerResponse, error) { return nil, err } - response := &ServerResponse{ + response := &Response{ Body: resp.Body, StatusCode: resp.StatusCode, EffectiveURL: resp.Request.URL.String(), @@ -59,7 +62,7 @@ func (h *HttpClient) Get() (*ServerResponse, error) { return response, err } -func (h *HttpClient) buildClient() http.Client { +func (h *Client) buildClient() http.Client { if h.Insecure { transport := &http.Transport{ TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, @@ -71,9 +74,9 @@ func (h *HttpClient) buildClient() http.Client { return http.Client{} } -func (h *HttpClient) buildHeaders() http.Header { +func (h *Client) buildHeaders() http.Header { headers := make(http.Header) - headers.Add("User-Agent", HTTP_USER_AGENT) + headers.Add("User-Agent", userAgent) if h.etagHeader != "" { headers.Add("If-None-Match", h.etagHeader) @@ -86,10 +89,12 @@ func (h *HttpClient) buildHeaders() http.Header { return headers } -func NewHttpClient(url string) *HttpClient { - return &HttpClient{url: url, Insecure: false} +// NewClient returns a new HTTP client. +func NewClient(url string) *Client { + return &Client{url: url, Insecure: false} } -func NewHttpClientWithCacheHeaders(url, etagHeader, lastModifiedHeader string) *HttpClient { - return &HttpClient{url: url, etagHeader: etagHeader, lastModifiedHeader: lastModifiedHeader, Insecure: false} +// NewClientWithCacheHeaders returns a new HTTP client that send cache headers. +func NewClientWithCacheHeaders(url, etagHeader, lastModifiedHeader string) *Client { + return &Client{url: url, etagHeader: etagHeader, lastModifiedHeader: lastModifiedHeader, Insecure: false} } diff --git a/reader/http/response.go b/reader/http/response.go index 49e9f196..b2dfb547 100644 --- a/reader/http/response.go +++ b/reader/http/response.go @@ -5,8 +5,10 @@ package http import "io" +import "golang.org/x/net/html/charset" -type ServerResponse struct { +// Response wraps a server response. +type Response struct { Body io.Reader StatusCode int EffectiveURL string @@ -15,18 +17,25 @@ type ServerResponse struct { ContentType string } -func (s *ServerResponse) HasServerFailure() bool { - return s.StatusCode >= 400 +// HasServerFailure returns true if the status code represents a failure. +func (r *Response) HasServerFailure() bool { + return r.StatusCode >= 400 } -func (s *ServerResponse) IsModified(etag, lastModified string) bool { - if s.StatusCode == 304 { +// IsModified returns true if the resource has been modified. +func (r *Response) IsModified(etag, lastModified string) bool { + if r.StatusCode == 304 { return false } - if s.ETag != "" && s.LastModified != "" && (s.ETag == etag || s.LastModified == lastModified) { + if r.ETag != "" && r.LastModified != "" && (r.ETag == etag || r.LastModified == lastModified) { return false } return true } + +// NormalizeBodyEncoding make sure the body is encoded in UTF-8. +func (r *Response) NormalizeBodyEncoding() (io.Reader, error) { + return charset.NewReader(r.Body, r.ContentType) +} diff --git a/reader/icon/finder.go b/reader/icon/finder.go index 54d509f5..fe6e86df 100644 --- a/reader/icon/finder.go +++ b/reader/icon/finder.go @@ -6,13 +6,14 @@ package icon import ( "fmt" + "io" + "io/ioutil" + "log" + "github.com/miniflux/miniflux2/helper" "github.com/miniflux/miniflux2/model" "github.com/miniflux/miniflux2/reader/http" "github.com/miniflux/miniflux2/reader/url" - "io" - "io/ioutil" - "log" "github.com/PuerkitoBio/goquery" ) @@ -20,7 +21,7 @@ import ( // FindIcon try to find the website's icon. func FindIcon(websiteURL string) (*model.Icon, error) { rootURL := url.GetRootURL(websiteURL) - client := http.NewHttpClient(rootURL) + client := http.NewClient(rootURL) response, err := client.Get() if err != nil { return nil, fmt.Errorf("unable to download website index page: %v", err) @@ -80,7 +81,7 @@ func parseDocument(websiteURL string, data io.Reader) (string, error) { } func downloadIcon(iconURL string) (*model.Icon, error) { - client := http.NewHttpClient(iconURL) + client := http.NewClient(iconURL) response, err := client.Get() if err != nil { return nil, fmt.Errorf("unable to download iconURL: %v", err) diff --git a/reader/subscription/finder.go b/reader/subscription/finder.go index 7314644d..cb6fbf20 100644 --- a/reader/subscription/finder.go +++ b/reader/subscription/finder.go @@ -7,14 +7,15 @@ package subscription import ( "bytes" "fmt" + "io" + "log" + "time" + "github.com/miniflux/miniflux2/errors" "github.com/miniflux/miniflux2/helper" "github.com/miniflux/miniflux2/reader/feed" "github.com/miniflux/miniflux2/reader/http" "github.com/miniflux/miniflux2/reader/url" - "io" - "log" - "time" "github.com/PuerkitoBio/goquery" ) @@ -28,7 +29,7 @@ var ( func FindSubscriptions(websiteURL string) (Subscriptions, error) { defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[FindSubscriptions] url=%s", websiteURL)) - client := http.NewHttpClient(websiteURL) + client := http.NewClient(websiteURL) response, err := client.Get() if err != nil { return nil, errors.NewLocalizedError(errConnectionFailure, err) diff --git a/server/static/bin.go b/server/static/bin.go index 0a7f6798..b95023b7 100644 --- a/server/static/bin.go +++ b/server/static/bin.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-11-20 16:03:46.511191455 -0800 PST m=+0.005663110 +// 2017-11-20 17:09:36.239163817 -0800 PST m=+0.005534172 package static diff --git a/server/static/css.go b/server/static/css.go index 50c0ccab..bfe6b6a6 100644 --- a/server/static/css.go +++ b/server/static/css.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-11-20 16:03:46.51268594 -0800 PST m=+0.007157595 +// 2017-11-20 17:09:36.24112331 -0800 PST m=+0.007493665 package static diff --git a/server/static/js.go b/server/static/js.go index f8524454..816728e9 100644 --- a/server/static/js.go +++ b/server/static/js.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-11-20 16:03:46.51596478 -0800 PST m=+0.010436435 +// 2017-11-20 17:09:36.242888415 -0800 PST m=+0.009258770 package static diff --git a/server/template/common.go b/server/template/common.go index acb1db94..16db6eae 100644 --- a/server/template/common.go +++ b/server/template/common.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-11-20 16:03:46.53440477 -0800 PST m=+0.028876425 +// 2017-11-20 17:09:36.256513528 -0800 PST m=+0.022883883 package template diff --git a/server/template/views.go b/server/template/views.go index 856927f6..8b224b61 100644 --- a/server/template/views.go +++ b/server/template/views.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-11-20 16:03:46.517489275 -0800 PST m=+0.011960930 +// 2017-11-20 17:09:36.24386504 -0800 PST m=+0.010235395 package template diff --git a/sql/sql.go b/sql/sql.go index 1200ea42..5f327c87 100644 --- a/sql/sql.go +++ b/sql/sql.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2017-11-20 16:03:46.509724835 -0800 PST m=+0.004196490 +// 2017-11-20 17:09:36.23789781 -0800 PST m=+0.004268165 package sql