Normalize URL query string before executing HTTP requests
- Make sure query string parameters are properly encoded
- As opposed to the standard library, do not append an equal sign to query parameters with an empty value
- Strip URL fragments, as web browsers do
parent 200b1c304b
commit 3debf75eb9
8 changed files with 128 additions and 26 deletions
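For context, a minimal usage sketch of the new RequestURI helper (the url_helper alias mirrors the import used in the HTTP client changes below, and the expected outputs are taken from the TestRequestURI cases added at the end of this diff):

package main

import (
	"fmt"

	url_helper "miniflux.app/url"
)

func main() {
	// Fragments are stripped before the request is sent, like web browsers do.
	fmt.Println(url_helper.RequestURI("https://www.example.org/path?abc#test"))
	// Output: https://www.example.org/path?abc

	// Query string parameters are percent-encoded.
	fmt.Println(url_helper.RequestURI("https://www.example.org/path?测试=测试"))
	// Output: https://www.example.org/path?%E6%B5%8B%E8%AF%95=%E6%B5%8B%E8%AF%95
}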
@@ -22,6 +22,7 @@ import (
 	"miniflux.app/errors"
 	"miniflux.app/logger"
 	"miniflux.app/timer"
+	url_helper "miniflux.app/url"
 	"miniflux.app/version"
 )
@@ -37,7 +38,8 @@ var (
 
 // Client is a HTTP Client :)
 type Client struct {
-	url                 string
+	inputURL            string
+	requestURL          string
 	etagHeader          string
 	lastModifiedHeader  string
 	authorizationHeader string
@@ -47,6 +49,18 @@ type Client struct {
 	Insecure bool
 }
 
+func (c *Client) String() string {
+	return fmt.Sprintf(
+		`InputURL=%q RequestURL=%q ETag=%s LastModified=%q BasicAuth=%v UserAgent=%q`,
+		c.inputURL,
+		c.requestURL,
+		c.etagHeader,
+		c.lastModifiedHeader,
+		c.authorizationHeader != "" || (c.username != "" && c.password != ""),
+		c.userAgent,
+	)
+}
+
 // WithCredentials defines the username/password for HTTP Basic authentication.
 func (c *Client) WithCredentials(username, password string) *Client {
 	if username != "" && password != "" {
@@ -115,7 +129,12 @@ func (c *Client) PostJSON(data interface{}) (*Response, error) {
 }
 
 func (c *Client) executeRequest(request *http.Request) (*Response, error) {
-	defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[HttpClient] url=%s", c.url))
+	defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[HttpClient] inputURL=%s", c.inputURL))
+
+	logger.Debug("[HttpClient:Before] Method=%s %s",
+		request.Method,
+		c.String(),
+	)
 
 	client := c.buildClient()
 	resp, err := client.Do(request)
@@ -162,21 +181,15 @@ func (c *Client) executeRequest(request *http.Request) (*Response, error) {
 		EffectiveURL:  resp.Request.URL.String(),
 		LastModified:  resp.Header.Get("Last-Modified"),
 		ETag:          resp.Header.Get("ETag"),
 		Expires:       resp.Header.Get("Expires"),
 		ContentType:   resp.Header.Get("Content-Type"),
 		ContentLength: resp.ContentLength,
 	}
 
-	logger.Debug("[HttpClient:%s] URL=%s, EffectiveURL=%s, Code=%d, Length=%d, Type=%s, ETag=%s, LastMod=%s, Expires=%s, Auth=%v",
+	logger.Debug("[HttpClient:After] Method=%s %s; Response => %s",
 		request.Method,
-		c.url,
-		response.EffectiveURL,
-		response.StatusCode,
-		resp.ContentLength,
-		response.ContentType,
-		response.ETag,
-		response.LastModified,
-		resp.Header.Get("Expires"),
-		c.username != "",
+		c.String(),
+		response,
 	)
 
 	// Ignore caching headers for feeds that do not want any cache.
@@ -190,7 +203,8 @@ func (c *Client) executeRequest(request *http.Request) (*Response, error) {
 }
 
 func (c *Client) buildRequest(method string, body io.Reader) (*http.Request, error) {
-	request, err := http.NewRequest(method, c.url, body)
+	c.requestURL = url_helper.RequestURI(c.inputURL)
+	request, err := http.NewRequest(method, c.requestURL, body)
 	if err != nil {
 		return nil, err
 	}
@@ -238,5 +252,5 @@ func (c *Client) buildHeaders() http.Header {
 
 // New returns a new HTTP client.
 func New(url string) *Client {
-	return &Client{url: url, userAgent: DefaultUserAgent, Insecure: false}
+	return &Client{inputURL: url, userAgent: DefaultUserAgent, Insecure: false}
 }
@@ -6,6 +6,7 @@ package client // import "miniflux.app/http/client"
 
 import (
 	"bytes"
+	"fmt"
 	"io"
 	"io/ioutil"
 	"regexp"
@@ -24,10 +25,24 @@ type Response struct {
 	EffectiveURL  string
 	LastModified  string
 	ETag          string
 	Expires       string
 	ContentType   string
 	ContentLength int64
 }
 
+func (r *Response) String() string {
+	return fmt.Sprintf(
+		`StatusCode=%d EffectiveURL=%q LastModified=%q ETag=%s Expires=%s ContentType=%q ContentLength=%d`,
+		r.StatusCode,
+		r.EffectiveURL,
+		r.LastModified,
+		r.ETag,
+		r.Expires,
+		r.ContentType,
+		r.ContentLength,
+	)
+}
+
 // IsNotFound returns true if the resource doesn't exists anymore.
 func (r *Response) IsNotFound() bool {
 	return r.StatusCode == 404 || r.StatusCode == 410
@@ -105,8 +120,8 @@ func (r *Response) EnsureUnicodeBody() (err error) {
 	return err
 }
 
-// String returns the response body as string.
-func (r *Response) String() string {
+// BodyAsString returns the response body as string.
+func (r *Response) BodyAsString() string {
 	bytes, _ := ioutil.ReadAll(r.Body)
 	return string(bytes)
 }
@@ -95,7 +95,7 @@ func TestToString(t *testing.T) {
 	input := `test`
 	r := &Response{Body: strings.NewReader(input)}
 
-	if r.String() != input {
+	if r.BodyAsString() != input {
 		t.Error(`Unexpected ouput`)
 	}
 }
@@ -140,7 +140,7 @@ func TestEnsureUnicodeWithHTMLDocuments(t *testing.T) {
 			t.Fatalf(`Unicode conversion error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
 		}
 
-		isUnicode := utf8.ValidString(r.String())
+		isUnicode := utf8.ValidString(r.BodyAsString())
 		if isUnicode != tc.convertedToUnicode {
 			t.Errorf(`Unicode conversion %q - %q, got: %v, expected: %v`,
 				tc.filename, tc.contentType, isUnicode, tc.convertedToUnicode)
@@ -52,7 +52,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
 		return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
 	}
 
-	subscription, parseErr := parser.ParseFeed(response.String())
+	subscription, parseErr := parser.ParseFeed(response.BodyAsString())
 	if parseErr != nil {
 		return nil, parseErr
 	}
@@ -106,7 +106,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
 	if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
 		logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID)
 
-		updatedFeed, parseErr := parser.ParseFeed(response.String())
+		updatedFeed, parseErr := parser.ParseFeed(response.BodyAsString())
 		if parseErr != nil {
 			originalFeed.WithError(parseErr.Localize(printer))
 			h.store.UpdateFeedError(originalFeed)
@@ -191,7 +191,7 @@ func TestDifferentEncodingWithResponse(t *testing.T) {
 			t.Fatalf(`Encoding error for %q: %v`, tc.filename, encodingErr)
 		}
 
-		feed, parseErr := ParseFeed(r.String())
+		feed, parseErr := ParseFeed(r.BodyAsString())
 		if parseErr != nil {
 			t.Fatalf(`Parsing error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
 		}
@@ -18,7 +18,7 @@ import (
 )
 
 var (
-	errUnreadableDoc = "Unable to analyze this page: %v"
+	errUnreadableDoc = "Unable to analyze this page: %v"
 )
 
 // FindSubscriptions downloads and try to find one or more subscriptions from an URL.
@@ -31,7 +31,7 @@ func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscr
 		return nil, err
 	}
 
-	body := response.String()
+	body := response.BodyAsString()
 	if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
 		var subscriptions Subscriptions
 		subscriptions = append(subscriptions, &Subscription{
url/url.go
@@ -4,9 +4,12 @@
 
 package url // import "miniflux.app/url"
 
-import "net/url"
-import "fmt"
-import "strings"
+import (
+	"fmt"
+	"net/url"
+	"sort"
+	"strings"
+)
 
 // AbsoluteURL converts the input URL as absolute URL if necessary.
 func AbsoluteURL(baseURL, input string) (string, error) {
@@ -69,3 +72,51 @@ func Domain(websiteURL string) string {
 
 	return parsedURL.Host
 }
+
+// RequestURI returns the encoded URI to be used in HTTP requests.
+func RequestURI(websiteURL string) string {
+	u, err := url.Parse(websiteURL)
+	if err != nil {
+		return websiteURL
+	}
+
+	queryValues := u.Query()
+	u.RawQuery = "" // Clear RawQuery to make sure it's encoded properly.
+	u.Fragment = "" // Clear fragment because Web browsers strip #fragment before sending the URL to a web server.
+
+	var buf strings.Builder
+	buf.WriteString(u.String())
+
+	if len(queryValues) > 0 {
+		buf.WriteByte('?')
+
+		// Sort keys.
+		keys := make([]string, 0, len(queryValues))
+		for k := range queryValues {
+			keys = append(keys, k)
+		}
+		sort.Strings(keys)
+
+		i := 0
+		for _, key := range keys {
+			keyEscaped := url.QueryEscape(key)
+			values := queryValues[key]
+			for _, value := range values {
+				if i > 0 {
+					buf.WriteByte('&')
+				}
+				buf.WriteString(keyEscaped)
+
+				// As opposed to the standard library, we append the = only if the value is not empty.
+				if value != "" {
+					buf.WriteByte('=')
+					buf.WriteString(url.QueryEscape(value))
+				}
+
+				i++
+			}
+		}
+	}
+
+	return buf.String()
+}
@@ -71,3 +71,25 @@ func TestDomain(t *testing.T) {
 		}
 	}
 }
+
+func TestRequestURI(t *testing.T) {
+	scenarios := map[string]string{
+		"https://www.example.org":                  "https://www.example.org",
+		"https://user:password@www.example.org":    "https://user:password@www.example.org",
+		"https://www.example.org/path with spaces": "https://www.example.org/path%20with%20spaces",
+		"https://www.example.org/path#test":        "https://www.example.org/path",
+		"https://www.example.org/path?abc#test":    "https://www.example.org/path?abc",
+		"https://www.example.org/path?a=b&a=c":     "https://www.example.org/path?a=b&a=c",
+		"https://www.example.org/path?a=b&a=c&d":   "https://www.example.org/path?a=b&a=c&d",
+		"https://www.example.org/path?atom":        "https://www.example.org/path?atom",
+		"https://www.example.org/path?测试=测试":        "https://www.example.org/path?%E6%B5%8B%E8%AF%95=%E6%B5%8B%E8%AF%95",
+		"https://www.example.org/url=http%3A%2F%2Fwww.example.com%2Ffeed%2F&max=20": "https://www.example.org/url=http%3A%2F%2Fwww.example.com%2Ffeed%2F&max=20",
+	}
+
+	for input, expected := range scenarios {
+		actual := RequestURI(input)
+		if actual != expected {
+			t.Errorf(`Unexpected result, got %q instead of %q`, actual, expected)
+		}
+	}
+}
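To illustrate the second bullet of the commit message, a small comparison sketch: the standard library's url.Values.Encode always appends an equal sign, while the ?atom test case above shows RequestURI keeping the bare parameter.

package main

import (
	"fmt"
	"net/url"

	url_helper "miniflux.app/url"
)

func main() {
	// net/url appends "=" even when the value is empty.
	values := url.Values{}
	values.Set("atom", "")
	fmt.Println(values.Encode()) // Output: atom=

	// RequestURI keeps the bare parameter instead.
	fmt.Println(url_helper.RequestURI("https://www.example.org/path?atom"))
	// Output: https://www.example.org/path?atom
}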