reader/fetcher: add brotli content encoding support

This commit is contained in:
Frédéric Guillot 2024-04-18 21:44:55 -07:00
parent 647c66e70a
commit 771f9d2b5f
5 changed files with 80 additions and 2 deletions

1
go.mod
View file

@ -27,6 +27,7 @@ require (
) )
require ( require (
github.com/andybalholm/brotli v1.1.0 // indirect
github.com/andybalholm/cascadia v1.3.2 // indirect github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect

2
go.sum
View file

@ -2,6 +2,8 @@ github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VP
github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY= github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY=
github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4= github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4=
github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc= github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc=
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=

View file

@ -0,0 +1,55 @@
package fetcher
import (
"compress/gzip"
"io"
"github.com/andybalholm/brotli"
)
type brotliReadCloser struct {
body io.ReadCloser
brotliReader io.Reader
}
func NewBrotliReadCloser(body io.ReadCloser) *brotliReadCloser {
return &brotliReadCloser{
body: body,
brotliReader: brotli.NewReader(body),
}
}
func (b *brotliReadCloser) Read(p []byte) (n int, err error) {
return b.brotliReader.Read(p)
}
func (b *brotliReadCloser) Close() error {
return b.body.Close()
}
type gzipReadCloser struct {
body io.ReadCloser
gzipReader io.Reader
gzipErr error
}
func NewGzipReadCloser(body io.ReadCloser) *gzipReadCloser {
return &gzipReadCloser{body: body}
}
func (gz *gzipReadCloser) Read(p []byte) (n int, err error) {
if gz.gzipReader == nil {
if gz.gzipErr == nil {
gz.gzipReader, gz.gzipErr = gzip.NewReader(gz.body)
}
if gz.gzipErr != nil {
return 0, gz.gzipErr
}
}
return gz.gzipReader.Read(p)
}
func (gz *gzipReadCloser) Close() error {
return gz.body.Close()
}

View file

@ -169,6 +169,7 @@ func (r *RequestBuilder) ExecuteRequest(requestURL string) (*http.Response, erro
} }
req.Header = r.headers req.Header = r.headers
req.Header.Set("Accept-Encoding", "br, gzip")
req.Header.Set("Accept", defaultAcceptHeader) req.Header.Set("Accept", defaultAcceptHeader)
req.Header.Set("Connection", "close") req.Header.Set("Connection", "close")

View file

@ -8,6 +8,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"log/slog"
"net" "net"
"net/http" "net/http"
"net/url" "net/url"
@ -71,12 +72,30 @@ func (r *ResponseHandler) Close() {
} }
} }
func (r *ResponseHandler) getReader(maxBodySize int64) io.ReadCloser {
slog.Debug("Request response",
slog.String("effective_url", r.EffectiveURL()),
slog.Int64("content_length", r.httpResponse.ContentLength),
slog.String("content_encoding", r.httpResponse.Header.Get("Content-Encoding")),
slog.String("content_type", r.httpResponse.Header.Get("Content-Type")),
)
reader := r.httpResponse.Body
switch r.httpResponse.Header.Get("Content-Encoding") {
case "br":
reader = NewBrotliReadCloser(r.httpResponse.Body)
case "gzip":
reader = NewGzipReadCloser(r.httpResponse.Body)
}
return http.MaxBytesReader(nil, reader, maxBodySize)
}
func (r *ResponseHandler) Body(maxBodySize int64) io.ReadCloser { func (r *ResponseHandler) Body(maxBodySize int64) io.ReadCloser {
return http.MaxBytesReader(nil, r.httpResponse.Body, maxBodySize) return r.getReader(maxBodySize)
} }
func (r *ResponseHandler) ReadBody(maxBodySize int64) ([]byte, *locale.LocalizedErrorWrapper) { func (r *ResponseHandler) ReadBody(maxBodySize int64) ([]byte, *locale.LocalizedErrorWrapper) {
limitedReader := http.MaxBytesReader(nil, r.httpResponse.Body, maxBodySize) limitedReader := r.getReader(maxBodySize)
buffer, err := io.ReadAll(limitedReader) buffer, err := io.ReadAll(limitedReader)
if err != nil && err != io.EOF { if err != nil && err != io.EOF {