2023-10-22 04:50:29 +02:00
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package fetcher // import "miniflux.app/v2/internal/reader/fetcher"
import (
"crypto/x509"
"errors"
"fmt"
"io"
2024-04-19 06:44:55 +02:00
"log/slog"
2023-10-22 04:50:29 +02:00
"net"
"net/http"
2024-03-17 21:26:51 +01:00
"net/url"
"os"
2024-04-19 20:43:20 +02:00
"strings"
2023-10-22 04:50:29 +02:00
"miniflux.app/v2/internal/locale"
)
// ResponseHandler bundles the two results of an HTTP request: the response
// and the error reported alongside it. Its methods expose selected response
// headers, the (size-limited) body, and a localized error describing why the
// request or response is unusable.
type ResponseHandler struct {
	// httpResponse is the response handed to NewResponseHandler.
	httpResponse *http.Response

	// clientErr is the error handed to NewResponseHandler; when it is
	// non-nil, LocalizedError reports it before looking at the response.
	clientErr error
}
// NewResponseHandler returns a ResponseHandler holding the given response
// and client error. Neither argument is validated here.
func NewResponseHandler(httpResponse *http.Response, clientErr error) *ResponseHandler {
	handler := new(ResponseHandler)
	handler.httpResponse = httpResponse
	handler.clientErr = clientErr
	return handler
}
// EffectiveURL returns, as a string, the URL of the request attached to the
// response (the request that was sent to obtain it).
func (r *ResponseHandler) EffectiveURL() string {
	requestURL := r.httpResponse.Request.URL
	return requestURL.String()
}
// ContentType returns the value of the response's Content-Type header, or an
// empty string when the header is absent.
func (r *ResponseHandler) ContentType() string {
	headers := r.httpResponse.Header
	return headers.Get("Content-Type")
}
// LastModified returns the response's Last-Modified header value. It returns
// an empty string when the header is absent or when the response carries
// "Expires: 0".
func (r *ResponseHandler) LastModified() string {
	headers := r.httpResponse.Header

	// A feed sending "Expires: 0" does not want to be cached, so its
	// caching headers are ignored.
	if noCache := headers.Get("Expires") == "0"; noCache {
		return ""
	}

	return headers.Get("Last-Modified")
}
// ETag returns the response's ETag header value. It returns an empty string
// when the header is absent or when the response carries "Expires: 0".
func (r *ResponseHandler) ETag() string {
	headers := r.httpResponse.Header

	switch headers.Get("Expires") {
	case "0":
		// A feed sending "Expires: 0" does not want to be cached, so its
		// caching headers are ignored.
		return ""
	default:
		return headers.Get("ETag")
	}
}
func ( r * ResponseHandler ) IsModified ( lastEtagValue , lastModifiedValue string ) bool {
if r . httpResponse . StatusCode == http . StatusNotModified {
return false
}
2024-07-02 17:12:53 +02:00
if r . ETag ( ) != "" {
return r . ETag ( ) != lastEtagValue
2023-10-22 04:50:29 +02:00
}
2024-07-02 17:12:53 +02:00
if r . LastModified ( ) != "" {
return r . LastModified ( ) != lastModifiedValue
2023-10-22 04:50:29 +02:00
}
return true
}
// Close closes the response body. It does nothing when a client error was
// recorded, when there is no response, or when the response has no body.
// The error returned by the body's Close is not reported.
func (r *ResponseHandler) Close() {
	if r.clientErr != nil || r.httpResponse == nil {
		return
	}

	if body := r.httpResponse.Body; body != nil {
		body.Close()
	}
}
2024-04-19 06:44:55 +02:00
func ( r * ResponseHandler ) getReader ( maxBodySize int64 ) io . ReadCloser {
2024-04-19 20:43:20 +02:00
contentEncoding := strings . ToLower ( r . httpResponse . Header . Get ( "Content-Encoding" ) )
2024-04-19 06:44:55 +02:00
slog . Debug ( "Request response" ,
slog . String ( "effective_url" , r . EffectiveURL ( ) ) ,
2024-04-19 20:43:20 +02:00
slog . String ( "content_length" , r . httpResponse . Header . Get ( "Content-Length" ) ) ,
slog . String ( "content_encoding" , contentEncoding ) ,
2024-04-19 06:44:55 +02:00
slog . String ( "content_type" , r . httpResponse . Header . Get ( "Content-Type" ) ) ,
)
reader := r . httpResponse . Body
2024-04-19 20:43:20 +02:00
switch contentEncoding {
2024-04-19 06:44:55 +02:00
case "br" :
reader = NewBrotliReadCloser ( r . httpResponse . Body )
case "gzip" :
reader = NewGzipReadCloser ( r . httpResponse . Body )
}
return http . MaxBytesReader ( nil , reader , maxBodySize )
}
2023-10-22 04:50:29 +02:00
func ( r * ResponseHandler ) Body ( maxBodySize int64 ) io . ReadCloser {
2024-04-19 06:44:55 +02:00
return r . getReader ( maxBodySize )
2023-10-22 04:50:29 +02:00
}
func ( r * ResponseHandler ) ReadBody ( maxBodySize int64 ) ( [ ] byte , * locale . LocalizedErrorWrapper ) {
2024-04-19 06:44:55 +02:00
limitedReader := r . getReader ( maxBodySize )
2023-10-22 04:50:29 +02:00
buffer , err := io . ReadAll ( limitedReader )
if err != nil && err != io . EOF {
2023-11-01 19:28:24 +01:00
if err , ok := err . ( * http . MaxBytesError ) ; ok {
return nil , locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: response body too large: %d bytes" , err . Limit ) , "error.http_response_too_large" )
2023-10-22 04:50:29 +02:00
}
return nil , locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: unable to read response body: %w" , err ) , "error.http_body_read" , err )
}
if len ( buffer ) == 0 {
return nil , locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: empty response body" ) , "error.http_empty_response_body" )
}
return buffer , nil
}
func ( r * ResponseHandler ) LocalizedError ( ) * locale . LocalizedErrorWrapper {
if r . clientErr != nil {
2024-03-17 21:26:51 +01:00
switch {
case isSSLError ( r . clientErr ) :
2023-10-23 01:07:06 +02:00
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: %w" , r . clientErr ) , "error.tls_error" , r . clientErr )
2024-03-17 21:26:51 +01:00
case isNetworkError ( r . clientErr ) :
2023-10-23 01:07:06 +02:00
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: %w" , r . clientErr ) , "error.network_operation" , r . clientErr )
2024-03-17 21:26:51 +01:00
case os . IsTimeout ( r . clientErr ) :
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: %w" , r . clientErr ) , "error.network_timeout" , r . clientErr )
case errors . Is ( r . clientErr , io . EOF ) :
2023-10-22 04:50:29 +02:00
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: %w" , r . clientErr ) , "error.http_empty_response" )
2024-03-17 21:26:51 +01:00
default :
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: %w" , r . clientErr ) , "error.http_client_error" , r . clientErr )
2023-10-22 04:50:29 +02:00
}
}
switch r . httpResponse . StatusCode {
case http . StatusUnauthorized :
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: access unauthorized (401 status code)" ) , "error.http_not_authorized" )
case http . StatusForbidden :
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: access forbidden (403 status code)" ) , "error.http_forbidden" )
case http . StatusTooManyRequests :
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: too many requests (429 status code)" ) , "error.http_too_many_requests" )
case http . StatusNotFound , http . StatusGone :
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: resource not found (%d status code)" , r . httpResponse . StatusCode ) , "error.http_resource_not_found" )
case http . StatusInternalServerError :
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: remote server error (%d status code)" , r . httpResponse . StatusCode ) , "error.http_internal_server_error" )
case http . StatusBadGateway :
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: bad gateway (%d status code)" , r . httpResponse . StatusCode ) , "error.http_bad_gateway" )
case http . StatusServiceUnavailable :
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: service unavailable (%d status code)" , r . httpResponse . StatusCode ) , "error.http_service_unavailable" )
case http . StatusGatewayTimeout :
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: gateway timeout (%d status code)" , r . httpResponse . StatusCode ) , "error.http_gateway_timeout" )
}
if r . httpResponse . StatusCode >= 400 {
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: unexpected status code (%d status code)" , r . httpResponse . StatusCode ) , "error.http_unexpected_status_code" , r . httpResponse . StatusCode )
}
if r . httpResponse . StatusCode != 304 {
// Content-Length = -1 when no Content-Length header is sent.
if r . httpResponse . ContentLength == 0 {
return locale . NewLocalizedErrorWrapper ( fmt . Errorf ( "fetcher: empty response body" ) , "error.http_empty_response_body" )
}
}
return nil
}
2024-03-17 21:26:51 +01:00
// isNetworkError reports whether err should be treated as a network-level
// failure: a *url.Error or *net.OpError anywhere in err's chain, or a bare
// io.EOF.
func isNetworkError(err error) bool {
	// Use errors.As (as for *net.OpError below) so that a *url.Error is
	// still recognised after being wrapped with additional context.
	var urlErr *url.Error
	if errors.As(err, &urlErr) {
		return true
	}

	// Only an unwrapped io.EOF is matched here on purpose: LocalizedError
	// handles a wrapped io.EOF in its own errors.Is case, which runs after
	// this function and would be shadowed by an errors.Is check here.
	if err == io.EOF {
		return true
	}

	var opErr *net.OpError
	return errors.As(err, &opErr)
}
// isSSLError reports whether err's chain contains one of the x509
// certificate errors checked below: an unknown signing authority, a hostname
// mismatch, or an insecure signature algorithm.
func isSSLError(err error) bool {
	var (
		unknownAuthority  x509.UnknownAuthorityError
		hostnameMismatch  x509.HostnameError
		insecureAlgorithm x509.InsecureAlgorithmError
	)

	return errors.As(err, &unknownAuthority) ||
		errors.As(err, &hostnameMismatch) ||
		errors.As(err, &insecureAlgorithm)
}