From 12506255f006bd9bf5aea3f768eb9e8a09965a94 Mon Sep 17 00:00:00 2001 From: SouthFox Date: Sat, 22 Jun 2024 22:40:56 +0800 Subject: [PATCH] [post] new post --- .../20240622204025-对象存储反代设置.md | 274 ++++++++++++++++++ 1 file changed, 274 insertions(+) create mode 100644 content/articles/20240622204025-对象存储反代设置.md diff --git a/content/articles/20240622204025-对象存储反代设置.md b/content/articles/20240622204025-对象存储反代设置.md new file mode 100644 index 0000000..0dc2320 --- /dev/null +++ b/content/articles/20240622204025-对象存储反代设置.md @@ -0,0 +1,274 @@ ++++ +title = "对象存储反代设置" +author = ["SouthFox"] +date = 2024-06-22T20:40:00+08:00 +lastmod = 2024-06-22T22:40:55+08:00 +tags = ["publish"] +draft = false ++++ + +## 为什么要为对象存储做一层反代? {#为什么要为对象存储做一层反代} + +在对象存储商前面挂一层[反向代理]({{< relref "../main/nw_反向代理.md" >}})做一层[缓存]({{< relref "../main/nw_缓存.md" >}})是一个好主意,可以压缩请求数和流量,节省消费开支,同时挡下一些恶意请求。 + +很多对象存储服务商都有「请求数」和「流量」的计费项目,如果被有心之人大量恶意请求,那么就有可能产生高额账单。 + + +## 对策 {#对策} + +- 创建桶名时为其添加随机后缀 +- 最好不要直接使用服务商提供的链接 +- 设置为公开读的桶最好做一层反代 + + +## Nginx 反代配置 {#nginx-反代配置} + +这里以我的 Mastodon 实例所用的配置来举例: + +```cfg +proxy_cache_path /tmp/nginx_mstdn_media levels=1:2 keys_zone=mastodon_media:100m max_size=1g inactive=24h; + + server { + listen 80; + listen [::]:80; + server_name [media-host]; + return 301 https://[media-host]$request_uri; + + access_log /dev/null; + error_log /dev/null; + } + + server { + listen 443 ssl http2; + listen [::]:443 ssl http2; + server_name [media-host]; + + ssl_certificate /etc/letsencrypt/live/[media-host]/fullchain.pem; # ma> + ssl_certificate_key /etc/letsencrypt/live/[media-host]/privkey.pem; # > + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + + access_log /var/log/nginx/mstdn-media-access.log; + error_log /var/log/nginx/mstdn-media-error.log; + + location = / { + return 418; + } + + location / { + limit_except GET { + deny all; + } + proxy_cache mastodon_media; + 
proxy_cache_revalidate on; + proxy_buffering on; + proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504; + proxy_cache_background_update on; + proxy_cache_lock on; + proxy_cache_valid 30d; + proxy_cache_valid 404 1h; + proxy_hide_header 'Access-Control-Allow-Origin'; + proxy_hide_header 'Access-Control-Allow-Methods'; + proxy_hide_header 'Access-Control-Allow-Headers'; + proxy_hide_header x-amz-id-2; + proxy_hide_header x-amz-request-id; + proxy_hide_header x-amz-meta-server-side-encryption; + proxy_hide_header x-amz-server-side-encryption; + proxy_hide_header x-amz-bucket-region; + proxy_hide_header x-amzn-requestid; + proxy_hide_header x-amz-version-id; + proxy_ignore_headers Set-Cookie; + proxy_ignore_headers Cache-Control; + add_header X-Cached $upstream_cache_status; + add_header 'Access-Control-Allow-Origin' '*'; + expires 1y; + proxy_pass [s3-url]; + } + proxy_intercept_errors on; + error_page 400 403 404 = @fallback; + + location @fallback { + return 404; + } + + } +``` + +```cfg +location = / { + return 418; +} +``` + +如果设置为公开读的桶,那么访问域名主页会列出当下桶前面一些数量的对象,这可能会带来一些风险,所以针对直接访问主页的请求直接拒绝并返回。 + +```cfg +limit_except GET { + deny all; +} +``` + +只允许 HTTP GET 方法,毕竟很多对象存储服务是连失败的请求也要计入费用的。 + +```cfg +proxy_cache_path /tmp/nginx_mstdn_media levels=1:2 keys_zone=mastodon_media:100m max_size=1g inactive=24h; + +... 
+ + proxy_cache mastodon_media; + proxy_cache_revalidate on; + proxy_buffering on; + proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504; + proxy_cache_background_update on; + proxy_cache_lock on; + proxy_cache_valid 30d; + proxy_cache_valid 404 1h; +``` + +`nginx` 缓存设置,缓存到服务器本地并设置为 30 天有效期。 + +```cfg +add_header 'Access-Control-Allow-Origin' '*'; +expires 1y; +proxy_pass [s3-url]; +``` + +`add_header` 放行跨域设置,对于联邦宇宙而言有些实例会直接让用户读取源站媒体,所以不能做限制。 + +`expires` 设置客户端的缓存失效时间为一年。 + +`proxy_pass` 服务商给的桶公开链接。 + +```cfg +proxy_intercept_errors on; +error_page 400 403 404 = @fallback; + +location @fallback { + return 404; +} +``` + +一些服务商的 404 页面里会包含桶名信息,这是一个会被知晓桶名的点。 + +`proxy_intercept_errors` 开启后,`nginx` 会拦截上游返回的错误响应并交给 `error_page` 处理,这里将其统一替换为 `nginx` 的默认 404 页面。 + + +## 设置迁移 {#设置迁移} + +有时候对一个服务商感到不满准备迁移,例如使用 [Rclone](https://rclone.org/) 工具将原来的桶拷贝到另一个服务商的桶,对于一个有着几十万乃至上千万对象的桶来说,迁移将是一个漫长的过程。而在迁移时也不可能完全停止服务,但可以在反代层设置一个策略,当找不到对象时可以导向原来的桶,这样对于用户来说也是无感的。具体来说: + +- 将应用程序配置中的 s3 设置改为新服务商,让新写入数据导向新桶 +- 编辑 `nginx` 配置,设置反代为新服务商 + - 当新服务商报 404 时,往旧服务商的地址再进行反代 +- 运行迁移,将旧服务商的桶对象拷贝到新服务商的桶内 + +具体配置: + +```cfg +proxy_cache_path /tmp/nginx_mstdn_media levels=1:2 keys_zone=mastodon_media:100m max_size=1g inactive=24h; + + server { + listen 80; + listen [::]:80; + server_name [media-host]; + return 301 https://[media-host]$request_uri; + + access_log /dev/null; + error_log /dev/null; + } + + server { + listen 443 ssl http2; + listen [::]:443 ssl http2; + server_name [media-host]; + + ssl_certificate /etc/letsencrypt/live/[media-host]/fullchain.pem; # ma> + ssl_certificate_key /etc/letsencrypt/live/[media-host]/privkey.pem; # > + include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot + + access_log /var/log/nginx/mstdn-media-access.log; + error_log /var/log/nginx/mstdn-media-error.log; + + location / { + limit_except GET { + deny all; + } + proxy_cache mastodon_media; + proxy_cache_revalidate on; + proxy_buffering on; + 
+ proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504; + proxy_cache_background_update on; + proxy_cache_lock on; + proxy_cache_valid 30d; + proxy_cache_valid 404 1h; + proxy_hide_header 'Access-Control-Allow-Origin'; + proxy_hide_header 'Access-Control-Allow-Methods'; + proxy_hide_header 'Access-Control-Allow-Headers'; + proxy_hide_header x-amz-id-2; + proxy_hide_header x-amz-request-id; + proxy_hide_header x-amz-meta-server-side-encryption; + proxy_hide_header x-amz-server-side-encryption; + proxy_hide_header x-amz-bucket-region; + proxy_hide_header x-amzn-requestid; + proxy_hide_header x-amz-version-id; + proxy_ignore_headers Set-Cookie; + proxy_ignore_headers Cache-Control; + add_header X-Cached $upstream_cache_status; + add_header 'Access-Control-Allow-Origin' '*'; + expires 1y; + proxy_pass [new-s3-url]; + } + proxy_intercept_errors on; + + recursive_error_pages on; + error_page 400 403 404 = @404; + + location @404 { + proxy_pass [old-s3-url]; + proxy_hide_header 'Access-Control-Allow-Origin'; + proxy_hide_header 'Access-Control-Allow-Methods'; + proxy_hide_header 'Access-Control-Allow-Headers'; + proxy_hide_header x-amz-id-2; + proxy_hide_header x-amz-request-id; + proxy_hide_header x-amz-meta-server-side-encryption; + proxy_hide_header x-amz-server-side-encryption; + proxy_hide_header x-amz-bucket-region; + proxy_hide_header x-amzn-requestid; + proxy_hide_header x-amz-version-id; + proxy_ignore_headers Set-Cookie; + proxy_ignore_headers Cache-Control; + + error_page 400 403 404 = @fallback; + } + + location @fallback { + return 418; + } + + } +``` + +其中新增加内容并不多: + +`recursive_error_pages` 允许通过 `error_page` 进行多次(递归)跳转:新服务商出错时先跳到 `@404` 去请求旧服务商,旧服务商也出错时再跳到 `@fallback`。 + +```cfg +location @fallback { + return 418; +} +``` + +这里不能再返回 404 了,否则会陷入循环,可以另选一个其它状态码,这里我选择 418。 + + +## 免责声明 {#免责声明} + +安全无小事,我也无法保证上面的设置是没有漏洞的或者完全安全的,对使用相关配置带来的后果也不负任何责任。 + + +## 参考 {#参考} + +[Proxying object storage through nginx | Mastodon Docs](https://docs.joinmastodon.org/admin/optional/object-storage-proxy/) + 
+[云上黑暗森林:打爆云账单,只需要S3桶名 | Pigsty](https://pigsty.io/zh/blog/cloud/s3-scam/)