publish: Add '--cache-bypass-threshold'.

* guix/scripts/publish.scm (show-help, %options): Add
'--cache-bypass-threshold'.
(low-compression): New procedure.
(cache-bypass-threshold): New parameter.
(bypass-cache?): New procedure.
(render-narinfo/cached): Call 'render-narinfo' when 'bypass-cache?'
returns true.
(render-nar/cached): Call 'render-nar' when 'bypass-cache?' returns
true.
(guix-publish): Parameterize 'cache-bypass-threshold'.
* tests/publish.scm ("with cache", "with cache, lzip + gzip")
("with cache, uncompressed"): Pass '--cache-bypass-threshold=0'.
("with cache, vanishing item"): Expect 200 for RESPONSE.
("with cache, cache bypass"): New test.
This commit is contained in:
Ludovic Courtès 2020-10-24 16:31:18 +02:00
parent 6d1fd37182
commit ecaa102a58
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5
3 changed files with 130 additions and 22 deletions

View file

@ -12086,13 +12086,20 @@ in advance, so @command{guix publish} does not add a
prevents clients from knowing the amount of data being downloaded.
Conversely, when @option{--cache} is used, the first request for a store
item (@i{via} a @code{.narinfo} URL) returns 404 and triggers a
item (@i{via} a @code{.narinfo} URL) triggers a
background process to @dfn{bake} the archive---computing its
@code{.narinfo} and compressing the archive, if needed. Once the
archive is cached in @var{directory}, subsequent requests succeed and
are served directly from the cache, which guarantees that clients get
the best possible bandwidth.
That first @code{.narinfo} request nonetheless returns 200, provided the
requested store item is ``small enough'', below the cache bypass
threshold---see @option{--cache-bypass-threshold} below. That way,
clients do not have to wait until the archive is baked. For larger
store items, the first @code{.narinfo} request returns 404, meaning that
clients have to wait until the archive is baked.
The ``baking'' process is performed by worker threads. By default, one
thread per CPU core is created, but this can be customized. See
@option{--workers} below.
@ -12118,6 +12125,21 @@ Additionally, when @option{--cache} is used, cached entries that have
not been accessed for @var{ttl} and that no longer have a corresponding
item in the store, may be deleted.
@item --cache-bypass-threshold=@var{size}
When used in conjunction with @option{--cache}, store items smaller than
@var{size} are immediately available, even when they are not yet in
cache. @var{size} is a size in bytes, or it can be suffixed by @code{M}
for megabytes and so on. The default is @code{10M}.
``Cache bypass'' allows you to reduce the publication delay for clients
at the expense of possible additional I/O and CPU use on the server
side: depending on the client access patterns, those store items can end
up being baked several times until a copy is available in cache.
Increasing the threshold may be useful for sites that have few users, or
to guarantee that users get substitutes even for store items that are
not popular.
@item --nar-path=@var{path}
Use @var{path} as the prefix for the URLs of ``nar'' files
(@pxref{Invoking guix archive, normalized archives}).

View file

@ -82,6 +82,9 @@ (define (show-help)
compress archives with METHOD at LEVEL"))
(display (G_ "
-c, --cache=DIRECTORY cache published items to DIRECTORY"))
(display (G_ "
--cache-bypass-threshold=SIZE
serve store items below SIZE even when not cached"))
(display (G_ "
--workers=N use N workers to bake items"))
(display (G_ "
@ -135,6 +138,12 @@ (define (actual-compressions item requested)
(list %no-compression)
requested))
(define (low-compression c)
  "Return a <compression> record of the same type as C, optimized for low CPU
usage: its level is that of C, capped at 2."
  (let ((type  (compression-type c))
        (level (compression-level c)))
    ;; Never go above level 2, but keep a lower level if C already has one.
    (compression type (min level 2))))
(define %options
(list (option '(#\h "help") #f #f
(lambda _
@ -185,6 +194,10 @@ (define %options
(option '(#\c "cache") #t #f
(lambda (opt name arg result)
(alist-cons 'cache arg result)))
(option '("cache-bypass-threshold") #t #f
(lambda (opt name arg result)
(alist-cons 'cache-bypass-threshold (size->number arg)
result)))
(option '("workers") #t #f
(lambda (opt name arg result)
(alist-cons 'workers (string->number* arg)
@ -435,7 +448,7 @@ (define (nar-expiration-time ttl)
(expiration-time file))))))
(define (hash-part->path* store hash cache)
"Like 'hash-part->path' but cached results under CACHE. This ensures we can
"Like 'hash-part->path' but cache results under CACHE. This ensures we can
still map HASH to the corresponding store file name, even if said store item
vanished from the store in the meantime."
(let ((cached (hash-part-mapping-cache-file cache hash)))
@ -455,6 +468,18 @@ (define (hash-part->path* store hash cache)
result))
(apply throw args))))))
(define cache-bypass-threshold
  ;; Size threshold, in bytes, under which a store item may be served by the
  ;; '/cached' handlers below even when it is not in cache yet.  The bound is
  ;; exclusive: 'bypass-cache?' compares with '<'.  Defaults to 10 MiB.
  (make-parameter (* 10 1024 1024)))
(define (bypass-cache? store item)
  "Return true if ITEM may be downloaded before it is cached, that is, if the
nar size reported for ITEM by STORE is strictly below the value of the
'cache-bypass-threshold' parameter.  Return #f when querying STORE raises a
store error."
  ;; NOTE(review): the previous docstring said ITEM is interpreted as the
  ;; basename of a store item, yet 'render-nar/cached' passes the result of
  ;; 'hash-part->path' here -- confirm what 'query-path-info' expects.
  (guard (c ((store-error? c) #f))
    (let ((nar-size (path-info-nar-size (query-path-info store item)))
          (limit    (cache-bypass-threshold)))
      (< nar-size limit))))
(define* (render-narinfo/cached store request hash
#:key ttl (compressions (list %no-compression))
(nar-path "nar")
@ -514,9 +539,20 @@ (define (delete-entry narinfo)
(nar-expiration-time ttl)
#:delete-entry delete-entry
#:cleanup-period ttl))))
(not-found request
#:phrase "We're baking it"
#:ttl 300)) ;should be available within 5m
;; If ITEM passes 'bypass-cache?', render a temporary narinfo right
;; away, with a short TTL. The narinfo is temporary because it
;; lacks 'FileSize', for instance, which the cached narinfo will
;; have. Chances are that the nar will be baked by the time the
;; client asks for it.
(if (bypass-cache? store item)
(render-narinfo store request hash
#:ttl 300 ;temporary
#:nar-path nar-path
#:compressions compressions)
(not-found request
#:phrase "We're baking it"
#:ttl 300))) ;should be available within 5m
(else
(not-found request #:phrase "")))))
@ -628,19 +664,31 @@ (define* (render-nar/cached store cache request store-item
'Cache-Control' expiration time."
(let ((cached (nar-cache-file cache store-item
#:compression compression)))
(if (file-exists? cached)
(values `((content-type . (application/octet-stream
(charset . "ISO-8859-1")))
,@(if ttl
`((cache-control (max-age . ,ttl)))
'())
(cond ((file-exists? cached)
(values `((content-type . (application/octet-stream
(charset . "ISO-8859-1")))
,@(if ttl
`((cache-control (max-age . ,ttl)))
'())
;; XXX: We're not returning the actual contents, deferring
;; instead to 'http-write'. This is a hack to work around
;; <http://bugs.gnu.org/21093>.
(x-raw-file . ,cached))
#f)
(not-found request))))
;; XXX: We're not returning the actual contents, deferring
;; instead to 'http-write'. This is a hack to work around
;; <http://bugs.gnu.org/21093>.
(x-raw-file . ,cached))
#f))
((let* ((hash (and=> (string-index store-item #\-)
(cut string-take store-item <>)))
(item (and hash
(guard (c ((store-error? c) #f))
(hash-part->path store hash)))))
(and item (bypass-cache? store item)))
;; Render STORE-ITEM live. We reach this because STORE-ITEM is
;; being baked but clients are already asking for it. Thus, we're
;; duplicating work, but doing so allows us to reduce delays.
(render-nar store request store-item
#:compression (low-compression compression)))
(else
(not-found request)))))
(define (render-content-addressed-file store request
name algo hash)
@ -1077,7 +1125,10 @@ (define-command (guix-publish . args)
consider using the '--user' option!~%")))
(parameterize ((%public-key public-key)
(%private-key private-key))
(%private-key private-key)
(cache-bypass-threshold
(or (assoc-ref opts 'cache-bypass-threshold)
(cache-bypass-threshold))))
(info (G_ "publishing ~a on ~a, port ~d~%")
%store-directory
(inet-ntop (sockaddr:fam address) (sockaddr:addr address))

View file

@ -413,7 +413,8 @@ (define %gzip-magic-bytes
(call-with-new-thread
(lambda ()
(guix-publish "--port=6797" "-C2"
(string-append "--cache=" cache)))))))
(string-append "--cache=" cache)
"--cache-bypass-threshold=0"))))))
(wait-until-ready 6797)
(let* ((base "http://localhost:6797/")
(part (store-path-hash-part %item))
@ -462,7 +463,8 @@ (define %gzip-magic-bytes
(call-with-new-thread
(lambda ()
(guix-publish "--port=6794" "-Cgzip:2" "-Clzip:2"
(string-append "--cache=" cache)))))))
(string-append "--cache=" cache)
"--cache-bypass-threshold=0"))))))
(wait-until-ready 6794)
(let* ((base "http://localhost:6794/")
(part (store-path-hash-part %item))
@ -517,7 +519,8 @@ (define %gzip-magic-bytes
(call-with-new-thread
(lambda ()
(guix-publish "--port=6796" "-C2" "--ttl=42h"
(string-append "--cache=" cache)))))))
(string-append "--cache=" cache)
"--cache-bypass-threshold=0"))))))
(wait-until-ready 6796)
(let* ((base "http://localhost:6796/")
(part (store-path-hash-part item))
@ -581,12 +584,44 @@ (define %gzip-magic-bytes
(basename item)
".narinfo"))
(response (http-get url)))
(and (= 404 (response-code response))
(and (= 200 (response-code response)) ;we're below the threshold
(wait-for-file cached)
(begin
(delete-paths %store (list item))
(response-code (pk 'response (http-get url))))))))))
;; Start 'guix publish' with '--cache' and no '--cache-bypass-threshold'
;; option, add a fresh small text item to the store, and check that both its
;; nar and its narinfo are answered with 200 before anything has been baked.
;; The value of the whole expression, compared against 200, is the response
;; code of the last nar request.
(test-equal "with cache, cache bypass"
200
(call-with-temporary-directory
(lambda (cache)
(let ((thread (with-separate-output-ports
(call-with-new-thread
(lambda ()
(guix-publish "--port=6788" "-C" "gzip"
(string-append "--cache=" cache)))))))
(wait-until-ready 6788)
(let* ((base "http://localhost:6788/")
(item (add-text-to-store %store "random" (random-text)))
(part (store-path-hash-part item))
(narinfo (string-append base part ".narinfo"))
(nar (string-append base "nar/gzip/" (basename item)))
(cached (string-append cache "/gzip/" (basename item)
".narinfo")))
;; We're below the default cache bypass threshold, so NAR and NARINFO
;; should immediately return 200. The NARINFO request should trigger
;; caching, and the next request to NAR should return 200 as well.
;; First NAR request, issued before any NARINFO request: expect 200 and no
;; 'Content-Length' header.
(and (let ((response (pk 'r1 (http-get nar))))
(and (= 200 (response-code response))
(not (response-content-length response)))) ;not known
(= 200 (response-code (http-get narinfo)))
(begin
;; Block until the narinfo file shows up under CACHE before asking for
;; NAR again.
(wait-for-file cached)
(let ((response (pk 'r2 (http-get nar))))
;; This time a 'Content-Length' is expected, and it must exceed the
;; on-disk size of ITEM -- presumably because the response now comes
;; from the cached file; TODO confirm.
(and (> (response-content-length response)
(stat:size (stat item)))
(response-code response))))))))))
(test-equal "/log/NAME"
`(200 #t application/x-bzip2)
(let ((drv (run-with-store %store