gnu-maintenance: ‘generic-html’ update honors <base href="…">.

This fixes updates of ‘curl’: <https://curl.se/download/> includes
<base href="…"> in its head and ignoring it would lead to incorrect
download URLs.

* guix/gnu-maintenance.scm (html-links): Keep track of <base href="…">
in ‘loop’.  Rewrite relative links at the end.

Change-Id: I989da78df3431034c9a584f8e10cad87ae6dc920
This commit is contained in:
Ludovic Courtès 2024-11-28 23:20:00 +01:00
parent db59509931
commit 5d6691d33e
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5

View file

@ -39,6 +39,7 @@ (define-module (guix gnu-maintenance)
#:use-module (guix utils) #:use-module (guix utils)
#:use-module (guix diagnostics) #:use-module (guix diagnostics)
#:use-module (guix i18n) #:use-module (guix i18n)
#:autoload (guix combinators) (fold2)
#:use-module (guix memoization) #:use-module (guix memoization)
#:use-module (guix records) #:use-module (guix records)
#:use-module (guix upstream) #:use-module (guix upstream)
@ -483,19 +484,33 @@ (define* (import-release* package #:key (version #f))
(define (html-links sxml) (define (html-links sxml)
"Return the list of links found in SXML, the SXML tree of an HTML page." "Return the list of links found in SXML, the SXML tree of an HTML page."
(let loop ((sxml sxml) (define-values (links base)
(links '())) (let loop ((sxml sxml)
(match sxml (links '())
(('a ('@ attributes ...) body ...) (base #f))
(match (assq 'href attributes) (match sxml
(#f (fold loop links body)) (('a ('@ attributes ...) body ...)
(('href url) (fold loop (cons url links) body)))) (match (assq 'href attributes)
((tag ('@ _ ...) body ...) (#f (fold2 loop links base body))
(fold loop links body)) (('href url) (fold2 loop (cons url links) base body))))
((tag body ...) (('base ('@ ('href new-base)))
(fold loop links body)) ;; The base against which relative URL paths must be resolved.
(_ (values links new-base))
links)))) ((tag ('@ _ ...) body ...)
(fold2 loop links base body))
((tag body ...)
(fold2 loop links base body))
(_
(values links base)))))
(if base
(map (lambda (link)
(let ((uri (string->uri link)))
(if (or uri (string-prefix? "/" link))
link
(in-vicinity base link))))
links)
links))
(define (url->links url) (define (url->links url)
"Return the unique links on the HTML page accessible at URL." "Return the unique links on the HTML page accessible at URL."