From 50927338dd3932324bd93b22632351301798a599 Mon Sep 17 00:00:00 2001 From: Tomas Volf <~@wolfsden.cz> Date: Sun, 6 Oct 2024 17:42:26 +0200 Subject: [PATCH] gexp: Improve support of Unicode characters. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support for non-ASCII characters was mixed. Some gexp forms did support them, while others did not. Combined with the current value of %default-port-conversion-strategy, that sometimes led to unpleasant surprises. For example: (scheme-file "utf8" #~(with-output-to-file #$output (λ _ (display "猫")))) was written to the store as: ((? _ (display "\u732b"))) No, that is not a font issue on your part, that is an actual #\? instead of the lambda character. Which, surprisingly, does not do what it should when executed. The solution is to switch to the C.UTF-8 locale where possible, since it is now always available, or to explicitly set the port encoding. No tests are provided, since the majority of the tests in tests/gexp.scm use Guile 2, under which it tends to work. The issues occur mostly with Guile 3. I did test it locally using: #!/bin/sh set -eu set -x [ -f guix.scm ] || { echo >&2 Run from root of Guix repo.; exit 1; } [ -f gnu.scm ] || { echo >&2 Run from root of Guix repo.; exit 1; } cat >猫.scm <<'EOF' (define-module (猫) #:export (say)) (define (say) "nyaaaa~~~~!") EOF mkdir -p dir-with-utf8-file cp 猫.scm dir-with-utf8-file/ cat >repro.scm <<'EOF' (use-modules (guix build utils) (guix derivations) (guix gexp) (guix store) (ice-9 ftw) (ice-9 textual-ports)) (define cat "猫") (define (drv-content drv) (call-with-input-file (derivation->output-path drv) get-string-all)) (define (out-content out) (call-with-input-file out get-string-all)) (define (drv-listing drv) (scandir (derivation->output-path drv))) (define (dir-listing dir) (scandir dir)) (define-macro (test exp lower? report) (let ((type (car exp))) `(false-if-exception (let ((drv (with-store %store (run-with-store %store (,(if lower? 
lower-object identity) ,exp))))) (format #t "~%~a:~%" ',type) (when (with-store %store (build-derivations %store (list drv))) (format #t "~a~%" (,report drv))))))) (test (computed-file "utf8" #~(with-output-to-file #$output (λ _ (display #$cat)))) #t drv-content) (test (program-file "utf8" #~((λ _ (display #$cat)))) #t drv-content) (test (scheme-file "utf8" #~((λ _ (display #$cat)))) #t drv-content) (test (text-file* "utf8" cat cat cat) #f drv-content) (test (compiled-modules '((猫))) #f drv-listing) (test (file-union "utf8" `((,cat ,(plain-file "utf8" cat)))) #t drv-listing) ;;; No fix needed: (test (imported-modules '((猫))) #f dir-listing) (test (local-file "dir-with-utf8-file" #:recursive? #t) #t dir-listing) (test (plain-file "utf8" cat) #t out-content) (test (mixed-text-file "utf8" cat cat cat) #t drv-content) (test (directory-union "utf8" (list (local-file "dir-with-utf8-file" #:recursive? #t))) #t dir-listing) EOF guix shell -CWN -D guix glibc-locales -- \ env LANG=C.UTF-8 ./pre-inst-env guix repl -- ./repro.scm Before this commit, the output is: + '[' -f guix.scm ']' + '[' -f gnu.scm ']' + cat + mkdir -p dir-with-utf8-file + cp 猫.scm dir-with-utf8-file/ + cat + guix shell -CWN -D guix glibc-locales -- env LANG=C.UTF-8 ./pre-inst-env guix repl -- ./repro.scm computed-file: ? program-file: #!/gnu/store/mfkz7fvlfpv3ppwbkv0imb19nrf95akf-guile-3.0.9/bin/guile --no-auto-compile !# ((? _ (display "\u732b"))) scheme-file: ((? _ (display "\u732b"))) text-file*: ??? compiled-modules: building path(s) `/gnu/store/ay3jifyvliigfgnz67jf0kgngzpya5a5-module-import-compiled' Backtrace: 5 (primitive-load "/gnu/store/rn7b0dq6iqfmmqyqzamix2mjmfy?") In ice-9/eval.scm: 619:8 4 (_ #f) In srfi/srfi-1.scm: 460:18 3 (fold # ?) 
In ice-9/eval.scm: 245:16 2 (_ #(#(#) # ?)) In ice-9/boot-9.scm: 1982:24 1 (_ _) In unknown file: 0 (stat "./???.scm" #) ERROR: In procedure stat: In procedure stat: No such file or directory: "./???.scm" builder for `/gnu/store/dxg87135zcd6a1c92dlrkyvxlbhfwfld-module-import-compiled.drv' failed with exit code 1 file-union: (. .. ?) imported-modules: (. .. 猫.scm) local-file: (. .. 猫.scm) plain-file: 猫 mixed-text-file: 猫猫猫 directory-union: (. .. 猫.scm) Which I think you will agree is far from optimal. After my fix the output changes to: + '[' -f guix.scm ']' + '[' -f gnu.scm ']' + cat + mkdir -p dir-with-utf8-file + cp 猫.scm dir-with-utf8-file/ + cat + guix shell -CWN -D guix glibc-locales -- env LANG=C.UTF-8 ./pre-inst-env guix repl -- ./repro.scm computed-file: 猫 program-file: #!/gnu/store/8kbmn359jqkgsbqgqxnmiryvd9ynz8w7-guile-3.0.9/bin/guile --no-auto-compile !# ((λ _ (display "猫"))) scheme-file: ((λ _ (display "猫"))) text-file*: 猫猫猫 compiled-modules: (. .. 猫.go) file-union: (. .. 猫) imported-modules: (. .. 猫.scm) local-file: (. .. 猫.scm) plain-file: 猫 mixed-text-file: 猫猫猫 directory-union: (. .. 猫.scm) Which is actually what the user would expect. I also added missing arguments to the documentation. * guix/gexp.scm (computed-file): Set LANG to C.UTF-8 by default. (compiled-modules): Try to `setlocale'. (gexp->script), (gexp->file): New `locale' argument defaulting to C.UTF-8. (text-file*): Set output port encoding to UTF-8. * doc/guix.texi (G-Expressions)[computed-file]: Document the changes. Use @var. Document #:guile. [gexp->script]: Document #:locale. Fix default value for #:target. [gexp->file]: Document #:locale, #:system and #:target. 
Change-Id: Ib323b51af88a588b780ff48ddd04db8be7c729fb --- doc/guix.texi | 11 +++++++---- guix/gexp.scm | 24 ++++++++++++++++++------ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/doc/guix.texi b/doc/guix.texi index d6e17c74cd..8ac5ec098d 100644 --- a/doc/guix.texi +++ b/doc/guix.texi @@ -12415,7 +12415,9 @@ Return an object representing a text file called @var{name} with the given This is the declarative counterpart of @code{text-file}. @end deffn -@deffn {Procedure} computed-file name gexp [#:local-build? #t] [#:options '()] +@deffn {Procedure} computed-file @var{name} @var{gexp} @ + [#:local-build? #t] [#:guile] @ + [#:options '(#:env-vars (("LANG" . "C.UTF-8")))] Return an object representing the store item @var{name}, a file or directory computed by @var{gexp}. When @var{local-build?} is true (the default), the derivation is built locally. @var{options} is a list of @@ -12426,7 +12428,7 @@ This is the declarative counterpart of @code{gexp->derivation}. @deffn {Monadic Procedure} gexp->script @var{name} @var{exp} @ [#:guile (default-guile)] [#:module-path %load-path] @ - [#:system (%current-system)] [#:target #f] + [#:system (%current-system)] [#:target 'current] [#:locale "C.UTF-8"] Return an executable script @var{name} that runs @var{exp} using @var{guile}, with @var{exp}'s imported modules in its search path. Look up @var{exp}'s modules in @var{module-path}. @@ -12463,8 +12465,9 @@ This is the declarative counterpart of @code{gexp->script}. @deffn {Monadic Procedure} gexp->file @var{name} @var{exp} @ [#:set-load-path? #t] [#:module-path %load-path] @ - [#:splice? #f] @ - [#:guile (default-guile)] + [#:splice? #f] [#:guile (default-guile)] @ + [#:system (%current-system)] [#:target 'current] @ + [#:locale "C.UTF-8"] Return a derivation that builds a file @var{name} containing @var{exp}. When @var{splice?} is true, @var{exp} is considered to be a list of expressions that will be spliced in the resulting file. 
diff --git a/guix/gexp.scm b/guix/gexp.scm index e44aea6420..c8aba91779 100644 --- a/guix/gexp.scm +++ b/guix/gexp.scm @@ -597,7 +597,10 @@ (define-record-type (options computed-file-options)) ;list of arguments (define* (computed-file name gexp - #:key guile (local-build? #t) (options '())) + #:key + guile + (local-build? #t) + (options '(#:env-vars (("LANG" . "C.UTF-8"))))) "Return an object representing the store item NAME, a file or directory computed by GEXP. When LOCAL-BUILD? is #t (the default), it ensures the corresponding derivation is built locally. OPTIONS may be used to pass @@ -1700,6 +1703,9 @@ (define build (system base target) (system base compile)) + ;; Best effort. The locale is not installed in all contexts. + (false-if-exception (setlocale LC_ALL "C.UTF-8")) + (define modules (getenv "modules")) @@ -1990,7 +1996,8 @@ (define* (gexp->script name exp #:key (guile (default-guile)) (module-path %load-path) (system (%current-system)) - (target 'current)) + (target 'current) + (locale "C.UTF-8")) "Return an executable script NAME that runs EXP using GUILE, with EXP's imported modules in its search path. Look up EXP's modules in MODULE-PATH." (mlet* %store-monad ((target (if (eq? target 'current) @@ -2033,7 +2040,8 @@ (define* (gexp->script name exp ;; These derivations are not worth offloading or ;; substituting. #:local-build? #t - #:substitutable? #f))) + #:substitutable? #f + #:env-vars `(("LANG" . ,locale))))) (define* (gexp->file name exp #:key (guile (default-guile)) @@ -2041,7 +2049,8 @@ (define* (gexp->file name exp #:key (module-path %load-path) (splice? #f) (system (%current-system)) - (target 'current)) + (target 'current) + (locale "C.UTF-8")) "Return a derivation that builds a file NAME containing EXP. When SPLICE? is true, EXP is considered to be a list of expressions that will be spliced in the resulting file. @@ -2081,7 +2090,8 @@ (define extensions (gexp-extensions exp)) #:local-build? #t #:substitutable? 
#f #:system system - #:target target) + #:target target + #:env-vars `(("LANG" . ,locale))) (gexp->derivation name (gexp (call-with-output-file (ungexp output) @@ -2098,7 +2108,8 @@ (define extensions (gexp-extensions exp)) #:local-build? #t #:substitutable? #f #:system system - #:target target)))) + #:target target + #:env-vars `(("LANG" . ,locale)))))) (define* (text-file* name #:rest text) "Return as a monadic value a derivation that builds a text file containing @@ -2108,6 +2119,7 @@ (define* (text-file* name #:rest text) (define builder (gexp (call-with-output-file (ungexp output "out") (lambda (port) + (set-port-encoding! port "UTF-8") (display (string-append (ungexp-splicing text)) port))))) (gexp->derivation name builder