gexp: Improve support of Unicode characters.

Support for non-ASCII characters was mixed.  Some gexp forms did support them,
while others did not.  Combined with current value for
%default-port-conversion-strategy, that sometimes led to unpleasant surprises.
For example:

    (scheme-file "utf8" #~(with-output-to-file #$output
                            (λ _ (display "猫"))))

Was written to the store as:

    ((? _ (display "\u732b")))

No, that is not font issue on your part, that is an actual #\? instead of the
lambda character.  Which, surprisingly, does not do what it should when
executed.

The solution is to switch to C.UTF-8 locale where possible, since it is now
always available.  Or to explicitly set the port encoding.

No tests are provided, since majority of tests/gexp.scm use guile in version
2, and it tends to work under it.  The issues occur mostly with guile 3.

I did test it locally using:

      #!/bin/sh
      set -eu
      set -x

      [ -f guix.scm ] || { echo >&2 Run from root of Guix repo.; exit 1; }
      [ -f gnu.scm  ] || { echo >&2 Run from root of Guix repo.; exit 1; }

      cat >猫.scm <<'EOF'
      (define-module (猫)
        #:export (say))

      (define (say)
        "nyaaaa~~~~!")
      EOF

      mkdir -p dir-with-utf8-file
      cp 猫.scm dir-with-utf8-file/

      cat >repro.scm <<'EOF'
      (use-modules (guix build utils)
                   (guix derivations)
                   (guix gexp)
                   (guix store)
                   (ice-9 ftw)
                   (ice-9 textual-ports))

      (define cat "猫")

      (define (drv-content drv)
        (call-with-input-file (derivation->output-path drv)
          get-string-all))

      (define (out-content out)
        (call-with-input-file out
          get-string-all))

      (define (drv-listing drv)
        (scandir (derivation->output-path drv)))

      (define (dir-listing dir)
        (scandir dir))

      (define-macro (test exp lower? report)
        (let ((type (car exp)))
          `(false-if-exception
            (let ((drv (with-store %store
                         (run-with-store %store
                           (,(if lower? lower-object identity) ,exp)))))
              (format #t "~%~a:~%" ',type)
              (when (with-store %store
                      (build-derivations %store (list drv)))
                (format #t "~a~%" (,report drv)))))))

      (test (computed-file "utf8"
                           #~(with-output-to-file #$output
                               (λ _ (display #$cat))))
            #t drv-content)

      (test (program-file "utf8"
                          #~((λ _ (display #$cat))))
            #t drv-content)

      (test (scheme-file "utf8"
                         #~((λ _ (display #$cat))))
            #t drv-content)

      (test (text-file* "utf8" cat cat cat)
            #f drv-content)

      (test (compiled-modules '((猫)))
            #f drv-listing)

      (test (file-union "utf8" `((,cat ,(plain-file "utf8" cat))))
            #t drv-listing)

      ;;; No fix needed:
      (test (imported-modules '((猫)))
            #f dir-listing)

      (test (local-file "dir-with-utf8-file" #:recursive? #t)
            #t dir-listing)

      (test (plain-file "utf8" cat)
            #t out-content)

      (test (mixed-text-file "utf8" cat cat cat)
            #t drv-content)

      (test (directory-union "utf8" (list (local-file "dir-with-utf8-file"
                                                      #:recursive? #t)))
            #t dir-listing)
      EOF

      guix shell -CWN -D guix glibc-locales -- \
           env LANG=C.UTF-8 ./pre-inst-env guix repl -- ./repro.scm

Before this commit, the output is:

      + '[' -f guix.scm ']'
      + '[' -f gnu.scm ']'
      + cat
      + mkdir -p dir-with-utf8-file
      + cp 猫.scm dir-with-utf8-file/
      + cat
      + guix shell -CWN -D guix glibc-locales -- env LANG=C.UTF-8 ./pre-inst-env guix repl -- ./repro.scm

      computed-file:
      ?

      program-file:
      #!/gnu/store/mfkz7fvlfpv3ppwbkv0imb19nrf95akf-guile-3.0.9/bin/guile --no-auto-compile
      !#
      ((? _ (display "\u732b")))

      scheme-file:
      ((? _ (display "\u732b")))

      text-file*:
      ???

      compiled-modules:
      building path(s) `/gnu/store/ay3jifyvliigfgnz67jf0kgngzpya5a5-module-import-compiled'
      Backtrace:
                 5 (primitive-load "/gnu/store/rn7b0dq6iqfmmqyqzamix2mjmfy?")
      In ice-9/eval.scm:
          619:8  4 (_ #f)
      In srfi/srfi-1.scm:
         460:18  3 (fold #<procedure 7ffff79245e0 at ice-9/eval.scm:336:1?> ?)
      In ice-9/eval.scm:
         245:16  2 (_ #(#(#<directory (guix build utils) 7ffff779f320>) # ?))
      In ice-9/boot-9.scm:
        1982:24  1 (_ _)
      In unknown file:
                 0 (stat "./???.scm" #<undefined>)

      ERROR: In procedure stat:
      In procedure stat: No such file or directory: "./???.scm"
      builder for `/gnu/store/dxg87135zcd6a1c92dlrkyvxlbhfwfld-module-import-compiled.drv' failed with exit code 1

      file-union:
      (. .. ?)

      imported-modules:
      (. .. 猫.scm)

      local-file:
      (. .. 猫.scm)

      plain-file:
      猫

      mixed-text-file:
      猫猫猫

      directory-union:
      (. .. 猫.scm)

Which I think you will agree is far from optimal.  After my fix the output
changes to:

      + '[' -f guix.scm ']'
      + '[' -f gnu.scm ']'
      + cat
      + mkdir -p dir-with-utf8-file
      + cp 猫.scm dir-with-utf8-file/
      + cat
      + guix shell -CWN -D guix glibc-locales -- env LANG=C.UTF-8 ./pre-inst-env guix repl -- ./repro.scm

      computed-file:
      猫

      program-file:
      #!/gnu/store/8kbmn359jqkgsbqgqxnmiryvd9ynz8w7-guile-3.0.9/bin/guile --no-auto-compile
      !#
      ((λ _ (display "猫")))

      scheme-file:
      ((λ _ (display "猫")))

      text-file*:
      猫猫猫

      compiled-modules:
      (. .. 猫.go)

      file-union:
      (. .. 猫)

      imported-modules:
      (. .. 猫.scm)

      local-file:
      (. .. 猫.scm)

      plain-file:
      猫

      mixed-text-file:
      猫猫猫

      directory-union:
      (. .. 猫.scm)

Which is actually what the user would expect.

I also added missing arguments to the documentation.

* guix/gexp.scm (computed-file):  Set LANG to C.UTF-8 by default.
(compiled-modules): Try to `setlocale'.
(gexp->script), (gexp->file): New `locale' argument defaulting to C.UTF-8.
(text-file*): Set output port encoding to UTF-8.
* doc/guix.texi (G-Expressions)[computed-file]: Document the changes.  Use
@var.  Document #:guile.
[gexp->script]: Document #:locale.  Fix default value for #:target.
[gexp->file]: Document #:locale, #:system and #:target.

Change-Id: Ib323b51af88a588b780ff48ddd04db8be7c729fb
This commit is contained in:
Tomas Volf 2024-10-06 17:42:26 +02:00 committed by Janneke Nieuwenhuizen
parent c731443ea2
commit 50927338dd
No known key found for this signature in database
GPG key ID: F3C1A0D9C1D65273
2 changed files with 25 additions and 10 deletions

View file

@ -12415,7 +12415,9 @@ Return an object representing a text file called @var{name} with the given
This is the declarative counterpart of @code{text-file}.
@end deffn
@deffn {Procedure} computed-file name gexp [#:local-build? #t] [#:options '()]
@deffn {Procedure} computed-file @var{name} @var{gexp} @
[#:local-build? #t] [#:guile] @
[#:options '(#:env-vars (("LANG" . "C.UTF-8")))]
Return an object representing the store item @var{name}, a file or
directory computed by @var{gexp}. When @var{local-build?} is true (the
default), the derivation is built locally. @var{options} is a list of
@ -12426,7 +12428,7 @@ This is the declarative counterpart of @code{gexp->derivation}.
@deffn {Monadic Procedure} gexp->script @var{name} @var{exp} @
[#:guile (default-guile)] [#:module-path %load-path] @
[#:system (%current-system)] [#:target #f]
[#:system (%current-system)] [#:target 'current] [#:locale "C.UTF-8"]
Return an executable script @var{name} that runs @var{exp} using
@var{guile}, with @var{exp}'s imported modules in its search path.
Look up @var{exp}'s modules in @var{module-path}.
@ -12463,8 +12465,9 @@ This is the declarative counterpart of @code{gexp->script}.
@deffn {Monadic Procedure} gexp->file @var{name} @var{exp} @
[#:set-load-path? #t] [#:module-path %load-path] @
[#:splice? #f] @
[#:guile (default-guile)]
[#:splice? #f] [#:guile (default-guile)] @
[#:system (%current-system)] [#:target 'current] @
[#:locale "C.UTF-8"]
Return a derivation that builds a file @var{name} containing @var{exp}.
When @var{splice?} is true, @var{exp} is considered to be a list of
expressions that will be spliced in the resulting file.

View file

@ -597,7 +597,10 @@ (define-record-type <computed-file>
(options computed-file-options)) ;list of arguments
(define* (computed-file name gexp
#:key guile (local-build? #t) (options '()))
#:key
guile
(local-build? #t)
(options '(#:env-vars (("LANG" . "C.UTF-8")))))
"Return an object representing the store item NAME, a file or directory
computed by GEXP. When LOCAL-BUILD? is #t (the default), it ensures the
corresponding derivation is built locally. OPTIONS may be used to pass
@ -1700,6 +1703,9 @@ (define build
(system base target)
(system base compile))
;; Best effort. The locale is not installed in all contexts.
(false-if-exception (setlocale LC_ALL "C.UTF-8"))
(define modules
(getenv "modules"))
@ -1990,7 +1996,8 @@ (define* (gexp->script name exp
#:key (guile (default-guile))
(module-path %load-path)
(system (%current-system))
(target 'current))
(target 'current)
(locale "C.UTF-8"))
"Return an executable script NAME that runs EXP using GUILE, with EXP's
imported modules in its search path. Look up EXP's modules in MODULE-PATH."
(mlet* %store-monad ((target (if (eq? target 'current)
@ -2033,7 +2040,8 @@ (define* (gexp->script name exp
;; These derivations are not worth offloading or
;; substituting.
#:local-build? #t
#:substitutable? #f)))
#:substitutable? #f
#:env-vars `(("LANG" . ,locale)))))
(define* (gexp->file name exp #:key
(guile (default-guile))
@ -2041,7 +2049,8 @@ (define* (gexp->file name exp #:key
(module-path %load-path)
(splice? #f)
(system (%current-system))
(target 'current))
(target 'current)
(locale "C.UTF-8"))
"Return a derivation that builds a file NAME containing EXP. When SPLICE?
is true, EXP is considered to be a list of expressions that will be spliced in
the resulting file.
@ -2081,7 +2090,8 @@ (define extensions (gexp-extensions exp))
#:local-build? #t
#:substitutable? #f
#:system system
#:target target)
#:target target
#:env-vars `(("LANG" . ,locale)))
(gexp->derivation name
(gexp
(call-with-output-file (ungexp output)
@ -2098,7 +2108,8 @@ (define extensions (gexp-extensions exp))
#:local-build? #t
#:substitutable? #f
#:system system
#:target target))))
#:target target
#:env-vars `(("LANG" . ,locale))))))
(define* (text-file* name #:rest text)
"Return as a monadic value a derivation that builds a text file containing
@ -2108,6 +2119,7 @@ (define* (text-file* name #:rest text)
(define builder
(gexp (call-with-output-file (ungexp output "out")
(lambda (port)
(set-port-encoding! port "UTF-8")
(display (string-append (ungexp-splicing text)) port)))))
(gexp->derivation name builder