From 78da6951787f07e9460091885d7a9eb3e667b512 Mon Sep 17 00:00:00 2001 From: Reepca Russelstein Date: Tue, 24 Dec 2024 05:40:58 -0600 Subject: [PATCH] daemon: Explicitly unlock output path in the has-become-valid case. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes . Similar to , fixed in . We can't rely on Goal deletion to release our locks in a timely manner. In the case in which multiple guix-daemon processes simultaneously try producing an output path path1, the one that gets there first (P1) will get the lock, and the second one (P2) will continue trying to acquire the lock until it is released. Once it has acquired the lock, it checks to see whether the path has already become valid in the meantime, and if so it reports success to those Goals waiting on its completion and finishes. Unfortunately, it fails to release the locks it holds first, so those stay held until that Goal gets deleted. Suppose we have the following store path dependency graph: path4 / | \ path1 path2 path3 P2 is now sitting on path1's lock, and will continue to do so until path4 is completed. Suppose there is also a P3, and it has been blocked while P1 builds path2. Now P3 is sitting on path2's lock, and can't acquire path1's lock to determine that it has been completed. Likewise P2 is sitting on path1's lock, and now can't acquire path2's lock to determine that it has been completed. Finally, P3 completes path3 while P2 is blocked. Now: - P1 knows that path1 and path2 are complete, and holds no locks, but can't determine that path3 is complete - P2 knows that path1 and path3 are complete, and holds locks on path1 and path3, but can't determine that path2 is complete - P3 knows that path2 and path3 are complete, and holds a lock on path2, but can't determine that path1 is complete And none of these locks will be released until path4 is complete. Thus, we have a deadlock. To resolve this, we should explicitly release these locks as soon as they should be released. * nix/libstore/build.cc (DerivationGoal::tryToBuild, SubstitutionGoal::tryToRun): Explicitly release locks in the has-become-valid case. * tests/store-deadlock.scm: New file. * Makefile.am (SCM_TESTS): Add it. Change-Id: Ie510f84828892315fe6776c830db33d0f70bcef8 Signed-off-by: Ludovic Courtès --- Makefile.am | 1 + nix/libstore/build.cc | 2 + tests/store-deadlock.scm | 90 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 tests/store-deadlock.scm diff --git a/Makefile.am b/Makefile.am index d3eeaddaf4..908c48b4ef 100644 --- a/Makefile.am +++ b/Makefile.am @@ -597,6 +597,7 @@ SCM_TESTS = \ tests/size.scm \ tests/status.scm \ tests/store-database.scm \ + tests/store-deadlock.scm \ tests/store-deduplication.scm \ tests/store-roots.scm \ tests/store.scm \ diff --git a/nix/libstore/build.cc b/nix/libstore/build.cc index 43a8a37184..edd01bab34 100644 --- a/nix/libstore/build.cc +++ b/nix/libstore/build.cc @@ -1200,6 +1200,7 @@ void DerivationGoal::tryToBuild() if (buildMode != bmCheck && validPaths.size() == drv.outputs.size()) { debug(format("skipping build of derivation `%1%', someone beat us to it") % drvPath); outputLocks.setDeletion(true); + outputLocks.unlock(); done(BuildResult::AlreadyValid); return; } @@ -3070,6 +3071,7 @@ void SubstitutionGoal::tryToRun() if (!repair && worker.store.isValidPath(storePath)) { debug(format("store path `%1%' has become valid") % storePath); outputLock->setDeletion(true); + outputLock.reset(); amDone(ecSuccess); return; } diff --git a/tests/store-deadlock.scm b/tests/store-deadlock.scm new file mode 100644 index 0000000000..e960448133 --- /dev/null +++ b/tests/store-deadlock.scm @@ -0,0 +1,90 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2024 Reepca Russelstein +;;; Copyright © 2024 Ludovic Courtès +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see . + +(define-module (test-store-deadlock) + #:use-module (guix tests) + #:use-module (guix store) + #:use-module (guix derivations) + #:use-module (guix gexp) + #:use-module (guix monads) + #:use-module (gnu packages bootstrap) + #:use-module (ice-9 threads) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-64)) + +(define input-drvs + (map (lambda (n) + (computed-file (string-append "drv" (number->string n)) + #~(begin + #$(random-text) + (sleep 4) + (mkdir #$output)) + #:guile %bootstrap-guile)) + (iota 15))) + +(define top-drv + (computed-file "top-drv" + #~(begin + #$(random-text) + (sleep 3) + (pk 'deps: #$@input-drvs) + (mkdir #$output)) + #:guile %bootstrap-guile)) + +(%graft? #f) + + +(test-begin "store-deadlock") + +(test-equal "no deadlock" ;https://issues.guix.gnu.org/31785 + '(thread1 thread2 thread3) + + ;; This test checks for the absence of a deadlock when guix-daemon spawns + ;; several child processes (one for each client) that compete to build the + ;; same set of derivations. See . + (let* ((drvs (cons top-drv input-drvs)) + (builder (lambda (name lst) + (call-with-new-thread + (lambda () + (with-store store + (set-build-options store + #:verbosity 3 + #:max-build-jobs 1) + (run-with-store store + (mlet %store-monad ((lst (mapm %store-monad + lower-object lst))) + (mbegin %store-monad + (built-derivations lst) + (return name))))))))) + (thread1 (builder 'thread1 drvs)) + (thread2 (begin + (sleep 1) + (builder 'thread2 drvs))) + (thread3 (begin + (sleep 1) + (builder 'thread3 drvs)))) + + ;; This test should complete in less than a minute; if not, there's a + ;; deadlock. + (let ((deadline (+ (current-time) 120))) + (list (join-thread thread1 deadline 'timeout) + (join-thread thread2 deadline 'timeout) + (join-thread thread3 deadline 'timeout))))) + +(test-end)