Skip to content

Commit 00ad6fd

Browse files
kpamnany, Takafumi Arakaki
authored and committed
Make compilecache atomic (JuliaLang#36416)
When several Julia processes compile the same package concurrently (e.g. during a cluster run), they can conflict on the compile cache file. This change makes a Julia process create a compile cache in a temporary file and atomically rename it to the final cache file. Co-authored-by: Takafumi Arakaki <tkf@users.noreply.github.com>
1 parent dc1a752 commit 00ad6fd

File tree

4 files changed

+44
-18
lines changed

4 files changed

+44
-18
lines changed

NEWS.md

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ Compiler/Runtime improvements
1818
This allows executable-relative paths to be embedded within executables on all
1919
platforms, not just macOS, which the syntax is borrowed from. ([#35627])
2020
* Constant propagation now occurs through keyword arguments ([#35976])
21+
* The precompilation cache is now created atomically ([#36416]). Invoking _n_
22+
Julia processes simultaneously may create _n_ temporary caches.
2123

2224
Command-line option changes
2325
---------------------------

base/loading.jl

+24-10
Original file line numberDiff line numberDiff line change
@@ -1301,9 +1301,9 @@ const MAX_NUM_PRECOMPILE_FILES = 10
13011301
function compilecache(pkg::PkgId, path::String)
13021302
# decide where to put the resulting cache file
13031303
cachefile = compilecache_path(pkg)
1304+
cachepath = dirname(cachefile)
13041305
# prune the directory with cache files
13051306
if pkg.uuid !== nothing
1306-
cachepath = dirname(cachefile)
13071307
entrypath, entryfile = cache_file_entry(pkg)
13081308
cachefiles = filter!(x -> startswith(x, entryfile * "_"), readdir(cachepath))
13091309
if length(cachefiles) >= MAX_NUM_PRECOMPILE_FILES
@@ -1321,20 +1321,34 @@ function compilecache(pkg::PkgId, path::String)
13211321
# run the expression and cache the result
13221322
verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
13231323
@logmsg verbosity "Precompiling $pkg"
1324-
p = create_expr_cache(path, cachefile, concrete_deps, pkg.uuid)
1325-
if success(p)
1326-
# append checksum to the end of the .ji file:
1327-
open(cachefile, "a+") do f
1328-
write(f, _crc32c(seekstart(f)))
1324+
1325+
# create a temporary file in `cachepath` directory, write the cache in it,
1326+
# write the checksum, _and then_ atomically move the file to `cachefile`.
1327+
tmppath, tmpio = mktemp(cachepath)
1328+
local p
1329+
try
1330+
close(tmpio)
1331+
p = create_expr_cache(path, tmppath, concrete_deps, pkg.uuid)
1332+
if success(p)
1333+
# append checksum to the end of the .ji file:
1334+
open(tmppath, "a+") do f
1335+
write(f, _crc32c(seekstart(f)))
1336+
end
1337+
# inherit permission from the source file
1338+
chmod(tmppath, filemode(path) & 0o777)
1339+
1340+
# this is atomic according to POSIX:
1341+
rename(tmppath, cachefile)
1342+
return cachefile
13291343
end
1330-
# inherit permission from the source file
1331-
chmod(cachefile, filemode(path) & 0o777)
1332-
elseif p.exitcode == 125
1344+
finally
1345+
rm(tmppath, force=true)
1346+
end
1347+
if p.exitcode == 125
13331348
return PrecompilableError()
13341349
else
13351350
error("Failed to precompile $pkg to $cachefile.")
13361351
end
1337-
return cachefile
13381352
end
13391353

13401354
module_build_id(m::Module) = ccall(:jl_module_build_id, UInt64, (Any,), m)

doc/src/manual/faq.md

+16
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,22 @@ Julia compiles and uses its own copy of OpenBLAS, with threads currently capped
935935

936936
Modifying OpenBLAS settings or compiling Julia with a different BLAS library, e.g., [Intel MKL](https://software.intel.com/en-us/mkl), may provide performance improvements. You can use [MKL.jl](https://github.com/JuliaComputing/MKL.jl), a package that makes Julia's linear algebra use Intel MKL BLAS and LAPACK instead of OpenBLAS, or search the discussion forum for suggestions on how to set this up manually. Note that Intel MKL cannot be bundled with Julia, as it is not open source.
937937

938+
## Computing cluster
939+
940+
### How do I manage precompilation caches in distributed file systems?
941+
942+
When using `julia` in high-performance computing (HPC) facilities, invoking
943+
_n_ `julia` processes simultaneously creates at most _n_ temporary copies of
944+
precompilation cache files. If this is an issue (slow and/or small distributed
945+
file system), you may:
946+
947+
1. Use `julia` with the `--compiled-modules=no` flag to turn off precompilation.
948+
2. Configure a private writable depot using `pushfirst!(DEPOT_PATH, private_path)`
949+
where `private_path` is a path unique to this `julia` process. This
950+
can also be done by setting the environment variable `JULIA_DEPOT_PATH` to
951+
`$private_path:$HOME/.julia`.
952+
3. Create a symlink from `~/.julia/compiled` to a directory in a scratch space.
953+
938954
## Julia Releases
939955

940956
### Do I want to use the Stable, LTS, or nightly version of Julia?

src/dump.c

+2-8
Original file line numberDiff line numberDiff line change
@@ -2096,11 +2096,10 @@ JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order)
20962096
JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
20972097
{
20982098
JL_TIMING(SAVE_MODULE);
2099-
char *tmpfname = strcat(strcpy((char *) alloca(strlen(fname)+8), fname), ".XXXXXX");
21002099
ios_t f;
21012100
jl_array_t *mod_array = NULL, *udeps = NULL;
2102-
if (ios_mkstemp(&f, tmpfname) == NULL) {
2103-
jl_printf(JL_STDERR, "Cannot open cache file \"%s\" for writing.\n", tmpfname);
2101+
if (ios_file(&f, fname, 1, 1, 1, 1) == NULL) {
2102+
jl_printf(JL_STDERR, "Cannot open cache file \"%s\" for writing.\n", fname);
21042103
return 1;
21052104
}
21062105
JL_GC_PUSH2(&mod_array, &udeps);
@@ -2213,12 +2212,7 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
22132212
}
22142213
write_int32(&f, 0); // mark the end of the source text
22152214
ios_close(&f);
2216-
22172215
JL_GC_POP();
2218-
if (jl_fs_rename(tmpfname, fname) < 0) {
2219-
jl_printf(JL_STDERR, "Cannot write cache file \"%s\".\n", fname);
2220-
return 1;
2221-
}
22222216

22232217
return 0;
22242218
}

0 commit comments

Comments
 (0)