Skip to content

Commit 78d0316

Browse files
committed
#18 programmatically construct configuration from provisioning provider
1 parent 6d5d86e commit 78d0316

File tree

54 files changed

+177
-118
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+177
-118
lines changed

README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ This project provides provisioned HPC cluster models using underlying virtualiza
44

55
The purpose of this project is to provide a common baseline for repeatable HPC experiments. This has been used for education, distributed collaboration, tool development collaboration, failure signature discovery, local HPC debugging and cluster configuration comparisons, enabled by construction and use of short-lived and common baseline HPC cluster models. In short, it extends the "systems as cattle not pets" <A HREF="http://www.pass.org/eventdownload.aspx?suid=1902">[1]</A> <A HREF="http://cloudscaling.com/blog/cloud-computing/the-history-of-pets-vs-cattle/">[2]</A> analogy into the realm of "clusters as cattle, not pets."
66

7-
The initial release requires local enablers: gmake, vagrant and virtualbox. Lighterweight and multi-node mechanisms, such as containers, jails and pods, are planned.
7+
The initial release requires local enablers: gmake, vagrant, and virtualbox and/or libvirt. Lighter-weight and multi-node mechanisms, such as containers, jails and pods, are planned. Virtualbox provisioning is about 50% slower than libvirt provisioning, although it is
8+
<A HREF="https://github.com/hpc/hpc-collab/issues/158">more consistent</A> and
9+
<A HREF="https://github.com/hpc/hpc-collab/issues/159">reliable</A>.
810

911
Two representative HPC cluster recipes are provided. Cluster recipes are in the <EM>clusters</EM> directory. Presently, recipes generate clusters local to the installation host.
1012

clusters/common/Makefile

+36-14
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,11 @@ MAKEFILE_DIR = $(CURDIR)
2525
BASE_DIR = $(realpath $(MAKEFILE_DIR)/../..)
2626
CLUSTERS_DIR = $(BASE_DIR)/clusters
2727

28+
ifeq ($(WHERE_INVOKED),clusters)
29+
VIRTUALCLUSTER = vc
30+
else
2831
VIRTUALCLUSTER ?= $(IAM)
32+
endif
2933
export VC ?= $(VIRTUALCLUSTER)
3034

3135
CFG = cfg
@@ -157,6 +161,7 @@ MARK_PROVISIONED = markprovisioned.sh
157161
VERIFY_LOCALENV = verifylocalenv.sh
158162
SAVE_LOGSDB = savelogsdb.sh
159163
COMPILE_VAGRANTFILE = compilevagrantfile.sh
164+
GENERATE_PROVIDER_FILES = generateproviderfiles.sh
160165

161166
# match entries in the directories defined by $(STATE_D)/<state>
162167
STATE_LIST = nonexistent poweroff running provisioned
@@ -176,6 +181,9 @@ NODES_POWEROFF = $(foreach n,$(NODES),$(POWEROFF_D)/$(n))
176181
NODES_NONEXISTENT = $(foreach n,$(NODES),$(NONEXISTENT_D)/$(n))
177182
NODES_UNPROVISION = $(foreach n,$(NODES),$(n)_UNPROVISION)
178183

184+
SRC_PROVIDER_FILES = $(shell find $(CLUSTERS_DIR)/$(VC) -name \*%\*% -type f)
185+
TARGET_PROVIDER_FILES = $(foreach f,$(SRC_PROVIDER_FILES),$(basename $(f)))
186+
179187
QUICK_HUMANFRIENDLY_FLAG_NAMES = quick quick-flag flag-quick flags-quick flag-quicker \
180188
flags-quicker flags-faster flag-faster provision-flags-quick
181189
NORMAL_HUMANFRIENDLY_FLAG_NAMES = normal normal-flag flag-normal flags-normal provision-flags-normal
@@ -187,16 +195,17 @@ HUMANFRIENDLY_FLAGS = $(QUICK_HUMANFRIENDLY_FLAG_NAMES) \
187195

188196
SAVELOGS_TARGETS = savelogs save-logs savelog save-log collectlogs collect-logs collectlog collect-log
189197

190-
HUMANFRIENDLY_TARGETS = clean clean-state compare-vc-cksum compilevagrantfile copyright \
191-
doc help \
192-
ingest-state scopyright how show-state show-vars status \
193-
todo verifylocalenv \
198+
HUMANFRIENDLY_TARGETS = clean clean-state compare-vc-cksum compilevagrantfile generateproviderfiles \
199+
doc help \
200+
ingest-state copyright show show-state show-vars status \
201+
todo verifylocalenv \
194202
$(SAVELOGS_TARGETS)
195203

196204
PHONY = $(HUMANFRIENDLY_TARGETS) $(HUMANFRIENDLY_FLAGS)
197205

198206
.PHONY: $(PHONY)
199207

208+
#.DELETE_ON_ERROR: $(NODES_STATE) $(GENERATED_PROVIDERFILES_TARGETS)
200209
.DELETE_ON_ERROR: $(NODES_STATE)
201210

202211
all: show
@@ -213,6 +222,7 @@ clean: $(NONEXISTENT_FLAGS)
213222
$(HUSH)$(DISABLE_RM) rm -f $(VAGRANTFILE) $(VAGRANTFILE)~
214223
$(HUSH)$(DISABLE_RM) rm -f $(DOXYGEN_OUT)
215224
$(HUSH)$(DISABLE_RM) rm -f $(PROVISIONED_D)/*
225+
$(HUSH)$(DISABLED_RM) rm -f $(GENERATED_PROVIDERFILES_TARGETS)
216226
#$(HUSH)find . -name ._\* -type f -exec $(DISABLE_RM) rm -f \{\} \;
217227

218228
show show-state status: compilevagrantfile $(STATE_DIRS_ALL) ingest-state
@@ -235,16 +245,21 @@ todo:
235245

236246
show-vars:
237247
$(HUSH)$(info )
238-
$(HUSH)$(info NODES_WITH_REQUIRED_PROVISIONED_NODE_TARGETS: $(NODES_WITH_REQUIRED_PROVISIONED_NODE_TARGETS))
239-
$(HUSH)$(info PATH: $(PATH))
240-
$(HUSH)$(info PATH: $${PATH})
241248
$(HUSH)$(info )
242-
$(HUSH)$(info FLAGS: $(FLAGS))
243-
$(HUSH)$(info FLAGS_OFF: $(FLAGS_OFF))
244-
$(HUSH)$(info FLAGS_ON: $(FLAGS_ON))
245-
$(HUSH)$(info FLAGS_QUICK: $(FLAGS_QUICK))
246-
$(HUSH)$(info FLAGS_COMPLETE: $(FLAGS_COMPLETE))
249+
$(HUSH)$(info SRC_PROVIDER_FILES: $(SRC_PROVIDER_FILES))
250+
$(HUSH)$(info TARGET_PROVIDER_FILES: $(TARGET_PROVIDER_FILES))
251+
$(HUSH)$(info GENERATED_PROVIDER_FILES: $(GENERATED_PROVIDER_FILES))
247252
$(HUSH)$(info )
253+
# $(HUSH)$(info NODES_WITH_REQUIRED_PROVISIONED_NODE_TARGETS: $(NODES_WITH_REQUIRED_PROVISIONED_NODE_TARGETS))
254+
# $(HUSH)$(info PATH: $(PATH))
255+
# $(HUSH)$(info PATH: $${PATH})
256+
# $(HUSH)$(info )
257+
# $(HUSH)$(info FLAGS: $(FLAGS))
258+
# $(HUSH)$(info FLAGS_OFF: $(FLAGS_OFF))
259+
# $(HUSH)$(info FLAGS_ON: $(FLAGS_ON))
260+
# $(HUSH)$(info FLAGS_QUICK: $(FLAGS_QUICK))
261+
# $(HUSH)$(info FLAGS_COMPLETE: $(FLAGS_COMPLETE))
262+
# $(HUSH)$(info )
248263

249264
## @todo use graphviz on Makefile to self-generate this
250265
help: Makefile
@@ -331,12 +346,19 @@ flags flag: | $(PROVISION_FLAG_D)
331346

332347
###
333348

349+
### XXX the generateproviderfiles recipe below hard-codes "vc"; it should use $(VC) instead
350+
351+
generateproviderfiles: $(TARGET_PROVIDER_FILES)
352+
353+
$(TARGET_PROVIDER_FILES): $(SRC_PROVIDER_FILES)
354+
cd $(CLUSTERS_DIR)/vc ; env VC=vc $(GENERATE_PROVIDER_FILES)
355+
334356
compilevagrantfile: $(VAGRANTFILE_PREREQ)
357+
358+
$(VAGRANTFILE): $(VAGRANTFILE_PREREQ) $(TARGET_PROVIDER_FILES)
335359
env VC=$(VC) $(COMPILE_VAGRANTFILE)
336360
vagrant validate >/dev/null 2>&1
337361

338-
$(VAGRANTFILE): compilevagrantfile
339-
340362
ingest-state: clean-state
341363
$(HUSH)env VC=$(VC) $(INGEST_STATE)
342364

clusters/common/Vagrantfile.d/Vagrantfile.template

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ ENV["VC"] = "#{clustername}"
2727

2828
Vagrant.configure("2") do |config|
2929
config.vm.box = "generic/centos7"
30+
config.vm.box_check_update = false
31+
config.vm.graceful_halt_timeout = 15
3032

3133
config.ssh.forward_env = ["TZ", "LC_ALL", "VC"]
3234
config.ssh.forward_agent = true
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
virtualbox
1+
libvirt

clusters/common/Vagrantfile.d/cfg.vm.providers.d/libvirt

+37-14
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,51 @@
11

22
cfg.vm.provider :libvirt do |lv, override|
3+
override.vm.hostname = hostnm
4+
5+
# @see: https://fedoraproject.org/wiki/Changes/Vagrant_2.2_with_QEMU_Session
6+
lv.qemu_use_session = false
7+
8+
# @see: https://bugzilla.redhat.com/show_bug.cgi?id=1283989
9+
lv.cpu_mode = 'host-passthrough'
10+
11+
# ## this may be necessary if the initial virtual host configuration fails
12+
# ## at least on some linux/qemu variants
13+
# ## lv.management_network_device = 'virbr0'
14+
# ## lv.uri = 'qemu:///system'
15+
# ## lv.host = 'localhost'
16+
# ## lv.driver = 'kvm'
17+
318
lv.memory = manifest[:memory].chomp
4-
lv.cpus = manifest[:cpus].chomp
19+
lv.cpus = manifest[:cpus].chomp
520

6-
override.vm.hostname = hostnm
7-
override.vm.graceful_halt_timeout = 10
21+
# eth0
22+
# override.vm.network "public_network"
23+
24+
# eth1
825
override.vm.network "private_network",
9-
nic_type: "virtio",
10-
:ip => manifest[:ip],
11-
:mac => manifest[:mac]
26+
:ip => manifest[:ip]
1227

28+
# claimed to address indefinite wait for "waiting for IP address" error, but causes:
29+
# "Call to virDomainCreateWithFlags failed: Unable to get index for interface eth0: No such device"
30+
# :libvirt__dhcp_enabled => false
31+
32+
# eth2, if it exists
1333
if manifest[:bridge] then
1434
override.vm.network "public_network",
15-
nic_type: "virtio",
16-
:ip => manifest[:ip],
35+
:ip => manifest[:ip],
1736
:bridge => manifest[:bridge]
1837
end # manifest[:bridge]
1938

39+
if manifest[:adddiskpath] then
40+
lv.storage :file,
41+
#
42+
# Libvirt provider: "absolute volume paths like '/tmp/vcfs_repodisk.vdi' not yet supported"
43+
# :path => manifest[:adddiskpath],
44+
#
45+
:size => REPODISK_SIZE * 1024,
46+
:allow_existing => true
47+
end # manifest[:adddiskpath]
48+
2049
override.trigger.before [:up] do |noduplicate|
2150
noduplicate.run = {path: "../common/bin/nodup.sh", args: hostnm }
2251
end # override.trigger.before[:up]
@@ -36,12 +65,6 @@
3665
end # override.trigger.before [:up]
3766
end # manifest[:ingestfromhost]
3867

39-
if manifest[:adddiskpath] then
40-
lv.storage :file => manifest[:adddiskpath],
41-
:size => REPODISK_SIZE * 1024,
42-
:type => 'raw'
43-
end # manifest[:adddiskpath]
44-
4568
override.trigger.before [:destroy] do |unprovision|
4669
unprovision.run = {path: "../common/bin/unprovision.sh", args: hostnm }
4770
end # override.trigger.before

clusters/common/Vagrantfile.d/cfg.vm.providers.d/virtualbox

+3-2
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55

66
override.vm.hostname = hostnm
77
override.vm.graceful_halt_timeout = 10
8-
override.vm.network "private_network", nic_type: "virtio",
8+
override.vm.network "private_network",
9+
nic_type: "virtio",
910
:ip => manifest[:ip],
1011
:mac => manifest[:mac]
1112

1213
if manifest[:bridge] then
1314
override.vm.network "public_network",
14-
:ip => manifest[:ip],
1515
nic_type: "virtio",
16+
:ip => manifest[:ip],
1617
:bridge => manifest[:bridge]
1718
end # manifest[:bridge]
1819

clusters/common/bin/ingestvagrantstate.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ do
8181
${DISABLE_RM} rm -f ${running}/${n}
8282
fi
8383
;;
84-
"poweroff")
84+
"poweroff"|"shutoff")
8585
touch ${poweroff}/${n}
8686
${DISABLE_RM} rm -f ${running}/${n} ${nonexistent}/${n}
8787
;;

clusters/common/bin/nodup.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ fi
7676

7777
## @todo collect the provider from the Vagrantfile and select which mechanism to list, or use vagrant commands
7878
#existing=$(VBoxManage list vms)
79-
existing=$(echo $(vagrant global-status | egrep 'running|poweroff|suspend' | awk '{print $2}'))
79+
existing=$(echo $(vagrant global-status | egrep 'running|poweroff|suspend|shutoff' | awk '{print $2}'))
8080
for m in ${existing}
8181
do
8282
if [ -f ${STATE_PROVISIONED}/${m} ] ; then

clusters/common/bin/provision.sh

+36-19
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,10 @@ declare -x DEBUG_DEFAULT_ORDER_OF_OPERATIONS="DebugNote VerbosePWD ClearSELinuxE
9393

9494
declare -x NORMAL_ORDER_OF_OPERATIONS="${CORE_ORDER_OF_OPERATIONS} FlagSlashVagrant TimeStamp"
9595

96-
declare -x REPO_DISK=/dev/sdb
97-
declare -x REPO_PART=${REPO_DISK}1
96+
declare -a REPO_DISK_LIST=( '/dev/vdb' '/dev/sdb' )
97+
declare -x REPO_DISK
98+
declare -x REPO_PART
99+
declare -x REPO_PART_NO=1
98100

99101
## yes, there's a bash one-liner to do this, but no, this may be more readable
100102
if [ -n "${DEBUG}" ] ; then
@@ -424,9 +426,9 @@ ConfigureLocalRepos() {
424426
[ ! -x "${createrepo}" ] && return
425427
[ ! -x "${reposync}" ] && return
426428
[ ! -x "${rsync}" ] && return
427-
[ ! -b "${REPO_DISK}" ] && return
428-
# [ ! -b "${REPO_PART}" ] && return
429429
[ -z "${REPO_MOUNT}" ] && return
430+
[ ! -b "${REPO_DISK}" ] && return
431+
[ ! -b "${REPO_PART}" ] && return
430432

431433
Rc ErrExit ${EX_OSERR} "mkdir -p ${REPO_MOUNT} 2>&1"
432434
Rc ErrExit ${EX_OSERR} "mount ${REPO_MOUNT} 2>&1"
@@ -595,25 +597,32 @@ CopyCommon() {
595597
##
596598
SetupSecondDisk() {
597599

600+
export REPO_MOUNT=${COMMON}/repos
601+
export REPO_LOCAL=${REPO_MOUNT}/local
602+
603+
# Sensibly skip these: so, if we don't have a 2nd disk, but could otherwise proceed, continue
604+
for dsk in ${REPO_DISK_LIST[@]}
605+
do
606+
if [ ! -b "${dsk}" ] ; then
607+
continue
608+
fi
609+
REPO_DISK=${dsk}
610+
REPO_PART=${dsk}${REPO_PART_NO}
611+
break
612+
done
613+
598614
if [ -z "${REPO_DISK}" ] ; then
599615
return
600616
fi
601-
if [ ! -b ${REPO_DISK} ] ; then
602-
return
603-
fi
604-
605-
# Rc ErrExit ${EX_CONFIG} "yes | parted ${REPO_DISK} --align opt mklabel gpt 2>&1"
606-
# Rc ErrExit ${EX_CONFIG} "yes | parted ${REPO_DISK} mkpart primary 2048s 20G 2>&1"
607-
# Rc ErrExit ${EX_CONFIG} "mkfs.xfs -L repos ${REPO_PART} 2>&1"
608-
# Rc ErrExit ${EX_CONFIG} "xfs_repair ${REPO_PART} 2>&1"
609-
# Verbose " ${REPO_PART} ${REPO_MOUNT}"
610-
611-
export REPO_MOUNT=${COMMON}/repos
612-
export REPO_LOCAL=${REPO_MOUNT}/local
613617

614-
Rc ErrExit ${EX_CONFIG} "mkfs.xfs -f -L repos ${REPO_DISK} 2>&1"
615-
Rc ErrExit ${EX_CONFIG} "xfs_repair ${REPO_DISK} 2>&1"
616-
Verbose " ${REPO_DISK} ${REPO_MOUNT}"
618+
if [ ! -b "${REPO_PART}" ] ; then
619+
Rc ErrExit ${EX_CONFIG} "yes | parted ${REPO_DISK} --align opt mklabel gpt 2>&1"
620+
Rc ErrExit ${EX_CONFIG} "yes | parted ${REPO_DISK} mkpart primary 2048s 16G 2>&1"
621+
fi
622+
Rc ErrExit ${EX_CONFIG} "mkfs.xfs -f -L repos ${REPO_PART} 2>&1"
623+
Rc ErrExit ${EX_CONFIG} "xfs_repair ${REPO_PART} 2>&1"
624+
echo "${REPO_PART} ${COMMON}/repos xfs rw,defaults,noatime,async,nobarrier 0 0" >> /etc/fstab
625+
Verbose " ${REPO_PART} ${REPO_MOUNT}"
617626
return
618627
}
619628

@@ -1274,10 +1283,12 @@ SetServices() {
12741283
local _on
12751284
local _off
12761285
local turnsvcmsg=""
1286+
local virt_type=""
12771287

12781288
if [ -f /.docker.env ] ; then
12791289
Verbose " docker, skipped"
12801290
fi
1291+
virt_type=$(echo $(virt-what))
12811292

12821293
for _d in ${SERVICES_D} ${SERVICES_ON} ${SERVICES_OFF}
12831294
do
@@ -1308,6 +1319,12 @@ SetServices() {
13081319
if [[ ${_sysctl_do} = *"No such file or directory"* ]] ; then
13091320
continue
13101321
fi
1322+
if [ "${_s}" = "vboxadd" ] ; then
1323+
if [[ "${virt}" != *virtualbox* ]] ; then
1324+
Verbose " ${_s} [skipped]"
1325+
continue
1326+
fi
1327+
fi
13111328
svcs_msg="${svcs_msg} ${_s}"
13121329
for _c in ${_sysctl_do}
13131330
do

clusters/vc/cfg/vc1/rootfs/etc/chrony.conf

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# Use local server
2-
server 192.168.56.71
1+
# Use local server: vcsvc
2+
server 192.168.78.71
33

44
# Record the rate at which the system clock gains/losses time.
55
driftfile /var/lib/chrony/drift
@@ -19,7 +19,6 @@ rtcsync
1919
#minsources 2
2020

2121
# Allow NTP client access from local network.
22-
#allow 192.168.56.0/24
2322
allow 127.0.0.1
2423

2524
# Serve time even if not synchronized to a time source.
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
appendwhere: # Generated by NetworkManager
2-
appendwhat: nameserver 192.168.56.71
2+
appendwhat: nameserver 192.168.78.71

clusters/vc/cfg/vc2/rootfs/etc/chrony.conf

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Use local server
2-
server 192.168.56.71
2+
server 192.168.78.71
33

44
# Record the rate at which the system clock gains/losses time.
55
driftfile /var/lib/chrony/drift
@@ -19,7 +19,6 @@ rtcsync
1919
#minsources 2
2020

2121
# Allow NTP client access from local network.
22-
#allow 192.168.56.0/24
2322
allow 127.0.0.1
2423

2524
# Serve time even if not synchronized to a time source.
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
appendwhere: # Generated by NetworkManager
2-
appendwhat: nameserver 192.168.56.71
2+
appendwhat: nameserver 192.168.78.71
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
192.168.56.77
1+
192.168.78.77

clusters/vc/cfg/vcaltdb/rootfs/etc/chrony.conf

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Use local server
2-
server 192.168.56.71
2+
server 192.168.78.71
33

44
# Record the rate at which the system clock gains/losses time.
55
driftfile /var/lib/chrony/drift
@@ -19,7 +19,6 @@ rtcsync
1919
#minsources 2
2020

2121
# Allow NTP client access from local network.
22-
#allow 192.168.56.0/24
2322
allow 127.0.0.1
2423

2524
# Serve time even if not synchronized to a time source.

0 commit comments

Comments
 (0)