[araim1@tara-fe1 ~]$ wget http://www.umbc.edu/hpcf/code/scripting-case-study/scripting-case-study.tar.gz
--2011-06-25 11:36:12-- http://www.umbc.edu/hpcf/code/scripting-case-study/scripting-case-study.tar.gz
Resolving www.umbc.edu... 130.85.12.11
Connecting to www.umbc.edu|130.85.12.11|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3040 (3.0K) [application/x-tar]
Saving to: `scripting-case-study.tar.gz'
100%[======================================>] 3,040 --.-K/s in 0s
2011-06-25 11:36:12 (108 MB/s) - `scripting-case-study.tar.gz' saved [3040/3040]
[araim1@tara-fe1 ~]$
[araim1@tara-fe1 ~]$ tar xvzf scripting-case-study.tar.gz
scripting-case-study/
scripting-case-study/studies/
scripting-case-study/studies/ppn.tex
scripting-case-study/studies/create-study.bash
scripting-case-study/studies/get-summary-table.bash
scripting-case-study/studies/get-ppn-table-latex.bash
scripting-case-study/studies/get-summary-table-latex.bash
scripting-case-study/studies/summary.tex
scripting-case-study/studies/summary.pdf
scripting-case-study/studies/get-ppn-table.bash
scripting-case-study/studies/ppn.pdf
scripting-case-study/src/
scripting-case-study/src/Makefile
scripting-case-study/src/utilities.c
scripting-case-study/src/utilities.h
scripting-case-study/src/report_time.c
[araim1@tara-fe1 ~]$ cd scripting-case-study/
[araim1@tara-fe1 scripting-case-study]$ ls
src studies
[araim1@tara-fe1 scripting-case-study]$
[araim1@tara-fe1 ~]$ cat diag_time.dat
00:02:03 0.03 2.06 123.45 % HH:MM:SS=hours=minutes=seconds
[araim1@tara-fe1 ~]$
[araim1@tara-fe1 scripting-case-study]$ cd src/
[araim1@tara-fe1 src]$ ls
Makefile report_time.c utilities.c utilities.h
[araim1@tara-fe1 src]$ make
mpicc -g -O3 -c utilities.c -o utilities.o
mpicc -g -O3 -c -o report_time.o report_time.c
mpicc -g -O3 utilities.o report_time.o -o report_time -lm
[araim1@tara-fe1 src]$ ls
Makefile report_time report_time.c report_time.o utilities.c utilities.h utilities.o
[araim1@tara-fe1 src]$
[araim1@tara-fe1 src]$ cd ../studies/
[araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ ls study_*
study_n01024:
n001ppn1 n001ppn4 n002ppn1 n002ppn4 n004ppn1 n004ppn4 n008ppn1 n008ppn4 n016ppn1 n016ppn4 n032ppn1 n032ppn4
n001ppn2 n001ppn8 n002ppn2 n002ppn8 n004ppn2 n004ppn8 n008ppn2 n008ppn8 n016ppn2 n016ppn8 n032ppn2 n032ppn8

study_n02048:
n001ppn1 n001ppn4 n002ppn1 n002ppn4 n004ppn1 n004ppn4 n008ppn1 n008ppn4 n016ppn1 n016ppn4 n032ppn1 n032ppn4
n001ppn2 n001ppn8 n002ppn2 n002ppn8 n004ppn2 n004ppn8 n008ppn2 n008ppn8 n016ppn2 n016ppn8 n032ppn2 n032ppn8

study_n04096:
n001ppn1 n001ppn4 n002ppn1 n002ppn4 n004ppn1 n004ppn4 n008ppn1 n008ppn4 n016ppn1 n016ppn4 n032ppn1 n032ppn4
n001ppn2 n001ppn8 n002ppn2 n002ppn8 n004ppn2 n004ppn8 n008ppn2 n008ppn8 n016ppn2 n016ppn8 n032ppn2 n032ppn8
[araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ ls -l study_n01024/n002ppn4/
total 20
lrwxrwxrwx 1 araim1 pi_nagaraj 44 Jun 25 11:01 report_time -> /home/araim1/scripting-case-study/src/report_time
-rwxrwx--- 1 araim1 pi_nagaraj 282 Jun 25 11:01 run.slurm
[araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ cat study_n01024/n002ppn4/run.slurm
#!/bin/bash
#SBATCH --job-name=test_study
#SBATCH --output=slurm.out
#SBATCH --error=slurm.err
#SBATCH --partition=develop
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=4
srun ./report_time 1024
[araim1@tara-fe1 studies]$
#!/bin/bash
# Path of the compiled report_time binary that each generated job directory
# is linked to. The default is the original author's build; set EXECUTABLE in
# the environment to use your own copy without editing this file, e.g.
#   EXECUTABLE="$HOME/scripting-case-study/src/report_time" ./create-study.bash
EXECUTABLE=${EXECUTABLE:-/home/araim1/scripting-case-study/src/report_time}
# Creates one job directory for the experiment described by the current
# values of N, NODES and NPERNODE, and fills it with a SLURM batch script
# plus a symlink to the executable. Call it once per parameter combination.
#
# Globals read:
#   N          - problem size (part of the study directory name and the
#                argument passed to report_time)
#   NODES      - value written to "#SBATCH --nodes"
#   NPERNODE   - value written to "#SBATCH --ntasks-per-node"
#   EXECUTABLE - path the symlink inside the job directory points to
# Outputs:
#   One status line on stdout. Creates study_nNNNNN/nNNNppnN/ under the
#   current directory, containing run.slurm and the symlink.
# Returns:
#   0 if the job directory was created or already existed; 1 if any step
#   failed (the failing command reports the reason on stderr).
function write_script
{
  local study_name dir_name

  study_name=$(printf 'study_n%05d' "${N}") || return 1
  dir_name=$(printf '%s/n%03dppn%d' "${study_name}" "${NODES}" "${NPERNODE}") || return 1

  # An existing directory means this job was generated earlier; leave it alone.
  if [[ -d "${dir_name}" ]]; then
    echo "${dir_name} already exists, skipping..."
    return 0
  fi

  echo "Creating job ${dir_name}"
  mkdir -p -- "${dir_name}" || return 1

  # The delimiter is unquoted on purpose: ${NODES}, ${NPERNODE} and ${N} are
  # expanded now, so run.slurm contains the literal values for this job.
  cat << _EOF_ > "${dir_name}/run.slurm" || return 1
#!/bin/bash
#SBATCH --job-name=test_study
#SBATCH --output=slurm.out
#SBATCH --error=slurm.err
#SBATCH --partition=batch
#SBATCH --nodes=${NODES}
#SBATCH --ntasks-per-node=${NPERNODE}
srun ./report_time ${N}
_EOF_
  chmod 775 -- "${dir_name}/run.slurm" || return 1

  # Quoted so that a path containing spaces yields exactly one link.
  ln -s -- "${EXECUTABLE}" "${dir_name}/" || return 1
}
# Generate the whole experiment grid: every problem size is combined with
# 1, 2, 4 and 8 processes per node on 1, 2, 4, ..., 32 nodes.
# write_script picks its settings up from N, NPERNODE and NODES, so the loop
# variables must keep exactly these names.
for N in 1024 2048 4096; do
  for NPERNODE in 1 2 4 8; do
    for NODES in 1 2 4 8 16 32; do write_script; done
  done
done
[araim1@tara-fe1 studies]$ ./create-study.bash
Creating job study_n01024/n001ppn1
Creating job study_n01024/n002ppn1
Creating job study_n01024/n004ppn1
Creating job study_n01024/n008ppn1
Creating job study_n01024/n016ppn1
Creating job study_n01024/n032ppn1
Creating job study_n01024/n001ppn2
Creating job study_n01024/n002ppn2
Creating job study_n01024/n004ppn2
...
Creating job study_n04096/n008ppn8
Creating job study_n04096/n016ppn8
Creating job study_n04096/n032ppn8
[araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ for i in study_n*/n*ppn*;
> do
> cd $i; sbatch run.slurm; cd ../../;
> done
Submitted batch job 64989
Submitted batch job 64990
Submitted batch job 64991
...
[araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ for i in study_n01024/n032ppn8 study_n02048/n032ppn8 study_n04096/n032ppn8;
> do
> cd $i; sbatch run.slurm; cd ../../;
> done
Submitted batch job 64992
Submitted batch job 64993
Submitted batch job 64994
[araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ cat study_n01024/n001ppn1/diag_time.dat
00:17:04 0.28 17.07 1024.00 % HH:MM:SS=hours=minutes=seconds
[araim1@tara-fe1 studies]$
N = 1024
p=1 p=2 p=4 p=8 p=16 p=32
1 process per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32
2 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16
4 processes per node 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08
8 processes per node 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 00:00:04
N = 2048
p=1 p=2 p=4 p=8 p=16 p=32
1 process per node 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04
2 processes per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32
4 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16
8 processes per node 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08
N = 4096
p=1 p=2 p=4 p=8 p=16 p=32
1 process per node 01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08
2 processes per node 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04
4 processes per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32
8 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16
[araim1@tara-fe1 studies]$ cat study_n04096/n032ppn8/diag_time.dat
00:00:16 0.00 0.27 16.00 % HH:MM:SS=hours=minutes=seconds
[araim1@tara-fe1 studies]$ gawk -F' ' '{ print $1 }' study_n04096/n032ppn8/diag_time.dat
00:00:16
[araim1@tara-fe1 studies]$ gawk -F' ' '{ print $2 }' study_n04096/n032ppn8/diag_time.dat
0.00
[araim1@tara-fe1 studies]$ gawk -F' ' '{ print $3 }' study_n04096/n032ppn8/diag_time.dat
0.27
[araim1@tara-fe1 studies]$ gawk -F' ' '{ print $4 }' study_n04096/n032ppn8/diag_time.dat
16.00
[araim1@tara-fe1 studies]$
#!/bin/bash
# Prints one table cell holding the timing result of a single run.
#
# Arguments:
#   $1 - problem size N
#   $2 - number of nodes
#   $3 - processes per node
# Outputs:
#   The first whitespace-separated field on the first line of
#   study_nNNNNN/nNNNppnN/diag_time.dat (relative to the current directory),
#   right-aligned in a 10-character cell (' %8s ') with no trailing newline.
#   The cell holds '---' when the file is missing, unreadable, or has no
#   such field.
# Notes:
#   All working variables are local, so the caller's N and NODES loop
#   variables are never overwritten.
write_result()
{
  local n=$1 nodes=$2 npn=$3
  local filename result=''

  filename=$(printf 'study_n%05d/n%03dppn%d/diag_time.dat' "$n" "$nodes" "$npn")

  if [[ -f "$filename" ]]; then
    # Only the first line is used. read splits it on blanks; the remaining
    # fields go to the throw-away variable. read reports failure for a last
    # line without a newline but still fills in result, hence "|| true".
    # Errors from an unreadable file are silenced and treated as "no result".
    { IFS=$' \t' read -r result _ < "$filename"; } 2>/dev/null || true
  fi

  # No file, or nothing usable in it: show the placeholder.
  if [[ -z "$result" ]]; then
    result='---'
  fi

  printf ' %8s ' "$result"
}
# Prints the header row of the summary table.
#
# Arguments:
#   $@ - one label per column, e.g. 'p=1' 'p=2'
# Outputs:
#   A 20-character blank field (as wide as a row label such as
#   '2 processes per node'), then every label right-aligned in a
#   10-character cell, then a newline.
write_header()
{
  local label

  printf '%20s' ''
  # "$@" keeps each label as a single word: no splitting on spaces and no
  # filename expansion of characters such as '*'.
  for label in "$@"; do
    # '%9s ' is 10 characters wide and ends its text in the same column as
    # the ' %8s ' cells printed by write_result, so a header stays above its
    # own column instead of drifting one character further right each time.
    printf '%9s ' "$label"
  done
  printf '\n'
}
# Print one table per problem size: a header row (p=1 ... p=32), then one row
# per processes-per-node setting with one cell for each node count.
for N in 1024 2048 4096; do
  echo "N = $N"
  write_header 'p=1' 'p=2' 'p=4' 'p=8' 'p=16' 'p=32'

  for NPERNODE in 1 2 4 8; do
    # Singular wording when there is exactly one process per node.
    if [[ "$NPERNODE" -eq 1 ]]; then
      row_label="${NPERNODE} process per node"
    else
      row_label="${NPERNODE} processes per node"
    fi
    # Pad the label to the 20 columns that write_header leaves blank.
    # Without padding, the 18-character singular label shifts its whole row
    # two columns to the left of the other rows.
    printf '%-20s' "$row_label"

    for NODES in 1 2 4 8 16 32; do
      write_result "$N" "$NODES" "$NPERNODE"
    done
    printf '\n'
  done
  printf '\n'
done
[araim1@tara-fe1 studies]$ ./get-summary-table.bash
N = 1024
p=1 p=2 p=4 p=8 p=16 p=32
1 process per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32
2 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16
4 processes per node 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08
8 processes per node 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 00:00:04
N = 2048
p=1 p=2 p=4 p=8 p=16 p=32
1 process per node 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04
2 processes per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32
4 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16
8 processes per node 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08
N = 4096
p=1 p=2 p=4 p=8 p=16 p=32
1 process per node 01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08
2 processes per node 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04
4 processes per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32
8 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 --- ---
[araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ ./get-summary-table-latex.bash
N = 1024
p=1 p=2 p=4 p=8 p=16 p=32
1 process per node& 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 \\
2 processes per node& 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 & 00:00:16 \\
4 processes per node& 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 & 00:00:16 & 00:00:08 \\
8 processes per node& 00:02:08 & 00:01:04 & 00:00:32 & 00:00:16 & 00:00:08 & 00:00:04 \\
N = 2048
p=1 p=2 p=4 p=8 p=16 p=32
1 process per node& 00:34:08 & 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 \\
2 processes per node& 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 \\
4 processes per node& 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 & 00:00:16 \\
8 processes per node& 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 & 00:00:16 & 00:00:08 \\
N = 4096
p=1 p=2 p=4 p=8 p=16 p=32
1 process per node& 01:08:16 & 00:34:08 & 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 \\
2 processes per node& 00:34:08 & 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 \\
4 processes per node& 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 \\
8 processes per node& 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & --- & --- \\
[araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ pdflatex summary.tex
This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4)
...
Output written on summary.pdf (1 page, 26486 bytes).
Transcript written on summary.log.
[araim1@tara-fe1 studies]$
Results for npn = 1
p=1 p=2 p=4 p=8 p=16 p=32 p=64 p=128 p=256
00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 --- ---
00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 --- ---
01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 --- ---
Results for npn = 2
p=1 p=2 p=4 p=8 p=16 p=32 p=64 p=128 p=256
00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 --- ---
00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 --- ---
01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 --- ---
Results for npn = 4
p=1 p=2 p=4 p=8 p=16 p=32 p=64 p=128 p=256
00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 ---
00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 ---
01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 ---
Results for npn = 8
p=1 p=2 p=4 p=8 p=16 p=32 p=64 p=128 p=256
00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 00:00:04
00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08
01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 --- ---