[araim1@tara-fe1 ~]$ wget http://www.umbc.edu/hpcf/code/scripting-case-study/scripting-case-study.tar.gz --2011-06-25 11:36:12-- http://www.umbc.edu/hpcf/code/scripting-case-study/scripting-case-study.tar.gz Resolving www.umbc.edu... 130.85.12.11 Connecting to www.umbc.edu|130.85.12.11|:80... connected. HTTP request sent, awaiting response... 200 OK Length: 3040 (3.0K) [application/x-tar] Saving to: `scripting-case-study.tar.gz' 100%[======================================>] 3,040 --.-K/s in 0s 2011-06-25 11:36:12 (108 MB/s) - `scripting-case-study.tar.gz' saved [3040/3040] [araim1@tara-fe1 ~]$
[araim1@tara-fe1 ~]$ tar xvzf scripting-case-study.tar.gz scripting-case-study/ scripting-case-study/studies/ scripting-case-study/studies/ppn.tex scripting-case-study/studies/create-study.bash scripting-case-study/studies/get-summary-table.bash scripting-case-study/studies/get-ppn-table-latex.bash scripting-case-study/studies/get-summary-table-latex.bash scripting-case-study/studies/summary.tex scripting-case-study/studies/summary.pdf scripting-case-study/studies/get-ppn-table.bash scripting-case-study/studies/ppn.pdf scripting-case-study/src/ scripting-case-study/src/Makefile scripting-case-study/src/utilities.c scripting-case-study/src/utilities.h scripting-case-study/src/report_time.c [araim1@tara-fe1 ~]$ cd scripting-case-study/ [araim1@tara-fe1 scripting-case-study]$ ls src studies [araim1@tara-fe1 scripting-case-study]$
[araim1@tara-fe1 ~]$ cat diag_time.dat 00:02:03 0.03 2.06 123.45 % HH:MM:SS=hours=minutes=seconds [araim1@tara-fe1 ~]$
[araim1@tara-fe1 scripting-case-study]$ cd src/ [araim1@tara-fe1 src]$ ls Makefile report_time.c utilities.c utilities.h [araim1@tara-fe1 src]$ make mpicc -g -O3 -c utilities.c -o utilities.o mpicc -g -O3 -c -o report_time.o report_time.c mpicc -g -O3 utilities.o report_time.o -o report_time -lm [araim1@tara-fe1 src]$ ls Makefile report_time report_time.c report_time.o utilities.c utilities.h utilities.o [araim1@tara-fe1 src]$
[araim1@tara-fe1 src]$ cd ../studies/ [araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ ls study_* study_n01024: n001ppn1 n001ppn4 n002ppn1 n002ppn4 n004ppn1 n004ppn4 n008ppn1 n008ppn4 n016ppn1 n016ppn4 n032ppn1 n032ppn4 n001ppn2 n001ppn8 n002ppn2 n002ppn8 n004ppn2 n004ppn8 n008ppn2 n008ppn8 n016ppn2 n016ppn8 n032ppn2 n032ppn8 study_n02048: n001ppn1 n001ppn4 n002ppn1 n002ppn4 n004ppn1 n004ppn4 n008ppn1 n008ppn4 n016ppn1 n016ppn4 n032ppn1 n032ppn4 n001ppn2 n001ppn8 n002ppn2 n002ppn8 n004ppn2 n004ppn8 n008ppn2 n008ppn8 n016ppn2 n016ppn8 n032ppn2 n032ppn8 study_n04096: n001ppn1 n001ppn4 n002ppn1 n002ppn4 n004ppn1 n004ppn4 n008ppn1 n008ppn4 n016ppn1 n016ppn4 n032ppn1 n032ppn4 n001ppn2 n001ppn8 n002ppn2 n002ppn8 n004ppn2 n004ppn8 n008ppn2 n008ppn8 n016ppn2 n016ppn8 n032ppn2 n032ppn8 [araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ ls -l study_n01024/n002ppn4/ total 20 lrwxrwxrwx 1 araim1 pi_nagaraj 44 Jun 25 11:01 report_time -> /home/araim1/scripting-case-study/src/report_time -rwxrwx--- 1 araim1 pi_nagaraj 282 Jun 25 11:01 run.slurm [araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ cat study_n01024/n002ppn4/run.slurm #!/bin/bash #SBATCH --job-name=test_study #SBATCH --output=slurm.out #SBATCH --error=slurm.err #SBATCH --partition=develop #SBATCH --nodes=2 #SBATCH --ntasks-per-node=4 srun ./report_time 1024 [araim1@tara-fe1 studies]$
#!/bin/bash
#
# create-study.bash - generate one job directory per experiment of the study.
#
# For every combination of problem size N, processes per node and node count
# listed in the loops at the bottom of this file, a directory
#   study_n<N, 5 digits>/n<nodes, 3 digits>ppn<processes per node>
# is created below the current working directory. Each directory receives a
# SLURM batch script (run.slurm) and a symbolic link named report_time that
# points at the executable. Directories that already exist are skipped, so
# the script can be re-run after new parameter values have been added.
#
# Environment:
#   EXECUTABLE  optional; path of the program linked into every job
#               directory. Defaults to the path below.

# The default is the location used by the original study; set EXECUTABLE in
# the environment when the sources were unpacked somewhere else.
EXECUTABLE=${EXECUTABLE:-/home/araim1/scripting-case-study/src/report_time}
readonly EXECUTABLE

#######################################
# Create the job directory, SLURM script and executable link for one
# experiment.
# Globals:
#   EXECUTABLE (read)
#   N, NODES, NPERNODE (read only when the matching argument is omitted)
# Arguments:
#   $1 - problem size         (default: $N)
#   $2 - number of nodes      (default: $NODES)
#   $3 - processes per node   (default: $NPERNODE)
# Outputs:
#   One progress line on stdout; diagnostics on stderr.
# Returns:
#   0 if the job was created or its directory already existed,
#   1 if a parameter is missing/invalid or a file operation failed.
#######################################
write_script() {
  local n=${1:-${N:-}}
  local nodes=${2:-${NODES:-}}
  local npernode=${3:-${NPERNODE:-}}
  local study_name dir_name

  if [[ -z "${n}" || -z "${nodes}" || -z "${npernode}" ]]; then
    printf 'write_script: problem size, node count and processes per node are required\n' >&2
    return 1
  fi

  # printf reports an error for values that are not integers, so a typo in a
  # parameter list is caught here instead of producing an odd directory name.
  printf -v study_name 'study_n%05d' "${n}" || return 1
  printf -v dir_name '%s/n%03dppn%d' "${study_name}" "${nodes}" "${npernode}" \
    || return 1

  if [[ -d "${dir_name}" ]]; then
    echo "${dir_name} already exists, skipping..."
    return 0
  fi
  echo "Creating job ${dir_name}"

  mkdir -p -- "${dir_name}" || return 1

  # The here-document delimiter is deliberately unquoted: the parameter
  # values must be expanded into the generated batch script.
  cat << _EOF_ > "${dir_name}/run.slurm" || return 1
#!/bin/bash
#SBATCH --job-name=test_study
#SBATCH --output=slurm.out
#SBATCH --error=slurm.err
#SBATCH --partition=batch
#SBATCH --nodes=${nodes}
#SBATCH --ntasks-per-node=${npernode}

srun ./report_time ${n}
_EOF_

  chmod 775 -- "${dir_name}/run.slurm" || return 1

  # The link name is fixed because run.slurm invokes ./report_time, whatever
  # the file EXECUTABLE points at happens to be called.
  ln -s -- "${EXECUTABLE}" "${dir_name}/report_time" || return 1
}

# A missing executable is not fatal (the links can be created before the
# program is built), but the jobs will fail until it exists, so say so.
if [[ ! -x "${EXECUTABLE}" ]]; then
  printf 'warning: %s is not an executable file; the report_time links will dangle until it is built\n' \
    "${EXECUTABLE}" >&2
fi

# For each problem size, we'll run the experiment with 1, 2, 4, and 8 processes
# per node on 1, 2, 4, ..., 32 nodes. Stop at the first job that cannot be
# written rather than leaving a partly generated study unnoticed.
for N in 1024 2048 4096; do
  for NPERNODE in 1 2 4 8; do
    for NODES in 1 2 4 8 16 32; do
      write_script "${N}" "${NODES}" "${NPERNODE}" || exit 1
    done
  done
done
[araim1@tara-fe1 studies]$ ./create-study.bash Creating job study_n01024/n001ppn1 Creating job study_n01024/n002ppn1 Creating job study_n01024/n004ppn1 Creating job study_n01024/n008ppn1 Creating job study_n01024/n016ppn1 Creating job study_n01024/n032ppn1 Creating job study_n01024/n001ppn2 Creating job study_n01024/n002ppn2 Creating job study_n01024/n004ppn2 ... Creating job study_n04096/n008ppn8 Creating job study_n04096/n016ppn8 Creating job study_n04096/n032ppn8 [araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ for i in study_n*/n*ppn*; > do > cd $i; sbatch run.slurm; cd ../../; > done Submitted batch job 64989 Submitted batch job 64990 Submitted batch job 64991 ... [araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ for i in study_n01024/n032ppn8 study_n02048/n032ppn8 study_n04096/n032ppn8; > do > cd $i; sbatch run.slurm; cd ../../; > done Submitted batch job 64992 Submitted batch job 64993 Submitted batch job 64994 [araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ cat study_n01024/n001ppn1/diag_time.dat 00:17:04 0.28 17.07 1024.00 % HH:MM:SS=hours=minutes=seconds [araim1@tara-fe1 studies]$
N = 1024 p=1 p=2 p=4 p=8 p=16 p=32 1 process per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 2 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 4 processes per node 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 8 processes per node 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 00:00:04 N = 2048 p=1 p=2 p=4 p=8 p=16 p=32 1 process per node 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 2 processes per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 4 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 8 processes per node 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 N = 4096 p=1 p=2 p=4 p=8 p=16 p=32 1 process per node 01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 2 processes per node 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 4 processes per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 8 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16
[araim1@tara-fe1 studies]$ cat study_n04096/n032ppn8/diag_time.dat 00:00:16 0.00 0.27 16.00 % HH:MM:SS=hours=minutes=seconds [araim1@tara-fe1 studies]$ gawk -F' ' '{ print $1 }' study_n04096/n032ppn8/diag_time.dat 00:00:16 [araim1@tara-fe1 studies]$ gawk -F' ' '{ print $2 }' study_n04096/n032ppn8/diag_time.dat 0.00 [araim1@tara-fe1 studies]$ gawk -F' ' '{ print $3 }' study_n04096/n032ppn8/diag_time.dat 0.27 [araim1@tara-fe1 studies]$ gawk -F' ' '{ print $4 }' study_n04096/n032ppn8/diag_time.dat 16.00 [araim1@tara-fe1 studies]$
#!/bin/bash
#
# get-summary-table.bash - print the timings of the study as plain-text
# tables, one table per problem size N.
#
# Each table has one row per processes-per-node setting and one column per
# node count. A cell shows the first whitespace-separated field (the
# HH:MM:SS wall-clock time) of
#   study_n<N>/n<nodes>ppn<processes per node>/diag_time.dat
# relative to the current working directory, or '---' when that result is
# not available.

#######################################
# Print one table cell for a single experiment.
# Arguments:
#   $1 - problem size N
#   $2 - number of nodes
#   $3 - processes per node
# Outputs:
#   The first field of the first line of the experiment's diag_time.dat,
#   right-aligned in 8 columns with one space on either side; '---' in the
#   same layout when the file is missing, unreadable or empty.
#######################################
write_result() {
  local n=$1 nodes=$2 npn=$3
  local filename result=''

  printf -v filename 'study_n%05d/n%03dppn%d/diag_time.dat' \
    "${n}" "${nodes}" "${npn}"

  if [[ -f "${filename}" && -r "${filename}" ]]; then
    # Only the first line is consulted, so stray extra lines cannot add
    # cells to the row. read returns non-zero for an empty file or a last
    # line without a newline; result is still set correctly in both cases.
    read -r result _ < "${filename}" || true
  fi

  # If there is no usable value, write out a '---'
  if [[ -z "${result}" ]]; then
    result='---'
  fi
  printf ' %8s ' "${result}"
}

#######################################
# Print the column-header line of a table.
# Arguments:
#   $@ - column labels, left to right
# Outputs:
#   20 blanks (the width reserved for the row labels), then every label
#   right-aligned in 10 columns followed by a space, then a newline.
#######################################
write_header() {
  local label

  printf '%20s' ''
  for label in "$@"; do
    printf '%10s ' "${label}"
  done
  printf '\n'
}

for N in 1024 2048 4096; do
  echo "N = $N"
  write_header 'p=1' 'p=2' 'p=4' 'p=8' 'p=16' 'p=32'

  for NPERNODE in 1 2 4 8; do
    # Row label, with the singular form for one process per node.
    if [[ "${NPERNODE}" -eq 1 ]]; then
      printf '%d process per node' "${NPERNODE}"
    else
      printf '%d processes per node' "${NPERNODE}"
    fi

    for NODES in 1 2 4 8 16 32; do
      write_result "${N}" "${NODES}" "${NPERNODE}"
    done
    printf '\n'
  done

  printf '\n'
done
[araim1@tara-fe1 studies]$ ./get-summary-table.bash N = 1024 p=1 p=2 p=4 p=8 p=16 p=32 1 process per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 2 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 4 processes per node 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 8 processes per node 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 00:00:04 N = 2048 p=1 p=2 p=4 p=8 p=16 p=32 1 process per node 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 2 processes per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 4 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 8 processes per node 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 N = 4096 p=1 p=2 p=4 p=8 p=16 p=32 1 process per node 01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 2 processes per node 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 4 processes per node 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 8 processes per node 00:08:32 00:04:16 00:02:08 00:01:04 --- --- [araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ ./get-summary-table-latex.bash N = 1024 p=1 p=2 p=4 p=8 p=16 p=32 1 process per node& 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 \\ 2 processes per node& 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 & 00:00:16 \\ 4 processes per node& 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 & 00:00:16 & 00:00:08 \\ 8 processes per node& 00:02:08 & 00:01:04 & 00:00:32 & 00:00:16 & 00:00:08 & 00:00:04 \\ N = 2048 p=1 p=2 p=4 p=8 p=16 p=32 1 process per node& 00:34:08 & 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 \\ 2 processes per node& 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 \\ 4 processes per node& 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 & 00:00:16 \\ 8 processes per node& 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 & 00:00:16 & 00:00:08 \\ N = 4096 p=1 p=2 p=4 p=8 p=16 p=32 1 process per node& 01:08:16 & 00:34:08 & 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 \\ 2 processes per node& 00:34:08 & 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 \\ 4 processes per node& 00:17:04 & 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & 00:00:32 \\ 8 processes per node& 00:08:32 & 00:04:16 & 00:02:08 & 00:01:04 & --- & --- \\ [araim1@tara-fe1 studies]$
[araim1@tara-fe1 studies]$ pdflatex summary.tex This is pdfeTeX, Version 3.141592-1.21a-2.2 (Web2C 7.5.4) ... Output written on summary.pdf (1 page, 26486 bytes). Transcript written on summary.log. [araim1@tara-fe1 studies]$
Results for npn = 1 p=1 p=2 p=4 p=8 p=16 p=32 p=64 p=128 p=256 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 --- --- 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 --- --- 01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 --- --- Results for npn = 2 p=1 p=2 p=4 p=8 p=16 p=32 p=64 p=128 p=256 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 --- --- 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 --- --- 01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 --- --- Results for npn = 4 p=1 p=2 p=4 p=8 p=16 p=32 p=64 p=128 p=256 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 --- 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 --- 01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 --- Results for npn = 8 p=1 p=2 p=4 p=8 p=16 p=32 p=64 p=128 p=256 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 00:00:04 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 00:00:32 00:00:16 00:00:08 01:08:16 00:34:08 00:17:04 00:08:32 00:04:16 00:02:08 00:01:04 --- ---