[araim1@tara-fe1 ~]$ squeue JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) 59512 develop test-job araim1 R 0:01 2 n[1-2] [araim1@tara-fe1 ~]$
[araim1@tara-fe1 ~]$ srun --jobid 59512 hostname n1 n2 [araim1@tara-fe1 ~]$
[araim1@tara-fe1 ~]$ srun --jobid 59512 ps u -u araim1 USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND araim1 12400 0.0 0.0 40748 1852 ? S 14:05 0:00 /usr/cluster/openmpi/1.3.3-p1/gcc/4/bin/orted ... araim1 12402 99.2 0.2 381676 50368 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12403 99.2 0.2 360664 51112 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12404 99.2 0.2 315640 49884 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12405 99.1 0.2 381180 49920 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12406 99.2 0.2 381340 50064 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12407 99.2 0.2 315640 49908 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12408 99.2 0.2 315804 50040 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12409 99.1 0.2 315804 50052 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12512 0.0 0.0 63760 920 ? R 14:08 0:00 /bin/ps u -u araim1 USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND araim1 31143 0.0 0.0 63868 1084 ? S 14:05 0:00 /bin/bash /var/spool/slurmd/job59512/slurm_script ... araim1 31145 0.0 0.0 49388 2080 ? S 14:05 0:00 mpirun -np 1 R --no-save araim1 31146 75.5 1.3 592456 324488 ? RLl 14:05 1:59 /usr/lib64/R/bin/exec/R --no-save araim1 31153 0.0 0.0 89188 4120 ? Sl 14:05 0:00 srun --nodes=1 --ntasks=1 --kill-on-bad-exit ... araim1 31157 78.7 0.2 315624 49892 ? RLl 14:05 2:04 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31158 99.2 0.2 315628 49888 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31159 59.7 0.2 315624 49900 ? RLl 14:05 1:34 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31160 99.1 0.2 315792 50040 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31161 86.0 0.2 315628 49892 ? RLl 14:05 2:15 /usr/lib64/R/bin/exec/R --slave --no-restore ... 
araim1 31162 99.2 0.2 360816 51252 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31163 98.2 0.2 381332 50064 ? RLl 14:05 2:35 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31165 99.2 0.2 381668 50352 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31274 0.0 0.0 63748 912 ? R 14:08 0:00 /bin/ps u -u araim1
#!/bin/bash
#
# diagnostics.bash - print the invoking user's processes on the current node.
#
# Usage (one instance runs on each node of the job):
#   srun --jobid <jobid> ./diagnostics.bash
#
# Output: a banner line containing the host name, followed by the output
# of `ps u -u <user>`.

set -u

#######################################
# Print a banner naming this host, then the user's process list.
# The report is assembled in memory and written with a single printf,
# so no scratch file is created in the working directory.
# NOTE(review): emitting the report in one write presumably keeps each
# node's section together when several nodes report through srun - confirm.
# Globals:   USER (read; falls back to `id -un` when unset or empty)
# Arguments: none
# Outputs:   banner + ps listing on stdout
# Returns:   0 on success; the failing command's status otherwise
#######################################
print_node_report() {
  local node_name user_name listing report rc=0

  # A local name is used on purpose: HOSTNAME is maintained by Bash itself.
  node_name=$(hostname) || return
  user_name=${USER:-$(id -un)} || return

  report="----------------- ${node_name} -------------------"

  # Keep the banner even if ps fails; its status is returned below.
  listing=$(ps u -u "$user_name") || rc=$?
  if [[ -n "$listing" ]]; then
    report+=$'\n'"${listing}"
  fi

  printf '%s\n' "$report"
  return "$rc"
}

print_node_report
[araim1@tara-fe1 ~]$ srun --jobid 59512 ./diagnostics.bash ----------------- n2 ------------------- USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND araim1 12400 0.0 0.0 40748 1852 ? S 14:05 0:00 /usr/cluster/openmpi/1.3.3-p1/gcc/4/bin/orted ... araim1 12402 99.2 0.2 381676 50368 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12403 99.2 0.2 360664 51112 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12404 99.2 0.2 315640 49884 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12405 99.1 0.2 381180 49920 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12406 99.2 0.2 381340 50064 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12407 99.2 0.2 315640 49908 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12408 99.2 0.2 315804 50040 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12409 99.1 0.2 315804 50052 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 12512 0.0 0.0 63760 920 ? R 14:08 0:00 /bin/ps u -u araim1 ----------------- n1 ------------------- USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND araim1 31143 0.0 0.0 63868 1084 ? S 14:05 0:00 /bin/bash /var/spool/slurmd/job59512/slurm_script ... araim1 31145 0.0 0.0 49388 2080 ? S 14:05 0:00 mpirun -np 1 R --no-save araim1 31146 75.5 1.3 592456 324488 ? RLl 14:05 1:59 /usr/lib64/R/bin/exec/R --no-save araim1 31153 0.0 0.0 89188 4120 ? Sl 14:05 0:00 srun --nodes=1 --ntasks=1 --kill-on-bad-exit ... araim1 31157 78.7 0.2 315624 49892 ? RLl 14:05 2:04 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31158 99.2 0.2 315628 49888 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31159 59.7 0.2 315624 49900 ? RLl 14:05 1:34 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31160 99.1 0.2 315792 50040 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... 
araim1 31161 86.0 0.2 315628 49892 ? RLl 14:05 2:15 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31162 99.2 0.2 360816 51252 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31163 98.2 0.2 381332 50064 ? RLl 14:05 2:35 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31165 99.2 0.2 381668 50352 ? RLl 14:05 2:36 /usr/lib64/R/bin/exec/R --slave --no-restore ... araim1 31274 0.0 0.0 63748 912 ? R 14:08 0:00 /bin/ps u -u araim1 [araim1@tara-fe1 ~]$