To follow along, ensure you are logged into maya-usr2 and have the Intel MPI and MIC modules loaded.
This webpage is based on slides provided by Colfax International during a Developer Boot Camp on July 15, 2014, sponsored by Intel. For more information about programming on the Phi, see the Intel Developer Zone website.

With the MIC modules loaded, your environment should look like this:

[khsa1@maya-usr2 hello_phi]$ module list
Currently Loaded Modulefiles:
  1) dot                      8) intel/mic/runtime/3.3
  2) matlab/r2014a            9) default-environment
  3) comsol/4.4              10) intel-mpi/mic/4.1.3/049
  4) gcc/4.8.2
  5) slurm/14.03.6
  6) texlive/2014
  7) intel/mic/sdk/3.3
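If any of these are missing, loading them is one command; a minimal sketch, with module names taken from the listing above (exact versions may differ on your system):

[khsa1@maya-usr2 hello_phi]$ module load intel/mic/sdk/3.3 intel/mic/runtime/3.3 intel-mpi/mic/4.1.3/049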
hello_native.c:

#include <stdio.h>
#include <unistd.h>

int main(int argc, char *argv[]) {
    /* report the logical core count seen by this process */
    printf("Hello world! I have %ld logical cores.\n",
           sysconf(_SC_NPROCESSORS_ONLN));
}
[khsa1@maya-usr2 hello_phi]$ icc hello_native.c -o hello_native -mmic
The following submission script runs the executable on a coprocessor in the mic partition:

#!/bin/bash
#SBATCH --job-name=hello_phi
#SBATCH --output=slurm.out
#SBATCH --error=slurm.err
#SBATCH --partition=mic
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --constraint=miccard

mpiexec.hydra ./hello_native
[khsa1@maya-usr2 hello_phi]$ cat slurm.out
Hello world! I have 240 logical cores.

The count is what we expect: each Phi 5110P has 60 physical cores with 4 hardware threads apiece, for 240 logical cores.
hello_openmp.c:

#include <stdio.h>
#include <unistd.h>
#include <omp.h>

int main(int argc, char *argv[]) {
    #pragma omp parallel
    {
        printf("Hello from thread %03d of %03d\n",
               omp_get_thread_num(), omp_get_num_threads());
    }
}
[khsa1@maya-usr2 hello_phi]$ icc -mmic -openmp -o hello_openmp hello_openmp.c
This time the submission script must point LD_LIBRARY_PATH at the MIC build of the OpenMP runtime library and choose the number of threads:

#!/bin/bash
#SBATCH --job-name=hello_phi
#SBATCH --output=slurm.out
#SBATCH --error=slurm.err
#SBATCH --partition=mic
#SBATCH --exclusive
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --constraint=miccard

export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/cm/shared/apps/intel/composer_xe/current/compiler/lib/mic
export MIC_OMP_NUM_THREADS=8

mpiexec.hydra ./hello_openmp
[khsa1@maya-usr2 hello_phi]$ cat slurm.out
Hello from thread 000 of 008
Hello from thread 001 of 008
Hello from thread 002 of 008
Hello from thread 003 of 008
Hello from thread 004 of 008
Hello from thread 005 of 008
Hello from thread 006 of 008
Hello from thread 007 of 008
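The value 8 in the script was chosen only to keep the output short. To exercise the whole card, MIC_OMP_NUM_THREADS can be raised as far as the 240 logical cores reported by the native hello world above, for example:

export MIC_OMP_NUM_THREADS=240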
hello_mpi.c:

#include <stdio.h>
#include <mpi.h>

int main(int argc, char *argv[]) {
    int id, np;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int processor_name_len;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &np);
    MPI_Comm_rank(MPI_COMM_WORLD, &id);
    MPI_Get_processor_name(processor_name, &processor_name_len);

    printf("Hello world from process %03d out of %03d, processor name %s\n",
           id, np, processor_name);

    MPI_Finalize();
    return 0;
}
[khsa1@maya-usr2 hello_phi]$ mpiicc -mmic -o hello_mpi hello_mpi.c
For a native MPI run, the script unsets I_MPI_FABRICS and launches eight ranks on the card:

#!/bin/bash
#SBATCH --job-name=hello_phi
#SBATCH --output=slurm.out
#SBATCH --error=slurm.err
#SBATCH --partition=mic
#SBATCH --exclusive
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --constraint=miccard

unset I_MPI_FABRICS
mpiexec.hydra -n 8 ./hello_mpi
[khsa1@maya-usr2 hello_phi]$ cat slurm.out
Hello world from process 001 out of 008, processor name n51-mic0
Hello world from process 002 out of 008, processor name n51-mic0
Hello world from process 003 out of 008, processor name n51-mic0
Hello world from process 005 out of 008, processor name n51-mic0
Hello world from process 006 out of 008, processor name n51-mic0
Hello world from process 000 out of 008, processor name n51-mic0
Hello world from process 004 out of 008, processor name n51-mic0
Hello world from process 007 out of 008, processor name n51-mic0
The offload examples are compiled on the host, so the standard Intel compiler and MPI modules are loaded in place of the MIC MPI module:

[khsa1@maya-usr2 hello_phi]$ module list
Currently Loaded Modulefiles:
  1) dot                                       9) intel/mic/sdk/3.3
  2) matlab/r2014a                            10) intel/mic/runtime/3.3
  3) comsol/4.4                               11) default-environment
  4) gcc/4.8.2
  5) slurm/14.03.6
  6) intel/compiler/64/14.0/2013_sp1.3.174
  7) intel-mpi/64/4.1.3/049
  8) texlive/2014
#include <stdio.h> #include "offload.h" int main(int argc, char * argv[] ) { printf("Hello World from CPU!\n"); #pragma offload target(mic) { #ifdef __MIC__ printf("Hello World from Phi!\n"); #else printf("Hello world from CPU (offload to Phi failed)."); #endif fflush(0); } }
Note that no -mmic flag is needed: the compiler builds a host executable and generates coprocessor code for the offload region automatically.

[khsa1@maya-usr2 hello_phi]$ icc hello_offload.c -o hello_offload
Offload jobs run in the batch partition, on a host node with a Phi attached:

#!/bin/bash
#SBATCH --job-name=hello_phi
#SBATCH --output=slurm.out
#SBATCH --error=slurm.err
#SBATCH --partition=batch
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --constraint=mic_5110p

srun ./hello_offload
[khsa1@maya-usr2 hello_phi]$ cat slurm.out
Hello World from CPU!
Hello World from Phi!
hello_multioffload.c, in which each MPI rank offloads to a different coprocessor:

#include <stdio.h>
#include <unistd.h>
#include <mpi.h>

int main(int argc, char *argv[]) {
    int id, np, micid;
    char name[128];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &np);
    MPI_Comm_rank(MPI_COMM_WORLD, &id);

    /* even ranks target mic0, odd ranks target mic1 */
    if (id % 2 == 0)
        micid = 0;
    else
        micid = 1;

    gethostname(name, sizeof name);
    printf("Hello World from rank %d on %s!\n", id, name);

    #pragma offload target(mic : micid)
    {
        gethostname(name, sizeof name);
        printf("Hello World from rank %d on %s\n", id, name);
    }

    MPI_Finalize();
}
[khsa1@maya-usr2 hello_phi]$ mpiicc -openmp hello_multioffload.c -o hello_multioffload
With two tasks per node, each rank offloads to a different card:

#!/bin/bash
#SBATCH --job-name=hello_phi
#SBATCH --output=slurm.out
#SBATCH --error=slurm.err
#SBATCH --partition=batch
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=2
#SBATCH --constraint=mic_5110p

srun ./hello_multioffload
[khsa1@maya-usr2 hello_phi]$ cat slurm.out
Hello World from rank 0 on n34!
Hello World from rank 1 on n34!
Hello World from rank 1 on n34-mic1
Hello World from rank 0 on n34-mic0
main.c, which exercises two data-transfer strategies; the ALLOC/FREE/REUSE macros wrap the alloc_if/free_if clauses that control buffer lifetime on the card:

#define ALLOC alloc_if(1) free_if(0)
#define FREE  alloc_if(0) free_if(1)
#define REUSE alloc_if(0) free_if(0)

#include <stdio.h>
#include <stdlib.h>
#include "axpby.h"

int main(int argc, char *argv[]) {
    double *x, *y, *z1, *z2;
    double a, b;
    int i, n;

    n = 8;
    a = 2, b = 1;

    x  = (double*) calloc(n, sizeof(double));
    y  = (double*) calloc(n, sizeof(double));
    z1 = (double*) calloc(n, sizeof(double));
    z2 = (double*) calloc(n, sizeof(double));

    for (i = 0; i < n; i++) {
        x[i] = (double) i;
        y[i] = (double) i + 1;
    }

    for (i = 0; i < n; i++) printf("x[%d]=%f\n", i, x[i]);
    printf("\n");
    for (i = 0; i < n; i++) printf("y[%d]=%f\n", i, y[i]);
    printf("\n");

    /* Variant 1: copy x, y, z1 to the card and back around a single call */
    #pragma offload target(mic) inout(x,y,z1 : length(n))
    {
        axpby1(z1, a, x, b, y, n);
    }

    /* Variant 2: allocate persistent buffers on the card ... */
    #pragma offload target(mic) in(x, y, z2 : length(n) ALLOC)
    {}

    /* ... reuse them inside axpby2 ... */
    axpby2(z2, a, x, b, y, n);

    /* ... then fetch z2 and free everything on the card */
    #pragma offload target(mic) out(z2 : length(n) FREE) \
                                nocopy(x,y : length(n) FREE)
    {}

    for (i = 0; i < n; i++) printf("z1[%d] = %f\n", i, z1[i]);
    printf("\n");
    for (i = 0; i < n; i++) printf("z2[%d] = %f\n", i, z2[i]);

    free(x); free(y); free(z1); free(z2);
}
#include "axpby.h" __attribute__ ((target(mic))) void axpby1(double *z, double a, double *x, double b, double *y, int n) { #pragma omp parallel { int i; #pragma omp for for (i = 0; i < n; i++) z[i] = a*x[i] + b*y[i]; } } void axpby2(double *z, double a, double *x, double b, double *y, int n) { #pragma offload target(mic) in(x, y, z : length(0) REUSE) { #pragma omp parallel { int i; #pragma omp for for (i = 0; i < n; i++) z[i] = a*x[i] + b*y[i]; } } }
axpby.h:

#include <stdio.h>
#include <math.h>
#include <omp.h>
#include "offload.h"

#define ALLOC alloc_if(1) free_if(0)
#define FREE  alloc_if(0) free_if(1)
#define REUSE alloc_if(0) free_if(0)

__attribute__((target(mic)))
void axpby1(double *z, double a, double *x, double b, double *y, int n);
void axpby2(double *z, double a, double *x, double b, double *y, int n);
Makefile (recipe lines must start with a tab):

axpby: main.o axpby.o
	icc -openmp main.o axpby.o -o axpby

main.o: main.c
	icc -openmp -c -o main.o main.c

axpby.o: axpby.c
	icc -openmp -c axpby.c -o axpby.o

clean:
	-rm -f *.o axpby
Setting OFFLOAD_REPORT=2 makes the runtime print, for every offload region, host and MIC timings plus the bytes transferred in each direction (level 1 reports timings only):

#!/bin/bash
#SBATCH --job-name=axpby
#SBATCH --output=slurm.out
#SBATCH --error=slurm.err
#SBATCH --partition=batch
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --constraint=mic_5110p

export OFFLOAD_REPORT=2
srun ./axpby
[khsa1@maya-usr2 phi]$ cat slurm.out
x[0]=0.000000
x[1]=1.000000
x[2]=2.000000
x[3]=3.000000
x[4]=4.000000
x[5]=5.000000
x[6]=6.000000
x[7]=7.000000

y[0]=1.000000
y[1]=2.000000
y[2]=3.000000
y[3]=4.000000
y[4]=5.000000
y[5]=6.000000
y[6]=7.000000
y[7]=8.000000

[Offload] [MIC 0] [File]                    main.c
[Offload] [MIC 0] [Line]                    32
[Offload] [MIC 0] [Tag]                     Tag 0
[Offload] [HOST]  [Tag 0] [CPU Time]        0.796314(seconds)
[Offload] [MIC 0] [Tag 0] [CPU->MIC Data]   212 (bytes)
[Offload] [MIC 0] [Tag 0] [MIC Time]        0.248634(seconds)
[Offload] [MIC 0] [Tag 0] [MIC->CPU Data]   192 (bytes)

[Offload] [MIC 0] [File]                    main.c
[Offload] [MIC 0] [Line]                    37
[Offload] [MIC 0] [Tag]                     Tag 1
[Offload] [HOST]  [Tag 1] [CPU Time]        0.004586(seconds)
[Offload] [MIC 0] [Tag 1] [CPU->MIC Data]   192 (bytes)
[Offload] [MIC 0] [Tag 1] [MIC Time]        0.000066(seconds)
[Offload] [MIC 0] [Tag 1] [MIC->CPU Data]   24 (bytes)

[Offload] [MIC 0] [File]                    axpby.c
[Offload] [MIC 0] [Line]                    14
[Offload] [MIC 0] [Tag]                     Tag 2
[Offload] [HOST]  [Tag 2] [CPU Time]        0.000511(seconds)
[Offload] [MIC 0] [Tag 2] [CPU->MIC Data]   44 (bytes)
[Offload] [MIC 0] [Tag 2] [MIC Time]        0.000334(seconds)
[Offload] [MIC 0] [Tag 2] [MIC->CPU Data]   20 (bytes)

[Offload] [MIC 0] [File]                    main.c
[Offload] [MIC 0] [Line]                    42
[Offload] [MIC 0] [Tag]                     Tag 3
[Offload] [HOST]  [Tag 3] [CPU Time]        0.011541(seconds)
[Offload] [MIC 0] [Tag 3] [CPU->MIC Data]   48 (bytes)
[Offload] [MIC 0] [Tag 3] [MIC Time]        0.000060(seconds)
[Offload] [MIC 0] [Tag 3] [MIC->CPU Data]   64 (bytes)

z1[0] = 1.000000
z1[1] = 4.000000
z1[2] = 7.000000
z1[3] = 10.000000
z1[4] = 13.000000
z1[5] = 16.000000
z1[6] = 19.000000
z1[7] = 22.000000

z2[0] = 1.000000
z2[1] = 4.000000
z2[2] = 7.000000
z2[3] = 10.000000
z2[4] = 13.000000
z2[5] = 16.000000
z2[6] = 19.000000
z2[7] = 22.000000
For symmetric execution on the host and the coprocessor at the same time, we reuse hello_mpi.c exactly as listed in the native MPI example above.
It is compiled twice, once for each architecture:

[khsa1@maya-usr2 hello_phi]$ mpiicc -mmic hello_mpi.c -o hello_mpi.MIC
[khsa1@maya-usr2 hello_phi]$ mpiicc hello_mpi.c -o hello_mpi.XEON
The two binaries are launched together using mpirun's colon syntax, eight ranks on the card and eight on the host:

#!/bin/bash
#SBATCH --job-name=hello_phi
#SBATCH --output=slurm.out
#SBATCH --error=slurm.err
#SBATCH --partition=batch
#SBATCH --nodes=1
#SBATCH --constraint=mic_5110p

mpirun -host mic0 -n 8 ./hello_mpi.MIC : -host localhost -n 8 ./hello_mpi.XEON
[khsa1@maya-usr2 hello_phi]$ cat slurm.out
Hello world from process 008 out of 016, processor name n34
Hello world from process 009 out of 016, processor name n34
Hello world from process 010 out of 016, processor name n34
Hello world from process 011 out of 016, processor name n34
Hello world from process 012 out of 016, processor name n34
Hello world from process 013 out of 016, processor name n34
Hello world from process 014 out of 016, processor name n34
Hello world from process 015 out of 016, processor name n34
Hello world from process 000 out of 016, processor name n34-mic0
Hello world from process 001 out of 016, processor name n34-mic0
Hello world from process 002 out of 016, processor name n34-mic0
Hello world from process 003 out of 016, processor name n34-mic0
Hello world from process 004 out of 016, processor name n34-mic0
Hello world from process 005 out of 016, processor name n34-mic0
Hello world from process 006 out of 016, processor name n34-mic0
Hello world from process 007 out of 016, processor name n34-mic0