For more information about the BLAS routines in ACML, see
#include <stdio.h> #include <acml.h> #define MATRIX_IDX(n, i, j) j*n + i #define MATRIX_ELEMENT(A, m, n, i, j) A[ MATRIX_IDX(m, i, j) ] void init_matrix(double* A, int m, int n) { double element = 1.0; for (int j = 0; j < n; j++) { for (int i = 0; i < m; i++) { MATRIX_ELEMENT(A, m, n, i, j) = element; element *= 0.9; } } } void print_matrix(const double* A, int m, int n) { for (int i = 0; i < m; i++) { for (int j = 0; j < n; j++) { printf("%8.4f", MATRIX_ELEMENT(A, m, n, i, j)); } printf("\n"); } } int main(int argc, char** argv) { int m = 3; int n = 4; int k = 5; double A[m * k]; double B[k * n]; double C[m * n]; init_matrix(A, m, k); init_matrix(B, k, n); printf("Matrix A (%d x %d) is:\n", m, k); print_matrix(A, m, k); printf("\nMatrix B (%d x %d) is:\n", k, n); print_matrix(B, k, n); dgemm('N', 'N', m, n, k, 1.0, A, m, B, k, 0.0, C, m); printf("\nMatrix C (%d x %d) = AB is:\n", m, n); print_matrix(C, m, n); return 0; }
dgemm('N', 'N', m, n, k, 1.0, A, m, B, k, 0.0, C, m);
C := alpha*op( A )*op( B ) + beta*C,
which allows us to easily accumulate results into C if we wish. In this example we have taken the constant alpha to be 1.0 and the constant beta to be 0.0. Notice that we did not have to initialize C's entries to zeros for this reason.
Next is the Makefile. The important part is that we need to link to two more libraries: libacml and libpgftnrtl. This is accomplished by adding them to the LDFLAGS variable.
EXECUTABLE := matrix_multiply OBJS := main.o CFLAGS := -O3 -c99 -Minform=warn -fastsse INCLUDES := LIBLOCS := LDFLAGS := -lm -lacml -lpgftnrtl CC := mpicc $(INCLUDES) %.o: %.c %.h $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) -c $< -o $@ $(EXECUTABLE): $(OBJS) $(CC) $(CFLAGS) $(DEFS) $(INCLUDES) $(OBJS) -o $@ $(LIBLOCS) $(LDFLAGS) clean: -rm -f *.o $(EXECUTABLE)
[araim1@tara-fe1 pgi_acml]$ make mpicc -O3 -c99 -Minform=warn -fastsse -c -o main.o main.c mpicc -O3 -c99 -Minform=warn -fastsse main.o -o matrix_multiply -lm -lacml -lpgftnrtl [araim1@tara-fe1 pgi_acml]$ ls main.c main.o Makefile matrix_multiply [araim1@tara-fe1 pgi_acml]$
[araim1@tara-fe1 pgi_acml]$ ./matrix_multiply Matrix A (3 x 5) is: 1.0000 0.7290 0.5314 0.3874 0.2824 0.9000 0.6561 0.4783 0.3487 0.2542 0.8100 0.5905 0.4305 0.3138 0.2288 Matrix B (5 x 4) is: 1.0000 0.5905 0.3487 0.2059 0.9000 0.5314 0.3138 0.1853 0.8100 0.4783 0.2824 0.1668 0.7290 0.4305 0.2542 0.1501 0.6561 0.3874 0.2288 0.1351 Matrix C (3 x 4) = AB is: 2.5543 1.5083 0.8906 0.5259 2.2989 1.3575 0.8016 0.4733 2.0690 1.2217 0.7214 0.4260 [araim1@tara-fe1 pgi_acml]$
#!/bin/bash #SBATCH --job-name=matrix_multiply #SBATCH --output=slurm.out #SBATCH --error=slurm.err #SBATCH --partition=develop #SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 ./matrix_multiply