#include <stdio.h>

// Launch geometry: NUM_BLOCKS blocks, each with BLOCK_WIDTH threads (1-D).
#define NUM_BLOCKS 2
#define BLOCK_WIDTH 16

/*
 * hello - debugging/demo kernel: every launched thread prints one line
 * containing its thread index within the block and its block index.
 *
 * Launch layout: 1-D grid, 1-D block (only the .x components are printed).
 * Takes no arguments and uses no shared memory.
 */
__global__ void hello()
{
    printf("Hello world! I'm thread %d in block %d\n", threadIdx.x, blockIdx.x);
}

/*
 * Launches `hello` with NUM_BLOCKS x BLOCK_WIDTH threads, waits for the
 * device to finish, then prints a closing message.
 *
 * Returns 0 on success, 1 if the launch or the kernel execution failed.
 */
int main()
{
    // launch the kernel
    hello<<<NUM_BLOCKS, BLOCK_WIDTH>>>();

    // A kernel launch does not return a status itself; an invalid launch
    // configuration is only visible through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // force the printf()s to flush; this also reports errors that occurred
    // while the kernel was running
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel execution failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    printf("That's all!\n");

    return 0;
}