Strumenti Utente

Strumenti Sito


magistraleinformaticanetworking:spm:samplevvcode

Vectorizing compiler (sample code)

This is the matrix multiplication code we used during the lesson. Compile it with the -O3 flag (GNU compiler suite). Vectorization details can be obtained with the -ftree-vectorizer-verbose=NN option, with NN ranging from 1 to 9 (see the GNU gcc/g++ compiler manual).

mm.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
 
// Square N x N single-precision matrices: main reads a and b and accumulates
// their product into c. N is not defined in this file; the accompanying
// Makefile supplies it on the command line (-DN=128).
float a[N][N];
float b[N][N];
float c[N][N];
 
// Prints the time elapsed from t0 to t1 and returns that difference.
struct timespec diff(struct timespec t0, struct timespec t1);
 
// Benchmark driver for the i-j-k matrix product.
//
// Fills a and b with pseudo-random values, zeroes c, times the triple loop
// computing c += a*b with the clock selected by CLOCCHE, and prints the
// elapsed time (via diff) plus a value derived from c.
//
// argc and argv are unused.
// Returns 0 on success, EXIT_FAILURE if one of the clock calls fails.
int main(int argc, char * argv[]) {
  (void) argc;
  (void) argv;

  // timer resolution
  struct timespec res;
  struct timespec t0, t1;

  // On failure res would be read uninitialized, so bail out instead.
  if (clock_getres(CLOCCHE, &res) != 0) {
    perror("clock_getres");
    return EXIT_FAILURE;
  }
  printf("Clock resolution is %ld nsec\n", res.tv_nsec);

  // init matrixes
  srand(getpid());
  for(int i=0; i<N; i++)
    for(int j=0; j<N; j++) {
      a[i][j]=rand();
      b[i][j]=rand();
      c[i][j]=0.0;
    }

  if (clock_gettime(CLOCCHE,&t0) != 0) {
    perror("clock_gettime");
    return EXIT_FAILURE;
  }
  // i-j-k order: the innermost loop walks b down a column (b[k][j] with
  // k varying), i.e. with a stride of N floats.
  for(int i=0; i<N; i++)
    for(int j=0; j<N; j++)
      for(int k=0; k<N; k++)
        c[i][j]+=a[i][k]*b[k][j];
  // The return value is checked after the timestamp is taken, so the check
  // itself is not part of the measured interval.
  if (clock_gettime(CLOCCHE,&t1) != 0) {
    perror("clock_gettime");
    return EXIT_FAILURE;
  }
  diff(t0,t1);

  float sum = 0.0;
  for(int i=0; i<N; i++)
    for(int j=0; j<N; j++)
      sum+=c[i][j];

  // Presumably printed only so that the result of the multiplication is
  // used and the optimizer cannot drop the timed loop.
  // NOTE(review): with rand()-filled inputs sum is most likely far outside
  // the range of int, in which case this cast is undefined behaviour;
  // left unchanged to keep the program output as it was.
  printf("---> %d <---\n", ((int) sum)%2);
  return 0;
}
 
// Computes end - start, prints it to stdout and returns it.
//
// start: earlier timestamp
// end:   later timestamp
//
// Returns the difference as a timespec. When both inputs have tv_nsec in
// [0, 1e9) the result's tv_nsec is in that range as well.
//
// The type is spelled "struct timespec" (matching the prototype) so the
// definition is valid C as well as C++.
struct timespec diff(struct timespec start, struct timespec end)
{
	struct timespec temp;
	if ((end.tv_nsec-start.tv_nsec)<0) {
		// Borrow one second so that tv_nsec does not go negative.
		temp.tv_sec = end.tv_sec-start.tv_sec-1;
		temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
	} else {
		temp.tv_sec = end.tv_sec-start.tv_sec;
		temp.tv_nsec = end.tv_nsec-start.tv_nsec;
	}
	// time_t is not guaranteed to be long; cast so the arguments match %ld.
	printf("Time elapsed: %ld sec, %ld usec\n",
	       (long) temp.tv_sec, (long) (temp.tv_nsec/1000));
	return temp;
}
mmkj.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
 
// Square N x N single-precision matrices: main reads a and b and accumulates
// their product into c. N is not defined in this file; the accompanying
// Makefile supplies it on the command line (-DN=128).
float a[N][N];
float b[N][N];
float c[N][N];
 
// Prints the time elapsed from t0 to t1 and returns that difference.
struct timespec diff(struct timespec t0, struct timespec t1);
 
// Benchmark driver for the i-k-j matrix product.
//
// Fills a and b with pseudo-random values, zeroes c, times the triple loop
// computing c += a*b with the clock selected by CLOCCHE, and prints the
// elapsed time (via diff) plus a value derived from c.
//
// argc and argv are unused.
// Returns 0 on success, EXIT_FAILURE if one of the clock calls fails.
int main(int argc, char * argv[]) {
  (void) argc;
  (void) argv;

  // timer resolution
  struct timespec res;
  struct timespec t0, t1;

  // On failure res would be read uninitialized, so bail out instead.
  if (clock_getres(CLOCCHE, &res) != 0) {
    perror("clock_getres");
    return EXIT_FAILURE;
  }
  printf("Clock resolution is %ld nsec\n", res.tv_nsec);

  // init matrixes
  srand(getpid());
  for(int i=0; i<N; i++)
    for(int j=0; j<N; j++) {
      a[i][j]=rand();
      b[i][j]=rand();
      c[i][j]=0.0;
    }

  if (clock_gettime(CLOCCHE,&t0) != 0) {
    perror("clock_gettime");
    return EXIT_FAILURE;
  }
  // i-k-j order: a[i][k] is loop-invariant in the innermost loop and is
  // hoisted into aik; the innermost loop then walks row k of b and row i
  // of c with unit stride.
  for(int i=0; i<N; i++)
    for(int k=0; k<N; k++) {
      float aik = a[i][k];
      for(int j=0; j<N; j++)
        c[i][j] += aik*b[k][j];
    }
  // The return value is checked after the timestamp is taken, so the check
  // itself is not part of the measured interval.
  if (clock_gettime(CLOCCHE,&t1) != 0) {
    perror("clock_gettime");
    return EXIT_FAILURE;
  }
  diff(t0,t1);

  float sum = 0.0;
  for(int i=0; i<N; i++)
    for(int j=0; j<N; j++)
      sum+=c[i][j];

  // Presumably printed only so that the result of the multiplication is
  // used and the optimizer cannot drop the timed loop.
  // NOTE(review): with rand()-filled inputs sum is most likely far outside
  // the range of int, in which case this cast is undefined behaviour;
  // left unchanged to keep the program output as it was.
  printf("---> %d <---\n", ((int) sum)%2);
  return 0;
}
 
// Computes end - start, prints it to stdout and returns it.
//
// start: earlier timestamp
// end:   later timestamp
//
// Returns the difference as a timespec. When both inputs have tv_nsec in
// [0, 1e9) the result's tv_nsec is in that range as well.
//
// The type is spelled "struct timespec" (matching the prototype) so the
// definition is valid C as well as C++.
struct timespec diff(struct timespec start, struct timespec end)
{
	struct timespec temp;
	if ((end.tv_nsec-start.tv_nsec)<0) {
		// Borrow one second so that tv_nsec does not go negative.
		temp.tv_sec = end.tv_sec-start.tv_sec-1;
		temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
	} else {
		temp.tv_sec = end.tv_sec-start.tv_sec;
		temp.tv_nsec = end.tv_nsec-start.tv_nsec;
	}
	// time_t is not guaranteed to be long; cast so the arguments match %ld.
	printf("Time elapsed: %ld sec, %ld usec\n",
	       (long) temp.tv_sec, (long) (temp.tv_nsec/1000));
	return temp;
}
# Builds the matrix-multiplication samples, each with and without -O3.
CC = g++
# N is the matrix dimension; CLOCCHE is the clock id the sources pass to
# clock_getres/clock_gettime.
CFLAGS = -DN=128 -DCLOCCHE=CLOCK_THREAD_CPUTIME_ID
LDFLAGS = -lrt
OBJS = mm mmo3 mmkj mmkjo3

# "all" names no file, so mark it phony.
.PHONY: all
all:	$(OBJS)

mm:	mm.c
	$(CC) $(CFLAGS) mm.c -o mm $(LDFLAGS) -ftree-vectorizer-verbose=2
mmo3:	mm.c
	$(CC) $(CFLAGS) mm.c -o mmo3 $(LDFLAGS) -ftree-vectorizer-verbose=2 -O3
# The mmkj targets are built from mmkj.c, so that is the file they must
# depend on; otherwise edits to mmkj.c do not trigger a rebuild.
mmkj:	mmkj.c
	$(CC) $(CFLAGS) mmkj.c -o mmkj $(LDFLAGS) -ftree-vectorizer-verbose=2
mmkjo3:	mmkj.c
	$(CC) $(CFLAGS) mmkj.c -o mmkjo3 $(LDFLAGS) -ftree-vectorizer-verbose=2 -O3
magistraleinformaticanetworking/spm/samplevvcode.txt · Ultima modifica: 11/11/2013 alle 15:41 (6 anni fa) da Marco Danelutto