simd - using restrict qualifier with C99 variable length arrays (VLAs) -


I am exploring how different implementations of simple loops in C99 auto-vectorize based upon the function signature.

Here is my code:

/* #define pragma_simd _Pragma("simd") */
/* Loop annotation hook. It currently expands to nothing; the _Pragma form
 * above is left disabled. */
#define pragma_simd

/* Alignment hint: expands to __assume_aligned(a,64) under the Intel compiler
 * and to nothing elsewhere. */
#ifdef __INTEL_COMPILER
#define assume_aligned(a) __assume_aligned(a,64)
#else
#define assume_aligned(a)
#endif

/* Qualifier written in front of the array parameter names of foo5..foo8.
 * Empty unless it is defined on the compiler command line. */
#ifndef array_restrict
#define array_restrict
#endif

/* Fixed length (2048), restrict pointers, if/else form:
 * a[i] = b[i] where c[i] > 0, otherwise a[i] = 0.0. */
void foo1(double * restrict a, const double * restrict b, const double * restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* Fixed length (2048), restrict pointers, conditional-expression form of the
 * same selection as foo1. */
void foo2(double * restrict a, const double * restrict b, const double * restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* undetermined size version */

/* Run-time length n, restrict pointers, if/else form. Only the first n
 * elements of a are written. */
void foo3(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* Run-time length n, restrict pointers, conditional-expression form. */
void foo4(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* static array versions */

/* Parameters declared as arrays of 2048 doubles, if/else form.
 * array_restrict sits before the parameter name here. */
void foo5(double array_restrict a[2048], const double array_restrict b[2048], const double array_restrict c[2048])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* Parameters declared as arrays of 2048 doubles, conditional-expression form. */
void foo6(double array_restrict a[2048], const double array_restrict b[2048], const double array_restrict c[2048])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* vla versions */

/* Parameters declared as variable length arrays of n doubles, if/else form.
 * array_restrict sits before the parameter name here. */
void foo7(int n, double array_restrict a[n], const double array_restrict b[n], const double array_restrict c[n])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* Parameters declared as variable length arrays of n doubles,
 * conditional-expression form. */
void foo8(int n, double array_restrict a[n], const double array_restrict b[n], const double array_restrict c[n])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

When I compile with

$ icc -O3 -std=c99 -opt-report5 -mavx -S foo.c
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location

I see that the VLA cases are not auto-vectorized, but when I add the flag to assert no aliasing, -fno-alias, they are. Thus, I conclude that I should prescribe this in the source, so I attempt to do that by compiling with

$ icc -O3 -std=c99 -opt-report5 -mavx -Darray_restrict=restrict -S foo.c
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location

The compiler error output includes

foo.c(98): error: "restrict" not allowed void foo7(int n, double array_restrict a[n], const double array_restrict b[n],  const double array_restrict c[n])                ^ 

But as you can see, restrict is not allowed on the VLA arguments.

So my question is: is there no way to assert no aliasing of a VLA in ISO C?

Note that I can assert no aliasing in the source code using pragmas - e.g. simd, omp simd, ivdep etc. - and get the auto-vectorization that I want, but these aren't ISO C.

In this context, ISO C means the most recent version of C, which of course is C11 as of the writing of this post.

Your original code fails nicely for me with messages such as:

 void foo7(int n, double array_restrict a[n], const double array_restrict b[n], const double array_restrict c[n])  ^ restrict.c:126:1: error: invalid use of ‘restrict’ restrict.c:126:1: error: invalid use of ‘restrict’ restrict.c:145:1: error: invalid use of ‘restrict’ 

Transferring selected parts of the comments:

§6.7.6.3 Function declarators (including prototypes) has Example 5, which says that the following function prototype declarators are equivalent:

void f(double (* restrict a)[5]);
void f(double a[restrict][5]);
void f(double a[restrict 3][5]);
void f(double a[restrict static 3][5]);

This is the only place in the standard where restrict appears associated directly with array types. §6.7.6 is on declarators generally, and §6.7.6.2 on array declarators, and it looks to me as though the restrict has to appear inside the first component of the array dimension. In your context, it should be:

void foo7(int n, double a[array_restrict n],
          const double b[array_restrict n],
          const double c[array_restrict n])

I wouldn't have believed the notation without seeing the examples in the standard and you asking the question! Note that this applies to arrays as well as VLAs.

This is the revised code, based on the commentary, which compiles cleanly under the same compilation options:

gcc -g -O3 -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes \
    -Wold-style-definition -Wold-style-declaration -Werror -c new.restrict.c

The compilation options demand prior declarations of non-static functions, hence the declarations at the top of the file. I also forced #define array_restrict restrict in the source, rather than leaving it as a compilation option.

The compiler is GCC 4.9.2 running on an Ubuntu 14.04 derivative.

File new.restrict.c:

/* #define pragma_simd _Pragma("simd") */
/* Loop annotation hook. It currently expands to nothing; the _Pragma form
 * above is left disabled. */
#define pragma_simd

/* Alignment hint: expands to __assume_aligned(a, 64) under the Intel compiler
 * and to nothing elsewhere. */
#ifdef __INTEL_COMPILER
#define assume_aligned(a) __assume_aligned(a, 64)
#else
#define assume_aligned(a)
#endif

/* The qualifier is forced on in the source; it is used inside the array
 * brackets of the foo5..foo8 parameters. */
#define array_restrict restrict

/* Fallback kept from the command-line-driven version; it has no effect while
 * the unconditional definition above is present. */
#ifndef array_restrict
#define array_restrict
#endif

/* Prior declarations of the non-static functions defined below. */
void foo1(double *restrict a, const double *restrict b, const double *restrict c);
void foo2(double *restrict a, const double *restrict b, const double *restrict c);
void foo3(int n, double *restrict a, const double *restrict b, const double *restrict c);
void foo4(int n, double *restrict a, const double *restrict b, const double *restrict c);
void foo5(double a[array_restrict 2048], const double b[array_restrict 2048], const double c[array_restrict 2048]);
void foo6(double a[array_restrict 2048], const double b[array_restrict 2048], const double c[array_restrict 2048]);
void foo7(int n, double a[array_restrict n], const double b[array_restrict n], const double c[array_restrict n]);
void foo8(int n, double a[array_restrict n], const double b[array_restrict n], const double c[array_restrict n]);

/* Fixed length (2048), restrict pointers, if/else form:
 * a[i] = b[i] where c[i] > 0, otherwise a[i] = 0.0. */
void foo1(double *restrict a, const double *restrict b, const double *restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i)
    {
        if (c[i] > 0)
        {
            a[i] = b[i];
        }
        else
        {
            a[i] = 0.0;
        }
    }
}

/* Fixed length (2048), restrict pointers, conditional-expression form of the
 * same selection as foo1. */
void foo2(double *restrict a, const double *restrict b, const double *restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i)
    {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* undetermined size version */

/* Run-time length n, restrict pointers, if/else form. Only the first n
 * elements of a are written. */
void foo3(int n, double *restrict a, const double *restrict b, const double *restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i)
    {
        if (c[i] > 0)
        {
            a[i] = b[i];
        }
        else
        {
            a[i] = 0.0;
        }
    }
}

/* Run-time length n, restrict pointers, conditional-expression form. */
void foo4(int n, double *restrict a, const double *restrict b, const double *restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i)
    {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* static array versions */

/* Parameters declared as arrays of 2048 doubles with array_restrict inside
 * the brackets, if/else form. */
void foo5(double a[array_restrict 2048], const double b[array_restrict 2048], const double c[array_restrict 2048])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i)
    {
        if (c[i] > 0)
        {
            a[i] = b[i];
        }
        else
        {
            a[i] = 0.0;
        }
    }
}

/* Parameters declared as arrays of 2048 doubles with array_restrict inside
 * the brackets, conditional-expression form. */
void foo6(double a[array_restrict 2048], const double b[array_restrict 2048], const double c[array_restrict 2048])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i)
    {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* vla versions */

/* Parameters declared as variable length arrays of n doubles with
 * array_restrict inside the brackets, if/else form. */
void foo7(int n, double a[array_restrict n], const double b[array_restrict n], const double c[array_restrict n])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i)
    {
        if (c[i] > 0)
        {
            a[i] = b[i];
        }
        else
        {
            a[i] = 0.0;
        }
    }
}

/* Parameters declared as variable length arrays of n doubles with
 * array_restrict inside the brackets, conditional-expression form. */
void foo8(int n, double a[array_restrict n], const double b[array_restrict n], const double c[array_restrict n])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i)
    {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

Comments

Popular posts from this blog

node.js - Mongoose: Cast to ObjectId failed for value on newly created object after setting the value -

[C++][SFML 2.2] Strange Performance Issues - Moving Mouse Lowers CPU Usage -

ios - Possible to get UIButton sizeThatFits to work? -