SIMD - Using the restrict qualifier with C99 variable length arrays (VLAs)
I am exploring how different implementations of simple loops in C99 auto-vectorize based upon the function signature.
Here is my code:
/*
 * Eight variants of the same element-wise kernel:
 *     a[i] = (c[i] > 0) ? b[i] : 0.0
 * They differ only in how the function signature describes the arrays
 * (restrict pointers, fixed-size array parameters, VLA parameters) and in
 * whether the body uses if/else or the conditional operator.
 */

/* Optional non-ISO vectorization hint; disabled by default. */
/* #define pragma_simd _Pragma("simd") */
#define pragma_simd

/* Alignment hint: only expands to something under the Intel compiler. */
#ifdef __INTEL_COMPILER
#define assume_aligned(a) __assume_aligned(a, 64)
#else
#define assume_aligned(a)
#endif

/*
 * Qualifier inserted in the array-parameter declarations of foo5..foo8.
 * Empty unless supplied on the command line (-Darray_restrict=restrict).
 * NOTE: in the position used below (before the parameter name), compilers
 * reject it once it expands to restrict.
 */
#ifndef array_restrict
#define array_restrict
#endif

/* Fixed trip count (2048), restrict pointers, if/else body. */
void foo1(double * restrict a, const double * restrict b, const double * restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* Fixed trip count (2048), restrict pointers, conditional-operator body. */
void foo2(double * restrict a, const double * restrict b, const double * restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* undetermined size version */

/* Run-time trip count n, restrict pointers, if/else body. */
void foo3(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* Run-time trip count n, restrict pointers, conditional-operator body. */
void foo4(int n, double * restrict a, const double * restrict b, const double * restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* static array versions */

/* Fixed-size array parameters, if/else body. */
void foo5(double array_restrict a[2048], const double array_restrict b[2048], const double array_restrict c[2048])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* Fixed-size array parameters, conditional-operator body. */
void foo6(double array_restrict a[2048], const double array_restrict b[2048], const double array_restrict c[2048])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* vla versions */

/* VLA parameters of length n, if/else body. */
void foo7(int n, double array_restrict a[n], const double array_restrict b[n], const double array_restrict c[n])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* VLA parameters of length n, conditional-operator body. */
void foo8(int n, double array_restrict a[n], const double array_restrict b[n], const double array_restrict c[n])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}
When I compile with
$ icc -O3 -std=c99 -opt-report5 -mavx -S foo.c
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location
I see that the VLA cases are not auto-vectorized, but when I add the flag to assert no aliasing, -fno-alias, they are. Thus, I conclude that I should prescribe this in the source, so I attempt to do that by compiling with
$ icc -O3 -std=c99 -opt-report5 -mavx -Darray_restrict=restrict -S foo.c
icc: remark #10397: optimization reports are generated in *.optrpt files in the output location
The compiler error output includes
foo.c(98): error: "restrict" not allowed void foo7(int n, double array_restrict a[n], const double array_restrict b[n], const double array_restrict c[n]) ^
but, as you can see, restrict is not allowed on VLA arguments.
So my question is: is there no way to assert no aliasing of a VLA in ISO C?
Note that I can assert no aliasing in the source code using pragmas - e.g. simd, omp simd, ivdep, etc. - and get the auto-vectorization that I want, but these aren't ISO C.
In this context, ISO C means the most recent version of C, which of course is C11 as of the writing of this post.
Your original code fails nicely for me with messages such as:
void foo7(int n, double array_restrict a[n], const double array_restrict b[n], const double array_restrict c[n]) ^ restrict.c:126:1: error: invalid use of ‘restrict’ restrict.c:126:1: error: invalid use of ‘restrict’ restrict.c:145:1: error: invalid use of ‘restrict’
Transferring selected parts of the comments:
§6.7.6.3 Function declarators (including prototypes) has Example 5, which says that the following function prototype declarators are equivalent:
void f(double (* restrict a)[5]); void f(double a[restrict][5]); void f(double a[restrict 3][5]); void f(double a[restrict static 3][5]);
This is the only place in the standard where restrict appears associated directly with array types. §6.7.6 is on declarators generally, and §6.7.6.2 is on array declarators, and it looks to me as though the restrict has to appear inside the first component of the array dimension. In your context, it should be:
void foo7(int n, double a[array_restrict n], const double b[array_restrict n], const double c[array_restrict n])
I wouldn't have believed that notation without seeing the examples in the standard and you asking the question! Note that this applies to fixed-size arrays as well as VLAs.
This revised code, based on the commentary, compiles cleanly under the following compilation options:
gcc -g -O3 -std=c11 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes \
    -Wold-style-definition -Wold-style-declaration -Werror -c new.restrict.c
The compilation options demand prior declarations of non-static functions, hence the declarations at the top of the file. I also forced #define array_restrict restrict
in the source, rather than leaving it as a compilation option.
The compiler is GCC 4.9.2 running on an Ubuntu 14.04 derivative.
file
new.restrict.c
:
/*
 * new.restrict.c
 *
 * Eight variants of the same element-wise kernel:
 *     a[i] = (c[i] > 0) ? b[i] : 0.0
 * In foo5..foo8 the qualifier is written inside the array brackets
 * (a[array_restrict n]), which qualifies the pointer the array parameter
 * is adjusted to.
 */

/* Optional non-ISO vectorization hint; disabled by default. */
/* #define pragma_simd _Pragma("simd") */
#define pragma_simd

/* Alignment hint: only expands to something under the Intel compiler. */
#ifdef __INTEL_COMPILER
#define assume_aligned(a) __assume_aligned(a, 64)
#else
#define assume_aligned(a)
#endif

/* Forced on in the source rather than left to a -D compilation option. */
#define array_restrict restrict

/* Fallback kept from the earlier version; inactive because of the #define above. */
#ifndef array_restrict
#define array_restrict
#endif

/* Prior declarations for every non-static function defined below. */
void foo1(double *restrict a, const double *restrict b, const double *restrict c);
void foo2(double *restrict a, const double *restrict b, const double *restrict c);
void foo3(int n, double *restrict a, const double *restrict b, const double *restrict c);
void foo4(int n, double *restrict a, const double *restrict b, const double *restrict c);
void foo5(double a[array_restrict 2048], const double b[array_restrict 2048], const double c[array_restrict 2048]);
void foo6(double a[array_restrict 2048], const double b[array_restrict 2048], const double c[array_restrict 2048]);
void foo7(int n, double a[array_restrict n], const double b[array_restrict n], const double c[array_restrict n]);
void foo8(int n, double a[array_restrict n], const double b[array_restrict n], const double c[array_restrict n]);

/* Fixed trip count (2048), restrict pointers, if/else body. */
void foo1(double *restrict a, const double *restrict b, const double *restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* Fixed trip count (2048), restrict pointers, conditional-operator body. */
void foo2(double *restrict a, const double *restrict b, const double *restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* undetermined size version */

/* Run-time trip count n, restrict pointers, if/else body. */
void foo3(int n, double *restrict a, const double *restrict b, const double *restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* Run-time trip count n, restrict pointers, conditional-operator body. */
void foo4(int n, double *restrict a, const double *restrict b, const double *restrict c)
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* static array versions */

/* Fixed-size array parameters with the qualifier inside the brackets, if/else body. */
void foo5(double a[array_restrict 2048], const double b[array_restrict 2048], const double c[array_restrict 2048])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* Fixed-size array parameters with the qualifier inside the brackets, conditional-operator body. */
void foo6(double a[array_restrict 2048], const double b[array_restrict 2048], const double c[array_restrict 2048])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < 2048; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}

/* vla versions */

/* VLA parameters of length n with the qualifier inside the brackets, if/else body. */
void foo7(int n, double a[array_restrict n], const double b[array_restrict n], const double c[array_restrict n])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        if (c[i] > 0) {
            a[i] = b[i];
        } else {
            a[i] = 0.0;
        }
    }
}

/* VLA parameters of length n with the qualifier inside the brackets, conditional-operator body. */
void foo8(int n, double a[array_restrict n], const double b[array_restrict n], const double c[array_restrict n])
{
    assume_aligned(a);
    assume_aligned(b);
    assume_aligned(c);
    pragma_simd
    for (int i = 0; i < n; ++i) {
        a[i] = ((c[i] > 0) ? b[i] : 0.0);
    }
}
Comments
Post a Comment