blob: b61aebd47ef4e35ad53dbebe20371e31506fbf2d [file] [log] [blame] [edit]
// LLVM 14 itself has a bug compiling eigen, even the original code without AD
// RUN: if [ %llvmver -ne 14 ] && [ %llvmver -ne 15 ]; then %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -mllvm -force-vector-width=1 -ffast-math -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O3 %s -S -emit-llvm -o - | %opt - %OPloadEnzyme %enzyme -S | %lli - ; fi
// RUN: if [ %llvmver -ne 14 ] && [ %llvmver -ne 15 ]; then %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O2 %s -S -emit-llvm -o - | %opt - %OPloadEnzyme %enzyme -S | %lli - ; fi
// RUN: if [ %llvmver -ne 14 ] && [ %llvmver -ne 15 ]; then %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O1 %s -S -emit-llvm -o - | %opt - %OPloadEnzyme %enzyme -S | %lli - ; fi
// RUN: if [ %llvmver -ne 14 ] && [ %llvmver -ne 15 ]; then %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions %O0TBAA %s -S -emit-llvm -o - | %opt - %OPloadEnzyme %enzyme -S | %lli - ; fi
// RUN: if [ %llvmver -ne 14 ] && [ %llvmver -ne 15 ]; then %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O3 %s -S -emit-llvm -o - | %opt - %OPloadEnzyme %enzyme -enzyme-inline=1 -S | %lli -; fi
// RUN: if [ %llvmver -ne 14 ] && [ %llvmver -ne 15 ]; then %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O2 %s -S -emit-llvm -o - | %opt - %OPloadEnzyme %enzyme -enzyme-inline=1 -S | %lli -; fi
// RUN: if [ %llvmver -ne 14 ] && [ %llvmver -ne 15 ]; then clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O1 %s -S -emit-llvm -o - | %opt - %OPloadEnzyme %enzyme -enzyme-inline=1 -S | %lli - ; fi
// RUN: if [ %llvmver -ne 14 ] && [ %llvmver -ne 15 ]; then %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions %O0TBAA %s -S -emit-llvm -o - | %opt - %OPloadEnzyme %enzyme -enzyme-inline=1 -S | %lli - ; fi
#define EIGEN_NO_AUTOMATIC_RESIZING 1
#define EIGEN_DONT_ALIGN 1
#define EIGEN_NO_DEBUG 1
#define EIGEN_UNROLLING_LIMIT 0
#define EIGEN_DONT_VECTORIZE 1
#include "../test_utils.h"
/*
void memcpy(float* __restrict dst, float* __restrict src, size_t count) {
for(size_t i=0; i<count/sizeof(float); i++) {
dst[i] = src[i];
}
}
void memcpy(double* __restrict dst, double* __restrict src, size_t count) {
for(size_t i=0; i<count/sizeof(double); i++) {
dst[i] = src[i];
}
}
template<typename T>
void memcpy(T* __restrict dst, T* __restrict src, size_t count) {
for(size_t i=0; i<count/sizeof(T); i++) {
dst[i] = src[i];
}
}*/
#include <eigen3/Eigen/Dense>
#include <eigen3/unsupported/Eigen/CXX11/Tensor>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <stdlib.h>
#include <math.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
using Eigen::MatrixXd;
using Eigen::Matrix;
using Eigen::Tensor;
constexpr size_t IN = 4, OUT = 4, NUM = 5;
namespace Eigen {
namespace internal {
template<> struct smart_copy_helper<float, true> {
EIGEN_DEVICE_FUNC static inline void run(const float* start, const float* end, float* target) {
for(unsigned i=0; start+i != end; i++) {
target[i] = start[i];
}
}
};
};
};
extern "C" {
extern double __enzyme_autodiff(void*, const Tensor<float, 2>* __restrict K, const Tensor<float, 2>* __restrict Kp, const Tensor<float, 4>* __restrict I, const Tensor<float, 4>* __restrict Ip, Tensor<float, 4>* __restrict O, Tensor<float, 4>* __restrict Op);
}
__attribute__((noinline))
static void matvec(const Tensor<float, 2>* __restrict K, const Tensor<float, 4>* __restrict In, Tensor<float, 4>* Out) {
Eigen::array<ptrdiff_t, 2> dims({1, 2});
*Out = In->convolve(*K, dims);
}
int main(int argc, char** argv) {
Tensor<float, 4> input(3, 3, 7, 11);
Tensor<float, 2> kernel(2, 2);
Tensor<float, 4> output(3, 2, 6, 11);
input.setRandom();
kernel.setRandom();
Tensor<float, 4> inputp(3, 3, 7, 11);
Tensor<float, 2> kernelp(2, 2);
Tensor<float, 4> outputp(3, 2, 6, 11);
inputp.setZero();
kernelp.setZero();
outputp.setRandom(); //One();
Tensor<float, 2> expected_kernel(2, 2);
expected_kernel.setZero();
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 6; ++k) {
for (int l = 0; l < 11; ++l) {
const float result = output(i,j,k,l);
const float expected = input(i,j+0,k+0,l) * kernel(0,0) +
input(i,j+1,k+0,l) * kernel(1,0) +
input(i,j+0,k+1,l) * kernel(0,1) +
input(i,j+1,k+1,l) * kernel(1,1);
//VERIFY_IS_APPROX(result, expected);
//VERIFY_IS_APPROX(result, expected);
for(int si=0; si<2; si++)
for(int sj=0; sj<2; sj++)
expected_kernel(si,sj) += outputp(i, j, k, l) * input(i, j+si, k+sj, l);
}
}
}
}
matvec(&kernel, &input, &output);
printf("did original\n");
__enzyme_autodiff((void*)matvec, &kernel, &kernelp, &input, &inputp, &output, &outputp);
for(int si=0; si<2; si++)
for(int sj=0; sj<2; sj++) {
fprintf(stderr, "kernelp(si=%d, sj=%d)=%f, expected_kernel(si=%d, sj=%d)=%f\n", si, sj, kernelp(si, sj), si, sj, expected_kernel(si, sj) );
APPROX_EQ( kernelp(si, sj), expected_kernel(si, sj), 1e-3);
}
}