blob: 5bcb96a48b51613ca97c5a42c09c5963b1632239 [file] [log] [blame] [edit]
// RUN: %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -mllvm -force-vector-width=1 -ffast-math -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O3 %s -S -emit-llvm -o - | %opt - %loadEnzyme -enzyme -S | %lli -
// RUN: %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O2 %s -S -emit-llvm -o - | %opt - %loadEnzyme -enzyme -S | %lli -
// RUN: %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O1 %s -S -emit-llvm -o - | %opt - %loadEnzyme -enzyme -S | %lli -
// note not doing O0 below as to ensure we get tbaa
// TODO: %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O1 -Xclang -disable-llvm-optzns %s -S -emit-llvm -o - | %opt - %loadEnzyme -enzyme -S | %lli -
// RUN: %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O3 %s -S -emit-llvm -o - | %opt - %loadEnzyme -enzyme -enzyme-inline=1 -S | %lli -
// RUN: %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O2 %s -S -emit-llvm -o - | %opt - %loadEnzyme -enzyme -enzyme-inline=1 -S | %lli -
// RUN: if [ %llvmver -ge 8 ]; then %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O1 %s -S -emit-llvm -o - | %opt - %loadEnzyme -enzyme -enzyme-inline=1 -S | %lli - ; fi
// note not doing O0 below as to ensure we get tbaa
// RUN: if [ %llvmver -ge 8 ]; then %clang++ -I/usr/include/eigen3 -Xclang -new-struct-path-tbaa -fno-unroll-loops -fno-vectorize -fno-slp-vectorize -fno-exceptions -O1 -Xclang -disable-llvm-optzns %s -S -emit-llvm -o - | %opt - %loadEnzyme -enzyme -enzyme-inline=1 -S | %lli - ; fi
#define EIGEN_NO_AUTOMATIC_RESIZING 1
#define EIGEN_DONT_ALIGN 1
#define EIGEN_NO_DEBUG 1
#define EIGEN_UNROLLING_LIMIT 0
#define EIGEN_DONT_VECTORIZE 1
#include "test_utils.h"
#include <eigen3/Eigen/Dense>
#include <eigen3/unsupported/Eigen/CXX11/Tensor>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <stdlib.h>
#include <math.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
using Eigen::MatrixXd;
using Eigen::Matrix;
using Eigen::Tensor;
constexpr size_t IN = 4, OUT = 4, NUM = 5;
extern "C" {
extern double __enzyme_autodiff(void*, const Tensor<float, 2>* __restrict K, const Tensor<float, 2>* __restrict Kp, const Tensor<float, 4>* __restrict I, const Tensor<float, 4>* __restrict Ip, Tensor<float, 4>* __restrict O, Tensor<float, 4>* __restrict Op);
}
__attribute__((noinline))
static void matvec(const Tensor<float, 2>* __restrict K, const Tensor<float, 4>* __restrict In, Tensor<float, 4>* Out) {
Eigen::array<ptrdiff_t, 2> dims({1, 2});
*Out = In->convolve(*K, dims);
}
int main(int argc, char** argv) {
Tensor<float, 4> input(3, 3, 7, 11);
Tensor<float, 2> kernel(2, 2);
Tensor<float, 4> output(3, 2, 6, 11);
input.setRandom();
kernel.setRandom();
Tensor<float, 4> inputp(3, 3, 7, 11);
Tensor<float, 2> kernelp(2, 2);
Tensor<float, 4> outputp(3, 2, 6, 11);
inputp.setZero();
kernelp.setZero();
outputp.setRandom(); //One();
Tensor<float, 2> expected_kernel(2, 2);
expected_kernel.setZero();
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 2; ++j) {
for (int k = 0; k < 6; ++k) {
for (int l = 0; l < 11; ++l) {
const float result = output(i,j,k,l);
const float expected = input(i,j+0,k+0,l) * kernel(0,0) +
input(i,j+1,k+0,l) * kernel(1,0) +
input(i,j+0,k+1,l) * kernel(0,1) +
input(i,j+1,k+1,l) * kernel(1,1);
//VERIFY_IS_APPROX(result, expected);
//VERIFY_IS_APPROX(result, expected);
for(int si=0; si<2; si++)
for(int sj=0; sj<2; sj++)
expected_kernel(si,sj) += outputp(i, j, k, l) * input(i, j+si, k+sj, l);
}
}
}
}
matvec(&kernel, &input, &output);
printf("did original\n");
__enzyme_autodiff((void*)matvec, &kernel, &kernelp, &input, &inputp, &output, &outputp);
for(int si=0; si<2; si++)
for(int sj=0; sj<2; sj++) {
fprintf(stderr, "kernelp(si=%d, sj=%d)=%f, expected_kernel(si=%d, sj=%d)=%f\n", si, sj, kernelp(si, sj), si, sj, expected_kernel(si, sj) );
APPROX_EQ( kernelp(si, sj), expected_kernel(si, sj), 1e-3);
}
}