blob: 7c7113df964157f90dd8c0ed7910557fad95593d [file] [log] [blame] [edit]
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <stdlib.h>
#include <math.h>
#include <inttypes.h>
#include <string.h>
#include <adept_source.h>
#include <adept.h>
#include <adept_arrays.h>
using adept::adouble;
using adept::aVector;
template<typename Return, typename... T>
Return __enzyme_autodiff(T...);
extern "C" {
extern int enzyme_dup;
extern int enzyme_const;
extern int enzyme_dupnoneed;
}
float tdiff(struct timeval *start, struct timeval *end) {
return (end->tv_sec-start->tv_sec) + 1e-6*(end->tv_usec-start->tv_usec);
}
#define BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS
#define BOOST_NO_EXCEPTIONS
#include <iostream>
#include <boost/array.hpp>
#include <boost/numeric/odeint.hpp>
#include <boost/throw_exception.hpp>
void boost::throw_exception(std::exception const & e){
//do nothing
}
using namespace std;
using namespace boost::numeric::odeint;
#define N 32
#define xmin 0.
#define xmax 1.
#define ymin 0.
#define ymax 1.
#include <assert.h>
#define RANGE(min, max, i, N) ((max-min)/(N-1)*i + min)
#define GETnb(x, i, j) (x)[N*i+j]
#define GET(x, i, j) GETnb(x, i, j)
//#define GET(x, i, j) ({ assert(i >=0); assert( j>=0); assert(j<N); assert(j<N); GETnb(x, i, j); })
template <typename T>
T brusselator_f(T x, T y, T t) {
bool eq1 = ((x-0.3)*(x-0.3) + (y-0.6)*(y-0.6)) <= 0.1*0.1;
bool eq2 = t >= 1.1;
if (eq1 && eq2) {
return T(5);
} else {
return T(0);
}
}
void init_brusselator(double* __restrict u, double* __restrict v) {
for(int i=0; i<N; i++) {
for(int j=0; j<N; j++) {
double x = RANGE(xmin, xmax, i, N);
double y = RANGE(ymin, ymax, j, N);
GETnb(u, i, j) = 22*(y*(1-y))*sqrt(y*(1-y));
GETnb(v, i, j) = 27*(x*(1-x))*sqrt(x*(1-x));
}
}
}
__attribute__((noinline))
void brusselator_2d_loop(double* __restrict du, double* __restrict dv, const double* __restrict u, const double* __restrict v, const double* __restrict p, double t) {
double A = p[0];
double B = p[1];
double alpha = p[2];
double dx = (double)1/(N-1);
alpha = alpha/(dx*dx);
for(int i=0; i<N; i++) {
for(int j=0; j<N; j++) {
double x = RANGE(xmin, xmax, i, N);
double y = RANGE(ymin, ymax, j, N);
unsigned ip1 = (i == N-1) ? i : (i+1);
unsigned im1 = (i == 0) ? i : (i-1);
unsigned jp1 = (j == N-1) ? j : (j+1);
unsigned jm1 = (j == 0) ? j : (j-1);
double u2v = GET(u, i, j) * GET(u, i, j) * GET(v, i, j);
GETnb(du, i, j) = alpha*( GET(u, im1, j) + GET(u, ip1, j) + GET(u, i, jp1) + GET(u, i, jm1) - 4 * GET(u, i, j))
+ B + u2v - (A + 1)*GET(u, i, j) + brusselator_f(x, y, t);
GETnb(dv, i, j) = alpha*( GET(v, im1, j) + GET(v, ip1, j) + GET(v, i, jp1) + GET(v, i, jm1) - 4 * GET(v, i, j))
+ A * GET(u, i, j) - u2v;
}
}
}
typedef boost::array< double , 2 * N * N > state_type;
void lorenz( const state_type &x , state_type &dxdt , double t )
{
// Extract the parameters
double p[3] = { /*A*/ 3.4, /*B*/ 1, /*alpha*/10. };
brusselator_2d_loop(dxdt.c_array(), dxdt.c_array() + N * N, x.data(), x.data() + N * N, p, t);
}
// init_brusselator(x.c_array(), x.c_array() + N*N)
double foobar(const double* p, const state_type x, const state_type adjoint, double t) {
double dp[3] = { 0. };
state_type dx = { 0. };
state_type dadjoint_inp = adjoint;
state_type dxdu;
__enzyme_autodiff<void>(brusselator_2d_loop,
// enzyme_dup, dxdu.c_array(), dadjoint_inp.c_array(),
// enzyme_dup, dxdu.c_array() + N * N, dadjoint_inp.c_array() + N * N,
enzyme_dupnoneed, nullptr, dadjoint_inp.data(),
enzyme_dupnoneed, nullptr, dadjoint_inp.data() + N * N,
enzyme_dup, x.data(), dx.data(),
enzyme_dup, x.data() + N * N, dx.data() + N * N,
enzyme_dup, p, dp,
enzyme_const, t);
return dx[0];
}
#undef GETnb
#define GETnb(x, i, j) (x)(N*i+j)
void abrusselator_2d_loop(aVector& du, aVector& dv, aVector& u, aVector& v, aVector& p, double t) {
adouble A = p(0);
adouble B = p(1);
adouble alpha = p(2);
adouble dx = (double)1/(N-1);
alpha = alpha/(dx*dx);
for(int i=0; i<N; i++) {
for(int j=0; j<N; j++) {
adouble x = RANGE(xmin, xmax, i, N);
adouble y = RANGE(ymin, ymax, j, N);
unsigned ip1 = (i == N-1) ? i : (i+1);
unsigned im1 = (i == 0) ? i : (i-1);
unsigned jp1 = (j == N-1) ? j : (j+1);
unsigned jm1 = (j == 0) ? j : (j-1);
adouble u2v = GET(u, i, j) * GET(u, i, j) * GET(v, i, j);
GETnb(du, i, j) = alpha*( GET(u, im1, j) + GET(u, ip1, j) + GET(u, i, jp1) + GET(u, i, jm1) - 4 * GET(u, i, j))
+ B + u2v - (A + 1)*GET(u, i, j) + brusselator_f<adouble>(x, y, t);
GETnb(dv, i, j) = alpha*( GET(v, im1, j) + GET(v, ip1, j) + GET(v, i, jp1) + GET(v, i, jm1) - 4 * GET(v, i, j))
+ A * GET(u, i, j) - u2v;
}
}
}
double afoobar(const double* p_in, const state_type x, const state_type adjoint, double t) {
adept::Stack stack;
aVector p(3);
for(unsigned i=0; i<3; i++) p(i) = p_in[i];
aVector ax(N*N);
aVector ay(N*N);
for(unsigned i=0; i<N*N; i++) {
ax(i) = x[i];
ay(i) = x[i+N*N];
}
aVector dxdu(N*N);
aVector dydu(N*N);
stack.new_recording();
abrusselator_2d_loop(dxdu, dydu, ax, ay, p, t);
for(unsigned i=0; i<N*N; i++) {
dxdu(i).set_gradient(adjoint[i]);
dydu(i).set_gradient(adjoint[i+N*N]);
}
stack.compute_adjoint();
return ax(0).get_gradient();
}
//! Tapenade
extern "C" {
/* Generated by TAPENADE (INRIA, Ecuador team)
Tapenade 3.15 (master) - 8 Jan 2020 10:48
*/
#include <adBuffer.h>
/*
Differentiation of get in reverse (adjoint) mode (with options i4 dr8 r4):
gradient of useful results: *x get
with respect to varying inputs: *x
Plus diff mem management of: x:in
*/
void get_b(const double *x, double *xb, unsigned int i, unsigned int j, double getb)
{
double get;
xb[N*i + j] = xb[N*i + j] + getb;
}
double get_nodiff(const double *x, unsigned int i, unsigned int j) {
return x[N*i + j];
}
double brusselator_f_nodiff(double x, double y, double t) {
if ((x-0.3)*(x-0.3) + (y-0.6)*(y-0.6) <= 0.1*0.1 && t >= 1.1)
return 5.0;
else
return 0.0;
}
#if 1
void brusselator_2d_loop_b(double *du, double *dub, double *dv, double *dvb,
const double *u, double *ub, const double *v, double *vb, const double *p, double *pb,
double t) {
double A = p[0];
double Ab = 0.0;
double B = p[1];
double Bb = 0.0;
double alpha = p[2];
double alphab = 0.0;
double dx = (double)1/(N-1);
alpha = alpha/(dx*dx);
for (int i = 0; i < N; ++i)
for (int j = 0; j < N; ++j) {
double x = (xmax-xmin)/(N-1)*i + xmin;
double y = (ymax-ymin)/(N-1)*j + ymin;
unsigned int ip1 = (i == N - 1 ? i : i + 1);
unsigned int im1 = (i == 0 ? i : i - 1);
unsigned int jp1 = (j == N - 1 ? j : j + 1);
unsigned int jm1 = (j == 0 ? j : j - 1);
double u2v = u[N*i+j]*u[N*i+j]*v[N*i+j];
double result1;
pushInteger4(jm1);
pushInteger4(jp1);
pushInteger4(im1);
pushInteger4(ip1);
}
*ub = 0.0;
*vb = 0.0;
alphab = 0.0;
Ab = 0.0;
Bb = 0.0;
for (int i = N-1; i > -1; --i)
for (int j = N-1; j > -1; --j) {
double x;
double y;
unsigned int ip1;
unsigned int im1;
unsigned int jp1;
unsigned int jm1;
double u2v;
double u2vb = 0.0;
double result1;
double temp;
double tempb;
popInteger4((int*)&ip1);
popInteger4((int*)&im1);
popInteger4((int*)&jp1);
popInteger4((int*)&jm1);
temp = u[N*i + j];
alphab = alphab + (v[N*im1+j]+v[N*ip1+j]+v[N*i+jp1]+v[N*i+jm1]
-4*v[N*i+j])*dvb[N*i+j] + (u[N*im1+j]+u[N*ip1+j]+u[N*i+
jp1]+u[N*i+jm1]-4*u[N*i+j])*dub[N*i+j];
tempb = alpha*dvb[N*i+j];
Ab = Ab + u[N*i+j]*dvb[N*i+j] - u[N*i+j]*dub[N*i+j];
ub[N*i + j] = ub[N*i + j] + A*dvb[N*i+j] - (A+1)*dub[N*i+j];
u2vb = dub[N*i + j] - dvb[N*i + j];
dvb[N*i + j] = 0.0;
vb[N*im1 + j] = vb[N*im1 + j] + tempb;
vb[N*ip1 + j] = vb[N*ip1 + j] + tempb;
vb[N*i + jp1] = vb[N*i + jp1] + tempb;
vb[N*i + jm1] = vb[N*i + jm1] + tempb;
vb[N*i + j] = vb[N*i + j] + temp*temp*u2vb - 4*tempb;
tempb = alpha*dub[N*i+j];
Bb = Bb + dub[N*i + j];
dub[N*i + j] = 0.0;
ub[N*im1 + j] = ub[N*im1 + j] + tempb;
ub[N*ip1 + j] = ub[N*ip1 + j] + tempb;
ub[N*i + jp1] = ub[N*i + jp1] + tempb;
ub[N*i + jm1] = ub[N*i + jm1] + tempb;
ub[N*i + j] = ub[N*i + j] + 2*temp*v[N*i+j]*u2vb - 4*tempb;
}
alphab = alphab/(dx*dx);
pb[2] = pb[2] + alphab;
pb[1] = pb[1] + Bb;
pb[0] = pb[0] + Ab;
}
#else
/*
Differentiation of brusselator_2d_loop in reverse (adjoint) mode (with options i4 dr8 r4):
gradient of useful results: *du *dv
with respect to varying inputs: *p *u *du *v *dv
RW status of diff variables: *p:out *u:out *du:in-out *v:out
*dv:in-out
Plus diff mem management of: p:in u:in du:in v:in dv:in
*/
void brusselator_2d_loop_b(double *du, double *dub, double *dv, double *dvb,
const double *u, double *ub, const double *v, double *vb, const double *p, double *pb,
double t) {
double A = p[0];
double Ab = 0.0;
double B = p[1];
double Bb = 0.0;
double alpha = p[2];
double alphab = 0.0;
double dx = (double)1/(N-1);
alpha = alpha/(dx*dx);
for (int i = 0; i < N; ++i)
for (int j = 0; j < N; ++j) {
double x = (xmax-xmin)/(N-1)*i + xmin;
double y = (ymax-ymin)/(N-1)*j + ymin;
unsigned int ip1 = (i == N - 1 ? i : i + 1);
unsigned int im1 = (i == 0 ? i : i - 1);
unsigned int jp1 = (j == N - 1 ? j : j + 1);
unsigned int jm1 = (j == 0 ? j : j - 1);
double u2v;
double result1;
double result2;
double result3;
double result4;
double result5;
double result6;
double result7;
result1 = get_nodiff(u, i, j);
result2 = get_nodiff(u, i, j);
result3 = get_nodiff(v, i, j);
pushReal8(result1);
result1 = get_nodiff(u, im1, j);
pushReal8(result2);
result2 = get_nodiff(u, ip1, j);
pushReal8(result3);
result3 = get_nodiff(u, i, jp1);
result4 = get_nodiff(u, i, jm1);
result5 = get_nodiff(u, i, j);
result6 = get_nodiff(u, i, j);
pushReal8(result1);
result1 = get_nodiff(v, im1, j);
pushReal8(result2);
result2 = get_nodiff(v, ip1, j);
pushReal8(result3);
result3 = get_nodiff(v, i, jp1);
pushReal8(result4);
result4 = get_nodiff(v, i, jm1);
pushReal8(result5);
result5 = get_nodiff(v, i, j);
pushReal8(result6);
result6 = get_nodiff(u, i, j);
pushInteger4(jm1);
pushInteger4(jp1);
pushReal8(result6);
pushReal8(result5);
pushReal8(result4);
pushReal8(result3);
pushReal8(result2);
pushReal8(result1);
pushInteger4(im1);
pushInteger4(ip1);
}
*ub = 0.0;
*vb = 0.0;
alphab = 0.0;
Ab = 0.0;
Bb = 0.0;
for (int i = N-1; i > -1; --i)
for (int j = N-1; j > -1; --j) {
double x;
double y;
unsigned int ip1;
unsigned int im1;
unsigned int jp1;
unsigned int jm1;
double u2v;
double u2vb;
double result1;
double result1b;
double result2;
double result2b;
double result3;
double result3b;
double result4;
double result4b;
double result5;
double result5b;
double result6;
double result6b;
double result7;
double tempb;
popInteger4((int*)&ip1);
popInteger4((int*)&im1);
popReal8(&result1);
popReal8(&result2);
popReal8(&result3);
popReal8(&result4);
popReal8(&result5);
popReal8(&result6);
popInteger4((int*)&jp1);
popInteger4((int*)&jm1);
alphab = alphab + (result1+result2+result3+result4-4*result5)*dvb[
N*i+j];
tempb = alpha*dvb[N*i+j];
Ab = Ab + result6*dvb[N*i+j];
result6b = A*dvb[N*i+j];
u2vb = dub[N*i + j] - dvb[N*i + j];
dvb[N*i + j] = 0.0;
result1b = tempb;
result2b = tempb;
result3b = tempb;
result4b = tempb;
result5b = -(4*tempb);
popReal8(&result6);
get_b(u, ub, i, j, result6b);
popReal8(&result5);
get_b(v, vb, i, j, result5b);
popReal8(&result4);
get_b(v, vb, i, jm1, result4b);
popReal8(&result3);
get_b(v, vb, i, jp1, result3b);
popReal8(&result2);
get_b(v, vb, ip1, j, result2b);
popReal8(&result1);
get_b(v, vb, im1, j, result1b);
alphab = alphab + (result1+result2+result3+result4-4*result5)*dub[
N*i+j];
tempb = alpha*dub[N*i+j];
Bb = Bb + dub[N*i + j];
Ab = Ab - result6*dub[N*i+j];
result6b = -((A+1)*dub[N*i+j]);
dub[N*i + j] = 0.0;
result1b = tempb;
result2b = tempb;
result3b = tempb;
result4b = tempb;
result5b = -(4*tempb);
get_b(u, ub, i, j, result6b);
get_b(u, ub, i, j, result5b);
get_b(u, ub, i, jm1, result4b);
popReal8(&result3);
get_b(u, ub, i, jp1, result3b);
popReal8(&result2);
get_b(u, ub, ip1, j, result2b);
popReal8(&result1);
get_b(u, ub, im1, j, result1b);
result1b = result2*result3*u2vb;
result2b = result1*result3*u2vb;
result3b = result1*result2*u2vb;
get_b(v, vb, i, j, result3b);
get_b(u, ub, i, j, result2b);
get_b(u, ub, i, j, result1b);
}
alphab = alphab/(dx*dx);
pb[2] = pb[2] + alphab;
pb[1] = pb[1] + Bb;
pb[0] = pb[0] + Ab;
}
#endif
}
double tfoobar(const double* p, const state_type x, const state_type adjoint, double t) {
double dp[3] = { 0. };
state_type dx = { 0. };
state_type dadjoint_inp = adjoint;
state_type dxdu;
brusselator_2d_loop_b(nullptr, dadjoint_inp.data(),
nullptr, dadjoint_inp.data() + N * N,
x.data(), dx.data(),
x.data() + N * N, dx.data() + N * N,
p, dp,
t);
return dx[0];
}
//! Main
int main(int argc, char** argv) {
const double p[3] = { /*A*/ 3.4, /*B*/ 1, /*alpha*/10. };
state_type x;
init_brusselator(x.data(), x.data() + N * N);
state_type adjoint;
init_brusselator(adjoint.data(), adjoint.data() + N * N);
double t = 2.1;
{
struct timeval start, end;
gettimeofday(&start, NULL);
double res;
for(int i=0; i<10000; i++)
res = afoobar(p, x, adjoint, t);
gettimeofday(&end, NULL);
printf("Adept combined %0.6f res=%f\n", tdiff(&start, &end), res);
}
{
struct timeval start, end;
gettimeofday(&start, NULL);
double res;
for(int i=0; i<10000; i++)
res = tfoobar(p, x, adjoint, t);
gettimeofday(&end, NULL);
printf("Tapenade combined %0.6f res=%f\n", tdiff(&start, &end), res);
}
{
struct timeval start, end;
gettimeofday(&start, NULL);
double res;
for(int i=0; i<10000; i++)
res = foobar(p, x, adjoint, t);
gettimeofday(&end, NULL);
printf("Enzyme combined %0.6f res=%f\n", tdiff(&start, &end), res);
}
//printf("res=%f\n", foobar(1000));
}
#if 0
typedef boost::array< double , 6 > state_type;
void lorenz( const state_type &x , state_type &dxdt , double t )
{
// Extract the parameters
double k1 = x[3];
double k2 = x[4];
double k3 = x[5];
dxdt[0] = -k1 * x[0] + k3 * x[1] * x[2];
dxdt[1] = k1 * x[0] - k2 * x[1] * x[1] - k3 * x[1] * x[2];
dxdt[2] = k2 * x[1] * x[1];
// Don't change the parameters p
dxdt[3] = 0;
dxdt[4] = 0;
dxdt[5] = 0;
}
double foobar(double* p, uint64_t iters) {
state_type x = { 1.0, 0, 0, p[0], p[1], p[2] }; // initial conditions
double t = 1e5;
typedef controlled_runge_kutta< runge_kutta_dopri5< state_type , typename state_type::value_type , state_type , double > > stepper_type;
//typedef euler< state_type , typename state_type::value_type , state_type , double > stepper_type;
integrate_const( stepper_type(), lorenz , x , 0.0 , t, t/iters );
return x[0];
}
typedef boost::array< adouble , 6 > astate_type;
void alorenz( const astate_type &x , astate_type &dxdt , adouble t )
{
// Extract the parameters
adouble k1 = x[3];
adouble k2 = x[4];
adouble k3 = x[5];
dxdt[0] = -k1 * x[0] + k3 * x[1] * x[2];
dxdt[1] = k1 * x[0] - k2 * x[1] * x[1] - k3 * x[1] * x[2];
dxdt[2] = k2 * x[1] * x[1];
// Don't change the parameters p
dxdt[3] = 0;
dxdt[4] = 0;
dxdt[5] = 0;
}
adouble afoobar(adouble* p, uint64_t iters) {
astate_type x = { 1.0, 0, 0, p[0], p[1], p[2] }; // initial conditions
double t = 1e5;
typedef controlled_runge_kutta< runge_kutta_dopri5< astate_type , typename astate_type::value_type , astate_type , adouble > > stepper_type;
//typedef euler< astate_type , typename astate_type::value_type , astate_type , adouble > stepper_type;
integrate_const( stepper_type(), alorenz , x , 0.0 , t, t/iters );
return x[0];
}
static
double afoobar_and_gradient(double* p_in, double* dp_out, uint64_t iters) {
adept::Stack stack;
adouble x[3] = { p_in[0], p_in[1], p_in[2] };
stack.new_recording();
adouble y = afoobar(x, iters);
y.set_gradient(1.0);
stack.compute_adjoint();
for(int i=0; i<3; i++)
dp_out[i] = x[i].get_gradient();
return y.value();
}
static void adept_sincos(uint64_t iters) {
{
struct timeval start, end;
gettimeofday(&start, NULL);
double p[3] = { 0.04,3e7,1e4 };
double res = foobar(p, iters);
gettimeofday(&end, NULL);
printf("Adept real %0.6f res=%f\n", tdiff(&start, &end), res);
}
{
struct timeval start, end;
gettimeofday(&start, NULL);
adept::Stack stack;
adouble p[3] = { 0.04,3e7,1e4 };
// stack.new_recording();
adouble resa = afoobar(p, iters);
double res = resa.value();
gettimeofday(&end, NULL);
printf("Adept forward %0.6f res=%f\n", tdiff(&start, &end), res);
}
{
struct timeval start, end;
gettimeofday(&start, NULL);
double p[3] = { 0.04,3e7,1e4 };
double dp[3] = { 0 };
afoobar_and_gradient(p, dp, iters);
gettimeofday(&end, NULL);
printf("Adept combined %0.6f res'=%f\n", tdiff(&start, &end), dp[0]);
}
}
static void enzyme_sincos(double inp, uint64_t iters) {
{
struct timeval start, end;
gettimeofday(&start, NULL);
double p[3] = { 0.04,3e7,1e4 };
double res = foobar(p, iters);
gettimeofday(&end, NULL);
printf("Enzyme real %0.6f res=%f\n", tdiff(&start, &end), res);
}
{
struct timeval start, end;
gettimeofday(&start, NULL);
double p[3] = { 0.04,3e7,1e4 };
double res = foobar(p, iters);
gettimeofday(&end, NULL);
printf("Enzyme forward %0.6f res=%f\n", tdiff(&start, &end), res);
}
{
struct timeval start, end;
gettimeofday(&start, NULL);
double p[3] = { 0.04,3e7,1e4 };
double dp[3] = { 0 };
__enzyme_autodiff<void>(foobar, p, dp, iters);
gettimeofday(&end, NULL);
printf("Enzyme combined %0.6f res'=%f\n", tdiff(&start, &end), dp[0]);
}
}
int main(int argc, char** argv) {
int max_iters = atoi(argv[1]) ;
double inp = 2.1;
//for(int iters=max_iters/20; iters<=max_iters; iters+=max_iters/20) {
auto iters = max_iters;
printf("iters=%d\n", iters);
adept_sincos(inp, iters);
enzyme_sincos(inp, iters);
//}
}
#endif