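ReverseMode benchmarks: add shared Makefile.config (CLANG/OPT/PASSES*), build at -O3 with explicit opt pipelines, repeat ba/lstm runs 5x, take i32 sizes in Rust FFI entry points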
diff --git a/enzyme/benchmarks/ReverseMode/adbench/Makefile.config b/enzyme/benchmarks/ReverseMode/adbench/Makefile.config
new file mode 100644
index 0000000..c620d4a
--- /dev/null
+++ b/enzyme/benchmarks/ReverseMode/adbench/Makefile.config
@@ -0,0 +1,11 @@
+# Toolchain for the ReverseMode benchmark Makefiles that include this file.
+# `?=` keeps these defaults overridable from the environment or command line.
+CLANG ?= /home/manuel/prog/rust-middle/build/x86_64-unknown-linux-gnu/llvm/build/bin/clang++
+OPT ?= /home/manuel/prog/rust-middle/build/x86_64-unknown-linux-gnu/llvm/build/bin/opt
+
+PASSES1 := verify,annotation2metadata,forceattrs,inferattrs,coro-early,function<eager-inv>(ee-instrument<>,lower-expect,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sroa<modify-cfg>,early-cse<>,callsite-splitting),openmp-opt,ipsccp,called-value-propagation,globalopt,function<eager-inv>(mem2reg,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>),always-inline,require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;t
rivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(licm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide)),deadargelim,coro-cleanup,globalopt,globaldce,rpo-function-attrs,recompute-globalsaa,function<eager-inv>(float2int,lower-constant-intrinsics,chr,loop(loop-rotate<header-duplication;prepare-for-lto>,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize<interleave-forced-only;vectorize-forced-only;>,infer-alignment,loop-load-elim,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,vector-combine,instcombine<max-iterations=1;no-verify-fixpoint>,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simp
lifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),globaldce,constmerge,function(annotation-remarks),canonicalize-aliases,name-anon-globals,verify
+
+PASSES2 := cross-dso-cfi,openmp-opt,globaldce<vfe-linkage-unit-visibility>,inferattrs,function<eager-inv>(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa<modify-cfg>)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine),expand-variadics,cgscc(inline<only-mandatory>,inline),globalopt,openmp-opt,globaldce<vfe-linkage-unit-visibility>,cgscc(argpromotion),function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,constraint-elimination,jump-threading,sroa<modify-cfg>,tailcallelim),cgscc(function-attrs),require<globals-aa>,function(invalidate<aa>),cgscc(openmp-opt-cgscc),function<eager-inv>(loop-mssa(licm<allowspeculation>),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion<no-split-footer-bb>,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize<interleave-forced-only;vectorize-forced-only;>,infer-alignment,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sccp,instcombine<max-iterations=1;no-verify-fixpoint>,bdce,vector-combine,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),elim-avail-extern,globaldce<vfe-linkage-unit-visibility>,
rel-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals
+#PASSES2 := cross-dso-cfi,openmp-opt,globaldce<vfe-linkage-unit-visibility>,inferattrs,function<eager-inv>(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa<modify-cfg>)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine),expand-variadics,cgscc(inline<only-mandatory>,inline),globalopt,openmp-opt,globaldce<vfe-linkage-unit-visibility>,cgscc(argpromotion),function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,constraint-elimination,jump-threading,sroa<modify-cfg>,tailcallelim),cgscc(function-attrs),require<globals-aa>,function(invalidate<aa>),cgscc(openmp-opt-cgscc),function<eager-inv>(loop-mssa(licm<allowspeculation>),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion<no-split-footer-bb>,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize<interleave-forced-only;vectorize-forced-only;>,infer-alignment,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sccp,instcombine<max-iterations=1;no-verify-fixpoint>,bdce,vector-combine,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),elim-avail-extern,globaldce<vfe-linkage-unit-visibility>
,rel-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals,EnzymeNewPM
+
+PASSES3 := cross-dso-cfi,openmp-opt,globaldce<vfe-linkage-unit-visibility>,inferattrs,function<eager-inv>(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa<modify-cfg>)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine),expand-variadics,cgscc(inline<only-mandatory>,inline),globalopt,openmp-opt,globaldce<vfe-linkage-unit-visibility>,cgscc(argpromotion),function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,constraint-elimination,jump-threading,sroa<modify-cfg>,tailcallelim),cgscc(function-attrs),require<globals-aa>,function(invalidate<aa>),cgscc(openmp-opt-cgscc),function<eager-inv>(loop-mssa(licm<allowspeculation>),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion<no-split-footer-bb>,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,infer-alignment,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sccp,instcombine<max-iterations=1;no-verify-fixpoint>,bdce,slp-vectorizer,vector-combine,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),elim-avail-extern,globaldce<vfe-link
age-unit-visibility>,mergefunc,rel-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals
diff --git a/enzyme/benchmarks/ReverseMode/adbench/ba.h b/enzyme/benchmarks/ReverseMode/adbench/ba.h
index 6a3f977..131a5f8 100644
--- a/enzyme/benchmarks/ReverseMode/adbench/ba.h
+++ b/enzyme/benchmarks/ReverseMode/adbench/ba.h
@@ -427,7 +427,7 @@
     }
     }
 
-    {
+    for (int j=0;j<5;j++) {
 
     struct BAInput input;
     read_ba_instance("data/" + path, input.n, input.m, input.p, input.cams,
@@ -659,7 +659,7 @@
     }
     }
 
-    {
+    for(int j=0;j<5;j++){
 
     struct BAInput input;
     read_ba_instance("data/" + path, input.n, input.m, input.p, input.cams,
diff --git a/enzyme/benchmarks/ReverseMode/adbench/gmm.h b/enzyme/benchmarks/ReverseMode/adbench/gmm.h
index c5ec727..35f4423 100644
--- a/enzyme/benchmarks/ReverseMode/adbench/gmm.h
+++ b/enzyme/benchmarks/ReverseMode/adbench/gmm.h
@@ -213,17 +213,11 @@
 
     std::vector<std::string> paths = { "10k/gmm_d10_K200.txt" };
 
-    //getTests(paths, "data/1k", "1k/");
-    if (std::getenv("BENCH_LARGE")) {
-      getTests(paths, "data/2.5k", "2.5k/");
-      getTests(paths, "data/10k", "10k/");
-    }
-
     getTests(paths, "data/1k", "1k/");
-    if (std::getenv("BENCH_LARGE")) {
+    //if (std::getenv("BENCH_LARGE")) {
       getTests(paths, "data/2.5k", "2.5k/");
       getTests(paths, "data/10k", "10k/");
-    }
+    //}
 
     std::ofstream jsonfile("results.json", std::ofstream::trunc);
     json test_results;
@@ -274,7 +268,7 @@
 
     struct GMMOutput result = { 0, std::vector<double>(Jcols) };
 
-    //if (0) {
+    if (0) {
       try {
         struct timeval start, end;
         gettimeofday(&start, NULL);
@@ -294,7 +288,7 @@
       } catch (std::bad_alloc) {
         printf("Adept combined 88888888 ooms\n");
       }
-    //}
+    }
     }
 
     for (size_t i = 0; i < 5; i++)
diff --git a/enzyme/benchmarks/ReverseMode/adbench/lstm.h b/enzyme/benchmarks/ReverseMode/adbench/lstm.h
index 4f99841..80452b4 100644
--- a/enzyme/benchmarks/ReverseMode/adbench/lstm.h
+++ b/enzyme/benchmarks/ReverseMode/adbench/lstm.h
@@ -243,8 +243,8 @@
 int main(const int argc, const char* argv[]) {
     printf("starting main\n");
 
-    //std::vector<std::string> paths = { "lstm_l2_c1024.txt", "lstm_l4_c1024.txt", "lstm_l2_c4096.txt", "lstm_l4_c4096.txt" };
-    std::vector<std::string> paths = { "lstm_l4_c4096.txt" };
+    std::vector<std::string> paths = { "lstm_l2_c1024.txt", "lstm_l4_c1024.txt", "lstm_l2_c4096.txt", "lstm_l4_c4096.txt" };
+    //std::vector<std::string> paths = { "lstm_l4_c4096.txt" };
     
     std::ofstream jsonfile("results.json", std::ofstream::trunc);
     json test_results;
@@ -289,7 +289,7 @@
 
     }
 
-    {
+    if (0){
 
      struct LSTMInput input = {};
 
@@ -323,7 +323,7 @@
 
     }
 
-    {
+    for (int j=0; j<5; j++){
 
     struct LSTMInput input = {};
 
@@ -390,7 +390,7 @@
     }
     }
 
-    {
+    for (int j=0; j<5; j++){
 
     struct LSTMInput input = {};
 
diff --git a/enzyme/benchmarks/ReverseMode/ba/Makefile.make b/enzyme/benchmarks/ReverseMode/ba/Makefile.make
index 50ab0cf..cec8d4b 100644
--- a/enzyme/benchmarks/ReverseMode/ba/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/ba/Makefile.make
@@ -4,6 +4,28 @@
 
 dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
 
+include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config
+
+ifeq ($(strip $(CLANG)),)
+$(error CLANG is not set)
+endif
+
+ifeq ($(strip $(PASSES1)),)
+$(error PASSES1 is not set)
+endif
+
+ifeq ($(strip $(PASSES2)),)
+$(error PASSES2 is not set)
+endif
+
+ifeq ($(strip $(PASSES3)),)
+$(error PASSES3 is not set)
+endif
+
+ifneq ($(strip $(PASSES4)),)
+$(error PASSES4 is set)
+endif
+
 clean:
 	rm -f *.ll *.o results.txt results.json
 	cargo +enzyme clean
@@ -12,16 +34,13 @@
 	RUSTFLAGS="-Z autodiff=Enable" cargo +enzyme rustc --release --lib --crate-type=staticlib --features=libm
 
 %-unopt.ll: %.cpp
-	clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
+	$(CLANG) $(BENCH) $^ -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm
 
-%-raw.ll: %-unopt.ll
-	opt $^ $(LOAD) $(ENZYME) -o $@ -S
-
-%-opt.ll: %-raw.ll
-	opt $^ -o $@ -S
+%-opt.ll: %-unopt.ll
+	$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S
 
 ba.o: ba-opt.ll $(dir)/benchmarks/ReverseMode/ba/target/release/libbars.a
-	clang++ $(BENCH) -pthread -O2 $^ -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o $@ $(BENCHLINK) -lpthread -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
+	$(CLANG) -pthread -O3 -fno-math-errno  $^ -o $@ $(BENCHLINK) -lm
 
 results.json: ba.o
 	numactl -C 1 ./$^
diff --git a/enzyme/benchmarks/ReverseMode/ba/ba.cpp b/enzyme/benchmarks/ReverseMode/ba/ba.cpp
index 602af73..c9b29ec 100644
--- a/enzyme/benchmarks/ReverseMode/ba/ba.cpp
+++ b/enzyme/benchmarks/ReverseMode/ba/ba.cpp
@@ -115,6 +115,15 @@
     proj[1] = proj[1] * L;
 }
 
+void radial_distort_restrict(double const *__restrict rad_params, double *__restrict proj)
+{
+    double rsq, L;
+    rsq = sqsum(2, proj);
+    L = 1. + rad_params[0] * rsq + rad_params[1] * rsq * rsq;
+    proj[0] = proj[0] * L;
+    proj[1] = proj[1] * L;
+}
+
 void project_restrict(double const *__restrict cam, double const *__restrict X,
                       double *__restrict proj) {
     double const* C = &cam[3];
@@ -129,7 +138,7 @@
     proj[0] = Xcam[0] / Xcam[2];
     proj[1] = Xcam[1] / Xcam[2];
 
-    radial_distort(&cam[9], proj);
+    radial_distort_restrict(&cam[9], proj);
 
     proj[0] = proj[0] * cam[6] + cam[7];
     proj[1] = proj[1] * cam[6] + cam[8];
diff --git a/enzyme/benchmarks/ReverseMode/ba/src/safe.rs b/enzyme/benchmarks/ReverseMode/ba/src/safe.rs
index 3530c79..dd8bf88 100644
--- a/enzyme/benchmarks/ReverseMode/ba/src/safe.rs
+++ b/enzyme/benchmarks/ReverseMode/ba/src/safe.rs
@@ -182,9 +182,9 @@
 
 #[no_mangle]
 extern "C" fn rust2_ba_objective(
-    n: usize,
-    m: usize,
-    p: usize,
+    n: i32,
+    m: i32,
+    p: i32,
     cams: *const f64,
     x: *const f64,
     w: *const f64,
@@ -193,6 +193,9 @@
     reproj_err: *mut f64,
     w_err: *mut f64,
 ) {
+    let n = n as usize;
+    let m = m as usize;
+    let p = p as usize;
     let cams = unsafe { std::slice::from_raw_parts(cams, n * 11) };
     let x = unsafe { std::slice::from_raw_parts(x, m * 3) };
     let w = unsafe { std::slice::from_raw_parts(w, p) };
diff --git a/enzyme/benchmarks/ReverseMode/ba/src/unsafe.rs b/enzyme/benchmarks/ReverseMode/ba/src/unsafe.rs
index 09f74be..467a7cb 100644
--- a/enzyme/benchmarks/ReverseMode/ba/src/unsafe.rs
+++ b/enzyme/benchmarks/ReverseMode/ba/src/unsafe.rs
@@ -110,9 +110,9 @@
 
 #[no_mangle]
 unsafe extern "C" fn rust2_unsafe_ba_objective(
-    n: usize,
-    m: usize,
-    p: usize,
+    n: i32,
+    m: i32,
+    p: i32,
     cams: *const f64,
     x: *const f64,
     w: *const f64,
@@ -121,6 +121,9 @@
     reproj_err: *mut f64,
     w_err: *mut f64,
 ) {
+    let n = n as usize;
+    let m = m as usize;
+    let p = p as usize;
     for i in 0..p {
         let cam_idx = *obs.add(i * 2 + 0) as usize;
         let pt_idx = *obs.add(i * 2 + 1) as usize;
diff --git a/enzyme/benchmarks/ReverseMode/fft/Makefile.make b/enzyme/benchmarks/ReverseMode/fft/Makefile.make
index b9385cd..9ed3daa 100644
--- a/enzyme/benchmarks/ReverseMode/fft/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/fft/Makefile.make
@@ -4,6 +4,28 @@
 
 dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
 
+include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config
+
+ifeq ($(strip $(CLANG)),)
+$(error CLANG is not set)
+endif
+
+ifeq ($(strip $(PASSES1)),)
+$(error PASSES1 is not set)
+endif
+
+ifeq ($(strip $(PASSES2)),)
+$(error PASSES2 is not set)
+endif
+
+ifeq ($(strip $(PASSES3)),)
+$(error PASSES3 is not set)
+endif
+
+ifneq ($(strip $(PASSES4)),)
+$(error PASSES4 is set)
+endif
+
 clean:
 	rm -f *.ll *.o results.txt results.json
 
@@ -11,17 +33,21 @@
 	RUSTFLAGS="-Z autodiff=Enable" cargo +enzyme rustc --release --lib --crate-type=staticlib
 
 %-unopt.ll: %.cpp
-	clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
+	$(CLANG) $(BENCH) $^ -DCPP=1 -fno-math-errno -fno-plt -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm #-fno-use-cxa-atexit 
+%-unoptr.ll: %.cpp
+	$(CLANG) $(BENCH) $^ -fno-math-errno -fno-plt -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm #-fno-use-cxa-atexit 
 
-%-raw.ll: %-unopt.ll
-	opt $^ $(LOAD) $(ENZYME) -o $@ -S
 
-%-opt.ll: %-raw.ll
-	opt $^ -o $@ -S
+%-opt.ll: %-unopt.ll
+	$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S
+%-optr.ll: %-unoptr.ll
+	$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S
 
 fft.o: fft-opt.ll $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a
-	clang++ $(BENCH) -pthread -O2 $^ -o $@ $(BENCHLINK) -lpthread -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
-	#clang++ $(LOAD) $(BENCH) fft.cpp -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o fft.o -lpthread $(BENCHLINK) -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
+	$(CLANG) -DCPP=1 -pthread -O3 -fno-math-errno -fno-plt  -lpthread -lm $^ -o $@ $(BENCHLINK) -lm
+fftr.o: fft-optr.ll $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a
+	$(CLANG) -pthread -O3 -fno-math-errno -fno-plt  -lpthread -lm $^ -o $@ $(BENCHLINK) -lm
 
-results.json: fft.o
-	./$^ 1048576 | tee $@
+results.json: fftr.o fft.o
+	numactl -C 1 ./fft.o 1048576 | tee results.json
+	numactl -C 1 ./fftr.o 1048576 | tee resultsr.json
diff --git a/enzyme/benchmarks/ReverseMode/gmm/Makefile.make b/enzyme/benchmarks/ReverseMode/gmm/Makefile.make
index 17e22dd..f5f6de4 100644
--- a/enzyme/benchmarks/ReverseMode/gmm/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/gmm/Makefile.make
@@ -1,28 +1,46 @@
-# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" PTR="%ptr" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" LOADCLANG="%loadClangEnzyme" ENZYME="%enzyme" make -B gmm-raw.ll results.json -f %s
+# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" LOADCLANG="%loadClangEnzyme" ENZYME="%enzyme" make -B gmm-opt.ll results.json -f %s
 
 .PHONY: clean
 
 dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
 
+include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config
+
+ifeq ($(strip $(CLANG)),)
+$(error CLANG is not set)
+endif
+
+ifeq ($(strip $(PASSES1)),)
+$(error PASSES1 is not set)
+endif
+
+ifeq ($(strip $(PASSES2)),)
+$(error PASSES2 is not set)
+endif
+
+ifeq ($(strip $(PASSES3)),)
+$(error PASSES3 is not set)
+endif
+
+ifneq ($(strip $(PASSES4)),)
+$(error PASSES4 is set)
+endif
+
 clean:
 	rm -f *.ll *.o results.txt results.json
 	cargo +enzyme clean
 
 $(dir)/benchmarks/ReverseMode/gmm/target/release/libgmmrs.a: src/lib.rs Cargo.toml
-	RUSTFLAGS="-Z autodiff=Enable,LooseTypes" cargo +enzyme rustc --release --lib --crate-type=staticlib --features=libm
+	RUSTFLAGS="-Z autodiff=Enable,PrintPasses,LooseTypes" cargo +enzyme rustc --release --lib --crate-type=staticlib
 
 %-unopt.ll: %.cpp
-	clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
+	$(CLANG) $(BENCH) $^ -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm
 
-%-raw.ll: %-unopt.ll
-	opt $^ $(LOAD) $(ENZYME) -o $@ -S
-
-%-opt.ll: %-raw.ll
-	opt $^ -o $@ -S
+%-opt.ll: %-unopt.ll
+	$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S
 
 gmm.o: gmm-opt.ll $(dir)/benchmarks/ReverseMode/gmm/target/release/libgmmrs.a
-	clang++ -pthread -O2 $^ -o $@ $(BENCHLINK) -lm
-	#clang++ $(LOADCLANG) $(BENCH) gmm.cpp -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o gmm.o -lpthread $(BENCHLINK) -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
+	$(CLANG) -pthread -O3 -fno-math-errno  $^ -o $@ $(BENCHLINK) -lm
 
 results.json: gmm.o
 	numactl -C 1 ./$^
diff --git a/enzyme/benchmarks/ReverseMode/lstm/Makefile.make b/enzyme/benchmarks/ReverseMode/lstm/Makefile.make
index 1388a54..71c6f5b 100644
--- a/enzyme/benchmarks/ReverseMode/lstm/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/lstm/Makefile.make
@@ -4,24 +4,44 @@
 
 dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
 
+include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config
+
+ifeq ($(strip $(CLANG)),)
+$(error CLANG is not set)
+endif
+
+ifeq ($(strip $(PASSES1)),)
+$(error PASSES1 is not set)
+endif
+
+ifeq ($(strip $(PASSES2)),)
+$(error PASSES2 is not set)
+endif
+
+ifeq ($(strip $(PASSES3)),)
+$(error PASSES3 is not set)
+endif
+
+ifneq ($(strip $(PASSES4)),)
+$(error PASSES4 is set)
+endif
+
 clean:
 	rm -f *.ll *.o results.txt results.json
 	cargo +enzyme clean
 
 $(dir)/benchmarks/ReverseMode/lstm/target/release/liblstm.a: src/lib.rs Cargo.toml
-	RUSTFLAGS="-Z autodiff=Enable,LooseTypes" cargo +enzyme rustc --release --lib --crate-type=staticlib
+	RUSTFLAGS="-Z autodiff=Enable,PrintPasses" cargo +enzyme rustc --release --lib --crate-type=staticlib
 
 %-unopt.ll: %.cpp
-	clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
+	$(CLANG) $(BENCH) $^ -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm
 
-%-raw.ll: %-unopt.ll
-	opt $^ $(LOAD) $(ENZYME) -o $@ -S
-
-%-opt.ll: %-raw.ll
-	opt $^ -o $@ -S
+%-opt.ll: %-unopt.ll
+	$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S
 
 lstm.o: lstm-opt.ll $(dir)/benchmarks/ReverseMode/lstm/target/release/liblstm.a
-	clang++ -pthread -O2 $^ -o $@ $(BENCHLINK) -lm
+	$(CLANG) -pthread -O3 $^ -o $@ $(BENCHLINK) -lm
+	#$(CLANG) -pthread -O3 -fno-math-errno $^ -o $@ $(BENCHLINK) -lm
 
 results.json: lstm.o
 	numactl -C 1 ./$^
diff --git a/enzyme/benchmarks/ReverseMode/lstm/src/safe.rs b/enzyme/benchmarks/ReverseMode/lstm/src/safe.rs
index d6847a4..3329ebb 100644
--- a/enzyme/benchmarks/ReverseMode/lstm/src/safe.rs
+++ b/enzyme/benchmarks/ReverseMode/lstm/src/safe.rs
@@ -1,5 +1,6 @@
 use std::slice;
 use std::autodiff::autodiff;
+use std::hint::assert_unchecked;
 
 // Sigmoid on scalar
 fn sigmoid(x: f64) -> f64 {
@@ -32,11 +33,11 @@
     let (a, b) = gates.split_at_mut(2 * hsize);
     let ((forget, ingate), (outgate, change)) = (a.split_at_mut(hsize), b.split_at_mut(hsize));
 
-    //debug_assert_eq!(weight.len(), 4 * hsize);
-    //debug_assert_eq!(bias.len(), 4 * hsize);
-    //debug_assert_eq!(hidden.len(), hsize);
-    //debug_assert!(cell.len() >= hsize);
-    //debug_assert!(input.len() >= hsize);
+    // unsafe {assert_unchecked(weight.len()== 4 * hsize)};
+    // unsafe {assert_unchecked(bias.len()== 4 * hsize)};
+    // unsafe {assert_unchecked(hidden.len()== hsize)};
+    // unsafe {assert_unchecked(cell.len() >= hsize)};
+    // unsafe {assert_unchecked(input.len() >= hsize)};
     // caching input
     for i in 0..hsize {
         forget[i] = sigmoid(input[i] * weight[i] + bias[i]);
@@ -131,7 +132,7 @@
     let mut ypred = vec![0.0; b];
     let mut ynorm = vec![0.0; b];
 
-    //debug_assert!(b > 0);
+    // unsafe{assert_unchecked(b > 0)};
 
     let limit = (c - 1) * b;
     for j in 0..(c - 1) {
@@ -156,15 +157,18 @@
 
 #[no_mangle]
 pub extern "C" fn rust_lstm_objective(
-    l: usize,
-    c: usize,
-    b: usize,
+    l: i32,
+    c: i32,
+    b: i32,
     main_params: *const f64,
     extra_params: *const f64,
     state: *mut f64,
     sequence: *const f64,
     loss: *mut f64,
 ) {
+    let l = l as usize;
+    let c = c as usize;
+    let b = b as usize;
     let (main_params, extra_params, state, sequence) = unsafe {
         (
             slice::from_raw_parts(main_params, 2 * l * 4 * b),
@@ -190,9 +194,9 @@
 
 #[no_mangle]
 pub extern "C" fn rust_dlstm_objective(
-    l: usize,
-    c: usize,
-    b: usize,
+    l: i32,
+    c: i32,
+    b: i32,
     main_params: *const f64,
     d_main_params: *mut f64,
     extra_params: *const f64,
@@ -202,6 +206,9 @@
     res: *mut f64,
     d_res: *mut f64,
 ) {
+    let l = l as usize;
+    let c = c as usize;
+    let b = b as usize;
     let (main_params, d_main_params, extra_params, d_extra_params, state, sequence) = unsafe {
         (
             slice::from_raw_parts(main_params, 2 * l * 4 * b),
diff --git a/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make b/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make
index 87af95f..582ba79 100644
--- a/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make
+++ b/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make
@@ -4,6 +4,28 @@
 
 dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)
 
+include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config
+
+ifeq ($(strip $(CLANG)),)
+$(error CLANG is not set)
+endif
+
+ifeq ($(strip $(PASSES1)),)
+$(error PASSES1 is not set)
+endif
+
+ifeq ($(strip $(PASSES2)),)
+$(error PASSES2 is not set)
+endif
+
+ifeq ($(strip $(PASSES3)),)
+$(error PASSES3 is not set)
+endif
+
+ifneq ($(strip $(PASSES4)),)
+$(error PASSES4 is set)
+endif
+
 clean:
 	rm -f *.ll *.o results.txt results.json
 	cargo +enzyme clean
@@ -12,16 +34,13 @@
 	RUSTFLAGS="-Z autodiff=Enable,LooseTypes" cargo +enzyme rustc --release --lib --crate-type=staticlib
 
 %-unopt.ll: %.cpp
-	clang++ $(BENCH) $(PTR) $^ -O2 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
+	$(CLANG) $(BENCH) $^ -pthread -O3 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm
 
-%-raw.ll: %-unopt.ll
-	opt $^ $(LOAD) $(ENZYME) -o $@ -S
-
-%-opt.ll: %-raw.ll
-	opt $^ -o $@ -S
+%-opt.ll: %-unopt.ll
+	$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S
 
 ode.o: ode-opt.ll $(dir)/benchmarks/ReverseMode/ode-real/target/release/libode.a
-	clang++ $(BENCH) -O2 $^ -o $@ $(BENCHLINK)
+	$(CLANG) -pthread -O3 -fno-math-errno  $^ -o $@ $(BENCHLINK)
 
 results.json: ode.o
 	numactl -C 1 ./$^ 1000 | tee $@