flang/runtime/reduction-templates.h - rust-lang/llvm-project - Git at Google

 //===-- runtime/reduction-templates.h -------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 // Generic function templates used by various reduction transformation
 // intrinsic functions (SUM, PRODUCT, &c.)
 //
 // * Partial reductions (i.e., those with DIM= arguments that are not
 //   required to be 1 by the rank of the argument) return arrays that
 //   are dynamically allocated in a caller-supplied descriptor.
 // * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
 //   return integer vectors of some kind, not scalars; a caller-supplied
 //   descriptor is used
 // * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
 //   length results, dynamically allocated in a caller-supplied descriptor

 #ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
 #define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_

 #include "terminator.h"
 #include "tools.h"
 #include "flang/Runtime/cpp-type.h"
 #include "flang/Runtime/descriptor.h"

 namespace Fortran::runtime {

 // Reductions are implemented with *accumulators*, which are instances of
 // classes that incrementally build up the result (or an element thereof) during
 // a traversal of the unmasked elements of an array.  Each accumulator class
 // supports a constructor (which captures a reference to the array), an
 // AccumulateAt() member function that applies supplied subscripts to the
 // array and does something with a scalar element, and a GetResult()
 // member function that copies a final result into its destination.

 // Total reduction of the array argument to a scalar (or to a vector in the
 // cases of FINDLOC, MAXLOC, & MINLOC).  These are the cases without DIM= or
 // cases where the argument has rank 1 and DIM=, if present, must be 1.
 template <typename TYPE, typename ACCUMULATOR>
 inline void DoTotalReduction(const Descriptor &x, int dim,
     const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
     Terminator &terminator) {
   if (dim < 0 || dim > 1) {
     terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
         intrinsic, dim, x.rank());
   }
   SubscriptValue xAt[maxRank];
   x.GetLowerBounds(xAt);
   if (mask) {
     CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
     SubscriptValue maskAt[maxRank];
     mask->GetLowerBounds(maskAt);
     if (mask->rank() > 0) {
       for (auto elements{x.Elements()}; elements--;
            x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
         if (IsLogicalElementTrue(*mask, maskAt)) {
           accumulator.template AccumulateAt<TYPE>(xAt);
         }
       }
       return;
     } else if (!IsLogicalElementTrue(*mask, maskAt)) {
       // scalar MASK=.FALSE.: return identity value
       return;
     }
   }
   // No MASK=, or scalar MASK=.TRUE.
   for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
     if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
       break; // cut short, result is known
     }
   }
 }

 template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
 inline CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask,
     ACCUMULATOR &&accumulator, const char *intrinsic) {
   Terminator terminator{source, line};
   RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
   using CppType = CppTypeFor<CAT, KIND>;
   DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
   CppType result;
 #ifdef _MSC_VER // work around MSVC spurious error
   accumulator.GetResult(&result);
 #else
   accumulator.template GetResult(&result);
 #endif
   return result;
 }

 // For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
 // of the array is [2,3,5], the shape of the result is [2,5] and
 // result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
 // lower bounds other than one.  This utility subroutine creates an
 // array of subscripts [j,_,k] for result subscripts [j,k] so that the
 // elemets of array(j,:,k) can be reduced.
 inline void GetExpandedSubscripts(SubscriptValue at[],
     const Descriptor &descriptor, int zeroBasedDim,
     const SubscriptValue from[]) {
   descriptor.GetLowerBounds(at);
   int rank{descriptor.rank()};
   int j{0};
   for (; j < zeroBasedDim; ++j) {
     at[j] += from[j] - 1 /*lower bound*/;
   }
   for (++j; j < rank; ++j) {
     at[j] += from[j - 1] - 1;
   }
 }

 template <typename TYPE, typename ACCUMULATOR>
 inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
     SubscriptValue subscripts[], TYPE *result, ACCUMULATOR &accumulator) {
   SubscriptValue xAt[maxRank];
   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
   const auto &dim{x.GetDimension(zeroBasedDim)};
   SubscriptValue at{dim.LowerBound()};
   for (auto n{dim.Extent()}; n-- > 0; ++at) {
     xAt[zeroBasedDim] = at;
     if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
       break;
     }
   }
 #ifdef _MSC_VER // work around MSVC spurious error
   accumulator.GetResult(result, zeroBasedDim);
 #else
   accumulator.template GetResult(result, zeroBasedDim);
 #endif
 }

 template <typename TYPE, typename ACCUMULATOR>
 inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
     SubscriptValue subscripts[], const Descriptor &mask, TYPE *result,
     ACCUMULATOR &accumulator) {
   SubscriptValue xAt[maxRank], maskAt[maxRank];
   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
   GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
   const auto &xDim{x.GetDimension(zeroBasedDim)};
   SubscriptValue xPos{xDim.LowerBound()};
   const auto &maskDim{mask.GetDimension(zeroBasedDim)};
   SubscriptValue maskPos{maskDim.LowerBound()};
   for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
        ++xPos, ++maskPos) {
     maskAt[zeroBasedDim] = maskPos;
     if (IsLogicalElementTrue(mask, maskAt)) {
       xAt[zeroBasedDim] = xPos;
       if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
         break;
       }
     }
   }
 #ifdef _MSC_VER // work around MSVC spurious error
   accumulator.GetResult(result, zeroBasedDim);
 #else
   accumulator.template GetResult(result, zeroBasedDim);
 #endif
 }

 // Utility: establishes & allocates the result array for a partial
 // reduction (i.e., one with DIM=).
 static void CreatePartialReductionResult(Descriptor &result,
     const Descriptor &x, int dim, Terminator &terminator, const char *intrinsic,
     TypeCode typeCode) {
   int xRank{x.rank()};
   if (dim < 1 || dim > xRank) {
     terminator.Crash(
         "%s: bad DIM=%d for ARRAY with rank %d", intrinsic, dim, xRank);
   }
   int zeroBasedDim{dim - 1};
   SubscriptValue resultExtent[maxRank];
   for (int j{0}; j < zeroBasedDim; ++j) {
     resultExtent[j] = x.GetDimension(j).Extent();
   }
   for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
     resultExtent[j - 1] = x.GetDimension(j).Extent();
   }
   result.Establish(typeCode, x.ElementBytes(), nullptr, xRank - 1, resultExtent,
       CFI_attribute_allocatable);
   for (int j{0}; j + 1 < xRank; ++j) {
     result.GetDimension(j).SetBounds(1, resultExtent[j]);
   }
   if (int stat{result.Allocate()}) {
     terminator.Crash(
         "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
   }
 }

 // Partial reductions with DIM=

 template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
 inline void PartialReduction(Descriptor &result, const Descriptor &x, int dim,
     const Descriptor *mask, Terminator &terminator, const char *intrinsic,
     ACCUMULATOR &accumulator) {
   CreatePartialReductionResult(
       result, x, dim, terminator, intrinsic, TypeCode{CAT, KIND});
   SubscriptValue at[maxRank];
   result.GetLowerBounds(at);
   INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
   using CppType = CppTypeFor<CAT, KIND>;
   if (mask) {
     CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
     SubscriptValue maskAt[maxRank]; // contents unused
     if (mask->rank() > 0) {
       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
         accumulator.Reinitialize();
         ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
             x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
       }
       return;
     } else if (!IsLogicalElementTrue(*mask, maskAt)) {
       // scalar MASK=.FALSE.
       accumulator.Reinitialize();
       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
         accumulator.GetResult(result.Element<CppType>(at));
       }
       return;
     }
   }
   // No MASK= or scalar MASK=.TRUE.
   for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
     accumulator.Reinitialize();
     ReduceDimToScalar<CppType, ACCUMULATOR>(
         x, dim - 1, at, result.Element<CppType>(at), accumulator);
   }
 }

 template <template <typename> class ACCUM>
 struct PartialIntegerReductionHelper {
   template <int KIND> struct Functor {
     static constexpr int Intermediate{
         std::max(KIND, 4)}; // use at least "int" for intermediate results
     void operator()(Descriptor &result, const Descriptor &x, int dim,
         const Descriptor *mask, Terminator &terminator,
         const char *intrinsic) const {
       using Accumulator =
           ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
       Accumulator accumulator{x};
       PartialReduction<Accumulator, TypeCategory::Integer, KIND>(
           result, x, dim, mask, terminator, intrinsic, accumulator);
     }
   };
 };

 template <template <typename> class INTEGER_ACCUM>
 inline void PartialIntegerReduction(Descriptor &result, const Descriptor &x,
     int dim, int kind, const Descriptor *mask, const char *intrinsic,
     Terminator &terminator) {
   ApplyIntegerKind<
       PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
       kind, terminator, result, x, dim, mask, terminator, intrinsic);
 }

 template <TypeCategory CAT, template <typename> class ACCUM>
 struct PartialFloatingReductionHelper {
   template <int KIND> struct Functor {
     static constexpr int Intermediate{
         std::max(KIND, 8)}; // use at least "double" for intermediate results
     void operator()(Descriptor &result, const Descriptor &x, int dim,
         const Descriptor *mask, Terminator &terminator,
         const char *intrinsic) const {
       using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
       Accumulator accumulator{x};
       PartialReduction<Accumulator, CAT, KIND>(
           result, x, dim, mask, terminator, intrinsic, accumulator);
     }
   };
 };

 template <template <typename> class INTEGER_ACCUM,
     template <typename> class REAL_ACCUM,
     template <typename> class COMPLEX_ACCUM>
 inline void TypedPartialNumericReduction(Descriptor &result,
     const Descriptor &x, int dim, const char *source, int line,
     const Descriptor *mask, const char *intrinsic) {
   Terminator terminator{source, line};
   auto catKind{x.type().GetCategoryAndKind()};
   RUNTIME_CHECK(terminator, catKind.has_value());
   switch (catKind->first) {
   case TypeCategory::Integer:
     PartialIntegerReduction<INTEGER_ACCUM>(
         result, x, dim, catKind->second, mask, intrinsic, terminator);
     break;
   case TypeCategory::Real:
     ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
                                REAL_ACCUM>::template Functor,
         void>(catKind->second, terminator, result, x, dim, mask, terminator,
         intrinsic);
     break;
   case TypeCategory::Complex:
     ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
                                COMPLEX_ACCUM>::template Functor,
         void>(catKind->second, terminator, result, x, dim, mask, terminator,
         intrinsic);
     break;
   default:
     terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
   }
 }

 template <typename ACCUMULATOR> struct LocationResultHelper {
   template <int KIND> struct Functor {
     void operator()(ACCUMULATOR &accumulator, const Descriptor &result) const {
       accumulator.GetResult(
           result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
     }
   };
 };

 template <typename ACCUMULATOR> struct PartialLocationHelper {
   template <int KIND> struct Functor {
     void operator()(Descriptor &result, const Descriptor &x, int dim,
         const Descriptor *mask, Terminator &terminator, const char *intrinsic,
         ACCUMULATOR &accumulator) const {
       PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(
           result, x, dim, mask, terminator, intrinsic, accumulator);
     }
   };
 };

 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
	//===-- runtime/reduction-templates.h -------------------------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	// Generic function templates used by various reduction transformation
	// intrinsic functions (SUM, PRODUCT, &c.)
	//
	// * Partial reductions (i.e., those with DIM= arguments that are not
	// required to be 1 by the rank of the argument) return arrays that
	// are dynamically allocated in a caller-supplied descriptor.
	// * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
	// return integer vectors of some kind, not scalars; a caller-supplied
	// descriptor is used
	// * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
	// length results, dynamically allocated in a caller-supplied descriptor

	#ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
	#define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_

	#include "terminator.h"
	#include "tools.h"
	#include "flang/Runtime/cpp-type.h"
	#include "flang/Runtime/descriptor.h"

	namespace Fortran::runtime {

	// Reductions are implemented with accumulators, which are instances of
	// classes that incrementally build up the result (or an element thereof) during
	// a traversal of the unmasked elements of an array. Each accumulator class
	// supports a constructor (which captures a reference to the array), an
	// AccumulateAt() member function that applies supplied subscripts to the
	// array and does something with a scalar element, and a GetResult()
	// member function that copies a final result into its destination.

	// Total reduction of the array argument to a scalar (or to a vector in the
	// cases of FINDLOC, MAXLOC, & MINLOC). These are the cases without DIM= or
	// cases where the argument has rank 1 and DIM=, if present, must be 1.
	template <typename TYPE, typename ACCUMULATOR>
	inline void DoTotalReduction(const Descriptor &x, int dim,
	const Descriptor mask, ACCUMULATOR &accumulator, const char intrinsic,
	Terminator &terminator) {
	if (dim < 0 \|\| dim > 1) {
	terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
	intrinsic, dim, x.rank());
	}
	SubscriptValue xAt[maxRank];
	x.GetLowerBounds(xAt);
	if (mask) {
	CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
	SubscriptValue maskAt[maxRank];
	mask->GetLowerBounds(maskAt);
	if (mask->rank() > 0) {
	for (auto elements{x.Elements()}; elements--;
	x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
	if (IsLogicalElementTrue(*mask, maskAt)) {
	accumulator.template AccumulateAt<TYPE>(xAt);
	}
	}
	return;
	} else if (!IsLogicalElementTrue(*mask, maskAt)) {
	// scalar MASK=.FALSE.: return identity value
	return;
	}
	}
	// No MASK=, or scalar MASK=.TRUE.
	for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
	if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
	break; // cut short, result is known
	}
	}
	}

	template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
	inline CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
	const char source, int line, int dim, const Descriptor mask,
	ACCUMULATOR &&accumulator, const char *intrinsic) {
	Terminator terminator{source, line};
	RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
	using CppType = CppTypeFor<CAT, KIND>;
	DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
	CppType result;
	#ifdef _MSC_VER // work around MSVC spurious error
	accumulator.GetResult(&result);
	#else
	accumulator.template GetResult(&result);
	#endif
	return result;
	}

	// For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
	// of the array is [2,3,5], the shape of the result is [2,5] and
	// result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
	// lower bounds other than one. This utility subroutine creates an
	// array of subscripts [j,_,k] for result subscripts [j,k] so that the
	// elemets of array(j,:,k) can be reduced.
	inline void GetExpandedSubscripts(SubscriptValue at[],
	const Descriptor &descriptor, int zeroBasedDim,
	const SubscriptValue from[]) {
	descriptor.GetLowerBounds(at);
	int rank{descriptor.rank()};
	int j{0};
	for (; j < zeroBasedDim; ++j) {
	at[j] += from[j] - 1 /lower bound/;
	}
	for (++j; j < rank; ++j) {
	at[j] += from[j - 1] - 1;
	}
	}

	template <typename TYPE, typename ACCUMULATOR>
	inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
	SubscriptValue subscripts[], TYPE *result, ACCUMULATOR &accumulator) {
	SubscriptValue xAt[maxRank];
	GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
	const auto &dim{x.GetDimension(zeroBasedDim)};
	SubscriptValue at{dim.LowerBound()};
	for (auto n{dim.Extent()}; n-- > 0; ++at) {
	xAt[zeroBasedDim] = at;
	if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
	break;
	}
	}
	#ifdef _MSC_VER // work around MSVC spurious error
	accumulator.GetResult(result, zeroBasedDim);
	#else
	accumulator.template GetResult(result, zeroBasedDim);
	#endif
	}

	template <typename TYPE, typename ACCUMULATOR>
	inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
	SubscriptValue subscripts[], const Descriptor &mask, TYPE *result,
	ACCUMULATOR &accumulator) {
	SubscriptValue xAt[maxRank], maskAt[maxRank];
	GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
	GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
	const auto &xDim{x.GetDimension(zeroBasedDim)};
	SubscriptValue xPos{xDim.LowerBound()};
	const auto &maskDim{mask.GetDimension(zeroBasedDim)};
	SubscriptValue maskPos{maskDim.LowerBound()};
	for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
	++xPos, ++maskPos) {
	maskAt[zeroBasedDim] = maskPos;
	if (IsLogicalElementTrue(mask, maskAt)) {
	xAt[zeroBasedDim] = xPos;
	if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
	break;
	}
	}
	}
	#ifdef _MSC_VER // work around MSVC spurious error
	accumulator.GetResult(result, zeroBasedDim);
	#else
	accumulator.template GetResult(result, zeroBasedDim);
	#endif
	}

	// Utility: establishes & allocates the result array for a partial
	// reduction (i.e., one with DIM=).
	static void CreatePartialReductionResult(Descriptor &result,
	const Descriptor &x, int dim, Terminator &terminator, const char *intrinsic,
	TypeCode typeCode) {
	int xRank{x.rank()};
	if (dim < 1 \|\| dim > xRank) {
	terminator.Crash(
	"%s: bad DIM=%d for ARRAY with rank %d", intrinsic, dim, xRank);
	}
	int zeroBasedDim{dim - 1};
	SubscriptValue resultExtent[maxRank];
	for (int j{0}; j < zeroBasedDim; ++j) {
	resultExtent[j] = x.GetDimension(j).Extent();
	}
	for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
	resultExtent[j - 1] = x.GetDimension(j).Extent();
	}
	result.Establish(typeCode, x.ElementBytes(), nullptr, xRank - 1, resultExtent,
	CFI_attribute_allocatable);
	for (int j{0}; j + 1 < xRank; ++j) {
	result.GetDimension(j).SetBounds(1, resultExtent[j]);
	}
	if (int stat{result.Allocate()}) {
	terminator.Crash(
	"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
	}
	}

	// Partial reductions with DIM=

	template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
	inline void PartialReduction(Descriptor &result, const Descriptor &x, int dim,
	const Descriptor mask, Terminator &terminator, const char intrinsic,
	ACCUMULATOR &accumulator) {
	CreatePartialReductionResult(
	result, x, dim, terminator, intrinsic, TypeCode{CAT, KIND});
	SubscriptValue at[maxRank];
	result.GetLowerBounds(at);
	INTERNAL_CHECK(result.rank() == 0 \|\| at[0] == 1);
	using CppType = CppTypeFor<CAT, KIND>;
	if (mask) {
	CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
	SubscriptValue maskAt[maxRank]; // contents unused
	if (mask->rank() > 0) {
	for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
	accumulator.Reinitialize();
	ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
	x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
	}
	return;
	} else if (!IsLogicalElementTrue(*mask, maskAt)) {
	// scalar MASK=.FALSE.
	accumulator.Reinitialize();
	for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
	accumulator.GetResult(result.Element<CppType>(at));
	}
	return;
	}
	}
	// No MASK= or scalar MASK=.TRUE.
	for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
	accumulator.Reinitialize();
	ReduceDimToScalar<CppType, ACCUMULATOR>(
	x, dim - 1, at, result.Element<CppType>(at), accumulator);
	}
	}

	template <template <typename> class ACCUM>
	struct PartialIntegerReductionHelper {
	template <int KIND> struct Functor {
	static constexpr int Intermediate{
	std::max(KIND, 4)}; // use at least "int" for intermediate results
	void operator()(Descriptor &result, const Descriptor &x, int dim,
	const Descriptor *mask, Terminator &terminator,
	const char *intrinsic) const {
	using Accumulator =
	ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
	Accumulator accumulator{x};
	PartialReduction<Accumulator, TypeCategory::Integer, KIND>(
	result, x, dim, mask, terminator, intrinsic, accumulator);
	}
	};
	};

	template <template <typename> class INTEGER_ACCUM>
	inline void PartialIntegerReduction(Descriptor &result, const Descriptor &x,
	int dim, int kind, const Descriptor mask, const char intrinsic,
	Terminator &terminator) {
	ApplyIntegerKind<
	PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
	kind, terminator, result, x, dim, mask, terminator, intrinsic);
	}

	template <TypeCategory CAT, template <typename> class ACCUM>
	struct PartialFloatingReductionHelper {
	template <int KIND> struct Functor {
	static constexpr int Intermediate{
	std::max(KIND, 8)}; // use at least "double" for intermediate results
	void operator()(Descriptor &result, const Descriptor &x, int dim,
	const Descriptor *mask, Terminator &terminator,
	const char *intrinsic) const {
	using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
	Accumulator accumulator{x};
	PartialReduction<Accumulator, CAT, KIND>(
	result, x, dim, mask, terminator, intrinsic, accumulator);
	}
	};
	};

	template <template <typename> class INTEGER_ACCUM,
	template <typename> class REAL_ACCUM,
	template <typename> class COMPLEX_ACCUM>
	inline void TypedPartialNumericReduction(Descriptor &result,
	const Descriptor &x, int dim, const char *source, int line,
	const Descriptor mask, const char intrinsic) {
	Terminator terminator{source, line};
	auto catKind{x.type().GetCategoryAndKind()};
	RUNTIME_CHECK(terminator, catKind.has_value());
	switch (catKind->first) {
	case TypeCategory::Integer:
	PartialIntegerReduction<INTEGER_ACCUM>(
	result, x, dim, catKind->second, mask, intrinsic, terminator);
	break;
	case TypeCategory::Real:
	ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
	REAL_ACCUM>::template Functor,
	void>(catKind->second, terminator, result, x, dim, mask, terminator,
	intrinsic);
	break;
	case TypeCategory::Complex:
	ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
	COMPLEX_ACCUM>::template Functor,
	void>(catKind->second, terminator, result, x, dim, mask, terminator,
	intrinsic);
	break;
	default:
	terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
	}
	}

	template <typename ACCUMULATOR> struct LocationResultHelper {
	template <int KIND> struct Functor {
	void operator()(ACCUMULATOR &accumulator, const Descriptor &result) const {
	accumulator.GetResult(
	result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
	}
	};
	};

	template <typename ACCUMULATOR> struct PartialLocationHelper {
	template <int KIND> struct Functor {
	void operator()(Descriptor &result, const Descriptor &x, int dim,
	const Descriptor mask, Terminator &terminator, const char intrinsic,
	ACCUMULATOR &accumulator) const {
	PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(
	result, x, dim, mask, terminator, intrinsic, accumulator);
	}
	};
	};

	} // namespace Fortran::runtime
	#endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_