| /* | 
 |     Copyright (c) 2014-2016 Intel Corporation.  All Rights Reserved. | 
 |  | 
 |     Redistribution and use in source and binary forms, with or without | 
 |     modification, are permitted provided that the following conditions | 
 |     are met: | 
 |  | 
 |       * Redistributions of source code must retain the above copyright | 
 |         notice, this list of conditions and the following disclaimer. | 
 |       * Redistributions in binary form must reproduce the above copyright | 
 |         notice, this list of conditions and the following disclaimer in the | 
 |         documentation and/or other materials provided with the distribution. | 
 |       * Neither the name of Intel Corporation nor the names of its | 
 |         contributors may be used to endorse or promote products derived | 
 |         from this software without specific prior written permission. | 
 |  | 
 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
 |     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 |     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
 |     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 
 |     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
 |     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
 |     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
 |     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
 |     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 |     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
 |     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 | */ | 
 |  | 
 |  | 
 | #include "cean_util.h" | 
 | #include "offload_common.h" | 
 |  | 
 | // 1. allocate element of CeanReadRanges type | 
 | // 2. initialized it for reading consequently contiguous ranges | 
 | //    described by "ap" argument | 
 | CeanReadRanges * init_read_ranges_arr_desc(const Arr_Desc *ap) | 
 | { | 
 |     CeanReadRanges * res; | 
 |  | 
 |     // find the max contiguous range | 
 |     int64_t rank = ap->rank - 1; | 
 |     int64_t length = ap->dim[rank].size; | 
 |     for (; rank >= 0; rank--) { | 
 |         if (ap->dim[rank].stride == 1) { | 
 |             length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1); | 
 |             if (rank > 0 && length != ap->dim[rank - 1].size) { | 
 |                 break; | 
 |             } | 
 |         } | 
 |         else { | 
 |             break; | 
 |         } | 
 |     } | 
 |  | 
 |     res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) + | 
 |                                   (ap->rank - rank) * sizeof(CeanReadDim)); | 
 |     if (res == NULL) | 
 |       LIBOFFLOAD_ERROR(c_malloc); | 
 |  | 
 |     res->arr_desc = const_cast<Arr_Desc*>(ap); | 
 |     res->current_number = 0; | 
 |     res->range_size = length; | 
 |     res->last_noncont_ind = rank; | 
 |  | 
 |     // calculate number of contiguous ranges inside noncontiguous dimensions | 
 |     int count = 1; | 
 |     bool prev_is_cont = true; | 
 |     int64_t offset = 0; | 
 |  | 
 |     for (; rank >= 0; rank--) { | 
 |         res->Dim[rank].count = count; | 
 |         res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size; | 
 |         count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 : | 
 |             (ap->dim[rank].upper - ap->dim[rank].lower + | 
 |             ap->dim[rank].stride) / ap->dim[rank].stride); | 
 |         prev_is_cont = false; | 
 |         offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) * | 
 |                  ap->dim[rank].size; | 
 |     } | 
 |     res->range_max_number = count; | 
 |     res -> ptr = (void*)ap->base; | 
 |     res -> init_offset = offset; | 
 |     return res; | 
 | } | 
 |  | 
 | // check if ranges described by 1 argument could be transferred into ranges | 
 | // described by 2-nd one | 
 | bool cean_ranges_match( | 
 |     CeanReadRanges * read_rng1, | 
 |     CeanReadRanges * read_rng2 | 
 | ) | 
 | { | 
 |     return ( read_rng1 == NULL || read_rng2 == NULL || | 
 |             (read_rng1->range_size % read_rng2->range_size == 0 || | 
 |             read_rng2->range_size % read_rng1->range_size == 0)); | 
 | } | 
 |  | 
 | // Set next offset and length and returns true for next range. | 
 | // Returns false if the ranges are over. | 
 | bool get_next_range( | 
 |     CeanReadRanges * read_rng, | 
 |     int64_t *offset | 
 | ) | 
 | { | 
 |     if (++read_rng->current_number > read_rng->range_max_number) { | 
 |         read_rng->current_number = 0; | 
 |         return false; | 
 |     } | 
 |     int rank = 0; | 
 |     int num = read_rng->current_number - 1; | 
 |     int64_t cur_offset = 0; | 
 |     int num_loc; | 
 |     for (; rank <= read_rng->last_noncont_ind; rank++) { | 
 |         num_loc = num / read_rng->Dim[rank].count; | 
 |         cur_offset += num_loc * read_rng->Dim[rank].size; | 
 |         num = num % read_rng->Dim[rank].count; | 
 |     } | 
 |     *offset = cur_offset + read_rng->init_offset; | 
 |     return true; | 
 | } | 
 |  | 
 | bool is_arr_desc_contiguous(const Arr_Desc *ap) | 
 | { | 
 |     int64_t rank = ap->rank - 1; | 
 |     int64_t length = ap->dim[rank].size; | 
 |     for (; rank >= 0; rank--) { | 
 |         if (ap->dim[rank].stride > 1 && | 
 |             ap->dim[rank].upper - ap->dim[rank].lower != 0) { | 
 |                 return false; | 
 |         } | 
 |         else if (length != ap->dim[rank].size) { | 
 |             for (; rank >= 0; rank--) { | 
 |                 if (ap->dim[rank].upper - ap->dim[rank].lower != 0) { | 
 |                     return false; | 
 |                 } | 
 |             } | 
 |             return true; | 
 |         } | 
 |         length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1); | 
 |     } | 
 |     return true; | 
 | } | 
 |  | 
 | int64_t cean_get_transf_size(CeanReadRanges * read_rng) | 
 | { | 
 |     return(read_rng->range_max_number * read_rng->range_size); | 
 | } | 
 |  | 
 | static uint64_t last_left, last_right; | 
 |  | 
 | typedef void (*fpp)( | 
 |     const char *spaces, | 
 |     uint64_t low, | 
 |     uint64_t high, | 
 |     int esize, | 
 |     bool print_values | 
 | ); | 
 |  | 
 | static void generate_one_range( | 
 |     const char *spaces, | 
 |     uint64_t lrange, | 
 |     uint64_t rrange, | 
 |     fpp fp, | 
 |     int esize, | 
 |     bool print_values | 
 | ) | 
 | { | 
 |     OFFLOAD_TRACE(3, | 
 |         "%s    generate_one_range(lrange=%p, rrange=%p, esize=%d)\n", | 
 |         spaces, (void*)lrange, (void*)rrange, esize); | 
 |     if (last_left == -1) { | 
 |         // First range | 
 |         last_left = lrange; | 
 |     } | 
 |     else { | 
 |         if (lrange == last_right+1) { | 
 |             // Extend previous range, don't print | 
 |         } | 
 |         else { | 
 |             (*fp)(spaces, last_left, last_right, esize, print_values); | 
 |             last_left = lrange; | 
 |         } | 
 |     } | 
 |     last_right = rrange; | 
 | } | 
 |  | 
 | static bool element_is_contiguous( | 
 |     uint64_t rank, | 
 |     const struct Dim_Desc *ddp | 
 | ) | 
 | {     | 
 |     if (rank == 1) { | 
 |         return (ddp[0].lower == ddp[0].upper || ddp[0].stride == 1); | 
 |     } | 
 |     else { | 
 |         return ((ddp[0].size == (ddp[1].upper-ddp[1].lower+1)*ddp[1].size) && | 
 |                  element_is_contiguous(rank-1, ddp++)); | 
 |     } | 
 | } | 
 |  | 
 | static void generate_mem_ranges_one_rank( | 
 |     const char *spaces, | 
 |     uint64_t base, | 
 |     uint64_t rank, | 
 |     const struct Dim_Desc *ddp, | 
 |     fpp fp, | 
 |     int esize, | 
 |     bool print_values | 
 | ) | 
 | { | 
 |     uint64_t lindex = ddp->lindex; | 
 |     uint64_t lower = ddp->lower; | 
 |     uint64_t upper = ddp->upper; | 
 |     uint64_t stride = ddp->stride; | 
 |     uint64_t size = ddp->size; | 
 |     OFFLOAD_TRACE(3, | 
 |         "%s    " | 
 |         "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, " | 
 |         "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n", | 
 |         spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize); | 
 |  | 
 |     if (element_is_contiguous(rank, ddp)) { | 
 |         uint64_t lrange, rrange; | 
 |         lrange = base + (lower-lindex)*size; | 
 |         rrange = lrange + (upper-lower+1)*size - 1; | 
 |         generate_one_range(spaces, lrange, rrange, fp, esize, print_values); | 
 |     } | 
 |     else { | 
 |         if (rank == 1) { | 
 |             for (int i=lower-lindex; i<=upper-lindex; i+=stride) { | 
 |                 uint64_t lrange, rrange; | 
 |                 lrange = base + i*size; | 
 |                 rrange = lrange + size - 1; | 
 |                 generate_one_range(spaces, lrange, rrange, | 
 | 		                   fp, esize, print_values); | 
 |             } | 
 |         } | 
 |         else { | 
 |             for (int i=lower-lindex; i<=upper-lindex; i+=stride) { | 
 |                 generate_mem_ranges_one_rank( | 
 |                     spaces, base+i*size, rank-1, ddp+1, | 
 |                     fp, esize, print_values); | 
 |  | 
 |             } | 
 |         } | 
 |     } | 
 | } | 
 |  | 
 | static void generate_mem_ranges( | 
 |     const char *spaces, | 
 |     const Arr_Desc *adp, | 
 |     bool deref, | 
 |     fpp fp, | 
 |     bool print_values | 
 | ) | 
 | { | 
 |     uint64_t esize; | 
 |  | 
 |     OFFLOAD_TRACE(3, | 
 |         "%s    " | 
 |         "generate_mem_ranges(adp=%p, deref=%d, fp)\n", | 
 |         spaces, adp, deref); | 
 |     last_left = -1; | 
 |     last_right = -2; | 
 |  | 
 |     // Element size is derived from last dimension | 
 |     esize = adp->dim[adp->rank-1].size; | 
 |  | 
 |     generate_mem_ranges_one_rank( | 
 |         // For c_cean_var the base addr is the address of the data | 
 |         // For c_cean_var_ptr the base addr is dereferenced to get to the data | 
 |         spaces, deref ? *((uint64_t*)(adp->base)) : adp->base, | 
 |         adp->rank, &adp->dim[0], fp, esize, print_values); | 
 |     (*fp)(spaces, last_left, last_right, esize, print_values); | 
 | } | 
 |  | 
 | // returns offset and length of the data to be transferred | 
 | void __arr_data_offset_and_length( | 
 |     const Arr_Desc *adp, | 
 |     int64_t &offset, | 
 |     int64_t &length | 
 | ) | 
 | { | 
 |     int64_t rank = adp->rank - 1; | 
 |     int64_t size = adp->dim[rank].size; | 
 |     int64_t r_off = 0; // offset from right boundary | 
 |  | 
 |     // find the rightmost dimension which takes just part of its | 
 |     // range. We define it if the size of left rank is not equal | 
 |     // the range's length between upper and lower boungaries | 
 |     while (rank > 0) { | 
 |         size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1); | 
 |         if (size != adp->dim[rank - 1].size) { | 
 |             break; | 
 |         } | 
 |         rank--; | 
 |     } | 
 |  | 
 |     offset = (adp->dim[rank].lower - adp->dim[rank].lindex) * | 
 |              adp->dim[rank].size; | 
 |  | 
 |     // find gaps both from the left - offset and from the right - r_off | 
 |     for (rank--; rank >= 0; rank--) { | 
 |         offset += (adp->dim[rank].lower - adp->dim[rank].lindex) * | 
 |                   adp->dim[rank].size; | 
 |         r_off += adp->dim[rank].size - | 
 |                  (adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) * | 
 |                  adp->dim[rank + 1].size; | 
 |     } | 
 |     length = (adp->dim[0].upper - adp->dim[0].lindex + 1) * | 
 |              adp->dim[0].size - offset - r_off; | 
 | } | 
 |  | 
 | #if OFFLOAD_DEBUG > 0 | 
 |  | 
 | static void print_range( | 
 |     const char *spaces, | 
 |     uint64_t low, | 
 |     uint64_t high, | 
 |     int esize, | 
 |     bool print_values | 
 | ) | 
 | { | 
 |     char buffer[1024]; | 
 |     char number[32]; | 
 |  | 
 |     OFFLOAD_TRACE(3, "%s        print_range(low=%p, high=%p, esize=%d)\n", | 
 |         spaces, (void*)low, (void*)high, esize); | 
 |  | 
 |     if (console_enabled < 4 || !print_values) { | 
 |         return; | 
 |     } | 
 |     OFFLOAD_TRACE(4, "%s            values:\n", spaces); | 
 |     int count = 0; | 
 |     buffer[0] = '\0'; | 
 |     while (low <= high) | 
 |     { | 
 |         switch (esize) | 
 |         { | 
 |         case 1: | 
 |             sprintf(number, "%d ", *((char *)low)); | 
 |             low += 1; | 
 |             break; | 
 |         case 2: | 
 |             sprintf(number, "%d ", *((short *)low)); | 
 |             low += 2; | 
 |             break; | 
 |         case 4: | 
 |             sprintf(number, "%d ", *((int *)low)); | 
 |             low += 4; | 
 |             break; | 
 |         default: | 
 |             sprintf(number, "0x%016x ", *((uint64_t *)low)); | 
 |             low += 8; | 
 |             break; | 
 |         } | 
 |         strcat(buffer, number); | 
 |         count++; | 
 |         if (count == 10) { | 
 |             OFFLOAD_TRACE(4, "%s            %s\n", spaces, buffer); | 
 |             count = 0; | 
 |             buffer[0] = '\0'; | 
 |         } | 
 |     } | 
 |     if (count != 0) { | 
 |         OFFLOAD_TRACE(4, "%s            %s\n", spaces, buffer); | 
 |     } | 
 | } | 
 |  | 
 | void __arr_desc_dump( | 
 |     const char *spaces, | 
 |     const char *name, | 
 |     const Arr_Desc *adp, | 
 |     bool deref, | 
 |     bool print_values | 
 | ) | 
 | { | 
 |     OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp); | 
 |  | 
 |     if (adp != 0) { | 
 |         OFFLOAD_TRACE(2, "%s    base=%llx, rank=%lld\n", | 
 |             spaces, adp->base, adp->rank); | 
 |  | 
 |         for (int i = 0; i < adp->rank; i++) { | 
 |             OFFLOAD_TRACE(2, | 
 |                           "%s    dimension %d: size=%lld, lindex=%lld, " | 
 |                           "lower=%lld, upper=%lld, stride=%lld\n", | 
 |                           spaces, i, adp->dim[i].size, adp->dim[i].lindex, | 
 |                           adp->dim[i].lower, adp->dim[i].upper, | 
 |                           adp->dim[i].stride); | 
 |         } | 
 |         // For c_cean_var the base addr is the address of the data | 
 |         // For c_cean_var_ptr the base addr is dereferenced to get to the data | 
 |         generate_mem_ranges(spaces, adp, deref, &print_range, print_values); | 
 |     } | 
 | } | 
 |  | 
 | void noncont_struct_dump( | 
 |     const char *spaces, | 
 |     const char *name, | 
 |     struct NonContigDesc *desc_p) | 
 | { | 
 |    OFFLOAD_TRACE(2, "%s%s NonCont Struct expression %p\n", | 
 |                  spaces, name, desc_p->base); | 
 |    if (desc_p) { | 
 |        OFFLOAD_TRACE(2, "%s%s    base=%p\n", spaces, name, desc_p->base); | 
 |        for (int i = 0; i < desc_p->interval_cnt; i++) { | 
 |            OFFLOAD_TRACE(2,"%s    dimension %d: lower=%lld, size=%lld\n", | 
 |                spaces, i, desc_p->interval[i].lower, desc_p->interval[i].size); | 
 |        } | 
 |    } | 
 | } | 
 |  | 
 | int64_t get_noncont_struct_size(struct NonContigDesc *desc_p) | 
 | { | 
 |     int index = desc_p->interval_cnt - 1; | 
 |     return(desc_p->interval[index].lower + desc_p->interval[index].size); | 
 | } | 
 |  | 
 | #endif // OFFLOAD_DEBUG |