|  | /* | 
|  | Copyright (c) 2014-2016 Intel Corporation.  All Rights Reserved. | 
|  |  | 
|  | Redistribution and use in source and binary forms, with or without | 
|  | modification, are permitted provided that the following conditions | 
|  | are met: | 
|  |  | 
|  | * Redistributions of source code must retain the above copyright | 
|  | notice, this list of conditions and the following disclaimer. | 
|  | * Redistributions in binary form must reproduce the above copyright | 
|  | notice, this list of conditions and the following disclaimer in the | 
|  | documentation and/or other materials provided with the distribution. | 
|  | * Neither the name of Intel Corporation nor the names of its | 
|  | contributors may be used to endorse or promote products derived | 
|  | from this software without specific prior written permission. | 
|  |  | 
|  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
|  | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
|  | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
|  | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 
|  | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
|  | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
|  | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
|  | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
|  | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
|  | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
|  | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|  | */ | 
|  |  | 
|  |  | 
|  | #include "offload_engine.h" | 
|  | #include <signal.h> | 
|  | #include <errno.h> | 
|  | #include <sys/stat.h> | 
|  | #include <sys/types.h> | 
|  |  | 
|  | #include <algorithm> | 
|  | #include <vector> | 
|  |  | 
|  | #include "offload_host.h" | 
|  | #include "offload_table.h" | 
|  | #include "offload_iterator.h" | 
|  |  | 
|  | #if defined(HOST_WINNT) | 
|  | #define PATH_SEPARATOR ";" | 
|  | #else | 
|  | #define PATH_SEPARATOR ":" | 
|  | #endif | 
|  |  | 
|  | // Static members of Stream class must be described somewhere. | 
|  | // This members describe the list of all streams defined in programm | 
|  | // via call to _Offload_stream_create. | 
|  | uint64_t  Stream::m_streams_count = 0; | 
|  | StreamMap Stream::all_streams; | 
|  | mutex_t   Stream::m_stream_lock; | 
|  | char*     mic_library_path = 0; | 
|  |  | 
|  | const char* Engine::m_func_names[Engine::c_funcs_total] = | 
|  | { | 
|  | "server_compute", | 
|  | #ifdef MYO_SUPPORT | 
|  | "server_myoinit", | 
|  | "server_myofini", | 
|  | #endif // MYO_SUPPORT | 
|  | "server_init", | 
|  | "server_var_table_size", | 
|  | "server_var_table_copy", | 
|  | "server_set_stream_affinity" | 
|  | }; | 
|  |  | 
|  | // Symbolic representation of system signals. Fix for CQ233593 | 
|  | const char* Engine::c_signal_names[Engine::c_signal_max] = | 
|  | { | 
|  | "Unknown SIGNAL", | 
|  | "SIGHUP",    /*  1, Hangup (POSIX).  */ | 
|  | "SIGINT",    /*  2, Interrupt (ANSI).  */ | 
|  | "SIGQUIT",   /*  3, Quit (POSIX).  */ | 
|  | "SIGILL",    /*  4, Illegal instruction (ANSI).  */ | 
|  | "SIGTRAP",   /*  5, Trace trap (POSIX).  */ | 
|  | "SIGABRT",   /*  6, Abort (ANSI).  */ | 
|  | "SIGBUS",    /*  7, BUS error (4.2 BSD).  */ | 
|  | "SIGFPE",    /*  8, Floating-point exception (ANSI).  */ | 
|  | "SIGKILL",   /*  9, Kill, unblockable (POSIX).  */ | 
|  | "SIGUSR1",   /* 10, User-defined signal 1 (POSIX).  */ | 
|  | "SIGSEGV",   /* 11, Segmentation violation (ANSI).  */ | 
|  | "SIGUSR2",   /* 12, User-defined signal 2 (POSIX).  */ | 
|  | "SIGPIPE",   /* 13, Broken pipe (POSIX).  */ | 
|  | "SIGALRM",   /* 14, Alarm clock (POSIX).  */ | 
|  | "SIGTERM",   /* 15, Termination (ANSI).  */ | 
|  | "SIGSTKFLT", /* 16, Stack fault.  */ | 
|  | "SIGCHLD",   /* 17, Child status has changed (POSIX).  */ | 
|  | "SIGCONT",   /* 18, Continue (POSIX).  */ | 
|  | "SIGSTOP",   /* 19, Stop, unblockable (POSIX).  */ | 
|  | "SIGTSTP",   /* 20, Keyboard stop (POSIX).  */ | 
|  | "SIGTTIN",   /* 21, Background read from tty (POSIX).  */ | 
|  | "SIGTTOU",   /* 22, Background write to tty (POSIX).  */ | 
|  | "SIGURG",    /* 23, Urgent condition on socket (4.2 BSD).  */ | 
|  | "SIGXCPU",   /* 24, CPU limit exceeded (4.2 BSD).  */ | 
|  | "SIGXFSZ",   /* 25, File size limit exceeded (4.2 BSD).  */ | 
|  | "SIGVTALRM", /* 26, Virtual alarm clock (4.2 BSD).  */ | 
|  | "SIGPROF",   /* 27, Profiling alarm clock (4.2 BSD).  */ | 
|  | "SIGWINCH",  /* 28, Window size change (4.3 BSD, Sun).  */ | 
|  | "SIGIO",     /* 29, I/O now possible (4.2 BSD).  */ | 
|  | "SIGPWR",    /* 30, Power failure restart (System V).  */ | 
|  | "SIGSYS"     /* 31, Bad system call.  */ | 
|  | }; | 
|  |  | 
|  | void Engine::init(void) | 
|  | { | 
|  | if (!m_ready) { | 
|  | mutex_locker_t locker(m_lock); | 
|  |  | 
|  | if (!m_ready) { | 
|  | // start process if not done yet | 
|  | if (m_process == 0) { | 
|  | init_process(); | 
|  | } | 
|  |  | 
|  | // load penging images | 
|  | load_libraries(); | 
|  |  | 
|  | // and (re)build pointer table | 
|  | init_ptr_data(); | 
|  |  | 
|  | // it is ready now | 
|  | m_ready = true; | 
|  |  | 
|  | //  Inform the debugger | 
|  | if (__dbg_is_attached) { | 
|  | __dbg_target_so_loaded(); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void Engine::print_stream_cpu_list(const char * str) | 
|  | { | 
|  | int count = 0; | 
|  | char buffer[1024]; | 
|  | CpuEl* cpu_el = m_cpu_head; | 
|  |  | 
|  | OFFLOAD_DEBUG_TRACE(3, | 
|  | "%s : cpu list as Index(Count) for the streams is :\n", str); | 
|  | buffer[0] = 0; | 
|  | for (int i = 0; i < m_num_threads; i++) { | 
|  | cpu_el = m_cpus + i; | 
|  | if (m_assigned_cpus == 0 || (*m_assigned_cpus)[i]) { | 
|  | count++; | 
|  | sprintf(buffer + strlen(buffer), "%d(%d) ", CPU_INDEX(cpu_el), cpu_el->count); | 
|  | if (count % 20 == 0) { | 
|  | OFFLOAD_DEBUG_TRACE(3, "%s\n", buffer); | 
|  | buffer[0] = 0; | 
|  | } | 
|  | } | 
|  | } | 
|  | if (count % 20 != 0) { | 
|  | OFFLOAD_DEBUG_TRACE(3, "%s\n", buffer); | 
|  | } | 
|  | } | 
|  |  | 
|  | void Engine::init_process(void) | 
|  | { | 
|  | COIENGINE engine; | 
|  | COIRESULT res; | 
|  | const char **environ; | 
|  | char buf[4096];  // For exe path name | 
|  | char* mic_device_main = 0; | 
|  |  | 
|  | // create environment for the target process | 
|  | environ = (const char**) mic_env_vars.create_environ_for_card(m_index); | 
|  | if (environ != 0) { | 
|  | for (const char **p = environ; *p != 0; p++) { | 
|  | OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Create execution context in the specified device | 
|  | OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index, | 
|  | m_physical_index); | 
|  | res = COI::EngineGetHandle(COI_ISA_MIC, m_physical_index, &engine); | 
|  | check_result(res, c_get_engine_handle, m_index, res); | 
|  |  | 
|  | // Get engine info on threads and cores. | 
|  | // The values of core number and thread number will be used later at stream | 
|  | // creation by call to _Offload_stream_create(device,number_of_cpus). | 
|  |  | 
|  | COI_ENGINE_INFO engine_info; | 
|  |  | 
|  | res = COI::EngineGetInfo(engine, sizeof(COI_ENGINE_INFO), &engine_info); | 
|  | check_result(res, c_get_engine_info, m_index, res); | 
|  | if (mic_library_path == 0 ) { | 
|  | if (engine_info.ISA == COI_DEVICE_KNC) { | 
|  | mic_library_path = knc_library_path; | 
|  | } | 
|  | else if (engine_info.ISA == COI_DEVICE_KNL) { | 
|  | mic_library_path = knl_library_path; | 
|  | } | 
|  | else { | 
|  | LIBOFFLOAD_ERROR(c_unknown_mic_device_type); | 
|  | } | 
|  | } | 
|  |  | 
|  | // m_cpus is the list of all available threads. | 
|  | // At the begining all threads made available through OFFLOAD_DEVICES | 
|  | // or all threads existed at the engine if OFFLOAD_DEVICES isn't set. | 
|  | // m_cpu_head points to the head of the m_cpus list. | 
|  | // m_cpus is ordered by number of streams using the thread. | 
|  | // m_cpu_head points to the least used thread. | 
|  | // After creating and destroying a stream the m_cpus list must be fixed | 
|  | // to be ordered. | 
|  |  | 
|  | m_cpus = (CpuEl*)malloc(engine_info.NumThreads * sizeof(CpuEl)); | 
|  | if (m_cpus == NULL) | 
|  | LIBOFFLOAD_ERROR(c_malloc); | 
|  | memset(m_cpus, 0, engine_info.NumThreads * sizeof(CpuEl)); | 
|  | CpuEl* prev_cpu = NULL; | 
|  |  | 
|  | for (int i = 0; i < engine_info.NumThreads; i++) { | 
|  | if (m_assigned_cpus == 0 || (*m_assigned_cpus)[i]) { | 
|  | if (prev_cpu) { | 
|  | prev_cpu->next = m_cpus + i; | 
|  | } | 
|  | else { | 
|  | m_cpu_head = m_cpus + i; | 
|  | } | 
|  | m_cpus[i].prev = prev_cpu; | 
|  | m_cpus[i].count = 0; | 
|  | prev_cpu = m_cpus + i; | 
|  | } | 
|  | } | 
|  |  | 
|  | // The following values will be used at pipeline creation for streams | 
|  | m_num_cores = engine_info.NumCores; | 
|  | m_num_threads = engine_info.NumThreads; | 
|  |  | 
|  | print_stream_cpu_list("init_process"); | 
|  |  | 
|  | // Check if OFFLOAD_DMA_CHANNEL_COUNT is set to 2 | 
|  | // Only the value 2 is supported in 16.0 | 
|  | if (mic_dma_channel_count == 2) { | 
|  | if (COI::ProcessConfigureDMA) { | 
|  | // Set DMA channels using COI API | 
|  | COI::ProcessConfigureDMA(2, COI::DMA_MODE_READ_WRITE); | 
|  | } | 
|  | else { | 
|  | // Set environment variable COI_DMA_CHANNEL_COUNT | 
|  | // use putenv instead of setenv as Windows has no setenv. | 
|  | // Note: putenv requires its argument can't be freed or modified. | 
|  | // So no free after call to putenv or elsewhere. | 
|  | char * env_var = strdup("COI_DMA_CHANNEL_COUNT=2"); | 
|  | if (env_var == NULL) | 
|  | LIBOFFLOAD_ERROR(c_malloc); | 
|  | putenv(env_var); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Target executable is not available then use compiler provided offload_main | 
|  | if (__target_exe == 0) { | 
|  | // find target executable to be used if main application is not an | 
|  | // offload build application. | 
|  | const char *base_name = "offload_main"; | 
|  | if (mic_library_path != 0) { | 
|  | char *buf = strdup(mic_library_path); | 
|  | if (buf == NULL) | 
|  | LIBOFFLOAD_ERROR(c_malloc); | 
|  | char *try_name = (char*) alloca(strlen(mic_library_path) + | 
|  | strlen(base_name) + 2); | 
|  | char *dir, *ptr; | 
|  |  | 
|  | for (dir = strtok_r(buf, PATH_SEPARATOR, &ptr); dir != 0; | 
|  | dir = strtok_r(0, PATH_SEPARATOR, &ptr)) { | 
|  | // compose a full path | 
|  | sprintf(try_name, "%s/%s", dir, base_name); | 
|  |  | 
|  | // check if such file exists | 
|  | struct stat st; | 
|  | if (stat(try_name, &st) == 0 && S_ISREG(st.st_mode)) { | 
|  | mic_device_main = strdup(try_name); | 
|  | if (mic_device_main == NULL) | 
|  | LIBOFFLOAD_ERROR(c_malloc); | 
|  | break; | 
|  | } | 
|  | } | 
|  | free(buf); | 
|  | } | 
|  | if (mic_device_main == 0) { | 
|  | LIBOFFLOAD_ERROR(c_report_no_target_exe, "offload_main"); | 
|  | exit(1); | 
|  | } | 
|  |  | 
|  | OFFLOAD_DEBUG_TRACE(2, | 
|  | "Loading target executable %s\n",mic_device_main); | 
|  |  | 
|  | res = COI::ProcessCreateFromFile( | 
|  | engine,                 // in_Engine | 
|  | mic_device_main,        // in_pBinaryName | 
|  | 0,                      // in_Argc | 
|  | 0,                      // in_ppArgv | 
|  | environ == 0,           // in_DupEnv | 
|  | environ,                // in_ppAdditionalEnv | 
|  | mic_proxy_io,           // in_ProxyActive | 
|  | mic_proxy_fs_root,      // in_ProxyfsRoot | 
|  | mic_buffer_size,        // in_BufferSpace | 
|  | mic_library_path,       // in_LibrarySearchPath | 
|  | &m_process              // out_pProcess | 
|  | ); | 
|  | } | 
|  | else { | 
|  | // Target executable should be available by the time when we | 
|  | // attempt to initialize the device | 
|  |  | 
|  | //  Need the full path of the FAT exe for VTUNE | 
|  | { | 
|  | #ifndef TARGET_WINNT | 
|  | ssize_t len = readlink("/proc/self/exe", buf,1000); | 
|  | #else | 
|  | int len = GetModuleFileName(NULL, buf,1000); | 
|  | #endif // TARGET_WINNT | 
|  | if  (len == -1) { | 
|  | LIBOFFLOAD_ERROR(c_report_no_host_exe); | 
|  | exit(1); | 
|  | } | 
|  | else if (len > 999) { | 
|  | LIBOFFLOAD_ERROR(c_report_path_buff_overflow); | 
|  | exit(1); | 
|  | } | 
|  | buf[len] = '\0'; | 
|  | } | 
|  |  | 
|  | OFFLOAD_DEBUG_TRACE(2, | 
|  | "Loading target executable \"%s\" from %p, size %lld, host file %s\n", | 
|  | __target_exe->name, __target_exe->data, __target_exe->size, | 
|  | buf); | 
|  |  | 
|  | res = COI::ProcessCreateFromMemory( | 
|  | engine,                 // in_Engine | 
|  | __target_exe->name,     // in_pBinaryName | 
|  | __target_exe->data,     // in_pBinaryBuffer | 
|  | __target_exe->size,     // in_BinaryBufferLength, | 
|  | 0,                      // in_Argc | 
|  | 0,                      // in_ppArgv | 
|  | environ == 0,           // in_DupEnv | 
|  | environ,                // in_ppAdditionalEnv | 
|  | mic_proxy_io,           // in_ProxyActive | 
|  | mic_proxy_fs_root,      // in_ProxyfsRoot | 
|  | mic_buffer_size,        // in_BufferSpace | 
|  | mic_library_path,       // in_LibrarySearchPath | 
|  | buf,                    // in_FileOfOrigin | 
|  | -1,                     // in_FileOfOriginOffset use -1 to indicate to | 
|  | // COI that is is a FAT binary | 
|  | &m_process              // out_pProcess | 
|  | ); | 
|  | } | 
|  | check_result(res, c_process_create, m_index, res); | 
|  |  | 
|  | if ((mic_4k_buffer_size != 0) || (mic_2m_buffer_size !=0)) { | 
|  | // available only in MPSS 4.2 and greater | 
|  | if (COI::ProcessSetCacheSize != 0 ) { | 
|  | int flags; | 
|  | //  Need compiler to use MPSS 3.2 or greater to get these | 
|  | // definition so currently hardcoding it | 
|  | //  COI_CACHE_ACTION_GROW_NOW && COI_CACHE_MODE_ONDEMAND_SYNC; | 
|  | flags = 0x00020002; | 
|  | res = COI::ProcessSetCacheSize( | 
|  | m_process,             // in_Process | 
|  | mic_2m_buffer_size,    // in_HugePagePoolSize | 
|  | flags,                 // inHugeFlags | 
|  | mic_4k_buffer_size,    // in_SmallPagePoolSize | 
|  | flags,                 // inSmallFlags | 
|  | 0,                     // in_NumDependencies | 
|  | 0,                     // in_pDependencies | 
|  | 0                      // out_PCompletion | 
|  | ); | 
|  | OFFLOAD_DEBUG_TRACE(2, | 
|  | "Reserve target buffers 4K pages = %d  2M pages = %d\n", | 
|  | mic_4k_buffer_size, mic_2m_buffer_size); | 
|  | check_result(res, c_process_set_cache_size, m_index, res); | 
|  | } | 
|  | else { | 
|  | OFFLOAD_DEBUG_TRACE(2, | 
|  | "Reserve target buffers not supported in current MPSS\n"); | 
|  | } | 
|  | } | 
|  |  | 
|  | // get function handles | 
|  | res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total, | 
|  | m_func_names, m_funcs); | 
|  | check_result(res, c_process_get_func_handles, m_index, res); | 
|  |  | 
|  | // initialize device side | 
|  | pid_t pid = init_device(); | 
|  |  | 
|  | // For IDB | 
|  | if (__dbg_is_attached) { | 
|  | // TODO: we have in-memory executable now. | 
|  | // Check with IDB team what should we provide them now? | 
|  | if (__target_exe == 0) { | 
|  | strcpy(__dbg_target_exe_name, "offload_main"); | 
|  | } | 
|  | else { | 
|  | if (strlen(__target_exe->name) < MAX_TARGET_NAME) { | 
|  | strcpy(__dbg_target_exe_name, __target_exe->name); | 
|  | } | 
|  | } | 
|  | __dbg_target_so_pid = pid; | 
|  | __dbg_target_id = m_physical_index; | 
|  | // The call to __dbg_target_so_loaded() is moved | 
|  | // to Engine:init so all the libraries are loaded before | 
|  | // informing debugger so debugger can access them. | 
|  | // __dbg_target_so_loaded(); | 
|  | } | 
|  | } | 
|  |  | 
|  | void Engine::fini_process(bool verbose) | 
|  | { | 
|  | if (m_process != 0) { | 
|  | uint32_t sig; | 
|  | int8_t ret; | 
|  |  | 
|  | // destroy target process | 
|  | OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n", | 
|  | m_index); | 
|  |  | 
|  | COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig); | 
|  | m_process = 0; | 
|  |  | 
|  | if (res == COI_SUCCESS) { | 
|  | OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n", | 
|  | sig, ret); | 
|  | if (verbose) { | 
|  | if (sig != 0) { | 
|  | LIBOFFLOAD_ERROR( | 
|  | c_mic_process_exit_sig, m_index, sig, | 
|  | c_signal_names[sig >= c_signal_max ? 0 : sig]); | 
|  | } | 
|  | else { | 
|  | LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret); | 
|  | } | 
|  | } | 
|  |  | 
|  | // for idb | 
|  | if (__dbg_is_attached) { | 
|  | __dbg_target_so_unloaded(); | 
|  | } | 
|  | } | 
|  | else { | 
|  | if (verbose) { | 
|  | LIBOFFLOAD_ERROR(c_mic_process_exit, m_index); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void Engine::load_libraries() | 
|  | { | 
|  | // load libraries collected so far | 
|  | for (TargetImageList::iterator it = m_images.begin(); | 
|  | it != m_images.end(); it++) { | 
|  | OFFLOAD_DEBUG_TRACE(2, | 
|  | "Loading library \"%s\" from %p, size %llu, host file %s\n", | 
|  | it->name, it->data, it->size, it->origin); | 
|  |  | 
|  | // load library to the device | 
|  | COILIBRARY lib; | 
|  | COIRESULT res; | 
|  | res = COI::ProcessLoadLibraryFromMemory(m_process, | 
|  | it->data, | 
|  | it->size, | 
|  | it->name, | 
|  | mic_library_path, | 
|  | it->origin, | 
|  | (it->origin) ? -1 : 0, | 
|  | COI_LOADLIBRARY_V1_FLAGS, | 
|  | &lib); | 
|  | m_dyn_libs.push_front(DynLib(it->name, it->data, lib)); | 
|  |  | 
|  | if (res != COI_SUCCESS && res != COI_ALREADY_EXISTS) { | 
|  | check_result(res, c_load_library, it->origin, m_index, res); | 
|  | } | 
|  | } | 
|  | m_images.clear(); | 
|  | } | 
|  |  | 
|  | void Engine::unload_library(const void *data, const char *name) | 
|  | { | 
|  | if (m_process == 0) { | 
|  | return; | 
|  | } | 
|  | for (DynLibList::iterator it = m_dyn_libs.begin(); | 
|  | it != m_dyn_libs.end(); it++) { | 
|  | if (it->data == data) { | 
|  | COIRESULT res; | 
|  | OFFLOAD_DEBUG_TRACE(2, | 
|  | "Unloading library \"%s\"\n",name); | 
|  | res = COI::ProcessUnloadLibrary(m_process,it->lib); | 
|  | m_dyn_libs.erase(it); | 
|  | if (res != COI_SUCCESS) { | 
|  | check_result(res, c_unload_library, m_index, res); | 
|  | } | 
|  | return; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | static bool target_entry_cmp( | 
|  | const VarList::BufEntry &l, | 
|  | const VarList::BufEntry &r | 
|  | ) | 
|  | { | 
|  | const char *l_name = reinterpret_cast<const char*>(l.name); | 
|  | const char *r_name = reinterpret_cast<const char*>(r.name); | 
|  | return strcmp(l_name, r_name) < 0; | 
|  | } | 
|  |  | 
|  | static bool host_entry_cmp( | 
|  | const VarTable::Entry *l, | 
|  | const VarTable::Entry *r | 
|  | ) | 
|  | { | 
|  | return strcmp(l->name, r->name) < 0; | 
|  | } | 
|  |  | 
|  | void Engine::init_ptr_data(void) | 
|  | { | 
|  | COIRESULT res; | 
|  | COIEVENT event; | 
|  |  | 
|  | // Prepare table of host entries | 
|  | std::vector<const VarTable::Entry*> host_table( | 
|  | Iterator(__offload_vars.get_head()), | 
|  | Iterator()); | 
|  |  | 
|  | // no need to do anything further is host table is empty | 
|  | if (host_table.size() <= 0) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Get var table entries from the target. | 
|  | // First we need to get size for the buffer to copy data | 
|  | struct { | 
|  | int64_t nelems; | 
|  | int64_t length; | 
|  | } params; | 
|  |  | 
|  | res = COI::PipelineRunFunction(get_pipeline(), | 
|  | m_funcs[c_func_var_table_size], | 
|  | 0, 0, 0, | 
|  | 0, 0, | 
|  | 0, 0, | 
|  | ¶ms, sizeof(params), | 
|  | &event); | 
|  | check_result(res, c_pipeline_run_func, m_index, res); | 
|  |  | 
|  | res = COI::EventWait(1, &event, -1, 1, 0, 0); | 
|  | check_result(res, c_event_wait, res); | 
|  |  | 
|  | if (params.length == 0) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | // create buffer for target entries and copy data to host | 
|  | COIBUFFER buffer; | 
|  | res = COI::BufferCreate(params.length, COI_BUFFER_NORMAL, 0, 0, 1, | 
|  | &m_process, &buffer); | 
|  | check_result(res, c_buf_create, m_index, res); | 
|  |  | 
|  | COI_ACCESS_FLAGS flags = COI_SINK_WRITE; | 
|  | res = COI::PipelineRunFunction(get_pipeline(), | 
|  | m_funcs[c_func_var_table_copy], | 
|  | 1, &buffer, &flags, | 
|  | 0, 0, | 
|  | ¶ms.nelems, sizeof(params.nelems), | 
|  | 0, 0, | 
|  | &event); | 
|  | check_result(res, c_pipeline_run_func, m_index, res); | 
|  |  | 
|  | res = COI::EventWait(1, &event, -1, 1, 0, 0); | 
|  | check_result(res, c_event_wait, res); | 
|  |  | 
|  | // patch names in target data | 
|  | VarList::BufEntry *target_table; | 
|  | COIMAPINSTANCE map_inst; | 
|  | res = COI::BufferMap(buffer, 0, params.length, COI_MAP_READ_ONLY, 0, 0, | 
|  | 0, &map_inst, | 
|  | reinterpret_cast<void**>(&target_table)); | 
|  | check_result(res, c_buf_map, res); | 
|  |  | 
|  | VarList::table_patch_names(target_table, params.nelems); | 
|  |  | 
|  | // and sort entries | 
|  | std::sort(target_table, target_table + params.nelems, target_entry_cmp); | 
|  | std::sort(host_table.begin(), host_table.end(), host_entry_cmp); | 
|  |  | 
|  | // merge host and target entries and enter matching vars map | 
|  | std::vector<const VarTable::Entry*>::const_iterator hi = | 
|  | host_table.begin(); | 
|  | std::vector<const VarTable::Entry*>::const_iterator he = | 
|  | host_table.end(); | 
|  | const VarList::BufEntry *ti = target_table; | 
|  | const VarList::BufEntry *te = target_table + params.nelems; | 
|  |  | 
|  | while (hi != he && ti != te) { | 
|  | int res = strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name)); | 
|  | if (res == 0) { | 
|  | bool is_new; | 
|  | // add matching entry to var map | 
|  | PtrData *ptr = insert_ptr_data((*hi)->addr, (*hi)->size, is_new); | 
|  |  | 
|  | // store address for new entries | 
|  | if (is_new) { | 
|  | ptr->mic_addr = ti->addr; | 
|  | ptr->is_static = true; | 
|  | ptr->var_alloc_type = (*hi)->var_alloc_type; | 
|  | } | 
|  | ptr->alloc_ptr_data_lock.unlock(); | 
|  | hi++; | 
|  | ti++; | 
|  | } | 
|  | else if (res < 0) { | 
|  | hi++; | 
|  | } | 
|  | else { | 
|  | ti++; | 
|  | } | 
|  | } | 
|  |  | 
|  | // cleanup | 
|  | res = COI::BufferUnmap(map_inst, 0, 0, 0); | 
|  | check_result(res, c_buf_unmap, res); | 
|  |  | 
|  | res = COI::BufferDestroy(buffer); | 
|  | check_result(res, c_buf_destroy, res); | 
|  | } | 
|  |  | 
|  | COIRESULT Engine::compute( | 
|  | _Offload_stream stream, | 
|  | const std::list<COIBUFFER> &buffers, | 
|  | const void*         data, | 
|  | uint16_t            data_size, | 
|  | void*               ret, | 
|  | uint16_t            ret_size, | 
|  | uint32_t            num_deps, | 
|  | const COIEVENT*     deps, | 
|  | COIEVENT*           event | 
|  | ) /* const */ | 
|  | { | 
|  | COIBUFFER *bufs; | 
|  | COI_ACCESS_FLAGS *flags; | 
|  | COIRESULT res; | 
|  |  | 
|  | // convert buffers list to array | 
|  | int num_bufs = buffers.size(); | 
|  | if (num_bufs > 0) { | 
|  | bufs = (COIBUFFER*) alloca(num_bufs * sizeof(COIBUFFER)); | 
|  | flags = (COI_ACCESS_FLAGS*) alloca(num_bufs * | 
|  | sizeof(COI_ACCESS_FLAGS)); | 
|  |  | 
|  | int i = 0; | 
|  | for (std::list<COIBUFFER>::const_iterator it = buffers.begin(); | 
|  | it != buffers.end(); it++) { | 
|  | bufs[i] = *it; | 
|  |  | 
|  | // TODO: this should be fixed | 
|  | flags[i++] = COI_SINK_WRITE; | 
|  | } | 
|  | } | 
|  | else { | 
|  | bufs = 0; | 
|  | flags = 0; | 
|  | } | 
|  | COIPIPELINE pipeline = (stream == no_stream) ? | 
|  | get_pipeline() : | 
|  | get_pipeline(stream); | 
|  | // start computation | 
|  | res = COI::PipelineRunFunction(pipeline, | 
|  | m_funcs[c_func_compute], | 
|  | num_bufs, bufs, flags, | 
|  | num_deps, deps, | 
|  | data, data_size, | 
|  | ret, ret_size, | 
|  | event); | 
|  | return res; | 
|  | } | 
|  |  | 
|  | pid_t Engine::init_device(void) | 
|  | { | 
|  | struct init_data { | 
|  | int  device_index; | 
|  | int  devices_total; | 
|  | int  console_level; | 
|  | int  offload_report_level; | 
|  | } data; | 
|  | COIRESULT res; | 
|  | COIEVENT event; | 
|  | pid_t pid; | 
|  |  | 
|  | OFFLOAD_DEBUG_TRACE_1(2, 0, c_offload_init, | 
|  | "Initializing device with logical index %d " | 
|  | "and physical index %d\n", | 
|  | m_index, m_physical_index); | 
|  |  | 
|  | // setup misc data | 
|  | data.device_index = m_index; | 
|  | data.devices_total = mic_engines_total; | 
|  | data.console_level = console_enabled; | 
|  | data.offload_report_level = offload_report_level; | 
|  |  | 
|  | res = COI::PipelineRunFunction(get_pipeline(), | 
|  | m_funcs[c_func_init], | 
|  | 0, 0, 0, 0, 0, | 
|  | &data, sizeof(data), | 
|  | &pid, sizeof(pid), | 
|  | &event); | 
|  | check_result(res, c_pipeline_run_func, m_index, res); | 
|  |  | 
|  | res = COI::EventWait(1, &event, -1, 1, 0, 0); | 
|  | check_result(res, c_event_wait, res); | 
|  |  | 
|  | OFFLOAD_DEBUG_TRACE(2, "Device process pid is %d\n", pid); | 
|  |  | 
|  | return pid; | 
|  | } | 
|  |  | 
|  | // data associated with each thread | 
|  | struct Thread { | 
|  | Thread(long* addr_coipipe_counter) { | 
|  | m_addr_coipipe_counter = addr_coipipe_counter; | 
|  | memset(m_pipelines, 0, sizeof(m_pipelines)); | 
|  | } | 
|  |  | 
|  | ~Thread() { | 
|  | #ifndef TARGET_WINNT | 
|  | __sync_sub_and_fetch(m_addr_coipipe_counter, 1); | 
|  | #else // TARGET_WINNT | 
|  | _InterlockedDecrement(m_addr_coipipe_counter); | 
|  | #endif // TARGET_WINNT | 
|  | for (int i = 0; i < mic_engines_total; i++) { | 
|  | if (m_pipelines[i] != 0) { | 
|  | COI::PipelineDestroy(m_pipelines[i]); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | COIPIPELINE get_pipeline(int index) const { | 
|  | return m_pipelines[index]; | 
|  | } | 
|  |  | 
|  | void set_pipeline(int index, COIPIPELINE pipeline) { | 
|  | m_pipelines[index] = pipeline; | 
|  | } | 
|  |  | 
|  | AutoSet& get_auto_vars() { | 
|  | return m_auto_vars; | 
|  | } | 
|  |  | 
|  | private: | 
|  | long*       m_addr_coipipe_counter; | 
|  | AutoSet     m_auto_vars; | 
|  | COIPIPELINE m_pipelines[MIC_ENGINES_MAX]; | 
|  | }; | 
|  |  | 
|  | COIPIPELINE Engine::get_pipeline(void) | 
|  | { | 
|  | Thread* thread = (Thread*) thread_getspecific(mic_thread_key); | 
|  | if (thread == 0) { | 
|  | thread = new Thread(&m_proc_number); | 
|  | thread_setspecific(mic_thread_key, thread); | 
|  | } | 
|  |  | 
|  | COIPIPELINE pipeline = thread->get_pipeline(m_index); | 
|  | if (pipeline == 0) { | 
|  | COIRESULT res; | 
|  | int proc_num; | 
|  |  | 
|  | #ifndef TARGET_WINNT | 
|  | proc_num = __sync_fetch_and_add(&m_proc_number, 1); | 
|  | #else // TARGET_WINNT | 
|  | proc_num = _InterlockedIncrement(&m_proc_number); | 
|  | #endif // TARGET_WINNT | 
|  |  | 
|  | if (proc_num > COI_PIPELINE_MAX_PIPELINES) { | 
|  | LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES); | 
|  | LIBOFFLOAD_ABORT; | 
|  | } | 
|  |  | 
|  | // Create pipeline for this thread | 
|  | if (m_assigned_cpus == 0) { | 
|  | // If m_assigned_cpus is NULL, it implies all threads | 
|  | // Create the pipeline with no CPU mask | 
|  | res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline); | 
|  | } else { | 
|  | // Create COI CPU mask | 
|  | COI_CPU_MASK  in_Mask; | 
|  | res = COI::PipelineClearCPUMask(in_Mask); | 
|  | check_result(res, c_clear_cpu_mask, m_index, res); | 
|  |  | 
|  | int threads_per_core = m_num_threads / m_num_cores; | 
|  |  | 
|  | // Available threads are defined by examining of m_assigned_cpus bitset. | 
|  | // We skip thread 0. | 
|  | for (int i = 1; i < m_num_threads; i++) { | 
|  | // For available thread i m_assigned_cpus[i] is equal to 1 | 
|  | if ((*m_assigned_cpus)[i]) { | 
|  | COI_CPU_MASK_SET(i, in_Mask); | 
|  | } | 
|  | } | 
|  | OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask for this CPU thread\n" | 
|  | "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n" | 
|  | "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n", | 
|  | in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3], | 
|  | in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7], | 
|  | in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11], | 
|  | in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]); | 
|  |  | 
|  | // Create the pipeline with allowable CPUs | 
|  | res = COI::PipelineCreate(m_process, in_Mask, mic_stack_size, &pipeline); | 
|  | } | 
|  | check_result(res, c_pipeline_create, m_index, res); | 
|  | thread->set_pipeline(m_index, pipeline); | 
|  | } | 
|  | return pipeline; | 
|  | } | 
|  |  | 
|  | Stream* Stream::find_stream(uint64_t handle, bool remove) | 
|  | { | 
|  | Stream *stream = 0; | 
|  |  | 
|  | m_stream_lock.lock(); | 
|  | { | 
|  | StreamMap::iterator it = all_streams.find(handle); | 
|  | if (it != all_streams.end()) { | 
|  | stream = it->second; | 
|  | if (remove) { | 
|  | all_streams.erase(it); | 
|  | } | 
|  | } | 
|  | } | 
|  | m_stream_lock.unlock(); | 
|  | return stream; | 
|  | } | 
|  |  | 
|  | void Engine::move_cpu_el_after(CpuEl* cpu_what, CpuEl* cpu_after) | 
|  | { | 
|  | if (cpu_what == cpu_after) { | 
|  | return; | 
|  | } | 
|  | CpuEl* cpu_prev = cpu_what->prev; | 
|  |  | 
|  | // remove cpu_what | 
|  | if (!cpu_prev) { | 
|  | m_cpu_head = cpu_what->next; | 
|  | } | 
|  | else { | 
|  | cpu_prev->next = cpu_what->next; | 
|  | } | 
|  | if (cpu_what->next) { | 
|  | cpu_what->next->prev = cpu_prev; | 
|  | } | 
|  |  | 
|  | // insert cpu_what after cpu_after | 
|  | cpu_what->prev = cpu_after; | 
|  | cpu_what->next = cpu_after->next; | 
|  | if (cpu_after->next) { | 
|  | cpu_after->next->prev = cpu_what; | 
|  | } | 
|  | cpu_after->next = cpu_what; | 
|  | } | 
|  |  | 
|  | COIPIPELINE Engine::get_pipeline(_Offload_stream handle) | 
|  | { | 
|  | Stream * stream = Stream::find_stream(handle, false); | 
|  |  | 
|  | if (!stream) { | 
|  | LIBOFFLOAD_ERROR(c_offload_no_stream, m_index); | 
|  | LIBOFFLOAD_ABORT; | 
|  | } | 
|  |  | 
|  | COIPIPELINE pipeline = stream->get_pipeline(); | 
|  |  | 
|  | if (pipeline == 0) { | 
|  | COIRESULT     res; | 
|  | int           proc_num; | 
|  | COI_CPU_MASK  in_Mask ; | 
|  |  | 
|  | #ifndef TARGET_WINNT | 
|  | proc_num = __sync_fetch_and_add(&m_proc_number, 1); | 
|  | #else // TARGET_WINNT | 
|  | proc_num = _InterlockedIncrement(&m_proc_number); | 
|  | #endif // TARGET_WINNT | 
|  |  | 
|  | if (proc_num > COI_PIPELINE_MAX_PIPELINES) { | 
|  | LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES); | 
|  | LIBOFFLOAD_ABORT; | 
|  | } | 
|  |  | 
|  | m_stream_lock.lock(); | 
|  |  | 
|  | // start process if not done yet | 
|  | if (m_process == 0) { | 
|  | init_process(); | 
|  | } | 
|  |  | 
|  | // create CPUmask | 
|  | res = COI::PipelineClearCPUMask(in_Mask); | 
|  | check_result(res, c_clear_cpu_mask, m_index, res); | 
|  |  | 
|  | int stream_cpu_num = stream->get_cpu_number(); | 
|  |  | 
|  | stream->m_stream_cpus.reset(); | 
|  |  | 
|  | int threads_per_core = m_num_threads / m_num_cores; | 
|  |  | 
|  |  | 
|  | // Available threads is taken from m_cpus list. | 
|  | // m_cpu_head points to the head of m_cpus. | 
|  | // the elements of m_cpus is ordered by the number of usage in streams. | 
|  |  | 
|  | CpuEl *cpu_el = m_cpu_head; | 
|  | CpuEl *cpu_used_el, *cpu_used_prev, *cpu_prev; | 
|  |  | 
|  | for (int i = 0; i < stream_cpu_num; i++) { | 
|  | COI_CPU_MASK_SET(CPU_INDEX(cpu_el), in_Mask); | 
|  | stream->m_stream_cpus.set(CPU_INDEX(cpu_el)); | 
|  | //If the number of availabale threads is less than stream_cpu_num, | 
|  | // the stream_cpu_num is restricted to this number. | 
|  | if (!cpu_el->next) { | 
|  | break; | 
|  | } | 
|  | if (i + 1 < stream_cpu_num) { | 
|  | cpu_el = cpu_el->next; | 
|  | } | 
|  | } | 
|  |  | 
|  | // assertion : cpu_el points to the last used thread | 
|  | cpu_used_el = cpu_el; | 
|  | while (cpu_used_el) { | 
|  | cpu_used_el->count++; | 
|  | cpu_el = cpu_prev = cpu_used_el; | 
|  | cpu_used_prev = cpu_used_el->prev; | 
|  | if (!cpu_el->next) { | 
|  | cpu_used_el = cpu_used_prev; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | while (cpu_el) { | 
|  | if (cpu_used_el->count < cpu_el->count) { | 
|  | break; | 
|  | } | 
|  | // Equal used threads are ordered by thread number to | 
|  | // assign to a stream as contiguous threads as possible. | 
|  | else if (cpu_used_el->count == cpu_el->count && | 
|  | CPU_INDEX(cpu_used_el) <  CPU_INDEX(cpu_el)) { | 
|  | break; | 
|  | } | 
|  | cpu_prev = cpu_el; | 
|  | cpu_el = cpu_el->next; | 
|  | } | 
|  | if (cpu_used_el != cpu_prev) { | 
|  | move_cpu_el_after(cpu_used_el, cpu_prev); | 
|  | } | 
|  | cpu_used_el = cpu_used_prev; | 
|  | } | 
|  | print_stream_cpu_list("get_pipeline"); | 
|  |  | 
|  | // create pipeline for this thread | 
|  | OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask for this Stream\n" | 
|  | "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n" | 
|  | "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n", | 
|  | in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3], | 
|  | in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7], | 
|  | in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11], | 
|  | in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]); | 
|  | res = COI::PipelineCreate(m_process, in_Mask, | 
|  | mic_stack_size, &pipeline); | 
|  | check_result(res, c_pipeline_create, m_index, res); | 
|  |  | 
|  | // Set stream's affinities | 
|  | { | 
|  | struct affinity_spec affinity_spec; | 
|  | char* affinity_type; | 
|  | int i; | 
|  |  | 
|  | // "compact" by default | 
|  | affinity_spec.affinity_type = affinity_compact; | 
|  |  | 
|  | // Check if user has specified type of affinity | 
|  | if ((affinity_type = getenv("OFFLOAD_STREAM_AFFINITY")) != | 
|  | NULL) | 
|  | { | 
|  | char affinity_str[16]; | 
|  | int affinity_str_len; | 
|  |  | 
|  | OFFLOAD_DEBUG_TRACE(2, | 
|  | "User has specified OFFLOAD_STREAM_AFFINITY=%s\n", | 
|  | affinity_type); | 
|  |  | 
|  | // Set type of affinity requested | 
|  | affinity_str_len = strlen(affinity_type); | 
|  | for (i=0; i<affinity_str_len && i<15; i++) | 
|  | { | 
|  | affinity_str[i] = tolower(affinity_type[i]); | 
|  | } | 
|  | affinity_str[i] = '\0'; | 
|  | if (strcmp(affinity_str, "compact") == 0) { | 
|  | affinity_spec.affinity_type = affinity_compact; | 
|  | OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n"); | 
|  | } else if (strcmp(affinity_str, "scatter") == 0) { | 
|  | affinity_spec.affinity_type = affinity_scatter; | 
|  | OFFLOAD_DEBUG_TRACE(2, "Setting affinity=scatter\n"); | 
|  | } else { | 
|  | LIBOFFLOAD_ERROR(c_incorrect_affinity, affinity_str); | 
|  | affinity_spec.affinity_type = affinity_compact; | 
|  | OFFLOAD_DEBUG_TRACE(2, "Setting affinity=compact\n"); | 
|  | } | 
|  | } | 
|  | // Make flat copy of sink mask because COI's mask is opaque | 
|  | for (i=0; i<16; i++) { | 
|  | affinity_spec.sink_mask[i] = in_Mask[i]; | 
|  | } | 
|  | // Set number of cores and threads | 
|  | affinity_spec.num_cores = m_num_cores; | 
|  | affinity_spec.num_threads = m_num_threads; | 
|  |  | 
|  | COIEVENT event; | 
|  | res = COI::PipelineRunFunction(pipeline, | 
|  | m_funcs[c_func_set_stream_affinity], | 
|  | 0, 0, 0, | 
|  | 0, 0, | 
|  | &affinity_spec, sizeof(affinity_spec), | 
|  | 0, 0, | 
|  | &event); | 
|  | check_result(res, c_pipeline_run_func, m_index, res); | 
|  |  | 
|  | res = COI::EventWait(1, &event, -1, 1, 0, 0); | 
|  | check_result(res, c_event_wait, res); | 
|  | } | 
|  |  | 
|  | m_stream_lock.unlock(); | 
|  | stream->set_pipeline(pipeline); | 
|  | } | 
|  | return pipeline; | 
|  | } | 
|  |  | 
|  | void Engine::stream_destroy(_Offload_stream handle) | 
|  | { | 
|  | // get stream | 
|  | Stream * stream =  Stream::find_stream(handle, true); | 
|  |  | 
|  | if (stream) { | 
|  | // return cpus for future use | 
|  | for (int i = 0; i < m_num_threads; i++) { | 
|  | if (stream->m_stream_cpus.test(i)) { | 
|  | CpuEl *cpu_el = m_cpus + i; | 
|  | CpuEl *cpu_first_el = cpu_el; | 
|  | // decrease count of thread "i" and move its CpuEl to the | 
|  | // proper place into the ordered list | 
|  | cpu_el->count--; | 
|  | while (cpu_el->prev) { | 
|  | if (cpu_first_el->count > cpu_el->prev->count) { | 
|  | break; | 
|  | } | 
|  | else if (cpu_first_el->count == cpu_el->prev->count && | 
|  | CPU_INDEX(cpu_first_el) > CPU_INDEX(cpu_el->prev)) { | 
|  | break; | 
|  | } | 
|  | cpu_el = cpu_el->prev; | 
|  | } | 
|  | cpu_el = cpu_el->prev; | 
|  | // If cpu_el for thread "i" must be moved in the list | 
|  | if (cpu_first_el != cpu_el) { | 
|  | // Thread "i" is used the least times. It must be set as | 
|  | // the m_cpu_head. | 
|  | if (!cpu_el) { | 
|  | if (!cpu_first_el->prev) { | 
|  | continue; | 
|  | } | 
|  | // remove cpu_el. | 
|  | cpu_first_el->prev->next = cpu_first_el->next; | 
|  | if (cpu_first_el->next) { | 
|  | cpu_first_el->next->prev = cpu_first_el->prev; | 
|  | } | 
|  | // make cpu_first_el as new m_cpu_head | 
|  | cpu_first_el->prev = NULL; | 
|  | cpu_first_el->next = m_cpu_head; | 
|  | m_cpu_head->prev = cpu_first_el; | 
|  | m_cpu_head = cpu_first_el; | 
|  | } | 
|  | else { | 
|  | move_cpu_el_after(cpu_first_el, cpu_el); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  | print_stream_cpu_list("stream_destroy"); | 
|  | delete stream; | 
|  | } | 
|  | else { | 
|  | LIBOFFLOAD_ERROR(c_offload_no_stream, m_index); | 
|  | LIBOFFLOAD_ABORT; | 
|  | } | 
|  | } | 
|  |  | 
|  | uint64_t Engine::get_thread_id(void) | 
|  | { | 
|  | Thread* thread = (Thread*) thread_getspecific(mic_thread_key); | 
|  | if (thread == 0) { | 
|  | thread = new Thread(&m_proc_number); | 
|  | thread_setspecific(mic_thread_key, thread); | 
|  | } | 
|  |  | 
|  | return reinterpret_cast<uint64_t>(thread); | 
|  | } | 
|  |  | 
|  | AutoSet& Engine::get_auto_vars(void) | 
|  | { | 
|  | Thread* thread = (Thread*) thread_getspecific(mic_thread_key); | 
|  | if (thread == 0) { | 
|  | thread = new Thread(&m_proc_number); | 
|  | thread_setspecific(mic_thread_key, thread); | 
|  | } | 
|  |  | 
|  | return thread->get_auto_vars(); | 
|  | } | 
|  |  | 
|  | void Engine::destroy_thread_data(void *data) | 
|  | { | 
|  | delete static_cast<Thread*>(data); | 
|  | } |