#ifndef _OMP_H
#define _OMP_H

#include <stddef.h>

/*
 * define the lock data types
 */
#ifndef _OMP_LOCK_T_DEF
#define _OMP_LOCK_T_DEF
/*
 * Simple lock object: storage for two pointer-sized slots.
 * The definition is skipped when _OMP_LOCK_T_DEF is already defined.
 */
typedef struct { void *lock[2]; } omp_lock_t;
#endif
#ifndef _OMP_NEST_LOCK_T_DEF
#define _OMP_NEST_LOCK_T_DEF
/*
 * Nestable lock object: storage for four pointer-sized slots.
 * The definition is skipped when _OMP_NEST_LOCK_T_DEF is already defined.
 */
typedef struct { void *lock[4]; } omp_nest_lock_t;
#endif

/*
 * define the synchronization hints
 */
typedef enum omp_sync_hint_t {
    /* Primary names: zero for "none", then one distinct bit per hint. */
    omp_sync_hint_none           = 0,
    omp_sync_hint_uncontended    = 1 << 0,
    omp_sync_hint_contended      = 1 << 1,
    omp_sync_hint_nonspeculative = 1 << 2,
    omp_sync_hint_speculative    = 1 << 3,

    /* omp_lock_hint_* spellings: each equals its omp_sync_hint_* twin. */
    omp_lock_hint_none           = omp_sync_hint_none,
    omp_lock_hint_uncontended    = omp_sync_hint_uncontended,
    omp_lock_hint_contended      = omp_sync_hint_contended,
    omp_lock_hint_nonspeculative = omp_sync_hint_nonspeculative,
    omp_lock_hint_speculative    = omp_sync_hint_speculative
    /* ,
       Add vendor specific constants for lock hints here,
       starting from the most-significant bit. */
} omp_sync_hint_t;

/* omp_lock_hint_t has been deprecated; it is an alias of omp_sync_hint_t. */
typedef omp_sync_hint_t omp_lock_hint_t;

/*
 * define the schedule kinds
 */
typedef enum omp_sched_t {
    /* schedule kinds */
    omp_sched_static  = 1,
    omp_sched_dynamic = 2,
    omp_sched_guided  = 3,
    omp_sched_auto    = 4,
    /* Add vendor specific schedule constants here */

    /* schedule modifier: bit 31 only, so it shares no bits with the kinds */
    omp_sched_monotonic = 0x80000000u
} omp_sched_t;

/*
 * define the proc bind values
 */
typedef enum omp_proc_bind_t {
    omp_proc_bind_false = 0,
    omp_proc_bind_true = 1,
    /* OpenMP 5.1 spelling of the value 2 */
    omp_proc_bind_primary = 2,
    /* Pre-5.1 spelling, kept with the same value so existing code still
       compiles and compares equal to omp_proc_bind_primary. */
    omp_proc_bind_master = omp_proc_bind_primary,
    omp_proc_bind_close = 3,
    omp_proc_bind_spread = 4
} omp_proc_bind_t;

/* Pointer-typed handle for a depend object.  The omp_target_memcpy*_async
 * declarations in this header take an array of these (depobj_list). */
typedef void *omp_depend_t;

/*
 * define interop properties
 */
typedef enum omp_interop_property_t {
    /* Property identifiers are negative, counting down from -1. */
    omp_ipr_fr_id          = -1,
    omp_ipr_fr_name        = -2,
    omp_ipr_vendor         = -3,
    omp_ipr_vendor_name    = -4,
    omp_ipr_device_num     = -5,
    omp_ipr_platform       = -6,
    omp_ipr_device         = -7,
    omp_ipr_device_context = -8,
    omp_ipr_targetsync     = -9,

    /* Lowest value listed above; same value as omp_ipr_targetsync. */
    omp_ipr_first          = omp_ipr_targetsync
} omp_interop_property_t;

/*
 * define interop return code properties
 */
typedef enum omp_interop_rc_t {
    /* One positive code, zero for success, negative codes below. */
    omp_irc_no_value     =  1,
    omp_irc_success      =  0,
    omp_irc_empty        = -1,
    omp_irc_out_of_range = -2,
    omp_irc_type_int     = -3,
    omp_irc_type_ptr     = -4,
    omp_irc_type_str     = -5,
    omp_irc_other        = -6
} omp_interop_rc_t;

/* Pointer-typed interop object handle. */
typedef void *omp_interop_t;
/* The null interop handle: zero converted to omp_interop_t. */
#define omp_interop_none ((omp_interop_t) 0)

/*
 * define memory management types
 */
/* Unsigned / signed integer types used by the allocator-trait and interop
 * declarations in this header.
 * NOTE(review): both are spelled as `long`, which is narrower than a pointer
 * on LLP64 targets; presumably this header is only used on LP64 platforms.
 * Confirm before porting. */
typedef unsigned long int omp_uintptr_t;
typedef long int omp_intptr_t;

typedef enum omp_memspace_handle_t {
    omp_default_mem_space   = 0,

    /* In this header every other predefined memory space is given the
       same value as omp_default_mem_space. */
    omp_large_cap_mem_space = omp_default_mem_space,
    omp_const_mem_space     = omp_default_mem_space,
    omp_high_bw_mem_space   = omp_default_mem_space,
    omp_low_lat_mem_space   = omp_default_mem_space
    /* ,
       Add vendor specific constants for memory spaces here. */
} omp_memspace_handle_t;

typedef enum omp_allocator_handle_t {
    omp_null_allocator      = 0,

    /* The rest of the enumerators have implementation specific values.
       Here each predefined allocator is given the same value as
       omp_default_mem_alloc. */
    omp_default_mem_alloc   = 1,
    omp_large_cap_mem_alloc = omp_default_mem_alloc,
    omp_const_mem_alloc     = omp_default_mem_alloc,
    omp_high_bw_mem_alloc   = omp_default_mem_alloc,
    omp_low_lat_mem_alloc   = omp_default_mem_alloc,
    omp_cgroup_mem_alloc    = omp_default_mem_alloc,
    omp_pteam_mem_alloc     = omp_default_mem_alloc,
    omp_thread_mem_alloc    = omp_default_mem_alloc
    /* ,
       Some range for dynamically allocated handles. */
} omp_allocator_handle_t;

typedef enum omp_alloctrait_key_t {
    /* Allocator trait keys, numbered consecutively from 1. */
    omp_atk_sync_hint = 1,
    omp_atk_alignment = 2,
    omp_atk_access    = 3,
    omp_atk_pool_size = 4,
    omp_atk_fallback  = 5,
    omp_atk_fb_data   = 6,
    omp_atk_pinned    = 7,
    omp_atk_partition = 8
} omp_alloctrait_key_t;

typedef enum omp_alloctrait_value_t {
    omp_atv_false          = 0,
    omp_atv_true           = 1,
    /* 2 is not assigned to any enumerator in this type. */
    omp_atv_contended      = 3,
    omp_atv_uncontended    = 4,
    omp_atv_sequential     = 5,
    omp_atv_private        = 6,
    omp_atv_all            = 7,
    omp_atv_thread         = 8,
    omp_atv_pteam          = 9,
    omp_atv_cgroup         = 10,
    omp_atv_default_mem_fb = 11,
    omp_atv_null_fb        = 12,
    omp_atv_abort_fb       = 13,
    omp_atv_allocator_fb   = 14,
    omp_atv_environment    = 15,
    omp_atv_nearest        = 16,
    omp_atv_blocked        = 17,
    omp_atv_interleaved    = 18
} omp_alloctrait_value_t;

/* Provided as a macro rather than an enumerator of omp_alloctrait_value_t:
 * expands to -1 converted to omp_uintptr_t, i.e. that type's largest value. */
#define omp_atv_default ((omp_uintptr_t) -1)

/* One allocator trait: a key paired with a value.  omp_init_allocator in
 * this header takes a pointer to const omp_alloctrait_t plus a count. */
typedef struct omp_alloctrait_t {
    /* which trait this entry refers to */
    omp_alloctrait_key_t key;
    /* trait value, held as omp_uintptr_t.
       NOTE(review): presumably an omp_alloctrait_value_t constant,
       omp_atv_default, or a plain number depending on key - confirm
       against the OpenMP specification. */
    omp_uintptr_t value;
} omp_alloctrait_t;

/*
 * define kinds of relinquishing resources
 */
typedef enum omp_pause_resource_t {
    /* Kind argument accepted by omp_pause_resource / omp_pause_resource_all. */
    omp_pause_soft = 0x1,
    omp_pause_hard = 0x2
} omp_pause_resource_t;

typedef enum omp_event_handle_t {
    /* Vendor specific enumerators, e.g.:  */

    /* Only two enumerators are defined: zero and all-ones (~0u). */
    __omp_event_min = 0,
    __omp_event_max = ~0u
} omp_event_handle_t;

/*
 * define the tool control commands
 */
typedef enum omp_control_tool_t {
    /* Tool control commands, numbered consecutively from 1. */
    omp_control_tool_start = 1,
    omp_control_tool_pause = 2,
    omp_control_tool_flush = 3,
    omp_control_tool_end   = 4
} omp_control_tool_t;

typedef enum omp_control_tool_result_t {
    /* Result codes: two negative values, zero for success, one positive. */
    omp_control_tool_notool     = -2,
    omp_control_tool_nocallback = -1,
    omp_control_tool_success    =  0,
    omp_control_tool_ignored    =  1
} omp_control_tool_result_t;

#ifndef _OMP_FUNC_DEF

/*
 * Exported OpenMP functions
 */
#ifdef __cplusplus
/*
 * TODO:
 * The omp.h header file also defines a class template that models the Allocator concept in the
 * omp::allocator namespace for each predefined memory allocator in Table 2.10 on page 155
 * for which the name includes neither the omp_ prefix nor the _alloc suffix.
 */
extern "C" {
#endif

#if defined(__stdc__) || defined(__STDC__) || defined(__cplusplus)

/*
 * Thread Team Routines
 *
 * Prototyped declarations: this branch is compiled when __stdc__, __STDC__
 * or __cplusplus is defined.
 */
extern void omp_set_num_threads(int num_threads);
extern int omp_get_num_threads(void);
extern int omp_get_max_threads(void);
extern int omp_get_thread_num(void);
extern int omp_in_parallel(void);
extern void omp_set_dynamic(int dynamic_threads);
extern int omp_get_dynamic(void);
extern int omp_get_cancellation(void);
extern void omp_set_nested(int nested);
extern int omp_get_nested(void);
extern void omp_set_schedule(omp_sched_t kind, int chunk);
/* kind and chunk are non-const pointers - presumably outputs; confirm
   against the OpenMP specification. */
extern void omp_get_schedule(omp_sched_t *kind, int *chunk);
extern int omp_get_thread_limit(void);
extern int omp_get_supported_active_levels(void);
extern void omp_set_max_active_levels(int max_levels);
extern int omp_get_max_active_levels(void);
extern int omp_get_level(void);
extern int omp_get_ancestor_thread_num(int level);
extern int omp_get_team_size(int level);
extern int omp_get_active_level(void);

/* Thread Affinity Routines */

/* binding policy */
extern omp_proc_bind_t omp_get_proc_bind(void);

/* places and the current place partition */
extern int omp_get_num_places(void);
extern int omp_get_place_num_procs(int place_num);
extern void omp_get_place_proc_ids(int place_num, int *ids);
extern int omp_get_place_num(void);
extern int omp_get_partition_num_places(void);
extern void omp_get_partition_place_nums(int *place_nums);

/* affinity format strings; buffer/size pairs describe caller storage */
extern void omp_set_affinity_format(const char *format);
extern size_t omp_get_affinity_format(char *buffer, size_t size);
extern void omp_display_affinity(const char *format);
extern size_t omp_capture_affinity(char *buffer, size_t size,
                                   const char *format);

/* Teams Region Routines (prototyped; every parameter and result is int) */
extern int omp_get_num_teams(void);
extern int omp_get_team_num(void);
extern void omp_set_num_teams(int num_teams);
extern int omp_get_max_teams(void);
extern void omp_set_teams_thread_limit(int thread_limit);
extern int omp_get_teams_thread_limit(void);

/* Tasking Routines (prototyped; both take no arguments and return int) */
extern int omp_get_max_task_priority(void);
extern int omp_in_final(void);

/* Resource Relinquishing Routines
 * kind is an omp_pause_resource_t (omp_pause_soft or omp_pause_hard);
 * both routines return int. */
extern int omp_pause_resource(omp_pause_resource_t kind, int device_num);
extern int omp_pause_resource_all(omp_pause_resource_t kind);

/* Device Information Routines (prototyped; devices are identified by int) */
extern int omp_get_num_procs(void);
extern void omp_set_default_device(int device_num);
extern int omp_get_default_device(void);
extern int omp_get_num_devices(void);
extern int omp_get_device_num(void);
extern int omp_is_initial_device(void);
extern int omp_get_initial_device(void);

/* Device Memory Routines */

/* allocation and presence/accessibility queries */
extern void *omp_target_alloc(size_t size, int device_num);
extern void omp_target_free(void *device_ptr, int device_num);
extern int omp_target_is_present(const void *ptr, int device_num);
extern int omp_target_is_accessible(const void *ptr, size_t size,
                                    int device_num);

/* copies; the *_async forms additionally take a depend-object list */
extern int omp_target_memcpy(void *dst, const void *src, size_t length,
                             size_t dst_offset, size_t src_offset,
                             int dst_device_num, int src_device_num);
extern int omp_target_memcpy_rect(void *dst, const void *src,
                                  size_t element_size, int num_dims,
                                  const size_t *volume,
                                  const size_t *dst_offsets,
                                  const size_t *src_offsets,
                                  const size_t *dst_dimensions,
                                  const size_t *src_dimensions,
                                  int dst_device_num, int src_device_num);
extern int omp_target_memcpy_async(void *dst, const void *src, size_t length,
                                   size_t dst_offset, size_t src_offset,
                                   int dst_device_num, int src_device_num,
                                   int depobj_count,
                                   omp_depend_t *depobj_list);
extern int omp_target_memcpy_rect_async(void *dst, const void *src,
                                        size_t element_size, int num_dims,
                                        const size_t *volume,
                                        const size_t *dst_offsets,
                                        const size_t *src_offsets,
                                        const size_t *dst_dimensions,
                                        const size_t *src_dimensions,
                                        int dst_device_num,
                                        int src_device_num,
                                        int depobj_count,
                                        omp_depend_t *depobj_list);

/* host/device pointer association */
extern int omp_target_associate_ptr(const void *host_ptr,
                                    const void *device_ptr, size_t size,
                                    size_t device_offset, int device_num);
extern int omp_target_disassociate_ptr(const void *ptr, int device_num);
extern void *omp_get_mapped_ptr(const void *ptr, int devnum);

/* Lock routines: operate on an omp_lock_t through a pointer. */
extern void omp_init_lock(omp_lock_t *lock);
extern void omp_init_lock_with_hint(omp_lock_t *lock, omp_sync_hint_t hint);
extern void omp_destroy_lock(omp_lock_t *lock);
extern void omp_set_lock(omp_lock_t *lock);
extern void omp_unset_lock(omp_lock_t *lock);
extern int omp_test_lock(omp_lock_t *lock);

/* _ftn variants: same set of operations, but taking omp_lock_t ** (one more
   level of indirection).
   NOTE(review): presumably entry points for the Fortran bindings - confirm. */
extern void omp_init_lock_ftn(omp_lock_t **lock);
extern void omp_init_lock_with_hint_ftn(omp_lock_t **lock, omp_sync_hint_t hint);
extern void omp_destroy_lock_ftn(omp_lock_t **lock);
extern void omp_set_lock_ftn(omp_lock_t **lock);
extern void omp_unset_lock_ftn(omp_lock_t **lock);
extern int omp_test_lock_ftn(omp_lock_t **lock);

/* Nest lock routines: operate on an omp_nest_lock_t through a pointer. */
extern void omp_init_nest_lock(omp_nest_lock_t *lock);
/* Takes omp_nest_lock_t *, like every other nest-lock routine; declaring it
   with omp_lock_t * would make callers pass the smaller simple-lock object. */
extern void omp_init_nest_lock_with_hint(omp_nest_lock_t *lock, omp_sync_hint_t hint);
extern void omp_destroy_nest_lock(omp_nest_lock_t *lock);
extern void omp_set_nest_lock(omp_nest_lock_t *lock);
extern void omp_unset_nest_lock(omp_nest_lock_t *lock);
extern int omp_test_nest_lock(omp_nest_lock_t *lock);

/* Timing routines (prototyped; both take no arguments and return double) */
extern double omp_get_wtime(void);
extern double omp_get_wtick(void);

/* Event Routine: takes the event handle by value. */
extern void omp_fulfill_event(omp_event_handle_t event);

/*
 * Interoperability Routines
 *
 * The three typed getters (int / ptr / str) take an int *ret_code.
 * NOTE(review): presumably it receives an omp_interop_rc_t value - confirm
 * against the OpenMP specification.
 */
extern int omp_get_num_interop_properties(const omp_interop_t interop);
extern omp_intptr_t omp_get_interop_int(const omp_interop_t interop, omp_interop_property_t property_id, int *ret_code);
extern void *omp_get_interop_ptr(const omp_interop_t interop, omp_interop_property_t property_id, int *ret_code);
extern const char *omp_get_interop_str(const omp_interop_t interop, omp_interop_property_t property_id, int *ret_code);
/* Descriptive strings for a property, its type, or a return code. */
extern const char *omp_get_interop_name(const omp_interop_t interop, omp_interop_property_t property_id);
extern const char *omp_get_interop_type_desc(const omp_interop_t interop, omp_interop_property_t property_id);
extern const char *omp_get_interop_rc_desc(const omp_interop_t interop, omp_interop_rc_t ret_code);

/* Memory Management Routines */
extern omp_allocator_handle_t omp_init_allocator(omp_memspace_handle_t memspace, int ntraits, const omp_alloctrait_t *traits);
extern void omp_destroy_allocator(omp_allocator_handle_t allocator);
extern void omp_set_default_allocator(omp_allocator_handle_t allocator);
extern omp_allocator_handle_t omp_get_default_allocator(void);
/* The allocation routines are declared twice.  The C++ declarations default
   each allocator parameter to omp_null_allocator; C has no default
   arguments, so the C declarations require every argument explicitly. */
#ifdef __cplusplus
extern void *omp_alloc(size_t size, omp_allocator_handle_t allocator=omp_null_allocator);
extern void *omp_aligned_alloc(size_t alignment, size_t size, omp_allocator_handle_t allocator=omp_null_allocator);
extern void omp_free(void *ptr, omp_allocator_handle_t allocator=omp_null_allocator);
extern void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator=omp_null_allocator);
extern void *omp_aligned_calloc(size_t alignment, size_t nmemb, size_t size, omp_allocator_handle_t allocator=omp_null_allocator);
extern void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator=omp_null_allocator, omp_allocator_handle_t free_allocator=omp_null_allocator);
#else
extern void *omp_alloc(size_t size, omp_allocator_handle_t allocator);
extern void *omp_aligned_alloc(size_t alignment, size_t size, omp_allocator_handle_t allocator);
extern void omp_free(void *ptr, omp_allocator_handle_t allocator);
extern void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator);
extern void *omp_aligned_calloc(size_t alignment, size_t nmemb, size_t size, omp_allocator_handle_t allocator);
extern void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, omp_allocator_handle_t free_allocator);
#endif

/* Tool Control Routine
 * command and the result are declared as plain int, not as
 * omp_control_tool_t / omp_control_tool_result_t. */
extern int omp_control_tool(int command, int modifier, void *arg);

/* Environment Display Routine: takes one int flag, returns nothing. */
extern void omp_display_env(int verbose);

/* Implementation Defined Routines (Non-standard) */
extern void *ompx_get_cuda_stream(int device, int nowait);
extern void ompx_set_cuda_stream_auto(int enable);
/* DISCLAIMER: This API is just a hack and its production usage needs to be formalized. */
/* NOTE(review): size is an int rather than size_t - confirm whether sizes
   above INT_MAX need to be expressible. */
extern void nvomp_set_memory_preferred_location_device(void *ptr, int size);
extern void nvomp_proc_bind_off(void);

#else

/*
 * Thread Team Routines
 *
 * Non-prototype declarations: this branch is compiled only when none of
 * __stdc__, __STDC__ and __cplusplus is defined.  The empty parentheses give
 * no parameter information, so arguments are not type-checked here; the
 * prototyped branch of this header lists the parameters.
 */
extern void omp_set_num_threads();
extern int omp_get_num_threads();
extern int omp_get_max_threads();
extern int omp_get_thread_num();
extern int omp_in_parallel();
extern void omp_set_dynamic();
extern int omp_get_dynamic();
extern int omp_get_cancellation();
extern void omp_set_nested();
extern int omp_get_nested();
extern void omp_set_schedule();
extern void omp_get_schedule();
extern int omp_get_thread_limit();
extern int omp_get_supported_active_levels();
extern void omp_set_max_active_levels();
extern int omp_get_max_active_levels();
extern int omp_get_level();
extern int omp_get_ancestor_thread_num();
extern int omp_get_team_size();
extern int omp_get_active_level();

/* Thread Affinity Routines (non-prototype form: return types only, no
   parameter information) */
extern omp_proc_bind_t omp_get_proc_bind();
extern int omp_get_num_places();
extern int omp_get_place_num_procs();
extern void omp_get_place_proc_ids();
extern int omp_get_place_num();
extern int omp_get_partition_num_places();
extern void omp_get_partition_place_nums();
extern void omp_set_affinity_format();
extern size_t omp_get_affinity_format();
extern void omp_display_affinity();
extern size_t omp_capture_affinity();

/* Teams Region Routines (non-prototype form: return types only, no
   parameter information) */
extern int omp_get_num_teams();
extern int omp_get_team_num();
extern void omp_set_num_teams();
extern int omp_get_max_teams();
extern void omp_set_teams_thread_limit();
extern int omp_get_teams_thread_limit();

/* Tasking Routines (non-prototype form: return types only, no parameter
   information) */
extern int omp_get_max_task_priority();
extern int omp_in_final();

/* Resource Relinquishing Routines (non-prototype form: return types only,
   no parameter information) */
extern int omp_pause_resource();
extern int omp_pause_resource_all();

/* Device Information Routines (non-prototype form: return types only, no
   parameter information) */
extern int omp_get_num_procs();
extern void omp_set_default_device();
extern int omp_get_default_device();
extern int omp_get_num_devices();
extern int omp_get_device_num();
extern int omp_is_initial_device();
extern int omp_get_initial_device();

/* Device Memory Routines (non-prototype form: return types only, no
   parameter information).  Lists the same routines as the prototyped
   branch, including omp_target_is_accessible. */
extern void* omp_target_alloc();
extern void omp_target_free();
extern int omp_target_is_present();
extern int omp_target_is_accessible();
extern int omp_target_memcpy();
extern int omp_target_memcpy_rect();
extern int omp_target_memcpy_async();
extern int omp_target_memcpy_rect_async();
extern int omp_target_associate_ptr();
extern int omp_target_disassociate_ptr();
extern void *omp_get_mapped_ptr();

/* Lock routines (non-prototype form: return types only, no parameter
   information) */
extern void omp_init_lock();
extern void omp_init_lock_with_hint();
extern void omp_destroy_lock();
extern void omp_set_lock();
extern void omp_unset_lock();
extern int omp_test_lock();

/* _ftn counterparts of the routines above */
extern void omp_init_lock_ftn();
extern void omp_init_lock_with_hint_ftn();
extern void omp_destroy_lock_ftn();
extern void omp_set_lock_ftn();
extern void omp_unset_lock_ftn();
extern int omp_test_lock_ftn();

/* Nest lock routines (non-prototype form: return types only, no parameter
   information) */
extern void omp_init_nest_lock();
extern void omp_init_nest_lock_with_hint();
extern void omp_destroy_nest_lock();
extern void omp_set_nest_lock();
extern void omp_unset_nest_lock();
extern int omp_test_nest_lock();

/* Timing routines (non-prototype form; both return double) */
extern double omp_get_wtime();
extern double omp_get_wtick();

/* Event Routine (non-prototype form: no parameter information) */
extern void omp_fulfill_event();

/* Interoperability Routines (non-prototype form: return types only, no
   parameter information).  Every declaration carries an explicit extern,
   like the rest of this header. */
extern int omp_get_num_interop_properties();
extern omp_intptr_t omp_get_interop_int();
extern void *omp_get_interop_ptr();
extern const char *omp_get_interop_str();
extern const char *omp_get_interop_name();
extern const char *omp_get_interop_type_desc();
extern const char *omp_get_interop_rc_desc();

/* Memory Management Routines (non-prototype form: return types only, no
   parameter information, hence no C/C++ split here) */
extern omp_allocator_handle_t omp_init_allocator();
extern void omp_destroy_allocator();
extern void omp_set_default_allocator();
extern omp_allocator_handle_t omp_get_default_allocator();
extern void *omp_alloc();
extern void *omp_aligned_alloc();
extern void omp_free();
extern void *omp_calloc();
extern void *omp_aligned_calloc();
extern void *omp_realloc();

/* Tool Control Routine (non-prototype form: no parameter information) */
extern int omp_control_tool();

/* Environment Display Routine (non-prototype form).  Declared here so this
   branch offers the same routines as the prototyped branch. */
extern void omp_display_env();

/* Implementation Defined Routines (Non-standard)
   (non-prototype form: return types only, no parameter information) */
extern void *ompx_get_cuda_stream();
extern void ompx_set_cuda_stream_auto();
/* DISCLAIMER: This API is just a hack and its production usage needs to be formalized. */
extern void nvomp_set_memory_preferred_location_device();
extern void nvomp_proc_bind_off();

#endif // defined(__stdc__) || defined(__STDC__) || defined(__cplusplus)

#ifdef __cplusplus
}
#endif

#endif // _OMP_FUNC_DEF

#endif // _OMP_H
