/build/reproducible-path/rocrand-6.4.1/library/include/rocrand/rocrand_common.h Source File

/build/reproducible-path/rocrand-6.4.1/library/include/rocrand/rocrand_common.h Source File#

API library: /build/reproducible-path/rocrand-6.4.1/library/include/rocrand/rocrand_common.h Source File
rocrand_common.h
1// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21#ifndef ROCRAND_COMMON_H_
22#define ROCRAND_COMMON_H_
23
// Numeric constants. Macros whose name ends in _DOUBLE expand to double
// literals; all others expand to float literals (f suffix).

// Reciprocals of powers of two.
#define ROCRAND_2POW16_INV (1.5258789e-05f) // 2^-16
#define ROCRAND_2POW16_INV_2PI (9.58738e-05f) // 2^-16 * 2*pi
#define ROCRAND_2POW32_INV (2.3283064e-10f) // 2^-32
#define ROCRAND_2POW32_INV_DOUBLE (2.3283064365386963e-10) // 2^-32
#define ROCRAND_2POW64_INV (5.4210109e-20f) // 2^-64
#define ROCRAND_2POW64_INV_DOUBLE (5.4210108624275221700372640043497e-20) // 2^-64
#define ROCRAND_2POW32_INV_2PI (1.46291807e-09f) // 2^-32 * 2*pi
#define ROCRAND_2POW53_INV_DOUBLE (1.1102230246251565e-16) // 2^-53

// pi and 2*pi.
#define ROCRAND_PI (3.141592653f)
#define ROCRAND_PI_DOUBLE (3.1415926535897932)
#define ROCRAND_2PI (6.2831855f)

// Square root of two.
#define ROCRAND_SQRT2 (1.4142135f)
#define ROCRAND_SQRT2_DOUBLE (1.4142135623730951)
37
38#include <hip/hip_runtime.h>
39
40#include <math.h>
41
// Qualifier set for kernel definitions: a __global__ function declared static.
#define ROCRAND_KERNEL __global__ static

// ROCRAND_HALF_MATH_SUPPORTED is defined only during device compilation, and
// only for AMD targets or for NVIDIA targets with __CUDA_ARCH__ >= 530.
// The NVIDIA platform is recognised through either spelling of the HIP
// platform macro: the legacy __HIP_PLATFORM_NVCC__ or __HIP_PLATFORM_NVIDIA__.
#if __HIP_DEVICE_COMPILE__ \
    && (defined(__HIP_PLATFORM_AMD__) \
        || ((defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) \
            && (__CUDA_ARCH__ >= 530)))
    #define ROCRAND_HALF_MATH_SUPPORTED
#endif
49
50// Copyright 2001 John Maddock.
51// Copyright 2017 Peter Dimov.
52//
53// Distributed under the Boost Software License, Version 1.0.
54//
55// See http://www.boost.org/LICENSE_1_0.txt
56//
57// BOOST_STRINGIZE(X)
// Two-level stringification: ROCRAND_STRINGIZE forwards to
// ROCRAND_DO_STRINGIZE so that a macro argument is expanded before the
// # operator turns it into a string literal.
#define ROCRAND_DO_STRINGIZE(X) #X
#define ROCRAND_STRINGIZE(X) ROCRAND_DO_STRINGIZE(X)
60
61// Copyright 2017 Peter Dimov.
62//
63// Distributed under the Boost Software License, Version 1.0.
64//
65// See http://www.boost.org/LICENSE_1_0.txt
66//
67// BOOST_PRAGMA_MESSAGE("message")
68//
69// Expands to the equivalent of #pragma message("message")
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
    // GNU-compatible compilers (other than Intel): use the _Pragma operator.
    #define ROCRAND_PRAGMA_MESSAGE(x) _Pragma(ROCRAND_STRINGIZE(message(x)))
#elif defined(__INTEL_COMPILER) || defined(_MSC_VER)
    // Intel and MSVC: use __pragma and prefix the text with "file(line): note: ".
    #define ROCRAND_PRAGMA_MESSAGE(x) \
        __pragma(message(__FILE__ "(" ROCRAND_STRINGIZE(__LINE__) "): note: " x))
#else
    // Unknown compiler: the message is dropped.
    #define ROCRAND_PRAGMA_MESSAGE(x)
#endif
81
// ROCRAND_DEPRECATED(msg) marks a declaration as deprecated with message msg.
// The first matching spelling below is used.
#if __cplusplus >= 201402L
    // C++14 and later: standard attribute.
    #define ROCRAND_DEPRECATED(msg) [[deprecated(msg)]]
#elif defined(_MSC_VER) && !defined(__clang__)
    // MSVC (not clang in MSVC mode).
    #define ROCRAND_DEPRECATED(msg) __declspec(deprecated(msg))
#elif defined(__clang__) || defined(__GNUC__)
    // clang / GNU attribute syntax.
    #define ROCRAND_DEPRECATED(msg) __attribute__((deprecated(msg)))
#else
    // No known spelling: expands to nothing.
    #define ROCRAND_DEPRECATED(msg)
#endif
91
92namespace rocrand_device {
93namespace detail {
94
// Decide whether ROCRAND_ENABLE_INLINE_ASM is in effect.
//
// For the NVIDIA platform (either the legacy __HIP_PLATFORM_NVCC__ macro or
// __HIP_PLATFORM_NVIDIA__) and for the AMD targets listed below, the macro is
// defined unless the user already defined it. For any other target, a
// user-provided definition is removed while compiling device code.
#if(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__) \
    || defined(__gfx801__) \
    || defined(__gfx802__) \
    || defined(__gfx803__) \
    || defined(__gfx810__) \
    || defined(__gfx900__) \
    || defined(__gfx902__) \
    || defined(__gfx904__) \
    || defined(__gfx906__) \
    || defined(__gfx908__) \
    || defined(__gfx909__) \
    || defined(__gfx1030__))
    #if !defined(ROCRAND_ENABLE_INLINE_ASM)
        #define ROCRAND_ENABLE_INLINE_ASM
    #endif
#else
    #if defined(__HIP_DEVICE_COMPILE__) && defined(ROCRAND_ENABLE_INLINE_ASM)
        #undef ROCRAND_ENABLE_INLINE_ASM
    #endif
#endif
115
116__forceinline__ __device__ __host__ unsigned long long
117 mad_u64_u32(const unsigned int x, const unsigned int y, const unsigned long long z)
118{
119#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_DEVICE_COMPILE__) \
120 && defined(ROCRAND_ENABLE_INLINE_ASM)
121
122 #if __AMDGCN_WAVEFRONT_SIZE == 64u
123 using sgpr_t = unsigned long long;
124 #elif __AMDGCN_WAVEFRONT_SIZE == 32u
125 using sgpr_t = unsigned int;
126 #endif
127
128 unsigned long long r;
129 sgpr_t c; // carry bits, SGPR, unused
130 // x has "r" constraint. This allows to use both VGPR and SGPR
131 // (to save VGPR) as input.
132 // y and z have "v" constraints, because only one SGPR or literal
133 // can be read by the instruction.
134 asm volatile("v_mad_u64_u32 %0, %1, %2, %3, %4"
135 : "=v"(r), "=s"(c) : "r"(x), "v"(y), "v"(z)
136 );
137 return r;
138 #elif defined(__HIP_PLATFORM_NVCC__) && defined(__HIP_DEVICE_COMPILE__) \
139 && defined(ROCRAND_ENABLE_INLINE_ASM)
140
141 unsigned long long r;
142 asm("mad.wide.u32 %0, %1, %2, %3;"
143 : "=l"(r) : "r"(x), "r"(y), "l"(z)
144 );
145 return r;
146
147 #else // host code
148
149 return static_cast<unsigned long long>(x) * static_cast<unsigned long long>(y) + z;
150
151 #endif
152}
153
154// This helps access fields of engine's internal state which
155// saves floats and doubles generated using the Box–Muller transform
156template<typename Engine>
157struct engine_boxmuller_helper
158{
159 static __forceinline__ __device__ __host__ bool has_float(const Engine* engine)
160 {
161 return engine->m_state.boxmuller_float_state != 0;
162 }
163
164 static __forceinline__ __device__ __host__ float get_float(Engine* engine)
165 {
166 engine->m_state.boxmuller_float_state = 0;
167 return engine->m_state.boxmuller_float;
168 }
169
170 static __forceinline__ __device__ __host__ void save_float(Engine* engine, float f)
171 {
172 engine->m_state.boxmuller_float_state = 1;
173 engine->m_state.boxmuller_float = f;
174 }
175
176 static __forceinline__ __device__ __host__ bool has_double(const Engine* engine)
177 {
178 return engine->m_state.boxmuller_double_state != 0;
179 }
180
181 static __forceinline__ __device__ __host__ float get_double(Engine* engine)
182 {
183 engine->m_state.boxmuller_double_state = 0;
184 return engine->m_state.boxmuller_double;
185 }
186
187 static __forceinline__ __device__ __host__ void save_double(Engine* engine, double d)
188 {
189 engine->m_state.boxmuller_double_state = 1;
190 engine->m_state.boxmuller_double = d;
191 }
192};
193
194template<typename T>
195__forceinline__ __device__ __host__ void split_ull(T& lo, T& hi, unsigned long long int val);
196
197template<>
198__forceinline__ __device__ __host__ void
199 split_ull(unsigned int& lo, unsigned int& hi, unsigned long long int val)
200{
201 lo = val & 0xFFFFFFFF;
202 hi = (val >> 32) & 0xFFFFFFFF;
203}
204
205template<>
206__forceinline__ __device__ __host__ void
207 split_ull(unsigned long long int& lo, unsigned long long int& hi, unsigned long long int val)
208{
209 lo = val;
210 hi = 0;
211}
212
213} // end namespace detail
214} // end namespace rocrand_device
215
216#endif // ROCRAND_COMMON_H_