The following 6 code examples, extracted from open-source Python projects, illustrate how to use numba.float64().
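Before the extracted examples, here is a minimal sketch (not taken from any of the projects below; the function name scaled_sum is hypothetical) of the most common direct use of numba.float64: as an explicit type in an eagerly compiled signature.

import numba

# numba.float64 names the double-precision scalar type; calling it builds
# a signature, so the function is compiled eagerly rather than lazily.
@numba.njit(numba.float64(numba.float64, numba.float64))
def scaled_sum(a, b):
    return 0.5 * (a + b)

print(scaled_sum(2.0, 4.0))   # 3.0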
def pix2ang_ring(nside, ipix):
    """Calculate the angular coordinates on the sphere for each pixel
    index in the RING ordering scheme.

    Parameters
    ----------
    ipix : 1D or 2D `~numpy.ndarray`
        The indexes of the HEALPix pixels in the RING ordering

    Returns
    -------
    theta : 1D or 2D `~numpy.ndarray`
        The polar angles (i.e., latitudes), θ ∈ [0, π]. (unit: rad)
    phi : 1D or 2D `~numpy.ndarray`
        The azimuthal angles (i.e., longitudes), φ ∈ [0, 2π). (unit: rad)
    The shape is the same as the input array.

    NOTE
    ----
    * Only support the *RING* ordering scheme
    * This is the JIT-optimized version that partially replaces the
      ``healpy.pix2ang``
    """
    shape = ipix.shape
    size = ipix.size
    ipix = ipix.flatten()
    theta = np.zeros(size, dtype=np.float64)
    phi = np.zeros(size, dtype=np.float64)
    for i in range(size):
        theta_, phi_ = pix2ang_ring_single(nside, ipix[i])
        theta[i] = theta_
        phi[i] = phi_
    return (theta.reshape(shape), phi.reshape(shape))
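A hedged usage sketch for the example above: it assumes pix2ang_ring_single is defined (and itself compiled in nopython mode) elsewhere in the module, and the nside value 16 is arbitrary.

import numpy as np
import numba

# Compile the function above in nopython mode; pix2ang_ring_single must
# also be a jitted function for this to compile.
pix2ang_ring_jit = numba.jit(nopython=True)(pix2ang_ring)

nside = 16                                             # arbitrary resolution
ipix = np.arange(12 * nside * nside, dtype=np.int64)   # all RING pixel indexes
theta, phi = pix2ang_ring_jit(nside, ipix)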
def compute_log_p_sample(data, f, t):
    C = len(data.cn)

    population_prior = np.zeros(3)
    population_prior[0] = (1 - t)
    population_prior[1] = t * (1 - f)
    population_prior[2] = t * f

    ll = np.ones(C, dtype=np.float64) * np.inf * -1

    for c in range(C):
        e_vaf = 0
        norm_const = 0
        for i in range(3):
            e_cn = population_prior[i] * data.cn[c, i]
            e_vaf += e_cn * data.mu[c, i]
            norm_const += e_cn
        e_vaf /= norm_const
        ll[c] = data.log_pi[c] + binomial_log_pdf(data.a + data.b, data.b, e_vaf)

    return log_sum_exp(ll)
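The example above leans on two helpers that are not shown. The following are hedged sketches of what they typically look like (standard formulas; the original project may implement them differently), written so they can be called from nopython-mode code.

import math
import numba

@numba.njit
def log_sum_exp(log_x):
    # Numerically stable log(sum(exp(log_x))) for a 1D array.
    m = log_x[0]
    for i in range(1, log_x.shape[0]):
        if log_x[i] > m:
            m = log_x[i]
    s = 0.0
    for i in range(log_x.shape[0]):
        s += math.exp(log_x[i] - m)
    return m + math.log(s)

@numba.njit
def binomial_log_pdf(n, k, p):
    # log C(n, k) + k*log(p) + (n - k)*log(1 - p), using lgamma for the
    # binomial coefficient.
    return (math.lgamma(n + 1) - math.lgamma(k + 1) - math.lgamma(n - k + 1)
            + k * math.log(p) + (n - k) * math.log(1.0 - p))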
def hist_cuda_test():
    histogram_array = src1  # np.zeros(vectorSize*BIN_COUNT, dtype=np.int32).reshape(vectorSize, BIN_COUNT)
    # This will be calculated from the Camera's image processed on the GPU.
    # Let's hardcode it for the moment.
    histogram = src1[SEARCH_INDEX]  # np.zeros(BIN_COUNT, dtype=np.float32)
    results = np.zeros(9, dtype=np.float64)
    # 1-element array so the kernel can write the found index back to the host.
    foundIndex = np.full(1, -1, dtype=np.int32)

    # Use a stream to trigger async memory transfer.
    cstream = cuda.stream()

    ts = timer()
    # Increase the counter to measure the efficiency.
    count = 1
    for i in range(count):
        with cstream.auto_synchronize():
            # For histogram comparison.
            d_histogram_array = cuda.to_device(histogram_array, stream=cstream)
            d_histogram = cuda.to_device(histogram, stream=cstream)
            d_results = cuda.to_device(results, stream=cstream)
            d_foundIndex = cuda.to_device(foundIndex, stream=cstream)
            hist_comp[1, vectorSize, cstream](d_histogram_array, d_histogram, d_results, d_foundIndex)
            d_histogram_array.copy_to_host(histogram_array, stream=cstream)
            d_histogram.copy_to_host(histogram, stream=cstream)
            d_results.copy_to_host(results, stream=cstream)
            d_foundIndex.copy_to_host(foundIndex, stream=cstream)
    te = timer()
    print('GPU Process ', count, " Iterations : in ", te - ts)
    print('histogram is')
    print(results)
    print('Found Index ', foundIndex[0])
def hist_cuda_test():
    histogram_array = src1  # np.zeros(vectorSize*BIN_COUNT, dtype=np.int32).reshape(vectorSize, BIN_COUNT)
    histogram = src1[SEARCH_INDEX]  # np.zeros(BIN_COUNT, dtype=np.float32)
    results = np.zeros(9, dtype=np.float64)

    # Use a stream to trigger async memory transfer.
    cstream = cuda.stream()

    ts = timer()
    # Increase the counter to measure the efficiency.
    count = 1
    for i in range(count):
        with cstream.auto_synchronize():
            # For histogram comparison.
            d_histogram_array = cuda.to_device(histogram_array, stream=cstream)
            d_histogram = cuda.to_device(histogram, stream=cstream)
            d_results = cuda.to_device(results, stream=cstream)
            hist_comp[1, vectorSize, cstream](d_histogram_array, d_histogram, d_results)
            d_histogram_array.copy_to_host(histogram_array, stream=cstream)
            d_histogram.copy_to_host(histogram, stream=cstream)
            d_results.copy_to_host(results, stream=cstream)
    te = timer()
    print('GPU Process ', count, " Iterations : in ", te - ts)
    print('histogram is')
    print(results)
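The two hist_cuda_test variants above reference module-level names that are not shown. A hedged sketch of that setup follows (the real project fills src1 from camera images; the random data, the timer import, and the exact constants are placeholders):

import math
import numpy as np
from numba import cuda, float64, float32, int8
from timeit import default_timer as timer

vectorSize = 9     # number of stored feature vectors (one block, 9 threads)
BIN_COUNT = 34     # histogram bins per feature vector
SEARCH_INDEX = 0   # which stored histogram to compare against

rng = np.random.default_rng(0)
# Strictly positive values so the KL-divergence logs stay finite.
src1 = rng.random((vectorSize, BIN_COUNT)) + 1e-6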
def hist_comp(arry, hist, result, index):
    # We have N threads per block, and only one block.
    x = cuda.grid(1)

    # One slot per feature vector.
    R = cuda.shared.array(9, dtype=float64)
    # All feature vectors: arry.shape == (9, 34)
    A = cuda.shared.array(shape=(9, 34), dtype=float64)
    # Vector to compare against: hist.shape[0] == BIN_COUNT == 34
    B = cuda.shared.array(34, dtype=float64)

    for i in range(BIN_COUNT):
        B[i] = hist[i]
    A[x] = arry[x]
    cuda.syncthreads()

    # Do the actual calculation, i.e. the Kullback-Leibler divergence.
    Sum = 0.00
    for i in range(BIN_COUNT):
        a = B[i]
        b = A[x][i]
        Sum += (a * (math.log(a / b) / math.log(2.0)))

    # R contains the KL divergences.
    R[x] = Sum
    cuda.syncthreads()

    # These should be shared variables (1-element shared arrays).
    Min = cuda.shared.array(1, dtype=float32)
    mIndex = cuda.shared.array(1, dtype=int8)
    if x == 0:
        Min[0] = R[x]
        mIndex[0] = x
    cuda.syncthreads()

    if R[x] <= Min[0]:
        Min[0] = R[x]
        mIndex[0] = x
    cuda.syncthreads()

    if x == mIndex[0]:
        index[0] = mIndex[0]
def hist_comp(arry, hist, result):
    # We have N threads per block, and only one block.
    x = cuda.grid(1)

    # One slot per feature vector.
    R = cuda.shared.array(9, dtype=float64)
    # All feature vectors: arry.shape == (9, 34)
    A = cuda.shared.array(shape=(9, 34), dtype=float64)
    # Vector to compare against: hist.shape[0] == BIN_COUNT == 34
    B = cuda.shared.array(34, dtype=float64)

    for i in range(BIN_COUNT):
        B[i] = hist[i]
    A[x] = arry[x]
    cuda.syncthreads()

    # Do the actual calculation, i.e. the Kullback-Leibler divergence.
    Sum = 0.00
    for i in range(BIN_COUNT):
        a = B[i]
        b = A[x][i]
        Sum += (a * (math.log(a / b) / math.log(2.0)))

    # R contains the KL divergences.
    R[x] = Sum
    cuda.syncthreads()

    # Find the min divergence OR the sum of all divergences
    # by tree reduction.
    rSize = cuda.blockDim.x >> 1
    while rSize > 0:
        if x < rSize:
            R[x] = (R[x] + R[x + rSize])
            # R[x] = min(R[x], R[x + rSize])
        rSize >>= 1
        cuda.syncthreads()

    # This implementation doesn't take care of the last two values,
    # so use a hack.
    # TODO: need to fix it.
    if x == 0:
        # R[x] = x if R[x] < R[x+1] else (x+1)
        R[x] = (R[x] + R[x + 1])
        # R[x] = min(R[x], R[x+1])
        result[x] = R[x]
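For context, the kernels above are presumably decorated with cuda.jit, and float64 here is the Numba type passed to cuda.shared.array, which is what ties these examples to numba.float64. A minimal, self-contained sketch of that pattern (hypothetical kernel name and sizes; requires a CUDA-capable GPU):

import numpy as np
from numba import cuda, float64

@cuda.jit
def sum_reduce(values, out):
    # One block of 9 threads stages values in float64 shared memory,
    # then thread 0 accumulates the total.
    x = cuda.grid(1)
    R = cuda.shared.array(9, dtype=float64)
    R[x] = values[x]
    cuda.syncthreads()
    if x == 0:
        total = 0.0
        for i in range(9):
            total += R[i]
        out[0] = total

values = np.arange(9, dtype=np.float64)
out = np.zeros(1, dtype=np.float64)
sum_reduce[1, 9](values, out)
print(out[0])   # 36.0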