Best Python code snippet using fMBT_python
gpu.py
Source:gpu.py
1from __future__ import division2from numbapro import cuda3import numpy as np4import numbapro.cudalib.cublas5import numpy.random6import math7import scipy.sparse.linalg8import scipy.sparse as sps9def fista(I, Phi, lambdav, L=None, tol=10e-6, max_iterations=200, display=True, verbose=False):10 """11 I: Images 12 Phi: Dictionary 13 lambdav: Sparse Penalty 14 L = Largest eigenvalue of Phi 15 """16 b = numbapro.cudalib.cublas.Blas()17 (m, n) = Phi.shape18 (m, batch) = I.shape19 if L == None:20 L = scipy.sparse.linalg.svds(Phi, 1, which='LM', return_singular_vectors=False)21 print "Max eigenvalue: ." + str(L)22 L = (L**2)*2 # L = svd(Phi) -> eig(2*(Phi.T*Phi))23 invL = 1/L24 t = 1.25 if sps.issparse(Phi):26 Phi = np.array(Phi.todense())27 d_I = cuda.to_device(np.array(I, dtype=np.float32, order='F'))28 d_Phi = cuda.to_device(np.array(Phi, dtype=np.float32, order='F'))29 d_Q = cuda.device_array((n, n), dtype=np.float32, order='F')30 d_c = cuda.device_array((n, batch), dtype=np.float32, order='F')31 d_x = cuda.to_device(np.array(np.zeros((n, batch), dtype=np.float32), order='F'))32 d_y = cuda.to_device(np.array(np.zeros((n, batch), dtype=np.float32), order='F'))33 d_x2 = cuda.to_device(np.array(np.zeros((n, batch), dtype=np.float32), order='F'))34 # Temporary array variables35 d_t = cuda.device_array((m, batch), dtype=np.float32, order='F')36 d_t2 = cuda.device_array(n*batch, dtype=np.float32, order='F')37 b.gemm('T', 'N', n, n, m, 1, d_Phi, d_Phi, 0, d_Q) # Q = Phi^T * Phi38 b.gemm('T', 'N', n, batch, m, -2, d_Phi, d_I, 0, d_c) # c = -2*Phi^T * y39 blockdim = 32, 3240 griddim = int(math.ceil(n/blockdim[0])), int(math.ceil(batch/blockdim[1]))41 blockdim_1d = 25642 griddim_1d = int(math.ceil(n*batch/blockdim_1d))43 start = l2l1obj(b, d_I, d_Phi, d_x, d_t, d_t2, lambdav, blockdim_1d, griddim_1d)44 obj2 = start45 for i in xrange(max_iterations):46 # x2 = 2*Q*y + c47 b.symm('L', 'U', n, batch, 2, d_Q, d_y, 0, d_x2)48 b.geam('N', 'N', n, batch, 1, d_c, 1, d_x2, d_x2)49 50 # x2 
= y - invL * x251 b.geam('N', 'N', n, batch, 1, d_y, -invL, d_x2, d_x2)52 # proxOp() 53 l1prox[griddim, blockdim](d_x2, invL*lambdav, d_x2)54 t2 = (1+math.sqrt(1+4*(t**2)))/2.055 56 # y = x2 + ((t-1)/t2)*(x2-x)57 b.geam('N', 'N', n, batch, 1+(t-1)/t2, d_x2, (1-t)/t2, d_x, d_y)58 # x = x259 b.geam('N', 'N', n, batch, 1, d_x2, 0, d_x, d_x)60 t = t261 # update objective62 obj = obj263 obj2 = l2l1obj(b, d_I, d_Phi, d_x2, d_t, d_t2, lambdav, blockdim_1d, griddim_1d)64 if verbose:65 x2 = d_x2.copy_to_host()66 print "L1 Objective: " + str(obj2)67 if np.abs(obj-obj2)/float(obj) < tol:68 break69 x2 = d_x2.copy_to_host()70 if display:71 print "FISTA Iterations: " + str(i)72 print "L1 Objective: " + str(lambdav*np.sum(np.abs(x2)) + np.sum((I-Phi.dot(x2))**2))73 print "Objective delta: " + str(obj2-start)74 return x275def l2l1obj(b, d_I, d_Phi, d_x2, d_t, d_t2, lambdav, blockdim, griddim):76 (m, n) = d_Phi.shape77 (m, batch) = d_I.shape78 b.gemm('N', 'N', m, batch, n, 1, d_Phi, d_x2, 0, d_t)79 b.geam('N', 'N', m, batch, 1, d_I, -1, d_t, d_t)80 l2 = b.nrm2(d_t.ravel(order='F'))**281 82 gabs[griddim, blockdim](d_x2.ravel(order='F'), d_t2)83 84 l1 = lambdav*b.asum(d_t2)85 return l2 + l186@cuda.jit('void(float32[:,:], float64, float32[:,:])')87def l1prox(A, t, C):88 """ l1 Proximal operator: C = np.fmax(A-t, 0) + np.fmin(A+t, 0) 89 A: coefficients matrix (dim, batch)90 t: threshold91 C: output (dim, batch) """92 i, j = cuda.grid(2)93 if i >= A.shape[0] or j >= A.shape[1]:94 return95 if A[i, j] >= t:96 C[i, j] = A[i, j] - t 97 elif A[i, j] <= -t: 98 C[i, j] = A[i, j] + t 99 else:100 C[i, j] = 0101 return102@cuda.jit('void(float32[:], float32[:])')103def gabs(x, y):104 i = cuda.grid(1)105 if i >= x.size:106 return107 if x[i] < 0:108 y[i] = -x[i]109 else:110 y[i] = x[i]...
gpu 2.py
Source:gpu 2.py
1from __future__ import division2from numbapro import cuda3import numpy as np4import numbapro.cudalib.cublas5import numpy.random6import math7import scipy.sparse.linalg8import scipy.sparse as sps9def fista(I, Phi, lambdav, L=None, tol=10e-6, max_iterations=200, display=True, verbose=False):10 """11 I: Images 12 Phi: Dictionary 13 lambdav: Sparse Penalty 14 L = Largest eigenvalue of Phi 15 """16 b = numbapro.cudalib.cublas.Blas()17 (m, n) = Phi.shape18 (m, batch) = I.shape19 if L == None:20 L = scipy.sparse.linalg.svds(Phi, 1, which='LM', return_singular_vectors=False)21 print "Max eigenvalue: ." + str(L)22 L = (L**2)*2 # L = svd(Phi) -> eig(2*(Phi.T*Phi))23 invL = 1/L24 t = 1.25 if sps.issparse(Phi):26 Phi = np.array(Phi.todense())27 d_I = cuda.to_device(np.array(I, dtype=np.float32, order='F'))28 d_Phi = cuda.to_device(np.array(Phi, dtype=np.float32, order='F'))29 d_Q = cuda.device_array((n, n), dtype=np.float32, order='F')30 d_c = cuda.device_array((n, batch), dtype=np.float32, order='F')31 d_x = cuda.to_device(np.array(np.zeros((n, batch), dtype=np.float32), order='F'))32 d_y = cuda.to_device(np.array(np.zeros((n, batch), dtype=np.float32), order='F'))33 d_x2 = cuda.to_device(np.array(np.zeros((n, batch), dtype=np.float32), order='F'))34 # Temporary array variables35 d_t = cuda.device_array((m, batch), dtype=np.float32, order='F')36 d_t2 = cuda.device_array(n*batch, dtype=np.float32, order='F')37 b.gemm('T', 'N', n, n, m, 1, d_Phi, d_Phi, 0, d_Q) # Q = Phi^T * Phi38 b.gemm('T', 'N', n, batch, m, -2, d_Phi, d_I, 0, d_c) # c = -2*Phi^T * y39 blockdim = 32, 3240 griddim = int(math.ceil(n/blockdim[0])), int(math.ceil(batch/blockdim[1]))41 blockdim_1d = 25642 griddim_1d = int(math.ceil(n*batch/blockdim_1d))43 start = l2l1obj(b, d_I, d_Phi, d_x, d_t, d_t2, lambdav, blockdim_1d, griddim_1d)44 obj2 = start45 for i in xrange(max_iterations):46 # x2 = 2*Q*y + c47 b.symm('L', 'U', n, batch, 2, d_Q, d_y, 0, d_x2)48 b.geam('N', 'N', n, batch, 1, d_c, 1, d_x2, d_x2)49 50 # x2 
= y - invL * x251 b.geam('N', 'N', n, batch, 1, d_y, -invL, d_x2, d_x2)52 # proxOp() 53 l1prox[griddim, blockdim](d_x2, invL*lambdav, d_x2)54 t2 = (1+math.sqrt(1+4*(t**2)))/2.055 56 # y = x2 + ((t-1)/t2)*(x2-x)57 b.geam('N', 'N', n, batch, 1+(t-1)/t2, d_x2, (1-t)/t2, d_x, d_y)58 # x = x259 b.geam('N', 'N', n, batch, 1, d_x2, 0, d_x, d_x)60 t = t261 # update objective62 obj = obj263 obj2 = l2l1obj(b, d_I, d_Phi, d_x2, d_t, d_t2, lambdav, blockdim_1d, griddim_1d)64 if verbose:65 x2 = d_x2.copy_to_host()66 print "L1 Objective: " + str(obj2)67 if np.abs(obj-obj2)/float(obj) < tol:68 break69 x2 = d_x2.copy_to_host()70 if display:71 print "FISTA Iterations: " + str(i)72 print "L1 Objective: " + str(lambdav*np.sum(np.abs(x2)) + np.sum((I-Phi.dot(x2))**2))73 print "Objective delta: " + str(obj2-start)74 return x275def l2l1obj(b, d_I, d_Phi, d_x2, d_t, d_t2, lambdav, blockdim, griddim):76 (m, n) = d_Phi.shape77 (m, batch) = d_I.shape78 b.gemm('N', 'N', m, batch, n, 1, d_Phi, d_x2, 0, d_t)79 b.geam('N', 'N', m, batch, 1, d_I, -1, d_t, d_t)80 l2 = b.nrm2(d_t.ravel(order='F'))**281 82 gabs[griddim, blockdim](d_x2.ravel(order='F'), d_t2)83 84 l1 = lambdav*b.asum(d_t2)85 return l2 + l186@cuda.jit('void(float32[:,:], float64, float32[:,:])')87def l1prox(A, t, C):88 """ l1 Proximal operator: C = np.fmax(A-t, 0) + np.fmin(A+t, 0) 89 A: coefficients matrix (dim, batch)90 t: threshold91 C: output (dim, batch) """92 i, j = cuda.grid(2)93 if i >= A.shape[0] or j >= A.shape[1]:94 return95 if A[i, j] >= t:96 C[i, j] = A[i, j] - t 97 elif A[i, j] <= -t: 98 C[i, j] = A[i, j] + t 99 else:100 C[i, j] = 0101 return102@cuda.jit('void(float32[:], float32[:])')103def gabs(x, y):104 i = cuda.grid(1)105 if i >= x.size:106 return107 if x[i] < 0:108 y[i] = -x[i]109 else:110 y[i] = x[i]...
corrCUDA.py
Source:corrCUDA.py
import numpy as np
import accelerate.cuda.blas as blas
import accelerate.cuda.fft as ft
from numba import cuda


def corr_td_single(x1, x2):
    """Time-domain correlation of x1 and x2 at zero lag (their dot product)."""
    c_12 = blas.dot(x1, x2)
    return c_12


def best_grid_size(size, tpb):
    """Return the blocks-per-grid tuple covering `size` with `tpb` threads/block.

    size, tpb: per-dimension tuples; result is ceil(size/tpb) per dimension.
    """
    # BUG FIX: np.float / np.int were deprecated aliases (removed in
    # NumPy 1.24); the builtin float / int types are the intended meaning.
    bpg = np.ceil(np.array(size, dtype=float) / tpb).astype(int).tolist()
    return tuple(bpg)


@cuda.jit('void(float32[:], float32[:])')
def mult_inplace(img, resp):
    """Elementwise in-place product: img[i] *= resp[i] (1-D grid, no bounds check)."""
    i = cuda.grid(1)
    img[i] *= resp[i]


def corr_FD(x1, x2):
    """Frequency-domain correlation of x1 and x2 via cuFFT.

    Computes IFFT(FFT(x1) * FFT(x2)) / N with the two forward transforms
    issued on separate streams so the second transfer overlaps the first FFT.

    NOTE(review): the FFTs are multiplied without conjugation, so this is
    circular convolution rather than correlation unless one input is
    pre-reversed/conjugated — confirm against the callers.
    """
    threadperblock = 32, 8
    blockpergrid = best_grid_size(tuple(reversed(x1.shape)), threadperblock)
    print('kernel config: %s x %s' % (blockpergrid, threadperblock))

    # Trigger initialization the cuFFT system.
    # This takes significant time for small dataset.
    # We should not be including the time wasted here
    #ft.FFTPlan(shape=x1.shape, itype=np.float32, otype=np.complex64)

    X1 = x1.astype(np.float32)
    X2 = x2.astype(np.float32)

    stream1 = cuda.stream()
    stream2 = cuda.stream()
    fftplan1 = ft.FFTPlan(shape=x1.shape, itype=np.float32,
                          otype=np.complex64, stream=stream1)
    fftplan2 = ft.FFTPlan(shape=x2.shape, itype=np.float32,
                          otype=np.complex64, stream=stream2)

    # pagelock memory
    with cuda.pinned(X1, X2):
        # We can overlap the transfer of response_complex with the forward FFT
        # on image_complex.
        d_X1 = cuda.to_device(X1, stream=stream1)
        d_X2 = cuda.to_device(X2, stream=stream2)
        fftplan1.forward(d_X1, out=d_X1)
        fftplan2.forward(d_X2, out=d_X2)
        print('d_X1 is ', np.shape(d_X1), type(d_X1), np.max(d_X1))
        print('d_X2 is ', np.shape(d_X2), type(d_X2), np.max(d_X2))
        # Make sure X2's forward FFT is done before the pointwise product.
        stream2.synchronize()
        mult_inplace[blockpergrid, threadperblock, stream1](d_X1, d_X2)
        fftplan1.inverse(d_X1, out=d_X1)
        # implicitly synchronizes the streams
        c = d_X1.copy_to_host().real / np.prod(x1.shape)
    # BUG FIX: the visible snippet was truncated after computing `c` and
    # never returned it; returning the normalized result is the clear intent.
    return c
Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites to running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles a list of step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing for FREE!