

发表于:2025-01-20 作者:千家信息网编辑
千家信息网最后更新 2025年01月20日,这篇文章主要为大家展示了"Python3如何实现打格点算法的GPU加速实例",内容简而易懂,条理清晰,希望能够帮助大家解决疑惑,下面让小编带领大家一起研究并学习一下"Python3如何实现打格点算法的GPU加速实例"这篇文章吧。
千家信息网最后更新 2025年01月20日 Python3如何实现打格点算法的GPU加速实例






# cuda_grid.py
"""Bin random 3-D atom coordinates into a regular grid on the CPU.

Running the file as a script prints the coordinates and their grid indices,
then saves a picture of the atoms over the x-y grid lines.
"""
import numpy as np


def grid_by_cpu(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grids [nx,ny,nz].

    Args:
        crd: The 3-D coordinates of atoms, indexable as ``crd[i][axis]``.
        rxyz: ``[xmin, ymin, zmin, grid_size]`` -- the grid origin followed
            by the edge length of one grid cell.
        atoms(int): The number of leading rows of ``crd`` to convert.
        grids: The output matrix, indexable like ``crd``; it is overwritten
            in place.

    Returns:
        The same ``grids`` object that was passed in.
    """
    grid_size = rxyz[3]
    for i in range(atoms):
        for axis in range(3):
            # int() truncates toward zero, which equals floor here as long
            # as the coordinate is not below the grid origin.
            grids[i][axis] = int((crd[i][axis] - rxyz[axis]) / grid_size)
    return grids


def main():
    """Run the 4-atom demo and save the x-y grid plot to Atom_Grids.png."""
    np.random.seed(1)
    atoms = 4
    grid_size = 0.1
    crd = np.random.random((atoms, 3)).astype(np.float32)

    # Column-wise extrema in one call each instead of builtin min/max.
    xmin, ymin, zmin = crd.min(axis=0)
    xmax, ymax, _ = crd.max(axis=0)
    # Only x and y cell counts are needed: the plot below is 2-D.
    xgrids = int((xmax - xmin) / grid_size) + 1
    ygrids = int((ymax - ymin) / grid_size) + 1
    rxyz = np.array([xmin, ymin, zmin, grid_size], dtype=np.float32)

    # -1 marks entries that have not been written yet.
    grids = np.full_like(crd, -1)
    grids_cpu = grid_by_cpu(crd, rxyz, atoms, grids)
    print(crd)
    print(grids_cpu)

    # Imported here so the module can be imported without matplotlib.
    import matplotlib.pyplot as plt

    plt.figure()
    plt.plot(crd[:, 0], crd[:, 1], 'o', color='red')
    # Horizontal grid lines.
    for grid in range(ygrids + 1):
        y = ymin + grid_size * grid
        plt.plot([xmin, xmin + grid_size * xgrids], [y, y], color='black')
    # Vertical grid lines.
    for grid in range(xgrids + 1):
        x = xmin + grid_size * grid
        plt.plot([x, x], [ymin, ymin + grid_size * ygrids], color='black')
    plt.savefig('Atom_Grids.png')


if __name__ == '__main__':
    main()


$ python3 cuda_grid.py
[[4.17021990e-01 7.20324516e-01 1.14374816e-04]
[3.02332580e-01 1.46755889e-01 9.23385918e-02]
[1.86260208e-01 3.45560730e-01 3.96767467e-01]
[5.38816750e-01 4.19194520e-01 6.85219526e-01]]
[[2. 5. 0.]
[1. 0. 0.]
[0. 1. 3.]
[3. 2. 6.]]





# cuda_grid.py
"""Compare pure-Python, Numba-JIT and Numba-CUDA versions of atom gridding.

Running the file as a script grids four random atoms with all three
implementations and prints the results so they can be compared by eye.
"""
from numba import jit
from numba import cuda
import numpy as np


def grid_by_cpu(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grids [nx,ny,nz].

    Args:
        crd: The 3-D coordinates of atoms, indexable as ``crd[i][axis]``.
        rxyz: ``[xmin, ymin, zmin, grid_size]`` -- the grid origin followed
            by the edge length of one grid cell.
        atoms(int): The number of leading rows of ``crd`` to convert.
        grids: The output matrix; it is overwritten in place.

    Returns:
        The same ``grids`` object that was passed in.
    """
    grid_size = rxyz[3]
    for i in range(atoms):
        for axis in range(3):
            grids[i][axis] = int((crd[i][axis] - rxyz[axis]) / grid_size)
    return grids


# nopython=True makes a failure to compile an error instead of a silent
# fall-back to slow object mode on older Numba releases.
@jit(nopython=True)
def grid_by_jit(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grids [nx,ny,nz] (JIT-compiled).

    Same computation as ``grid_by_cpu``; the arguments must be NumPy arrays.

    Args:
        crd: The 3-D coordinates of atoms.
        rxyz: ``[xmin, ymin, zmin, grid_size]``.
        atoms(int): The number of leading rows of ``crd`` to convert.
        grids: The output matrix; it is overwritten in place.

    Returns:
        The same ``grids`` array that was passed in.
    """
    grid_size = rxyz[3]
    for i in range(atoms):
        for axis in range(3):
            grids[i, axis] = int((crd[i, axis] - rxyz[axis]) / grid_size)
    return grids


@cuda.jit
def grid_by_gpu(crd, rxyz, grids):
    """CUDA kernel: each thread grids one coordinate of one atom.

    Args:
        crd: The 3-D coordinates of atoms.
        rxyz: ``[xmin, ymin, zmin, grid_size]``.
        grids: The output matrix; element [i, j] is written by the thread
            whose 2-D global index is (i, j).
    """
    i, j = cuda.grid(2)
    # The launch grid is rounded up to whole blocks, so threads past the end
    # of the array must do nothing.
    if i < grids.shape[0] and j < grids.shape[1]:
        grids[i, j] = int((crd[i, j] - rxyz[j]) / rxyz[3])


def main():
    """Grid four random atoms with all three implementations and print them."""
    np.random.seed(1)
    atoms = 4
    grid_size = 0.1
    crd = np.random.random((atoms, 3)).astype(np.float32)
    xmin, ymin, zmin = crd.min(axis=0)
    rxyz = np.array([xmin, ymin, zmin, grid_size], dtype=np.float32)
    crd_cuda = cuda.to_device(crd)
    rxyz_cuda = cuda.to_device(rxyz)

    # Every implementation gets a fresh output buffer pre-filled with -1.
    grids_cpu = grid_by_cpu(crd, rxyz, atoms, np.full_like(crd, -1))
    grids_jit = grid_by_jit(crd, rxyz, atoms, np.full_like(crd, -1))
    grids_cuda = cuda.to_device(np.full_like(crd, -1))

    # Many threads per block along the atom axis; one block row per
    # coordinate axis. The kernel's bounds check absorbs the round-up.
    threads_per_block = (256, 1)
    blocks_per_grid = (
        (atoms + threads_per_block[0] - 1) // threads_per_block[0],
        crd.shape[1],
    )
    grid_by_gpu[blocks_per_grid, threads_per_block](crd_cuda,
                                                    rxyz_cuda,
                                                    grids_cuda)

    print(crd)
    print(grids_cpu)
    print(grids_jit)
    # copy_to_host() brings the kernel's results back for printing.
    print(grids_cuda.copy_to_host())


if __name__ == '__main__':
    main()


$ python3 cuda_grid.py
/home/dechin/anaconda3/lib/python3.8/site-packages/numba/cuda/compiler.py:865: NumbaPerformanceWarning: Grid size (12) < 2 * SM count (72) will likely result in GPU under utilization due to low occupancy.
[[4.17021990e-01 7.20324516e-01 1.14374816e-04]
[3.02332580e-01 1.46755889e-01 9.23385918e-02]
[1.86260208e-01 3.45560730e-01 3.96767467e-01]
[5.38816750e-01 4.19194520e-01 6.85219526e-01]]
[[2. 5. 0.]
[1. 0. 0.]
[0. 1. 3.]
[3. 2. 6.]]
[[2. 5. 0.]
[1. 0. 0.]
[0. 1. 3.]
[3. 2. 6.]]
[[2. 5. 0.]
[1. 0. 0.]
[0. 1. 3.]
[3. 2. 6.]]


# cuda_grid.py
"""Benchmark pure-Python, Numba-JIT and Numba-CUDA atom gridding.

Running the file as a script grids 100000 random atoms 100 times with each
implementation and prints the accumulated wall-clock time of each.
"""
from numba import jit
from numba import cuda
import numpy as np


def grid_by_cpu(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grids [nx,ny,nz].

    Args:
        crd: The 3-D coordinates of atoms, indexable as ``crd[i][axis]``.
        rxyz: ``[xmin, ymin, zmin, grid_size]`` -- the grid origin followed
            by the edge length of one grid cell.
        atoms(int): The number of leading rows of ``crd`` to convert.
        grids: The output matrix; it is overwritten in place.

    Returns:
        The same ``grids`` object that was passed in.
    """
    grid_size = rxyz[3]
    for i in range(atoms):
        for axis in range(3):
            grids[i][axis] = int((crd[i][axis] - rxyz[axis]) / grid_size)
    return grids


# nopython=True makes a failure to compile an error instead of a silent
# fall-back to slow object mode on older Numba releases.
@jit(nopython=True)
def grid_by_jit(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grids [nx,ny,nz] (JIT-compiled).

    Same computation as ``grid_by_cpu``; the arguments must be NumPy arrays.

    Args:
        crd: The 3-D coordinates of atoms.
        rxyz: ``[xmin, ymin, zmin, grid_size]``.
        atoms(int): The number of leading rows of ``crd`` to convert.
        grids: The output matrix; it is overwritten in place.

    Returns:
        The same ``grids`` array that was passed in.
    """
    grid_size = rxyz[3]
    for i in range(atoms):
        for axis in range(3):
            grids[i, axis] = int((crd[i, axis] - rxyz[axis]) / grid_size)
    return grids


@cuda.jit
def grid_by_gpu(crd, rxyz, grids):
    """CUDA kernel: each thread grids one coordinate of one atom.

    Args:
        crd: The 3-D coordinates of atoms.
        rxyz: ``[xmin, ymin, zmin, grid_size]``.
        grids: The output matrix; element [i, j] is written by the thread
            whose 2-D global index is (i, j).
    """
    i, j = cuda.grid(2)
    # The launch grid is rounded up to whole blocks, so threads past the end
    # of the array must do nothing.
    if i < grids.shape[0] and j < grids.shape[1]:
        grids[i, j] = int((crd[i, j] - rxyz[j]) / rxyz[3])


def main():
    """Time 100 gridding passes per implementation and print the totals."""
    import time
    from tqdm import trange

    np.random.seed(1)
    atoms = 100000
    grid_size = 0.1
    crd = np.random.random((atoms, 3)).astype(np.float32)
    xmin, ymin, zmin = crd.min(axis=0)
    rxyz = np.array([xmin, ymin, zmin, grid_size], dtype=np.float32)
    crd_cuda = cuda.to_device(crd)
    rxyz_cuda = cuda.to_device(rxyz)

    # Many threads per block along the atom axis; one block row per
    # coordinate axis. The kernel's bounds check absorbs the round-up.
    threads_per_block = (256, 1)
    blocks_per_grid = (
        (atoms + threads_per_block[0] - 1) // threads_per_block[0],
        crd.shape[1],
    )

    cpu_time = 0.0
    jit_time = 0.0
    gpu_time = 0.0
    for step in trange(100):
        # Buffer setup and host-to-device copies stay outside the timed
        # regions so only the gridding itself is measured.
        grids = np.full_like(crd, -1)
        start = time.perf_counter()
        grid_by_cpu(crd, rxyz, atoms, grids)
        cpu_elapsed = time.perf_counter() - start

        grids = np.full_like(crd, -1)
        start = time.perf_counter()
        grid_by_jit(crd, rxyz, atoms, grids)
        jit_elapsed = time.perf_counter() - start

        grids_cuda = cuda.to_device(np.full_like(crd, -1))
        start = time.perf_counter()
        grid_by_gpu[blocks_per_grid, threads_per_block](crd_cuda,
                                                        rxyz_cuda,
                                                        grids_cuda)
        # Kernel launches return before the kernel finishes; wait for it so
        # the timer covers the execution and not just the launch.
        cuda.synchronize()
        gpu_elapsed = time.perf_counter() - start

        # The first pass includes JIT/CUDA compilation, so it is not counted.
        if step != 0:
            cpu_time += cpu_elapsed
            jit_time += jit_elapsed
            gpu_time += gpu_elapsed

    print(f'The time cost of CPU calculation is: {cpu_time}s')
    print(f'The time cost of JIT calculation is: {jit_time}s')
    print(f'The time cost of GPU calculation is: {gpu_time}s')


if __name__ == '__main__':
    main()


$ python3 cuda_grid.py
100%|███████████████████████████| 100/100 [00:23<00:00, 4.18it/s]
The time cost of CPU calculation is: 23.01943016052246s
The time cost of JIT calculation is: 0.04810166358947754s
The time cost of GPU calculation is: 0.01806473731994629s


# cuda_grid.py
"""Benchmark Numba-JIT against Numba-CUDA atom gridding on a large system.

Running the file as a script grids 5000000 random atoms 100 times with the
JIT and GPU implementations and prints the accumulated wall-clock time of
each. The pure-Python version is defined for reference but not timed.
"""
from numba import jit
from numba import cuda
import numpy as np


def grid_by_cpu(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grids [nx,ny,nz].

    Args:
        crd: The 3-D coordinates of atoms, indexable as ``crd[i][axis]``.
        rxyz: ``[xmin, ymin, zmin, grid_size]`` -- the grid origin followed
            by the edge length of one grid cell.
        atoms(int): The number of leading rows of ``crd`` to convert.
        grids: The output matrix; it is overwritten in place.

    Returns:
        The same ``grids`` object that was passed in.
    """
    grid_size = rxyz[3]
    for i in range(atoms):
        for axis in range(3):
            grids[i][axis] = int((crd[i][axis] - rxyz[axis]) / grid_size)
    return grids


# nopython=True makes a failure to compile an error instead of a silent
# fall-back to slow object mode on older Numba releases.
@jit(nopython=True)
def grid_by_jit(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grids [nx,ny,nz] (JIT-compiled).

    Same computation as ``grid_by_cpu``; the arguments must be NumPy arrays.

    Args:
        crd: The 3-D coordinates of atoms.
        rxyz: ``[xmin, ymin, zmin, grid_size]``.
        atoms(int): The number of leading rows of ``crd`` to convert.
        grids: The output matrix; it is overwritten in place.

    Returns:
        The same ``grids`` array that was passed in.
    """
    grid_size = rxyz[3]
    for i in range(atoms):
        for axis in range(3):
            grids[i, axis] = int((crd[i, axis] - rxyz[axis]) / grid_size)
    return grids


@cuda.jit
def grid_by_gpu(crd, rxyz, grids):
    """CUDA kernel: each thread grids one coordinate of one atom.

    Args:
        crd: The 3-D coordinates of atoms.
        rxyz: ``[xmin, ymin, zmin, grid_size]``.
        grids: The output matrix; element [i, j] is written by the thread
            whose 2-D global index is (i, j).
    """
    i, j = cuda.grid(2)
    # The launch grid is rounded up to whole blocks, so threads past the end
    # of the array must do nothing.
    if i < grids.shape[0] and j < grids.shape[1]:
        grids[i, j] = int((crd[i, j] - rxyz[j]) / rxyz[3])


def main():
    """Time 100 JIT and GPU gridding passes and print the totals."""
    import time
    from tqdm import trange

    np.random.seed(1)
    atoms = 5000000
    grid_size = 0.1
    crd = np.random.random((atoms, 3)).astype(np.float32)
    xmin, ymin, zmin = crd.min(axis=0)
    rxyz = np.array([xmin, ymin, zmin, grid_size], dtype=np.float32)
    crd_cuda = cuda.to_device(crd)
    rxyz_cuda = cuda.to_device(rxyz)

    # Many threads per block along the atom axis; one block row per
    # coordinate axis. The kernel's bounds check absorbs the round-up.
    threads_per_block = (256, 1)
    blocks_per_grid = (
        (atoms + threads_per_block[0] - 1) // threads_per_block[0],
        crd.shape[1],
    )

    jit_time = 0.0
    gpu_time = 0.0
    for step in trange(100):
        # Buffer setup and host-to-device copies stay outside the timed
        # regions so only the gridding itself is measured.
        grids = np.full_like(crd, -1)
        start = time.perf_counter()
        grid_by_jit(crd, rxyz, atoms, grids)
        jit_elapsed = time.perf_counter() - start

        grids_cuda = cuda.to_device(np.full_like(crd, -1))
        start = time.perf_counter()
        grid_by_gpu[blocks_per_grid, threads_per_block](crd_cuda,
                                                        rxyz_cuda,
                                                        grids_cuda)
        # Kernel launches return before the kernel finishes; wait for it so
        # the timer covers the execution and not just the launch.
        cuda.synchronize()
        gpu_elapsed = time.perf_counter() - start

        # The first pass includes JIT/CUDA compilation, so it is not counted.
        if step != 0:
            jit_time += jit_elapsed
            gpu_time += gpu_elapsed

    print(f'The time cost of JIT calculation is: {jit_time}s')
    print(f'The time cost of GPU calculation is: {gpu_time}s')


if __name__ == '__main__':
    main()


$ python3 cuda_grid.py
100%|███████████████████████████| 100/100 [00:09<00:00, 10.15it/s]
The time cost of JIT calculation is: 2.3743042945861816s
The time cost of GPU calculation is: 0.022843599319458008s

