1: function LDLT(Lp, Li, Lx, level_p, level_i) 2: 分配GPU内存,将数据从CPU端复制到GPU端 3: cudaMemGetInfo(&free, &total); 4: free -= 1024ull*1024ull*1024ull; //预留1GB的空闲内存 5: free /= (2*n*sizeof(数据类型)); 6: for level = 0 to nlevel - 1 do 7: //level 级别的并行 8: lev_size = level_p[level+1] - level_p[level]; 9: tmp = min(lev_size, free); 10: //在GPU上为tmpMem1和tmpMem分配全局内存 11: offset = 0; 12: while lev_size > 0 do 13: restCol = min(lev_size, tmp); 14: dynamic<<<restCol, 1>>>(Lp, Li, Lx, level_p, level_i, tmpMem, tmpMem1, n, level, offset); 15: lev_size -= tmp; 16: offset += tmp; 17: end while 18: 释放tmpMem和tmpMem1 19: end for 20: 从GPU端到CPU端拷贝数据,释放GPU内存 |
|