稀疏对称矩阵的$LDL^T$分解在GPU上的高效实现
陈鑫峰,王武

An Effective Implementation of $LDL^T$ Decomposition of Sparse Symmetric Matrix on GPU
Chen Xinfeng,Wang Wu
伪代码4. 更新当前列的子列
1: function update(Lp, Li, Lx, tmpMem,
tmpMem1, n, k, col)
2: j = Li[Lp[k]+1+blockIdx.x];
3: s = tmpMem1[n*col+j];
4: subColSize = Lp[j+1]-Lp[j];
5: offset=0;
6: while offset<subColSize do
7: if threadIdx.x+offset < subColSize then
8: //A为复数矩阵
9: x = cuCmulf(s,tmpMem[n*col + Li[Lp[j]
+offset+threadIdx.x]]);
10: atomicAdd(&(Lx[Lp[j] + offset +
threadIdx.x].x), -x.x);
11: atomicAdd(&(Lx[Lp[j] + offset+threadIdx.x].y),
-x.y);
12: //A为实数矩阵
13: x = s*tmpMem[n*col+Li[Lp[j]
+offset+threadIdx.x]];
14: atomicAdd(&(Lx[Lp[j]+offset+threadIdx.x]), -x);
15: end if
16: offset += blockDim.x;
17: end while
18: end function