" auto tB = B.slice((int)tgid.x, 0); \n"
" \n"
" matmul2d< \n"
- " matmul2d_descriptor(8, 8, dynamic_extent), \n"
+ " matmul2d_descriptor(16, 16, dynamic_extent), \n"
" execution_simdgroups<4>> mm; \n"
" \n"
" auto cT = mm.get_destination_cooperative_tensor<decltype(tA), decltype(tB), float>(); \n"
" auto tB = B.slice((int)tgid.x, 0); \n"
" \n"
" matmul2d< \n"
- " matmul2d_descriptor(8, 8, dynamic_extent), \n"
+ " matmul2d_descriptor(16, 16, dynamic_extent), \n"
" execution_simdgroups<4>> mm; \n"
" \n"
" auto cT = mm.get_destination_cooperative_tensor<decltype(tA), decltype(tB), float>(); \n"