! mxm: matrix-matrix product C = A*B.
!   a  : input,  declared a(n1,n2)
!   b  : input,  declared b(n2,n3)
!   c  : output, declared c(n1,n3)
!   n1, n2, n3 : input extents used in the declarations above
! The visible lines test the extents and the addresses of a, b and c and
! then call one of several multiplication routines.
! NOTE(review): this listing has gaps (the embedded line numbers jump), so
! the branch bodies, the else/endif lines, any build-time guards and the
! end of the subroutine are not visible here. Comments below describe only
! what the visible lines show.
7 subroutine mxm(a,n1,b,n2,c,n3)
8 use kinds, only : dp, i8
11 integer,
intent(in) :: n1, n2, n3
12 real(DP),
intent(in) :: a(n1,n2),b(n2,n3)
13 real(DP),
intent(out) :: c(n1,n3)
! tt is the modulus applied to loc() of each array in the tests below.
! NOTE(review): initialising at declaration gives tt the SAVE attribute;
! it is never assigned in the visible lines, so it looks like a candidate
! for a parameter - confirm against the full source.
18 integer(i8) :: tt = 32
! Case n2 == 8: n1 must be a multiple of 4 and loc(a), loc(b), loc(c) must
! all be multiples of tt (presumably a 32-byte alignment test - loc is a
! non-standard intrinsic, confirm its units for the target compiler).
! The branch body is missing from this listing; presumably it dispatches to
! the size-specific routine mxm_bgq_8 - TODO confirm.
20 if (n2 == 8 .and. mod(n1,4) == 0 &
21 .and. mod(loc(a),tt)==0 &
22 .and. mod(loc(b),tt)==0 &
23 .and. mod(loc(c),tt)==0 &
! Case n2 == 12: same n1 and address conditions as above.
28 if (n2 == 12 .and. mod(n1,4) == 0 &
29 .and. mod(loc(a),tt)==0 &
30 .and. mod(loc(b),tt)==0 &
31 .and. mod(loc(c),tt)==0 &
! Case n2 == 16: same n1 and address conditions as above.
36 if (n2 == 16 .and. mod(n1,4) == 0 &
37 .and. mod(loc(a),tt)==0 &
38 .and. mod(loc(b),tt)==0 &
39 .and. mod(loc(c),tt)==0 &
! Case n2 == 10: additionally requires n3 to be even.
44 if (n2 == 10 .and. mod(n1,4) == 0 .and. mod(n3,2) == 0 &
45 .and. mod(loc(a),tt)==0 &
46 .and. mod(loc(b),tt)==0 &
47 .and. mod(loc(c),tt)==0 &
! Case n2 == 6: additionally requires n3 to be even.
52 if (n2 == 6 .and. mod(n1,4) == 0 .and. mod(n3,2) == 0 &
53 .and. mod(loc(a),tt)==0 &
54 .and. mod(loc(b),tt)==0 &
55 .and. mod(loc(c),tt)==0 &
! BLAS path: dgemm with no transposes, m = n1, n = n3, k = n2, alpha = 1.0,
! beta = 0.0 and leading dimensions n1 (a), n2 (b), n1 (c).
! NOTE(review): 1.0 and 0.0 are default-kind real literals, while reference
! DGEMM declares alpha and beta as DOUBLE PRECISION. Unless the build
! promotes default reals to 8 bytes, these should be 1.0_dp and 0.0_dp -
! confirm the compiler flags.
64 call dgemm(
'N',
'N',n1,n3,n2,1.0,a,n1,b,n2,0.0,c,n1)
! bg_aligned3 receives a, b, c and the variable aligned, which is then
! compared with 1 below (presumably it reports whether all three arrays are
! suitably aligned - confirm in bg_aligned3). The declaration of aligned is
! not visible in this listing.
69 call bg_aligned3(a,b,c,aligned)
! This is an else-if: the leading if of the chain is missing from the
! listing. Condition: aligned == 1, every extent >= 8, n1 and n2 even.
72 else if ((aligned == 1) .AND. &
73 (n1 >= 8) .AND. (n2 >= 8) .AND. (n3 >= 8) .AND. &
74 (modulo(n1,2) == 0) .AND. (modulo(n2,2) == 0) )
then
! n3 a multiple of 4 selects bg_mxm44; the call to bg_mxm44_uneven that
! follows is presumably the else branch (the else line is not visible).
75 if (modulo(n3,4) == 0)
then
76 call bg_mxm44(a,n1,b,n2,c,n3)
78 call bg_mxm44_uneven(a,n1,b,n2,c,n3)
! Condition: aligned == 1, n1 and n3 multiples of 6, n2 even and >= 4.
80 else if((aligned == 1) .AND. &
81 (modulo(n1,6) == 0) .AND. (modulo(n3,6) == 0) .AND. &
82 (n2 >= 4) .AND. (modulo(n2,2) == 0) )
then
83 call bg_mxm3(a,n1,b,n2,c,n3)
! k10_mxm returns a status in ierr; a positive value triggers a call to
! mxmf2 with the same arguments. The declaration of ierr is not visible.
93 ierr = k10_mxm(a,n1,b,n2,c,n3)
94 if (ierr > 0) call
mxmf2(a,n1,b,n2,c,n3)
! Unconditional call to mxmf2 in the visible text; the conditions that
! select this path are not visible in this listing.
98 call
mxmf2(a,n1,b,n2,c,n3)
subroutine mxmf2(A, N1, B, N2, C, N3)
unrolled loop version
subroutine mxm_bgq_8(a, n1, b, n2, c, n3)
subroutine mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product C = A*B.
subroutine mxm_bgq_6(a, n1, b, n2, c, n3)
subroutine mxm44_0(a, m, b, k, c, n)
subroutine mxm_bgq_12(a, n1, b, n2, c, n3)
subroutine mxm_bgq_10(a, n1, b, n2, c, n3)
subroutine mxm_bgq_16(a, n1, b, n2, c, n3)
subroutine mxm44_2(a, m, b, k, c, n)