Nek5000
SEM for Incompressible NS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
mxm_wrapper.F90
Go to the documentation of this file.
1 
3 
7 subroutine mxm(a,n1,b,n2,c,n3)
8  use kinds, only : dp, i8
9  implicit none
10 
11  integer, intent(in) :: n1, n2, n3
12  real(DP), intent(in) :: a(n1,n2),b(n2,n3)
13  real(DP), intent(out) :: c(n1,n3)
14  integer :: aligned
15  integer :: K10_mxm
16 
17 #ifdef BGQ
18  integer(i8) :: tt = 32
19 
20  if (n2 == 8 .and. mod(n1,4) == 0 &
21  .and. mod(loc(a),tt)==0 &
22  .and. mod(loc(b),tt)==0 &
23  .and. mod(loc(c),tt)==0 &
24  ) then
25  call mxm_bgq_8(a, n1, b, n2, c, n3)
26  return
27  endif
28  if (n2 == 12 .and. mod(n1,4) == 0 &
29  .and. mod(loc(a),tt)==0 &
30  .and. mod(loc(b),tt)==0 &
31  .and. mod(loc(c),tt)==0 &
32  ) then
33  call mxm_bgq_12(a, n1, b, n2, c, n3)
34  return
35  endif
36  if (n2 == 16 .and. mod(n1,4) == 0 &
37  .and. mod(loc(a),tt)==0 &
38  .and. mod(loc(b),tt)==0 &
39  .and. mod(loc(c),tt)==0 &
40  ) then
41  call mxm_bgq_16(a, n1, b, n2, c, n3)
42  return
43  endif
44  if (n2 == 10 .and. mod(n1,4) == 0 .and. mod(n3,2) == 0 &
45  .and. mod(loc(a),tt)==0 &
46  .and. mod(loc(b),tt)==0 &
47  .and. mod(loc(c),tt)==0 &
48  ) then
49  call mxm_bgq_10(a, n1, b, n2, c, n3)
50  return
51  endif
52  if (n2 == 6 .and. mod(n1,4) == 0 .and. mod(n3,2) == 0 &
53  .and. mod(loc(a),tt)==0 &
54  .and. mod(loc(b),tt)==0 &
55  .and. mod(loc(c),tt)==0 &
56  ) then
57  call mxm_bgq_6(a, n1, b, n2, c, n3)
58  return
59  endif
60 #endif
61 
62 !#define BLAS_MXM
63 #ifdef BLAS_MXM
64  call dgemm('N','N',n1,n3,n2,1.0,a,n1,b,n2,0.0,c,n1)
65  return
66 #endif
67 
68 #ifdef BG
69  call bg_aligned3(a,b,c,aligned)
70  if (n2 == 2) then
71  call mxm44_2(a,n1,b,n2,c,n3)
72  else if ((aligned == 1) .AND. &
73  (n1 >= 8) .AND. (n2 >= 8) .AND. (n3 >= 8) .AND. &
74  (modulo(n1,2) == 0) .AND. (modulo(n2,2) == 0) ) then
75  if (modulo(n3,4) == 0) then
76  call bg_mxm44(a,n1,b,n2,c,n3)
77  else
78  call bg_mxm44_uneven(a,n1,b,n2,c,n3)
79  endif
80  else if((aligned == 1) .AND. &
81  (modulo(n1,6) == 0) .AND. (modulo(n3,6) == 0) .AND. &
82  (n2 >= 4) .AND. (modulo(n2,2) == 0) ) then
83  call bg_mxm3(a,n1,b,n2,c,n3)
84  else
85  call mxm44_0(a,n1,b,n2,c,n3)
86  endif
87  return
88 #endif
89 
90 #ifdef K10_MXM
91  ! fow now only supported for lx1=8
92  ! tuned for AMD K10
93  ierr = k10_mxm(a,n1,b,n2,c,n3)
94  if (ierr > 0) call mxmf2(a,n1,b,n2,c,n3)
95  return
96 #endif
97 
98  call mxmf2(a,n1,b,n2,c,n3)
99 
100  return
101 end subroutine mxm
subroutine mxmf2(A, N1, B, N2, C, N3)
unrolled loop version
Definition: mxm_std.F90:2
subroutine mxm_bgq_8(a, n1, b, n2, c, n3)
Definition: mxm_bgq.F90:1
subroutine mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product C = A*B.
Definition: mxm_wrapper.F90:7
subroutine mxm_bgq_6(a, n1, b, n2, c, n3)
Definition: mxm_bgq.F90:217
subroutine mxm44_0(a, m, b, k, c, n)
Definition: mxm_std.F90:668
subroutine mxm_bgq_12(a, n1, b, n2, c, n3)
Definition: mxm_bgq.F90:56
subroutine mxm_bgq_10(a, n1, b, n2, c, n3)
Definition: mxm_bgq.F90:287
subroutine mxm_bgq_16(a, n1, b, n2, c, n3)
Definition: mxm_bgq.F90:129
subroutine mxm44_2(a, m, b, k, c, n)
Definition: mxm_std.F90:1098