Nek5000
SEM for Incompressible NS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
mxm_bgq.F90
Go to the documentation of this file.
!> Matrix-matrix product c = a*b with the inner dimension hard-wired to 8,
!! written with IBM XL vector built-ins (vec_ld, vec_splat, vec_mul,
!! vec_madd, vec_st).
!!
!! @param a   left operand, declared a(n1,n2); columns 1..8 are read
!! @param n1  rows of a and c; the row loop advances in steps of 4
!! @param b   right operand, declared b(n2,n3); loaded at rows 1 and 5 of
!!            every column
!! @param n2  declared inner dimension; never referenced in the body, which
!!            always accumulates exactly 8 terms
!! @param c   result, declared c(n1,n3); written one vector store per (i,j)
!! @param n3  columns of b and c
!!
!! NOTE(review): assumes n2 == 8, n1 a multiple of 4 (each vector presumably
!! holds 4 doubles, given splat indices 0..3) and 32-byte aligned a, b, c as
!! asserted through alignx -- presumably guaranteed by the caller; confirm.
subroutine mxm_bgq_8(a,n1,b,n2,c,n3)
  use kinds, only : dp
  implicit none

  integer, intent(in) :: n1, n2, n3
  real(dp), intent(in) :: a(n1,n2), b(n2,n3)
  real(dp), intent(out) :: c(n1,n3)

  integer :: irow, jcol
  vector(real(dp)) a_c1, a_c2, a_c3, a_c4, a_c5, a_c6, a_c7, a_c8
  vector(real(dp)) b_lo, b_hi
  vector(real(dp)) acc

  ! Alignment assertions handed to the compiler.
  call alignx(32, a(1,1))
  call alignx(32, b(1,1))
  call alignx(32, c(1,1))

  do irow = 1, n1, 4
    ! Load the row strip of a once; it is reused for every column of b.
    a_c1 = vec_ld(0, a(irow,1))
    a_c2 = vec_ld(0, a(irow,2))
    a_c3 = vec_ld(0, a(irow,3))
    a_c4 = vec_ld(0, a(irow,4))
    a_c5 = vec_ld(0, a(irow,5))
    a_c6 = vec_ld(0, a(irow,6))
    a_c7 = vec_ld(0, a(irow,7))
    a_c8 = vec_ld(0, a(irow,8))

    do jcol = 1, n3
      ! Column jcol of b: rows 1..4 in b_lo, rows 5..8 in b_hi.
      b_lo = vec_ld(0, b(1,jcol))
      b_hi = vec_ld(0, b(5,jcol))

      ! Accumulate a(:,k) * b(k,jcol) for k = 1..8, broadcasting each
      ! b entry across the vector with vec_splat.
      acc = vec_mul(a_c1, vec_splat(b_lo, 0))
      acc = vec_madd(a_c2, vec_splat(b_lo, 1), acc)
      acc = vec_madd(a_c3, vec_splat(b_lo, 2), acc)
      acc = vec_madd(a_c4, vec_splat(b_lo, 3), acc)
      acc = vec_madd(a_c5, vec_splat(b_hi, 0), acc)
      acc = vec_madd(a_c6, vec_splat(b_hi, 1), acc)
      acc = vec_madd(a_c7, vec_splat(b_hi, 2), acc)
      acc = vec_madd(a_c8, vec_splat(b_hi, 3), acc)

      call vec_st(acc, 0, c(irow,jcol))
    end do
  end do
end subroutine mxm_bgq_8
55 
!> Matrix-matrix product c = a*b with the inner dimension hard-wired to 12,
!! written with IBM XL vector built-ins (vec_ld, vec_splat, vec_mul,
!! vec_madd, vec_st).
!!
!! @param a   left operand, declared a(n1,n2); columns 1..12 are read
!! @param n1  rows of a and c; the row loop advances in steps of 4
!! @param b   right operand, declared b(n2,n3); loaded at rows 1, 5 and 9 of
!!            every column
!! @param n2  declared inner dimension; never referenced in the body, which
!!            always accumulates exactly 12 terms
!! @param c   result, declared c(n1,n3); written one vector store per (i,j)
!! @param n3  columns of b and c
!!
!! NOTE(review): assumes n2 == 12, n1 a multiple of 4 (each vector presumably
!! holds 4 doubles, given splat indices 0..3) and 32-byte aligned a, b, c as
!! asserted through alignx -- presumably guaranteed by the caller; confirm.
subroutine mxm_bgq_12(a,n1,b,n2,c,n3)
  use kinds, only : dp
  implicit none

  integer, intent(in) :: n1, n2, n3
  real(dp), intent(in) :: a(n1,n2), b(n2,n3)
  real(dp), intent(out) :: c(n1,n3)

  integer :: irow, jcol
  vector(real(dp)) a_c1, a_c2, a_c3, a_c4, a_c5, a_c6
  vector(real(dp)) a_c7, a_c8, a_c9, a_c10, a_c11, a_c12
  vector(real(dp)) b_q1, b_q2, b_q3
  vector(real(dp)) acc

  ! Alignment assertions handed to the compiler.
  call alignx(32, a(1,1))
  call alignx(32, b(1,1))
  call alignx(32, c(1,1))

  do irow = 1, n1, 4
    ! Load the row strip of a once; it is reused for every column of b.
    a_c1  = vec_ld(0, a(irow,1))
    a_c2  = vec_ld(0, a(irow,2))
    a_c3  = vec_ld(0, a(irow,3))
    a_c4  = vec_ld(0, a(irow,4))
    a_c5  = vec_ld(0, a(irow,5))
    a_c6  = vec_ld(0, a(irow,6))
    a_c7  = vec_ld(0, a(irow,7))
    a_c8  = vec_ld(0, a(irow,8))
    a_c9  = vec_ld(0, a(irow,9))
    a_c10 = vec_ld(0, a(irow,10))
    a_c11 = vec_ld(0, a(irow,11))
    a_c12 = vec_ld(0, a(irow,12))

    do jcol = 1, n3
      ! Column jcol of b, loaded at rows 1, 5 and 9.
      b_q1 = vec_ld(0, b(1,jcol))
      b_q2 = vec_ld(0, b(5,jcol))
      b_q3 = vec_ld(0, b(9,jcol))

      ! Accumulate a(:,k) * b(k,jcol) for k = 1..12, broadcasting each
      ! b entry across the vector with vec_splat.
      acc = vec_mul(a_c1, vec_splat(b_q1, 0))
      acc = vec_madd(a_c2,  vec_splat(b_q1, 1), acc)
      acc = vec_madd(a_c3,  vec_splat(b_q1, 2), acc)
      acc = vec_madd(a_c4,  vec_splat(b_q1, 3), acc)
      acc = vec_madd(a_c5,  vec_splat(b_q2, 0), acc)
      acc = vec_madd(a_c6,  vec_splat(b_q2, 1), acc)
      acc = vec_madd(a_c7,  vec_splat(b_q2, 2), acc)
      acc = vec_madd(a_c8,  vec_splat(b_q2, 3), acc)
      acc = vec_madd(a_c9,  vec_splat(b_q3, 0), acc)
      acc = vec_madd(a_c10, vec_splat(b_q3, 1), acc)
      acc = vec_madd(a_c11, vec_splat(b_q3, 2), acc)
      acc = vec_madd(a_c12, vec_splat(b_q3, 3), acc)

      call vec_st(acc, 0, c(irow,jcol))
    end do
  end do
end subroutine mxm_bgq_12
128 
!> Matrix-matrix product c = a*b with the inner dimension hard-wired to 16,
!! written with IBM XL vector built-ins (vec_ld, vec_splat, vec_mul,
!! vec_madd, vec_st).
!!
!! @param a   left operand, declared a(n1,n2); columns 1..16 are read
!! @param n1  rows of a and c; the row loop advances in steps of 4
!! @param b   right operand, declared b(n2,n3); loaded at rows 1, 5, 9 and 13
!!            of every column
!! @param n2  declared inner dimension; never referenced in the body, which
!!            always accumulates exactly 16 terms
!! @param c   result, declared c(n1,n3); written one vector store per (i,j)
!! @param n3  columns of b and c
!!
!! NOTE(review): assumes n2 == 16, n1 a multiple of 4 (each vector presumably
!! holds 4 doubles, given splat indices 0..3) and 32-byte aligned a, b, c as
!! asserted through alignx -- presumably guaranteed by the caller; confirm.
subroutine mxm_bgq_16(a,n1,b,n2,c,n3)
  use kinds, only : dp
  implicit none

  integer, intent(in) :: n1, n2, n3
  real(dp), intent(in) :: a(n1,n2), b(n2,n3)
  real(dp), intent(out) :: c(n1,n3)

  integer :: irow, jcol
  vector(real(dp)) a_c1, a_c2, a_c3, a_c4, a_c5, a_c6, a_c7, a_c8
  vector(real(dp)) a_c9, a_c10, a_c11, a_c12, a_c13, a_c14, a_c15, a_c16
  vector(real(dp)) b_q1, b_q2, b_q3, b_q4
  vector(real(dp)) acc

  ! Alignment assertions handed to the compiler.
  call alignx(32, a(1,1))
  call alignx(32, b(1,1))
  call alignx(32, c(1,1))

  do irow = 1, n1, 4
    ! Load the row strip of a once; it is reused for every column of b.
    a_c1  = vec_ld(0, a(irow,1))
    a_c2  = vec_ld(0, a(irow,2))
    a_c3  = vec_ld(0, a(irow,3))
    a_c4  = vec_ld(0, a(irow,4))
    a_c5  = vec_ld(0, a(irow,5))
    a_c6  = vec_ld(0, a(irow,6))
    a_c7  = vec_ld(0, a(irow,7))
    a_c8  = vec_ld(0, a(irow,8))
    a_c9  = vec_ld(0, a(irow,9))
    a_c10 = vec_ld(0, a(irow,10))
    a_c11 = vec_ld(0, a(irow,11))
    a_c12 = vec_ld(0, a(irow,12))
    a_c13 = vec_ld(0, a(irow,13))
    a_c14 = vec_ld(0, a(irow,14))
    a_c15 = vec_ld(0, a(irow,15))
    a_c16 = vec_ld(0, a(irow,16))

    do jcol = 1, n3
      ! Column jcol of b, loaded at rows 1, 5, 9 and 13.
      b_q1 = vec_ld(0, b(1,jcol))
      b_q2 = vec_ld(0, b(5,jcol))
      b_q3 = vec_ld(0, b(9,jcol))
      b_q4 = vec_ld(0, b(13,jcol))

      ! Accumulate a(:,k) * b(k,jcol) for k = 1..16, broadcasting each
      ! b entry across the vector with vec_splat.
      acc = vec_mul(a_c1, vec_splat(b_q1, 0))
      acc = vec_madd(a_c2,  vec_splat(b_q1, 1), acc)
      acc = vec_madd(a_c3,  vec_splat(b_q1, 2), acc)
      acc = vec_madd(a_c4,  vec_splat(b_q1, 3), acc)
      acc = vec_madd(a_c5,  vec_splat(b_q2, 0), acc)
      acc = vec_madd(a_c6,  vec_splat(b_q2, 1), acc)
      acc = vec_madd(a_c7,  vec_splat(b_q2, 2), acc)
      acc = vec_madd(a_c8,  vec_splat(b_q2, 3), acc)
      acc = vec_madd(a_c9,  vec_splat(b_q3, 0), acc)
      acc = vec_madd(a_c10, vec_splat(b_q3, 1), acc)
      acc = vec_madd(a_c11, vec_splat(b_q3, 2), acc)
      acc = vec_madd(a_c12, vec_splat(b_q3, 3), acc)
      acc = vec_madd(a_c13, vec_splat(b_q4, 0), acc)
      acc = vec_madd(a_c14, vec_splat(b_q4, 1), acc)
      acc = vec_madd(a_c15, vec_splat(b_q4, 2), acc)
      acc = vec_madd(a_c16, vec_splat(b_q4, 3), acc)

      call vec_st(acc, 0, c(irow,jcol))
    end do
  end do
end subroutine mxm_bgq_16
215 
216 
!> Matrix-matrix product c = a*b with the inner dimension hard-wired to 6,
!! written with IBM XL vector built-ins (vec_ld, vec_splat, vec_mul,
!! vec_madd, vec_st). Two columns of c are produced per pass of the column
!! loop.
!!
!! @param a   left operand, declared a(n1,n2); columns 1..6 are read
!! @param n1  rows of a and c; the row loop advances in steps of 4
!! @param b   right operand, declared b(n2,n3); loaded at b(1,j), b(5,j) and
!!            b(9,j) for every odd j
!! @param n2  declared inner dimension; never referenced in the body, which
!!            always accumulates exactly 6 terms per output column
!! @param c   result, declared c(n1,n3); columns j and j+1 are stored per pass
!! @param n3  columns of b and c; the column loop advances in steps of 2
!!
!! NOTE(review): the row subscripts 5 and 9 exceed 6, so with column-major
!! storage and n2 == 6 the three loads presumably cover the 12 contiguous
!! doubles of columns j and j+1 (4 doubles per vector, given splat indices
!! 0..3). Assumes n2 == 6, n1 a multiple of 4, n3 even, and 32-byte aligned
!! a, b, c as asserted through alignx -- confirm against the caller.
subroutine mxm_bgq_6(a,n1,b,n2,c,n3)
  use kinds, only : dp
  implicit none

  integer, intent(in) :: n1, n2, n3
  real(dp), intent(in) :: a(n1,n2), b(n2,n3)
  real(dp), intent(out) :: c(n1,n3)

  integer :: irow, jcol
  vector(real(dp)) a_c1, a_c2, a_c3, a_c4, a_c5, a_c6
  vector(real(dp)) b_q1, b_q2, b_q3
  vector(real(dp)) acc

  ! Alignment assertions handed to the compiler.
  call alignx(32, a(1,1))
  call alignx(32, b(1,1))
  call alignx(32, c(1,1))

  do irow = 1, n1, 4
    ! Load the row strip of a once; it is reused for every column of b.
    a_c1 = vec_ld(0, a(irow,1))
    a_c2 = vec_ld(0, a(irow,2))
    a_c3 = vec_ld(0, a(irow,3))
    a_c4 = vec_ld(0, a(irow,4))
    a_c5 = vec_ld(0, a(irow,5))
    a_c6 = vec_ld(0, a(irow,6))

    do jcol = 1, n3, 2
      b_q1 = vec_ld(0, b(1,jcol))
      b_q2 = vec_ld(0, b(5,jcol))

      ! First output column: all four lanes of b_q1, then lanes 0..1 of b_q2.
      acc = vec_mul(a_c1, vec_splat(b_q1, 0))
      acc = vec_madd(a_c2, vec_splat(b_q1, 1), acc)
      acc = vec_madd(a_c3, vec_splat(b_q1, 2), acc)
      acc = vec_madd(a_c4, vec_splat(b_q1, 3), acc)
      acc = vec_madd(a_c5, vec_splat(b_q2, 0), acc)
      acc = vec_madd(a_c6, vec_splat(b_q2, 1), acc)

      call vec_st(acc, 0, c(irow,jcol))

      b_q3 = vec_ld(0, b(9,jcol))

      ! Second output column: lanes 2..3 of b_q2, then all four lanes of b_q3.
      acc = vec_mul(a_c1, vec_splat(b_q2, 2))
      acc = vec_madd(a_c2, vec_splat(b_q2, 3), acc)
      acc = vec_madd(a_c3, vec_splat(b_q3, 0), acc)
      acc = vec_madd(a_c4, vec_splat(b_q3, 1), acc)
      acc = vec_madd(a_c5, vec_splat(b_q3, 2), acc)
      acc = vec_madd(a_c6, vec_splat(b_q3, 3), acc)

      call vec_st(acc, 0, c(irow,jcol+1))
    end do
  end do
end subroutine mxm_bgq_6
286 
!> Matrix-matrix product c = a*b with the inner dimension hard-wired to 10,
!! written with IBM XL vector built-ins (vec_ld, vec_splat, vec_mul,
!! vec_madd, vec_st). Two columns of c are produced per pass of the column
!! loop.
!!
!! @param a   left operand, declared a(n1,n2); columns 1..10 are read
!! @param n1  rows of a and c; the row loop advances in steps of 4
!! @param b   right operand, declared b(n2,n3); loaded at b(1,j), b(5,j),
!!            b(9,j), b(13,j) and b(17,j) for every odd j
!! @param n2  declared inner dimension; never referenced in the body, which
!!            always accumulates exactly 10 terms per output column
!! @param c   result, declared c(n1,n3); columns j and j+1 are stored per pass
!! @param n3  columns of b and c; the column loop advances in steps of 2
!!
!! NOTE(review): the row subscripts 13 and 17 exceed 10, so with column-major
!! storage and n2 == 10 the five loads presumably cover the 20 contiguous
!! doubles of columns j and j+1 (4 doubles per vector, given splat indices
!! 0..3). Assumes n2 == 10, n1 a multiple of 4, n3 even, and 32-byte aligned
!! a, b, c as asserted through alignx -- confirm against the caller.
subroutine mxm_bgq_10(a,n1,b,n2,c,n3)
  use kinds, only : dp
  implicit none

  integer, intent(in) :: n1, n2, n3
  real(dp), intent(in) :: a(n1,n2), b(n2,n3)
  real(dp), intent(out) :: c(n1,n3)

  integer :: irow, jcol
  vector(real(dp)) a_c1, a_c2, a_c3, a_c4, a_c5
  vector(real(dp)) a_c6, a_c7, a_c8, a_c9, a_c10
  vector(real(dp)) b_q1, b_q2, b_q3, b_q4, b_q5
  vector(real(dp)) acc

  ! Alignment assertions handed to the compiler.
  call alignx(32, a(1,1))
  call alignx(32, b(1,1))
  call alignx(32, c(1,1))

  do irow = 1, n1, 4
    ! Load the row strip of a once; it is reused for every column of b.
    a_c1  = vec_ld(0, a(irow,1))
    a_c2  = vec_ld(0, a(irow,2))
    a_c3  = vec_ld(0, a(irow,3))
    a_c4  = vec_ld(0, a(irow,4))
    a_c5  = vec_ld(0, a(irow,5))
    a_c6  = vec_ld(0, a(irow,6))
    a_c7  = vec_ld(0, a(irow,7))
    a_c8  = vec_ld(0, a(irow,8))
    a_c9  = vec_ld(0, a(irow,9))
    a_c10 = vec_ld(0, a(irow,10))

    do jcol = 1, n3, 2
      b_q1 = vec_ld(0, b(1,jcol))
      b_q2 = vec_ld(0, b(5,jcol))
      b_q3 = vec_ld(0, b(9,jcol))

      ! First output column: b_q1 and b_q2 in full, then lanes 0..1 of b_q3.
      acc = vec_mul(a_c1, vec_splat(b_q1, 0))
      acc = vec_madd(a_c2,  vec_splat(b_q1, 1), acc)
      acc = vec_madd(a_c3,  vec_splat(b_q1, 2), acc)
      acc = vec_madd(a_c4,  vec_splat(b_q1, 3), acc)
      acc = vec_madd(a_c5,  vec_splat(b_q2, 0), acc)
      acc = vec_madd(a_c6,  vec_splat(b_q2, 1), acc)
      acc = vec_madd(a_c7,  vec_splat(b_q2, 2), acc)
      acc = vec_madd(a_c8,  vec_splat(b_q2, 3), acc)
      acc = vec_madd(a_c9,  vec_splat(b_q3, 0), acc)
      acc = vec_madd(a_c10, vec_splat(b_q3, 1), acc)

      call vec_st(acc, 0, c(irow,jcol))

      b_q4 = vec_ld(0, b(13,jcol))
      b_q5 = vec_ld(0, b(17,jcol))

      ! Second output column: lanes 2..3 of b_q3, then b_q4 and b_q5 in full.
      acc = vec_mul(a_c1, vec_splat(b_q3, 2))
      acc = vec_madd(a_c2,  vec_splat(b_q3, 3), acc)
      acc = vec_madd(a_c3,  vec_splat(b_q4, 0), acc)
      acc = vec_madd(a_c4,  vec_splat(b_q4, 1), acc)
      acc = vec_madd(a_c5,  vec_splat(b_q4, 2), acc)
      acc = vec_madd(a_c6,  vec_splat(b_q4, 3), acc)
      acc = vec_madd(a_c7,  vec_splat(b_q5, 0), acc)
      acc = vec_madd(a_c8,  vec_splat(b_q5, 1), acc)
      acc = vec_madd(a_c9,  vec_splat(b_q5, 2), acc)
      acc = vec_madd(a_c10, vec_splat(b_q5, 3), acc)

      call vec_st(acc, 0, c(irow,jcol+1))
    end do
  end do
end subroutine mxm_bgq_10
379 
subroutine mxm_bgq_8(a, n1, b, n2, c, n3)
Definition: mxm_bgq.F90:1
subroutine mxm_bgq_6(a, n1, b, n2, c, n3)
Definition: mxm_bgq.F90:217
subroutine mxm_bgq_12(a, n1, b, n2, c, n3)
Definition: mxm_bgq.F90:56
subroutine mxm_bgq_10(a, n1, b, n2, c, n3)
Definition: mxm_bgq.F90:287
subroutine mxm_bgq_16(a, n1, b, n2, c, n3)
Definition: mxm_bgq.F90:129