! Kernel fragment: for one (i,j) pair, forms a four-lane vector
!   cv = sum over k = 1..8 of a(i..,k) * b(k,j)
! and stores it starting at c(i,j).
! NOTE(review): presumably the body of mxm_bgq_8 (columns 1..8 of a and rows
!   1..8 of b are read) - confirm against the original file.
! NOTE(review): the subroutine statement, the loops that define i and j, the
!   declaration of cv and the end statement are not visible in this fragment.
! NOTE(review): the leading integers (5, 6, 7, ...) look like listing line
!   numbers left over from an extraction, and each declaration is split over
!   two lines - confirm before compiling.
!
! Dummy arguments: a is n1 x n2 (input), b is n2 x n3 (input), c is n1 x n3 (output).
5 integer,
intent(in) :: n1, n2, n3
6 real(DP),
intent(in) :: a(n1,n2),b(n2,n3)
7 real(DP),
intent(out) :: c(n1,n3)
! Vector temporaries: av* hold pieces of columns of a, bv* hold raw loads of b,
! bsv* hold one element of b replicated across all lanes.
10 vector(
real(dp)) av1, av2, av3, av4, av5, av6, av7, av8
11 vector(
real(dp)) bv1, bsv1, bsv2, bsv3, bsv4
12 vector(
real(dp)) bv2, bsv5, bsv6, bsv7, bsv8
! Alignment assertions (32 bytes) on the first element of each array.
15 call alignx(32, a(1,1))
16 call alignx(32, b(1,1))
17 call alignx(32, c(1,1))
! Load one vector from each of columns 1..8 of a, starting at row i.
20 av1 = vec_ld(0,a(i,1))
21 av2 = vec_ld(0,a(i,2))
22 av3 = vec_ld(0,a(i,3))
23 av4 = vec_ld(0,a(i,4))
24 av5 = vec_ld(0,a(i,5))
25 av6 = vec_ld(0,a(i,6))
26 av7 = vec_ld(0,a(i,7))
27 av8 = vec_ld(0,a(i,8))
! Load column j of b in two vectors (rows 1.. and 5..), then replicate each
! of the four lanes so that bsvK carries b(K,j) in every lane.
30 bv1 = vec_ld(0,b(1,j))
31 bv2 = vec_ld(0,b(5,j))
32 bsv1 = vec_splat(bv1, 0)
33 bsv2 = vec_splat(bv1, 1)
34 bsv3 = vec_splat(bv1, 2)
35 bsv4 = vec_splat(bv1, 3)
36 bsv5 = vec_splat(bv2, 0)
37 bsv6 = vec_splat(bv2, 1)
38 bsv7 = vec_splat(bv2, 2)
39 bsv8 = vec_splat(bv2, 3)
! Accumulate: the first term is a plain multiply, the remaining seven are
! multiply-adds into the running vector cv.
41 cv = vec_mul(av1, bsv1)
42 cv = vec_madd(av2, bsv2, cv)
43 cv = vec_madd(av3, bsv3, cv)
44 cv = vec_madd(av4, bsv4, cv)
45 cv = vec_madd(av5, bsv5, cv)
46 cv = vec_madd(av6, bsv6, cv)
47 cv = vec_madd(av7, bsv7, cv)
48 cv = vec_madd(av8, bsv8, cv)
! Store the result vector starting at c(i,j).
50 call vec_st(cv, 0, c(i,j))
! Kernel fragment: for one (i,j) pair, forms a four-lane vector
!   cv = sum over k = 1..12 of a(i..,k) * b(k,j)
! and stores it starting at c(i,j).
! NOTE(review): presumably the body of mxm_bgq_12 (columns 1..12 of a and rows
!   1..12 of b are read) - confirm against the original file.
! NOTE(review): the subroutine statement, the loops that define i and j, the
!   declaration of cv and the end statement are not visible in this fragment.
! NOTE(review): the leading integers look like listing line numbers left over
!   from an extraction, and each declaration is split over two lines.
!
! Dummy arguments: a is n1 x n2 (input), b is n2 x n3 (input), c is n1 x n3 (output).
60 integer,
intent(in) :: n1, n2, n3
61 real(DP),
intent(in) :: a(n1,n2),b(n2,n3)
62 real(DP),
intent(out) :: c(n1,n3)
! Vector temporaries: av* hold pieces of columns of a, bv* hold raw loads of b,
! bsv* hold one element of b replicated across all lanes.
66 vector(
real(dp)) av1, av2, av3, av4, av5, av6, av7, av8
67 vector(
real(dp)) av9, av10, av11, av12
68 vector(
real(dp)) bv1, bsv1, bsv2, bsv3, bsv4
69 vector(
real(dp)) bv2, bsv5, bsv6, bsv7, bsv8
70 vector(
real(dp)) bv3, bsv9, bsv10, bsv11, bsv12
! Alignment assertions (32 bytes) on the first element of each array.
74 call alignx(32, a(1,1))
75 call alignx(32, b(1,1))
76 call alignx(32, c(1,1))
! Load one vector from each of columns 1..12 of a, starting at row i.
79 av1 = vec_ld(0,a(i,1))
80 av2 = vec_ld(0,a(i,2))
81 av3 = vec_ld(0,a(i,3))
82 av4 = vec_ld(0,a(i,4))
83 av5 = vec_ld(0,a(i,5))
84 av6 = vec_ld(0,a(i,6))
85 av7 = vec_ld(0,a(i,7))
86 av8 = vec_ld(0,a(i,8))
87 av9 = vec_ld(0,a(i,9))
88 av10 = vec_ld(0,a(i,10))
89 av11 = vec_ld(0,a(i,11))
90 av12 = vec_ld(0,a(i,12))
! Load column j of b in three vectors (rows 1.., 5.., 9..).
93 bv1 = vec_ld(0,b(1,j))
94 bv2 = vec_ld(0,b(5,j))
95 bv3 = vec_ld(0,b(9,j))
! Replicate each lane so that bsvK carries b(K,j) in every lane.
97 bsv1 = vec_splat(bv1, 0)
98 bsv2 = vec_splat(bv1, 1)
99 bsv3 = vec_splat(bv1, 2)
100 bsv4 = vec_splat(bv1, 3)
101 bsv5 = vec_splat(bv2, 0)
102 bsv6 = vec_splat(bv2, 1)
103 bsv7 = vec_splat(bv2, 2)
104 bsv8 = vec_splat(bv2, 3)
105 bsv9 = vec_splat(bv3, 0)
106 bsv10 = vec_splat(bv3, 1)
107 bsv11 = vec_splat(bv3, 2)
108 bsv12 = vec_splat(bv3, 3)
! Accumulate: the first term is a plain multiply, the remaining eleven are
! multiply-adds into the running vector cv.
110 cv = vec_mul(av1, bsv1)
111 cv = vec_madd(av2, bsv2, cv)
112 cv = vec_madd(av3, bsv3, cv)
113 cv = vec_madd(av4, bsv4, cv)
114 cv = vec_madd(av5, bsv5, cv)
115 cv = vec_madd(av6, bsv6, cv)
116 cv = vec_madd(av7, bsv7, cv)
117 cv = vec_madd(av8, bsv8, cv)
118 cv = vec_madd(av9, bsv9, cv)
119 cv = vec_madd(av10, bsv10, cv)
120 cv = vec_madd(av11, bsv11, cv)
121 cv = vec_madd(av12, bsv12, cv)
! Store the result vector starting at c(i,j).
123 call vec_st(cv, 0, c(i,j))
! Kernel fragment: for one (i,j) pair, forms a four-lane vector
!   cv = sum over k = 1..16 of a(i..,k) * b(k,j)
! and stores it starting at c(i,j).
! NOTE(review): presumably the body of mxm_bgq_16 (columns 1..16 of a and rows
!   1..16 of b are read) - confirm against the original file.
! NOTE(review): the subroutine statement, the loops that define i and j, the
!   declaration of cv and the end statement are not visible in this fragment.
! NOTE(review): the leading integers look like listing line numbers left over
!   from an extraction, and each declaration is split over two lines.
!
! Dummy arguments: a is n1 x n2 (input), b is n2 x n3 (input), c is n1 x n3 (output).
133 integer,
intent(in) :: n1, n2, n3
134 real(DP),
intent(in) :: a(n1,n2),b(n2,n3)
135 real(DP),
intent(out) :: c(n1,n3)
! Vector temporaries: av* hold pieces of columns of a, bv* hold raw loads of b,
! bsv* hold one element of b replicated across all lanes.
139 vector(
real(dp)) av1, av2, av3, av4, av5, av6, av7, av8
140 vector(
real(dp)) av9, av10, av11, av12, av13, av14, av15, av16
141 vector(
real(dp)) bv1, bsv1, bsv2, bsv3, bsv4
142 vector(
real(dp)) bv2, bsv5, bsv6, bsv7, bsv8
143 vector(
real(dp)) bv3, bsv9, bsv10, bsv11, bsv12
144 vector(
real(dp)) bv4, bsv13, bsv14, bsv15, bsv16
! Alignment assertions (32 bytes) on the first element of each array.
148 call alignx(32, a(1,1))
149 call alignx(32, b(1,1))
150 call alignx(32, c(1,1))
! Load one vector from each of columns 1..16 of a, starting at row i.
153 av1 = vec_ld(0,a(i,1))
154 av2 = vec_ld(0,a(i,2))
155 av3 = vec_ld(0,a(i,3))
156 av4 = vec_ld(0,a(i,4))
157 av5 = vec_ld(0,a(i,5))
158 av6 = vec_ld(0,a(i,6))
159 av7 = vec_ld(0,a(i,7))
160 av8 = vec_ld(0,a(i,8))
161 av9 = vec_ld(0,a(i,9))
162 av10 = vec_ld(0,a(i,10))
163 av11 = vec_ld(0,a(i,11))
164 av12 = vec_ld(0,a(i,12))
165 av13 = vec_ld(0,a(i,13))
166 av14 = vec_ld(0,a(i,14))
167 av15 = vec_ld(0,a(i,15))
168 av16 = vec_ld(0,a(i,16))
! Load column j of b in four vectors (rows 1.., 5.., 9.., 13..).
171 bv1 = vec_ld(0,b(1,j))
172 bv2 = vec_ld(0,b(5,j))
173 bv3 = vec_ld(0,b(9,j))
174 bv4 = vec_ld(0,b(13,j))
! Replicate each lane so that bsvK carries b(K,j) in every lane.
176 bsv1 = vec_splat(bv1, 0)
177 bsv2 = vec_splat(bv1, 1)
178 bsv3 = vec_splat(bv1, 2)
179 bsv4 = vec_splat(bv1, 3)
180 bsv5 = vec_splat(bv2, 0)
181 bsv6 = vec_splat(bv2, 1)
182 bsv7 = vec_splat(bv2, 2)
183 bsv8 = vec_splat(bv2, 3)
184 bsv9 = vec_splat(bv3, 0)
185 bsv10 = vec_splat(bv3, 1)
186 bsv11 = vec_splat(bv3, 2)
187 bsv12 = vec_splat(bv3, 3)
188 bsv13 = vec_splat(bv4, 0)
189 bsv14 = vec_splat(bv4, 1)
190 bsv15 = vec_splat(bv4, 2)
191 bsv16 = vec_splat(bv4, 3)
! Accumulate: the first term is a plain multiply, the remaining fifteen are
! multiply-adds into the running vector cv.
193 cv = vec_mul(av1, bsv1)
194 cv = vec_madd(av2, bsv2, cv)
195 cv = vec_madd(av3, bsv3, cv)
196 cv = vec_madd(av4, bsv4, cv)
197 cv = vec_madd(av5, bsv5, cv)
198 cv = vec_madd(av6, bsv6, cv)
199 cv = vec_madd(av7, bsv7, cv)
200 cv = vec_madd(av8, bsv8, cv)
201 cv = vec_madd(av9, bsv9, cv)
202 cv = vec_madd(av10, bsv10, cv)
203 cv = vec_madd(av11, bsv11, cv)
204 cv = vec_madd(av12, bsv12, cv)
205 cv = vec_madd(av13, bsv13, cv)
206 cv = vec_madd(av14, bsv14, cv)
207 cv = vec_madd(av15, bsv15, cv)
208 cv = vec_madd(av16, bsv16, cv)
! Store the result vector starting at c(i,j).
210 call vec_st(cv, 0, c(i,j))
! Kernel fragment: for one (i,j) pair, forms two four-lane result vectors from
! the same six loaded columns of a and stores them at c(i,j) and c(i,j+1).
! The first uses b rows 1..6 of column j; the second uses b rows 7..12 of
! column j as written in the index expressions.
! NOTE(review): presumably the body of mxm_bgq_6 with n2 == 6, in which case
!   b(7..12,j) lies past the declared first extent and, by column-major
!   storage, addresses b(1..6,j+1) - consistent with the store to c(i,j+1).
!   Confirm that callers guarantee n2 == 6 and that the enclosing j loop
!   (not visible here) advances two columns at a time.
! NOTE(review): the subroutine statement, the loops that define i and j, the
!   declaration of cv and the end statement are not visible in this fragment.
! NOTE(review): the leading integers look like listing line numbers left over
!   from an extraction, and each declaration is split over two lines.
!
! Dummy arguments: a is n1 x n2 (input), b is n2 x n3 (input), c is n1 x n3 (output).
221 integer,
intent(in) :: n1, n2, n3
222 real(DP),
intent(in) :: a(n1,n2),b(n2,n3)
223 real(DP),
intent(out) :: c(n1,n3)
! Vector temporaries: av* hold pieces of columns of a, bv* hold raw loads of b,
! bsv* hold one element of b replicated across all lanes.
227 vector(
real(dp)) av1, av2, av3, av4, av5, av6
228 vector(
real(dp)) bv1, bsv1, bsv2, bsv3, bsv4
229 vector(
real(dp)) bv2, bsv5, bsv6
! Alignment assertions (32 bytes) on the first element of each array.
233 call alignx(32, a(1,1))
234 call alignx(32, b(1,1))
235 call alignx(32, c(1,1))
! Load one vector from each of columns 1..6 of a, starting at row i.
! These are reused for both result columns below.
238 av1 = vec_ld(0,a(i,1))
239 av2 = vec_ld(0,a(i,2))
240 av3 = vec_ld(0,a(i,3))
241 av4 = vec_ld(0,a(i,4))
242 av5 = vec_ld(0,a(i,5))
243 av6 = vec_ld(0,a(i,6))
! First result: load b starting at rows 1 and 5 of column j.
246 bv1 = vec_ld(0,b(1,j))
247 bv2 = vec_ld(0,b(5,j))
! bsv1..bsv6 carry b(1,j)..b(6,j); lanes 2 and 3 of bv2 are kept for later.
249 bsv1 = vec_splat(bv1, 0)
250 bsv2 = vec_splat(bv1, 1)
251 bsv3 = vec_splat(bv1, 2)
252 bsv4 = vec_splat(bv1, 3)
253 bsv5 = vec_splat(bv2, 0)
254 bsv6 = vec_splat(bv2, 1)
! Multiply, then five multiply-adds into the running vector cv.
256 cv = vec_mul(av1, bsv1)
257 cv = vec_madd(av2, bsv2, cv)
258 cv = vec_madd(av3, bsv3, cv)
259 cv = vec_madd(av4, bsv4, cv)
260 cv = vec_madd(av5, bsv5, cv)
261 cv = vec_madd(av6, bsv6, cv)
! Store the first result vector starting at c(i,j).
263 call vec_st(cv, 0, c(i,j))
! Second result: bv1 is overwritten with the load starting at b(9,j); bv2
! still holds the load starting at b(5,j), so its lanes 2 and 3 are the
! elements addressed as b(7,j) and b(8,j).
265 bv1 = vec_ld(0,b(9,j))
! bsv1..bsv6 now carry the elements addressed as b(7,j)..b(12,j).
267 bsv1 = vec_splat(bv2, 2)
268 bsv2 = vec_splat(bv2, 3)
269 bsv3 = vec_splat(bv1, 0)
270 bsv4 = vec_splat(bv1, 1)
271 bsv5 = vec_splat(bv1, 2)
272 bsv6 = vec_splat(bv1, 3)
! Same accumulation chain with the new b values.
274 cv = vec_mul(av1, bsv1)
275 cv = vec_madd(av2, bsv2, cv)
276 cv = vec_madd(av3, bsv3, cv)
277 cv = vec_madd(av4, bsv4, cv)
278 cv = vec_madd(av5, bsv5, cv)
279 cv = vec_madd(av6, bsv6, cv)
! Store the second result vector starting at c(i,j+1).
281 call vec_st(cv, 0, c(i,j+1))
! Kernel fragment: for one (i,j) pair, forms two four-lane result vectors from
! the same ten loaded columns of a and stores them at c(i,j) and c(i,j+1).
! The first uses b rows 1..10 of column j; the second uses b rows 11..20 of
! column j as written in the index expressions.
! NOTE(review): presumably the body of mxm_bgq_10 with n2 == 10, in which case
!   b(11..20,j) lies past the declared first extent and, by column-major
!   storage, addresses b(1..10,j+1) - consistent with the store to c(i,j+1).
!   Confirm that callers guarantee n2 == 10 and that the enclosing j loop
!   (not visible here) advances two columns at a time.
! NOTE(review): the subroutine statement, the loops that define i and j, the
!   declaration of cv and the end statement are not visible in this fragment.
! NOTE(review): the leading integers look like listing line numbers left over
!   from an extraction, and each declaration is split over two lines.
!
! Dummy arguments: a is n1 x n2 (input), b is n2 x n3 (input), c is n1 x n3 (output).
291 integer,
intent(in) :: n1, n2, n3
292 real(DP),
intent(in) :: a(n1,n2),b(n2,n3)
293 real(DP),
intent(out) :: c(n1,n3)
! Vector temporaries: av* hold pieces of columns of a, bv* hold raw loads of b,
! bsv* hold one element of b replicated across all lanes.
297 vector(
real(dp)) av1, av2, av3, av4, av5, av6, av7, av8
298 vector(
real(dp)) av9, av10
299 vector(
real(dp)) bv1, bsv1, bsv2, bsv3, bsv4
300 vector(
real(dp)) bv2, bsv5, bsv6, bsv7, bsv8
301 vector(
real(dp)) bv3, bsv9, bsv10
! Alignment assertions (32 bytes) on the first element of each array.
304 call alignx(32, a(1,1))
305 call alignx(32, b(1,1))
306 call alignx(32, c(1,1))
! Load one vector from each of columns 1..10 of a, starting at row i.
! These are reused for both result columns below.
309 av1 = vec_ld(0,a(i,1))
310 av2 = vec_ld(0,a(i,2))
311 av3 = vec_ld(0,a(i,3))
312 av4 = vec_ld(0,a(i,4))
313 av5 = vec_ld(0,a(i,5))
314 av6 = vec_ld(0,a(i,6))
315 av7 = vec_ld(0,a(i,7))
316 av8 = vec_ld(0,a(i,8))
317 av9 = vec_ld(0,a(i,9))
318 av10 = vec_ld(0,a(i,10))
! First result: load b starting at rows 1, 5 and 9 of column j.
321 bv1 = vec_ld(0,b(1,j))
322 bv2 = vec_ld(0,b(5,j))
323 bv3 = vec_ld(0,b(9,j))
! bsv1..bsv10 carry b(1,j)..b(10,j); lanes 2 and 3 of bv3 are kept for later.
325 bsv1 = vec_splat(bv1, 0)
326 bsv2 = vec_splat(bv1, 1)
327 bsv3 = vec_splat(bv1, 2)
328 bsv4 = vec_splat(bv1, 3)
329 bsv5 = vec_splat(bv2, 0)
330 bsv6 = vec_splat(bv2, 1)
331 bsv7 = vec_splat(bv2, 2)
332 bsv8 = vec_splat(bv2, 3)
333 bsv9 = vec_splat(bv3, 0)
334 bsv10 = vec_splat(bv3, 1)
! Multiply, then nine multiply-adds into the running vector cv.
336 cv = vec_mul(av1, bsv1)
337 cv = vec_madd(av2, bsv2, cv)
338 cv = vec_madd(av3, bsv3, cv)
339 cv = vec_madd(av4, bsv4, cv)
340 cv = vec_madd(av5, bsv5, cv)
341 cv = vec_madd(av6, bsv6, cv)
342 cv = vec_madd(av7, bsv7, cv)
343 cv = vec_madd(av8, bsv8, cv)
344 cv = vec_madd(av9, bsv9, cv)
345 cv = vec_madd(av10, bsv10, cv)
! Store the first result vector starting at c(i,j).
347 call vec_st(cv, 0, c(i,j))
! Second result: bv1 and bv2 are overwritten with loads starting at b(13,j)
! and b(17,j); bv3 still holds the load starting at b(9,j), so its lanes 2
! and 3 are the elements addressed as b(11,j) and b(12,j).
349 bv1 = vec_ld(0,b(13,j))
350 bv2 = vec_ld(0,b(17,j))
! bsv1..bsv10 now carry the elements addressed as b(11,j)..b(20,j).
352 bsv1 = vec_splat(bv3, 2)
353 bsv2 = vec_splat(bv3, 3)
354 bsv3 = vec_splat(bv1, 0)
355 bsv4 = vec_splat(bv1, 1)
356 bsv5 = vec_splat(bv1, 2)
357 bsv6 = vec_splat(bv1, 3)
358 bsv7 = vec_splat(bv2, 0)
359 bsv8 = vec_splat(bv2, 1)
360 bsv9 = vec_splat(bv2, 2)
361 bsv10 = vec_splat(bv2, 3)
! Same accumulation chain with the new b values.
363 cv = vec_mul(av1, bsv1)
364 cv = vec_madd(av2, bsv2, cv)
365 cv = vec_madd(av3, bsv3, cv)
366 cv = vec_madd(av4, bsv4, cv)
367 cv = vec_madd(av5, bsv5, cv)
368 cv = vec_madd(av6, bsv6, cv)
369 cv = vec_madd(av7, bsv7, cv)
370 cv = vec_madd(av8, bsv8, cv)
371 cv = vec_madd(av9, bsv9, cv)
372 cv = vec_madd(av10, bsv10, cv)
! Store the second result vector starting at c(i,j+1).
374 call vec_st(cv, 0, c(i,j+1))
! Subroutine statements of five kernels sharing the argument order
! (a, n1, b, n2, c, n3).
! NOTE(review): these headers appear detached from their bodies (no
!   declarations or end statements follow them here) - presumably an
!   extraction artefact; the numeric suffix presumably names the fixed
!   inner dimension n2 each kernel expects. Confirm against the original file.
subroutine mxm_bgq_8(a, n1, b, n2, c, n3)
subroutine mxm_bgq_6(a, n1, b, n2, c, n3)
subroutine mxm_bgq_12(a, n1, b, n2, c, n3)
subroutine mxm_bgq_10(a, n1, b, n2, c, n3)
subroutine mxm_bgq_16(a, n1, b, n2, c, n3)