12#ifndef PLASMA_CORE_BLAS_S_H
13#define PLASMA_CORE_BLAS_S_H
15#include "plasma_async.h"
16#include "plasma_barrier.h"
17#include "plasma_descriptor.h"
18#include "plasma_types.h"
19#include "plasma_workspace.h"
20#include "plasma_descriptor.h"
/**
 * Single-precision absolute value.
 *
 * This has the same name and type as the C standard library's fabsf(), so it
 * is a compatible re-declaration of that function.
 *
 * NOTE(review): the parameter name `alpha` hints that this line came from a
 * precision-generated template -- confirm it is still wanted next to <math.h>.
 *
 * @param alpha  value whose magnitude is requested
 * @return       the magnitude of @p alpha as a float
 */
float fabsf(float alpha);
33int plasma_core_sgeadd(plasma_enum_t transa,
35 float alpha,
const float *A,
int lda,
36 float beta,
float *B,
int ldb);
38int plasma_core_sgelqt(
int m,
int n,
int ib,
44void plasma_core_sgemm(plasma_enum_t transa, plasma_enum_t transb,
46 float alpha,
const float *A,
int lda,
47 const float *B,
int ldb,
48 float beta,
float *C,
int ldc);
50int plasma_core_sgeqrt(
int m,
int n,
int ib,
/**
 * Single-precision kernel plasma_core_sgessq.
 *
 * Returns nothing; A is read-only, while scale and sumsq are writable
 * pointers supplied by the caller.
 *
 * NOTE(review): the name and argument list suggest a scaled sum-of-squares
 * accumulation over a general matrix (in the manner of LAPACK SLASSQ) --
 * confirm against the implementation.
 *
 * @param m      presumably the number of rows of A
 * @param n      presumably the number of columns of A
 * @param A      read-only single-precision data
 * @param lda    presumably the leading dimension of A
 * @param scale  writable; presumably the in/out scaling factor
 * @param sumsq  writable; presumably the in/out scaled sum of squares
 */
void plasma_core_sgessq(int m, int n,
                        const float *A, int lda,
                        float *scale, float *sumsq);
60void plasma_core_sgetrf(
plasma_desc_t A,
int *ipiv,
int ib,
int rank,
int size,
61 volatile int *max_idx,
volatile float *max_val,
62 volatile int *info, plasma_barrier_t *barrier);
64int plasma_core_ssygst(
int itype, plasma_enum_t uplo,
69void plasma_core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
71 float alpha,
const float *A,
int lda,
72 const float *B,
int ldb,
73 float beta,
float *C,
int ldc);
75void plasma_core_ssyr2k(plasma_enum_t uplo, plasma_enum_t trans,
77 float alpha,
const float *A,
int lda,
78 const float *B,
int ldb,
79 float beta,
float *C,
int ldc);
81void plasma_core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
83 float alpha,
const float *A,
int lda,
84 float beta,
float *C,
int ldc);
86void plasma_core_ssyssq(plasma_enum_t uplo,
88 const float *A,
int lda,
89 float *scale,
float *sumsq);
91void plasma_core_ssyssq(plasma_enum_t uplo,
93 const float *A,
int lda,
94 float *scale,
float *sumsq);
96void plasma_core_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
98 const float *A,
int lda,
101void plasma_core_slacpy_lapack2tile_band(plasma_enum_t uplo,
103 int m,
int n,
int nb,
int kl,
int ku,
104 const float *A,
int lda,
107void plasma_core_slacpy_tile2lapack_band(plasma_enum_t uplo,
109 int m,
int n,
int nb,
int kl,
int ku,
110 const float *B,
int ldb,
113void plasma_core_slange(plasma_enum_t norm,
115 const float *A,
int lda,
116 float *work,
float *result);
118void plasma_core_slansy(plasma_enum_t norm, plasma_enum_t uplo,
120 const float *A,
int lda,
121 float *work,
float *value);
123void plasma_core_slansy(plasma_enum_t norm, plasma_enum_t uplo,
125 const float *A,
int lda,
126 float *work,
float *value);
128void plasma_core_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
130 const float *A,
int lda,
131 float *work,
float *value);
133void plasma_core_slascl(plasma_enum_t uplo,
134 float cfrom,
float cto,
138void plasma_core_slaset(plasma_enum_t uplo,
140 float alpha,
float beta,
143void plasma_core_sgeswp(plasma_enum_t colrow,
146void plasma_core_ssyswp(
int rank,
int num_threads,
148 int incx, plasma_barrier_t *barrier);
150int plasma_core_slauum(plasma_enum_t uplo,
154int plasma_core_spamm(plasma_enum_t op, plasma_enum_t side, plasma_enum_t storev,
155 int m,
int n,
int k,
int l,
156 const float *A1,
int lda1,
158 const float *V,
int ldv,
161int plasma_core_sparfb(plasma_enum_t side, plasma_enum_t trans, plasma_enum_t direct,
162 plasma_enum_t storev,
163 int m1,
int n1,
int m2,
int n2,
int k,
int l,
166 const float *V,
int ldv,
167 const float *T,
int ldt,
168 float *work,
int ldwork);
170int plasma_core_spemv(plasma_enum_t trans,
int storev,
173 const float *A,
int lda,
174 const float *X,
int incx,
179int plasma_core_spotrf(plasma_enum_t uplo,
183void plasma_core_ssymm(plasma_enum_t side, plasma_enum_t uplo,
185 float alpha,
const float *A,
int lda,
186 const float *B,
int ldb,
187 float beta,
float *C,
int ldc);
189void plasma_core_ssyr2k(
190 plasma_enum_t uplo, plasma_enum_t trans,
192 float alpha,
const float *A,
int lda,
193 const float *B,
int ldb,
194 float beta,
float *C,
int ldc);
196void plasma_core_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
198 float alpha,
const float *A,
int lda,
199 float beta,
float *C,
int ldc);
201int plasma_core_stradd(plasma_enum_t uplo, plasma_enum_t transa,
203 float alpha,
const float *A,
int lda,
204 float beta,
float *B,
int ldb);
206void plasma_core_strmm(plasma_enum_t side, plasma_enum_t uplo,
207 plasma_enum_t transa, plasma_enum_t diag,
209 float alpha,
const float *A,
int lda,
212void plasma_core_strsm(plasma_enum_t side, plasma_enum_t uplo,
213 plasma_enum_t transa, plasma_enum_t diag,
215 float alpha,
const float *A,
int lda,
218void plasma_core_strssq(plasma_enum_t uplo, plasma_enum_t diag,
220 const float *A,
int lda,
221 float *scale,
float *sumsq);
223int plasma_core_strtri(plasma_enum_t uplo, plasma_enum_t diag,
227int plasma_core_stslqt(
int m,
int n,
int ib,
234int plasma_core_stsmlq(plasma_enum_t side, plasma_enum_t trans,
235 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
238 const float *V,
int ldv,
239 const float *T,
int ldt,
240 float *work,
int ldwork);
242int plasma_core_stsmqr(plasma_enum_t side, plasma_enum_t trans,
243 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
246 const float *V,
int ldv,
247 const float *T,
int ldt,
248 float *work,
int ldwork);
250int plasma_core_stsqrt(
int m,
int n,
int ib,
257int plasma_core_sttlqt(
int m,
int n,
int ib,
264int plasma_core_sttmlq(plasma_enum_t side, plasma_enum_t trans,
265 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
268 const float *V,
int ldv,
269 const float *T,
int ldt,
270 float *work,
int ldwork);
272int plasma_core_sttmqr(plasma_enum_t side, plasma_enum_t trans,
273 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
276 const float *V,
int ldv,
277 const float *T,
int ldt,
278 float *work,
int ldwork);
280int plasma_core_sttqrt(
int m,
int n,
int ib,
287int plasma_core_sormlq(plasma_enum_t side, plasma_enum_t trans,
288 int m,
int n,
int k,
int ib,
289 const float *A,
int lda,
290 const float *T,
int ldt,
292 float *work,
int ldwork);
294int plasma_core_sormqr(plasma_enum_t side, plasma_enum_t trans,
295 int m,
int n,
int k,
int ib,
296 const float *A,
int lda,
297 const float *T,
int ldt,
299 float *work,
int ldwork);
302void plasma_core_omp_samax(
int colrow,
int m,
int n,
303 const float *A,
int lda,
305 plasma_sequence_t *sequence, plasma_request_t *request);
307void plasma_core_omp_sgeadd(
308 plasma_enum_t transa,
int m,
int n,
309 float alpha,
const float *A,
int lda,
310 float beta,
float *B,
int ldb,
311 plasma_sequence_t *sequence, plasma_request_t *request);
313void plasma_core_omp_sgelqt(
int m,
int n,
int ib,
316 plasma_workspace_t work,
317 plasma_sequence_t *sequence, plasma_request_t *request);
319void plasma_core_omp_sgemm(
320 plasma_enum_t transa, plasma_enum_t transb,
322 float alpha,
const float *A,
int lda,
323 const float *B,
int ldb,
324 float beta,
float *C,
int ldc,
325 plasma_sequence_t *sequence, plasma_request_t *request);
327void plasma_core_omp_sgeqrt(
int m,
int n,
int ib,
330 plasma_workspace_t work,
331 plasma_sequence_t *sequence, plasma_request_t *request);
333void plasma_core_omp_sgessq(
int m,
int n,
334 const float *A,
int lda,
335 float *scale,
float *sumsq,
336 plasma_sequence_t *sequence, plasma_request_t *request);
338void plasma_core_omp_sgessq_aux(
int n,
339 const float *scale,
const float *sumsq,
341 plasma_sequence_t *sequence,
342 plasma_request_t *request);
344void plasma_core_omp_ssygst(
int itype, plasma_enum_t uplo,
348 plasma_sequence_t *sequence, plasma_request_t *request);
350void plasma_core_omp_ssymm(
351 plasma_enum_t side, plasma_enum_t uplo,
353 float alpha,
const float *A,
int lda,
354 const float *B,
int ldb,
355 float beta,
float *C,
int ldc,
356 plasma_sequence_t *sequence, plasma_request_t *request);
358void plasma_core_omp_ssyr2k(
359 plasma_enum_t uplo, plasma_enum_t trans,
361 float alpha,
const float *A,
int lda,
362 const float *B,
int ldb,
363 float beta,
float *C,
int ldc,
364 plasma_sequence_t *sequence, plasma_request_t *request);
366void plasma_core_omp_ssyrk(plasma_enum_t uplo, plasma_enum_t trans,
368 float alpha,
const float *A,
int lda,
369 float beta,
float *C,
int ldc,
370 plasma_sequence_t *sequence, plasma_request_t *request);
372void plasma_core_omp_ssyssq(plasma_enum_t uplo,
374 const float *A,
int lda,
375 float *scale,
float *sumsq,
376 plasma_sequence_t *sequence, plasma_request_t *request);
378void plasma_core_omp_ssyssq(plasma_enum_t uplo,
380 const float *A,
int lda,
381 float *scale,
float *sumsq,
382 plasma_sequence_t *sequence, plasma_request_t *request);
384void plasma_core_omp_ssyssq_aux(
int m,
int n,
385 const float *scale,
const float *sumsq,
387 plasma_sequence_t *sequence,
388 plasma_request_t *request);
390void plasma_core_omp_slacpy(plasma_enum_t uplo, plasma_enum_t transa,
392 const float *A,
int lda,
394 plasma_sequence_t *sequence, plasma_request_t *request);
396void plasma_core_omp_slacpy_lapack2tile_band(plasma_enum_t uplo,
398 int m,
int n,
int nb,
int kl,
int ku,
399 const float *A,
int lda,
402void plasma_core_omp_slacpy_tile2lapack_band(plasma_enum_t uplo,
404 int m,
int n,
int nb,
int kl,
int ku,
405 const float *B,
int ldb,
408void plasma_core_omp_slange(plasma_enum_t norm,
410 const float *A,
int lda,
411 float *work,
float *result,
412 plasma_sequence_t *sequence, plasma_request_t *request);
414void plasma_core_omp_slange_aux(plasma_enum_t norm,
416 const float *A,
int lda,
418 plasma_sequence_t *sequence,
419 plasma_request_t *request);
421void plasma_core_omp_slansy(plasma_enum_t norm, plasma_enum_t uplo,
423 const float *A,
int lda,
424 float *work,
float *value,
425 plasma_sequence_t *sequence, plasma_request_t *request);
427void plasma_core_omp_slansy_aux(plasma_enum_t norm, plasma_enum_t uplo,
429 const float *A,
int lda,
431 plasma_sequence_t *sequence,
432 plasma_request_t *request);
434void plasma_core_omp_slansy(plasma_enum_t norm, plasma_enum_t uplo,
436 const float *A,
int lda,
437 float *work,
float *value,
438 plasma_sequence_t *sequence, plasma_request_t *request);
440void plasma_core_omp_slansy_aux(plasma_enum_t norm, plasma_enum_t uplo,
442 const float *A,
int lda,
444 plasma_sequence_t *sequence,
445 plasma_request_t *request);
447void plasma_core_omp_slantr(plasma_enum_t norm, plasma_enum_t uplo, plasma_enum_t diag,
449 const float *A,
int lda,
450 float *work,
float *value,
451 plasma_sequence_t *sequence, plasma_request_t *request);
453void plasma_core_omp_slantr_aux(plasma_enum_t norm, plasma_enum_t uplo,
456 const float *A,
int lda,
458 plasma_sequence_t *sequence,
459 plasma_request_t *request);
461void plasma_core_omp_slascl(plasma_enum_t uplo,
462 float cfrom,
float cto,
465 plasma_sequence_t *sequence, plasma_request_t *request);
467void plasma_core_omp_slaset(plasma_enum_t uplo,
471 float alpha,
float beta,
474void plasma_core_omp_slauum(plasma_enum_t uplo,
477 plasma_sequence_t *sequence, plasma_request_t *request);
479void plasma_core_omp_spotrf(plasma_enum_t uplo,
483 plasma_sequence_t *sequence, plasma_request_t *request);
485void plasma_core_omp_ssymm(
486 plasma_enum_t side, plasma_enum_t uplo,
488 float alpha,
const float *A,
int lda,
489 const float *B,
int ldb,
490 float beta,
float *C,
int ldc,
491 plasma_sequence_t *sequence, plasma_request_t *request);
493void plasma_core_omp_ssyr2k(
494 plasma_enum_t uplo, plasma_enum_t trans,
496 float alpha,
const float *A,
int lda,
497 const float *B,
int ldb,
498 float beta,
float *C,
int ldc,
499 plasma_sequence_t *sequence, plasma_request_t *request);
501void plasma_core_omp_ssyrk(
502 plasma_enum_t uplo, plasma_enum_t trans,
504 float alpha,
const float *A,
int lda,
505 float beta,
float *C,
int ldc,
506 plasma_sequence_t *sequence, plasma_request_t *request);
508void plasma_core_omp_stradd(
509 plasma_enum_t uplo, plasma_enum_t transa,
511 float alpha,
const float *A,
int lda,
512 float beta,
float *B,
int ldb,
513 plasma_sequence_t *sequence, plasma_request_t *request);
515void plasma_core_omp_strmm(
516 plasma_enum_t side, plasma_enum_t uplo,
517 plasma_enum_t transa, plasma_enum_t diag,
519 float alpha,
const float *A,
int lda,
521 plasma_sequence_t *sequence, plasma_request_t *request);
523void plasma_core_omp_strsm(
524 plasma_enum_t side, plasma_enum_t uplo,
525 plasma_enum_t transa, plasma_enum_t diag,
527 float alpha,
const float *A,
int lda,
529 plasma_sequence_t *sequence, plasma_request_t *request);
531void plasma_core_omp_strssq(plasma_enum_t uplo, plasma_enum_t diag,
533 const float *A,
int lda,
534 float *scale,
float *sumsq,
535 plasma_sequence_t *sequence, plasma_request_t *request);
537void plasma_core_omp_strtri(plasma_enum_t uplo, plasma_enum_t diag,
541 plasma_sequence_t *sequence, plasma_request_t *request);
543void plasma_core_omp_stslqt(
int m,
int n,
int ib,
547 plasma_workspace_t work,
548 plasma_sequence_t *sequence, plasma_request_t *request);
550void plasma_core_omp_stsmlq(plasma_enum_t side, plasma_enum_t trans,
551 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
554 const float *V,
int ldv,
555 const float *T,
int ldt,
556 plasma_workspace_t work,
557 plasma_sequence_t *sequence, plasma_request_t *request);
559void plasma_core_omp_stsmqr(plasma_enum_t side, plasma_enum_t trans,
560 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
563 const float *V,
int ldv,
564 const float *T,
int ldt,
565 plasma_workspace_t work,
566 plasma_sequence_t *sequence, plasma_request_t *request);
568void plasma_core_omp_stsqrt(
int m,
int n,
int ib,
572 plasma_workspace_t work,
573 plasma_sequence_t *sequence, plasma_request_t *request);
575void plasma_core_omp_sttlqt(
int m,
int n,
int ib,
579 plasma_workspace_t work,
580 plasma_sequence_t *sequence, plasma_request_t *request);
582void plasma_core_omp_sttmlq(plasma_enum_t side, plasma_enum_t trans,
583 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
586 const float *V,
int ldv,
587 const float *T,
int ldt,
588 plasma_workspace_t work,
589 plasma_sequence_t *sequence, plasma_request_t *request);
591void plasma_core_omp_sttmqr(plasma_enum_t side, plasma_enum_t trans,
592 int m1,
int n1,
int m2,
int n2,
int k,
int ib,
595 const float *V,
int ldv,
596 const float *T,
int ldt,
597 plasma_workspace_t work,
598 plasma_sequence_t *sequence, plasma_request_t *request);
600void plasma_core_omp_sttqrt(
int m,
int n,
int ib,
604 plasma_workspace_t work,
605 plasma_sequence_t *sequence, plasma_request_t *request);
607void plasma_core_omp_sormlq(plasma_enum_t side, plasma_enum_t trans,
608 int m,
int n,
int k,
int ib,
609 const float *A,
int lda,
610 const float *T,
int ldt,
612 plasma_workspace_t work,
613 plasma_sequence_t *sequence, plasma_request_t *request);
615void plasma_core_omp_sormqr(plasma_enum_t side, plasma_enum_t trans,
616 int m,
int n,
int k,
int ib,
617 const float *A,
int lda,
618 const float *T,
int ldt,
620 plasma_workspace_t work,
621 plasma_sequence_t *sequence, plasma_request_t *request);
/* Cross-reference left by the documentation generator -- Definition: plasma_descriptor.h:40 */