Документ взят из кэша поисковой машины. Адрес оригинального документа : http://sp.cs.msu.ru/proseminar/2011/bakhtin.2011.04.08.pdf
Дата изменения: Mon Apr 11 17:19:41 2011
Дата индексирования: Mon Oct 1 21:56:02 2012
Кодировка:

, . . . ..-.., . .. bakhtin@keldysh.ru " "

. .. , , 2011 .



()

8 , 2011



2 70



1.5-2 . ( ). . , , , 5-6 . -, , . Intel Itanium: Latency to L1: 1-2 cycles Latency to L2: 5 - 7 cycles Latency to L3: 12 - 21 cycles Latency to memory: 180 ­ 225 cycles - GUPS (Giga Updates Per Second)
8 , 2011



3 70



( thread) ­ , , .















2















4











3
2 1

Chip MultiThreading








8 , 2011

-



-





4 70



Jaguar Cray XT5-HE Opteron Six Core 2.6 GHz - 2331 TFlop/s -- 224 162 Linpack - 1759 TFlop/s (75.4% ) - 6950.60

­ Power Efficency (Megaflops/watt)
=> Chip MultiProcessing, .

8 , 2011



5 70



AMD Opteron 6100 (MagnyCours) 6176 SE 12 @ 2,3 , 12 L3 Cache

6136 8 @ 2,4 , 12 L3 Cache
(4 DDR3) 42.7 GB/s 4 «-» HyperTransort 3.0 25.6 GB/s

8 , 2011



6 70



Intel Xeon 5600 (Nehalem) X5680 6 @ 3,33 , 12 , 12 L3 Cache X5677 4 @ 3,46 , 8 , 12 L3 Cache Intel® Turbo Boost Intel® Hyper-Threading Intel® QuickPath Intel® Intelligent Power

8 , 2011



7 70



Intel Core i7 980X (Gulftown) 3,33 6 e 12 Intel Hyper-Threading 12 - Intel Smart Cache (3 DDR3 1066 ) Intel QuickPath Interconnect

8 , 2011



8 70




Intel Itanium 9350 (Tukwila) 1,73 4 e 8 Intel Hyper-Threading 24 L3 - Intel QuickPath Interconnect Intel Turbo Boost

8 , 2011



9 70



IBM Power7

3,5 - 4,0 8 x 4 Simultaneuos MultiThreading L1 64 L2 256 L3 32


8 , 2011



10 70




8 , 2011



11 70


.
/* Jacobi program (sequential version) */
/* FIX: the header name was lost in the PDF extraction ("#include" with no
 * argument); <stdio.h> is restored — printf is used below. */
#include <stdio.h>

#define L     1000   /* matrix dimension */
#define ITMAX 100    /* number of Jacobi iterations */

int i, j, it;
double A[L][L];
double B[L][L];

int main(int an, char **as)
{
    printf("JAC STARTED\n");
    /* initialisation: A zeroed, B seeded with 1+i+j */
    for (i = 0; i <= L - 1; i++)
        for (j = 0; j <= L - 1; j++) {
            A[i][j] = 0.;
            B[i][j] = 1. + i + j;
        }
8 , 2011



12 70


.
/****** iteration loop *************************/
/* FIX: the loop header was truncated in the PDF extraction ("for(it=1; it");
 * reconstructed from the MPI version of the same example, which reads
 * "for(it=1; it<=ITMAX; it++)". */
for (it = 1; it <= ITMAX; it++) {
    /* copy the previous approximation into A */
    for (i = 1; i <= L - 2; i++)
        for (j = 1; j <= L - 2; j++)
            A[i][j] = B[i][j];

    /* five-point-stencil update of the interior points */
    for (i = 1; i <= L - 2; i++)
        for (j = 1; j <= L - 2; j++)
            B[i][j] = (A[i-1][j] + A[i+1][j] + A[i][j-1] + A[i][j+1]) / 4.;
}
return 0;
}

8 , 2011



13 70


. MPI-

8 , 2011



14 70


. MPI-
/* Jacobi-1d program (MPI version, 1-D row decomposition) */
/* FIX: the header names were lost in the PDF extraction (bare "#include"
 * directives).  <stdio.h>/<stdlib.h> are required by printf/malloc below. */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>    /* NOTE(review): the third original header is unknown — confirm against the source PDF */
#include "mpi.h"

/* print only on MPI rank 0 */
#define m_printf if (myrank==0)printf

#define L     1000   /* global matrix dimension */
#define ITMAX 100    /* number of Jacobi iterations */

int i, j, it, k;
int ll, shift;
double (*A)[L];   /* local rows of A (allocated as nrow+2 rows of L doubles) */
double (*B)[L];   /* local rows of B (allocated as nrow rows of L doubles) */

8 , 2011



15 70


. MPI-
int main(int argc, char **argv) { MPI_Request req[4];

int myrank, ranksize;
int startrow,lastrow,nrow; MPI_Status status[4]; double t1, t2, time; MPI_Init (&argc, &argv); /* initialize MPI system */ MPI_Comm_rank(MPI_COMM_WORLD, &myrank);/*my place in MPI system*/ MPI_Comm_size (MPI_COMM_WORLD, &ranksize); /* size of MPI system */ MPI_Barrier(MPI_COMM_WORLD);

/* rows of matrix I have to process */
startrow = (myrank *L) / ranksize; lastrow = (((myrank + 1) * L) / ranksize)-1; nrow = lastrow - startrow + 1; m_printf("JAC1 STARTED\n");
8 , 2011



16 70


. MPI-
/* dynamically allocate data structures */ A = malloc ((nrow+2) * L * sizeof(double)); B = malloc ((nrow) * L * sizeof(double));

for(i=1; i<=nrow; i++)
for(j=0; j<=L-1; j++) {

A[i][j]=0.;
B[i-1][j]=1.+startrow+i-1+j; }

8 , 2011



17 70


. MPI-
/****** iteration loop *************************/ t1=MPI_Wtime(); for(it=1; it<=ITMAX; it++)

{
for(i=1; i<=nrow; i++) {

if (((i==1)&&(myrank==0))||((i==nrow)&&(myrank==ranksize-1))) continue;
for(j=1; j<=L-2; j++) { A[i][j] = B[i-1][j]; } }

8 , 2011



18 70


. MPI-
if(myrank!=0)
MPI_Irecv(&A[0][0],L,MPI_DOUBLE, myrank-1, 1215, MPI_COMM_WORLD, &req[0]);

if(myrank!=ranksize-1)
MPI_Isend(&A[nrow][0],L,MPI_DOUBLE, myrank+1, 1215, MPI_COMM_WORLD,&req[2]); if(myrank!=ranksize-1) MPI_Irecv(&A[nrow+1][0],L,MPI_DOUBLE, myrank+1, 1216, MPI_COMM_WORLD, &req[3]); if(myrank!=0) MPI_Isend(&A[1][0],L,MPI_DOUBLE, myrank-1, 1216, MPI_COMM_WORLD,&req[1]); ll=4; shift=0; if (myrank==0) {ll=2;shift=2;}

if (myrank==ranksize-1) {ll=2;}
MPI_Waitall(ll,&req[shift],&status[0]);
8 , 2011



19 70


. MPI-
for(i=1; i<=nrow; i++) { if (((i==1)&&(myrank==0))||((i==nrow)&&(myrank==ranksize-1))) continue; for(j=1; j<=L-2; j++) B[i-1][j] = (A[i-1][j]+A[i+1][j]+ A[i][j-1]+A[i][j+1])/4.; } }/*DO it*/ printf("%d: Time of task=%lf\n",myrank,MPI_Wtime()-t1); MPI_Finalize (); return 0; }

8 , 2011



20 70


. MPI-

8 , 2011



21 70


. MPI-
/*Jacobi-2d program */ #include #include #include #include "mpi.h" #define m_printf if (myrank==0)printf #define L 1000 #define LC 2 #define ITMAX 100 int i,j,it,k; double (* A)[L/LC+2]; double (* B)[L/LC];

8 , 2011



22 70


. MPI-
int main(int argc, char **argv) { MPI_Request req[8]; int myrank, ranksize; int srow,lrow,nrow,scol,lcol,ncol; MPI_Status status[8]; double t1; int isper[] = {0,0}; int dim[2]; int coords[2]; MPI_Comm newcomm; MPI_Datatype vectype; int pleft,pright, pdown,pup; MPI_Init (&argc, &argv); /* initialize MPI system */ MPI_Comm_size (MPI_COMM_WORLD, &ranksize); /* size of MPI system */ MPI_Comm_rank (MPI_COMM_WORLD, &myrank); /* my place in MPI system */

8 , 2011



23 70


. MPI-
dim[0]=ranksize/LC; dim[1]=LC; if ((L%dim[0])||(L%dim[1])) { m_printf("ERROR: array[%d*%d] is not distributed on %d*%d processors\n",L,L,dim[0],dim[1]); MPI_Finalize(); exit(1); } MPI_Cart_create(MPI_COMM_WORLD,2,dim,isper,1,&newcomm); MPI_Cart_shift(newcomm,0,1,&pup,&pdown); MPI_Cart_shift(newcomm,1,1,&pleft,&pright); MPI_Comm_rank (newcomm, &myrank); /* my place in MPI system */ MPI_Cart_coords(newcomm,myrank,2,coords);

8 , 2011



24 70


. MPI-
/* rows of matrix I have to process */ srow = (coords[0] * L) / dim[0]; lrow = (((coords[0] + 1) * L) / dim[0])-1; nrow = lrow - srow + 1; /* columns of matrix I have to process */ scol = (coords[1] * L) / dim[1]; lcol = (((coords[1] + 1) * L) / dim[1])-1; ncol = lcol - scol + 1; MPI_Type_vector(nrow,1,ncol+2,MPI_DOUBLE,&vectype); MPI_Type_commit(&vectype); m_printf("JAC2 STARTED on %d*%d processors with %d*%d array, it=%d\n",dim[0],dim[1],L,L,ITMAX); /* dynamically allocate data structures */ A = malloc ((nrow+2) * (ncol+2) * sizeof(double)); B = malloc (nrow * ncol * sizeof(double));

8 , 2011



25 70


. MPI-
for(i=0; i<=nrow -1; i++) { for(j=0; j<=ncol-1; j++) { A[i+1][j+1]=0.; B[i][j]=1.+srow+i+scol+j; } } /****** iteration loop *************************/ MPI_Barrier(newcomm); t1=MPI_Wtime(); for(it=1; it<=ITMAX; it++) { for(i=0; i<=nrow -1; i++) {
if (((i==0)&&(pup==MPI_PROC_NULL))||((i==nrow-1)&&(pdown==MPI_PROC_NULL))) continue;

for(j=0; j<=ncol-1; j++) {
if (((j==0)&&(pleft==MPI_PROC_NULL))||((j==ncol-1)&&(pright==MPI_PROC_NULL))) continue;

A[i+1][j+1] = B[i][j];
8 } , 2011

}



26 70


. MPI-
MPI_Irecv(&A[0][1],ncol,MPI_DOUBLE, pup, 1215, MPI_COMM_WORLD, &req[0]); MPI_Isend(&A[nrow][1],ncol,MPI_DOUBLE, pdown, 1215, MPI_COMM_WORLD,&req[1]); MPI_Irecv(&A[nrow+1][1],ncol,MPI_DOUBLE, pdown, 1216, MPI_COMM_WORLD, &req[2]); MPI_Isend(&A[1][1],ncol,MPI_DOUBLE, pup, 1216, MPI_COMM_WORLD,&req[3]); MPI_Irecv(&A[1][0],1,vectype, pleft, 1217, MPI_COMM_WORLD, &req[4]); MPI_Isend(&A[1][ncol],1,vectype, pright, 1217, MPI_COMM_WORLD,&req[5]); MPI_Irecv(&A[1][ncol+1],1,vectype, pright, 1218, MPI_COMM_WORLD, &req[6]); MPI_Isend(&A[1][1],1,vectype, pleft, 1218, MPI_COMM_WORLD,&req[7]); MPI_Waitall(8,req,status);

8 , 2011



27 70


. MPI-
for(i=1; i<=nrow; i++) { if (((i==1)&&(pup==MPI_PROC_NULL))|| ((i==nrow)&&(pdown==MPI_PROC_NULL))) continue; for(j=1; j<=ncol; j++) { if (((j==1)&&(pleft==MPI_PROC_NULL))|| ((j==ncol)&&(pright==MPI_PROC_NULL))) continue; B[i-1][j-1] = (A[i-1][j]+A[i+1][j]+A[i][j-1]+A[i][j+1])/4.; } }

} printf("%d: Time of task=%lf\n",myrank,MPI_Wtime()-t1); MPI_Finalize (); return 0;
}

8 , 2011



28 70


MPI/OpenMP


MPI


OpenMP Core Core ... Core
0
8 , 2011

OpenMP Core Core ... Core
N
29 70




. MPI/OpenMP-
/* Jacobi-1d program (hybrid MPI/OpenMP version) */
/* FIX: the header names were lost in the PDF extraction (bare "#include"
 * directives).  <stdio.h>/<stdlib.h> are required by printf/malloc below. */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>    /* NOTE(review): the third original header is unknown — confirm against the source PDF */
#include "mpi.h"

/* print only on MPI rank 0 */
#define m_printf if (myrank==0)printf

#define L     1000   /* global matrix dimension */
#define ITMAX 100    /* number of Jacobi iterations */

int i, j, it, k;
int ll, shift;
double (*A)[L];   /* local rows of A (allocated as nrow+2 rows of L doubles) */
double (*B)[L];   /* local rows of B (allocated as nrow rows of L doubles) */

8 , 2011



30 70


. MPI/OpenMP-
int main(int argc, char **argv) { MPI_Request req[4];

int myrank, ranksize;
int startrow,lastrow,nrow; MPI_Status status[4]; double t1, t2, time; MPI_Init (&argc, &argv); /* initialize MPI system */ MPI_Comm_rank(MPI_COMM_WORLD,&myrank); /*my place in MPI system */ MPI_Comm_size (MPI_COMM_WORLD, &ranksize); /* size of MPI system */ MPI_Barrier(MPI_COMM_WORLD);

/* rows of matrix I have to process */
startrow = (myrank * N) / ranksize; lastrow = (((myrank + 1) * N) / ranksize)-1; nrow = lastrow - startrow + 1; m_printf("JAC1 STARTED\n");
8 , 2011



31 70


. MPI/OpenMP-
/* dynamically allocate data structures */ A = malloc ((nrow+2) * N * sizeof(double)); B = malloc ((nrow) * N * sizeof(double));

for (i = 1; i <= nrow; i++)
    /* FIX: the pragma was glued onto the for statement by the PDF
     * extraction; it must stand on its own line, immediately before the
     * loop it parallelises. */
    #pragma omp parallel for
    for (j = 0; j <= L - 1; j++) {
        A[i][j]   = 0.;
        B[i-1][j] = 1. + startrow + i - 1 + j;
    }

8 , 2011



32 70


. MPI/OpenMP-
/****** iteration loop *************************/ t1=MPI_Wtime(); for(it=1; it<=ITMAX; it++)

{
for(i=1; i<=nrow; i++) {

if (((i==1)&&(myrank==0))||((i==nrow)&&(myrank==ranksize-1))) continue;
#pragma omp parallel for for(j=1; j<=L-2; j++) { A[i][j] = B[i-1][j]; } }

8 , 2011



33 70


. MPI/OpenMP-
if(myrank!=0) MPI_Irecv(&A[0][0],L,MPI_DOUBLE, myrank-1, 1215, MPI_COMM_WORLD, &req[0]); if(myrank!=ranksize-1) MPI_Isend(&A[nrow][0],L,MPI_DOUBLE, myrank+1, 1215, MPI_COMM_WORLD,&req[2]); if(myrank!=ranksize-1) MPI_Irecv(&A[nrow+1][0],L,MPI_DOUBLE, myrank+1, 1216, MPI_COMM_WORLD, &req[3]);

if(myrank!=0)
MPI_Isend(&A[1][0],L,MPI_DOUBLE, myrank-1, 1216, MPI_COMM_WORLD,&req[1]);

ll=4; shift=0;
if (myrank==0) {ll=2;shift=2;} if (myrank==ranksize-1) {ll=2;} MPI_Waitall(ll,&req[shift],&status[0]);

8 , 2011



34 70


. MPI/OpenMP-
for(i=1; i<=nrow; i++) { if (((i==1)&&(myrank==0))||((i==nrow)&&(myrank==ranksize-1))) continue; #pragma omp parallel for for(j=1; j<=L-2; j++) B[i-1][j] = (A[i-1][j]+A[i+1][j]+ A[i][j-1]+A[i][j+1])/4.; } }/*DO it*/ printf("%d: Time of task=%lf\n",myrank,MPI_Wtime()-t1); MPI_Finalize (); return 0; }

8 , 2011



35 70




8 , 2011



36 70


CPU GPU

8 , 2011



37 70


GPU (NVIDIA GF100)

8 , 2011



38 70


Fortran
PROGRAM JACOB_SEQ PARAMETER (L=4096, ITMAX=100) REAL A(L,L), B(L,L) PRINT *, '********** TEST_JACOBI **********` DO IT = 1, ITMAX DO J = 2, L-1 DO I = 2, L-1 A(I, J) = B(I, J) ENDDO ENDDO DO J = 2, L-1 DO I = 2, L-1 B(I, J) = (A(I-1, J) + A(I, J-1) + A(I+1, J) + * A(I, J+1)) / 4 ENDDO ENDDO ENDDO END
8 , 2011



39 70


Fortran CUDA
program JACOB_CUDA use cudafor use jac_cuda parameter (k=4096, itmax = 100, block_dim = 16) real, device, dimension(k, k) :: a, b integer it type(dim3) :: grid, block print *, '********** test_jacobi **********` grid = dim3(k / block_dim, k / block_dim, 1) block = dim3(block_dim, block_dim, 1) do it = 1, itmax call arr_copy<<>>(a, b, k) call arr_renew<<>>(a, b, k) end do end program JACOB_CUDA
8 , 2011



40 70


Fortran CUDA
module jac_cuda contains attributes(global) subroutine arr_copy(a, b, k) real, device, dimension(k, k) :: a, b integer, value :: k integer i, j i = (blockIdx%x - 1) * blockDim%x + threadIdx%x j = (blockIdx%y - 1) * blockDim%y + threadIdx%y if (i.ne.1 .and. i.ne.k .and. j.ne.1 .and. j.ne.k) a(i, j) = b(i, j) end subroutine arr_copy attributes(global) subroutine arr_renew(a, b, k) real, device, dimension(k, k) :: a, b integer, value :: k integer i, j i = (blockIdx%x - 1) * blockDim%x + threadIdx%x j = (blockIdx%y - 1) * blockDim%y + threadIdx%y if (i.ne.1.and.i.ne.k.and.j.ne.1.and.j.ne.k) b(i,j)=(a(i-1,j)+a(i+1,j)+a(i,j-1)+a(I,j+1))/4 end subroutine arr_renew end module jac_cuda
8 , 2011



41 70


DVM-
DVM- : Fortran-DVM/OpenMP C-DVM LIB-DVM DVM- DVM- DVM- DVM (Distributed Virtual Memory, Distributed Virtual Machine)
8 , 2011



42 70



C-DVM = + Fortran-DVM/OpenMP = 95 + «»


8 , 2011

43 70


. DVM-
PROGRAM PARAMETER JAC_DVM (L=4096, ITMAX=100) ( BLOCK, BLOCK) :: A

REAL

A(L,L), B(L,L)

CDVM$ DISTRIBUTE

CDVM$ ALIGN B(I,J) WITH A(I,J) PRINT *, '********** TEST_JACOBI **********' DO IT = 1, ITMAX

CDVM$

PARALLEL (J,I) ON A(I, J)
DO J = 2, L-1 DO I = 2, L-1 A(I, J) = B(I, J) ENDDO ENDDO

8 , 2011



44 70


. DVM-
CDVM$ PARALLEL (J,I) ON B(I, J), SHADOW_RENEW (A) DO J = 2, L-1

DO I = 2, L-1
B(I, J) = (A(I-1, J) + A(I, J-1) + A(I+1, J) + A(I, J+1)) / 4 ENDDO ENDDO ENDDO

END

8 , 2011



45 70


NAS
BT CG EP 3D -, , 3D 3D -, 3D , Multigrid 3D -, Beam-Warning approximate factorization SEQ 3929 1108 641 MPI 5744 1793 670 DVM 3991 1118 649 MPI/ SEQ 1.46 1.62 1.04 DVM/S EQ 1.02 1.01 1.01

FT
IS LU MG

1500
925 4189 1898

2352
1218 5497 2857

1605
1067 4269 2131

1.57
1.32 1.31 1.50

1.07
1.17 1.02 1.12

SP

3361

5020

3630

1.49
1.43

1.08
1.05

8 , 2011

17551 25151 18460


NAS

8 , 2011



47 70


NAS

8 , 2011



48 70


. DVM/OpenMP-
PROGRAM PARAMETER JAC_OpenMP_DVM (L=4096, ITMAX=100) ( BLOCK, BLOCK) :: A

REAL

A(L,L), B(L,L)

CDVM$ DISTRIBUTE

CDVM$ ALIGN B(I,J) WITH A(I,J) PRINT *, '********** TEST_JACOBI **********' DO IT = 1, ITMAX

CDVM$
C$OMP

PARALLEL (J,I) ON A(I, J)
PARALLEL DO COLLAPSE (2) DO J = 2, L-1 DO I = 2, L-1 A(I, J) = B(I, J) ENDDO ENDDO

8 , 2011



49 70


. DVM/OpenMP-
CDVM$ C$OMP PARALLEL (J,I) ON B(I, J), SHADOW_RENEW (A) PARALLEL DO COLLAPSE (2)

DO J = 2, L-1
DO I = 2, L-1 B(I, J) = (A(I-1, J) + A(I, J-1) + A(I+1, J) + A(I, J+1)) / 4 ENDDO ENDDO

ENDDO
END

8 , 2011



50 70


. DVM/GPU-
PROGRAM PARAMETER JAC_GPU_DVM (L=4096, ITMAX=100) ( BLOCK, BLOCK) :: A

REAL

A(L,L), B(L,L)

CDVM$ DISTRIBUTE

CDVM$ ALIGN B(I,J) WITH A(I,J) PRINT *, '********** TEST_JACOBI **********` C$ACC C$ACC CDVM$ DATA REGION COPYOUT(B), LOCAL (A)

DO IT = 1, ITMAX
REGION PARALLEL (J,I) ON A(I, J) DO J = 2, L-1 DO I = 2, L-1 A(I, J) = B(I, J) ENDDO ENDDO
8 , 2011



51 70


. DVM/GPU-
CDVM$ PARALLEL (J,I) ON B(I, J), SHADOW_RENEW (A) DO J = 2, L-1

DO I = 2, L-1
B(I, J) = (A(I-1, J) + A(I, J-1) + A(I+1, J) + A(I, J+1)) / 4 ENDDO ENDDO C$ACC C$ACC END REGION

ENDDO
END DATA REGION END

8 , 2011



52 70














8 , 2011



53 70





1

2



1 2

-DVM/OpenMP

-DVM/OpenMP/GPU

8 , 2011



54 70



: NAS LU, BT, SP MHPDV ( ) ZEBRA ( )

8 , 2011



55 70



1 8 64 256 1024

BT-
BT- LU- LU- SP- SP- MHPDV-


3482.40 2103.14 1982.00 2601.85 3703.23

1255.97
817.88 1009.49 858.26 500.78

182.70
128.01 148.78 122.61 89.32

54.64
30.27 40.33 34.99 34.75

21.36
7.19 25.55 19.97 12.78

MHPDV-
ZEBRA- ZEBRA-
8 , 2011

3574.29
75.09 75.62

486.74
11.13 10.18

79.63
1.96 1.85

32.15
-

10.98
56 70




BT DVM- () 1855 10442 504 481 30530

LU 96 3635 424 410 6638

SP 879 5950 499 293 11015

MHPDV 41 1878 116 115 4643

ZEBRA 753 2426 49 28 2680

DVM-



( ) () ()

34
75 16 5 7 3 2 1 6 16 38,56

30
64 22 3 0 5 4 5 16 3,93

37
70 21 3 9 4

33
78 16 0 6 11

40
49 36 0 3 1

5 16 2,62

5 16 1,43

6 64 0,29

77,39
127

2,06
127

52,31
182

1,13
144

11,47
543




8 , 2011




, ?
!

8 , 2011



59 70



DVM-. http://www.keldysh.ru/dvm OpenMP Application Program Interface Version 3.0, May 2008. http://www.openmp.org/mp-documents/spec30.pdf MPI: A Message-Passing Interface Standard Version 2.2, September 2009. http://www.mpi-forum.org/docs/mpi-2.2/mpi22-report.pdf .. OpenMP: .-.: - , 2009. http://parallel.ru/info/parallel/openmp/OpenMP.pdf .. MPI: .-.: - , 2004. http://parallel.ru/tech/tech_dev/MPI/mpibook.pdf .., .. . ­ .: -, 2002. . , . . . . ­ . , 2003
8 , 2011



60 70



, - , . .. , , . . . bakhtin@keldysh.ru

8 , 2011



61 70


MPI
MPI : int MPI_Init ( int *agrc, char ***argv )

MPI-. .
MPI : int MPI_Finalize (void)


8 , 2011



62 70



: int MPI_Comm_size ( MPI_Comm comm, int *size ). : int MPI_Comm_rank ( MPI_Comm comm, int *rank ).


8 , 2011



63 70



- : int MPI_Isend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm, MPI_Request *request), · buf - , , · count - , · type - , · dest - , , · tag - -, , · comm - , . - : int MPI_Irecv(void *buf, int count, MPI_Datatype type, int source, int tag, MPI_Comm comm, MPI_Status *status, MPI_Request *request), ·buf, count, type - , MPI_Send, ·source - , , ·tag - , , ·comm - , , ·status - . 8 64 70 , 2011


MPI_Waitall
: int MPI_Waitall( int count, MPI_Request array_of_requests[], MPI_Status array_of_statuses[])



8 , 2011



65 70


MPI_Cart_create
() MPI: int MPI_Cart_create(MPI_Comm oldcomm, int ndims, int *dims, int *periods, int reorder, MPI_Comm *cartcomm), : · oldcomm - , · ndims - , · dims - ndims, , · periods - ndims, , , · reorder - , · cartcomm - .


8 , 2011



66 70


MPI_Cart_shift
: int MPI_Card_shift(MPI_Comm comm, int dir, int disp, int *source, int *dst) (source) (dst) (comm) dir disp.


8 , 2011



67 70


MPI_Card_coords
: int MPI_Card_coords(MPI_Comm comm,int rank,int ndims,int *coords),

: ·comm - , · rank - , , · ndims - , · coords - .


8 , 2011



68 70


MPI_Type_vector
MPI : · , · , . , , · , , · .

int MPI_Type_vector(int count, int blocklen, int stride, MPI_Data_type oldtype, MPI_Datatype *newtype),
·count - , ·blocklen - , ·stride - , ·oldtype - , ·newtype - .
8 , 2011



69 70


MPI_Type_commit
: int MPI_Type_commit (MPI_Datatype *type )

:
int MPI_Type_free (MPI_Datatype *type ).


8 , 2011



70 70