并行谱数值方法/格雷-斯科特
外观
< 并行谱数值方法
上述反应-扩散系统描述了两种化学物质的密度,( 和 ),与另一种物质发生反应。方程式中的第一项表示扩散,第二项描述了 和 的反应,( → ),第一个方程式中的最后一项表示新的 的输入,而第二个方程式中的最后一项表示从系统中移除 。
该程序使用傅里叶变换求解线性部分,使用隐式中点规则求解非线性部分。
|
(
) |
% A program to solve the Gray-Scott equations using
% splitting method. The nonlinear equations are solved using the implicit
% midpoint rule
clear all; format compact, format short,
set(0,'defaultaxesfontsize',17,'defaultaxeslinewidth',.7,...
'defaultlinelinewidth',3.5,'defaultpatchlinewidth',3.5)
Nx =64; % number of modes
Ny=64;
Nz=64;
% set up grid
Nt=4;
tmax=0.04;
Lx = 1.5; % period 2*pi * L
Ly =1.5;
Lz=1.5;
dt = tmax/Nt; % number of time slices
tol=0.1^12;
A = 0.04;
B = 0.1;
Du = 1.;
Dv = 1.;
% initialise variables
x = (2*pi/Nx)*(-Nx/2:Nx/2 -1)'*Lx; % x coordinate
y = (2*pi/Ny)*(-Nx/2:Ny/2 -1)'*Ly;
z = (2*pi/Nz)*(-Nx/2:Nz/2 -1)'*Lz;
kx = i*[0:Nx/2-1 0 -Nx/2+1:-1]'/Lx; % wave vector
ky = i*[0:Ny/2-1 0 -Ny/2+1:-1]'/Ly;
kz = i*[0:Nz/2-1 0 -Nz/2+1:-1]'/Lz;
[xx,yy,zz]=meshgrid(x,y,z);
[kxx,kyy,kzz]=meshgrid(kx,ky,kz);
% initial conditions
t=0; tdata(1)=t;
u = 0.2 + exp(-2*(xx.^2+yy.^2+zz.^2));
v = 0.1 + exp(-4*(xx.^2+yy.^2+zz.^2-0.01).^2);
gamma=[1];
Ahat=A*fftn(ones(size(xx)));
t=0;
figure(11); clf;
subplot(2,1,1);
H = vol3d('CData',real(u),'texture','3D','XData',x,'YData',y,'ZData',z);
xlabel('x'); ylabel('y'); zlabel('z'); colorbar;
axis equal; axis square; view(3);
xlabel('x'); ylabel('y'); zlabel('z');
axis equal; axis square; view(3); drawnow;
title(['Time ',num2str(t)]);
subplot(2,1,2);
H = vol3d('CData',real(v),'texture','3D','XData',x,'YData',y,'ZData',z);
xlabel('x'); ylabel('y'); zlabel('z'); colorbar;
axis equal; axis square; view(3);
xlabel('x'); ylabel('y'); zlabel('z');
axis equal; axis square; view(3); drawnow;
filename=['./pictures1/',num2str(10000000),'.jpg'];
saveas(11,filename)
for n=1:Nt
chg=1;
for m=1:1
% use fixed point iteration to solve nonlinear system in real space
chg=1;
uold=u; vold=v;
while (chg>tol)
utemp=u; vtemp=v;
umean=0.5*(u+uold);
vmean=0.5*(v+vold);
u=uold+0.5*gamma(m)*dt*(-umean.*vmean.^2);
v=vold+0.5*gamma(m)*dt*umean.*vmean.^2;
chg=max(abs(u-utemp))+max(abs(v-vtemp));
end
uhat=fftn(u); vhat=fftn(v); % solve linear part exactly in Fourier space
uhat=exp(gamma(m)*dt*(-A+Du*(kxx.^2+kyy.^2+kzz.^2))).*...
(uhat-Ahat./(A+Du*(kxx.^2+kyy.^2+kzz)))+Ahat./...
(A+Du*(kxx.^2+kyy.^2+kzz.^2));
vhat=exp(gamma(m)*dt*(-B+Dv*(kxx.^2+kyy.^2+kzz.^2))).*vhat;
u=ifftn(uhat); v=ifftn(vhat);
% use fixed point iteration to solve nonlinear system in real space
chg=1;
uold=u; vold=v;
while (chg>tol)
utemp=u; vtemp=v;
umean=0.5*(u+uold);
vmean=0.5*(v+vold);
u=uold+0.5*gamma(m)*dt*(-umean.*vmean.^2);
v=vold+0.5*gamma(m)*dt*umean.*vmean.^2;
chg=max(abs(u-utemp))+max(abs(v-vtemp));
end
end
t=n*dt;
figure(11); clf;
subplot(2,1,1);
H = vol3d('CData',real(u),'texture','3D','XData',x,'YData',y,'ZData',z);
xlabel('x'); ylabel('y'); zlabel('z'); colorbar;
axis equal; axis square; view(3);
xlabel('x'); ylabel('y'); zlabel('z');
axis equal; axis square; view(3); drawnow;
title(['Time ',num2str(t)]);
subplot(2,1,2);
H = vol3d('CData',real(v),'texture','3D','XData',x,'YData',y,'ZData',z);
xlabel('x'); ylabel('y'); zlabel('z'); colorbar;
axis equal; axis square; view(3);
xlabel('x'); ylabel('y'); zlabel('z');
axis equal; axis square; view(3); drawnow;
filename=['./pictures1/',num2str(1000000+n),'.jpg'];
saveas(11,filename)
end
要计算可视化,请下载 vol3d.m [2]。
该程序使用 2decomp_fft,就像之前章节中的程序一样。
|
(
) |
!--------------------------------------------------------------------
!
!
! PURPOSE
!
! This program solves GrayScott equation in 3 dimensions
! u_t=D_u*(u_{xx}+u_{yy}+u_{zz})^2-u*v^2-F*(1-u)
! v_t=D_v*(v_{xx}+v_{yy}+v_{zz})^2+u*v^2-(F+k)*v
!
!
! .. Parameters ..
! Nx = number of modes in x - power of 2 for FFT
! Ny = number of modes in y - power of 2 for FFT
! Nz = number of modes in z - power of 2 for FFT
! Nt = number of timesteps to take
! Tmax = maximum simulation time
! plotgap = number of timesteps between plots
! pi = 3.14159265358979323846264338327950288419716939937510d0
! Lx = width of box in x direction
! Ly = width of box in y direction
! Lz = width of box in z direction
! A =F
! B =F+k
! .. Scalars ..
! Ahat = Fourier transform of A
! i = loop counter in x direction
! j = loop counter in y direction
! k = loop counter in z direction
! n = loop counter for timesteps direction
! allocatestatus = error indicator during allocation
! start = variable to record start time of program
! finish = variable to record end time of program
! count_rate = variable for clock count rate
! dt = timestep
! modescalereal = Number to scale after backward FFT
! myid = Process id
! ierr = error code
! p_row = number of rows for domain decomposition
! p_col = number of columns for domain decomposition
! filesize = total filesize
! disp = displacement to start writing data from
! ind = index in array to write
! plotnum = number of plot to save
! numberfile = number of the file to be saved to disk
! stat = error indicator when reading inputfile
! umean = temporary for fix point iteration
! vmean = temporary for fix point iteration
! .. Arrays ..
! u = approximate solution u
! v = approximate solution v
! uold = previous timestep of u
! vold = previous timestep of v
! utemp = temporary for fix point iteration u
! vtemp = temporary for fix point iteration v
! uhat = Fourier transform of u
! vhat = Fourier transform of v
! .. Vectors ..
! kx = fourier frequencies in x direction squared
! ky = fourier frequencies in y direction squared
! kz = fourier frequencies in z direction squared
! x = x locations
! y = y locations
! z = z locations
! time = times at which save data
! nameconfig = array to store filename for data to be saved
! InputFileName = name of the Input File
! dpcomm
! intcomm
! .. Special Structures ..
! decomp = contains information on domain decomposition
! sp see http://www.2decomp.org/ for more information
!
! REFERENCES
!
! ACKNOWLEDGEMENTS
!
! ACCURACY
!
! ERROR INDICATORS AND WARNINGS
!
! FURTHER COMMENTS
! Check that the initial iterate is consistent with the
! boundary conditions for the domain specified
!--------------------------------------------------------------------
! External routines required
!
! External libraries required
! 2DECOMP&FFT -- Domain decomposition and Fast Fourier Library
! (http://www.2decomp.org/index.html)
! MPI library
PROGRAM main
USE decomp_2d
USE decomp_2d_fft
USE decomp_2d_io
USE MPI
! Declare variables
IMPLICIT NONE
INTEGER(kind=4) :: Nx=0
INTEGER(kind=4) :: Ny=0
INTEGER(kind=4) :: Nz=0
INTEGER(kind=4) :: Nt=0
INTEGER(kind=4) :: plotgap=0
REAL(kind=8), PARAMETER ::&
pi=3.14159265358979323846264338327950288419716939937510d0
REAL(kind=8) :: Lx=0.0,Ly=0.0,Lz=0.0
REAL(kind=8) :: dt=0.0
REAL(kind=8) :: tol=0.1**12
REAL(kind=8) :: A=0.0
REAL(kind=8) :: Ahat=0
REAL(kind=8) :: B=0.0
REAL(kind=8) :: Du=0.0
REAL(kind=8) :: Dv=0.0
REAL(kind=8) :: chg=1.0
REAL(kind=8), DIMENSION(:), ALLOCATABLE :: kx,ky,kz
REAL(kind=8), DIMENSION(:), ALLOCATABLE :: x,y,z
REAL(kind=8), DIMENSION(:,:,:), ALLOCATABLE :: u,v
REAL(kind=8), DIMENSION(:,:,:), ALLOCATABLE :: uold,vold
REAL(kind=8) :: utemp,vtemp
COMPLEX(kind=8), DIMENSION(:,:,:), ALLOCATABLE :: uhat,vhat
COMPLEX(kind=8) :: uhatmp
REAL(kind=8) :: umean,vmean
REAL(kind=8), DIMENSION(:), ALLOCATABLE :: time
INTEGER(KIND=4), DIMENSION(1:5) :: intcomm
REAL(KIND=8), DIMENSION(1:8) :: dpcomm
REAL(kind=8) :: modescalereal
INTEGER(kind=4) :: i,j,k,n,AllocateStatus,stat
INTEGER(kind=4) :: myid,numprocs,ierr
TYPE(DECOMP_INFO) :: decomp,sp
INTEGER(kind=MPI_OFFSET_KIND) :: filesize, disp
INTEGER(kind=4) :: p_row=0, p_col=0
INTEGER(kind=4) :: start, finish, count_rate, ind, plotnum
CHARACTER*50 :: nameconfig
CHARACTER*20 :: numberfile, InputFileName
! initialisation of MPI
CALL MPI_INIT(ierr)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD, numprocs, ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, myid, ierr)
CALL MPI_BCAST(stat,1,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
IF(myid.eq.0) THEN
InputFileName='INPUTFILE'
OPEN(unit=11,FILE=trim(InputFileName),status="OLD")
REWIND(11)
READ(11,*) intcomm(1), intcomm(2), intcomm(3), intcomm(4),intcomm(5),&
dpcomm(1), dpcomm(2), dpcomm(3), dpcomm(4), dpcomm(5), &
dpcomm(6), dpcomm(7), dpcomm(8)
CLOSE(11)
PRINT *,"NX ",intcomm(1)
PRINT *,"NY ",intcomm(2)
PRINT *,"NZ ",intcomm(3)
PRINT *,"NT ",intcomm(4)
PRINT *,"plotgap ",intcomm(5)
PRINT *,"Lx",dpcomm(1)
PRINT *,"Ly",dpcomm(2)
PRINT *,"Lz",dpcomm(3)
PRINT *,"dt",dpcomm(4)
PRINT *,"Du",dpcomm(5)
PRINT *,"Dv",dpcomm(6)
PRINT *,"F",dpcomm(7)
PRINT *,"k",dpcomm(8)
PRINT *,"Read inputfile"
END IF
CALL MPI_BCAST(dpcomm,8,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ierr)
CALL MPI_BCAST(intcomm,5,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
Nx=intcomm(1)
Ny=intcomm(2)
Nz=intcomm(3)
Nt=intcomm(4)
plotgap=intcomm(5)
Lx=dpcomm(1)
Ly=dpcomm(2)
Lz=dpcomm(3)
dt=dpcomm(4)
Du=dpcomm(5)
Dv=dpcomm(6)
A=dpcomm(7)
B=dpcomm(8)+dpcomm(7)
! initialisation of 2decomp
! do automatic domain decomposition
CALL decomp_2d_init(Nx,Ny,Nz,p_row,p_col)
! get information about domain decomposition choosen
CALL get_decomp_info(decomp) ! physical domain
CALL decomp_info_init(Nx/2+1,Ny,Nz,sp) ! spectral domain
! initialise FFT library
CALL decomp_2d_fft_init
ALLOCATE(u(decomp%xst(1):decomp%xen(1),&
decomp%xst(2):decomp%xen(2),&
decomp%xst(3):decomp%xen(3)),&
v(decomp%xst(1):decomp%xen(1),&
decomp%xst(2):decomp%xen(2),&
decomp%xst(3):decomp%xen(3)),&
uold(decomp%xst(1):decomp%xen(1),&
decomp%xst(2):decomp%xen(2),&
decomp%xst(3):decomp%xen(3)),&
vold(decomp%xst(1):decomp%xen(1),&
decomp%xst(2):decomp%xen(2),&
decomp%xst(3):decomp%xen(3)),&
uhat(sp%zst(1):sp%zen(1),&
sp%zst(2):sp%zen(2),&
sp%zst(3):sp%zen(3)),&
vhat(sp%zst(1):sp%zen(1),&
sp%zst(2):sp%zen(2),&
sp%zst(3):sp%zen(3)),&
kx(sp%zst(1):sp%zen(1)), &
ky(sp%zst(2):sp%zen(2)), &
kz(sp%zst(3):sp%zen(3)), &
x(decomp%xst(1):decomp%xen(1)),&
y(decomp%xst(2):decomp%xen(2)),&
z(decomp%xst(3):decomp%xen(3)),&
time(1:1+Nt/plotgap),&
stat=AllocateStatus)
IF (AllocateStatus .ne. 0) STOP
IF (myid.eq.0) THEN
PRINT *,'allocated space'
END IF
CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
modescalereal=1.0d0/REAL(Nx,KIND(0d0))
modescalereal=modescalereal/REAL(Ny,KIND(0d0))
modescalereal=modescalereal/REAL(Nz,KIND(0d0))
! setup fourier frequencies and grid points
DO i = 1,1+ Nx/2
IF ((i.GE.sp%zst(1)).AND.(i.LE.sp%zen(1))) THEN
kx(i)=-(REAL(i-1,kind(0d0))/Lx)**2
END IF
END DO
IF ((Nx/2 + 1 .GE.sp%zst(1)).AND.(Nx/2 + 1 .LE.sp%zen(1))) THEN
kx( Nx/2 + 1 ) = 0.0d0
END IF
DO i = Nx/2+2, Nx
IF ((i.GE.sp%zst(1)).AND.(i.LE.sp%zen(1))) THEN
Kx( i) = -(REAL(1-i+Nx,KIND(0d0))/Lx)**2
END IF
END DO
DO i=decomp%xst(1),decomp%xen(1)
x(i)=(-1.0d0 + 2.0d0*REAL(i-1,kind(0d0))/REAL(Nx,kind(0d0)))*pi*Lx
END DO
DO j = 1,1+ Ny/2
IF ((j.GE.sp%zst(2)).AND.(j.LE.sp%zen(2))) THEN
ky(j)= -(REAL(j-1,kind(0d0))/Ly)**2
END IF
END DO
IF ((Ny/2 + 1 .GE.sp%zst(2)).AND.(Ny/2 + 1 .LE.sp%zen(2))) THEN
ky( Ny/2 + 1 ) = 0.0d0
ENDIF
DO j = Ny/2+2, Ny
IF ((j.GE.sp%zst(2)).AND.(j.LE.sp%zen(2))) THEN
ky(j) = -(REAL(1-j+Ny,KIND(0d0))/Ly)**2
END IF
END DO
DO j=decomp%xst(2),decomp%xen(2)
y(j)=(-1.0d0 + 2.0d0*REAL(j-1,kind(0d0))/REAL(Ny,kind(0d0)))*pi*Ly
END DO
DO k = 1,1+ Nz/2
IF ((k.GE.sp%zst(3)).AND.(k.LE.sp%zen(3))) THEN
kz(k)= -(REAL(k-1,kind(0d0))/Lz)**2
END IF
END DO
IF ((Nz/2 + 1 .GE.sp%zst(3)).AND.(Nz/2 + 1 .LE.sp%zen(3))) THEN
kz( Nz/2 + 1 ) = 0.0d0
ENDIF
DO k = Nz/2+2, Nz
IF ((k.GE.sp%zst(3)).AND.(k.LE.sp%zen(3))) THEN
kz(k) = -(REAL(1-k+Nz,KIND(0d0))/Lz)**2
END IF
END DO
DO k=decomp%xst(3),decomp%xen(3)
z(k)=(-1.0d0 + 2.0d0*REAL(k-1,kind(0d0))/REAL(Nz,kind(0d0)))*pi*Lz
END DO
IF (myid.eq.0) THEN
PRINT *,'Setup grid and fourier frequencies'
END IF
!compute Ahat
Ahat=A*Nx*Ny*Nz
!initial condition
CALL MPI_BARRIER(MPI_COMM_WORLD,ierr)
DO k=decomp%xst(3),decomp%xen(3)
DO j=decomp%xst(2),decomp%xen(2)
DO i=decomp%xst(1),decomp%xen(1)
u(i,j,k)=1+(exp(-1*(x(i)**2+y(j)**2+z(k)**2)-1))
v(i,j,k)=0+(exp(-1*(x(i)**2+y(j)**2+z(k)**2)-1))
END DO
END DO
END DO
! write out using 2DECOMP&FFT MPI-IO routines
nameconfig='./data/u'
plotnum=0
WRITE(numberfile,'(i0)') 10000000+plotnum
ind=index(nameconfig,' ') -1
nameconfig=nameconfig(1:ind)//numberfile
ind=index(nameconfig,' ') -1
nameconfig=nameconfig(1:ind)//'.datbin'
CALL decomp_2d_write_one(1,u,nameconfig)
!same for v
nameconfig='./data/v'
plotnum=0
WRITE(numberfile,'(i0)') 10000000+plotnum
ind=index(nameconfig,' ') -1
nameconfig=nameconfig(1:ind)//numberfile
ind=index(nameconfig,' ') -1
nameconfig=nameconfig(1:ind)//'.datbin'
CALL decomp_2d_write_one(1,v,nameconfig)
IF (myid.eq.0) THEN
PRINT *,'Got initial data, starting timestepping'
END IF
CALL system_clock(start,count_rate)
time(1)=0
DO n=1,Nt
!use fixed point iteration to solve nonlinear system in real space
DO k=decomp%xst(3),decomp%xen(3)
DO j=decomp%xst(2),decomp%xen(2)
DO i=decomp%xst(1),decomp%xen(1)
uold(i,j,k)=u(i,j,k)
vold(i,j,k)=v(i,j,k)
END DO
END DO
END DO
DO k=decomp%xst(3),decomp%xen(3)
DO j=decomp%xst(2),decomp%xen(2)
DO i=decomp%xst(1),decomp%xen(1)
chg=1
DO WHILE (chg>tol)
utemp=u(i,j,k)
vtemp=v(i,j,k)
umean=0.5*(u(i,j,k)+uold(i,j,k))
vmean=0.5*(v(i,j,k)+vold(i,j,k))
u(i,j,k)=uold(i,j,k)+0.5*dt*(-umean*vmean**2)
v(i,j,k)=vold(i,j,k)+0.5*dt*(umean*vmean**2)
utemp=u(i,j,k)-utemp
vtemp=v(i,j,k)-vtemp
chg=abs(utemp)+abs(vtemp)
END DO
END DO
END DO
END DO
! solve linear part exactly in Fourier space
CALL decomp_2d_fft_3d(u,uhat)
CALL decomp_2d_fft_3d(v,vhat)
IF (myid.eq.0) THEN
uhatmp=uhat(1,1,1)
END IF
DO k=sp%zst(3),sp%zen(3)
DO j=sp%zst(2),sp%zen(2)
DO i=sp%zst(1),sp%zen(1)
uhat(i,j,k)=exp(dt*(-A+Du*(kx(i)+ky(j)+kz(k))))*&
uhat(i,j,k)
vhat(i,j,k)=exp(dt*(-B+Dv*(kx(i)+ky(j)+kz(k))))*&
vhat(i,j,k)
END DO
END DO
END DO
IF (myid.eq.0) THEN
uhat(1,1,1)=exp(dt*(-A+Du*(kx(1)+ky(1)+kz(1))))*&
(uhatmp-Ahat/(A+Du*(kx(1)+ky(1)+kz(1))))+&
(Ahat/(A+Du*(kx(1)+ky(1)+kz(1))))
END IF
!dont forget to scale
CALL decomp_2d_fft_3d(uhat,u)
CALL decomp_2d_fft_3d(vhat,v)
DO k=decomp%xst(3),decomp%xen(3)
DO j=decomp%xst(2),decomp%xen(2)
DO i=decomp%xst(1),decomp%xen(1)
u(i,j,k)=u(i,j,k)*modescalereal
v(i,j,k)=v(i,j,k)*modescalereal
END DO
END DO
END DO
!use fixed point iteration to solve nonlinear system in real space
DO k=decomp%xst(3),decomp%xen(3)
DO j=decomp%xst(2),decomp%xen(2)
DO i=decomp%xst(1),decomp%xen(1)
uold(i,j,k)=u(i,j,k)
vold(i,j,k)=v(i,j,k)
END DO
END DO
END DO
DO k=decomp%xst(3),decomp%xen(3)
DO j=decomp%xst(2),decomp%xen(2)
DO i=decomp%xst(1),decomp%xen(1)
chg=1
DO WHILE (chg>tol)
utemp=u(i,j,k)
vtemp=v(i,j,k)
umean=0.5*(u(i,j,k)+uold(i,j,k))
vmean=0.5*(v(i,j,k)+vold(i,j,k))
u(i,j,k)=uold(i,j,k)+0.5*dt*(-umean*vmean**2)
v(i,j,k)=vold(i,j,k)+0.5*dt*(umean*vmean**2)
utemp=u(i,j,k)-utemp
vtemp=v(i,j,k)-vtemp
chg=abs(utemp)+abs(vtemp)
END DO
END DO
END DO
END DO
!write data
IF (mod(n,plotgap)==0) THEN
time(1+n/plotgap)=n*dt
IF (myid.eq.0) THEN
PRINT *,'time',n*dt
END IF
nameconfig='./data/u'
plotnum=plotnum+1
WRITE(numberfile,'(i0)') 10000000+plotnum
ind=index(nameconfig,' ') -1
nameconfig=nameconfig(1:ind)//numberfile
ind=index(nameconfig,' ') -1
nameconfig=nameconfig(1:ind)//'.datbin'
!write out using 2DECOMP&FFT MPI-IO routines
CALL decomp_2d_write_one(1,u,nameconfig)
nameconfig='./data/v'
WRITE(numberfile,'(i0)') 10000000+plotnum
ind=index(nameconfig,' ') -1
nameconfig=nameconfig(1:ind)//numberfile
ind=index(nameconfig,' ') -1
nameconfig=nameconfig(1:ind)//'.datbin'
!write out using 2DECOMP&FFT MPI-IO routines
CALL decomp_2d_write_one(1,v,nameconfig)
END IF
END DO
IF (myid.eq.0) THEN
PRINT *,'Finished time stepping'
END IF
CALL system_clock(finish,count_rate)
IF (myid.eq.0) THEN
PRINT*,'Program took ',REAL(finish-start)/REAL(count_rate),'for execution'
END IF
IF (myid.eq.0) THEN
! Save times at which output was made in text format
nameconfig = './data/tdata.dat'
OPEN(unit=11,FILE=nameconfig,status="UNKNOWN")
REWIND(11)
DO j=1,1+Nt/plotgap
WRITE(11,*) time(j)
END DO
CLOSE(11)
! Save x grid points in text format
nameconfig = './data/xcoord.dat'
OPEN(unit=11,FILE=nameconfig,status="UNKNOWN")
REWIND(11)
DO i=1,Nx
WRITE(11,*) x(i)
END DO
CLOSE(11)
! Save y grid points in text format
nameconfig = './data/ycoord.dat'
OPEN(unit=11,FILE=nameconfig,status="UNKNOWN")
REWIND(11)
DO j=1,Ny
WRITE(11,*) y(j)
END DO
CLOSE(11)
! Save z grid points in text format
nameconfig = './data/zcoord.dat'
OPEN(unit=11,FILE=nameconfig,status="UNKNOWN")
REWIND(11)
DO k=1,Nz
WRITE(11,*) z(k)
END DO
CLOSE(11)
PRINT *,'Saved data'
END IF
! clean up
CALL decomp_2d_fft_finalize
CALL decomp_2d_finalize
DEALLOCATE(u,v,uold,vold,uhat,vhat,&
kx,ky,kz,x,y,z,&
time,stat=AllocateStatus)
IF (AllocateStatus .ne. 0) STOP
IF (myid.eq.0) THEN
PRINT *,'Program execution complete'
END IF
CALL MPI_FINALIZE(ierr)
END PROGRAM main
在 OpenCL[3] 中,您编写一个“内核”来并行执行密集计算。您可以选择在 CPU、GPU 或其他协处理器上运行内核。内核在运行时针对您选择的设备进行编译。
要运行以下代码,您需要一个有效的 OpenCL 1.2 框架,例如 AMD APP SDK [4](它也可以在非 AMD CPU 上运行)。您还需要 clfft [5],一个针对 OpenCL 的 FFT 实现。
|
(
) |
#include "clFFT.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include "gs_functions.c"
#if defined(cl_amd_fp64) || defined(cl_khr_fp64)
#if defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#elif defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
// function declarations/definitions using double precision floating-point arithmetic
#endif
int main(void) {
//time meassuring
struct timeval tvs;
//variables
int Nx=1024;
int Ny=1024;
int plotnum=0;
int Tmax=2;
int plottime=0;
int plotgap=1;
double Lx=1.0;
double Ly=1.0;
double dt=0.0;
double A=0.0;
double B=0.0;
double Du=0.0;
double Dv=0.0;
//splitting coefficients
double a=0.5;
double b=0.5;
double c=1.0;
//loop counters
int i=0;
int j=0;
int n=0;
double*umax=NULL;
double*vmax=NULL;
parainit(&Nx,&Ny,&Tmax,&plotgap,&Lx,&Ly,&dt,&Du,&Dv,&A,&B);
plottime=plotgap;
vmax=(double*)malloc((Tmax/plotgap+1)*sizeof(double));
umax=(double*)malloc((Tmax/plotgap+1)*sizeof(double));
//openCL variables
cl_platform_id *platform_id = NULL;
cl_kernel frequencies = NULL, initialdata = NULL, linearpart=NULL;
cl_kernel nonlinearpart_a=NULL, nonlinearpart_b=NULL;
cl_int ret;
cl_uint num_platforms;
// Detect how many platforms there are.
ret = clGetPlatformIDs(0, NULL, &num_platforms);
// Allocate enough space for the number of platforms.
platform_id = (cl_platform_id*) malloc(num_platforms*sizeof(cl_platform_id));
// Store the platforms
ret = clGetPlatformIDs(num_platforms, platform_id, NULL);
printf("Found %d platform(s)!\n",num_platforms);
cl_uint *num_devices;
num_devices=(cl_uint*) malloc(num_platforms*sizeof(cl_uint));
cl_device_id **device_id = NULL;
device_id =(cl_device_id**) malloc(num_platforms*sizeof(cl_device_id*));
// Detect number of devices in the platforms
for(i=0;i<num_platforms;i++){
char buf[65536];
size_t size;
ret = clGetPlatformInfo(platform_id[i],CL_PLATFORM_VERSION,sizeof(buf),buf,&size);
printf("%s\n",buf);
ret = clGetDeviceIDs(platform_id[i],CL_DEVICE_TYPE_ALL,0,NULL,num_devices);
printf("Found %d device(s) on platform %d!\n", num_devices[i],i);
ret = clGetPlatformInfo(platform_id[i],CL_PLATFORM_NAME,sizeof(buf),buf,&size);
printf("%s ",buf);
// Store numDevices from platform
device_id[i]=(cl_device_id*) malloc(num_devices[i]*sizeof(device_id));
ret = clGetDeviceIDs(platform_id[i],CL_DEVICE_TYPE_ALL,num_devices[i],device_id[i],NULL);
for(j=0;j<num_devices[i];j++){
ret = clGetDeviceInfo(device_id[i][j],CL_DEVICE_NAME,sizeof(buf),buf,&size);
printf("%s (%d,%d)\n",buf,i,j);
}
}
//create context and command_queue
cl_context context = NULL;
cl_command_queue command_queue = NULL;
//Which platform and device do i choose?
int chooseplatform=0;
int choosedevice=0;
printf("Choose platform %d and device %d!\n",chooseplatform,choosedevice);
context = clCreateContext( NULL, num_devices[chooseplatform], device_id[chooseplatform], NULL, NULL, &ret);
if(ret!=CL_SUCCESS){printf("createContext ret:%d\n",ret); exit(1); }
command_queue = clCreateCommandQueue(context, device_id[chooseplatform][choosedevice], 0, &ret);
if(ret!=CL_SUCCESS){printf("createCommandQueue ret:%d\n",ret); exit(1); }
//OpenCL arrays
cl_mem cl_u = NULL,cl_v = NULL;
cl_mem cl_uhat = NULL, cl_vhat = NULL;
cl_mem cl_kx = NULL, cl_ky = NULL;
//FFT
clfftPlanHandle planHandle;
cl_mem tmpBuffer = NULL;
fftinit(&planHandle,&context, &command_queue, &tmpBuffer, Nx, Ny);
//allocate gpu memory/
cl_u=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx* Ny* sizeof(double), NULL, &ret);
cl_v=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx* Ny* sizeof(double), NULL, &ret);
cl_uhat=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx * Ny* sizeof(double), NULL, &ret);
cl_vhat=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx * Ny* sizeof(double), NULL, &ret);
cl_kx = clCreateBuffer(context, CL_MEM_READ_WRITE, Nx * sizeof(double), NULL, &ret);
cl_ky = clCreateBuffer(context, CL_MEM_READ_WRITE, Ny * sizeof(double), NULL, &ret);
printf("allocated space\n");
//load the kernels
loadKernel(&frequencies,&context,&device_id[chooseplatform][choosedevice],"frequencies");
loadKernel(&initialdata,&context,&device_id[chooseplatform][choosedevice],"initialdata");
loadKernel(&linearpart,&context,&device_id[chooseplatform][choosedevice],"linearpart");
loadKernel(&nonlinearpart_a,&context,&device_id[chooseplatform][choosedevice],"nonlinearpart_a");
loadKernel(&nonlinearpart_b,&context,&device_id[chooseplatform][choosedevice],"nonlinearpart_b");
size_t global_work_size[1] = {Nx*Ny};
size_t global_work_size_X[1] = {Nx};
size_t global_work_size_Y[1] = {Ny};
//frequencies
ret = clSetKernelArg(frequencies, 0, sizeof(cl_mem),(void *)&cl_kx);
ret = clSetKernelArg(frequencies, 1, sizeof(double),(void* )&Lx);
ret = clSetKernelArg(frequencies, 2, sizeof(int),(void* )&Nx);
ret = clEnqueueNDRangeKernel(command_queue, frequencies, 1, NULL, global_work_size_X, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
ret = clSetKernelArg(frequencies, 0, sizeof(cl_mem),(void *)&cl_ky);
ret = clSetKernelArg(frequencies, 1, sizeof(double),(void* )&Ly);
ret = clSetKernelArg(frequencies, 2, sizeof(int),(void* )&Ny);
ret = clEnqueueNDRangeKernel(command_queue, frequencies, 1, NULL, global_work_size_Y, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
//printCL(&cl_kx,&command_queue,Nx,1);
//printCL(&cl_ky,&command_queue,1,Ny);
//inintial data
ret = clSetKernelArg(initialdata, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(initialdata, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(initialdata, 2, sizeof(int),(void* )&Nx);
ret = clSetKernelArg(initialdata, 3, sizeof(int),(void* )&Ny);
ret = clSetKernelArg(initialdata, 4, sizeof(double),(void* )&Lx);
ret = clSetKernelArg(initialdata, 5, sizeof(double),(void* )&Ly);
ret = clEnqueueNDRangeKernel(command_queue, initialdata, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
//make output
writedata_C(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
writedata_C(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
umax[plotnum]=writeimage(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
vmax[plotnum]=writeimage(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
printf("Got initial data, starting timestepping\n");
mtime_s(&tvs);
for(n=0;n<=Tmax;n++){
//nonlinearpart_a
ret = clSetKernelArg(nonlinearpart_a, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_a, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_a, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_a, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_a, 4, sizeof(double),(void* )&a);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_a, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
//nonlinearpart_b
ret = clSetKernelArg(nonlinearpart_b, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_b, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_b, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_b, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_b, 4, sizeof(double),(void* )&b);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_b, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
//linear
fft2dfor(&cl_u, &cl_uhat,&planHandle,&command_queue,&tmpBuffer);
fft2dfor(&cl_v, &cl_vhat,&planHandle,&command_queue,&tmpBuffer);
//printf("A%f,B%f\n",A,B);
ret = clSetKernelArg(linearpart, 0, sizeof(cl_mem),(void *)&cl_uhat);
ret = clSetKernelArg(linearpart, 1, sizeof(cl_mem),(void *)&cl_vhat);
ret = clSetKernelArg(linearpart, 2, sizeof(cl_mem),(void* )&cl_kx);
ret = clSetKernelArg(linearpart, 3, sizeof(cl_mem),(void* )&cl_ky);
ret = clSetKernelArg(linearpart, 4, sizeof(double),(void* )&Du);
ret = clSetKernelArg(linearpart, 5, sizeof(double),(void* )&Dv);
ret = clSetKernelArg(linearpart, 6, sizeof(double),(void* )&A);
ret = clSetKernelArg(linearpart, 7, sizeof(double),(void* )&B);
ret = clSetKernelArg(linearpart, 8, sizeof(double),(void* )&dt);
ret = clSetKernelArg(linearpart, 9, sizeof(double),(void* )&c);
ret = clSetKernelArg(linearpart, 10, sizeof(int),(void* )&Nx);
ret = clSetKernelArg(linearpart, 11, sizeof(int),(void* )&Ny);
ret = clEnqueueNDRangeKernel(command_queue, linearpart, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
fft2dback(&cl_u, &cl_uhat,&planHandle,&command_queue,&tmpBuffer);
fft2dback(&cl_v, &cl_vhat,&planHandle,&command_queue,&tmpBuffer);
//nonlinearpart_b
ret = clSetKernelArg(nonlinearpart_b, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_b, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_b, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_b, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_b, 4, sizeof(double),(void* )&b);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_b, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
//nonlinearpart_a
ret = clSetKernelArg(nonlinearpart_a, 0, sizeof(cl_mem),(void *)&cl_u);
ret = clSetKernelArg(nonlinearpart_a, 1, sizeof(cl_mem),(void* )&cl_v);
ret = clSetKernelArg(nonlinearpart_a, 2, sizeof(double),(void* )&A);
ret = clSetKernelArg(nonlinearpart_a, 3, sizeof(double),(void* )&dt);
ret = clSetKernelArg(nonlinearpart_a, 4, sizeof(double),(void* )&a);
ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_a, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
ret = clFinish(command_queue);
// done
if(n==plottime){
printf("time:%f, step:%d,%d,umax:%f,vmax:%f\n",n*dt,n,plotnum,umax[plotnum],vmax[plotnum]);
plottime=plottime+plotgap;
plotnum=plotnum+1;
writedata_C(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
writedata_C(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
umax[plotnum]=writeimage(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
vmax[plotnum]=writeimage(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
}
}//end timestepping
printf("Finished time stepping\n");
mtime_e(&tvs,"Programm took:");
writearray(umax,(Tmax/plotgap)+1,"u");
writearray(vmax,(Tmax/plotgap)+1,"v");
free(umax);
free(vmax);
clReleaseMemObject(cl_u);
clReleaseMemObject(cl_v);
clReleaseMemObject(cl_uhat);
clReleaseMemObject(cl_vhat);
clReleaseMemObject(cl_kx);
clReleaseMemObject(cl_ky);
ret = clReleaseKernel(initialdata);
ret = clReleaseKernel(frequencies);
ret = clReleaseKernel(linearpart);
ret = clReleaseKernel(nonlinearpart_a);
ret = clReleaseKernel(nonlinearpart_b);
fftdestroy(&planHandle, &tmpBuffer);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
for(i=0;i<num_platforms;i++){free(device_id[i]);}
free(device_id);
free(platform_id);
free(num_devices);
printf("Program execution complete\n");
return 0;
}
您可以将“chooseplatform”和“choosedevice”更改为要计算的设备。
|
(
) |
//
//
//
//This file contains only functions for main_gs.c
//
//
#include <CL/cl.h>
#include <sys/time.h>
#include <string.h>
//read the INPUTFILE
void parainit(int * Nx, int * Ny, int * Tmax, int * plotgap, double * Lx, double * Ly, double * dt, double * Du, double * Dv, double * A, double *B ){
int intcomm[4];
double dpcomm[7];
char InputFileName[]="./INPUTFILE";
FILE*fp;
fp=fopen(InputFileName,"r");
if(!fp) {fprintf(stderr, "Failed to load IPUTFILE.\n");exit(1);}
int ierr=fscanf(fp, "%d %d %d %d %lf %lf %lf %lf %lf %lf %lf", &intcomm[0],&intcomm[1],&intcomm[2],&intcomm[3],&dpcomm[0],&dpcomm[1],&dpcomm[2],&dpcomm[3],&dpcomm[4],&dpcomm[5],&dpcomm[6]);
if(ierr!=11){fprintf(stderr, "INPUTFILE corrupted:%d\n",ierr);exit(1);}
fclose(fp);
printf("NX %d\nNY %d\nTmax %d\nplotgap %d\n",intcomm[0],intcomm[1],intcomm[2],intcomm[3]);
printf("Lx %lf\nLy %lf\ndt %lf\nDu %lf\nDv %lf\nF %lf\nk %lf\n",dpcomm[0],dpcomm[1],dpcomm[2],dpcomm[3],dpcomm[4],dpcomm[5],dpcomm[6]);
*Nx=intcomm[0];
*Ny=intcomm[1];
*Tmax=intcomm[2];
*plotgap=intcomm[3];
*Lx=dpcomm[0];
*Ly=dpcomm[1];
*dt=dpcomm[2];
*Du=dpcomm[3];
*Dv=dpcomm[4];
*A=dpcomm[5];
*B=dpcomm[5]+dpcomm[6];
printf("Read Inputfile\n");
};
//loads a kernel from a file
#define MAX_SOURCE_SIZE 8192
void loadKernel(cl_kernel *kernel,cl_context *context, cl_device_id *device_id, char*name){
cl_program p_kernel;
cl_int ret=0;
size_t source_size;
char *source_str;
char nameconfig[100];
int i=0;
source_str = (char *)malloc(MAX_SOURCE_SIZE*sizeof(char));
for(i=0;i<MAX_SOURCE_SIZE;i++){source_str[i]='\0';}
FILE* fp;
strcpy(nameconfig,"./");
strcat(nameconfig,name);
strcat(nameconfig,".cl");
fp = fopen(nameconfig, "r");
if (!fp) {fprintf(stderr, "Failed to load kernel.\n"); exit(1); }
source_size = fread( source_str, sizeof(char), MAX_SOURCE_SIZE, fp );
fclose( fp );
p_kernel = clCreateProgramWithSource(*context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
if(ret!=CL_SUCCESS){printf("createProgram ret:%d\n",ret);exit(1); }
ret = clBuildProgram(p_kernel, 1, &*device_id, NULL, NULL, NULL);
if(ret!=CL_SUCCESS){printf("buildProgram ret:%d\n",ret); exit(1); }
*kernel = clCreateKernel(p_kernel, name, &ret);
if(ret!=CL_SUCCESS){printf("createKernel ret:%d\n",ret);exit(1); }
ret = clReleaseProgram(p_kernel);
if(ret!=CL_SUCCESS){printf("releaseProgram ret:%d\n",ret);exit(1); }
printf("got kernel %s\n",name);
free(source_str);
};
//displays an array on gpu memory (debug)
void printCL(cl_mem* cl_u,cl_command_queue* command_queue, int Nx,int Ny){
double* u;
int i=0;
int j=0;
cl_int ret=0;
u=(double*)malloc(Nx*Ny*sizeof(double));
ret = clEnqueueReadBuffer(*command_queue, *cl_u, CL_TRUE, 0, Nx*Ny*sizeof(double), u, 0, NULL, NULL);
ret = clFinish(*command_queue); if(ret!=CL_SUCCESS){printf("failed");}
for(i=0;i<Nx;i++){
for(j=0;j<Ny;j++){
printf("%f ",u[i+Nx*j]);
}
printf("\n");
}
printf("\n");
free(u);
};
//displays an real part of complex array on gpu memory (debug)
void printCL_C(cl_mem* cl_u,cl_command_queue* command_queue, int Nx,int Ny){
double* u;
int i=0;
int j=0;
cl_int ret=0;
u=(double*)malloc(2*Nx*Ny*sizeof(double));
ret = clEnqueueReadBuffer(*command_queue, *cl_u, CL_TRUE, 0, 2*Nx*Ny*sizeof(double), u, 0, NULL, NULL);
ret = clFinish(*command_queue);
if(ret!=CL_SUCCESS){printf("failed");}
for(i=0;i<Nx;i++){
for(j=0;j<Ny;j++){
printf("%f ",u[2*i+Nx*2*j]);
}
printf("\n");
}
printf("\n");
free(u);
};
//make plans for FFT
void fftinit(clfftPlanHandle *planHandle, cl_context* context, cl_command_queue* command_queue, cl_mem* tmpBuffer, int Nx,int Ny){
clfftDim dim = CLFFT_2D;
size_t clLength[2] = {Nx,Ny};
cl_int ret=0;
// Setup clFFT.
clfftSetupData fftSetup;
ret = clfftInitSetupData(&fftSetup);
if(ret!=CL_SUCCESS){printf("clFFT init ret:%d\n",ret);exit(1); }
ret = clfftSetup(&fftSetup);
if(ret!=CL_SUCCESS){printf("clFFT Setup ret:%d\n",ret);exit(1); }
// Create a default plan for a complex FFT.
ret = clfftCreateDefaultPlan(&*planHandle, *context, dim, clLength);
if(ret!=CL_SUCCESS){printf("clFFT Plan ret:%d\n",ret);exit(1); }
// Set plan parameters.
ret = clfftSetPlanPrecision(*planHandle, CLFFT_DOUBLE);
if(ret!=CL_SUCCESS){printf("clFFT Precision ret:%d\n",ret);exit(1); }
//ret = clfftSetPlanBatchSize(*planHandle, (size_t) Ny );
//if(ret!=CL_SUCCESS){printf("clFFT Batch ret:%d\n",ret);exit(1); }
ret = clfftSetLayout(*planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
if(ret!=CL_SUCCESS){printf("clFFT Layout ret:%d\n",ret);exit(1); }
ret = clfftSetResultLocation(*planHandle, CLFFT_OUTOFPLACE);
if(ret!=CL_SUCCESS){printf("clFFT Place ret:%d\n",ret);exit(1); }
// Bake the plan.
ret = clfftBakePlan(*planHandle, 1, &*command_queue, NULL, NULL);
if(ret!=CL_SUCCESS){printf("clFFT Bake ret:%d\n",ret);exit(1); }
// Create temporary buffer.
// Size of temp buffer.
size_t tmpBufferSize = 0;
ret = clfftGetTmpBufSize(*planHandle, &tmpBufferSize);
if ((ret == CL_SUCCESS) && (tmpBufferSize > 0)) {
*tmpBuffer = clCreateBuffer(*context, CL_MEM_READ_WRITE, tmpBufferSize, NULL, &ret);
if (ret != CL_SUCCESS){printf("Error with tmpBuffer clCreateBuffer\n");exit(1);}
}
};
//destroy plans
void fftdestroy(clfftPlanHandle *planHandle,cl_mem* tmpBuffer){
cl_int ret=0;
clReleaseMemObject(*tmpBuffer);
ret = clfftDestroyPlan(&*planHandle);
if(ret!=0){printf("Error while destroying fft");exit(1);}
clfftTeardown();
};
//fft2dfoward
void fft2dfor(cl_mem *cl_u, cl_mem *cl_uhat, clfftPlanHandle *planHandle, cl_command_queue* command_queue, cl_mem* tmpBuffer){
int ret=0;
ret = clfftEnqueueTransform(*planHandle, CLFFT_FORWARD, 1, command_queue, 0, NULL, NULL,&*cl_u, &*cl_uhat, *tmpBuffer);
if (ret != CL_SUCCESS){printf("FFT failedA%d",ret);}
ret = clFinish(*command_queue);
if (ret != CL_SUCCESS){printf("FFT failedB%d",ret);}
};
//fft2dback
void fft2dback(cl_mem *cl_u, cl_mem *cl_uhat, clfftPlanHandle *planHandle, cl_command_queue* command_queue, cl_mem* tmpBuffer){
int ret=0;
ret = clfftEnqueueTransform(*planHandle, CLFFT_BACKWARD, 1, command_queue, 0, NULL, NULL,&*cl_uhat, &*cl_u, *tmpBuffer);
if (ret != CL_SUCCESS){printf("FFT failedC%d",ret);}
ret = clFinish(*command_queue);
if (ret != CL_SUCCESS){printf("FFT failedD%d",ret);}
};
//writes an image to disk and returns the maximum of cl_u
double writeimage(cl_mem* cl_u, cl_command_queue *command_queue, int Nx,int Ny, int plotnum, char* prefix){
int i=0;
cl_int ret=0;
int header=54;
double* u;
u=(double*)malloc(2*Nx*Ny*sizeof(double));
ret = clEnqueueReadBuffer(*command_queue, *cl_u, CL_TRUE, 0, 2*Nx*Ny * sizeof(double), u, 0, NULL, NULL);
ret = clFinish(*command_queue);
if(ret!=0){printf("Error hahah");}
double max=0.0;
for(i=0;i<Nx*Ny;i++){
if(u[2*i]>max){max=u[2*i];}
}
unsigned char*picture=(unsigned char*)malloc((3*Nx*Ny+header)*sizeof(unsigned char));
for(i=0;i<Nx*Ny;i++){
picture[3*i+header+0]=(unsigned char)(255*u[2*i]/max);
picture[3*i+header+1]=(unsigned char)(255*u[2*i]/max);
picture[3*i+header+2]=(unsigned char)(255*u[2*i]/max);
}
//header for bmp file
int w=Ny;
int h=Nx;
int padSize=(4-w%4)%4;
int filesize=header + 3*h*w+h*padSize;
unsigned char bmppad[3] = {0,0,0}; //padding
picture[ 0]='B';
picture[ 1]='M';
picture[ 2] = (unsigned char)(filesize );
picture[ 3] = (unsigned char)(filesize>> 8);
picture[ 4] = (unsigned char)(filesize>>16);
picture[ 5] = (unsigned char)(filesize>>24);
picture[ 6] = 0;
picture[ 7] = 0;
picture[ 8] = 0;
picture[ 9] = 0;
picture[10] = 54;
picture[11] = 0;
picture[12] = 0;
picture[13] = 0;
picture[14] = 40;
picture[15] = 0;
picture[16] = 0;
picture[17] = 0;//3
picture[18] = (unsigned char)( w );
picture[19] = (unsigned char)( w>> 8);
picture[20] = (unsigned char)( w>>16);
picture[21] = (unsigned char)( w>>24);
picture[22] = (unsigned char)( h );
picture[23] = (unsigned char)( h>> 8);
picture[24] = (unsigned char)( h>>16);
picture[25] = (unsigned char)( h>>24);
picture[26] = 1;
picture[27] = 0;
picture[28] = 24;
picture[29] = 0;
for(i=30;i<54;i++){
picture[i]=0;
}
FILE*fp;
//file name
char tmp_str[10];
char nameconfig[100];
strcpy(nameconfig,"./data/");
strcat(nameconfig,prefix);
sprintf(tmp_str,"%d",10000000+plotnum);
strcat(nameconfig,tmp_str);
strcat(nameconfig,".bmp");
fp=fopen(nameconfig,"wb");
if (!fp) {fprintf(stderr, "Failed to write data.\n"); exit(1); }
for(i=0;i<header;i++){fwrite(&picture[i], sizeof(unsigned char), 1, fp);}
for(i=0;i<h;i++){
fwrite(picture+(w*(h-i-1)*3)+header,3* sizeof(unsigned char), w, fp);
fwrite(bmppad,sizeof(unsigned char),(4-(w*3)%4)%4,fp);
}
fclose( fp );
free(picture);
free(u);
return max;
};
//writes the array to disk (debug)
void writedata(cl_mem* cl_u, cl_command_queue *command_queue, int Nx,int Ny, int plotnum,char* prefix){
int i=0;
cl_int ret=0;
double* u;
u=(double*)malloc(Nx*Ny*sizeof(double));
ret = clEnqueueReadBuffer(*command_queue, *cl_u, CL_TRUE, 0, Nx*Ny * sizeof(double), u, 0, NULL, NULL);
ret = clFinish(*command_queue);
if(ret!=0){printf("Error hahah");}
FILE*fp;
//file name
char tmp_str[10];
char nameconfig[100];
strcpy(nameconfig,"./data/");
strcat(nameconfig,prefix);
sprintf(tmp_str,"%d",10000000+plotnum);
strcat(nameconfig,tmp_str);
strcat(nameconfig,".datbin");
fp=fopen(nameconfig,"wb");
if (!fp) {fprintf(stderr, "Failed to write data.\n"); exit(1); }
for(i=0;i<Nx;i++){fwrite(u+i*Ny, sizeof(double), Ny, fp);}
fclose( fp );
free(u);
};
//writes the real part of complex array to disk
void writedata_C(cl_mem* cl_u, cl_command_queue *command_queue, int Nx,int Ny, int plotnum,char* prefix){
int i=0;
cl_int ret=0;
double* u;
u=(double*)malloc(2*Nx*Ny*sizeof(double));
ret = clEnqueueReadBuffer(*command_queue, *cl_u, CL_TRUE, 0, 2*Nx*Ny * sizeof(double), u, 0, NULL, NULL);
ret = clFinish(*command_queue);
if(ret!=0){printf("Error hahah");}
for(i=0;i<Nx*Ny;i++){u[i]=u[2*i];}
FILE*fp;
//file name
char tmp_str[10];
char nameconfig[100];
strcpy(nameconfig,"./data/");
strcat(nameconfig,prefix);
sprintf(tmp_str,"%d",10000000+plotnum);
strcat(nameconfig,tmp_str);
strcat(nameconfig,".datbin");
fp=fopen(nameconfig,"wb");
if (!fp) {fprintf(stderr, "Failed to write data.\n"); exit(1); }
for(i=0;i<Nx;i++){fwrite(u+i*Ny, sizeof(double), Ny, fp);}
fclose( fp );
free(u);
};
//loades the data from disk (debug)
void loaddata(cl_mem* cl_u, cl_command_queue *command_queue, int Nx,int Ny, char*name){
int i=0;
int numread=0;
cl_int ret=0;
double* u;
u=(double*)malloc(Nx*Ny*sizeof(double));
FILE*fp;
fp=fopen(name,"rb");
if (!fp) {fprintf(stderr, "Failed to open file.\n"); exit(1); }
for(i=0;i<Nx;i++){
numread=fread(u+i*Ny, sizeof(double), Ny, fp);
if (numread!=Ny) {fprintf(stderr, "Failed to read file.\n"); exit(1); }
}
fclose( fp );
ret = clEnqueueWriteBuffer(*command_queue, *cl_u, CL_TRUE, 0, Nx*Ny * sizeof(double), u, 0, NULL, NULL);
ret = clFinish(*command_queue);
if(ret!=0){printf("Error hahah");}
free(u);
};
//writes an array to disc ASCII
void writearray(double*u,int length,char* prefix){
int i=0;
char nameconfig[100];
strcpy(nameconfig,"./data/");
strcat(nameconfig,prefix);
FILE*fp;
fp=fopen(nameconfig,"w");
if (!fp) {fprintf(stderr, "Failed to write data.\n"); exit(1); }
for(i=0;i<length;i++){
fprintf(fp,"%.17g\n",u[i]);
}
};
//start measuring time
void mtime_s(struct timeval* tvs){
gettimeofday(&*tvs, NULL);
};
//end measuring time+ printing
void mtime_e(struct timeval* tvs, char* printme){
struct timeval tve;
double elapsedTime;
gettimeofday(&tve, NULL);
elapsedTime = (tve.tv_sec - (*tvs).tv_sec) * 1000.0; // sec to ms
elapsedTime += (tve.tv_usec - (*tvs).tv_usec) / 1000.0; // us to ms
printf("%s%lfms\n",printme,elapsedTime);
};
现在所有 OpenCL 内核,应该保存在同一个文件夹中。
|
(
) |
#if defined(cl_amd_fp64) || defined(cl_khr_fp64)
#if defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#elif defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
// function declarations/definitions using double precision doubleing-point arithmetic
#endif
__kernel void initialdata ( __global double2* u, __global double2* v, const int Nx, const int Ny, const double Lx, const double Ly)
{
const int ind = get_global_id(0);
int i,j;
j=floor((double)(ind)/(double)Nx);
i=ind-j*Nx;
double x=(-1.0 + ( 2.0*(double)i/(double)Nx))*M_PI*Lx;
double y=(-1.0 + ( 2.0*(double)j/(double)Ny))*M_PI*Ly;
u[ind].x=0.5+exp(-1.0*(x*x+y*y)-1.0);//
u[ind].y=0.0;
v[ind].x=0.1+exp(-1.0*(x*x+y*y)-1.0);
v[ind].y=0.0;
}
|
(
) |
#if defined(cl_amd_fp64) || defined(cl_khr_fp64)
#if defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#elif defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
// function declarations/definitions using double precision doubleing-point arithmetic
#endif
__kernel void frequencies ( __global double* kx, const double Lx, const int Nx)
{
const int ind = get_global_id(0);
if ( ind < Nx/2) kx[ind] = -1.0*((double)((ind))/Lx)*((double)( ind)/Lx);
if ( ind ==Nx/2) kx[ ind]=0.0;
if ( ind > Nx/2) kx[ ind]=-1.0*(double)(Nx-ind)/Lx*(double)(Nx-ind)/Lx;
}
|
(
) |
#if defined(cl_amd_fp64) || defined(cl_khr_fp64)
#if defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#elif defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
// function declarations/definitions using double precision doubleing-point arithmetic
#endif
__kernel void linearpart ( __global double2* uhat, __global double2* vhat, __global const double* Kx, __global const double* Ky, const double Du, const double Dv, const double A, const double B, const double dt, const double c, const int Nx, const int Ny)
{
const int ind = get_global_id(0);
int i,j,k;
j=floor((double)(ind)/(double)Nx);
i=ind-j*Nx;
double uexp;
double vexp;
uexp=dt*c*(-1.0*A+Du*(Kx[i]+Ky[j]));
vexp=dt*c*(-1.0*B+Dv*(Kx[i]+Ky[j]));
uhat[ind].x=exp(uexp)*uhat[ind].x;
uhat[ind].y=exp(uexp)*uhat[ind].y;
vhat[ind].x=exp(vexp)*vhat[ind].x;
vhat[ind].y=exp(vexp)*vhat[ind].y;
}
|
(
) |
#if defined(cl_amd_fp64) || defined(cl_khr_fp64)
#if defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#elif defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
// function declarations/definitions using double precision doubleing-point arithmetic
#endif
__kernel void nonlinearpart_a ( __global double2* u,__global double2* v, const double A, const double dt, const double a){
const int ind = get_global_id(0);
//u[ind].x=A/(v[ind].x*v[ind].x)+(u[ind].x-A/(v[ind].x*v[ind].x))*exp(dt*a*(-1.0)*v[ind].x*v[ind].x);
u[ind].x=u[ind].x*exp(dt*a*(-1.0)*v[ind].x*v[ind].x);
u[ind].y=0.0;
}
|
(
) |
#if defined(cl_amd_fp64) || defined(cl_khr_fp64)
#if defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#elif defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
// function declarations/definitions using double precision doubleing-point arithmetic
#endif
__kernel void nonlinearpart_b ( __global double2* u,__global double2* v, const double A, const double dt, const double b){
const int ind = get_global_id(0);
//v[ind].x=-v[ind].x/(u[ind].x*dt*b*v[ind].x-1.0);
v[ind].x=1.0/(-0.5*A*(dt*dt*b*b)-u[ind].x*(dt*b)+1/v[ind].x);
v[ind].y=0.0;
u[ind].x=A*dt*b+u[ind].x;
}
这是您的 makefile!将 clFFT 更改为您自己的路径。
|
(
) |
####### Compiler, tools and options
CC = gcc
CFLAGS = -m64 -pipe -O3 -Wno-unused-but-set-parameter -Wall
DEL_FILE = rm -f
LIBS = -lOpenCL -I/home/quell/grayscott/clFFT/src/package/include/ -L/home/quell/grayscott/clFFT/src/package/lib64 -lclFFT -lm
#
####### Build rules
CL: main_gs.c
$(DEL_FILE) gs
$(DEL_FILE) *.o
$(CC) $(CFLAGS) -o gs main_gs.c $(LIBS)
export LD_LIBRARY_PATH=/home/quell/grayscott/clFFT/src/package/lib64:$$LD_LIBRARY_PATH
out: xdmfcreate.f90
gfortran -o out xdmfcreate.f90
clean:
$(DEL_FILE) gs
$(DEL_FILE) out
$(DEL_FILE) *.xmf
$(DEL_FILE) *.o
$(DEL_FILE) ./data/u*
$(DEL_FILE) ./data/v*
cleandata:
$(DEL_FILE) ./data/u*
$(DEL_FILE) ./data/v*
要运行,您需要包含参数的 INPUTFILE。
|
(
) |
1024
1024
100000
100
4.0
4.0
0.01
0.04
0.005
0.038
0.076
Nx
Ny
Tmax
Plotgap
Lx
Ly
DT
Du
Dv
F
k
To view the data, compile and run xdmfcreate.f90. Open udata.xdmf and vdata.xdmf in ParaView.
|
(
) |
PROGRAM XdmfCreate
!--------------------------------------------------------------------------------
! .. Purpose ..
! XdmfCreate is a postprocessing program which creates header files for
! Paraview and Visit
! It uses the INPUTFILE and assumes that the filenames in the program
! are
! consistent with those in the current file.
!
! .. PARAMETERS .. INITIALIZED IN INPUTFILE
! Nx = power of two, number of modes in the x direction
! Ny = power of two, number of modes in the y direction
! Nt = the number of timesteps
! plotgap = the number of timesteps to take before plotting
! Lx = definition of the periodic domain of computation in x direction
! Ly = definition of the periodic domain of computation in y direction
! Dt = the time step
!
! REFERENCES
!
! www.xdmf.org
! Paraview users mailing list
! VisIt users mailing list
!
! ACCURACY
!
! ERROR INDICATORS AND WARNINGS
!
! FURTHER COMMENTS
!------------------------------------------------------------------------------------
! EXTERNAL ROUTINES REQUIRED
IMPLICIT NONE
! .. Scalar Arguments ..
INTEGER(KIND=4) :: Nx, Ny, Nt, plotgap
REAL(KIND=8) :: Lx, Ly, DT
! .. Local scalars ..
INTEGER(KIND=4) :: stat,plotnum,ind,n,numplots
REAL(KIND=8), PARAMETER :: pi=3.14159265358979323846264338327950288419716939937510d0
! .. Local Arrays ..
CHARACTER*50 :: InputFileName, OutputFileName, OutputFileName2
CHARACTER*10 :: number_file
InputFileName='INPUTFILE'
OPEN(unit=11,FILE=trim(InputFileName),status="OLD")
REWIND(11)
READ(11,*) Nx, Ny, Nt, plotgap, Lx, Ly, DT
CLOSE(11)
plotnum=0
numplots=1+Nt/plotgap
OutputFileName='udata.xmf'
OPEN(unit=11,FILE=trim(OutputFileName),status="UNKNOWN")
REWIND(11)
WRITE(11,'(a)') "<?xml version=""1.0"" ?>"
WRITE(11,'(a)') "<!DOCTYPE Xdmf SYSTEM ""Xdmf.dtd"" []>"
WRITE(11,'(a)') "<Xdmf xmlns:xi=""http://www.w3.org/2001/XInclude"" Version=""2.0"">"
WRITE(11,'(a)') "<Domain>"
WRITE(11,'(a)') " <Topology name=""topo"" TopologyType=""3DCoRectMesh"""
WRITE(11,*) " Dimensions=""",Nx,Ny,""">"
WRITE(11,'(a)') " </Topology>"
WRITE(11,'(a)') " <Geometry name=""geo"" Type=""ORIGIN_DXDYDZ"">"
WRITE(11,'(a)') " <!-- Origin -->"
WRITE(11,'(a)') " <DataItem Format=""XML"" Dimensions=""2"">"
WRITE(11,*) -pi*Lx, -pi*Ly
WRITE(11,'(a)') " </DataItem>"
WRITE(11,'(a)') " <!-- DxDyDz -->"
WRITE(11,'(a)') " <DataItem Format=""XML"" Dimensions=""2"">"
WRITE(11,*) REAL(2.0*pi*Lx/Nx,kind(0d0)), REAL(2.0*pi*Ly/Ny,kind(0d0))
WRITE(11,'(a)') " </DataItem>"
WRITE(11,'(a)') " </Geometry>"
WRITE(11,'(a)')
WRITE(11,'(a)') " <Grid Name=""TimeSeries"" GridType=""Collection"" CollectionType=""Temporal"">"
WRITE(11,'(a)') " <Time TimeType=""HyperSlab"">"
WRITE(11,'(a)') " <DataItem Format=""XML"" NumberType=""Float"" Dimensions=""2"">"
WRITE(11,*) " 0.0", dt," 2"
WRITE(11,'(a)') " </DataItem>"
WRITE(11,'(a)') " </Time>"
DO n=1,numplots
OutputFileName = 'data/u'
ind = index(OutputFileName,' ') - 1
WRITE(number_file,'(i0)') 10000000+plotnum
OutputFileName = OutputFileName(1:ind)//number_file
ind = index(OutputFileName,' ') - 1
OutputFileName = OutputFileName(1:ind)//'.datbin'
OutputFileName2 = "<Grid Name=""T"
WRITE(number_file,'(i0)') n
OutputFileName2 = OutputFileName2(1:13)//number_file
ind = index(number_file,' ') - 1
OutputFileName2 = OutputFileName2(1:13+ind)//'" GridType="Uniform">'
plotnum=plotnum+1
WRITE(11,'(a)')
WRITE(11,'(3X,a)') " ",OutputFileName2
WRITE(11,'(a)') " <Topology Reference=""/Xdmf/Domain/Topology[1]""/>"
WRITE(11,'(a)') " <Geometry Reference=""/Xdmf/Domain/Geometry[1]""/>"
WRITE(11,'(a)') " <Attribute Name=""Displacement"" Center=""Node"">"
WRITE(11,'(a)') " <DataItem Format=""Binary"" "
WRITE(11,'(a)') " DataType=""Float"" Precision=""8"" Endian=""Little"" "
WRITE(11,*) " Dimensions=""",Nx, Ny,""">"
WRITE(11,'(16X,a)') " ",OutputFileName
WRITE(11,'(a)') " </DataItem>"
WRITE(11,'(a)') " </Attribute>"
WRITE(11,'(3X,a)') "</Grid>"
END DO
WRITE(11,'(a)')" </Grid>"
WRITE(11,'(a)') "</Domain>"
WRITE(11,'(a)') "</Xdmf>"
CLOSE(11)
!same for v
plotnum=0
numplots=1+Nt/plotgap
OutputFileName='vdata.xmf'
OPEN(unit=11,FILE=trim(OutputFileName),status="UNKNOWN")
REWIND(11)
WRITE(11,'(a)') "<?xml version=""1.0"" ?>"
WRITE(11,'(a)') "<!DOCTYPE Xdmf SYSTEM ""Xdmf.dtd"" []>"
WRITE(11,'(a)') "<Xdmf xmlns:xi=""http://www.w3.org/2001/XInclude"" Version=""2.0"">"
WRITE(11,'(a)') "<Domain>"
WRITE(11,'(a)') " <Topology name=""topo"" TopologyType=""3DCoRectMesh"""
WRITE(11,*) " Dimensions=""",Nx,Ny,""">"
WRITE(11,'(a)') " </Topology>"
WRITE(11,'(a)') " <Geometry name=""geo"" Type=""ORIGIN_DXDYDZ"">"
WRITE(11,'(a)') " <!-- Origin -->"
WRITE(11,'(a)') " <DataItem Format=""XML"" Dimensions=""2"">"
WRITE(11,*) -pi*Lx, -pi*Ly
WRITE(11,'(a)') " </DataItem>"
WRITE(11,'(a)') " <!-- DxDyDz -->"
WRITE(11,'(a)') " <DataItem Format=""XML"" Dimensions=""2"">"
WRITE(11,*) REAL(2.0*pi*Lx/Nx,kind(0d0)), REAL(2.0*pi*Ly/Ny,kind(0d0))
WRITE(11,'(a)') " </DataItem>"
WRITE(11,'(a)') " </Geometry>"
WRITE(11,'(a)')
WRITE(11,'(a)') " <Grid Name=""TimeSeries"" GridType=""Collection"" CollectionType=""Temporal"">"
WRITE(11,'(a)') " <Time TimeType=""HyperSlab"">"
WRITE(11,'(a)') " <DataItem Format=""XML"" NumberType=""Float"" Dimensions=""2"">"
WRITE(11,*) " 0.0", dt," 2"
WRITE(11,'(a)') " </DataItem>"
WRITE(11,'(a)') " </Time>"
DO n=1,numplots
OutputFileName = 'data/v'
ind = index(OutputFileName,' ') - 1
WRITE(number_file,'(i0)') 10000000+plotnum
OutputFileName = OutputFileName(1:ind)//number_file
ind = index(OutputFileName,' ') - 1
OutputFileName = OutputFileName(1:ind)//'.datbin'
OutputFileName2 = "<Grid Name=""T"
WRITE(number_file,'(i0)') n
OutputFileName2 = OutputFileName2(1:13)//number_file
ind = index(number_file,' ') - 1
OutputFileName2 = OutputFileName2(1:13+ind)//'" GridType="Uniform">'
plotnum=plotnum+1
WRITE(11,'(a)')
WRITE(11,'(3X,a)') " ",OutputFileName2
WRITE(11,'(a)') " <Topology Reference=""/Xdmf/Domain/Topology[1]""/>"
WRITE(11,'(a)') " <Geometry Reference=""/Xdmf/Domain/Geometry[1]""/>"
WRITE(11,'(a)') " <Attribute Name=""Displacement"" Center=""Node"">"
WRITE(11,'(a)') " <DataItem Format=""Binary"" "
WRITE(11,'(a)') " DataType=""Float"" Precision=""8"" Endian=""Little"" "
WRITE(11,*) " Dimensions=""",Nx, Ny,""">"
WRITE(11,'(16X,a)') " ",OutputFileName
WRITE(11,'(a)') " </DataItem>"
WRITE(11,'(a)') " </Attribute>"
WRITE(11,'(3X,a)') "</Grid>"
END DO
WRITE(11,'(a)')" </Grid>"
WRITE(11,'(a)') "</Domain>"
WRITE(11,'(a)') "</Xdmf>"
CLOSE(11)
END PROGRAM XdmfCreate
- ↑ 关于方程式的更多信息可在 http://mrob.com/pub/comp/xmorphia/#formula 找到。
- ↑ http://www.mathworks.com/matlabcentral/fileexchange/22940-vol3d-v2
- ↑ https://en.wikipedia.org/wiki/OpenCL
- ↑ http://developer.amd.com/tools-and-sdks/opencl-zone/amd-accelerated-parallel-processing-app-sdk/
- ↑ https://github.com/clMathLibraries/clFFT