高性能计算实验报告学生姓名:X X学号:XXXXXXXXXX班号:116122指导教师:郭明强中国地质大学(武汉)信息工程学院第一题1.编写console程序2.由下图看出,电脑是双核CPU3.多线程程序,利用windowsAPI函数创建线程代码#include"stdafx.h"#include<iostream>#include"windows.h"usingnamespace std;DWORD WINAPI first(PVOID pParam){for (int i = 0;i < 10;i++){printf("1\n");}return 0;}DWORD WINAPI second(PVOID pParam){for (int i = 0;i < 10;i++){printf("2\n");}return 0;}int main(int argc, char * argv[]){HANDLE hHandle_Calc[2];hHandle_Calc[0] = CreateThread(NULL, 0, first, NULL, 0, NULL);hHandle_Calc[1] = CreateThread(NULL, 0, second, NULL, 0, NULL);WaitForMultipleObjects(2, hHandle_Calc, true, INFINITE);}第二题多线程实现计算e和π的乘积代码#include"stdafx.h"#include"windows.h"#define num_steps 2000000#include<iostream>usingnamespace std;//计算eDWORD WINAPI ThreadCalc_E(PVOID pParam)//计算e子函数{double factorial = 1;int i = 1;double e = 1;for (;i <num_steps;i++){factorial *= i;e += 1.0 / factorial;}*((double*)pParam) = e;printf("e done E =%2.5f\n", e);return 0;}//计算PIDWORD WINAPI ThreadCalc_PI(PVOID pParam)//计算pi子函数{int i = 0;double pi = 0;for (;i <num_steps * 10;i++){pi += 1.0 / (i*4.0 + 1.0);pi -= 1.0 / (i*4.0 + 3.0);}pi = pi*4.0;*((double*)pParam) = pi;printf("pi done PI = %2.5f\n", pi);return 0;}int main(int argc,char * argv[])//进程的主线程入口点{HANDLE hHandle_Calc[2];double result_e, result_pi;hHandle_Calc[0] = CreateThread(NULL, 0, ThreadCalc_E, (void*)(&result_e), 0, NULL);hHandle_Calc[1] = CreateThread(NULL, 0, ThreadCalc_PI, (void*)(&result_pi), 0, NULL);//附加线程的创建WaitForMultipleObjects(2, hHandle_Calc, true, INFINITE);//等待子线程计算结束printf("e* pi = %2.5f\n", result_e*result_pi);//打印出e*pi结果return 0;}第三题1.Parallel语句#include"stdafx.h"#include"windows.h"#include"omp.h"#include<iostream>usingnamespace std;void main(){#pragma omp parallel num_threads(8){printf("Hello, World!, ThreadId=%d\n", omp_get_thread_num());}}2.For语句#include"stdafx.h"#include"windows.h"#include"omp.h"#include<iostream>usingnamespace std;void main(){int j = 0;#pragma omp parallel{#pragma omp forfor (j = 0;j < 4;j++) {printf("j=%d,threadid=%d\n", j, omp_get_thread_num());}}}3.Sections和section语句的用法#include"stdafx.h"#include"windows.h"#include"omp.h"#include<iostream>usingnamespace std;void main(int argc, char * argv){#pragma omp parallel sections{#pragma omp sectionprintf(" section 1 threadid = %d \n", omp_get_thread_num()); #pragma omp sectionprintf(" section 2 threadid = %d \n", omp_get_thread_num()); #pragma omp sectionprintf(" section 3 threadid = %d \n", omp_get_thread_num()); #pragma omp sectionprintf(" section 4 threadid = %d \n", omp_get_thread_num());}}4.Threadprivate语句的用法#include"stdafx.h"#include"windows.h"#include"omp.h"#include<iostream>usingnamespace std;int a, b, i, tid;float x;#pragma omp threadprivate(a,x)void main(){//关闭动态线程分配omp_set_dynamic(0);printf("1st Parallel Region:\n");#pragma omp parallel private(b,tid){tid = omp_get_thread_num();a = tid;b = tid;x = 1.1*tid + 1.0;printf("Threading %d: a,b,x = %d %d %f\n", tid, a, b, x);}//end of parallel sectionprintf("********************************************\n");printf("主线程中串行线程\n");printf("********************************************\n");printf("2nd Parallel Region:\n");#pragma omp parallel private(tid){tid = omp_get_thread_num();printf("Threading %d: a,b,x = %d %d %f\n", tid, a, b, x);}//end of parallel section}5.reduction语句的用法#include"stdafx.h"#include"windows.h"#include"omp.h"#include<iostream>usingnamespace std;#include<omp.h>void main(){int i, n, chunk;float a[100], b[100], result;//变量的初始化n = 100;chunk = 10;result = 0.0;for (i = 0;i < n;i++){a[i] = i*2.0;b[i] = i*3.0;}#pragma omp parallel for default(shared) private(i)schedule(static, chunk)reduction(+:result)for (i = 0;i < n;i++){result = result + (a[i] * b[i]);}printf("Final result= %f\n", result);}第四题OpenMP实例分析与比较串行#include"stdafx.h"#include<time.h>#define num_steps 20000000int main(int argc, char *argv[]){double start, stop;double e, pi, factorial, product;int i;//启动定时器start = clock();//首先运用taylor展开运算eprintf("e started\n");e = 1;factorial = 1;for (i = 1;i <num_steps;i++){factorial *= i;e += 1.0 / factorial;}printf("e done\n");//然后计算pi运用taylor展开printf("pi started\n");pi = 0;for (i = 1;i <num_steps * 10;i++){pi += 1.0/(i*4.0 + 1.0);pi -= 1.0/(i*4.0 + 3.0);}pi = pi*4.0;printf("pi done\n");product = e*pi;stop = clock();printf("Reached result %f in %.3f second\n", product, (stop - start)/1000);return 0;}并行#include"stdafx.h"#include<time.h>#define num_steps 20000000int main(int argc, char * argv[]){double start, stop;//任务开始double e, pi, factorial, product;int i;//启动定时器start = clock();//启动两个进程分别计算e pi#pragma omp parallel sections shared(e,pi){#pragma omp section{printf("e started\n");e = 1;factorial = 1;for (i = 1;i <num_steps;i++){factorial *= i;e += 1.0 / factorial;}printf("e done\n");}#pragma omp section{printf("pi started\n");pi = 0;for (i = 1;i <num_steps * 10;i++){pi += 1.0/(i*4.0 + 1.0);pi -= 1.0/(i*4.0 + 3.0);}pi = pi*4.0;printf("pi done\n");}}//omp sections//两个线程合并为主线程product = e*pi;stop = clock();printf("reached result %f in %.3f second\n", product, (stop - start)/1000);return 0;}由下面两个图可知,并行比串行运行速度快,CPU使用效率高1.串行2.并行。