int xpageoffset;
for (i = 0; i < 10000; i++) {
  for (xpageoffset = 0; xpageoffset < 10000; xpageoffset++) {
    d = 0.0;
    for (k = 0; k < 10000; k++) {
      d += a[i + 10000 * k] * b[k + 10000 * xpageoffset];
    }
    c[i + 10000 * xpageoffset] = d;
  }
}
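For comparison, a loop nest like the one above is what MATLAB Coder typically emits for a plain matrix product when no BLAS replacement is configured. A minimal sketch of such an entry-point function (the name myMatMul is hypothetical):

function c = myMatMul(a, b) %#codegen
% a, b: 10000-by-10000 double matrices; c = a*b lowers to the loops above
c = a * b;
end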
static double b[100000000];
static double c[100000000];
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, (MKL_INT)10000,
            (MKL_INT)10000, (MKL_INT)10000, 1.0, &a[0], (MKL_INT)10000,
            &b[0], (MKL_INT)10000, 0.0, &c[0], (MKL_INT)10000);
#pragma omp parallel for \
    num_threads(4 > omp_get_max_threads() ? omp_get_max_threads() : 4) \
    private(b_i, yCol, b_r)
for (i = 0; i < 10; i++) {
  for (b_i = 0; b_i < 256; b_i++) {
    b_r[b_i] = r[i + 10 * b_i];
  }
  c_FFTImplementationCallback_doH(b_r, 0, yCol);
  for (b_i = 0; b_i < 256; b_i++) {
    a[i + 10 * b_i] = yCol[b_i].re;
  }
}
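A hedged sketch of MATLAB source that can produce an OpenMP-parallelized FFT loop like the one above (the function name rowwiseFFT and the 10-by-256 shape are assumptions inferred from the indices in the generated code); whether the generated code may use OpenMP is controlled by the EnableOpenMP configuration property:

function a = rowwiseFFT(r) %#codegen
% r: 10-by-256 real matrix; 256-point FFT along each row, keeping the real part
a = real(fft(r, [], 2));
end

cfg = coder.config('exe');
cfg.EnableOpenMP = true; % allow OpenMP pragmas in the generated code
codegen rowwiseFFT -config cfg -args {zeros(10, 256)} -report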
cfg = coder.gpuConfig('exe');
cfg.GenerateExampleMain = 'GenerateCodeAndCompile';
codegen largeMatrixTest -config cfg -report
cublasDgemm(getCublasGlobalHandle(), CUBLAS_OP_N, CUBLAS_OP_N, 5000, 5000, 5000,
            (double *)gpu_alpha1, (double *)&(*gpu_a)[0], 5000,
            (double *)&(*gpu_b)[0], 5000, (double *)gpu_beta1,
            (double *)&(*gpu_c)[0], 5000);
cusolverDnDgesvd(getCuSolverGlobalHandle(), 'N', 'N', 5000, 5000,
                 (double *)&(*gpu_c)[0], 5000, &(*gpu_s)[0], NULL, 1, NULL, 1,
                 (double *)getCuSolverWorkspaceBuff(),
                 *getCuSolverWorkspaceReq(), &(*gpu_superb)[0], gpu_info_t);
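Whether GPU Coder maps these operations onto cuBLAS and cuSOLVER is controlled from the configuration object. A brief sketch using the documented coder.GpuCodeConfig properties:

cfg = coder.gpuConfig('exe');
cfg.GpuConfig.EnableCUBLAS = true;   % map matrix products onto cublasDgemm
cfg.GpuConfig.EnableCUSOLVER = true; % map svd/eig onto cuSOLVER routines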
Beyond the topics covered above, this article has not discussed today's hottest and most hardware-acceleration-hungry application: deep learning. In fact, since R2017b MATLAB has supported generating C/C++ code for deep learning inference and accelerating that inference in hardware, including NVIDIA desktop, server, and embedded GPUs (via CUDA), ARM Mali GPUs and Arm Neon cores (via the Arm Compute Library), and the SIMD instructions (SSE/AVX) of x86_64 processors (via Intel MKL-DNN). As of R2020b, Deep Learning HDL Toolbox can additionally generate hardware description language from a trained deep learning model, allowing deep learning to be deployed on FPGAs. For details, see the MATLAB documentation or contact the MathWorks China office.
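To give a flavor of that workflow, here is a minimal sketch of deep-learning code generation targeting Intel MKL-DNN (the entry-point name myNetPredict and the 224-by-224-by-3 input size are hypothetical):

cfg = coder.config('lib');
cfg.TargetLang = 'C++';
cfg.DeepLearningConfig = coder.DeepLearningConfig('mkldnn'); % Intel MKL-DNN backend
codegen myNetPredict -config cfg -args {ones(224, 224, 3, 'single')} -report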
function largeMatrixTest()
tic;
a = rand(5000, 5000);
b = a * a;
c = sum(a);
s = svd(a);
e = eig(a);
[maxValue, maxPos] = max(a);
tCpu = toc;
fprintf(' Time cost: %f\n', tCpu);
end
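One way to quantify the speedup is to also build a MEX target and time both versions from within MATLAB; a hedged sketch (the _mex suffix is codegen's default naming):

cfgMex = coder.config('mex');
codegen largeMatrixTest -config cfgMex -report
tic; largeMatrixTest();     % interpreted MATLAB baseline
tMatlab = toc;
tic; largeMatrixTest_mex(); % generated and compiled MEX
tMex = toc;
fprintf('MATLAB: %f s, MEX: %f s\n', tMatlab, tMex);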
classdef useMyBLAS < coder.BLASCallback
    methods (Static)
        function updateBuildInfo(buildInfo, ~)
            libPath = 'C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64';
            libPriority = '';
            libPreCompiled = true;
            libLinkOnly = true;
            libs = {'mkl_intel_ilp64.lib', 'mkl_intel_thread.lib', 'mkl_core.lib'};
            buildInfo.addLinkObjects(libs, libPath, libPriority, libPreCompiled, ...
                libLinkOnly);
            buildInfo.addLinkObjects('libiomp5md.lib', fullfile(matlabroot, 'bin', ...
                'win64'), libPriority, libPreCompiled, libLinkOnly);
            buildInfo.addIncludePaths('C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2020.1.216\windows\mkl\include');
            buildInfo.addDefines('-DMKL_ILP64');
        end
        function headerName = getHeaderFilename()
            headerName = 'mkl_cblas.h';
        end
        function intTypeName = getBLASIntTypeName()
            intTypeName = 'MKL_INT';
        end
        function doubleComplexTypeName = getBLASDoubleComplexTypeName()
            doubleComplexTypeName = 'my_double_complex_type';
        end
        function singleComplexTypeName = getBLASSingleComplexTypeName()
            singleComplexTypeName = 'my_single_complex_type';
        end
        function p = useEnumNameRatherThanTypedef()
            p = true;
        end
    end
end
classdef useMyLAPACK < coder.LAPACKCallback
    methods (Static)
        function hn = getHeaderFilename()
            hn = 'mkl_lapacke.h';
        end
        function updateBuildInfo(buildInfo, buildctx)
            buildInfo.addIncludePaths(fullfile(pwd, 'include'));
            libName = 'mkl_lapack95_ilp64';
            libPath = 'C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64';
            [~, linkLibExt] = buildctx.getStdLibInfo();
            buildInfo.addLinkObjects([libName linkLibExt], libPath, ...
                '', true, true);
            buildInfo.addIncludePaths('C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2020.1.216\windows\mkl\include');
            buildInfo.addDefines('HAVE_LAPACK_CONFIG_H');
            buildInfo.addDefines('LAPACK_COMPLEX_STRUCTURE');
            buildInfo.addDefines('LAPACK_ILP64');
        end
    end
end
cfg = coder.config('exe');
cfg.CustomBLASCallback = 'useMyBLAS';
cfg.CustomLAPACKCallback = 'useMyLAPACK';
cfg.GenerateExampleMain = 'GenerateCodeAndCompile';
codegen largeMatrixTest -config cfg -report
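If generation and compilation succeed, the resulting executable (named after the entry point by default) can be run directly to check the timing against the MATLAB baseline:

system('largeMatrixTest.exe'); % Windows assumed, matching the MKL library paths above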