添加链接
link管理
链接快照平台
  • 输入网页链接,自动生成快照
  • 标签化管理网页链接
相关文章推荐
想旅行的双杠  ·  CBETA 線上閱讀·  1 月前    · 
憨厚的可乐  ·  Selectively overwrite ...·  6 月前    · 
int xpageoffset; for (i = 0; i < 10000; i++) { for (xpageoffset = 0; xpageoffset < 10000; xpageoffset++) { d = 0.0; for (k = 0; k < 10000; k++) { d += a[i + 10000 * k] * b[k + 10000 * xpageoffset]; c[i + 10000 * xpageoffset] = d; static double b[100000000]; static double c[100000000]; cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, (MKL_INT)10000, (MKL_INT)10000, (MKL_INT)10000, 1.0, &a[0], (MKL_INT)10000, &b[0], (MKL_INT)10000, 0.0, &c[0], (MKL_INT)10000);
/* Run the outer loop in parallel with min(4, omp_get_max_threads()) threads;
 * b_i, yCol and b_r are private to each thread. */
#pragma omp parallel for \
 num_threads(4 > omp_get_max_threads() ? omp_get_max_threads() : 4) \
 private(b_i,yCol,b_r)
  for (i = 0; i < 10; i++) {
    /* Gather the 256 strided values r[i], r[i + 10], ... into b_r. */
    for (b_i = 0; b_i < 256; b_i++) {
      b_r[b_i] = r[i + 10 * b_i];
    }

    /* Helper defined elsewhere; its result is read back from yCol below. */
    c_FFTImplementationCallback_doH(b_r, 0, yCol);

    /* Scatter the real parts of yCol back into a with the same stride. */
    for (b_i = 0; b_i < 256; b_i++) {
      a[i + 10 * b_i] = yCol[b_i].re;
    }
  }
%% generate standalone exe by using GPU Coder (gpuCodeGenTest.m)
% Create a GPU Coder configuration object for a standalone executable.
cfg = coder.gpuConfig('exe');

% Ask the code generator to produce an example main and compile it.
cfg.GenerateExampleMain = 'GenerateCodeAndCompile';

% Function-form equivalent of: codegen largeMatrixTest -config cfg -report
codegen('largeMatrixTest', '-config', 'cfg', '-report');
	
/* Double-precision GEMM through cuBLAS: both inputs are passed untransposed
 * (CUBLAS_OP_N), all three dimensions and all leading dimensions are 5000.
 * Operands are gpu_a and gpu_b, the output buffer is gpu_c, and the scalars
 * are read through gpu_alpha1 / gpu_beta1. */
cublasDgemm(getCublasGlobalHandle(), CUBLAS_OP_N, CUBLAS_OP_N, 5000, 5000,
              5000, (double *)gpu_alpha1, (double *)&(*gpu_a)[0], 5000, (double *)
              &(*gpu_b)[0], 5000, (double *)gpu_beta1, (double *)&(*gpu_c)[0],
              5000);
/* Double-precision SVD through cuSOLVER on a 5000 x 5000 matrix held in gpu_c.
 * Both job flags are 'N' and the U / VT pointers are NULL, so only the
 * singular values (gpu_s) are requested. Workspace comes from the
 * getCuSolverWorkspace* helpers; status is returned through gpu_info_t.
 * NOTE(review): the input buffer is gpu_c, not gpu_a; presumably it holds a
 * copy of the matrix at this point, confirm against the full generated file. */
cusolverDnDgesvd(getCuSolverGlobalHandle(), 'N', 'N', 5000, 5000, (double *)
                     &(*gpu_c)[0], 5000, &(*gpu_s)[0], NULL, 1, NULL, 1, (double
      *)getCuSolverWorkspaceBuff(), *getCuSolverWorkspaceReq(), &(*gpu_superb)[0],
                     gpu_info_t);
	

除了以上提到的内容,如今最热且重度依赖硬件加速的深度学习应用并没在本文中讨论,事实上MATLAB从R2017b就已经开始支持针对深度学习推断生成C/C++代码,并可利用硬件来加速深度学习的推断,包括NVIDIA的桌面与服务器GPU及嵌入式GPU(通过CUDA实现)、ARM Mali GPU与ARM Neon核(通过Arm Compute Library实现),或者利用x86_64处理器的SIMD(SSE/AVX,通过Intel MKL-DNN实现)。在最新的R2020b版本中,Deep Learning HDL Toolbox还可以将训练好的深度学习模型生成为硬件描述语言,从而把深度学习部署到FPGA上。详情可参考MATLAB帮助文档或者咨询MathWorks中国办公室。

%% example function for code generation (largeMatrixTest.m)
function largeMatrixTest()
%LARGEMATRIXTEST Time several dense linear-algebra operations on a 5000x5000 matrix.
%   largeMatrixTest() takes no inputs and returns no outputs. It builds a
%   random 5000-by-5000 matrix, runs a matrix product, column sum, SVD,
%   eigenvalue computation and column-wise max on it, and prints the
%   elapsed time with fprintf.
    a = rand(5000, 5000);

    % toc needs a running stopwatch; without a preceding tic it raises an error.
    % NOTE(review): tic is placed after rand so that only the operations below
    % are timed; move it above rand if matrix creation should be included.
    tic;
    b = a * a;                    % matrix product
    c = sum(a);                   % column sums
    s = svd(a);                   % singular values
    e = eig(a);                   % eigenvalues
    [maxValue, maxPos] = max(a);  % column maxima and their row indices
    tCpu = toc;

    fprintf('    Time cost: %f\n', tCpu);
end
%% define class for BLASCallback (useMyBLAS.m)
classdef useMyBLAS < coder.BLASCallback
    %USEMYBLAS BLAS callback class that points generated code at Intel MKL (ILP64).
    %   Referenced by name from a code generation configuration through
    %   cfg.CustomBLASCallback = 'useMyBLAS'. All methods are static.
    methods (Static)
        function updateBuildInfo(buildInfo, ~)
            %UPDATEBUILDINFO Add MKL link objects, include path and defines.
            %   buildInfo - build information object to update.
            %   The second argument (build context) is unused here.
            % NOTE(review): the library path uses the unversioned
            % 'compilers_and_libraries' folder while the include path below
            % uses 'compilers_and_libraries_2020.1.216'; confirm both exist
            % on the build machine.
            libPath = 'C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64';
            libPriority = '';
            libPreCompiled = true;
            libLinkOnly = true;
            libs = {'mkl_intel_ilp64.lib' 'mkl_intel_thread.lib' 'mkl_core.lib'};
            buildInfo.addLinkObjects(libs, libPath, libPriority, libPreCompiled, ...
                                  libLinkOnly);
            % OpenMP runtime library taken from the MATLAB installation.
            buildInfo.addLinkObjects('libiomp5md.lib',fullfile(matlabroot,'bin', ...
                             'win64'), libPriority, libPreCompiled, libLinkOnly);
            buildInfo.addIncludePaths('C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2020.1.216\windows\mkl\include');
            buildInfo.addDefines('-DMKL_ILP64');
        end

        function headerName = getHeaderFilename()
            %GETHEADERFILENAME Name of the CBLAS header included by generated code.
            headerName = 'mkl_cblas.h';
        end

        function intTypeName = getBLASIntTypeName()
            %GETBLASINTTYPENAME Integer type name used in generated BLAS calls.
            intTypeName = 'MKL_INT';
        end

        function doubleComplexTypeName = getBLASDoubleComplexTypeName()
            %GETBLASDOUBLECOMPLEXTYPENAME Type name for double-precision complex arguments.
            % NOTE(review): 'my_double_complex_type' looks like a placeholder;
            % confirm a type with this name is visible to the generated code.
            doubleComplexTypeName = 'my_double_complex_type';
        end

        function singleComplexTypeName = getBLASSingleComplexTypeName()
            %GETBLASSINGLECOMPLEXTYPENAME Type name for single-precision complex arguments.
            % NOTE(review): 'my_single_complex_type' looks like a placeholder;
            % confirm a type with this name is visible to the generated code.
            singleComplexTypeName = 'my_single_complex_type';
        end

        function p = useEnumNameRatherThanTypedef()
            %USEENUMNAMERATHERTHANTYPEDEF Always returns true.
            p = true;
        end
    end
end
%% define class for LAPACKCallback (useMyLAPACK.m)
classdef useMyLAPACK < coder.LAPACKCallback
    %USEMYLAPACK LAPACK callback class that points generated code at Intel MKL.
    %   Referenced by name from a code generation configuration through
    %   cfg.CustomLAPACKCallback = 'useMyLAPACK'. All methods are static.
    methods (Static)
        function hn = getHeaderFilename()
            %GETHEADERFILENAME Name of the LAPACKE header included by generated code.
            hn = 'mkl_lapacke.h';
        end

        function updateBuildInfo(buildInfo, buildctx)
            %UPDATEBUILDINFO Add the LAPACK link object, include paths and defines.
            %   buildInfo - build information object to update.
            %   buildctx  - build context, queried for the platform's
            %               library file extension.
            buildInfo.addIncludePaths(fullfile(pwd,'include'));
            libName = 'mkl_lapack95_ilp64';
            libPath = 'C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64';
            % Second output of getStdLibInfo is the link-library extension,
            % which is appended to libName.
            [~,linkLibExt] = buildctx.getStdLibInfo();
            buildInfo.addLinkObjects([libName linkLibExt], libPath, ...
                '', true, true);
            buildInfo.addIncludePaths('C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2020.1.216\windows\mkl\include');
            buildInfo.addDefines('HAVE_LAPACK_CONFIG_H');
            buildInfo.addDefines('LAPACK_COMPLEX_STRUCTURE');
            buildInfo.addDefines('LAPACK_ILP64');
        end
    end
end
%% generate standalone exe for above MATLAB function (genCodeTest.m)
% Create a MATLAB Coder configuration object for a standalone executable.
cfg = coder.config('exe');

% Ask the code generator to produce an example main and compile it.
cfg.GenerateExampleMain = 'GenerateCodeAndCompile';

% Register the custom callback classes by name.
cfg.CustomLAPACKCallback = 'useMyLAPACK';
cfg.CustomBLASCallback = 'useMyBLAS';

% Function-form equivalent of: codegen largeMatrixTest -config cfg -report
codegen('largeMatrixTest', '-config', 'cfg', '-report');
					

Vous pouvez également sélectionner un site web dans la liste suivante :

Comment optimiser les performances du site

Pour optimiser les performances du site, sélectionnez la région Chine (en chinois ou en anglais). Les sites de MathWorks pour les autres pays ne sont pas optimisés pour les visites provenant de votre région.

Amériques

  • América Latina (Español)
  • Canada (English)
  • United States (English)
Europe

  • Belgium (English)
  • Denmark (English)
  • Deutschland (Deutsch)
  • España (Español)
  • Finland (English)
  • France (Français)
  • Ireland (English)
  • Italia (Italiano)
  • Luxembourg (English)
  •