/* Index used by the middle loop below (second index of b and c). */
int xpageoffset;

/*
 * Plain triple-loop matrix product on flat arrays.
 *
 * Element (row, col) of each operand lives at flat index row + 10000 * col.
 * For every (i, xpageoffset) pair the innermost loop accumulates the dot
 * product over k into d; the finished sum is then stored into c once.
 *
 * i, k, d, a, b and c are declared outside this excerpt.
 */
for (i = 0; i < 10000; i++) {
  for (xpageoffset = 0; xpageoffset < 10000; xpageoffset++) {
    d = 0.0;
    for (k = 0; k < 10000; k++) {
      d += a[i + 10000 * k] * b[k + 10000 * xpageoffset];
    }
    /* Store after the accumulation has finished, not on every k step. */
    c[i + 10000 * xpageoffset] = d;
  }
}
/*
 * Statically allocated buffers of 100000000 doubles each
 * (100000000 == 10000 * 10000, matching the dimensions passed below).
 * The buffer `a` used in the call is declared outside this excerpt.
 */
static double
b[100000000];
static double
c[100000000];
/*
 * One CBLAS double-precision general matrix multiply call.
 * Arguments as written: column-major layout, no transpose on either input,
 * all three dimensions 10000, scale factor 1.0 on the a/b product, leading
 * dimension 10000 for every matrix, and scale factor 0.0 on the existing
 * contents of c (so c is overwritten rather than accumulated into, per the
 * usual dgemm contract).
 */
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, (MKL_INT)10000,
(MKL_INT)10000, (MKL_INT)10000, 1.0, &a[0], (MKL_INT)10000, &b[0],
(MKL_INT)10000, 0.0, &c[0], (MKL_INT)10000);
/*
 * Processes 10 independent sequences of 256 samples each, in parallel.
 *
 * For sequence i:
 *   1. gather its 256 samples from r (stride 10 between samples) into the
 *      scratch buffer b_r,
 *   2. hand b_r to c_FFTImplementationCallback_doH, which fills yCol,
 *   3. scatter the real part (.re) of each yCol entry into a, using the
 *      same stride-10 layout as r.
 *
 * The thread count is min(4, omp_get_max_threads()). b_i, yCol and b_r are
 * listed as private so each thread works on its own copies.
 *
 * i, b_i, b_r, yCol, r and a are declared outside this excerpt.
 */
#pragma omp parallel for \
 num_threads(4 > omp_get_max_threads() ? omp_get_max_threads() : 4) \
 private(b_i,yCol,b_r)
for (i = 0; i < 10; i++) {
  for (b_i = 0; b_i < 256; b_i++) {
    b_r[b_i] = r[i + 10 * b_i];
  }
  c_FFTImplementationCallback_doH(b_r, 0, yCol);
  for (b_i = 0; b_i < 256; b_i++) {
    a[i + 10 * b_i] = yCol[b_i].re;
  }
}
% Build script: generate code for largeMatrixTest with a GPU Coder configuration.

% Create a GPU code generation configuration object for the 'exe' build type.
cfg = coder.gpuConfig('exe');
% Ask the code generator to produce an example main and compile it with the
% generated code.
cfg.GenerateExampleMain = 'GenerateCodeAndCompile';
% Run code generation for largeMatrixTest using the configuration stored in
% the workspace variable cfg, and request a code generation report.
codegen largeMatrixTest -config cfg -report
/*
 * One cuBLAS double-precision general matrix multiply call.
 * Arguments as written: the handle from getCublasGlobalHandle(), no transpose
 * on either input (CUBLAS_OP_N), all three dimensions 5000, and leading
 * dimension 5000 for each of gpu_a, gpu_b and gpu_c. The two scale factors
 * are passed by pointer (gpu_alpha1, gpu_beta1); their values are set outside
 * this excerpt.
 */
cublasDgemm(getCublasGlobalHandle(), CUBLAS_OP_N, CUBLAS_OP_N, 5000, 5000,
5000, (double *)gpu_alpha1, (double *)&(*gpu_a)[0], 5000, (double *)
&(*gpu_b)[0], 5000, (double *)gpu_beta1, (double *)&(*gpu_c)[0],
5000);
/*
 * One cuSOLVER dense double-precision SVD call on a 5000 x 5000 matrix with
 * leading dimension 5000, held in the gpu_c buffer.
 * Both job flags are 'N' and both matrix output pointers are NULL (with
 * leading dimension 1), so presumably only the singular values are requested;
 * they are written to gpu_s. The workspace pointer and size come from
 * getCuSolverWorkspaceBuff() / getCuSolverWorkspaceReq(); gpu_superb and
 * gpu_info_t are the remaining output arguments.
 */
cusolverDnDgesvd(getCuSolverGlobalHandle(), 'N', 'N', 5000, 5000, (double *)
&(*gpu_c)[0], 5000, &(*gpu_s)[0], NULL, 1, NULL, 1, (double
*)getCuSolverWorkspaceBuff(), *getCuSolverWorkspaceReq(), &(*gpu_superb)[0],
gpu_info_t);
除了以上提到的内容,如今最热且重度依赖硬件加速的深度学习应用并没在本文中讨论,事实上MATLAB从R2017b就已经开始支持针对深度学习推断生成C/C++代码,并可利用硬件来加速深度学习的推断,包括NVIDIA的桌面与服务器GPU及嵌入式GPU(通过CUDA实现)、ARM Mali GPU与ARM Neon核(通过Arm Compute Library实现),或者利用x86_64处理器的SIMD(SSE/AVX,通过Intel MKL-DNN实现)。在最新的R2020b版本中,Deep Learning HDL Toolbox还可以将训练好的深度学习模型生成为硬件描述语言,从而把深度学习部署到FPGA上。详情可参考MATLAB帮助文档或者咨询MathWorks中国办公室。
function largeMatrixTest()
%LARGEMATRIXTEST Time a set of dense operations on a 5000-by-5000 matrix.
%   largeMatrixTest() creates a random 5000-by-5000 matrix, then runs a
%   matrix product, a column sum, a singular value decomposition, an
%   eigenvalue computation and a column-wise maximum on it. The elapsed
%   time of those operations is printed to the command window.
%
%   The function takes no inputs and returns no outputs; the computed
%   results exist only so that the operations are executed.

a = rand(5000, 5000);

% Start the stopwatch read by toc below. Calling toc without a prior tic
% is an error, so the timer must be started explicitly. It is started
% after the input matrix has been created so that only the matrix
% operations are measured.
tic
b = a * a;                     % matrix product
c = sum(a);                    % column sums
s = svd(a);                    % singular values
e = eig(a);                    % eigenvalues
[maxValue, maxPos] = max(a);   % column-wise maxima and their positions
tCpu = toc;

fprintf(' Time cost: %f\n', tCpu);
end
classdef useMyBLAS < coder.BLASCallback
    %USEMYBLAS BLAS callback class that points code generation at Intel MKL.
    %   Set this class name as the CustomBLASCallback of a code generation
    %   configuration object. Its static methods tell the code generator
    %   which header to include, which integer and complex type names to
    %   use in BLAS calls, and how to compile and link against the library.
    %
    %   NOTE(review): the library path uses the "compilers_and_libraries"
    %   directory while the include path uses
    %   "compilers_and_libraries_2020.1.216". Confirm that both exist on
    %   the build machine and refer to the same MKL installation.

    methods (Static)
        function updateBuildInfo(buildInfo, ~)
            %UPDATEBUILDINFO Add MKL link objects, include path and defines.
            %   buildInfo is the build information object to update. The
            %   second argument (build context) is not used.
            libPath = 'C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64';
            libPriority = '';
            libPreCompiled = true;
            libLinkOnly = true;
            libs = {'mkl_intel_ilp64.lib' 'mkl_intel_thread.lib' 'mkl_core.lib'};
            buildInfo.addLinkObjects(libs, libPath, libPriority, libPreCompiled, ...
                libLinkOnly);
            % The OpenMP runtime library is taken from the MATLAB
            % installation rather than from the MKL directory.
            buildInfo.addLinkObjects('libiomp5md.lib',fullfile(matlabroot,'bin', ...
                'win64'), libPriority, libPreCompiled, libLinkOnly);
            buildInfo.addIncludePaths('C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2020.1.216\windows\mkl\include');
            % Matches the *_ilp64 interface library linked above.
            buildInfo.addDefines('-DMKL_ILP64');
        end

        function headerName = getHeaderFilename()
            %GETHEADERFILENAME Name of the CBLAS header to include.
            headerName = 'mkl_cblas.h';
        end

        function intTypeName = getBLASIntTypeName()
            %GETBLASINTTYPENAME Integer type name used in BLAS calls.
            intTypeName = 'MKL_INT';
        end

        function doubleComplexTypeName = getBLASDoubleComplexTypeName()
            %GETBLASDOUBLECOMPLEXTYPENAME Double-precision complex type name.
            %   Must name a type that is declared once the header returned
            %   by getHeaderFilename is included. MKL declares
            %   MKL_Complex16 for this purpose; a placeholder name that no
            %   header defines would break compilation of any generated
            %   complex BLAS call.
            doubleComplexTypeName = 'MKL_Complex16';
        end

        function singleComplexTypeName = getBLASSingleComplexTypeName()
            %GETBLASSINGLECOMPLEXTYPENAME Single-precision complex type name.
            %   MKL declares MKL_Complex8 for single-precision complex
            %   values; see getBLASDoubleComplexTypeName for why a declared
            %   type is required.
            singleComplexTypeName = 'MKL_Complex8';
        end

        function p = useEnumNameRatherThanTypedef()
            %USEENUMNAMERATHERTHANTYPEDEF Select how CBLAS enum types are spelled.
            %   Returns true. NOTE(review): presumably this makes the
            %   generated code refer to CBLAS enumerations by enum name
            %   instead of by typedef; confirm against the
            %   coder.BLASCallback documentation.
            p = true;
        end
    end
end
classdef useMyLAPACK < coder.LAPACKCallback
    %USEMYLAPACK LAPACK callback class that points code generation at Intel MKL.
    %   Set this class name as the CustomLAPACKCallback of a code
    %   generation configuration object. Its static methods tell the code
    %   generator which LAPACKE header to include and how to compile and
    %   link against the library.
    %
    %   NOTE(review): the library path uses the "compilers_and_libraries"
    %   directory while the MKL include path uses
    %   "compilers_and_libraries_2020.1.216". Confirm that both exist on
    %   the build machine and refer to the same MKL installation.

    methods (Static)
        function hn = getHeaderFilename()
            %GETHEADERFILENAME Name of the LAPACKE header to include.
            hn = 'mkl_lapacke.h';
        end

        function updateBuildInfo(buildInfo, buildctx)
            %UPDATEBUILDINFO Add include paths, the link object and defines.
            %   buildInfo is the build information object to update.
            %   buildctx is the build context; it supplies the link
            %   library file extension for the current toolchain.

            % NOTE(review): this adds <current folder>/include in addition
            % to the MKL include directory below; confirm it is needed.
            buildInfo.addIncludePaths(fullfile(pwd,'include'));
            libName = 'mkl_lapack95_ilp64';
            libPath = 'C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64';
            % Only the second output (link library extension) is used; it
            % is appended to libName to form the library file name.
            [~,linkLibExt] = buildctx.getStdLibInfo();
            buildInfo.addLinkObjects([libName linkLibExt], libPath, ...
                '', true, true);
            buildInfo.addIncludePaths('C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2020.1.216\windows\mkl\include');
            buildInfo.addDefines('HAVE_LAPACK_CONFIG_H');
            buildInfo.addDefines('LAPACK_COMPLEX_STRUCTURE');
            buildInfo.addDefines('LAPACK_ILP64');
        end
    end
end
% Build script: generate code for largeMatrixTest with custom BLAS and LAPACK
% callback classes.

% Create a code generation configuration object for the 'exe' build type.
cfg = coder.config('exe');
% Name the callback classes that describe the BLAS and LAPACK libraries to use.
cfg.CustomBLASCallback = 'useMyBLAS';
cfg.CustomLAPACKCallback = 'useMyLAPACK';
% Ask the code generator to produce an example main and compile it with the
% generated code.
cfg.GenerateExampleMain = 'GenerateCodeAndCompile';
% Run code generation for largeMatrixTest using the configuration stored in
% the workspace variable cfg, and request a code generation report.
codegen largeMatrixTest -config cfg -report
Vous pouvez également sélectionner un site web dans la liste suivante :
Comment optimiser les performances du site
Pour optimiser les performances du site, sélectionnez la région Chine (en chinois ou en anglais). Les sites de MathWorks pour les autres pays ne sont pas optimisés pour les visites provenant de votre région.
Amériques
América Latina (Español)
Canada (English)
United States (English)
Europe
Belgium (English)
Denmark (English)
Deutschland (Deutsch)
España (Español)
Finland (English)
France (Français)
Ireland (English)
Italia (Italiano)
Luxembourg (English)