内存拷贝的优化方法(草稿) [2]

程序员文章站 2022-06-17 08:34:42

...

以下为引用： global _fast_memcpy9 %define param esp+12+4 %define src param+0 %define dst param+4 %define len param+8 %define CACHEBLOCK 400h _fast_memcpy9: push esi push edi push ebx mov esi, [src] ; source array mov edi, [dst] ; destination array mov ecx, [len] ; number o…

以下为引用：

;-----------------------------------------------------------------------
; _fast_memcpy9 -- block-prefetch memory copy using MMX registers and
;                  non-temporal stores (movntq / prefetchnta / sfence).
;
; Syntax: NASM (Intel operand order), 32-bit x86.
; Args:   read from the stack, in this order (see the %defines below):
;           src -- source pointer
;           dst -- destination pointer
;           len -- length in BYTES
; Return: no value is produced.
; Clobb:  eax, ecx, mm0-mm7, flags.  esi, edi and ebx are saved and
;         restored by the routine.
;
; Strategy: whole blocks of CACHEBLOCK qwords are copied by first
; touching every cache line of the block (to pull it into cache) and
; then streaming it out with movntq.  Whatever does not fill a whole
; block (including the case len < one block, or len == 0) is copied
; byte-wise with rep movsb, so any length is handled.
;
; NOTE: the block-count masking below requires CACHEBLOCK to be a power
; of two and a multiple of 16 (the prefetch loop is unrolled 2x over
; 64-byte lines).
;-----------------------------------------------------------------------
global _fast_memcpy9
%define param esp+12+4                  ; first argument: 3 saved regs (12) + return address (4)
%define src param+0
%define dst param+4
%define len param+8

%define CACHEBLOCK 400h                 ; block size in QWORDS (400h * 8 = 8192 bytes)

_fast_memcpy9:
        push    esi
        push    edi
        push    ebx

        mov     esi, [src]              ; esi = source pointer
        mov     edi, [dst]              ; edi = destination pointer
        mov     ecx, [len]              ; ecx = length in bytes
        shr     ecx, 3                  ; ecx = length in qwords
        and     ecx, -CACHEBLOCK        ; ecx = qwords that make up whole blocks
        jz      .tail                   ; no whole block: byte copy only

        lea     esi, [esi+ecx*8]        ; esi = end of block-copied source region
        lea     edi, [edi+ecx*8]        ; edi = end of block-copied destination region
        neg     ecx                     ; negative qword offset: pointer and loop counter in one

.mainloop:
        mov     eax, CACHEBLOCK / 16    ; .prefetchloop touches 2 lines (16 qwords) per pass
        add     ecx, CACHEBLOCK         ; move up to the end of this block

.prefetchloop:
        mov     ebx, [esi+ecx*8-64]     ; read one address in this cache line...
        mov     ebx, [esi+ecx*8-128]    ; ... and one in the previous line
        sub     ecx, 16                 ; 16 qwords = two 64-byte cache lines
        dec     eax
        jnz     .prefetchloop           ; on exit ecx is back at the start of the block

        mov     eax, CACHEBLOCK / 8     ; .writeloop moves 8 qwords (64 bytes) per pass

.writeloop:
        prefetchnta [esi+ecx*8 + 512]   ; fetch ahead by 512 bytes

        movq    mm0, qword [esi+ecx*8]
        movq    mm1, qword [esi+ecx*8+8]
        movq    mm2, qword [esi+ecx*8+16]
        movq    mm3, qword [esi+ecx*8+24]
        movq    mm4, qword [esi+ecx*8+32]
        movq    mm5, qword [esi+ecx*8+40]
        movq    mm6, qword [esi+ecx*8+48]
        movq    mm7, qword [esi+ecx*8+56]

        movntq  qword [edi+ecx*8], mm0
        movntq  qword [edi+ecx*8+8], mm1
        movntq  qword [edi+ecx*8+16], mm2
        movntq  qword [edi+ecx*8+24], mm3
        movntq  qword [edi+ecx*8+32], mm4
        movntq  qword [edi+ecx*8+40], mm5
        movntq  qword [edi+ecx*8+48], mm6
        movntq  qword [edi+ecx*8+56], mm7

        add     ecx, 8
        dec     eax
        jnz     .writeloop

        test    ecx, ecx                ; offset reaches 0 exactly after the last whole block
        jnz     .mainloop

        sfence                          ; flush write-combining buffers
        emms                            ; leave MMX state so x87 code can run again

.tail:
        ; esi/edi point just past the block-copied region (or at the
        ; start of the buffers when no whole block was copied).
        mov     ecx, [len]              ; re-read byte length (stack layout unchanged)
        and     ecx, CACHEBLOCK*8 - 1   ; ecx = bytes left over after the whole blocks
        rep movsb                       ; no-op when ecx == 0; relies on DF being clear on entry

        pop     ebx
        pop     edi
        pop     esi

        ret

相关标签：内存拷贝优化方法草稿以下引用 global fa

上一篇：如何压缩一段文本后存入mysql中_PHP

下一篇： PHP中try{}catch{}的具体用法详解_PHP教程

内存拷贝的优化方法(草稿) [2]

深入C# 内存管理以及优化的方法详解

深入C# 内存管理以及优化的方法详解

mysql 数据库中my.ini的优化 2G内存针对站多抗压型的设置

Android 中对于图片的内存优化方法

mysql 数据库中my.ini的优化 2G内存针对站多抗压型的设置

Android 中对于图片的内存优化方法

VMware vSphere最低内存2G的限制的解决方法

vue-cli2 构建速度优化的实现方法

全面优化Windows系统内存的九招方法介绍

51模拟器内存不足1G怎么办?修改51模拟器内存大小的2种方法

内存拷贝的优化方法(草稿) [2]

深入C# 内存管理以及优化的方法详解

深入C# 内存管理以及优化的方法详解

mysql 数据库中my.ini的优化 2G内存针对站多 抗压型的设置

Android 中对于图片的内存优化方法

mysql 数据库中my.ini的优化 2G内存针对站多 抗压型的设置

Android 中对于图片的内存优化方法

VMware vSphere最低内存2G的限制的解决方法

vue-cli2 构建速度优化的实现方法

全面优化Windows系统内存的九招方法介绍

51模拟器内存不足1G怎么办?修改51模拟器内存大小的2种方法

mysql 数据库中my.ini的优化 2G内存针对站多抗压型的设置

mysql 数据库中my.ini的优化 2G内存针对站多抗压型的设置