Assembler with LCC-Win32

Packed addition using the MMX instruction paddb.

 

MmxPadd


#include <stdio.h>

void __declspec(naked) mmx_packed_add(unsigned char * a, unsigned char * b, int n)
{
    // PADDB does a packed addition between its two 64-bit operands,
    // storing the result in the destination (second) operand. PADDB
    // treats the 64-bit operands as vectors of eight bytes and adds
    // each byte individually.
    _asm("movl 4(%esp),%eax");  // src address (memory)
    _asm("movl 8(%esp),%edx");  // dest address (memory)
    _asm("movl 12(%esp),%ecx"); // number of 8 bytes (quads)
        _asm("cmpl $0,%ecx");
    _asm("je lexit");

_asm("lpp:");
    _asm("decl %ecx");
    _asm("movq (%eax),%mm0");   // load first 8 bytes of src
    _asm("movq (%edx),%mm1");   // load first 8 bytes of dest
    _asm("paddb %mm0, %mm1");   // add
    _asm("movq %mm1,(%edx)");   // store result back in dest
    _asm("addl $8,%edx");       // add 8 bytes to each address
    _asm("addl $8,%eax");
        _asm("cmpl $0,%ecx");
    _asm("jne lpp");
_asm("lexit:");
     _asm("ret");

}

// Demo driver: fills a 16-byte source with 120 and a 16-byte destination
// with 17, adds source into destination with mmx_packed_add, then prints
// every destination byte as a decimal number followed by a space.
int main(void)
{
    enum { BYTE_COUNT = 16 };
    unsigned char src[BYTE_COUNT];
    unsigned char dest[BYTE_COUNT];
    int k;

    for (k = 0; k < BYTE_COUNT; k++) {
        src[k] = 120;
        dest[k] = 17;
    }

    // The routine works on 8-byte groups: 16/8 == 2
    mmx_packed_add(src, dest, BYTE_COUNT / 8);

    k = 0;
    while (k < BYTE_COUNT) {
        printf("%d ", dest[k]);
        k++;
    }

    return 0;
}

Back to main page