gcc is smart enough to expand memcpy calls inline for short memory blocks
whose size is known at compile time when optimisations are enabled:
fangorn:~/tmp 273> cat test.c
#include <string.h>
/*
 * Copy two ints from the storage p points to into the storage q points to.
 * The byte count handed to memcpy, 2 * sizeof *p, is a compile-time
 * constant.
 */
void foo(int *p, int *q)
{
memcpy(q, p, 2 * sizeof *p);
}
fangorn:~/tmp 274> gcc -O2 -S test.c
fangorn:~/tmp 275> cat test.s
# Assembly generated from test.c (AT&T syntax, 32-bit x86).
.file "test.c"
.text
.p2align 4,,15 # align to 16 bytes, but only if that needs at most 15 bytes of padding
.globl foo
.type foo, @function
# void foo(int *p, int *q)
# In:    8(%ebp) = p (copy source), 12(%ebp) = q (copy destination)
# Uses:  %eax, %ecx, %edx as scratch; %ebp is saved and restored
# Note:  there is no call instruction in the body; the 8-byte copy is
#        done with two 32-bit load/store pairs.
foo:
pushl %ebp # save the caller's frame pointer
movl %esp, %ebp # establish this function's frame
movl 8(%ebp), %edx # edx = p
movl 12(%ebp), %ecx # ecx = q
movl (%edx), %eax # eax = p[0]
movl %eax, (%ecx) # q[0] = eax
movl 4(%edx), %eax # eax = p[1]
movl %eax, 4(%ecx) # q[1] = eax
popl %ebp # restore the caller's frame pointer
ret
.size foo, .-foo # symbol size = distance from the foo label to this point
.section .note.GNU-stack,"",@progbits
.ident "GCC: (GNU) 3.3.3"
Even if you have no clue about x86 assembly, you can easily see that there
is no memcpy call in the code generated by gcc for this function: the two
ints are copied directly by two pairs of movl instructions (a load followed
by a store). One more reason to prefer memcpy to for loops.