x86_64-specific implementation of the support routines of NLKD. Signed-Off-By: Jan Beulich Index: 2.6.14-nlkd/debug/nlkd/dbgAMD64.S =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2.6.14-nlkd/debug/nlkd/dbgAMD64.S 2005-10-05 11:37:21.000000000 +0200 @@ -0,0 +1,403 @@ +/***************************************************************************** + * + * File Name: dbgAMD64.S + * Created by: jbeulich + * %version: 8 % + * %derived_by: jbeulich % + * %date_modified: Wed Oct 05 01:08:52 2005 % + * + ****************************************************************************/ +/***************************************************************************** + * * + * Copyright (c) 2002-2005 Novell, Inc. All Rights Reserved. * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General Public License * + * as published by the Free Software Foundation. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, contact Novell, Inc. * + * * + * To contact Novell about this file by physical or electronic mail, * + * you may find current contact information at www.novell.com. 
*
+ *                                                                           *
+ *****************************************************************************/
+/*****************************************************************************
+ *
+ * File Description:
+ *   x86_64 implementation of the NLKD support routines.
+ ****************************************************************************/
+// NOTE(review): the bare #include below has no header name (presumably lost in extraction) -- restore it before building.
+#include
+.equiv PROCEDURE_ALIGN, 1	// NOTE(review): presumably consumed by asm-amd64.h -- confirm
+#include "asm-amd64.h"
+
+// int dbgCompareMem(const void*, const void*, size_t); -- three-way compare of two byte ranges: returns -1, 0 or 1
+.pubproc dbgCompareMem
+	xchg	rdi, rsi		// cmpsb computes [rsi] - [rdi]: put the 1st buffer in rsi, the 2nd in rdi
+	mov	rcx, rdx		// rcx = byte count for the rep prefix
+	xor	edx, edx		// edx = 0, will receive the below indicator
+	xor	eax, eax		// eax = 0, will receive the above indicator; leaves ZF=1, CF=0 so a zero count yields 0
+	repe cmpsb			// stop after the first differing byte or when rcx is exhausted
+	setb	dl			// dl = 1 if the 1st buffer compared lower (unsigned bytes)
+	seta	al			// al = 1 if the 1st buffer compared higher (unsigned bytes)
+	sub	eax, edx		// eax = above - below
+	ret
+.endp dbgCompareMem
+
+// int dbgCompareStr(const char*, const char*); -- 0 if the strings are equal, else the difference of the first mismatching bytes (taken as unsigned)
+.pubproc dbgCompareStr
+	xor	eax, eax		// clear bits 8..31 so that al reads as a zero-extended byte
+1:
+	mov	al, [rdi]		// al = next byte of the 1st string
+	inc	rdi
+	test	al, al
+	je	2f			// 1st string ended: finish with al = 0
+	cmp	al, [rsi]
+	jnz	2f			// mismatch: finish with the differing byte in al
+	inc	rsi			// bytes match: advance the 2nd string as well
+	jmp	1b
+2:
+	movzx	edx, byte ptr [rsi]	// edx = current byte of the 2nd string
+	sub	eax, edx		// eax = byte of 1st string - byte of 2nd string
+	ret
+.endp dbgCompareStr
+
+// void*dbgCopyMem(void*, const void*, size_t);
+// Note: The minimum requirement on this routine is that copying between properly
+// aligned buffers with a suitable size is carried out with the maximum
+// access granularity possible.
+.pubproc dbgCopyMem
+	mov	rcx, rdx		// rcx = byte count (3rd argument)
+	test	dl, byte		// odd size? (relies on byte/word/dword/qword evaluating to 1/2/4/8)
+	mov	rax, rdi		// return value = destination; mov leaves the flags alone
+	jz	0f
+	rep movsb			// odd size: plain byte copy
+	ret
+0:
+	test	dl, word or dword	// bit 1 or bit 2 set, i.e. size not a multiple of 8?
+	jz	2f
+	shr	rcx, 2			// rcx = size / 4, CF = bit 1 of the size
+	jnc	1f			// multiple of 4: copy dwords
+	adc	rcx, rcx		// CF was set: rcx = 2 * (size / 4) + 1 = size / 2 words
+	.byte	0x66			// operand-size prefix: turns the rep movsd below into a 16-bit copy
+1:
+	rep movsd
+	ret
+2:
+	shr	rcx, 3			// rcx = size / 8
+	rep movsq
+	ret
+.endp dbgCopyMem
+
+// char*dbgCopyStr(char*, const char*); -- copies a NUL-terminated string including the terminator; returns the destination
+.pubproc dbgCopyStr
+	mov	rax, rdi		// return value = start of the destination
+0:
+	cmp	byte ptr [rsi], 0	// is the byte about to be copied the terminator?
+	movsb				// copy it; movsb does not modify the flags
+	jne	0b			// continue until the terminator has been copied
+	ret
+.endp dbgCopyStr
+
+// char*dbgCopyStrP(char*, const char*); -- like dbgCopyStr, but returns a pointer to the terminator stored in the destination
+.pubproc dbgCopyStrP
+0:
+	lodsb				// al = *src++
+	stosb				// *dst++ = al
+	test	al, al
+	jnz	0b			// loop until the terminator has been stored
+	lea	rax, [rdi-byte]		// rdi is one past the stored terminator; step back onto it
+	ret
+.endp dbgCopyStrP
+
+// unsigned dbgCountBits(uint64_t); -- returns the number of set bits in the argument
+.pubproc dbgCountBits
+	shld	rsi, rdi, 32		// esi = upper 32 bits of the argument
+	xor	eax, eax		// eax = running count
+0:
+	shr	edi, 1			// CF = lowest remaining bit of the lower half
+	adc	eax, 0			// count it
+	shr	esi, 1			// CF = lowest remaining bit of the upper half
+	adc	eax, 0			// count it
+	test	edi, edi
+	jnz	0b
+	test	esi, esi
+	jnz	0b			// loop until both halves are zero
+	ret
+.endp dbgCountBits
+
+// size_t dbgCountStr(const char*); -- returns the number of bytes before the NUL terminator
+.pubproc dbgCountStr
+	or	rcx, not 0		// rcx = -1: no limit on the scan
+	xor	eax, eax		// al = 0 is the byte searched for; rax = 0 is reused for the result
+	repne scasb			// rcx drops by one per byte examined, terminator included
+	add	rcx, 2			// rcx = -(length + 2) + 2 = -length
+	sub	rax, rcx		// rax = 0 - (-length) = length
+	ret
+.endp dbgCountStr
+
+// size_t dbgCountWstr(const wchar_t*); -- returns the number of 16-bit units before the zero terminator
+.pubproc dbgCountWstr
+	or	rcx, not 0		// rcx = -1: no limit on the scan
+	xor	eax, eax		// ax = 0 is the unit searched for; rax = 0 is reused for the result
+	repne scasw			// rcx drops by one per unit examined, terminator included
+	add	rcx, 2			// rcx = -(length + 2) + 2 = -length
+	sub	rax, rcx		// rax = 0 - (-length) = length
+	ret
+.endp dbgCountWstr
+
+// char*dbgFindChar(const char*, char); -- returns a pointer to the first occurrence of the character, or NULL if the string ends first
+.pubproc dbgFindChar
+0:
+	mov	dl, [rdi]		// dl = current byte of the string
+	cmp	sil, dl
+	je	1f			// match is checked before the end test, so searching for NUL finds the terminator
+	inc	rdi
+	test	dl, dl
+	jnz	0b			// not at the end yet: keep scanning
+	xor	eax, eax		// end reached without a match: return NULL
+	ret
+1:
+	mov	rax, rdi		// return the address of the matching byte
+	ret
+.endp dbgFindChar
+
+// wchar_t*dbgFindWchar(const wchar_t*, wchar_t); -- same as dbgFindChar, operating on 16-bit units
+.pubproc dbgFindWchar
+0:
+	mov	dx, [rdi]		// dx = current unit of the string
+	cmp	si, dx
+	je	1f			// match is checked before the end test, so searching for zero finds the terminator
+	add	rdi, word
+	test	dx, dx
+	jnz	0b			// not at the end yet: keep scanning
+	xor	eax, eax		// end reached without a match: return NULL
+	ret
+1:
+	mov	rax, rdi		// return the address of the matching unit
+	ret
+.endp dbgFindWchar
+
+// char*dbgWstr2Str(char*, const wchar_t*); -- narrows a string of 16-bit units to bytes, storing a question mark for units above 0x7f; returns the destination
+.pubproc dbgWstr2Str
+	mov	rax, rdi		// return value = start of the destination
+	mov	ecx, '?'		// replacement character; whole register is written because cmova copies all of ecx
+0:
+	movzx	edx, word ptr [rsi]	// edx = next source unit, zero-extended
+	add	rsi, word
+	cmp	edx, 0x7f
+	cmova	edx, ecx		// above 0x7f (unsigned): substitute the replacement
+	mov	[rdi], dl
+	inc	rdi
+	test	edx, edx
+	jnz	0b			// stop once the terminator has been stored
+	ret
+.endp dbgWstr2Str
+
+// int dbgLower(int); -- maps A..Z to a..z; any other value is returned unchanged
+.pubproc dbgLower
+	mov	eax, edi		// default result = the argument
+	cmp	edi, 'A'
+	jb	0f
+	cmp	edi, 'Z'
+	ja	0f			// unsigned compare, so negative arguments leave here as well
+	add	al, 'a' - 'A'
+0:
+	ret
+.endp dbgLower
+
+// int dbgUpper(int); -- maps a..z to A..Z; any other value is returned unchanged
+.pubproc dbgUpper
+	mov	eax, edi		// default result = the argument
+	cmp	edi, 'a'
+	jb	0f
+	cmp	edi, 'z'
+	ja	0f			// unsigned compare, so negative arguments leave here as well
+	add	al, 'A' - 'a'
+0:
+	ret
+.endp dbgUpper
+
+// void*dbgFillMem(void*, int, size_t); -- stores the low byte of the value into every byte of the buffer; returns the buffer
+.pubproc dbgFillMem
+	mov	rcx, rdx		// rcx = byte count (3rd argument)
+	mov	eax, esi		// al = fill byte
+	jmp	0f
+// void*dbgZeroMem(void*, size_t); -- second entry point: same as dbgFillMem with a fill value of zero
+.pubentry dbgZeroMem
+	mov	rcx, rsi		// rcx = byte count (2nd argument); all 64 bits are taken since the count is a size_t
+	xor	eax, eax		// al = 0
+0:
+	mov	rdx, rdi		// remember the start, rep stosb advances rdi
+	rep stosb
+	mov	rax, rdx		// return the original buffer pointer
+	ret
+.endp dbgFillMem
+
+// uint8_t*dbgFindInt8(const uint8_t*, uint8_t, size_t); -- returns a pointer to the first of the given number of bytes equal to the value, or NULL
+// void*dbgFindByte(const void*, int, size_t); -- second entry point into the same code
+.pubproc dbgFindInt8
+.pubentry dbgFindByte
+	mov	rcx, rdx		// rcx = element count
+	jrcxz	0f			// nothing to scan; the scan would not set the flags tested below
+	mov	eax, esi		// al = value to look for
+	repne scasb
+	lea	rax, [rdi-byte]		// candidate result: the element examined last; lea keeps the flags
+	je	1f			// ZF from the last compare: set means found
+0:
+	xor	eax, eax		// not found: return NULL
+1:
+	ret
+.endp dbgFindInt8
+
+// uint16_t*dbgFindInt16(const uint16_t*, uint16_t, size_t); -- same as dbgFindInt8 for 16-bit elements; the count is in elements
+.pubproc dbgFindInt16
+	mov	rcx, rdx		// rcx = element count
+	jrcxz	0f			// nothing to scan; the scan would not set the flags tested below
+	mov	eax, esi		// ax = value to look for
+	repne scasw
+	lea	rax, [rdi-word]		// candidate result: the element examined last; lea keeps the flags
+	je	1f			// ZF from the last compare: set means found
+0:
+	xor	eax, eax		// not found: return NULL
+1:
+	ret
+.endp dbgFindInt16
+
+// uint32_t*dbgFindInt32(const uint32_t*, uint32_t, size_t); -- same as dbgFindInt8 for 32-bit elements; the count is in elements
+// nuint_t*dbgFindIntN(const nuint_t*, nuint_t, size_t); -- second entry point into the same 32-bit scan
+.pubproc dbgFindInt32
+.pubentry dbgFindIntN
+	mov	rcx, rdx		// rcx = element count
+	jrcxz	0f			// nothing to scan; the scan would not set the flags tested below
+	mov	eax, esi		// eax = value to look for
+	repne scasd
+	lea	rax, [rdi-dword]	// candidate result: the element examined last; lea keeps the flags
+	je	1f			// ZF from the last compare: set means found
+0:
+	xor	eax, eax		// not found: return NULL
+1:
+	ret
+.endp dbgFindInt32
+
+// uint64_t*dbgFindInt64(const uint64_t*, uint64_t, size_t); -- same as dbgFindInt8 for 64-bit elements; the count is in elements
+// void**dbgFindPtr(const void**, const void*, size_t); -- second entry point into the same 64-bit scan
+.pubproc dbgFindInt64
+.pubentry dbgFindPtr
+	mov	rcx, rdx		// rcx = element count
+	jrcxz	0f			// nothing to scan; the scan would not set the flags tested below
+	mov	rax, rsi		// rax = value to look for
+	repne scasq
+	lea	rax, [rdi-qword]	// candidate result: the element examined last; lea keeps the flags
+	je	1f			// ZF from the last compare: set means found
+0:
+	xor	eax, eax		// not found: return NULL
+1:
+	ret
+.endp dbgFindInt64
+
+// uint64_t dbgMultiply(uint64_t, uint64_t, uint64_t*overflow); -- returns the low 64 bits of the unsigned product and stores the high 64 bits to *overflow
+.pubproc dbgMultiply
+	mov	rax, rdi		// rax = 1st factor
+	mov	rcx, rdx		// keep the overflow pointer, mul overwrites rdx
+	mul	rsi			// rdx:rax = rax * rsi (unsigned)
+	mov	[rcx], rdx		// *overflow = high half; the pointer is dereferenced unconditionally
+	ret
+.endp dbgMultiply
+
+// nint_t dbgLog2(uint64_t) -- returns the index of the highest set bit; for an argument of zero eax is set to -1
+.pubproc dbgLog2
+	bsr	rax, rdi		// sets ZF when the source operand is zero
+	jnz	0f
+	or	eax, not 0		// eax = -1 (32-bit write)
+0:
+	ret
+.endp dbgLog2
+
+// uint8_t dbgRotl8(uint8_t, nuint_t); -- rotates the 8-bit value left by the given count; result in al
+.pubproc dbgRotl8
+	mov	eax, edi		// al = value
+	mov	ecx, esi		// variable rotate counts are taken from cl
+	rol	al, cl
+	ret
+.endp dbgRotl8
+
+// uint8_t dbgRotr8(uint8_t, nuint_t); -- rotates the 8-bit value right by the given count; result in al
+.pubproc dbgRotr8
+	mov	eax, edi		// al = value
+	mov	ecx, esi		// variable rotate counts are taken from cl
+	ror	al, cl
+	ret
+.endp dbgRotr8
+
+// uint16_t dbgRotl16(uint16_t, nuint_t); -- rotates the 16-bit value left by the given count; result in ax
+.pubproc dbgRotl16
+	mov	eax, edi		// ax = value
+	mov	ecx, esi		// variable rotate counts are taken from cl
+	rol	ax, cl
+	ret
+.endp dbgRotl16
+
+// uint16_t dbgRotr16(uint16_t, nuint_t); -- rotates the 16-bit value right by the given count; result in ax
+.pubproc dbgRotr16
+	mov	eax, edi		// ax = value
+	mov	ecx, esi		// variable rotate counts are taken from cl
+	ror	ax, cl
+	ret
+.endp dbgRotr16
+
+// uint32_t dbgRotl32(uint32_t, nuint_t); -- rotates the 32-bit value left by the given count
+.pubproc dbgRotl32
+	mov	eax, edi		// eax = value
+	mov	ecx, esi		// variable rotate counts are taken from cl
+	rol	eax, cl
+	ret
+.endp dbgRotl32
+
+// uint32_t dbgRotr32(uint32_t, nuint_t); -- rotates the 32-bit value right by the given count
+.pubproc dbgRotr32
+	mov	eax, edi		// eax = value
+	mov	ecx, esi		// variable rotate counts are taken from cl
+	ror	eax, cl
+	ret
+.endp dbgRotr32
+
+// uint64_t dbgRotl64(uint64_t, nuint_t); -- rotates the 64-bit value left by the given count
+.pubproc dbgRotl64
+	mov	rax, rdi		// rax = value
+	mov	ecx, esi		// variable rotate counts are taken from cl
+	rol	rax, cl
+	ret
+.endp dbgRotl64
+
+// uint64_t dbgRotr64(uint64_t, nuint_t); -- rotates the 64-bit value right by the given count
+.pubproc dbgRotr64
+	mov	rax, rdi		// rax = value
+	mov	ecx, esi		// variable rotate counts are taken from cl
+	ror	rax, cl
+	ret
+.endp dbgRotr64
+
+.pubproc dbgDivideU
+	mov	eax, edi		// dbgDivideU: unsigned 32-bit division of the 1st argument by the 2nd
+	xor	edx, edx		// zero-extend the dividend into edx:eax
+	div	esi			// eax = quotient, edx = remainder
+	shl	rdx, 32			// move the remainder into bits 32..63
+	or	rax, rdx		// result: quotient in bits 0..31, remainder in bits 32..63
+	ret
+.endp dbgDivideU
+
+.pubproc dbgDivideS
+	mov	eax, edi		// dbgDivideS: signed 32-bit division of the 1st argument by the 2nd
+	cdq				// sign-extend the dividend into edx:eax
+	idiv	esi			// eax = quotient, edx = remainder; both 32-bit writes clear the upper halves
+	shl	rdx, 32			// move the remainder into bits 32..63
+	or	rax, rdx		// result: quotient in bits 0..31, remainder in bits 32..63
+	ret
+.endp dbgDivideS