i386-specific implementation of the support routines of NLKD.

Signed-Off-By: Jan Beulich

Index: 2.6.14-nlkd/debug/nlkd/dbgIA32.S
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ 2.6.14-nlkd/debug/nlkd/dbgIA32.S 2005-10-05 11:37:24.000000000 +0200
@@ -0,0 +1,721 @@
+/*****************************************************************************
+ *
+ * File Name: dbgIA32.S
+ * Created by: jbeulich
+ * %version: 10 %
+ * %derived_by: jbeulich %
+ * %date_modified: Wed Oct 05 01:10:16 2005 %
+ *
+ ****************************************************************************/
+/*****************************************************************************
+ *                                                                           *
+ * Copyright (c) 2001-2005 Novell, Inc. All Rights Reserved.                 *
+ *                                                                           *
+ * This program is free software; you can redistribute it and/or             *
+ * modify it under the terms of version 2 of the GNU General Public License  *
+ * as published by the Free Software Foundation.                             *
+ *                                                                           *
+ * This program is distributed in the hope that it will be useful,           *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of            *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the              *
+ * GNU General Public License for more details.                              *
+ *                                                                           *
+ * You should have received a copy of the GNU General Public License         *
+ * along with this program; if not, contact Novell, Inc.                     *
+ *                                                                           *
+ * To contact Novell about this file by physical or electronic mail,         *
+ * you may find current contact information at www.novell.com.               *
+ *                                                                           *
+ *****************************************************************************/
+/*****************************************************************************
+ *
+ * File Description:
+ *
+ ****************************************************************************/
+
+#include	/* NOTE(review): the include target is missing here (apparently lost in extraction) - restore it from the original patch */
+.equiv PROCEDURE_ALIGN, 1
+#include "asm-ia32.h"
+
+//todo .file "dbgutil"
+.text
+
+// int dbgCompareMem(const void*, const void*, size_t);	// bytewise unsigned compare; result is -1, 0 or +1
+.pubproc dbgCompareMem
+	mov	ecx, [esp+3*dword]	// ecx = byte count (third argument)
+	EHpush	esi			// EHpush/EHpop come from asm-ia32.h; the offsets below treat each as one dword push/pop
+	xor	eax, eax
+	EHpush	edi
+	xor	edx, edx		// also leaves ZF=1/CF=0, so a zero count falls through to a 0 result
+	mov	esi, [esp+3*dword]	// esi = first buffer (offsets shifted by the two pushes)
+	mov	edi, [esp+4*dword]	// edi = second buffer
+	repe cmpsb			// stops right after the first differing byte
+	seta	al			// al = 1 when the first buffer holds the larger byte
+	EHpop	edi
+	setb	dl			// dl = 1 when it holds the smaller byte
+	EHpop	esi
+	sub	eax, edx
+	ret
+.endp dbgCompareMem
+
+// int dbgCompareStr(const char*, const char*);	// difference of the first mismatching bytes (unsigned), 0 if equal
+.pubproc dbgCompareStr
+	xor	eax, eax		// upper 24 bits stay clear for the final subtraction
+	mov	ecx, [esp+dword]	// ecx = first string
+	mov	edx, [esp+2*dword]	// edx = second string
+0:
+	mov	al, [ecx]
+	inc	ecx
+	test	al, al
+	jz	1f			// first string ended
+	cmp	al, [edx]
+	jne	1f			// mismatch
+	inc	edx
+	jmp	0b
+1:
+	movzx	edx, byte ptr [edx]	// byte of the second string at the stop position
+	sub	eax, edx
+	ret
+.endp dbgCompareStr
+
+// void*dbgCopyMem(void*, const void*, size_t);
+// Note: The minimum requirement on this routine is that copying between properly
+// aligned buffers with a suitable size will be carried out with the maximum
+// access granularity possible.
+.pubproc dbgCopyMem	// copies by dwords, words or bytes depending on the low two bits of the count; returns the destination
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	ecx, [esp+3*dword]	// stack arguments: ecx = byte count
+#endif
+	EHpush	esi			// EHpush/EHpop come from asm-ia32.h; the offsets below treat each as one dword push/pop
+	EHpush	edi
+	test	cl, byte		// odd count? (assuming byte == 1; the movs below leave the flags alone)
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	edi, [esp+3*dword]	// destination (offsets shifted by the two pushes)
+	mov	esi, [esp+4*dword]	// source
+	mov	eax, edi		// return value
+#else
+	mov	edi, eax		// register arguments: eax = destination (also the return value)
+	mov	esi, edx		// edx = source, ecx = count
+#endif
+	jz	0f
+	rep movsb			// odd count: plain byte copy
+	EHpushState
+	EHpop	edi
+	EHpop	esi
+	ret
+0:
+	EHpopState
+	shr	ecx, 2			// dword count; CF = bit 1 of the byte count
+	jnc	1f			// multiple of four: copy dwords
+	adc	ecx, ecx		// otherwise ecx = 2*ecx+1 = number of 16-bit words
+	.byte	0x66			// operand-size prefix: the movsd below then moves words
+1:
+	rep movsd
+	EHpop	edi
+	EHpop	esi
+	ret
+.endp dbgCopyMem
+
+// char*dbgCopyStr(char*, const char*);	// copies including the terminator; returns the destination
+.pubproc dbgCopyStr
+	mov	ecx, [esp+2*dword]	// ecx = source
+	mov	edx, [esp+dword]	// edx = destination
+	EHpush	edx			// keep the destination start: it is the return value, not the source
+0:
+	mov	al, [ecx]
+	inc	ecx
+	mov	[edx], al
+	inc	edx
+	test	al, al
+	jnz	0b			// loop until the terminator has been stored
+	EHpop	eax			// eax = destination start
+	ret
+.endp dbgCopyStr
+
+// char*dbgCopyStrP(char*, const char*);	// as dbgCopyStr, but returns a pointer to the terminator written
+.pubproc dbgCopyStrP
+	mov	ecx, [esp+2*dword]	// ecx = source
+	mov	edx, [esp+dword]	// edx = destination
+0:
+	mov	al, [ecx]
+	inc	ecx
+	mov	[edx], al
+	inc	edx
+	test	al, al
+	jnz	0b
+	lea	eax, [edx-byte]		// step back onto the terminator just stored
+	ret
+.endp dbgCopyStrP
+
+// unsigned dbgCountBits(uint64_t);	// number of set bits in the argument
+.pubproc dbgCountBits
+	mov	ecx, [esp+dword]	// low half
+	mov	edx, [esp+2*dword]	// high half
+	xor	eax, eax
+0:
+	shr	ecx, 1			// shift one bit out of each half ...
+	adc	eax, 0			// ... and add it to the count
+	shr	edx, 1
+	adc	eax, 0
+	test	ecx, ecx
+	jnz	0b
+	test	edx, edx
+	jnz	0b			// done once both halves are exhausted
+	ret
+.endp dbgCountBits
+
+// size_t dbgCountStr(const char*);	// number of bytes before the terminator
+.pubproc dbgCountStr
+	EHpush	edi
+	or	ecx, not 0		// ecx = -1: effectively unbounded scan
+	xor	eax, eax		// al = 0 is the byte searched for
+	mov	edi, [esp+2*dword]
+	repne scasb			// leaves ecx = -(length + 2)
+	add	ecx, 2
+	EHpop	edi
+	sub	eax, ecx		// eax = 0 - (-length)
+	ret
+.endp dbgCountStr
+
+// size_t dbgCountWstr(const wchar_t*);	// number of 16-bit units before the terminator
+.pubproc dbgCountWstr
+	EHpush	edi
+	or	ecx, not 0		// ecx = -1: effectively unbounded scan
+	xor	eax, eax		// ax = 0 is the unit searched for
+	mov	edi, [esp+2*dword]
+	repne scasw			// leaves ecx = -(length + 2)
+	add	ecx, 2
+	EHpop	edi
+	sub	eax, ecx
+	ret
+.endp dbgCountWstr
+
+// char*dbgFindChar(const char*, char);	// first occurrence (the terminator itself can be found), else NULL
+.pubproc dbgFindChar
+	mov	eax, [esp+dword]
+	movzx	ecx, byte ptr [esp+2*dword]
+0:
+	movzx	edx, byte ptr [eax]
+	cmp	ecx, edx
+	je	1f			// found: eax points at the match
+	inc	eax
+	test	edx, edx
+	jnz	0b
+	xor	eax, eax		// reached the terminator without a match
+1:					// exit label local to this routine, target of je 1f above
+	ret
+.endp dbgFindChar
+
+// wchar_t*dbgFindWchar(const wchar_t*, wchar_t);	// 16-bit variant of dbgFindChar
+.pubproc dbgFindWchar
+	mov	eax, [esp+dword]
+	movzx	ecx, word ptr [esp+2*dword]
+0:
+	movzx	edx, word ptr [eax]
+	cmp	ecx, edx
+	je	1f			// found: eax points at the match
+	add	eax, word
+	test	edx, edx
+	jnz	0b
+	xor	eax, eax		// reached the terminator without a match
+1:
+	ret
+.endp dbgFindWchar
+
+// char*dbgWstr2Str(char*, const wchar_t*);	// narrows 16-bit units to bytes, values above 0x7f become '?'; returns the destination
+.pubproc dbgWstr2Str
+	EHpush	ebx
+	mov	bl, '?'			// substitute character; only the low byte of ebx is ever stored
+	mov	ecx, [esp+3*dword]	// ecx = source
+	mov	edx, [esp+2*dword]	// edx = destination
+	EHpush	edx			// keep the destination start: it is the return value, not the source
+0:
+	movzx	eax, word ptr [ecx]
+	add	ecx, word
+	cmp	eax, 0x7f
+	cmova	eax, ebx		// out of range: take the substitute
+	mov	[edx], al
+	inc	edx
+	test	eax, eax
+	jnz	0b			// loop until the terminator has been stored
+	EHpop	eax			// eax = destination start
+	EHpop	ebx
+	ret
+.endp dbgWstr2Str
+
+// int dbgLower(int);	// maps 'A'..'Z' to 'a'..'z', everything else is returned unchanged
+.pubproc dbgLower
+	mov	eax, [esp+dword]
+	cmp	eax, 'A'
+	jb	0f
+	cmp	eax, 'Z'
+	ja	0f
+	add	al, 'a' - 'A'
+0:
+	ret
+.endp dbgLower
+
+// int dbgUpper(int);	// maps 'a'..'z' to 'A'..'Z', everything else is returned unchanged
+.pubproc dbgUpper
+	mov	eax, [esp+dword]
+	cmp	eax, 'a'
+	jb	0f
+	cmp	eax, 'z'
+	ja	0f
+	add	al, 'A' - 'a'
+0:
+	ret
+.endp dbgUpper
+
+// void*dbgFillMem(void*, int, size_t);	// stores the low byte of the value count times; returns the destination
+.pubproc dbgFillMem
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	ecx, [esp+3*dword]	// count
+	mov	eax, [esp+2*dword]	// fill value
+#else
+	xchg	eax, edx		// eax = fill value, edx = destination (count is already in ecx)
+#endif
+	jmp	0f			// join the common tail shared with dbgZeroMem
+// void*dbgZeroMem(void*, size_t);	// dbgFillMem with a zero fill value
+.pubentry dbgZeroMem
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	ecx, [esp+2*dword]	// count
+#else
+	mov	ecx, edx		// count
+	mov	edx, eax		// destination
+#endif
+	xor	eax, eax
+0:
+	EHpush	edi
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	edi, [esp+2*dword]	// destination
+	EHpush	edi, FALSE		// saved for the return value, stosb advances edi
+#else
+	mov	edi, edx
+#endif
+	rep stosb
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	EHpop	eax			// eax = destination
+#else
+	mov	eax, edx		// eax = destination
+#endif
+	EHpop	edi
+	ret
+.endp dbgFillMem
+
+// uint8_t*dbgFindInt8(const uint8_t*, uint8_t, size_t);	// first matching element, or NULL (also for a zero count)
+// void*dbgFindByte(const void*, int, size_t);
+.pubproc dbgFindInt8
+.pubentry dbgFindByte
+	mov	ecx, [esp+3*dword]	// element count
+	EHpush	edi
+	jecxz	0f			// nothing to scan
+	mov	edi, [esp+2*dword]	// start of the array (offsets shifted by the push)
+	mov	al, [esp+3*dword]	// value searched for
+	repne scasb
+	lea	eax, [edi-byte]		// candidate: the element just compared (lea keeps the flags)
+	je	1f
+0:
+	xor	eax, eax
+1:
+	EHpop	edi
+	ret
+.endp dbgFindInt8
+
+// uint16_t*dbgFindInt16(const uint16_t*, uint16_t, size_t);	// first matching element, or NULL (also for a zero count)
+.pubproc dbgFindInt16
+	mov	ecx, [esp+3*dword]	// element count
+	EHpush	edi
+	jecxz	0f			// nothing to scan
+	mov	edi, [esp+2*dword]	// start of the array
+	mov	eax, [esp+3*dword]	// value searched for (scasw compares ax only)
+	repne scasw
+	lea	eax, [edi-word]		// candidate: the element just compared
+	je	1f
+0:
+	xor	eax, eax
+1:
+	EHpop	edi
+	ret
+.endp dbgFindInt16
+
+// uint32_t*dbgFindInt32(const uint32_t*, uint32_t, size_t);	// first matching element, or NULL (also for a zero count)
+// nuint_t*dbgFindIntN(const nuint_t*, nuint_t, size_t);
+// void**dbgFindPtr(const void**, const void*, size_t);
+.pubproc dbgFindInt32
+.pubentry dbgFindIntN
+.pubentry dbgFindPtr
+	mov	ecx, [esp+3*dword]	// element count
+	EHpush	edi
+	jecxz	0f			// nothing to scan
+	mov	edi, [esp+2*dword]	// start of the array
+	mov	eax, [esp+3*dword]	// value searched for
+	repne scasd
+	lea	eax, [edi-dword]	// candidate: the element just compared
+	je	1f
+0:
+	xor	eax, eax
+1:
+	EHpop	edi
+	ret
+.endp dbgFindInt32
+
+// uint64_t dbgMultiply(uint64_t, uint64_t, uint64_t*overflow);	// returns the low 64 bits of the product, stores the high 64 bits in *overflow
+.pubproc dbgMultiply
+	mov	eax, [esp+2*dword]	// high dword of the first factor
+	EHpush	edi
+	mov	edi, [esp+6*dword]	// edi = overflow pointer (offsets shifted by the push)
+	mul	dword ptr [esp+5*dword]	// high * high
+	mov	[edi], eax
+	mov	eax, [esp+2*dword]	// low dword of the first factor
+	mov	[edi+dword], edx
+	mul	dword ptr [esp+5*dword]	// low(first) * high(second)
+	mov	ecx, eax		// ecx collects bits 32..63 of the product
+	add	[edi], edx
+	mov	eax, [esp+3*dword]	// high dword of the first factor
+	adc	dword ptr [edi+dword], 0
+	mul	dword ptr [esp+4*dword]	// high(first) * low(second)
+	add	ecx, eax
+	adc	[edi], edx
+	mov	eax, [esp+2*dword]	// low dword of the first factor
+	adc	dword ptr [edi+dword], 0
+	mul	dword ptr [esp+4*dword]	// low * low; eax is the final low dword
+	add	edx, ecx		// edx = final bits 32..63
+	adc	dword ptr [edi], 0
+	adc	dword ptr [edi+dword], 0	// must be adc: carries the overflow of the low overflow dword into the high one
+	EHpop	edi
+	ret
+.endp dbgMultiply
+
+// nint_t dbgLog2(uint64_t)	// index of the highest set bit, -1 for a zero argument
+.pubproc dbgLog2
+	bsr	eax, [esp+2*dword]	// high half first; ZF set means it was zero
+	lea	eax, [eax+32]		// lea keeps the flags of the bsr
+	jnz	0f
+	bsr	eax, [esp+1*dword]	// low half
+	jnz	0f
+	or	eax, not 0		// no bit set at all
+0:
+	ret
+.endp dbgLog2
+
+// uint8_t dbgRotl8(uint8_t, nuint_t);	// rotate left by the count in the second argument
+.pubproc dbgRotl8
+	mov	cl, [esp+2*dword]
+	mov	al, [esp+dword]
+	rol	al, cl
+	ret
+.endp dbgRotl8
+
+// uint8_t dbgRotr8(uint8_t, nuint_t);	// rotate right
+.pubproc dbgRotr8
+	mov	cl, [esp+2*dword]
+	mov	al, [esp+dword]
+	ror	al, cl
+	ret
+.endp dbgRotr8
+
+// uint16_t dbgRotl16(uint16_t, nuint_t);	// rotate left
+.pubproc dbgRotl16
+	mov	cl, [esp+2*dword]
+	mov	eax, [esp+dword]
+	rol	ax, cl
+	ret
+.endp dbgRotl16
+
+// uint16_t dbgRotr16(uint16_t, nuint_t);	// rotate right
+.pubproc dbgRotr16
+	mov	cl, [esp+2*dword]
+	mov	eax, [esp+dword]
+	ror	ax, cl
+	ret
+.endp dbgRotr16
+
+// uint32_t dbgRotl32(uint32_t, nuint_t);	// rotate left
+.pubproc dbgRotl32
+	mov	cl, [esp+2*dword]
+	mov	eax, [esp+dword]
+	rol	eax, cl
+	ret
+.endp dbgRotl32
+
+// uint32_t dbgRotr32(uint32_t, nuint_t);	// rotate right
+.pubproc dbgRotr32
+	mov	cl, [esp+2*dword]
+	mov	eax, [esp+dword]
+	ror	eax, cl
+	ret
+.endp dbgRotr32
+
+// uint64_t dbgRotl64(uint64_t, nuint_t);	// rotate left; result in edx:eax
+.pubproc dbgRotl64
+	mov	cl, [esp+3*dword]	// rotate count
+	mov	eax, [esp+dword]	// low half
+	test	cl, 0x20		// count bit 5 set: start from swapped halves
+	mov	edx, [esp+2*dword]	// high half (mov and the push keep the flags)
+	EHpush	ebx
+	jz	0f
+	xchg	eax, edx
+0:
+	mov	ebx, edx		// copy of the high half before it is shifted
+	shld	edx, eax, cl		// the double shifts use the count modulo 32
+	shld	eax, ebx, cl
+	EHpop	ebx
+	ret
+.endp dbgRotl64
+
+// uint64_t dbgRotr64(uint64_t, nuint_t);	// rotate right; result in edx:eax
+.pubproc dbgRotr64
+	mov	cl, [esp+3*dword]	// rotate count
+	mov	eax, [esp+dword]	// low half
+	test	cl, 0x20		// count bit 5 set: start from swapped halves
+	mov	edx, [esp+2*dword]	// high half
+	EHpush	ebx
+	jz	0f
+	xchg	eax, edx
+0:
+	mov	ebx, eax		// copy of the low half before it is shifted
+	shrd	eax, edx, cl
+	shrd	edx, ebx, cl
+	EHpop	ebx
+	ret
+.endp dbgRotr64
+
+.pubproc dbgDivideU	// unsigned 32-bit divide; stores quotient and remainder through the pointer in the first stack slot
+	mov	eax, [esp+2*dword]	// dividend
+	xor	edx, edx		// zero-extend it into edx:eax
+	mov	ecx, [esp+1*dword]	// result pointer (presumably a hidden struct-return pointer - no prototype given, confirm)
+	div	dword ptr [esp+3*dword]
+	mov	[ecx], eax		// quotient
+	mov	[ecx+dword], edx	// remainder
+	mov	eax, ecx		// return the result pointer
+	ret	dword			// pops the first stack slot on return
+.endp dbgDivideU
+
+.pubproc dbgDivideS	// signed counterpart of dbgDivideU
+	mov	eax, [esp+2*dword]	// dividend
+	mov	ecx, [esp+1*dword]	// result pointer
+	cdq				// sign-extend the dividend into edx:eax
+	idiv	dword ptr [esp+3*dword]
+	mov	[ecx], eax		// quotient
+	mov	[ecx+dword], edx	// remainder
+	mov	eax, ecx		// return the result pointer
+	ret	dword			// pops the first stack slot on return
+.endp dbgDivideS
+
+.pubproc dbg__divdi3	// signed 64-bit divide, quotient in edx:eax; shares its tail with dbg__udivdi3
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	eax, [esp+1*dword]	// stack arguments: edx:eax = dividend
+	mov	edx, [esp+2*dword]
+# define ARG2_SLOT 3
+#else
+# define ARG2_SLOT 1
+#endif
+	EHpushState
+	EHpush	ebp
+	EHpush	ebx
+	mov	ebp, [esp+(ARG2_SLOT+3)*dword]	// ebp:ebx = divisor (offsets include the two pushes)
+	mov	ebx, [esp+(ARG2_SLOT+2)*dword]
+	EHpush	esi
+	mov	ecx, edx
+	mov	esi, ebp
+	sar	ecx, 31			// ecx = sign mask of the dividend
+	sar	esi, 31			// esi = sign mask of the divisor
+	xor	eax, ecx		// xor with the mask, then subtract it: absolute value
+	xor	edx, ecx
+	xor	ebx, esi
+	xor	ebp, esi
+	sub	eax, ecx
+	sbb	edx, ecx
+	xor	ecx, esi		// ecx = sign mask of the quotient, applied at .LreturnDIV
+	sub	ebx, esi
+	sbb	ebp, esi		// flags of this sbb select the path below (the pop keeps them)
+	EHpop	esi
+	jz	.LquickDIV		// high dword of the divisor is zero
+	jns	.LscaleDIV
+	// EDX:EAX <= 80000000:00000000, EBP:EBX == 80000000:00000000
+	mov	eax, edx
+	xor	edx, edx
+	shr	eax, 31			// quotient is 1 only if the dividend has the same magnitude
+	jmp	.LreturnDIV
+	EHpopState
+.pubentry dbg__udivdi3
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	eax, [esp+1*dword]	// stack arguments: edx:eax = dividend
+	mov	edx, [esp+2*dword]
+#endif
+	EHpush	ebp
+	EHpush	ebx
+	xor	ecx, ecx		// no sign correction for the unsigned entry
+	mov	ebp, [esp+(ARG2_SLOT+3)*dword]	// ebp:ebx = divisor
+	mov	ebx, [esp+(ARG2_SLOT+2)*dword]
+	test	ebp, ebp
+	jz	.LquickDIV		// divisor fits in 32 bits
+	js	.LcompareDIV		// top bit set: the quotient can only be 0 or 1
+.LscaleDIV:
+	EHpush	ecx			// sign mask
+	bsr	ecx, ebp
+	EHpush	edx			// high dword of the dividend
+	EHpush	eax			// low dword of the dividend
+	inc	ecx			// number of bits to shift (at most 31)
+	EHpush	ebx, FALSE		// low dword of the divisor
+	shrd	ebx, ebp, cl		// scale the divisor down to 32 bits ...
+	shrd	eax, edx, cl		// ... and the dividend by the same amount
+	shr	edx, cl
+	div	ebx			// quotient estimate
+	EHpop	ebx, FALSE		// ebx = low dword of the divisor
+	imul	ebp, eax		// lo(quotient * hi(divisor))
+	mov	ecx, eax
+	mul	ebx			// quotient * lo(divisor)
+	EHpop	ebx, FALSE		// ebx = low dword of the dividend
+	add	ebp, edx		// EBP:EAX = quotient * divisor
+	sbb	ecx, 0			// adjust quotient (if overflow)
+	xor	edx, edx
+	sub	ebx, eax
+	EHpop	ebx, FALSE		// ebx = high dword of the dividend (the pop keeps the borrow)
+	mov	eax, ecx
+	sbb	ebx, ebp
+	EHpop	ecx			// sign mask
+	sbb	eax, edx		// adjust quotient
+	jmp	.LreturnDIV
+.LcompareDIV:
+	sub	eax, ebx
+	mov	eax, 0			// mov rather than xor: the borrow must survive
+	sbb	edx, ebp
+	mov	edx, eax
+	sbb	eax, -1			// eax = 1 - borrow
+	jmp	.LreturnDIV
+.LquickDIV:
+	cmp	edx, ebx
+	jb	0f			// quotient fits in 32 bits: one division suffices
+	mov	ebp, eax
+	mov	eax, edx
+	xor	edx, edx
+	div	ebx			// high dword of the quotient
+	xchg	eax, ebp		// ebp = high quotient, eax = low dividend, edx = partial remainder
+0:
+	div	ebx			// low dword of the quotient
+	mov	edx, ebp		// ebp is zero when the first division was skipped
+.LreturnDIV:
+	xor	eax, ecx		// negate edx:eax if the sign mask in ecx is all ones
+	EHpop	ebx
+	xor	edx, ecx
+	sub	eax, ecx
+	EHpop	ebp
+	sbb	edx, ecx
+	ret
+#undef ARG2_SLOT
+.endp dbg__divdi3
+
+.pubproc dbg__moddi3	// signed 64-bit remainder in edx:eax, carrying the sign of the dividend; shares its tail with dbg__umoddi3
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	eax, [esp+1*dword]	// stack arguments: edx:eax = dividend
+	mov	edx, [esp+2*dword]
+# define ARG2_SLOT 3
+#else
+# define ARG2_SLOT 1
+#endif
+	EHpushState
+	EHpush	ebp
+	EHpush	ebx
+	mov	ebp, [esp+(ARG2_SLOT+3)*dword]	// ebp:ebx = divisor (offsets include the two pushes)
+	mov	ebx, [esp+(ARG2_SLOT+2)*dword]
+	EHpush	esi
+	mov	ecx, edx
+	mov	esi, ebp
+	sar	ecx, 31			// ecx = sign mask of the dividend, applied at .LreturnMOD
+	sar	esi, 31			// esi = sign mask of the divisor
+	xor	eax, ecx		// xor with the mask, then subtract it: absolute value
+	xor	edx, ecx
+	xor	ebx, esi
+	xor	ebp, esi
+	sub	eax, ecx
+	sbb	edx, ecx
+	sub	ebx, esi
+	sbb	ebp, esi		// flags of this sbb select the path below (the pop keeps them)
+	EHpop	esi
+	jz	.LquickMOD		// high dword of the divisor is zero
+	jns	.LscaleMOD
+	// EDX:EAX <= 80000000:00000000, EBP:EBX == 80000000:00000000
+	not	ebp
+	and	edx, ebp		// clears bit 63: equal magnitude gives 0, anything smaller is unchanged
+	jmp	.LreturnMOD
+	EHpopState
+.pubentry dbg__umoddi3
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	eax, [esp+1*dword]	// stack arguments: edx:eax = dividend
+	mov	edx, [esp+2*dword]
+#endif
+	EHpush	ebp
+	EHpush	ebx
+	xor	ecx, ecx		// no sign correction for the unsigned entry
+	mov	ebp, [esp+(ARG2_SLOT+3)*dword]	// ebp:ebx = divisor
+	mov	ebx, [esp+(ARG2_SLOT+2)*dword]
+	test	ebp, ebp
+	jz	.LquickMOD		// divisor fits in 32 bits
+	js	.LcompareMOD		// top bit set: at most one subtraction is needed
+.LscaleMOD:
+	EHpush	ecx			// sign mask
+	bsr	ecx, ebp
+	EHpush	esi
+	mov	esi, edx		// esi:edi = copy of the dividend
+	EHpush	edi
+	inc	ecx			// number of bits to shift (at most 31)
+	mov	edi, eax
+	EHpush	ebx, FALSE		// low dword of the divisor
+	shrd	ebx, ebp, cl		// scale the divisor down to 32 bits ...
+	shrd	eax, edx, cl		// ... and the dividend by the same amount
+	shr	edx, cl
+	mov	ecx, [esp]		// ecx = low dword of the divisor
+	div	ebx			// quotient estimate
+	mov	[esp], ebp		// stack slot now holds the high dword of the divisor
+	imul	ebp, eax		// lo(quotient * hi(divisor))
+	mul	ecx			// quotient * lo(divisor)
+	add	edx, ebp		// EDX:EAX = quotient * divisor
+	sbb	ebp, ebp		// record overflow
+	sub	edi, eax		// esi:edi = dividend - quotient * divisor
+	sbb	esi, edx
+	mov	eax, ecx		// eax = low dword of the divisor
+	sbb	ebp, 0			// accumulate overflow
+	EHpop	edx			// edx = high dword of the divisor
+	and	eax, ebp		// keep the divisor only if a correction is due ...
+	and	edx, ebp
+	add	eax, edi		// ... and add it back to the remainder
+	EHpop	edi
+	adc	edx, esi
+	EHpop	esi
+	EHpop	ecx			// sign mask
+	jmp	.LreturnMOD
+.LcompareMOD:
+	sub	eax, ebx		// try dividend - divisor
+	EHpush	esi
+	sbb	edx, ebp
+	sbb	esi, esi		// esi = all ones if that borrowed
+	and	ebx, esi
+	and	ebp, esi
+	add	eax, ebx		// undo the subtraction in that case
+	EHpop	esi
+	adc	edx, ebp
+	jmp	.LreturnMOD
+.LquickMOD:
+	cmp	edx, ebx
+	jb	0f			// quotient fits in 32 bits: one division suffices
+	mov	ebp, eax
+	mov	eax, edx
+	xor	edx, edx
+	div	ebx			// reduce the high dword first
+	xchg	eax, ebp		// eax = low dividend, edx = partial remainder
+0:
+	div	ebx
+	mov	eax, edx		// the remainder is the result
+	xor	edx, edx
+.LreturnMOD:
+	xor	eax, ecx		// negate edx:eax if the sign mask in ecx is all ones
+	EHpop	ebx
+	xor	edx, ecx
+	sub	eax, ecx
+	EHpop	ebp
+	sbb	edx, ecx
+	ret
+#undef ARG2_SLOT
+.endp dbg__moddi3