i386-specific implementation of the support routines of NLKD.

Signed-Off-By: Jan Beulich <jbeulich@novell.com>

Index: 2.6.14-nlkd/debug/nlkd/dbgIA32.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ 2.6.14-nlkd/debug/nlkd/dbgIA32.S	2005-10-05 11:37:24.000000000 +0200
@@ -0,0 +1,721 @@
+/*****************************************************************************
+ *
+ *   File Name:      dbgIA32.S
+ *   Created by:     jbeulich
+ *   %version:       10 %
+ *   %derived_by:    jbeulich %
+ *   %date_modified: Wed Oct 05 01:10:16 2005 %
+ *
+ ****************************************************************************/
+/*****************************************************************************
+ *                                                                           *
+ * Copyright (c) 2001-2005 Novell, Inc. All Rights Reserved.                 *
+ *                                                                           *
+ * This program is free software; you can redistribute it and/or             *
+ * modify it under the terms of version 2 of the GNU General Public License  *
+ * as published by the Free Software Foundation.                             *
+ *                                                                           *
+ * This program is distributed in the hope that it will be useful,           *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of            *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the              *
+ * GNU General Public License for more details.                              *
+ *                                                                           *
+ * You should have received a copy of the GNU General Public License         *
+ * along with this program; if not, contact Novell, Inc.                     *
+ *                                                                           *
+ * To contact Novell about this file by physical or electronic mail,         *
+ * you may find current contact information at www.novell.com.               *
+ *                                                                           *
+ *****************************************************************************/
+/*****************************************************************************
+ *
+ *	 File Description:
+ *	 i386-specific implementation of the support routines of NLKD.
+ ****************************************************************************/
+
+#include <linux/config.h>
+.equiv PROCEDURE_ALIGN, 1
+#include "asm-ia32.h"
+
+//todo .file "dbgutil"
+.text
+
+// int dbgCompareMem(const void*, const void*, size_t);	// bytewise unsigned compare, returns -1, 0 or 1
+.pubproc dbgCompareMem
+	mov	ecx, [esp+3*dword]		// ecx = byte count (3rd argument)
+	EHpush	esi
+	xor	eax, eax
+	EHpush	edi
+	xor	edx, edx			// also leaves ZF=1/CF=0, so a zero count returns 0
+	mov	esi, [esp+3*dword]		// 1st buffer (offsets account for the two pushes)
+	mov	edi, [esp+4*dword]		// 2nd buffer
+	repe cmpsb				// stop at the first mismatch or when ecx reaches 0
+	seta	al				// al = 1 if the byte from the 1st buffer is greater
+	EHpop	edi
+	setb	dl				// dl = 1 if the byte from the 1st buffer is smaller
+	EHpop	esi
+	sub	eax, edx			// 1, 0 or -1
+	ret
+.endp dbgCompareMem
+
+// int dbgCompareStr(const char*, const char*);	// returns the difference of the first mismatching bytes, 0 if equal
+.pubproc dbgCompareStr
+	xor	eax, eax			// upper bits stay zero, al receives each byte
+	mov	ecx, [esp+dword]		// 1st string
+	mov	edx, [esp+2*dword]		// 2nd string
+0:
+	mov	al, [ecx]
+	inc	ecx
+	test	al, al
+	jz	1f				// end of the 1st string
+	cmp	al, [edx]
+	jne	1f				// mismatch
+	inc	edx
+	jmp	0b
+1:
+	movzx	edx, byte ptr [edx]
+	sub	eax, edx			// byte of 1st string minus byte of 2nd string, both zero-extended
+	ret
+.endp dbgCompareStr
+
+// void*dbgCopyMem(void*, const void*, size_t);
+// Note: The minimum requirement for this routine is that copying between properly
+//       aligned buffers with a suitable size will be carried out with the maximum
+//       access granularity possible.
+.pubproc dbgCopyMem
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	ecx, [esp+3*dword]		// count from the stack; in the other build it is used from ecx as passed
+#endif
+	EHpush	esi
+	EHpush	edi
+	test	cl, byte			// odd count? (byte is presumably 1); flags survive the movs below
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	edi, [esp+3*dword]		// destination
+	mov	esi, [esp+4*dword]		// source
+	mov	eax, edi			// return value = destination
+#else
+	mov	edi, eax			// destination is taken from eax, which is left untouched as the return value
+	mov	esi, edx			// source is taken from edx
+#endif
+	jz	0f
+	rep movsb				// odd count: plain byte copy
+	EHpushState
+	EHpop	edi
+	EHpop	esi
+	ret
+0:
+	EHpopState
+	shr	ecx, 2				// dword count; CF = bit 1 of the byte count
+	jnc	1f
+	adc	ecx, ecx			// count is 2 mod 4: ecx = number of words
+	.byte	0x66				// operand size prefix turns the movsd below into movsw
+1:
+	rep movsd
+	EHpop	edi
+	EHpop	esi
+	ret
+.endp dbgCopyMem
+
+// char*dbgCopyStr(char*, const char*);	// copies up to and including the NUL, returns the destination
+.pubproc dbgCopyStr
+	mov	ecx, [esp+2*dword]		// source
+	mov	edx, [esp+dword]		// destination
+	EHpush	edx				// keep the destination start for the return value
+0:
+	mov	al, [ecx]
+	inc	ecx
+	mov	[edx], al
+	inc	edx
+	test	al, al
+	jnz	0b				// loop until the NUL has been copied
+	EHpop	eax				// return the destination (was: the source pointer)
+	ret
+.endp dbgCopyStr
+
+// char*dbgCopyStrP(char*, const char*);	// copies including the NUL, returns a pointer to the NUL written to the destination
+.pubproc dbgCopyStrP
+	mov	ecx, [esp+2*dword]		// source
+	mov	edx, [esp+dword]		// destination
+0:
+	mov	al, [ecx]
+	inc	ecx
+	mov	[edx], al
+	inc	edx
+	test	al, al
+	jnz	0b				// loop until the NUL has been copied
+	lea	eax, [edx-byte]			// edx is one past the NUL; step back onto it
+	ret
+.endp dbgCopyStrP
+
+// unsigned dbgCountBits(uint64_t);	// returns the number of set bits
+.pubproc dbgCountBits
+	mov	ecx, [esp+dword]		// low half
+	mov	edx, [esp+2*dword]		// high half
+	xor	eax, eax			// bit counter
+0:
+	shr	ecx, 1				// shift the lowest bit of each half into CF ...
+	adc	eax, 0				// ... and add it to the counter
+	shr	edx, 1
+	adc	eax, 0
+	test	ecx, ecx
+	jnz	0b
+	test	edx, edx
+	jnz	0b				// done once both halves are zero
+	ret
+.endp dbgCountBits
+
+// size_t dbgCountStr(const char*);	// returns the number of bytes before the terminating NUL
+.pubproc dbgCountStr
+	EHpush	edi
+	or	ecx, not 0			// ecx = -1: effectively no limit for the scan
+	xor	eax, eax			// al = 0 is the byte searched for
+	mov	edi, [esp+2*dword]		// string (offset accounts for the push)
+	repne scasb				// afterwards ecx = -(length + 2)
+	add	ecx, 2				// ecx = -length
+	EHpop	edi
+	sub	eax, ecx			// 0 - (-length)
+	ret
+.endp dbgCountStr
+
+// size_t dbgCountWstr(const wchar_t*);	// returns the number of 16-bit units before the terminating zero unit
+.pubproc dbgCountWstr
+	EHpush	edi
+	or	ecx, not 0			// ecx = -1: effectively no limit for the scan
+	xor	eax, eax			// ax = 0 is the unit searched for
+	mov	edi, [esp+2*dword]		// string (offset accounts for the push)
+	repne scasw				// afterwards ecx = -(length + 2)
+	add	ecx, 2				// ecx = -length
+	EHpop	edi
+	sub	eax, ecx			// 0 - (-length)
+	ret
+.endp dbgCountWstr
+
+// char*dbgFindChar(const char*, char);	// returns the first occurrence (the NUL itself if searching for 0), else NULL
+.pubproc dbgFindChar
+	mov	eax, [esp+dword]		// string
+	movzx	ecx, byte ptr [esp+2*dword]	// character searched for
+0:
+	movzx	edx, byte ptr [eax]
+	cmp	ecx, edx
+	je	1f				// found: eax points at the match
+	inc	eax
+	test	edx, edx
+	jnz	0b				// continue until the NUL has been examined
+	xor	eax, eax			// not found
+1:
+	ret
+.endp dbgFindChar
+
+// wchar_t*dbgFindWchar(const wchar_t*, wchar_t);	// returns the first occurrence (the terminator itself if searching for 0), else NULL
+.pubproc dbgFindWchar
+	mov	eax, [esp+dword]		// string of 16-bit units
+	movzx	ecx, word ptr [esp+2*dword]	// unit searched for
+0:
+	movzx	edx, word ptr [eax]
+	cmp	ecx, edx
+	je	1f				// found: eax points at the match
+	add	eax, word
+	test	edx, edx
+	jnz	0b				// continue until the terminator has been examined
+	xor	eax, eax			// not found
+1:
+	ret
+.endp dbgFindWchar
+
+// char*dbgWstr2Str(char*, const wchar_t*);	// narrows 16-bit units to bytes ('?' for units above 0x7f), returns the destination
+.pubproc dbgWstr2Str
+	EHpush	ebx
+	mov	bl, '?'				// replacement character; only bl matters as just al gets stored
+	mov	ecx, [esp+3*dword]		// source (offset accounts for the push)
+	mov	edx, [esp+2*dword]		// destination
+	EHpush	edx				// keep the destination start for the return value
+0:
+	movzx	eax, word ptr [ecx]
+	add	ecx, word
+	cmp	eax, 0x7f
+	cmova	eax, ebx			// unit does not fit in 7 bits: substitute '?'
+	mov	[edx], al
+	inc	edx
+	test	eax, eax
+	jnz	0b				// loop until the terminator has been stored
+	EHpop	eax				// return the destination (was: the source pointer)
+	EHpop	ebx
+	ret
+.endp dbgWstr2Str
+
+// int dbgLower(int);	// maps 'A'..'Z' to 'a'..'z'; any other value is returned unchanged
+.pubproc dbgLower
+	mov	eax, [esp+dword]
+	cmp	eax, 'A'
+	jb	0f				// unsigned compare of the full 32-bit value
+	cmp	eax, 'Z'
+	ja	0f
+	add	al, 'a' - 'A'
+0:
+	ret
+.endp dbgLower
+
+// int dbgUpper(int);	// maps 'a'..'z' to 'A'..'Z'; any other value is returned unchanged
+.pubproc dbgUpper
+	mov	eax, [esp+dword]
+	cmp	eax, 'a'
+	jb	0f				// unsigned compare of the full 32-bit value
+	cmp	eax, 'z'
+	ja	0f
+	add	al, 'A' - 'a'
+0:
+	ret
+.endp dbgUpper
+
+// void*dbgFillMem(void*, int, size_t);	// stores the low byte of the value count times, returns the destination
+.pubproc dbgFillMem
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	ecx, [esp+3*dword]		// count
+	mov	eax, [esp+2*dword]		// fill value
+#else
+	xchg	eax, edx			// eax = fill value, edx = destination; count is used from ecx
+#endif
+	jmp	0f				// join the common tail shared with dbgZeroMem
+// void*dbgZeroMem(void*, size_t);	// same as dbgFillMem with a fill value of 0
+.pubentry dbgZeroMem
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	ecx, [esp+2*dword]		// count
+#else
+	mov	ecx, edx			// count
+	mov	edx, eax			// destination
+#endif
+	xor	eax, eax			// fill value 0
+0:
+	EHpush	edi
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	edi, [esp+2*dword]		// destination (offset accounts for the push)
+	EHpush	edi, FALSE			// remember it for the return value
+#else
+	mov	edi, edx
+#endif
+	rep stosb
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	EHpop	eax				// return the destination
+#else
+	mov	eax, edx			// return the destination
+#endif
+	EHpop	edi
+	ret
+.endp dbgFillMem
+
+// uint8_t*dbgFindInt8(const uint8_t*, uint8_t, size_t);	// returns the first matching element or NULL
+// void*dbgFindByte(const void*, int, size_t);	// same entry point
+.pubproc dbgFindInt8
+.pubentry dbgFindByte
+	mov	ecx, [esp+3*dword]		// element count
+	EHpush	edi
+	jecxz	0f				// nothing to search
+	mov	edi, [esp+2*dword]		// buffer (offsets account for the push)
+	mov	al, [esp+3*dword]		// value searched for
+	repne scasb
+	lea	eax, [edi-byte]			// candidate result; lea keeps the flags of the scan
+	je	1f				// last element compared was a match
+0:
+	xor	eax, eax			// not found
+1:
+	EHpop	edi
+	ret
+.endp dbgFindInt8
+
+// uint16_t*dbgFindInt16(const uint16_t*, uint16_t, size_t);	// returns the first matching element or NULL
+.pubproc dbgFindInt16
+	mov	ecx, [esp+3*dword]		// element count
+	EHpush	edi
+	jecxz	0f				// nothing to search
+	mov	edi, [esp+2*dword]		// buffer (offsets account for the push)
+	mov	eax, [esp+3*dword]		// value searched for (only ax is compared)
+	repne scasw
+	lea	eax, [edi-word]			// candidate result; lea keeps the flags of the scan
+	je	1f				// last element compared was a match
+0:
+	xor	eax, eax			// not found
+1:
+	EHpop	edi
+	ret
+.endp dbgFindInt16
+
+// uint32_t*dbgFindInt32(const uint32_t*, uint32_t, size_t);	// returns the first matching element or NULL
+// nuint_t*dbgFindIntN(const nuint_t*, nuint_t, size_t);	// same entry point
+// void**dbgFindPtr(const void**, const void*, size_t);	// same entry point
+.pubproc dbgFindInt32
+.pubentry dbgFindIntN
+.pubentry dbgFindPtr
+	mov	ecx, [esp+3*dword]		// element count
+	EHpush	edi
+	jecxz	0f				// nothing to search
+	mov	edi, [esp+2*dword]		// buffer (offsets account for the push)
+	mov	eax, [esp+3*dword]		// value searched for
+	repne scasd
+	lea	eax, [edi-dword]		// candidate result; lea keeps the flags of the scan
+	je	1f				// last element compared was a match
+0:
+	xor	eax, eax			// not found
+1:
+	EHpop	edi
+	ret
+.endp dbgFindInt32
+
+// uint64_t dbgMultiply(uint64_t, uint64_t, uint64_t*overflow);	// returns the low 64 bits of the product, stores the high 64 bits to *overflow
+.pubproc dbgMultiply
+	mov	eax, [esp+2*dword]		// high half of the 1st factor (offset before the push)
+	EHpush	edi
+	mov	edi, [esp+6*dword]		// overflow pointer
+	mul	dword ptr [esp+5*dword]		// high(1st) * high(2nd)
+	mov	[edi], eax
+	mov	eax, [esp+2*dword]		// low half of the 1st factor
+	mov	[edi+dword], edx
+	mul	dword ptr [esp+5*dword]		// low(1st) * high(2nd)
+	mov	ecx, eax			// ecx accumulates bits 32..63 of the product
+	add	[edi], edx
+	mov	eax, [esp+3*dword]		// high half of the 1st factor
+	adc	dword ptr [edi+dword], 0
+	mul	dword ptr [esp+4*dword]		// high(1st) * low(2nd)
+	add	ecx, eax
+	adc	[edi], edx
+	mov	eax, [esp+2*dword]		// low half of the 1st factor
+	adc	dword ptr [edi+dword], 0
+	mul	dword ptr [esp+4*dword]		// low(1st) * low(2nd)
+	add	edx, ecx			// edx:eax = low 64 bits of the product
+	adc	dword ptr [edi], 0
+	adc	dword ptr [edi+dword], 0	// adc (not add): the carry out of the low overflow dword must reach the top dword
+	EHpop	edi
+	ret
+.endp dbgMultiply
+
+// nint_t dbgLog2(uint64_t)	// returns the index of the highest set bit, or -1 if the value is 0
+.pubproc dbgLog2
+	bsr	eax, [esp+2*dword]		// scan the high half; ZF=1 if it is zero
+	lea	eax, [eax+32]			// bias for the high half; lea leaves ZF untouched
+	jnz	0f
+	bsr	eax, [esp+1*dword]		// high half was zero: scan the low half
+	jnz	0f
+	or	eax, not 0			// whole value is zero: return -1
+0:
+	ret
+.endp dbgLog2
+
+// uint8_t dbgRotl8(uint8_t, nuint_t);	// rotates the 8-bit value left; result in al
+.pubproc dbgRotl8
+	mov	cl, [esp+2*dword]		// rotate count (only the low byte is read)
+	mov	al, [esp+dword]			// value
+	rol	al, cl
+	ret
+.endp dbgRotl8
+
+// uint8_t dbgRotr8(uint8_t, nuint_t);	// rotates the 8-bit value right; result in al
+.pubproc dbgRotr8
+	mov	cl, [esp+2*dword]		// rotate count (only the low byte is read)
+	mov	al, [esp+dword]			// value
+	ror	al, cl
+	ret
+.endp dbgRotr8
+
+// uint16_t dbgRotl16(uint16_t, nuint_t);	// rotates the 16-bit value left; result in ax
+.pubproc dbgRotl16
+	mov	cl, [esp+2*dword]		// rotate count (only the low byte is read)
+	mov	eax, [esp+dword]		// value; only ax takes part in the rotate
+	rol	ax, cl
+	ret
+.endp dbgRotl16
+
+// uint16_t dbgRotr16(uint16_t, nuint_t);	// rotates the 16-bit value right; result in ax
+.pubproc dbgRotr16
+	mov	cl, [esp+2*dword]		// rotate count (only the low byte is read)
+	mov	eax, [esp+dword]		// value; only ax takes part in the rotate
+	ror	ax, cl
+	ret
+.endp dbgRotr16
+
+// uint32_t dbgRotl32(uint32_t, nuint_t);	// rotates the 32-bit value left; result in eax
+.pubproc dbgRotl32
+	mov	cl, [esp+2*dword]		// rotate count (only the low byte is read)
+	mov	eax, [esp+dword]		// value
+	rol	eax, cl
+	ret
+.endp dbgRotl32
+
+// uint32_t dbgRotr32(uint32_t, nuint_t);	// rotates the 32-bit value right; result in eax
+.pubproc dbgRotr32
+	mov	cl, [esp+2*dword]		// rotate count (only the low byte is read)
+	mov	eax, [esp+dword]		// value
+	ror	eax, cl
+	ret
+.endp dbgRotr32
+
+// uint64_t dbgRotl64(uint64_t, nuint_t);	// rotates the 64-bit value left; result in edx:eax
+.pubproc dbgRotl64
+	mov	cl, [esp+3*dword]		// rotate count
+	mov	eax, [esp+dword]		// low half
+	test	cl, 0x20			// bit 5 of the count set? (flags survive the mov and the push)
+	mov	edx, [esp+2*dword]		// high half
+	EHpush	ebx
+	jz	0f
+	xchg	eax, edx			// rotating by 32 is a swap of the halves
+0:
+	mov	ebx, edx			// keep the old high half for the second shift
+	shld	edx, eax, cl			// high = high << n | low >> (32 - n), n = count mod 32
+	shld	eax, ebx, cl			// low = low << n | old high >> (32 - n)
+	EHpop	ebx
+	ret
+.endp dbgRotl64
+
+// uint64_t dbgRotr64(uint64_t, nuint_t);	// rotates the 64-bit value right; result in edx:eax
+.pubproc dbgRotr64
+	mov	cl, [esp+3*dword]		// rotate count
+	mov	eax, [esp+dword]		// low half
+	test	cl, 0x20			// bit 5 of the count set? (flags survive the mov and the push)
+	mov	edx, [esp+2*dword]		// high half
+	EHpush	ebx
+	jz	0f
+	xchg	eax, edx			// rotating by 32 is a swap of the halves
+0:
+	mov	ebx, eax			// keep the old low half for the second shift
+	shrd	eax, edx, cl			// low = low >> n | high << (32 - n), n = count mod 32
+	shrd	edx, ebx, cl			// high = high >> n | old low << (32 - n)
+	EHpop	ebx
+	ret
+.endp dbgRotr64
+
+.pubproc dbgDivideU
+	mov	eax, [esp+2*dword]		// dividend
+	xor	edx, edx			// zero-extend the dividend into edx:eax
+	mov	ecx, [esp+1*dword]		// result buffer (presumably a hidden struct-return pointer - confirm against the C prototype)
+	div	dword ptr [esp+3*dword]		// unsigned divide by the divisor
+	mov	[ecx], eax			// quotient
+	mov	[ecx+dword], edx		// remainder
+	mov	eax, ecx			// return the buffer pointer
+	ret	dword				// callee removes the buffer pointer from the stack
+.endp dbgDivideU
+
+.pubproc dbgDivideS
+	mov	eax, [esp+2*dword]		// dividend
+	mov	ecx, [esp+1*dword]		// result buffer (presumably a hidden struct-return pointer - confirm against the C prototype)
+	cdq					// sign-extend the dividend into edx:eax
+	idiv	dword ptr [esp+3*dword]		// signed divide by the divisor
+	mov	[ecx], eax			// quotient
+	mov	[ecx+dword], edx		// remainder
+	mov	eax, ecx			// return the buffer pointer
+	ret	dword				// callee removes the buffer pointer from the stack
+.endp dbgDivideS
+
+.pubproc dbg__divdi3
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	eax, [esp+1*dword]		// dividend, low half
+	mov	edx, [esp+2*dword]		// dividend, high half
+# define ARG2_SLOT 3
+#else
+# define ARG2_SLOT 1
+#endif
+	EHpushState
+	EHpush	ebp
+	EHpush	ebx
+	mov	ebp, [esp+(ARG2_SLOT+3)*dword]	// divisor, high half (offsets account for the two pushes)
+	mov	ebx, [esp+(ARG2_SLOT+2)*dword]	// divisor, low half
+	EHpush	esi
+	mov	ecx, edx
+	mov	esi, ebp
+	sar	ecx, 31				// ecx = sign mask of the dividend (0 or -1)
+	sar	esi, 31				// esi = sign mask of the divisor
+	xor	eax, ecx
+	xor	edx, ecx
+	xor	ebx, esi
+	xor	ebp, esi
+	sub	eax, ecx
+	sbb	edx, ecx			// edx:eax = magnitude of the dividend
+	xor	ecx, esi			// ecx = sign mask of the quotient
+	sub	ebx, esi
+	sbb	ebp, esi			// ebp:ebx = magnitude of the divisor; ZF/SF now describe ebp
+	EHpop	esi
+	jz	.LquickDIV			// divisor magnitude fits in 32 bits
+	jns	.LscaleDIV
+	// EDX:EAX <= 80000000:00000000, EBP:EBX == 80000000:00000000
+	mov	eax, edx
+	xor	edx, edx
+	shr	eax, 31				// quotient magnitude is 1 only if the dividend magnitude is 2^63 as well
+	jmp	.LreturnDIV
+	EHpopState
+.pubentry dbg__udivdi3
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	eax, [esp+1*dword]		// dividend, low half
+	mov	edx, [esp+2*dword]		// dividend, high half
+#endif
+	EHpush	ebp
+	EHpush	ebx
+	xor	ecx, ecx			// unsigned: no sign correction at .LreturnDIV
+	mov	ebp, [esp+(ARG2_SLOT+3)*dword]	// divisor, high half
+	mov	ebx, [esp+(ARG2_SLOT+2)*dword]	// divisor, low half
+	test	ebp, ebp
+	jz	.LquickDIV			// divisor fits in 32 bits
+	js	.LcompareDIV			// divisor has bit 63 set: quotient is 0 or 1
+.LscaleDIV:
+	EHpush	ecx				// save the sign mask
+	bsr	ecx, ebp
+	EHpush	edx				// save the dividend, high half
+	EHpush	eax				// save the dividend, low half
+	inc	ecx				// number of bits to shift (at most 31)
+	EHpush	ebx, FALSE			// save the divisor, low half
+	shrd	ebx, ebp, cl			// ebx = divisor shifted down to fit in 32 bits
+	shrd	eax, edx, cl
+	shr	edx, cl				// edx:eax = dividend shifted by the same amount
+	div	ebx				// eax = quotient estimate
+	EHpop	ebx, FALSE			// ebx = divisor, low half
+	imul	ebp, eax			// lo(quotient * hi(divisor))
+	mov	ecx, eax
+	mul	ebx				// quotient * lo(divisor)
+	EHpop	ebx, FALSE			// ebx = dividend, low half
+	add	ebp, edx			// EBP:EAX = quotient * divisor
+	sbb	ecx, 0				// adjust quotient (if overflow)
+	xor	edx, edx			// high half of the quotient is zero
+	sub	ebx, eax
+	EHpop	ebx, FALSE			// ebx = dividend, high half (pop keeps the borrow)
+	mov	eax, ecx
+	sbb	ebx, ebp			// borrow set if quotient * divisor exceeds the dividend
+	EHpop	ecx				// restore the sign mask
+	sbb	eax, edx			// adjust quotient
+	jmp	.LreturnDIV
+.LcompareDIV:
+	sub	eax, ebx
+	mov	eax, 0				// mov keeps the borrow of the sub
+	sbb	edx, ebp			// CF = 1 if dividend < divisor
+	mov	edx, eax			// high half of the quotient is zero
+	sbb	eax, -1				// eax = 1 - CF
+	jmp	.LreturnDIV
+.LquickDIV:
+	cmp	edx, ebx
+	jb	0f				// a single divide cannot overflow; ebp is 0 on this path
+	mov	ebp, eax
+	mov	eax, edx
+	xor	edx, edx
+	div	ebx				// high half of the quotient, remainder stays in edx
+	xchg	eax, ebp			// ebp = high half of the quotient, eax = low half of the dividend
+0:
+	div	ebx				// low half of the quotient
+	mov	edx, ebp
+.LreturnDIV:
+	xor	eax, ecx			// negate edx:eax if the sign mask in ecx is -1
+	EHpop	ebx
+	xor	edx, ecx
+	sub	eax, ecx
+	EHpop	ebp
+	sbb	edx, ecx
+	ret
+#undef ARG2_SLOT
+.endp dbg__divdi3
+
+.pubproc dbg__moddi3
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	eax, [esp+1*dword]		// dividend, low half
+	mov	edx, [esp+2*dword]		// dividend, high half
+# define ARG2_SLOT 3
+#else
+# define ARG2_SLOT 1
+#endif
+	EHpushState
+	EHpush	ebp
+	EHpush	ebx
+	mov	ebp, [esp+(ARG2_SLOT+3)*dword]	// divisor, high half (offsets account for the two pushes)
+	mov	ebx, [esp+(ARG2_SLOT+2)*dword]	// divisor, low half
+	EHpush	esi
+	mov	ecx, edx
+	mov	esi, ebp
+	sar	ecx, 31				// ecx = sign mask of the dividend; it is also applied to the remainder
+	sar	esi, 31				// esi = sign mask of the divisor
+	xor	eax, ecx
+	xor	edx, ecx
+	xor	ebx, esi
+	xor	ebp, esi
+	sub	eax, ecx
+	sbb	edx, ecx			// edx:eax = magnitude of the dividend
+	sub	ebx, esi
+	sbb	ebp, esi			// ebp:ebx = magnitude of the divisor; ZF/SF now describe ebp
+	EHpop	esi
+	jz	.LquickMOD			// divisor magnitude fits in 32 bits
+	jns	.LscaleMOD
+	// EDX:EAX <= 80000000:00000000, EBP:EBX == 80000000:00000000
+	not	ebp				// ebp = 7fffffff
+	and	edx, ebp			// clears a dividend magnitude of 2^63, leaves smaller ones unchanged
+	jmp	.LreturnMOD
+	EHpopState
+.pubentry dbg__umoddi3
+#if !defined(CONFIG_REGPARM) || __GNUC__ < 3
+	mov	eax, [esp+1*dword]		// dividend, low half
+	mov	edx, [esp+2*dword]		// dividend, high half
+#endif
+	EHpush	ebp
+	EHpush	ebx
+	xor	ecx, ecx			// unsigned: no sign correction at .LreturnMOD
+	mov	ebp, [esp+(ARG2_SLOT+3)*dword]	// divisor, high half
+	mov	ebx, [esp+(ARG2_SLOT+2)*dword]	// divisor, low half
+	test	ebp, ebp
+	jz	.LquickMOD			// divisor fits in 32 bits
+	js	.LcompareMOD			// divisor has bit 63 set: at most one subtraction needed
+.LscaleMOD:
+	EHpush	ecx				// save the sign mask
+	bsr	ecx, ebp
+	EHpush	esi
+	mov	esi, edx			// esi:edi = copy of the dividend
+	EHpush	edi
+	inc	ecx				// number of bits to shift (at most 31)
+	mov	edi, eax
+	EHpush	ebx, FALSE			// save the divisor, low half
+	shrd	ebx, ebp, cl			// ebx = divisor shifted down to fit in 32 bits
+	shrd	eax, edx, cl
+	shr	edx, cl				// edx:eax = dividend shifted by the same amount
+	mov	ecx, [esp]			// ecx = divisor, low half
+	div	ebx				// eax = quotient estimate
+	mov	[esp], ebp			// the stack slot now holds the divisor, high half
+	imul	ebp, eax			// lo(quotient * hi(divisor))
+	mul	ecx				// quotient * lo(divisor)
+	add	edx, ebp			// EDX:EAX = quotient * divisor
+	sbb	ebp, ebp			// record overflow
+	sub	edi, eax
+	sbb	esi, edx			// esi:edi = dividend - quotient * divisor
+	mov	eax, ecx			// eax = divisor, low half
+	sbb	ebp, 0				// accumulate overflow
+	EHpop	edx				// edx = divisor, high half
+	and	eax, ebp			// edx:eax = divisor if the estimate was too large, else 0
+	and	edx, ebp
+	add	eax, edi			// add it back to get the remainder
+	EHpop	edi
+	adc	edx, esi
+	EHpop	esi
+	EHpop	ecx				// restore the sign mask
+	jmp	.LreturnMOD
+.LcompareMOD:
+	sub	eax, ebx
+	EHpush	esi
+	sbb	edx, ebp			// edx:eax = dividend - divisor, CF = 1 if it went negative
+	sbb	esi, esi			// esi = -1 if dividend < divisor, else 0
+	and	ebx, esi
+	and	ebp, esi
+	add	eax, ebx			// undo the subtraction when the dividend was smaller
+	EHpop	esi
+	adc	edx, ebp
+	jmp	.LreturnMOD
+.LquickMOD:
+	cmp	edx, ebx
+	jb	0f				// a single divide cannot overflow
+	mov	ebp, eax
+	mov	eax, edx
+	xor	edx, edx
+	div	ebx				// reduce the high half first, remainder stays in edx
+	xchg	eax, ebp			// eax = low half of the dividend
+0:
+	div	ebx
+	mov	eax, edx			// remainder is below 2^32
+	xor	edx, edx
+.LreturnMOD:
+	xor	eax, ecx			// negate edx:eax if the sign mask in ecx is -1
+	EHpop	ebx
+	xor	edx, ecx
+	sub	eax, ecx
+	EHpop	ebp
+	sbb	edx, ecx
+	ret
+#undef ARG2_SLOT
+.endp dbg__moddi3