div
package b
/*
#cgo LDFLAGS: -L/Users/jalyzhang/CLionProjects/untitled125 -lh
#include "stdint.h"
void f1(uint64_t *q1, uint64_t *r1, uint64_t a, uint64_t b, uint64_t c);
*/
import "C"
// div forwards its operands to the C function f1 declared in the cgo preamble
// and returns the two values f1 stores through its output pointers.
//
// The package test compares the result with math/bits.Div64(h, l, b), i.e. q
// and r are expected to be the quotient and remainder of the 128-bit value
// h:l divided by b.
func div(h uint64, l uint64, b uint64) (q uint64, r uint64) {
	var quo, rem C.uint64_t
	C.f1(&quo, &rem, C.uint64_t(h), C.uint64_t(l), C.uint64_t(b))
	return uint64(quo), uint64(rem)
}
package b
/*
#cgo LDFLAGS: -L/Users/jalyzhang/CLionProjects/untitled125 -lh
#include "stdint.h"
void f1(uint64_t *q1, uint64_t *r1, uint64_t a, uint64_t b, uint64_t c);
*/
import "C"
// div forwards its operands to the C function f1 declared in the cgo preamble
// and returns the two values f1 stores through its output pointers.
//
// The package test compares the result with math/bits.Div64(h, l, b), i.e. q
// and r are expected to be the quotient and remainder of the 128-bit value
// h:l divided by b.
func div(h uint64, l uint64, b uint64) (q uint64, r uint64) {
	var quo, rem C.uint64_t
	C.f1(&quo, &rem, C.uint64_t(h), C.uint64_t(l), C.uint64_t(b))
	return uint64(quo), uint64(rem)
}
import (
"crypto/rand"
"encoding/binary"
"math/bits"
"testing"
)
// rand64 returns a uint64 built from 8 bytes read from crypto/rand, decoded
// as big-endian.
//
// It panics if the random source reports an error: silently continuing with
// an all-zero buffer would make every caller see 0 and could stall loops that
// keep drawing until they get a different value.
func rand64() uint64 {
	var buf [8]byte
	if _, err := rand.Read(buf[:]); err != nil {
		panic("rand64: " + err.Error())
	}
	return binary.BigEndian.Uint64(buf[:])
}
// TestDiv cross-checks div against math/bits.Div64 on random operands.
//
// Both implementations divide the 128-bit value h:l by b and require h < b so
// that the quotient fits in 64 bits (bits.Div64 panics otherwise).
//
// Decimal analogy for that requirement: a one-digit quotient a (< 10) times a
// one-digit divisor b (< 10) plus a remainder smaller than b is always less
// than b*10, so the high digit of the dividend must be below the divisor.
func TestDiv(t *testing.T) {
	for i := 0; i < 0xfffff; i++ {
		l := rand64()

		// Draw two distinct values and use the smaller one as the high word.
		// Unlike redrawing b until it exceeds h, this cannot spin for a long
		// time (or forever) when h happens to be close to the maximum uint64.
		h, b := rand64(), rand64()
		for h == b {
			b = rand64()
		}
		if b < h {
			h, b = b, h
		}

		wantQ, wantR := bits.Div64(h, l, b)
		gotQ, gotR := div(h, l, b)
		if gotQ != wantQ || gotR != wantR {
			t.Fatalf("div(%#x, %#x, %#x) = (%#x, %#x), want (%#x, %#x)",
				h, l, b, gotQ, gotR, wantQ, wantR)
		}
	}
}
#include "stdlib.h"
#include "immintrin.h"
// udiv(q, r, h, l, y): issues a single x86 `div` instruction on operand y.
// h is loaded into the d register and l into the a register (the high and low
// halves of the dividend); after the instruction q receives the a register
// (quotient) and r the d register (remainder). The operand width follows the
// type of the arguments, so the macro serves both 32-bit and 64-bit callers.
// NOTE(review): the instruction raises a divide error when y is zero or when
// the quotient does not fit, i.e. callers must guarantee h < y.
#define udiv(q, r, h, l, y) asm("div %4" : "=a"(q), "=d"(r) : "d"(h), "a"(l), "r"(y))
#include "stdint.h"
#if 1920
// udiv64 divides the 64-bit value highDividend:lowDividend by the 32-bit
// divisor using the udiv macro. The quotient is returned and the remainder is
// stored through `remainder`.
static uint32_t udiv64(uint32_t highDividend, uint32_t lowDividend,
                       uint32_t divisor, uint32_t *remainder) {
    uint32_t quotient;
    uint32_t rest;
    udiv(quotient, rest, highDividend, lowDividend, divisor);
    *remainder = rest;
    return quotient;
}
#endif
//-mlzcnt
static inline size_t leadingzero(uint64_t x) {
return _lzcnt_u64(x);
}
// DivDigit produces one 32-bit quotient digit of a schoolbook long division
// by the normalized divisor y.
//
//   u     current partial remainder; must be smaller than y
//   next  next 32-bit digit of the dividend
//   y     divisor, already shifted so that its top bit is set
//   rem   receives (u * 2^32 + next) mod y
//
// Returns floor((u * 2^32 + next) / y).
static inline uint32_t DivDigit(uint64_t u, uint32_t next, uint64_t y, uint64_t *rem) {
    const uint32_t e = (uint32_t)(y >> 32);   // high digit of the divisor
    const uint32_t f = (uint32_t)y;           // low digit of the divisor
    const uint64_t e0 = (uint64_t)e << 32;
    uint32_t a = (uint32_t)(u >> 32);
    uint32_t b = (uint32_t)u;

    // udiv64 divides a:b by e and only works while a < e, but u < y merely
    // guarantees a <= e. When a == e, subtract e from a:b once or twice until
    // the top digit drops to e - 1. Every subtraction lowers the estimated
    // quotient by one; the matching e is added back to the remainder below.
    int borrowed = 0;
    if (a == e) {
        if (b >= e) {
            b -= e;
            borrowed++;
        }
        b -= e;   // wraps around; the borrow is taken from a
        borrowed++;
        a--;
    }

    uint32_t t;
    uint32_t q = udiv64(a, b, e, &t);
    uint64_t tc = (uint64_t)t << 32 | next;
    const uint64_t qf = (uint64_t)q * f;

    // `carry` records that the true value of tc has reached 2^64. It must
    // start at zero for every digit: the estimate is then known to be small
    // enough only when an addition in *this* step overflowed.
    uint8_t carry = 0;
    for (; borrowed > 0; borrowed--) {
        tc += e0;
        carry |= (uint8_t)(tc < e0);
    }

    // The estimate q is too large exactly when q*f exceeds tc. With a
    // normalized divisor at most two corrections are needed.
    for (int fix = 0; fix < 2 && carry == 0 && qf > tc; fix++) {
        q--;
        tc += y;
        carry = (uint8_t)(tc < y);
    }

    // Modulo 2^64 this is the exact remainder, which is always below y.
    *rem = tc - qf;
    return q;
}

// Div divides the 128-bit value h:l by y using two 64-by-32-bit hardware
// divisions (via udiv64) instead of one 128-by-64-bit division.
//
//   h, l  high and low 64 bits of the dividend
//   y     divisor; must be non-zero and greater than h so that the quotient
//         fits in 64 bits
//   r     receives the remainder
//
// Returns the 64-bit quotient.
static inline uint64_t Div(const uint64_t h, const uint64_t l, uint64_t y, uint64_t *r) {
    // Normalize: shift divisor and dividend left until the divisor's top bit
    // is set, which keeps the per-digit quotient estimates within two of the
    // true digit.
    const size_t s = leadingzero(y);
    y <<= s;
    // Bits of l that move into the high word. A shift by 64 is undefined, so
    // the s == 0 case is handled explicitly.
    const uint64_t spill = s != 0 ? l >> (64 - s) : 0;
    const uint64_t ab = h << s | spill;
    const uint64_t cd = l << s;

    uint64_t rem;
    const uint32_t qhi = DivDigit(ab, (uint32_t)(cd >> 32), y, &rem);
    const uint32_t qlo = DivDigit(rem, (uint32_t)cd, y, &rem);

    // Undo the normalization shift on the remainder.
    *r = rem >> s;
    return (uint64_t)qhi << 32 | qlo;
}
// f1 divides the 128-bit value a:b (a is the high half) by c with the udiv
// macro, storing the quotient in *q1 and the remainder in *r1. Exported with
// C linkage.
extern "C" void f1(uint64_t *q1, uint64_t *r1, uint64_t a, uint64_t b, uint64_t c)
{
    uint64_t quotient;
    uint64_t remainder;
    udiv(quotient, remainder, a, b, c);
    *q1 = quotient;
    *r1 = remainder;
}
// f2 has the same signature as f1 but delegates to Div: the quotient returned
// by Div(a, b, c, r1) is stored in *q1 and Div writes the remainder to *r1.
// Exported with C linkage.
extern "C" void f2(uint64_t *q1, uint64_t *r1, uint64_t a, uint64_t b, uint64_t c)
{
    const uint64_t quotient = Div(a, b, c, r1);
    *q1 = quotient;
}
// rmain calls f1 and f2 four times each on one fixed set of operands and
// returns 1 when both produced the same quotient and remainder, 0 otherwise.
int rmain() {
    void f1(uint64_t *q1, uint64_t *r1, uint64_t a, uint64_t b, uint64_t c);
    void f2(uint64_t *q1, uint64_t *r1, uint64_t a, uint64_t b, uint64_t c);

    const uint64_t hi = 0xafaf3234, lo = 0xafaf3235, divisor = 0xafaf3236;
    uint64_t refQuot, refRem, quot, rem;

    // Hardware-division path.
    for (int pass = 0; pass < 4; pass++) {
        f1(&refQuot, &refRem, hi, lo, divisor);
    }

    // Div-based path.
    for (int pass = 0; pass < 4; pass++) {
        f2(&quot, &rem, hi, lo, divisor);
    }

    if (refQuot != quot) {
        return 0;
    }
    return refRem == rem ? 1 : 0;
}