You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@milagro.apache.org by sa...@apache.org on 2016/09/01 07:25:38 UTC

[10/12] incubator-milagro-crypto git commit: MILAGRO-14.Updating package name with apache git

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/85fabaa6/go/amcl-go/BIG.go
----------------------------------------------------------------------
diff --git a/go/amcl-go/BIG.go b/go/amcl-go/BIG.go
new file mode 100644
index 0000000..a1c5184
--- /dev/null
+++ b/go/amcl-go/BIG.go
@@ -0,0 +1,956 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL BIG number class */
+
+package amcl
+
+import "strconv"
+
+//import "fmt"
+
+type BIG struct {
+	w [NLEN]int64
+}
+
+func NewBIG() *BIG {
+	b := new(BIG)
+	for i := 0; i < NLEN; i++ {
+		b.w[i] = 0
+	}
+	return b
+}
+
+func NewBIGint(x int) *BIG {
+	b := new(BIG)
+	b.w[0] = int64(x)
+	for i := 1; i < NLEN; i++ {
+		b.w[i] = 0
+	}
+	return b
+}
+
+func NewBIGcopy(x *BIG) *BIG {
+	b := new(BIG)
+	for i := 0; i < NLEN; i++ {
+		b.w[i] = x.w[i]
+	}
+	return b
+}
+
+func NewBIGdcopy(x *DBIG) *BIG {
+	b := new(BIG)
+	for i := 0; i < NLEN; i++ {
+		b.w[i] = x.w[i]
+	}
+	return b
+}
+
+func NewBIGints(x [NLEN]int64) *BIG {
+	b := new(BIG)
+	for i := 0; i < NLEN; i++ {
+		b.w[i] = x[i]
+	}
+	return b
+}
+
+func (r *BIG) get(i int) int64 {
+	return r.w[i]
+}
+
+func (r *BIG) set(i int, x int64) {
+	r.w[i] = x
+}
+
+func (r *BIG) xortop(x int64) {
+	r.w[NLEN-1] ^= x
+}
+
+func (r *BIG) ortop(x int64) {
+	r.w[NLEN-1] |= x
+}
+
+/* test for zero */
+func (r *BIG) iszilch() bool {
+	for i := 0; i < NLEN; i++ {
+		if r.w[i] != 0 {
+			return false
+		}
+	}
+	return true
+}
+
+/* set to zero */
+func (r *BIG) zero() {
+	for i := 0; i < NLEN; i++ {
+		r.w[i] = 0
+	}
+}
+
+/* Test for equal to one */
+func (r *BIG) isunity() bool {
+	for i := 1; i < NLEN; i++ {
+		if r.w[i] != 0 {
+			return false
+		}
+	}
+	if r.w[0] != 1 {
+		return false
+	}
+	return true
+}
+
+/* set to one */
+func (r *BIG) one() {
+	r.w[0] = 1
+	for i := 1; i < NLEN; i++ {
+		r.w[i] = 0
+	}
+}
+
+/* Copy from another BIG */
+func (r *BIG) copy(x *BIG) {
+	for i := 0; i < NLEN; i++ {
+		r.w[i] = x.w[i]
+	}
+}
+
+/* Copy from another DBIG */
+func (r *BIG) dcopy(x *DBIG) {
+	for i := 0; i < NLEN; i++ {
+		r.w[i] = x.w[i]
+	}
+}
+
+/* calculate Field Excess */
+func EXCESS(a *BIG) int64 {
+	return ((a.w[NLEN-1] & OMASK) >> (MODBITS % BASEBITS))
+}
+
+/* normalise BIG - force all digits < 2^BASEBITS */
+func (r *BIG) norm() int64 {
+	var carry int64 = 0
+	for i := 0; i < NLEN-1; i++ {
+		d := r.w[i] + carry
+		r.w[i] = d & MASK
+		carry = d >> BASEBITS
+	}
+	r.w[NLEN-1] = (r.w[NLEN-1] + carry)
+
+	return (r.w[NLEN-1] >> ((8 * MODBYTES) % BASEBITS))
+}
+
+/* Conditional swap of two bigs depending on d using XOR - no branches */
+func (r *BIG) cswap(b *BIG, d int32) {
+	var c = int64(d)
+	c = ^(c - 1)
+
+	for i := 0; i < NLEN; i++ {
+		t := c & (r.w[i] ^ b.w[i])
+		r.w[i] ^= t
+		b.w[i] ^= t
+	}
+}
+
+func (r *BIG) cmove(g *BIG, d int32) {
+	var b = int64(-d)
+
+	for i := 0; i < NLEN; i++ {
+		r.w[i] ^= (r.w[i] ^ g.w[i]) & b
+	}
+}
+
+/* Shift right by less than a word */
+func (r *BIG) fshr(k uint) int64 {
+	w := r.w[0] & ((int64(1) << k) - 1) /* shifted out part */
+	for i := 0; i < NLEN-1; i++ {
+		r.w[i] = (r.w[i] >> k) | ((r.w[i+1] << (BASEBITS - k)) & MASK)
+	}
+	r.w[NLEN-1] = r.w[NLEN-1] >> k
+	return w
+}
+
+/* general shift right */
+func (r *BIG) shr(k uint) {
+	n := (k % BASEBITS)
+	m := int(k / BASEBITS)
+	for i := 0; i < NLEN-m-1; i++ {
+		r.w[i] = (r.w[m+i] >> n) | ((r.w[m+i+1] << (BASEBITS - n)) & MASK)
+	}
+	r.w[NLEN-m-1] = r.w[NLEN-1] >> n
+	for i := NLEN - m; i < NLEN; i++ {
+		r.w[i] = 0
+	}
+}
+
+/* Shift right by less than a word */
+func (r *BIG) fshl(k uint) int64 {
+	r.w[NLEN-1] = (r.w[NLEN-1] << k) | (r.w[NLEN-2] >> (BASEBITS - k))
+	for i := NLEN - 2; i > 0; i-- {
+		r.w[i] = ((r.w[i] << k) & MASK) | (r.w[i-1] >> (BASEBITS - k))
+	}
+	r.w[0] = (r.w[0] << k) & MASK
+	return (r.w[NLEN-1] >> ((8 * MODBYTES) % BASEBITS)) /* return excess - only used in ff.c */
+}
+
+/* general shift left */
+func (r *BIG) shl(k uint) {
+	n := k % BASEBITS
+	m := int(k / BASEBITS)
+
+	r.w[NLEN-1] = (r.w[NLEN-1-m] << n) | (r.w[NLEN-m-2] >> (BASEBITS - n))
+	for i := NLEN - 2; i > m; i-- {
+		r.w[i] = ((r.w[i-m] << n) & MASK) | (r.w[i-m-1] >> (BASEBITS - n))
+	}
+	r.w[m] = (r.w[0] << n) & MASK
+	for i := 0; i < m; i++ {
+		r.w[i] = 0
+	}
+}
+
+/* return number of bits */
+func (r *BIG) nbits() int {
+	k := NLEN - 1
+	r.norm()
+	for k >= 0 && r.w[k] == 0 {
+		k--
+	}
+	if k < 0 {
+		return 0
+	}
+	bts := int(BASEBITS) * k
+	c := r.w[k]
+	for c != 0 {
+		c /= 2
+		bts++
+	}
+	return bts
+}
+
+/* Convert to Hex String */
+func (r *BIG) toString() string {
+	s := ""
+	len := r.nbits()
+
+	if len%4 == 0 {
+		len /= 4
+	} else {
+		len /= 4
+		len++
+
+	}
+	MB := int(MODBYTES * 2)
+	if len < MB {
+		len = MB
+	}
+
+	for i := len - 1; i >= 0; i-- {
+		b := NewBIGcopy(r)
+
+		b.shr(uint(i * 4))
+		s += strconv.FormatInt(b.w[0]&15, 16)
+	}
+	return s
+}
+
+func (r *BIG) add(x *BIG) {
+	for i := 0; i < NLEN; i++ {
+		r.w[i] = r.w[i] + x.w[i]
+	}
+}
+
+/* return this+x */
+func (r *BIG) plus(x *BIG) *BIG {
+	s := new(BIG)
+	for i := 0; i < NLEN; i++ {
+		s.w[i] = r.w[i] + x.w[i]
+	}
+	return s
+}
+
+/* this+=x, where x is int */
+func (r *BIG) inc(x int) {
+	r.norm()
+	r.w[0] += int64(x)
+}
+
+/* return this-x */
+func (r *BIG) minus(x *BIG) *BIG {
+	d := new(BIG)
+	for i := 0; i < NLEN; i++ {
+		d.w[i] = r.w[i] - x.w[i]
+	}
+	return d
+}
+
+/* this-=x */
+func (r *BIG) sub(x *BIG) {
+	for i := 0; i < NLEN; i++ {
+		r.w[i] = r.w[i] - x.w[i]
+	}
+}
+
+/* reverse subtract this=x-this */
+func (r *BIG) rsub(x *BIG) {
+	for i := 0; i < NLEN; i++ {
+		r.w[i] = x.w[i] - r.w[i]
+	}
+}
+
+/* this-=x, where x is int */
+func (r *BIG) dec(x int) {
+	r.norm()
+	r.w[0] -= int64(x)
+}
+
+/* this*=x, where x is small int<NEXCESS */
+func (r *BIG) imul(c int) {
+	for i := 0; i < NLEN; i++ {
+		r.w[i] *= int64(c)
+	}
+}
+
+/* convert this BIG to byte array */
+func (r *BIG) tobytearray(b []byte, n int) {
+	r.norm()
+	c := NewBIGcopy(r)
+
+	for i := int(MODBYTES) - 1; i >= 0; i-- {
+		b[i+n] = byte(c.w[0])
+		c.fshr(8)
+	}
+}
+
+/* convert from byte array to BIG */
+func frombytearray(b []byte, n int) *BIG {
+	m := NewBIG()
+	for i := 0; i < int(MODBYTES); i++ {
+		m.fshl(8)
+		m.w[0] += int64(b[i+n] & 0xff)
+	}
+	return m
+}
+
+func (r *BIG) toBytes(b []byte) {
+	r.tobytearray(b, 0)
+}
+
+func fromBytes(b []byte) *BIG {
+	return frombytearray(b, 0)
+}
+
+/* set this[i]+=x*y+c, and return high part */
+
+func (r *BIG) muladd(a int64, b int64, c int64, i int) int64 {
+	x0 := a & HMASK
+	x1 := (a >> HBITS)
+	y0 := b & HMASK
+	y1 := (b >> HBITS)
+	bot := x0 * y0
+	top := x1 * y1
+	mid := x0*y1 + x1*y0
+	x0 = mid & HMASK
+	x1 = (mid >> HBITS)
+	bot += x0 << HBITS
+	bot += c
+	bot += r.w[i]
+	top += x1
+	carry := bot >> BASEBITS
+	bot &= MASK
+	top += carry
+	r.w[i] = bot
+	return top
+}
+
+/* this*=x, where x is >NEXCESS */
+func (r *BIG) pmul(c int) int64 {
+	var carry int64 = 0
+	r.norm()
+	for i := 0; i < NLEN; i++ {
+		ak := r.w[i]
+		r.w[i] = 0
+		carry = r.muladd(ak, int64(c), carry, i)
+	}
+	return carry
+}
+
+/* this*=c and catch overflow in DBIG */
+func (r *BIG) pxmul(c int) *DBIG {
+	m := NewDBIG()
+	var carry int64 = 0
+	for j := 0; j < NLEN; j++ {
+		carry = m.muladd(r.w[j], int64(c), carry, j)
+	}
+	m.w[NLEN] = carry
+	return m
+}
+
+/* divide by 3 */
+func (r *BIG) div3() int {
+	var carry int64 = 0
+	r.norm()
+	base := (int64(1) << BASEBITS)
+	for i := NLEN - 1; i >= 0; i-- {
+		ak := (carry*base + r.w[i])
+		r.w[i] = ak / 3
+		carry = ak % 3
+	}
+	return int(carry)
+}
+
+/* return a*b where result fits in a BIG */
+func smul(a *BIG, b *BIG) *BIG {
+	var carry int64
+	c := NewBIG()
+	for i := 0; i < NLEN; i++ {
+		carry = 0
+		for j := 0; j < NLEN; j++ {
+			if i+j < NLEN {
+				carry = c.muladd(a.w[i], b.w[j], carry, i+j)
+			}
+		}
+	}
+	return c
+}
+
+/* Compare a and b, return 0 if a==b, -1 if a<b, +1 if a>b. Inputs must be normalised */
+func comp(a *BIG, b *BIG) int {
+	for i := NLEN - 1; i >= 0; i-- {
+		if a.w[i] == b.w[i] {
+			continue
+		}
+		if a.w[i] > b.w[i] {
+			return 1
+		} else {
+			return -1
+		}
+	}
+	return 0
+}
+
+/* return parity */
+func (r *BIG) parity() int {
+	return int(r.w[0] % 2)
+}
+
+/* return n-th bit */
+func (r *BIG) bit(n int) int {
+	if (r.w[n/int(BASEBITS)] & (int64(1) << (uint(n) % BASEBITS))) > 0 {
+		return 1
+	}
+	return 0
+}
+
+/* return n last bits */
+func (r *BIG) lastbits(n int) int {
+	msk := (1 << uint(n)) - 1
+	r.norm()
+	return (int(r.w[0])) & msk
+}
+
+/* set x = x mod 2^m */
+func (r *BIG) mod2m(m uint) {
+	wd := int(m / BASEBITS)
+	bt := m % BASEBITS
+	msk := (int64(1) << bt) - 1
+	r.w[wd] &= msk
+	for i := wd + 1; i < NLEN; i++ {
+		r.w[i] = 0
+	}
+}
+
+/* Arazi and Qi inversion mod 256 */
+func invmod256(a int) int {
+	var t1 int = 0
+	c := (a >> 1) & 1
+	t1 += c
+	t1 &= 1
+	t1 = 2 - t1
+	t1 <<= 1
+	U := t1 + 1
+
+	// i=2
+	b := a & 3
+	t1 = U * b
+	t1 >>= 2
+	c = (a >> 2) & 3
+	t2 := (U * c) & 3
+	t1 += t2
+	t1 *= U
+	t1 &= 3
+	t1 = 4 - t1
+	t1 <<= 2
+	U += t1
+
+	// i=4
+	b = a & 15
+	t1 = U * b
+	t1 >>= 4
+	c = (a >> 4) & 15
+	t2 = (U * c) & 15
+	t1 += t2
+	t1 *= U
+	t1 &= 15
+	t1 = 16 - t1
+	t1 <<= 4
+	U += t1
+
+	return U
+}
+
+/* a=1/a mod 2^256. This is very fast! */
+func (r *BIG) invmod2m() {
+	U := NewBIG()
+	b := NewBIG()
+	c := NewBIG()
+
+	U.inc(invmod256(r.lastbits(8)))
+
+	for i := 8; i < 256; i <<= 1 {
+		ui := uint(i)
+		b.copy(r)
+		b.mod2m(ui)
+		t1 := smul(U, b)
+		t1.shr(ui)
+		c.copy(r)
+		c.shr(ui)
+		c.mod2m(ui)
+
+		t2 := smul(U, c)
+		t2.mod2m(ui)
+		t1.add(t2)
+		b = smul(t1, U)
+		t1.copy(b)
+		t1.mod2m(ui)
+
+		t2.one()
+		t2.shl(ui)
+		t1.rsub(t2)
+		t1.norm()
+		t1.shl(ui)
+		U.add(t1)
+	}
+	r.copy(U)
+}
+
+/* reduce this mod m */
+func (r *BIG) mod(m *BIG) {
+	r.norm()
+	if comp(r, m) < 0 {
+		return
+	}
+
+	m.fshl(1)
+	k := 1
+
+	for comp(r, m) >= 0 {
+		m.fshl(1)
+		k++
+	}
+
+	for k > 0 {
+		m.fshr(1)
+		if comp(r, m) >= 0 {
+			r.sub(m)
+			r.norm()
+		}
+		k--
+	}
+}
+
+/* divide this by m */
+func (r *BIG) div(m *BIG) {
+	k := 0
+	r.norm()
+	e := NewBIGint(1)
+	b := NewBIGcopy(r)
+	r.zero()
+
+	for comp(b, m) >= 0 {
+		e.fshl(1)
+		m.fshl(1)
+		k++
+	}
+
+	for k > 0 {
+		m.fshr(1)
+		e.fshr(1)
+		if comp(b, m) >= 0 {
+			r.add(e)
+			r.norm()
+			b.sub(m)
+			b.norm()
+		}
+		k--
+	}
+}
+
+/* get 8*MODBYTES size random number */
+func random(rng *RAND) *BIG {
+	m := NewBIG()
+	var j int = 0
+	var r byte = 0
+	/* generate random BIG */
+	for i := 0; i < 8*int(MODBYTES); i++ {
+		if j == 0 {
+			r = rng.GetByte()
+		} else {
+			r >>= 1
+		}
+
+		b := int64(r & 1)
+		m.shl(1)
+		m.w[0] += b // m.inc(b)
+		j++
+		j &= 7
+	}
+	return m
+}
+
+/* Create random BIG in portable way, one bit at a time */
+func randomnum(q *BIG, rng *RAND) *BIG {
+	d := NewDBIG()
+	var j int = 0
+	var r byte = 0
+	for i := 0; i < 2*int(MODBITS); i++ {
+		if j == 0 {
+			r = rng.GetByte()
+		} else {
+			r >>= 1
+		}
+
+		b := int64(r & 1)
+		d.shl(1)
+		d.w[0] += b // m.inc(b);
+		j++
+		j &= 7
+	}
+	m := d.mod(q)
+	return m
+}
+
+/* return NAF value as +/- 1, 3 or 5. x and x3 should be normed.
+nbs is number of bits processed, and nzs is number of trailing 0s detected */
+func nafbits(x *BIG, x3 *BIG, i int) [3]int {
+	var n [3]int
+	var j int
+	nb := x3.bit(i) - x.bit(i)
+
+	n[1] = 1
+	n[0] = 0
+	if nb == 0 {
+		n[0] = 0
+		return n
+	}
+	if i == 0 {
+		n[0] = nb
+		return n
+	}
+	if nb > 0 {
+		n[0] = 1
+	} else {
+		n[0] = (-1)
+	}
+
+	for j = i - 1; j > 0; j-- {
+		n[1]++
+		n[0] *= 2
+		nb = x3.bit(j) - x.bit(j)
+		if nb > 0 {
+			n[0] += 1
+		}
+		if nb < 0 {
+			n[0] -= 1
+		}
+		if n[0] > 5 || n[0] < -5 {
+			break
+		}
+	}
+
+	if n[0]%2 != 0 && j != 0 { /* backtrack */
+		if nb > 0 {
+			n[0] = (n[0] - 1) / 2
+		}
+		if nb < 0 {
+			n[0] = (n[0] + 1) / 2
+		}
+		n[1]--
+	}
+	for n[0]%2 == 0 { /* remove trailing zeros */
+		n[0] /= 2
+		n[2]++
+		n[1]--
+	}
+	return n
+}
+
+/* return a*b as DBIG */
+func mul(a *BIG, b *BIG) *DBIG {
+	c := NewDBIG()
+	var carry int64
+	a.norm()
+	b.norm()
+
+	for i := 0; i < NLEN; i++ {
+		carry = 0
+		for j := 0; j < NLEN; j++ {
+			carry = c.muladd(a.w[i], b.w[j], carry, i+j)
+		}
+		c.w[NLEN+i] = carry
+	}
+
+	return c
+}
+
+/* return a^2 as DBIG */
+func sqr(a *BIG) *DBIG {
+	c := NewDBIG()
+	var carry int64
+	a.norm()
+	for i := 0; i < NLEN; i++ {
+		carry = 0
+		for j := i + 1; j < NLEN; j++ {
+			carry = c.muladd(2*a.w[i], a.w[j], carry, i+j)
+		}
+		c.w[NLEN+i] = carry
+	}
+
+	for i := 0; i < NLEN; i++ {
+		c.w[2*i+1] += c.muladd(a.w[i], a.w[i], 0, 2*i)
+	}
+	c.norm()
+	return c
+}
+
+/* reduce a DBIG to a BIG using the appropriate form of the modulus */
+func mod(d *DBIG) *BIG {
+	var b *BIG
+	if MODTYPE == PSEUDO_MERSENNE {
+		t := d.split(MODBITS)
+		b = NewBIGdcopy(d)
+
+		v := t.pmul(int(MConst))
+		tw := t.w[NLEN-1]
+		t.w[NLEN-1] &= TMASK
+		t.w[0] += (MConst * ((tw >> TBITS) + (v << (BASEBITS - TBITS))))
+
+		b.add(t)
+	}
+	if MODTYPE == MONTGOMERY_FRIENDLY {
+		for i := 0; i < NLEN; i++ {
+			d.w[NLEN+i] += d.muladd(d.w[i], MConst-1, d.w[i], NLEN+i-1)
+		}
+		b = NewBIG()
+
+		for i := 0; i < NLEN; i++ {
+			b.w[i] = d.w[NLEN+i]
+		}
+	}
+
+	if MODTYPE == NOT_SPECIAL {
+		md := NewBIGints(Modulus)
+		var carry, m int64
+		for i := 0; i < NLEN; i++ {
+			if MConst == -1 {
+				m = (-d.w[i]) & MASK
+			} else {
+				if MConst == 1 {
+					m = d.w[i]
+				} else {
+					m = (MConst * d.w[i]) & MASK
+				}
+			}
+
+			carry = 0
+			for j := 0; j < NLEN; j++ {
+				carry = d.muladd(m, md.w[j], carry, i+j)
+			}
+			d.w[NLEN+i] += carry
+		}
+
+		b = NewBIG()
+		for i := 0; i < NLEN; i++ {
+			b.w[i] = d.w[NLEN+i]
+		}
+
+	}
+	b.norm()
+	return b
+}
+
+/* return a*b mod m */
+func modmul(a, b, m *BIG) *BIG {
+	a.mod(m)
+	b.mod(m)
+	d := mul(a, b)
+	return d.mod(m)
+}
+
+/* return a^2 mod m */
+func modsqr(a, m *BIG) *BIG {
+	a.mod(m)
+	d := sqr(a)
+	return d.mod(m)
+}
+
+/* return -a mod m */
+func modneg(a, m *BIG) *BIG {
+	a.mod(m)
+	return m.minus(a)
+}
+
+/* return this^e mod m */
+func (r *BIG) powmod(e *BIG, m *BIG) *BIG {
+	r.norm()
+	e.norm()
+	a := NewBIGint(1)
+	z := NewBIGcopy(e)
+	s := NewBIGcopy(r)
+	for true {
+		bt := z.parity()
+		z.fshr(1)
+		if bt == 1 {
+			a = modmul(a, s, m)
+		}
+		if z.iszilch() {
+			break
+		}
+		s = modsqr(s, m)
+	}
+	return a
+}
+
+/* Jacobi Symbol (this/p). Returns 0, 1 or -1 */
+func (r *BIG) jacobi(p *BIG) int {
+	m := 0
+	t := NewBIGint(0)
+	x := NewBIGint(0)
+	n := NewBIGint(0)
+	zilch := NewBIGint(0)
+	one := NewBIGint(1)
+	if p.parity() == 0 || comp(r, zilch) == 0 || comp(p, one) <= 0 {
+		return 0
+	}
+	r.norm()
+	x.copy(r)
+	n.copy(p)
+	x.mod(p)
+
+	for comp(n, one) > 0 {
+		if comp(x, zilch) == 0 {
+			return 0
+		}
+		n8 := n.lastbits(3)
+		k := 0
+		for x.parity() == 0 {
+			k++
+			x.shr(1)
+		}
+		if k%2 == 1 {
+			m += (n8*n8 - 1) / 8
+		}
+		m += (n8 - 1) * (x.lastbits(2) - 1) / 4
+		t.copy(n)
+		t.mod(x)
+		n.copy(x)
+		x.copy(t)
+		m %= 2
+
+	}
+	if m == 0 {
+		return 1
+	}
+	return -1
+}
+
+/* this=1/this mod p. Binary method */
+func (r *BIG) invmodp(p *BIG) {
+	r.mod(p)
+	u := NewBIGcopy(r)
+
+	v := NewBIGcopy(p)
+	x1 := NewBIGint(1)
+	x2 := NewBIGint(0)
+	t := NewBIGint(0)
+	one := NewBIGint(1)
+	for comp(u, one) != 0 && comp(v, one) != 0 {
+		for u.parity() == 0 {
+			u.shr(1)
+			if x1.parity() != 0 {
+				x1.add(p)
+				x1.norm()
+			}
+			x1.shr(1)
+		}
+		for v.parity() == 0 {
+			v.shr(1)
+			if x2.parity() != 0 {
+				x2.add(p)
+				x2.norm()
+			}
+			x2.shr(1)
+		}
+		if comp(u, v) >= 0 {
+			u.sub(v)
+			u.norm()
+			if comp(x1, x2) >= 0 {
+				x1.sub(x2)
+			} else {
+				t.copy(p)
+				t.sub(x2)
+				x1.add(t)
+			}
+			x1.norm()
+		} else {
+			v.sub(u)
+			v.norm()
+			if comp(x2, x1) >= 0 {
+				x2.sub(x1)
+			} else {
+				t.copy(p)
+				t.sub(x1)
+				x2.add(t)
+			}
+			x2.norm()
+		}
+	}
+	if comp(u, one) == 0 {
+		r.copy(x1)
+	} else {
+		r.copy(x2)
+	}
+}
+
+/*
+func main() {
+	a := NewBIGint(3)
+	m := NewBIGints(Modulus)
+
+	fmt.Printf("Modulus= "+m.toString())
+	fmt.Printf("\n")
+
+
+	e := NewBIGcopy(m);
+	e.dec(7); e.norm();
+	fmt.Printf("Exponent= "+e.toString())
+	fmt.Printf("\n")
+	a=a.powmod(e,m);
+	fmt.Printf("Result= "+a.toString())
+}
+*/

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/85fabaa6/go/amcl-go/DBIG.go
----------------------------------------------------------------------
diff --git a/go/amcl-go/DBIG.go b/go/amcl-go/DBIG.go
new file mode 100644
index 0000000..98314b6
--- /dev/null
+++ b/go/amcl-go/DBIG.go
@@ -0,0 +1,260 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL double length DBIG number class */
+
+package amcl
+
+import "strconv"
+
+type DBIG struct {
+	w [2 * NLEN]int64
+}
+
+func NewDBIG() *DBIG {
+	b := new(DBIG)
+	for i := 0; i < DNLEN; i++ {
+		b.w[i] = 0
+	}
+	return b
+}
+
+func NewDBIGcopy(x *DBIG) *DBIG {
+	b := new(DBIG)
+	for i := 0; i < DNLEN; i++ {
+		b.w[i] = x.w[i]
+	}
+	return b
+}
+
+func NewDBIGscopy(x *BIG) *DBIG {
+	b := new(DBIG)
+	for i := 0; i < NLEN-1; i++ {
+		b.w[i] = x.w[i]
+	}
+	b.w[NLEN-1] = x.get(NLEN-1) & MASK /* top word normalized */
+	b.w[NLEN] = x.get(NLEN-1) >> BASEBITS
+
+	for i := NLEN + 1; i < DNLEN; i++ {
+		b.w[i] = 0
+	}
+	return b
+}
+
+/* set this[i]+=x*y+c, and return high part */
+
+func (r *DBIG) muladd(a int64, b int64, c int64, i int) int64 {
+	x0 := a & HMASK
+	x1 := (a >> HBITS)
+	y0 := b & HMASK
+	y1 := (b >> HBITS)
+	bot := x0 * y0
+	top := x1 * y1
+	mid := x0*y1 + x1*y0
+	x0 = mid & HMASK
+	x1 = (mid >> HBITS)
+	bot += x0 << HBITS
+	bot += c
+	bot += r.w[i]
+	top += x1
+	carry := bot >> BASEBITS
+	bot &= MASK
+	top += carry
+	r.w[i] = bot
+	return top
+}
+
+/* normalise this */
+func (r *DBIG) norm() {
+	var carry int64 = 0
+	for i := 0; i < DNLEN-1; i++ {
+		d := r.w[i] + carry
+		r.w[i] = d & MASK
+		carry = d >> BASEBITS
+	}
+	r.w[DNLEN-1] = (r.w[DNLEN-1] + carry)
+}
+
+/* split DBIG at position n, return higher half, keep lower half */
+func (r *DBIG) split(n uint) *BIG {
+	t := NewBIG()
+	m := n % BASEBITS
+	carry := r.w[DNLEN-1] << (BASEBITS - m)
+
+	for i := DNLEN - 2; i >= NLEN-1; i-- {
+		nw := (r.w[i] >> m) | carry
+		carry = (r.w[i] << (BASEBITS - m)) & MASK
+		t.set(i-NLEN+1, nw)
+	}
+	r.w[NLEN-1] &= ((int64(1) << m) - 1)
+	return t
+}
+
+/* Compare a and b, return 0 if a==b, -1 if a<b, +1 if a>b. Inputs must be normalised */
+func dcomp(a *DBIG, b *DBIG) int {
+	for i := DNLEN - 1; i >= 0; i-- {
+		if a.w[i] == b.w[i] {
+			continue
+		}
+		if a.w[i] > b.w[i] {
+			return 1
+		} else {
+			return -1
+		}
+	}
+	return 0
+}
+
+func (r *DBIG) add(x *DBIG) {
+	for i := 0; i < DNLEN; i++ {
+		r.w[i] = r.w[i] + x.w[i]
+	}
+}
+
+/* this-=x */
+func (r *DBIG) sub(x *DBIG) {
+	for i := 0; i < DNLEN; i++ {
+		r.w[i] = r.w[i] - x.w[i]
+	}
+}
+
+/* general shift left */
+func (r *DBIG) shl(k uint) {
+	n := k % BASEBITS
+	m := int(k / BASEBITS)
+
+	r.w[DNLEN-1] = (r.w[DNLEN-1-m] << n) | (r.w[DNLEN-m-2] >> (BASEBITS - n))
+	for i := DNLEN - 2; i > m; i-- {
+		r.w[i] = ((r.w[i-m] << n) & MASK) | (r.w[i-m-1] >> (BASEBITS - n))
+	}
+	r.w[m] = (r.w[0] << n) & MASK
+	for i := 0; i < m; i++ {
+		r.w[i] = 0
+	}
+}
+
+/* general shift right */
+func (r *DBIG) shr(k uint) {
+	n := (k % BASEBITS)
+	m := int(k / BASEBITS)
+	for i := 0; i < DNLEN-m-1; i++ {
+		r.w[i] = (r.w[m+i] >> n) | ((r.w[m+i+1] << (BASEBITS - n)) & MASK)
+	}
+	r.w[DNLEN-m-1] = r.w[DNLEN-1] >> n
+	for i := DNLEN - m; i < DNLEN; i++ {
+		r.w[i] = 0
+	}
+}
+
+/* reduces this DBIG mod a BIG, and returns the BIG */
+func (r *DBIG) mod(c *BIG) *BIG {
+	r.norm()
+	m := NewDBIGscopy(c)
+
+	if dcomp(r, m) < 0 {
+		return NewBIGdcopy(r)
+	}
+
+	m.shl(1)
+	k := 1
+
+	for dcomp(r, m) >= 0 {
+		m.shl(1)
+		k++
+	}
+
+	for k > 0 {
+		m.shr(1)
+		if dcomp(r, m) >= 0 {
+			r.sub(m)
+			r.norm()
+		}
+		k--
+	}
+	return NewBIGdcopy(r)
+}
+
+/* return this/c */
+func (r *DBIG) div(c *BIG) *BIG {
+	k := 0
+	m := NewDBIGscopy(c)
+	a := NewBIGint(0)
+	e := NewBIGint(1)
+	r.norm()
+
+	for dcomp(r, m) >= 0 {
+		e.fshl(1)
+		m.shl(1)
+		k++
+	}
+
+	for k > 0 {
+		m.shr(1)
+		e.shr(1)
+		if dcomp(r, m) > 0 {
+			a.add(e)
+			a.norm()
+			r.sub(m)
+			r.norm()
+		}
+		k--
+	}
+	return a
+}
+
+/* Convert to Hex String */
+func (r *DBIG) toString() string {
+	s := ""
+	len := r.nbits()
+
+	if len%4 == 0 {
+		len /= 4
+	} else {
+		len /= 4
+		len++
+
+	}
+
+	for i := len - 1; i >= 0; i-- {
+		b := NewDBIGcopy(r)
+
+		b.shr(uint(i * 4))
+		s += strconv.FormatInt(b.w[0]&15, 16)
+	}
+	return s
+}
+
+/* return number of bits */
+func (r *DBIG) nbits() int {
+	k := DNLEN - 1
+	r.norm()
+	for k >= 0 && r.w[k] == 0 {
+		k--
+	}
+	if k < 0 {
+		return 0
+	}
+	bts := int(BASEBITS) * k
+	c := r.w[k]
+	for c != 0 {
+		c /= 2
+		bts++
+	}
+	return bts
+}

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/85fabaa6/go/amcl-go/ECDH.go
----------------------------------------------------------------------
diff --git a/go/amcl-go/ECDH.go b/go/amcl-go/ECDH.go
new file mode 100644
index 0000000..20718eb
--- /dev/null
+++ b/go/amcl-go/ECDH.go
@@ -0,0 +1,657 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* Elliptic Curve API high-level functions  */
+
+package amcl
+
+import "fmt"
+
+const ECDH_INVALID_PUBLIC_KEY int = -2
+const ECDH_ERROR int = -3
+const ECDH_INVALID int = -4
+const ECDH_EFS int = int(MODBYTES)
+const ECDH_EGS int = int(MODBYTES)
+const ECDH_EAS int = 16
+const ECDH_EBS int = 16
+
+/* Convert Integer to n-byte array */
+func inttoBytes(n int, len int) []byte {
+	var b []byte
+	var i int
+	for i = 0; i < len; i++ {
+		b = append(b, 0)
+	}
+	i = len
+	for n > 0 && i > 0 {
+		i--
+		b[i] = byte(n & 0xff)
+		n /= 256
+	}
+	return b
+}
+
+/* Key Derivation Functions */
+/* Input octet Z */
+/* Output key of length olen */
+func KDF1(Z []byte, olen int) []byte {
+	/* NOTE: the parameter olen is the length of the output K in bytes */
+	H := NewHASH()
+	hlen := 32
+	var K []byte
+	k := 0
+
+	for i := 0; i < olen; i++ {
+		K = append(K, 0)
+	}
+
+	cthreshold := olen / hlen
+	if olen%hlen != 0 {
+		cthreshold++
+	}
+
+	for counter := 0; counter < cthreshold; counter++ {
+		H.Process_array(Z)
+		if counter > 0 {
+			H.Process_num(int32(counter))
+		}
+		B := H.Hash()
+		if k+hlen > olen {
+			for i := 0; i < olen%hlen; i++ {
+				K[k] = B[i]
+				k++
+			}
+		} else {
+			for i := 0; i < hlen; i++ {
+				K[k] = B[i]
+				k++
+			}
+		}
+	}
+	return K
+}
+
+func KDF2(Z []byte, P []byte, olen int) []byte {
+	/* NOTE: the parameter olen is the length of the output k in bytes */
+	H := NewHASH()
+	hlen := 32
+	var K []byte
+
+	k := 0
+
+	for i := 0; i < olen; i++ {
+		K = append(K, 0)
+	}
+
+	cthreshold := olen / hlen
+	if olen%hlen != 0 {
+		cthreshold++
+	}
+
+	for counter := 1; counter <= cthreshold; counter++ {
+		H.Process_array(Z)
+		H.Process_num(int32(counter))
+		H.Process_array(P)
+		B := H.Hash()
+		if k+hlen > olen {
+			for i := 0; i < olen%hlen; i++ {
+				K[k] = B[i]
+				k++
+			}
+		} else {
+			for i := 0; i < hlen; i++ {
+				K[k] = B[i]
+				k++
+			}
+		}
+	}
+	return K
+}
+
+/* Password based Key Derivation Function */
+/* Input password p, salt s, and repeat count */
+/* Output key of length olen */
+func PBKDF2(Pass []byte, Salt []byte, rep int, olen int) []byte {
+	d := olen / 32
+	if olen%32 != 0 {
+		d++
+	}
+	var F [ECDH_EFS]byte
+	var U [ECDH_EFS]byte
+
+	var S []byte
+
+	//byte[] S=new byte[Salt.length+4];
+
+	var K []byte
+	//byte[] K=new byte[d*EFS];
+	//opt:=0
+
+	for i := 1; i <= d; i++ {
+		for j := 0; j < len(Salt); j++ {
+			S = append(S, Salt[j])
+		}
+		N := inttoBytes(i, 4)
+		for j := 0; j < 4; j++ {
+			S = append(S, N[j])
+		}
+
+		HMAC(S, Pass, F[:])
+
+		for j := 0; j < ECDH_EFS; j++ {
+			U[j] = F[j]
+		}
+		for j := 2; j <= rep; j++ {
+			HMAC(U[:], Pass, U[:])
+			for k := 0; k < ECDH_EFS; k++ {
+				F[k] ^= U[k]
+			}
+		}
+		for j := 0; j < ECDH_EFS; j++ {
+			K = append(K, F[j])
+		}
+	}
+	var key []byte
+	for i := 0; i < olen; i++ {
+		key = append(key, K[i])
+	}
+	return key
+}
+
+/* Calculate HMAC of m using key k. HMAC is tag of length olen */
+func HMAC(M []byte, K []byte, tag []byte) int {
+	/* Input is from an octet m        *
+	* olen is requested output length in bytes. k is the key  *
+	* The output is the calculated tag */
+	var B [32]byte
+	var K0 [64]byte
+	olen := len(tag)
+
+	b := len(K0)
+	if olen < 4 || olen > 32 {
+		return 0
+	}
+
+	for i := 0; i < b; i++ {
+		K0[i] = 0
+	}
+
+	H := NewHASH()
+
+	if len(K) > b {
+		H.Process_array(K)
+		B = H.Hash()
+		for i := 0; i < 32; i++ {
+			K0[i] = B[i]
+		}
+	} else {
+		for i := 0; i < len(K); i++ {
+			K0[i] = K[i]
+		}
+	}
+
+	for i := 0; i < b; i++ {
+		K0[i] ^= 0x36
+	}
+	H.Process_array(K0[:])
+	H.Process_array(M)
+	B = H.Hash()
+
+	for i := 0; i < b; i++ {
+		K0[i] ^= 0x6a
+	}
+	H.Process_array(K0[:])
+	H.Process_array(B[:])
+	B = H.Hash()
+
+	for i := 0; i < olen; i++ {
+		tag[i] = B[i]
+	}
+
+	return 1
+}
+
+/* AES encryption/decryption. Encrypt byte array M using key K and returns ciphertext */
+func AES_CBC_IV0_ENCRYPT(K []byte, M []byte) []byte { /* AES CBC encryption, with Null IV and key K */
+	/* Input is from an octet string M, output is to an octet string C */
+	/* Input is padded as necessary to make up a full final block */
+	a := NewAES()
+	fin := false
+
+	var buff [16]byte
+	var C []byte
+
+	a.Init(aes_CBC, K, nil)
+
+	ipt := 0 //opt:=0
+	var i int
+	for true {
+		for i = 0; i < 16; i++ {
+			if ipt < len(M) {
+				buff[i] = M[ipt]
+				ipt++
+			} else {
+				fin = true
+				break
+			}
+		}
+		if fin {
+			break
+		}
+		a.Encrypt(buff[:])
+		for i = 0; i < 16; i++ {
+			C = append(C, buff[i])
+		}
+	}
+
+	/* last block, filled up to i-th index */
+
+	padlen := 16 - i
+	for j := i; j < 16; j++ {
+		buff[j] = byte(padlen)
+	}
+
+	a.Encrypt(buff[:])
+
+	for i = 0; i < 16; i++ {
+		C = append(C, buff[i])
+	}
+	a.End()
+	return C
+}
+
+/* returns plaintext if all consistent, else returns null string */
+func AES_CBC_IV0_DECRYPT(K []byte, C []byte) []byte { /* padding is removed */
+	a := NewAES()
+	var buff [16]byte
+	var MM []byte
+	var M []byte
+
+	var i int
+	ipt := 0
+	opt := 0
+
+	a.Init(aes_CBC, K, nil)
+
+	if len(C) == 0 {
+		return nil
+	}
+	ch := C[ipt]
+	ipt++
+
+	fin := false
+
+	for true {
+		for i = 0; i < 16; i++ {
+			buff[i] = ch
+			if ipt >= len(C) {
+				fin = true
+				break
+			} else {
+				ch = C[ipt]
+				ipt++
+			}
+		}
+		a.Decrypt(buff[:])
+		if fin {
+			break
+		}
+		for i = 0; i < 16; i++ {
+			MM = append(MM, buff[i])
+			opt++
+		}
+	}
+
+	a.End()
+	bad := false
+	padlen := int(buff[15])
+	if i != 15 || padlen < 1 || padlen > 16 {
+		bad = true
+	}
+	if padlen >= 2 && padlen <= 16 {
+		for i = 16 - padlen; i < 16; i++ {
+			if buff[i] != byte(padlen) {
+				bad = true
+			}
+		}
+	}
+
+	if !bad {
+		for i = 0; i < 16-padlen; i++ {
+			MM = append(MM, buff[i])
+			opt++
+		}
+	}
+
+	if bad {
+		return nil
+	}
+
+	for i = 0; i < opt; i++ {
+		M = append(M, MM[i])
+	}
+
+	return M
+}
+
+/* Calculate a public/private EC GF(p) key pair W,S where W=S.G mod EC(p),
+ * where S is the secret key and W is the public key
+ * and G is fixed generator.
+ * If RNG is NULL then the private key is provided externally in S
+ * otherwise it is generated randomly internally */
+func ECDH_KEY_PAIR_GENERATE(RNG *RAND, S []byte, W []byte) int {
+	res := 0
+	var T [ECDH_EFS]byte
+	var s *BIG
+	var G *ECP
+
+	gx := NewBIGints(CURVE_Gx)
+	if CURVETYPE != MONTGOMERY {
+		gy := NewBIGints(CURVE_Gy)
+		G = NewECPbigs(gx, gy)
+	} else {
+		G = NewECPbig(gx)
+	}
+
+	r := NewBIGints(CURVE_Order)
+
+	if RNG == nil {
+		s = fromBytes(S)
+	} else {
+		s = randomnum(r, RNG)
+
+		s.toBytes(T[:])
+		for i := 0; i < ECDH_EGS; i++ {
+			S[i] = T[i]
+		}
+	}
+
+	WP := G.mul(s)
+
+	WP.toBytes(W)
+
+	return res
+}
+
+/* validate public key. Set full=true for fuller check */
+func ECDH_PUBLIC_KEY_VALIDATE(full bool, W []byte) int {
+	WP := ECP_fromBytes(W)
+	res := 0
+
+	r := NewBIGints(CURVE_Order)
+
+	if WP.is_infinity() {
+		res = ECDH_INVALID_PUBLIC_KEY
+	}
+	if res == 0 && full {
+		WP = WP.mul(r)
+		if !WP.is_infinity() {
+			res = ECDH_INVALID_PUBLIC_KEY
+		}
+	}
+	return res
+}
+
+/* IEEE-1363 Diffie-Hellman online calculation Z=S.WD */
+func ECPSVDP_DH(S []byte, WD []byte, Z []byte) int {
+	res := 0
+	var T [ECDH_EFS]byte
+
+	s := fromBytes(S)
+
+	W := ECP_fromBytes(WD)
+	if W.is_infinity() {
+		res = ECDH_ERROR
+	}
+
+	if res == 0 {
+		r := NewBIGints(CURVE_Order)
+		s.mod(r)
+		W = W.mul(s)
+		if W.is_infinity() {
+			res = ECDH_ERROR
+		} else {
+			W.getX().toBytes(T[:])
+			for i := 0; i < ECDH_EFS; i++ {
+				Z[i] = T[i]
+			}
+		}
+	}
+	return res
+}
+
+/* IEEE ECDSA Signature, C and D are signature on F using private key S */
+func ECPSP_DSA(RNG *RAND, S []byte, F []byte, C []byte, D []byte) int {
+	var T [ECDH_EFS]byte
+
+	H := NewHASH()
+	H.Process_array(F)
+	B := H.Hash()
+
+	gx := NewBIGints(CURVE_Gx)
+	gy := NewBIGints(CURVE_Gy)
+
+	G := NewECPbigs(gx, gy)
+	r := NewBIGints(CURVE_Order)
+
+	s := fromBytes(S)
+	f := fromBytes(B[:])
+
+	c := NewBIGint(0)
+	d := NewBIGint(0)
+	V := NewECP()
+
+	for d.iszilch() {
+		u := randomnum(r, RNG)
+
+		V.copy(G)
+		V = V.mul(u)
+		vx := V.getX()
+		c.copy(vx)
+		c.mod(r)
+		if c.iszilch() {
+			continue
+		}
+		u.invmodp(r)
+		d.copy(modmul(s, c, r))
+		d.add(f)
+		d.copy(modmul(u, d, r))
+	}
+
+	c.toBytes(T[:])
+	for i := 0; i < ECDH_EFS; i++ {
+		C[i] = T[i]
+	}
+	d.toBytes(T[:])
+	for i := 0; i < ECDH_EFS; i++ {
+		D[i] = T[i]
+	}
+	return 0
+}
+
+/* IEEE1363 ECDSA Signature Verification. Signature C and D on F is verified using public key W */
+func ECPVP_DSA(W []byte, F []byte, C []byte, D []byte) int {
+	res := 0
+
+	H := NewHASH()
+	H.Process_array(F)
+	B := H.Hash()
+
+	gx := NewBIGints(CURVE_Gx)
+	gy := NewBIGints(CURVE_Gy)
+
+	G := NewECPbigs(gx, gy)
+	r := NewBIGints(CURVE_Order)
+
+	c := fromBytes(C)
+	d := fromBytes(D)
+	f := fromBytes(B[:])
+
+	if c.iszilch() || comp(c, r) >= 0 || d.iszilch() || comp(d, r) >= 0 {
+		res = ECDH_INVALID
+	}
+
+	if res == 0 {
+		d.invmodp(r)
+		f.copy(modmul(f, d, r))
+		h2 := modmul(c, d, r)
+
+		WP := ECP_fromBytes(W)
+		if WP.is_infinity() {
+			res = ECDH_ERROR
+		} else {
+			P := NewECP()
+			P.copy(WP)
+
+			P = P.mul2(h2, G, f)
+
+			if P.is_infinity() {
+				res = ECDH_INVALID
+			} else {
+				d = P.getX()
+				d.mod(r)
+
+				if comp(d, c) != 0 {
+					res = ECDH_INVALID
+				}
+			}
+		}
+	}
+
+	return res
+}
+
+/* IEEE1363 ECIES encryption. Encryption of plaintext M uses public key W and produces ciphertext V,C,T */
+func ECIES_ENCRYPT(P1 []byte, P2 []byte, RNG *RAND, W []byte, M []byte, V []byte, T []byte) []byte {
+	var Z [ECDH_EFS]byte
+	var VZ [3*ECDH_EFS + 1]byte
+	var K1 [ECDH_EAS]byte
+	var K2 [ECDH_EAS]byte
+	var U [ECDH_EGS]byte
+
+	if ECDH_KEY_PAIR_GENERATE(RNG, U[:], V) != 0 {
+		return nil
+	}
+	if ECPSVDP_DH(U[:], W, Z[:]) != 0 {
+		return nil
+	}
+
+	for i := 0; i < 2*ECDH_EFS+1; i++ {
+		VZ[i] = V[i]
+	}
+	for i := 0; i < ECDH_EFS; i++ {
+		VZ[2*ECDH_EFS+1+i] = Z[i]
+	}
+
+	K := KDF2(VZ[:], P1, ECDH_EFS)
+
+	for i := 0; i < ECDH_EAS; i++ {
+		K1[i] = K[i]
+		K2[i] = K[ECDH_EAS+i]
+	}
+
+	C := AES_CBC_IV0_ENCRYPT(K1[:], M)
+
+	L2 := inttoBytes(len(P2), 8)
+
+	var AC []byte
+
+	for i := 0; i < len(C); i++ {
+		AC = append(AC, C[i])
+	}
+	for i := 0; i < len(P2); i++ {
+		AC = append(AC, P2[i])
+	}
+	for i := 0; i < 8; i++ {
+		AC = append(AC, L2[i])
+	}
+
+	HMAC(AC, K2[:], T)
+
+	return C
+}
+
+/* IEEE1363 ECIES decryption. Decryption of ciphertext V,C,T using private key U outputs plaintext M */
+func ECIES_DECRYPT(P1 []byte, P2 []byte, V []byte, C []byte, T []byte, U []byte) []byte {
+	var Z [ECDH_EFS]byte
+	var VZ [3*ECDH_EFS + 1]byte
+	var K1 [ECDH_EAS]byte
+	var K2 [ECDH_EAS]byte
+
+	var TAG []byte = T[:]
+
+	if ECPSVDP_DH(U, V, Z[:]) != 0 {
+		return nil
+	}
+
+	for i := 0; i < 2*ECDH_EFS+1; i++ {
+		VZ[i] = V[i]
+	}
+	for i := 0; i < ECDH_EFS; i++ {
+		VZ[2*ECDH_EFS+1+i] = Z[i]
+	}
+
+	K := KDF2(VZ[:], P1, ECDH_EFS)
+
+	for i := 0; i < ECDH_EAS; i++ {
+		K1[i] = K[i]
+		K2[i] = K[ECDH_EAS+i]
+	}
+
+	M := AES_CBC_IV0_DECRYPT(K1[:], C)
+
+	if M == nil {
+		return nil
+	}
+
+	L2 := inttoBytes(len(P2), 8)
+
+	var AC []byte
+
+	for i := 0; i < len(C); i++ {
+		AC = append(AC, C[i])
+	}
+	for i := 0; i < len(P2); i++ {
+		AC = append(AC, P2[i])
+	}
+	for i := 0; i < 8; i++ {
+		AC = append(AC, L2[i])
+	}
+
+	HMAC(AC, K2[:], TAG)
+
+	same := true
+	for i := 0; i < len(T); i++ {
+		if T[i] != TAG[i] {
+			same = false
+		}
+	}
+	if !same {
+		return nil
+	}
+
+	return M
+}
+
+func ECDH_printBinary(array []byte) {
+	for i := 0; i < len(array); i++ {
+		fmt.Printf("%02x", array[i])
+	}
+	fmt.Printf("\n")
+}

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/85fabaa6/go/amcl-go/ECP.go
----------------------------------------------------------------------
diff --git a/go/amcl-go/ECP.go b/go/amcl-go/ECP.go
new file mode 100644
index 0000000..3ed1d04
--- /dev/null
+++ b/go/amcl-go/ECP.go
@@ -0,0 +1,1076 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+package amcl
+
+//import "fmt"
+
+/* Elliptic Curve Point Structure */
+
+type ECP struct {
+	x   *FP
+	y   *FP
+	z   *FP
+	INF bool
+}
+
+/* Constructors */
+func NewECP() *ECP {
+	E := new(ECP)
+	E.x = NewFPint(0)
+	E.y = NewFPint(0)
+	E.z = NewFPint(0)
+	E.INF = true
+	return E
+}
+
+/* set (x,y) from two BIGs */
+func NewECPbigs(ix *BIG, iy *BIG) *ECP {
+	E := new(ECP)
+	E.x = NewFPbig(ix)
+	E.y = NewFPbig(iy)
+	E.z = NewFPint(1)
+	rhs := RHS(E.x)
+
+	if CURVETYPE == MONTGOMERY {
+		if rhs.jacobi() == 1 {
+			E.INF = false
+		} else {
+			E.inf()
+		}
+	} else {
+		y2 := NewFPcopy(E.y)
+		y2.sqr()
+		if y2.equals(rhs) {
+			E.INF = false
+		} else {
+			E.inf()
+		}
+	}
+	return E
+}
+
+/* set (x,y) from BIG and a bit */
+func NewECPbigint(ix *BIG, s int) *ECP {
+	E := new(ECP)
+	E.x = NewFPbig(ix)
+	E.y = NewFPint(0)
+	rhs := RHS(E.x)
+	E.z = NewFPint(1)
+	if rhs.jacobi() == 1 {
+		ny := rhs.sqrt()
+		if ny.redc().parity() != s {
+			ny.neg()
+		}
+		E.y.copy(ny)
+		E.INF = false
+	} else {
+		E.inf()
+	}
+	return E
+}
+
+/* set from x - calculate y from curve equation */
+func NewECPbig(ix *BIG) *ECP {
+	E := new(ECP)
+	E.x = NewFPbig(ix)
+	E.y = NewFPint(0)
+	rhs := RHS(E.x)
+	E.z = NewFPint(1)
+	if rhs.jacobi() == 1 {
+		if CURVETYPE != MONTGOMERY {
+			E.y.copy(rhs.sqrt())
+		}
+		E.INF = false
+	} else {
+		E.INF = true
+	}
+	return E
+}
+
+/* test for O point-at-infinity */
+func (E *ECP) is_infinity() bool {
+	if CURVETYPE == EDWARDS {
+		E.x.reduce()
+		E.y.reduce()
+		E.z.reduce()
+		return (E.x.iszilch() && E.y.equals(E.z))
+	} else {
+		return E.INF
+	}
+}
+
+/* Conditional swap of P and Q dependant on d */
+func (E *ECP) cswap(Q *ECP, d int32) {
+	E.x.cswap(Q.x, d)
+	if CURVETYPE != MONTGOMERY {
+		E.y.cswap(Q.y, d)
+	}
+	E.z.cswap(Q.z, d)
+	if CURVETYPE != EDWARDS {
+		bd := true
+		if d == 0 {
+			bd = false
+		}
+		bd = bd && (E.INF != Q.INF)
+		E.INF = (bd != E.INF)
+		Q.INF = (bd != Q.INF)
+	}
+}
+
+/* Conditional move of Q to P dependant on d */
+func (E *ECP) cmove(Q *ECP, d int32) {
+	E.x.cmove(Q.x, d)
+	if CURVETYPE != MONTGOMERY {
+		E.y.cmove(Q.y, d)
+	}
+	E.z.cmove(Q.z, d)
+	if CURVETYPE != EDWARDS {
+		bd := true
+		if d == 0 {
+			bd = false
+		}
+		E.INF = (E.INF != ((E.INF != Q.INF) && bd))
+	}
+}
+
+/* return 1 if b==c, no branching */
+func teq(b int32, c int32) int32 {
+	x := b ^ c
+	x -= 1 // if x=0, x now -1
+	return ((x >> 31) & 1)
+}
+
+/* this=P */
+func (E *ECP) copy(P *ECP) {
+	E.x.copy(P.x)
+	if CURVETYPE != MONTGOMERY {
+		E.y.copy(P.y)
+	}
+	E.z.copy(P.z)
+	E.INF = P.INF
+}
+
+/* this=-this */
+func (E *ECP) neg() {
+	if E.is_infinity() {
+		return
+	}
+	if CURVETYPE == WEIERSTRASS {
+		E.y.neg()
+		E.y.reduce()
+	}
+	if CURVETYPE == EDWARDS {
+		E.x.neg()
+		E.x.reduce()
+	}
+	return
+}
+
+/* Constant time select from pre-computed table */
+func (E *ECP) selector(W []*ECP, b int32) {
+	MP := NewECP()
+	m := b >> 31
+	babs := (b ^ m) - m
+
+	babs = (babs - 1) / 2
+
+	E.cmove(W[0], teq(babs, 0)) // conditional move
+	E.cmove(W[1], teq(babs, 1))
+	E.cmove(W[2], teq(babs, 2))
+	E.cmove(W[3], teq(babs, 3))
+	E.cmove(W[4], teq(babs, 4))
+	E.cmove(W[5], teq(babs, 5))
+	E.cmove(W[6], teq(babs, 6))
+	E.cmove(W[7], teq(babs, 7))
+
+	MP.copy(E)
+	MP.neg()
+	E.cmove(MP, (m & 1))
+}
+
+/* set this=O */
+func (E *ECP) inf() {
+	E.INF = true
+	E.x.zero()
+	E.y.one()
+	E.z.one()
+}
+
+/* Test P == Q */
+func (E *ECP) equals(Q *ECP) bool {
+	if E.is_infinity() && Q.is_infinity() {
+		return true
+	}
+	if E.is_infinity() || Q.is_infinity() {
+		return false
+	}
+	if CURVETYPE == WEIERSTRASS {
+		zs2 := NewFPcopy(E.z)
+		zs2.sqr()
+		zo2 := NewFPcopy(Q.z)
+		zo2.sqr()
+		zs3 := NewFPcopy(zs2)
+		zs3.mul(E.z)
+		zo3 := NewFPcopy(zo2)
+		zo3.mul(Q.z)
+		zs2.mul(Q.x)
+		zo2.mul(E.x)
+		if !zs2.equals(zo2) {
+			return false
+		}
+		zs3.mul(Q.y)
+		zo3.mul(E.y)
+		if !zs3.equals(zo3) {
+			return false
+		}
+	} else {
+		a := NewFPint(0)
+		b := NewFPint(0)
+		a.copy(E.x)
+		a.mul(Q.z)
+		a.reduce()
+		b.copy(Q.x)
+		b.mul(E.z)
+		b.reduce()
+		if !a.equals(b) {
+			return false
+		}
+		if CURVETYPE == EDWARDS {
+			a.copy(E.y)
+			a.mul(Q.z)
+			a.reduce()
+			b.copy(Q.y)
+			b.mul(E.z)
+			b.reduce()
+			if !a.equals(b) {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+/* Calculate RHS of curve equation */
+func RHS(x *FP) *FP {
+	x.norm()
+	r := NewFPcopy(x)
+	r.sqr()
+
+	if CURVETYPE == WEIERSTRASS { // x^3+Ax+B
+		b := NewFPbig(NewBIGints(CURVE_B))
+		r.mul(x)
+		if CURVE_A == -3 {
+			cx := NewFPcopy(x)
+			cx.imul(3)
+			cx.neg()
+			cx.norm()
+			r.add(cx)
+		}
+		r.add(b)
+	}
+	if CURVETYPE == EDWARDS { // (Ax^2-1)/(Bx^2-1)
+		b := NewFPbig(NewBIGints(CURVE_B))
+
+		one := NewFPint(1)
+		b.mul(r)
+		b.sub(one)
+		if CURVE_A == -1 {
+			r.neg()
+		}
+		r.sub(one)
+		b.inverse()
+		r.mul(b)
+	}
+	if CURVETYPE == MONTGOMERY { // x^3+Ax^2+x
+		x3 := NewFPint(0)
+		x3.copy(r)
+		x3.mul(x)
+		r.imul(CURVE_A)
+		r.add(x3)
+		r.add(x)
+	}
+	r.reduce()
+	return r
+}
+
+/* set to affine - from (x,y,z) to (x,y) */
+func (E *ECP) affine() {
+	if E.is_infinity() {
+		return
+	}
+	one := NewFPint(1)
+	if E.z.equals(one) {
+		return
+	}
+	E.z.inverse()
+	if CURVETYPE == WEIERSTRASS {
+		z2 := NewFPcopy(E.z)
+		z2.sqr()
+		E.x.mul(z2)
+		E.x.reduce()
+		E.y.mul(z2)
+		E.y.mul(E.z)
+		E.y.reduce()
+	}
+	if CURVETYPE == EDWARDS {
+		E.x.mul(E.z)
+		E.x.reduce()
+		E.y.mul(E.z)
+		E.y.reduce()
+	}
+	if CURVETYPE == MONTGOMERY {
+		E.x.mul(E.z)
+		E.x.reduce()
+	}
+	E.z.one()
+}
+
+/* extract x as a BIG */
+func (E *ECP) getX() *BIG {
+	E.affine()
+	return E.x.redc()
+}
+
+/* extract y as a BIG */
+func (E *ECP) getY() *BIG {
+	E.affine()
+	return E.y.redc()
+}
+
+/* get sign of Y */
+func (E *ECP) getS() int {
+	E.affine()
+	y := E.getY()
+	return y.parity()
+}
+
+/* extract x as an FP */
+func (E *ECP) getx() *FP {
+	return E.x
+}
+
+/* extract y as an FP */
+func (E *ECP) gety() *FP {
+	return E.y
+}
+
+/* extract z as an FP */
+func (E *ECP) getz() *FP {
+	return E.z
+}
+
+/* convert to byte array */
+func (E *ECP) toBytes(b []byte) {
+	var t [int(MODBYTES)]byte
+	MB := int(MODBYTES)
+	if CURVETYPE != MONTGOMERY {
+		b[0] = 0x04
+	} else {
+		b[0] = 0x02
+	}
+
+	E.affine()
+	E.x.redc().toBytes(t[:])
+	for i := 0; i < MB; i++ {
+		b[i+1] = t[i]
+	}
+	if CURVETYPE != MONTGOMERY {
+		E.y.redc().toBytes(t[:])
+		for i := 0; i < MB; i++ {
+			b[i+MB+1] = t[i]
+		}
+	}
+}
+
+/* convert from byte array to point */
+func ECP_fromBytes(b []byte) *ECP {
+	var t [int(MODBYTES)]byte
+	MB := int(MODBYTES)
+	p := NewBIGints(Modulus)
+
+	for i := 0; i < MB; i++ {
+		t[i] = b[i+1]
+	}
+	px := fromBytes(t[:])
+	if comp(px, p) >= 0 {
+		return NewECP()
+	}
+
+	if b[0] == 0x04 {
+		for i := 0; i < MB; i++ {
+			t[i] = b[i+MB+1]
+		}
+		py := fromBytes(t[:])
+		if comp(py, p) >= 0 {
+			return NewECP()
+		}
+		return NewECPbigs(px, py)
+	} else {
+		return NewECPbig(px)
+	}
+}
+
+/* convert to hex string */
+func (E *ECP) toString() string {
+	if E.is_infinity() {
+		return "infinity"
+	}
+	E.affine()
+	if CURVETYPE == MONTGOMERY {
+		return "(" + E.x.redc().toString() + ")"
+	} else {
+		return "(" + E.x.redc().toString() + "," + E.y.redc().toString() + ")"
+	}
+}
+
+/* this*=2 */
+func (E *ECP) dbl() {
+	if CURVETYPE == WEIERSTRASS {
+		if E.INF {
+			return
+		}
+		if E.y.iszilch() {
+			E.inf()
+			return
+		}
+
+		w1 := NewFPcopy(E.x)
+		w6 := NewFPcopy(E.z)
+		w2 := NewFPint(0)
+		w3 := NewFPcopy(E.x)
+		w8 := NewFPcopy(E.x)
+
+		if CURVE_A == -3 {
+			w6.sqr()
+			w1.copy(w6)
+			w1.neg()
+			w3.add(w1)
+
+			w8.add(w6)
+
+			w3.mul(w8)
+			w8.copy(w3)
+			w8.imul(3)
+		} else {
+			w1.sqr()
+			w8.copy(w1)
+			w8.imul(3)
+		}
+
+		w2.copy(E.y)
+		w2.sqr()
+		w3.copy(E.x)
+		w3.mul(w2)
+		w3.imul(4)
+		w1.copy(w3)
+		w1.neg()
+		//		w1.norm();
+
+		E.x.copy(w8)
+		E.x.sqr()
+		E.x.add(w1)
+		E.x.add(w1)
+		//		x.reduce();
+		E.x.norm()
+
+		E.z.mul(E.y)
+		E.z.add(E.z)
+
+		w2.add(w2)
+		w2.sqr()
+		w2.add(w2)
+		w3.sub(E.x)
+		E.y.copy(w8)
+		E.y.mul(w3)
+		//		w2.norm();
+		E.y.sub(w2)
+		//		y.reduce();
+		//		z.reduce();
+		E.y.norm()
+		E.z.norm()
+
+	}
+	if CURVETYPE == EDWARDS {
+		C := NewFPcopy(E.x)
+		D := NewFPcopy(E.y)
+		H := NewFPcopy(E.z)
+		J := NewFPint(0)
+
+		E.x.mul(E.y)
+		E.x.add(E.x)
+		C.sqr()
+		D.sqr()
+		if CURVE_A == -1 {
+			C.neg()
+		}
+		E.y.copy(C)
+		E.y.add(D)
+		//		y.norm();
+		H.sqr()
+		H.add(H)
+		E.z.copy(E.y)
+		J.copy(E.y)
+		J.sub(H)
+		E.x.mul(J)
+		C.sub(D)
+		E.y.mul(C)
+		E.z.mul(J)
+
+		E.x.norm()
+		E.y.norm()
+		E.z.norm()
+	}
+	if CURVETYPE == MONTGOMERY {
+		A := NewFPcopy(E.x)
+		B := NewFPcopy(E.x)
+		AA := NewFPint(0)
+		BB := NewFPint(0)
+		C := NewFPint(0)
+
+		if E.INF {
+			return
+		}
+
+		A.add(E.z)
+		AA.copy(A)
+		AA.sqr()
+		B.sub(E.z)
+		BB.copy(B)
+		BB.sqr()
+		C.copy(AA)
+		C.sub(BB)
+		//		C.norm();
+
+		E.x.copy(AA)
+		E.x.mul(BB)
+
+		A.copy(C)
+		A.imul((CURVE_A + 2) / 4)
+
+		BB.add(A)
+		E.z.copy(BB)
+		E.z.mul(C)
+		//		x.reduce();
+		//		z.reduce();
+		E.x.norm()
+		E.z.norm()
+	}
+	return
+}
+
+/* this+=Q */
+func (E *ECP) add(Q *ECP) {
+	if CURVETYPE == WEIERSTRASS {
+		if E.INF {
+			E.copy(Q)
+			return
+		}
+		if Q.INF {
+			return
+		}
+
+		aff := false
+
+		one := NewFPint(1)
+		if Q.z.equals(one) {
+			aff = true
+		}
+
+		var A, C *FP
+		B := NewFPcopy(E.z)
+		D := NewFPcopy(E.z)
+		if !aff {
+			A = NewFPcopy(Q.z)
+			C = NewFPcopy(Q.z)
+
+			A.sqr()
+			B.sqr()
+			C.mul(A)
+			D.mul(B)
+
+			A.mul(E.x)
+			C.mul(E.y)
+		} else {
+			A = NewFPcopy(E.x)
+			C = NewFPcopy(E.y)
+
+			B.sqr()
+			D.mul(B)
+		}
+
+		B.mul(Q.x)
+		B.sub(A)
+		D.mul(Q.y)
+		D.sub(C)
+
+		if B.iszilch() {
+			if D.iszilch() {
+				E.dbl()
+				return
+			} else {
+				E.INF = true
+				return
+			}
+		}
+
+		if !aff {
+			E.z.mul(Q.z)
+		}
+		E.z.mul(B)
+
+		e := NewFPcopy(B)
+		e.sqr()
+		B.mul(e)
+		A.mul(e)
+
+		e.copy(A)
+		e.add(A)
+		e.add(B)
+		E.x.copy(D)
+		E.x.sqr()
+		E.x.sub(e)
+
+		A.sub(E.x)
+		E.y.copy(A)
+		E.y.mul(D)
+		C.mul(B)
+		E.y.sub(C)
+
+		//	x.reduce();
+		//	y.reduce();
+		//	z.reduce();
+		E.x.norm()
+		E.y.norm()
+		E.z.norm()
+	}
+	if CURVETYPE == EDWARDS {
+		b := NewFPbig(NewBIGints(CURVE_B))
+		A := NewFPcopy(E.z)
+		B := NewFPint(0)
+		C := NewFPcopy(E.x)
+		D := NewFPcopy(E.y)
+		EE := NewFPint(0)
+		F := NewFPint(0)
+		G := NewFPint(0)
+		//H:=NewFPint(0)
+		//I:=NewFPint(0)
+
+		A.mul(Q.z)
+		B.copy(A)
+		B.sqr()
+		C.mul(Q.x)
+		D.mul(Q.y)
+
+		EE.copy(C)
+		EE.mul(D)
+		EE.mul(b)
+		F.copy(B)
+		F.sub(EE)
+		G.copy(B)
+		G.add(EE)
+		C.add(D)
+
+		if CURVE_A == 1 {
+			EE.copy(D)
+			D.sub(C)
+		}
+
+		B.copy(E.x)
+		B.add(E.y)
+		D.copy(Q.x)
+		D.add(Q.y)
+		B.mul(D)
+		B.sub(C)
+		B.mul(F)
+		E.x.copy(A)
+		E.x.mul(B)
+
+		if CURVE_A == 1 {
+			C.copy(EE)
+			C.mul(G)
+		}
+		if CURVE_A == -1 {
+			C.mul(G)
+		}
+		E.y.copy(A)
+		E.y.mul(C)
+		E.z.copy(F)
+		E.z.mul(G)
+		//	x.reduce(); y.reduce(); z.reduce();
+		E.x.norm()
+		E.y.norm()
+		E.z.norm()
+	}
+	return
+}
+
+/* Differential Add for Montgomery curves. this+=Q where W is this-Q and is affine. */
+func (E *ECP) dadd(Q *ECP, W *ECP) {
+	A := NewFPcopy(E.x)
+	B := NewFPcopy(E.x)
+	C := NewFPcopy(Q.x)
+	D := NewFPcopy(Q.x)
+	DA := NewFPint(0)
+	CB := NewFPint(0)
+
+	A.add(E.z)
+	B.sub(E.z)
+
+	C.add(Q.z)
+	D.sub(Q.z)
+
+	DA.copy(D)
+	DA.mul(A)
+	CB.copy(C)
+	CB.mul(B)
+
+	A.copy(DA)
+	A.add(CB)
+	A.sqr()
+	B.copy(DA)
+	B.sub(CB)
+	B.sqr()
+
+	E.x.copy(A)
+	E.z.copy(W.x)
+	E.z.mul(B)
+
+	if E.z.iszilch() {
+		E.inf()
+	} else {
+		E.INF = false
+	}
+
+	//	x.reduce();
+	E.x.norm()
+}
+
+/* this-=Q */
+func (E *ECP) sub(Q *ECP) {
+	Q.neg()
+	E.add(Q)
+	Q.neg()
+}
+
+func multiaffine(m int, P []*ECP) {
+	t1 := NewFPint(0)
+	t2 := NewFPint(0)
+
+	var work []*FP
+
+	for i := 0; i < m; i++ {
+		work = append(work, NewFPint(0))
+	}
+
+	work[0].one()
+	work[1].copy(P[0].z)
+
+	for i := 2; i < m; i++ {
+		work[i].copy(work[i-1])
+		work[i].mul(P[i-1].z)
+	}
+
+	t1.copy(work[m-1])
+	t1.mul(P[m-1].z)
+	t1.inverse()
+	t2.copy(P[m-1].z)
+	work[m-1].mul(t1)
+
+	for i := m - 2; ; i-- {
+		if i == 0 {
+			work[0].copy(t1)
+			work[0].mul(t2)
+			break
+		}
+		work[i].mul(t2)
+		work[i].mul(t1)
+		t2.mul(P[i].z)
+	}
+	/* now work[] contains inverses of all Z coordinates */
+
+	for i := 0; i < m; i++ {
+		P[i].z.one()
+		t1.copy(work[i])
+		t1.sqr()
+		P[i].x.mul(t1)
+		t1.mul(work[i])
+		P[i].y.mul(t1)
+	}
+}
+
+/* constant time multiply by small integer of length bts - use ladder */
+func (E *ECP) pinmul(e int32, bts int32) *ECP {
+	if CURVETYPE == MONTGOMERY {
+		return E.mul(NewBIGint(int(e)))
+	} else {
+		P := NewECP()
+		R0 := NewECP()
+		R1 := NewECP()
+		R1.copy(E)
+
+		for i := bts - 1; i >= 0; i-- {
+			b := (e >> uint32(i)) & 1
+			P.copy(R1)
+			P.add(R0)
+			R0.cswap(R1, b)
+			R1.copy(P)
+			R0.dbl()
+			R0.cswap(R1, b)
+		}
+		P.copy(R0)
+		P.affine()
+		return P
+	}
+}
+
+/* return e.this */
+
+func (E *ECP) mul(e *BIG) *ECP {
+	if e.iszilch() || E.is_infinity() {
+		return NewECP()
+	}
+	P := NewECP()
+	if CURVETYPE == MONTGOMERY {
+		/* use Ladder */
+		D := NewECP()
+		R0 := NewECP()
+		R0.copy(E)
+		R1 := NewECP()
+		R1.copy(E)
+		R1.dbl()
+		D.copy(E)
+		D.affine()
+		nb := e.nbits()
+		for i := nb - 2; i >= 0; i-- {
+			b := int32(e.bit(i))
+			P.copy(R1)
+			P.dadd(R0, D)
+			R0.cswap(R1, b)
+			R1.copy(P)
+			R0.dbl()
+			R0.cswap(R1, b)
+		}
+		P.copy(R0)
+	} else {
+		// fixed size windows
+		mt := NewBIG()
+		t := NewBIG()
+		Q := NewECP()
+		C := NewECP()
+
+		var W []*ECP
+		var w [1 + (NLEN*int(BASEBITS)+3)/4]int8
+
+		E.affine()
+
+		Q.copy(E)
+		Q.dbl()
+
+		W = append(W, NewECP())
+		W[0].copy(E)
+
+		for i := 1; i < 8; i++ {
+			W = append(W, NewECP())
+			W[i].copy(W[i-1])
+			W[i].add(Q)
+		}
+
+		// convert the table to affine
+		if CURVETYPE == WEIERSTRASS {
+			multiaffine(8, W[:])
+		}
+
+		// make exponent odd - add 2P if even, P if odd
+		t.copy(e)
+		s := int32(t.parity())
+		t.inc(1)
+		t.norm()
+		ns := int32(t.parity())
+		mt.copy(t)
+		mt.inc(1)
+		mt.norm()
+		t.cmove(mt, s)
+		Q.cmove(E, ns)
+		C.copy(Q)
+
+		nb := 1 + (t.nbits()+3)/4
+
+		// convert exponent to signed 4-bit window
+		for i := 0; i < nb; i++ {
+			w[i] = int8(t.lastbits(5) - 16)
+			t.dec(int(w[i]))
+			t.norm()
+			t.fshr(4)
+		}
+		w[nb] = int8(t.lastbits(5))
+
+		P.copy(W[(int(w[nb])-1)/2])
+		for i := nb - 1; i >= 0; i-- {
+			Q.selector(W, int32(w[i]))
+			P.dbl()
+			P.dbl()
+			P.dbl()
+			P.dbl()
+			P.add(Q)
+		}
+		P.sub(C) /* apply correction */
+	}
+	P.affine()
+	return P
+}
+
+/* Return e.this+f.Q */
+
+func (E *ECP) mul2(e *BIG, Q *ECP, f *BIG) *ECP {
+	te := NewBIG()
+	tf := NewBIG()
+	mt := NewBIG()
+	S := NewECP()
+	T := NewECP()
+	C := NewECP()
+	var W []*ECP
+	//ECP[] W=new ECP[8];
+	var w [1 + (NLEN*int(BASEBITS)+1)/2]int8
+
+	E.affine()
+	Q.affine()
+
+	te.copy(e)
+	tf.copy(f)
+
+	// precompute table
+	for i := 0; i < 8; i++ {
+		W = append(W, NewECP())
+	}
+	W[1].copy(E)
+	W[1].sub(Q)
+	W[2].copy(E)
+	W[2].add(Q)
+	S.copy(Q)
+	S.dbl()
+	W[0].copy(W[1])
+	W[0].sub(S)
+	W[3].copy(W[2])
+	W[3].add(S)
+	T.copy(E)
+	T.dbl()
+	W[5].copy(W[1])
+	W[5].add(T)
+	W[6].copy(W[2])
+	W[6].add(T)
+	W[4].copy(W[5])
+	W[4].sub(S)
+	W[7].copy(W[6])
+	W[7].add(S)
+
+	// convert the table to affine
+	if CURVETYPE == WEIERSTRASS {
+		multiaffine(8, W)
+	}
+
+	// if multiplier is odd, add 2, else add 1 to multiplier, and add 2P or P to correction
+
+	s := int32(te.parity())
+	te.inc(1)
+	te.norm()
+	ns := int32(te.parity())
+	mt.copy(te)
+	mt.inc(1)
+	mt.norm()
+	te.cmove(mt, s)
+	T.cmove(E, ns)
+	C.copy(T)
+
+	s = int32(tf.parity())
+	tf.inc(1)
+	tf.norm()
+	ns = int32(tf.parity())
+	mt.copy(tf)
+	mt.inc(1)
+	mt.norm()
+	tf.cmove(mt, s)
+	S.cmove(Q, ns)
+	C.add(S)
+
+	mt.copy(te)
+	mt.add(tf)
+	mt.norm()
+	nb := 1 + (mt.nbits()+1)/2
+
+	// convert exponent to signed 2-bit window
+	for i := 0; i < nb; i++ {
+		a := (te.lastbits(3) - 4)
+		te.dec(int(a))
+		te.norm()
+		te.fshr(2)
+		b := (tf.lastbits(3) - 4)
+		tf.dec(int(b))
+		tf.norm()
+		tf.fshr(2)
+		w[i] = int8(4*a + b)
+	}
+	w[nb] = int8(4*te.lastbits(3) + tf.lastbits(3))
+	S.copy(W[(w[nb]-1)/2])
+
+	for i := nb - 1; i >= 0; i-- {
+		T.selector(W, int32(w[i]))
+		S.dbl()
+		S.dbl()
+		S.add(T)
+	}
+	S.sub(C) /* apply correction */
+	S.affine()
+	return S
+}
+
+/*
+func main() {
+	Gx:=NewBIGints(CURVE_Gx);
+	var Gy *BIG
+	var P *ECP
+
+	if CURVETYPE!=MONTGOMERY {Gy=NewBIGints(CURVE_Gy)}
+	r:=NewBIGints(CURVE_Order)
+
+	//r.dec(7);
+
+	fmt.Printf("Gx= "+Gx.toString())
+	fmt.Printf("\n")
+
+	if CURVETYPE!=MONTGOMERY {
+		fmt.Printf("Gy= "+Gy.toString())
+		fmt.Printf("\n")
+	}
+
+	if CURVETYPE!=MONTGOMERY {
+		P=NewECPbigs(Gx,Gy)
+	} else  {P=NewECPbig(Gx)}
+
+	fmt.Printf("P= "+P.toString());
+	fmt.Printf("\n")
+
+	R:=P.mul(r);
+		//for (int i=0;i<10000;i++)
+		//	R=P.mul(r);
+
+	fmt.Printf("R= "+R.toString())
+	fmt.Printf("\n")
+}
+*/

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/85fabaa6/go/amcl-go/ECP2.go
----------------------------------------------------------------------
diff --git a/go/amcl-go/ECP2.go b/go/amcl-go/ECP2.go
new file mode 100644
index 0000000..6770378
--- /dev/null
+++ b/go/amcl-go/ECP2.go
@@ -0,0 +1,672 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL Weierstrass elliptic curve functions over FP2 */
+
+package amcl
+
+//import "fmt"
+
+type ECP2 struct {
+	x   *FP2
+	y   *FP2
+	z   *FP2
+	INF bool
+}
+
+func NewECP2() *ECP2 {
+	E := new(ECP2)
+	E.x = NewFP2int(0)
+	E.y = NewFP2int(1)
+	E.z = NewFP2int(1)
+	E.INF = true
+	return E
+}
+
+/* Test this=O? */
+func (E *ECP2) is_infinity() bool {
+	return E.INF
+}
+
+/* copy this=P */
+func (E *ECP2) copy(P *ECP2) {
+	E.x.copy(P.x)
+	E.y.copy(P.y)
+	E.z.copy(P.z)
+	E.INF = P.INF
+}
+
+/* set this=O */
+func (E *ECP2) inf() {
+	E.INF = true
+	E.x.zero()
+	E.y.zero()
+	E.z.zero()
+}
+
+/* set this=-this */
+func (E *ECP2) neg() {
+	if E.is_infinity() {
+		return
+	}
+	E.y.neg()
+	E.y.reduce()
+}
+
+/* Conditional move of Q to P dependant on d */
+func (E *ECP2) cmove(Q *ECP2, d int32) {
+	E.x.cmove(Q.x, d)
+	E.y.cmove(Q.y, d)
+	E.z.cmove(Q.z, d)
+
+	var bd bool
+	if d == 0 {
+		bd = false
+	} else {
+		bd = true
+	}
+	E.INF = (E.INF != (E.INF != Q.INF) && bd)
+}
+
+/* Constant time select from pre-computed table */
+func (E *ECP2) selector(W []*ECP2, b int32) {
+	MP := NewECP2()
+	m := b >> 31
+	babs := (b ^ m) - m
+
+	babs = (babs - 1) / 2
+
+	E.cmove(W[0], teq(babs, 0)) // conditional move
+	E.cmove(W[1], teq(babs, 1))
+	E.cmove(W[2], teq(babs, 2))
+	E.cmove(W[3], teq(babs, 3))
+	E.cmove(W[4], teq(babs, 4))
+	E.cmove(W[5], teq(babs, 5))
+	E.cmove(W[6], teq(babs, 6))
+	E.cmove(W[7], teq(babs, 7))
+
+	MP.copy(E)
+	MP.neg()
+	E.cmove(MP, (m & 1))
+}
+
+/* Test if P == Q */
+func (E *ECP2) equals(Q *ECP2) bool {
+	if E.is_infinity() && Q.is_infinity() {
+		return true
+	}
+	if E.is_infinity() || Q.is_infinity() {
+		return false
+	}
+
+	zs2 := NewFP2copy(E.z)
+	zs2.sqr()
+	zo2 := NewFP2copy(Q.z)
+	zo2.sqr()
+	zs3 := NewFP2copy(zs2)
+	zs3.mul(E.z)
+	zo3 := NewFP2copy(zo2)
+	zo3.mul(Q.z)
+	zs2.mul(Q.x)
+	zo2.mul(E.x)
+	if !zs2.equals(zo2) {
+		return false
+	}
+	zs3.mul(Q.y)
+	zo3.mul(E.y)
+	if !zs3.equals(zo3) {
+		return false
+	}
+
+	return true
+}
+
+/* set to Affine - (x,y,z) to (x,y) */
+func (E *ECP2) affine() {
+	if E.is_infinity() {
+		return
+	}
+	one := NewFP2int(1)
+	if E.z.equals(one) {
+		return
+	}
+	E.z.inverse()
+
+	z2 := NewFP2copy(E.z)
+	z2.sqr()
+	E.x.mul(z2)
+	E.x.reduce()
+	E.y.mul(z2)
+	E.y.mul(E.z)
+	E.y.reduce()
+	E.z.copy(one)
+}
+
+/* extract affine x as FP2 */
+func (E *ECP2) getX() *FP2 {
+	E.affine()
+	return E.x
+}
+
+/* extract affine y as FP2 */
+func (E *ECP2) getY() *FP2 {
+	E.affine()
+	return E.y
+}
+
+/* extract projective x */
+func (E *ECP2) getx() *FP2 {
+	return E.x
+}
+
+/* extract projective y */
+func (E *ECP2) gety() *FP2 {
+	return E.y
+}
+
+/* extract projective z */
+func (E *ECP2) getz() *FP2 {
+	return E.z
+}
+
+/* convert to byte array */
+func (E *ECP2) toBytes(b []byte) {
+	var t [int(MODBYTES)]byte
+	MB := int(MODBYTES)
+
+	E.affine()
+	E.x.getA().toBytes(t[:])
+	for i := 0; i < MB; i++ {
+		b[i] = t[i]
+	}
+	E.x.getB().toBytes(t[:])
+	for i := 0; i < MB; i++ {
+		b[i+MB] = t[i]
+	}
+
+	E.y.getA().toBytes(t[:])
+	for i := 0; i < MB; i++ {
+		b[i+2*MB] = t[i]
+	}
+	E.y.getB().toBytes(t[:])
+	for i := 0; i < MB; i++ {
+		b[i+3*MB] = t[i]
+	}
+}
+
+/* convert from byte array to point */
+func ECP2_fromBytes(b []byte) *ECP2 {
+	var t [int(MODBYTES)]byte
+	MB := int(MODBYTES)
+
+	for i := 0; i < MB; i++ {
+		t[i] = b[i]
+	}
+	ra := fromBytes(t[:])
+	for i := 0; i < MB; i++ {
+		t[i] = b[i+MB]
+	}
+	rb := fromBytes(t[:])
+	rx := NewFP2bigs(ra, rb)
+
+	for i := 0; i < MB; i++ {
+		t[i] = b[i+2*MB]
+	}
+	ra = fromBytes(t[:])
+	for i := 0; i < MB; i++ {
+		t[i] = b[i+3*MB]
+	}
+	rb = fromBytes(t[:])
+	ry := NewFP2bigs(ra, rb)
+
+	return NewECP2fp2s(rx, ry)
+}
+
+/* convert this to hex string */
+func (E *ECP2) toString() string {
+	if E.is_infinity() {
+		return "infinity"
+	}
+	E.affine()
+	return "(" + E.x.toString() + "," + E.y.toString() + ")"
+}
+
+/* Calculate RHS of twisted curve equation x^3+B/i */
+func RHS2(x *FP2) *FP2 {
+	x.norm()
+	r := NewFP2copy(x)
+	r.sqr()
+	b := NewFP2big(NewBIGints(CURVE_B))
+	b.div_ip()
+	r.mul(x)
+	r.add(b)
+
+	r.reduce()
+	return r
+}
+
+/* construct this from (x,y) - but set to O if not on curve */
+func NewECP2fp2s(ix *FP2, iy *FP2) *ECP2 {
+	E := new(ECP2)
+	E.x = NewFP2copy(ix)
+	E.y = NewFP2copy(iy)
+	E.z = NewFP2int(1)
+	rhs := RHS2(E.x)
+	y2 := NewFP2copy(E.y)
+	y2.sqr()
+	if y2.equals(rhs) {
+		E.INF = false
+	} else {
+		E.x.zero()
+		E.INF = true
+	}
+	return E
+}
+
+/* construct this from x - but set to O if not on curve */
+func NewECP2fp2(ix *FP2) *ECP2 {
+	E := new(ECP2)
+	E.x = NewFP2copy(ix)
+	E.y = NewFP2int(1)
+	E.z = NewFP2int(1)
+	rhs := RHS2(E.x)
+	if rhs.sqrt() {
+		E.y.copy(rhs)
+		E.INF = false
+	} else {
+		E.x.zero()
+		E.INF = true
+	}
+	return E
+}
+
+/* this+=this */
+func (E *ECP2) dbl() int {
+	if E.INF {
+		return -1
+	}
+	if E.y.iszilch() {
+		E.inf()
+		return -1
+	}
+
+	w1 := NewFP2copy(E.x)
+	w2 := NewFP2int(0)
+	w3 := NewFP2copy(E.x)
+	w8 := NewFP2copy(E.x)
+
+	w1.sqr()
+	w8.copy(w1)
+	w8.imul(3)
+
+	w2.copy(E.y)
+	w2.sqr()
+	w3.copy(E.x)
+	w3.mul(w2)
+	w3.imul(4)
+	w1.copy(w3)
+	w1.neg()
+	//	w1.norm();
+
+	E.x.copy(w8)
+	E.x.sqr()
+	E.x.add(w1)
+	E.x.add(w1)
+	E.x.norm()
+
+	E.z.mul(E.y)
+	E.z.add(E.z)
+
+	w2.add(w2)
+	w2.sqr()
+	w2.add(w2)
+	w3.sub(E.x)
+	E.y.copy(w8)
+	E.y.mul(w3)
+	//	w2.norm();
+	E.y.sub(w2)
+
+	E.y.norm()
+	E.z.norm()
+
+	return 1
+}
+
+/* this+=Q - return 0 for add, 1 for double, -1 for O */
+func (E *ECP2) add(Q *ECP2) int {
+	if E.INF {
+		E.copy(Q)
+		return -1
+	}
+	if Q.INF {
+		return -1
+	}
+
+	aff := false
+
+	if Q.z.isunity() {
+		aff = true
+	}
+
+	var A, C *FP2
+	B := NewFP2copy(E.z)
+	D := NewFP2copy(E.z)
+	if !aff {
+		A = NewFP2copy(Q.z)
+		C = NewFP2copy(Q.z)
+
+		A.sqr()
+		B.sqr()
+		C.mul(A)
+		D.mul(B)
+
+		A.mul(E.x)
+		C.mul(E.y)
+	} else {
+		A = NewFP2copy(E.x)
+		C = NewFP2copy(E.y)
+
+		B.sqr()
+		D.mul(B)
+	}
+
+	B.mul(Q.x)
+	B.sub(A)
+	D.mul(Q.y)
+	D.sub(C)
+
+	if B.iszilch() {
+		if D.iszilch() {
+			E.dbl()
+			return 1
+		} else {
+			E.INF = true
+			return -1
+		}
+	}
+
+	if !aff {
+		E.z.mul(Q.z)
+	}
+	E.z.mul(B)
+
+	e := NewFP2copy(B)
+	e.sqr()
+	B.mul(e)
+	A.mul(e)
+
+	e.copy(A)
+	e.add(A)
+	e.add(B)
+	E.x.copy(D)
+	E.x.sqr()
+	E.x.sub(e)
+
+	A.sub(E.x)
+	E.y.copy(A)
+	E.y.mul(D)
+	C.mul(B)
+	E.y.sub(C)
+
+	E.x.norm()
+	E.y.norm()
+	E.z.norm()
+
+	return 0
+}
+
+/* set this-=Q */
+func (E *ECP2) sub(Q *ECP2) int {
+	Q.neg()
+	D := E.add(Q)
+	Q.neg()
+	return D
+}
+
+/* set this*=q, where q is Modulus, using Frobenius */
+func (E *ECP2) frob(X *FP2) {
+	if E.INF {
+		return
+	}
+	X2 := NewFP2copy(X)
+	X2.sqr()
+	E.x.conj()
+	E.y.conj()
+	E.z.conj()
+	E.z.reduce()
+	E.x.mul(X2)
+	E.y.mul(X2)
+	E.y.mul(X)
+}
+
+/* normalises m-array of ECP2 points. Requires work vector of m FP2s */
+
+func multiaffine2(m int, P []*ECP2) {
+	t1 := NewFP2int(0)
+	t2 := NewFP2int(0)
+
+	var work []*FP2
+
+	for i := 0; i < m; i++ {
+		work = append(work, NewFP2int(0))
+	}
+
+	work[0].one()
+	work[1].copy(P[0].z)
+
+	for i := 2; i < m; i++ {
+		work[i].copy(work[i-1])
+		work[i].mul(P[i-1].z)
+	}
+
+	t1.copy(work[m-1])
+	t1.mul(P[m-1].z)
+
+	t1.inverse()
+
+	t2.copy(P[m-1].z)
+	work[m-1].mul(t1)
+
+	for i := m - 2; ; i-- {
+		if i == 0 {
+			work[0].copy(t1)
+			work[0].mul(t2)
+			break
+		}
+		work[i].mul(t2)
+		work[i].mul(t1)
+		t2.mul(P[i].z)
+	}
+	/* now work[] contains inverses of all Z coordinates */
+
+	for i := 0; i < m; i++ {
+		P[i].z.one()
+		t1.copy(work[i])
+		t1.sqr()
+		P[i].x.mul(t1)
+		t1.mul(work[i])
+		P[i].y.mul(t1)
+	}
+}
+
+/* P*=e */
+func (E *ECP2) mul(e *BIG) *ECP2 {
+	/* fixed size windows */
+	mt := NewBIG()
+	t := NewBIG()
+	P := NewECP2()
+	Q := NewECP2()
+	C := NewECP2()
+
+	if E.is_infinity() {
+		return NewECP2()
+	}
+
+	var W []*ECP2
+	var w [1 + (NLEN*int(BASEBITS)+3)/4]int8
+
+	E.affine()
+
+	/* precompute table */
+	Q.copy(E)
+	Q.dbl()
+
+	W = append(W, NewECP2())
+	W[0].copy(E)
+
+	for i := 1; i < 8; i++ {
+		W = append(W, NewECP2())
+		W[i].copy(W[i-1])
+		W[i].add(Q)
+	}
+
+	/* convert the table to affine */
+
+	multiaffine2(8, W[:])
+
+	/* make exponent odd - add 2P if even, P if odd */
+	t.copy(e)
+	s := int32(t.parity())
+	t.inc(1)
+	t.norm()
+	ns := int32(t.parity())
+	mt.copy(t)
+	mt.inc(1)
+	mt.norm()
+	t.cmove(mt, s)
+	Q.cmove(E, ns)
+	C.copy(Q)
+
+	nb := 1 + (t.nbits()+3)/4
+	/* convert exponent to signed 4-bit window */
+	for i := 0; i < nb; i++ {
+		w[i] = int8(t.lastbits(5) - 16)
+		t.dec(int(w[i]))
+		t.norm()
+		t.fshr(4)
+	}
+	w[nb] = int8(t.lastbits(5))
+
+	P.copy(W[(w[nb]-1)/2])
+	for i := nb - 1; i >= 0; i-- {
+		Q.selector(W, int32(w[i]))
+		P.dbl()
+		P.dbl()
+		P.dbl()
+		P.dbl()
+		P.add(Q)
+	}
+	P.sub(C)
+	P.affine()
+	return P
+}
+
+/* P=u0.Q0+u1*Q1+u2*Q2+u3*Q3 */
+func mul4(Q []*ECP2, u []*BIG) *ECP2 {
+	var a [4]int8
+	T := NewECP2()
+	C := NewECP2()
+	P := NewECP2()
+
+	var W []*ECP2
+
+	mt := NewBIG()
+	var t []*BIG
+
+	var w [NLEN*int(BASEBITS) + 1]int8
+
+	for i := 0; i < 4; i++ {
+		t = append(t, NewBIGcopy(u[i]))
+		Q[i].affine()
+	}
+
+	/* precompute table */
+
+	W = append(W, NewECP2())
+	W[0].copy(Q[0])
+	W[0].sub(Q[1])
+	W = append(W, NewECP2())
+	W[1].copy(W[0])
+	W = append(W, NewECP2())
+	W[2].copy(W[0])
+	W = append(W, NewECP2())
+	W[3].copy(W[0])
+	W = append(W, NewECP2())
+	W[4].copy(Q[0])
+	W[4].add(Q[1])
+	W = append(W, NewECP2())
+	W[5].copy(W[4])
+	W = append(W, NewECP2())
+	W[6].copy(W[4])
+	W = append(W, NewECP2())
+	W[7].copy(W[4])
+
+	T.copy(Q[2])
+	T.sub(Q[3])
+	W[1].sub(T)
+	W[2].add(T)
+	W[5].sub(T)
+	W[6].add(T)
+	T.copy(Q[2])
+	T.add(Q[3])
+	W[0].sub(T)
+	W[3].add(T)
+	W[4].sub(T)
+	W[7].add(T)
+
+	multiaffine2(8, W[:])
+
+	/* if multiplier is even add 1 to multiplier, and add P to correction */
+	mt.zero()
+	C.inf()
+	for i := 0; i < 4; i++ {
+		if t[i].parity() == 0 {
+			t[i].inc(1)
+			t[i].norm()
+			C.add(Q[i])
+		}
+		mt.add(t[i])
+		mt.norm()
+	}
+
+	nb := 1 + mt.nbits()
+
+	/* convert exponent to signed 1-bit window */
+	for j := 0; j < nb; j++ {
+		for i := 0; i < 4; i++ {
+			a[i] = int8(t[i].lastbits(2) - 2)
+			t[i].dec(int(a[i]))
+			t[i].norm()
+			t[i].fshr(1)
+		}
+		w[j] = (8*a[0] + 4*a[1] + 2*a[2] + a[3])
+	}
+	w[nb] = int8(8*t[0].lastbits(2) + 4*t[1].lastbits(2) + 2*t[2].lastbits(2) + t[3].lastbits(2))
+
+	P.copy(W[(w[nb]-1)/2])
+	for i := nb - 1; i >= 0; i-- {
+		T.selector(W, int32(w[i]))
+		P.dbl()
+		P.add(T)
+	}
+	P.sub(C) /* apply correction */
+
+	P.affine()
+	return P
+}

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/85fabaa6/go/amcl-go/FF.go
----------------------------------------------------------------------
diff --git a/go/amcl-go/FF.go b/go/amcl-go/FF.go
new file mode 100644
index 0000000..9e6e68c
--- /dev/null
+++ b/go/amcl-go/FF.go
@@ -0,0 +1,926 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+package amcl
+
+//import "fmt"
+
+const P_MBITS uint = MODBYTES * 8
+const P_MB uint = (P_MBITS % BASEBITS)
+const P_OMASK int64 = (int64(-1) << (P_MBITS % BASEBITS))
+const P_FEXCESS int64 = (int64(1) << (BASEBITS*uint(NLEN) - P_MBITS))
+const P_TBITS uint = (P_MBITS % BASEBITS)
+
+type FF struct {
+	length int
+	v      []*BIG
+}
+
+func (F *FF) P_EXCESS() int64 {
+	return ((F.v[F.length-1].get(NLEN-1) & P_OMASK) >> (P_MB))
+}
+
+/* Constructors */
+func NewFFint(n int) *FF {
+	F := new(FF)
+	for i := 0; i < n; i++ {
+		F.v = append(F.v, NewBIG())
+	}
+	F.length = n
+	return F
+}
+
+func NewFFints(x [][5]int64, n int) *FF {
+	F := new(FF)
+	for i := 0; i < n; i++ {
+		F.v = append(F.v, NewBIGints(x[i]))
+	}
+	F.length = n
+	return F
+}
+
+/* set to zero */
+func (F *FF) zero() {
+	for i := 0; i < F.length; i++ {
+		F.v[i].zero()
+	}
+}
+
+func (F *FF) getlen() int {
+	return F.length
+}
+
+/* set to integer */
+func (F *FF) set(m int) {
+	F.zero()
+	F.v[0].set(0, int64(m))
+}
+
+/* copy from FF b */
+func (F *FF) copy(b *FF) {
+	for i := 0; i < F.length; i++ {
+		F.v[i].copy(b.v[i])
+	}
+}
+
+/* x=y<<n */
+func (F *FF) dsucopy(b *FF) {
+	for i := 0; i < b.length; i++ {
+		F.v[b.length+i].copy(b.v[i])
+		F.v[i].zero()
+	}
+}
+
+/* x=y */
+func (F *FF) dscopy(b *FF) {
+	for i := 0; i < b.length; i++ {
+		F.v[i].copy(b.v[i])
+		F.v[b.length+i].zero()
+	}
+}
+
+/* x=y>>n */
+func (F *FF) sducopy(b *FF) {
+	for i := 0; i < F.length; i++ {
+		F.v[i].copy(b.v[F.length+i])
+	}
+}
+
+func (F *FF) one() {
+	F.v[0].one()
+	for i := 1; i < F.length; i++ {
+		F.v[i].zero()
+	}
+}
+
+/* test equals 0 */
+func (F *FF) iszilch() bool {
+	for i := 0; i < F.length; i++ {
+		if !F.v[i].iszilch() {
+			return false
+		}
+	}
+	return true
+}
+
+/* shift right by 256-bit words */
+func (F *FF) shrw(n int) {
+	for i := 0; i < n; i++ {
+		F.v[i].copy(F.v[i+n])
+		F.v[i+n].zero()
+	}
+}
+
+/* shift left by 256-bit words */
+func (F *FF) shlw(n int) {
+	for i := 0; i < n; i++ {
+		F.v[n+i].copy(F.v[i])
+		F.v[i].zero()
+	}
+}
+
+/* extract last bit */
+func (F *FF) parity() int {
+	return F.v[0].parity()
+}
+
+func (F *FF) lastbits(m int) int {
+	return F.v[0].lastbits(m)
+}
+
+/* compare x and y - must be normalised, and of same length */
+func ff_comp(a *FF, b *FF) int {
+	for i := a.length - 1; i >= 0; i-- {
+		j := comp(a.v[i], b.v[i])
+		if j != 0 {
+			return j
+		}
+	}
+	return 0
+}
+
+/* recursive add */
+func (F *FF) radd(vp int, x *FF, xp int, y *FF, yp int, n int) {
+	for i := 0; i < n; i++ {
+		F.v[vp+i].copy(x.v[xp+i])
+		F.v[vp+i].add(y.v[yp+i])
+	}
+}
+
+/* recursive inc */
+func (F *FF) rinc(vp int, y *FF, yp int, n int) {
+	for i := 0; i < n; i++ {
+		F.v[vp+i].add(y.v[yp+i])
+	}
+}
+
+/* recursive sub */
+func (F *FF) rsub(vp int, x *FF, xp int, y *FF, yp int, n int) {
+	for i := 0; i < n; i++ {
+		F.v[vp+i].copy(x.v[xp+i])
+		F.v[vp+i].sub(y.v[yp+i])
+	}
+}
+
+/* recursive dec */
+func (F *FF) rdec(vp int, y *FF, yp int, n int) {
+	for i := 0; i < n; i++ {
+		F.v[vp+i].sub(y.v[yp+i])
+	}
+}
+
+/* simple add */
+func (F *FF) add(b *FF) {
+	for i := 0; i < F.length; i++ {
+		F.v[i].add(b.v[i])
+	}
+}
+
+/* simple sub */
+func (F *FF) sub(b *FF) {
+	for i := 0; i < F.length; i++ {
+		F.v[i].sub(b.v[i])
+	}
+}
+
+/* reverse sub */
+func (F *FF) revsub(b *FF) {
+	for i := 0; i < F.length; i++ {
+		F.v[i].rsub(b.v[i])
+	}
+}
+
+/* normalise - but hold any overflow in top part unless n<0 */
+func (F *FF) rnorm(vp int, n int) {
+	trunc := false
+	var carry int64
+	if n < 0 { /* -v n signals to do truncation */
+		n = -n
+		trunc = true
+	}
+	for i := 0; i < n-1; i++ {
+		carry = F.v[vp+i].norm()
+		F.v[vp+i].xortop(carry << P_TBITS)
+		F.v[vp+i+1].inc(int(carry))
+	}
+	carry = F.v[vp+n-1].norm()
+	if trunc {
+		F.v[vp+n-1].xortop(carry << P_TBITS)
+	}
+
+}
+
+func (F *FF) norm() {
+	F.rnorm(0, F.length)
+}
+
+/* increment/decrement by a small integer */
+func (F *FF) inc(m int) {
+	F.v[0].inc(m)
+	F.norm()
+}
+
+func (F *FF) dec(m int) {
+	F.v[0].dec(m)
+	F.norm()
+}
+
+/* shift left by one bit */
+func (F *FF) shl() {
+	var delay_carry int = 0
+	for i := 0; i < F.length-1; i++ {
+		carry := F.v[i].fshl(1)
+		F.v[i].inc(delay_carry)
+		F.v[i].xortop(carry << P_TBITS)
+		delay_carry = int(carry)
+	}
+	F.v[F.length-1].fshl(1)
+	F.v[F.length-1].inc(delay_carry)
+}
+
+/* shift right by one bit */
+
+func (F *FF) shr() {
+	for i := F.length - 1; i > 0; i-- {
+		carry := F.v[i].fshr(1)
+		F.v[i-1].ortop(carry << P_TBITS)
+	}
+	F.v[0].fshr(1)
+}
+
+/* Convert to Hex String */
+func (F *FF) toString() string {
+	F.norm()
+	s := ""
+	for i := F.length - 1; i >= 0; i-- {
+		s += F.v[i].toString()
+	}
+	return s
+}
+
+/* Convert FFs to/from byte arrays */
+func (F *FF) toBytes(b []byte) {
+	for i := 0; i < F.length; i++ {
+		F.v[i].tobytearray(b, (F.length-i-1)*int(MODBYTES))
+	}
+}
+
+func ff_fromBytes(x *FF, b []byte) {
+	for i := 0; i < x.length; i++ {
+		x.v[i] = frombytearray(b, (x.length-i-1)*int(MODBYTES))
+	}
+}
+
+/* in-place swapping using xor - side channel resistant - lengths must be the same */
+func ff_cswap(a *FF, b *FF, d int32) {
+	for i := 0; i < a.length; i++ {
+		a.v[i].cswap(b.v[i], d)
+	}
+}
+
+/* z=x*y, t is workspace */
+func (F *FF) karmul(vp int, x *FF, xp int, y *FF, yp int, t *FF, tp int, n int) {
+	if n == 1 {
+		d := mul(x.v[xp], y.v[yp])
+		F.v[vp+1] = d.split(8 * MODBYTES)
+		F.v[vp].dcopy(d)
+		return
+	}
+	nd2 := n / 2
+	F.radd(vp, x, xp, x, xp+nd2, nd2)
+	F.radd(vp+nd2, y, yp, y, yp+nd2, nd2)
+	t.karmul(tp, F, vp, F, vp+nd2, t, tp+n, nd2)
+	F.karmul(vp, x, xp, y, yp, t, tp+n, nd2)
+	F.karmul(vp+n, x, xp+nd2, y, yp+nd2, t, tp+n, nd2)
+	t.rdec(tp, F, vp, n)
+	t.rdec(tp, F, vp+n, n)
+	F.rinc(vp+nd2, t, tp, n)
+	F.rnorm(vp, 2*n)
+}
+
+func (F *FF) karsqr(vp int, x *FF, xp int, t *FF, tp int, n int) {
+	if n == 1 {
+		d := sqr(x.v[xp])
+		F.v[vp+1].copy(d.split(8 * MODBYTES))
+		F.v[vp].dcopy(d)
+		return
+	}
+
+	nd2 := n / 2
+	F.karsqr(vp, x, xp, t, tp+n, nd2)
+	F.karsqr(vp+n, x, xp+nd2, t, tp+n, nd2)
+	t.karmul(tp, x, xp, x, xp+nd2, t, tp+n, nd2)
+	F.rinc(vp+nd2, t, tp, n)
+	F.rinc(vp+nd2, t, tp, n)
+	F.rnorm(vp+nd2, n)
+}
+
+/* Calculates Least Significant bottom half of x*y */
+func (F *FF) karmul_lower(vp int, x *FF, xp int, y *FF, yp int, t *FF, tp int, n int) {
+	if n == 1 { /* only calculate bottom half of product */
+		F.v[vp].copy(smul(x.v[xp], y.v[yp]))
+		return
+	}
+	nd2 := n / 2
+
+	F.karmul(vp, x, xp, y, yp, t, tp+n, nd2)
+	t.karmul_lower(tp, x, xp+nd2, y, yp, t, tp+n, nd2)
+	F.rinc(vp+nd2, t, tp, nd2)
+	t.karmul_lower(tp, x, xp, y, yp+nd2, t, tp+n, nd2)
+	F.rinc(vp+nd2, t, tp, nd2)
+	F.rnorm(vp+nd2, -nd2) /* truncate it */
+}
+
+/* Calculates Most Significant upper half of x*y, given lower part */
+func (F *FF) karmul_upper(x *FF, y *FF, t *FF, n int) {
+	nd2 := n / 2
+	F.radd(n, x, 0, x, nd2, nd2)
+	F.radd(n+nd2, y, 0, y, nd2, nd2)
+
+	t.karmul(0, F, n+nd2, F, n, t, n, nd2) /* t = (a0+a1)(b0+b1) */
+	F.karmul(n, x, nd2, y, nd2, t, n, nd2) /* z[n]= a1*b1 */
+	/* z[0-nd2]=l(a0b0) z[nd2-n]= h(a0b0)+l(t)-l(a0b0)-l(a1b1) */
+	t.rdec(0, F, n, n)     /* t=t-a1b1  */
+	F.rinc(nd2, F, 0, nd2) /* z[nd2-n]+=l(a0b0) = h(a0b0)+l(t)-l(a1b1)  */
+	F.rdec(nd2, t, 0, nd2) /* z[nd2-n]=h(a0b0)+l(t)-l(a1b1)-l(t-a1b1)=h(a0b0) */
+	F.rnorm(0, -n)         /* a0b0 now in z - truncate it */
+	t.rdec(0, F, 0, n)     /* (a0+a1)(b0+b1) - a0b0 */
+	F.rinc(nd2, t, 0, n)
+
+	F.rnorm(nd2, n)
+}
+
+/* z=x*y. Assumes x and y are of same length. */
+func ff_mul(x *FF, y *FF) *FF {
+	n := x.length
+	z := NewFFint(2 * n)
+	t := NewFFint(2 * n)
+	z.karmul(0, x, 0, y, 0, t, 0, n)
+	return z
+}
+
+/* return low part of product this*y */
+func (F *FF) lmul(y *FF) {
+	n := F.length
+	t := NewFFint(2 * n)
+	x := NewFFint(n)
+	x.copy(F)
+	F.karmul_lower(0, x, 0, y, 0, t, 0, n)
+}
+
+/* Set b=b mod c */
+func (F *FF) mod(c *FF) {
+	var k int = 1
+
+	F.norm()
+	if ff_comp(F, c) < 0 {
+		return
+	}
+
+	c.shl()
+	for ff_comp(F, c) >= 0 {
+		c.shl()
+		k++
+	}
+
+	for k > 0 {
+		c.shr()
+		if ff_comp(F, c) >= 0 {
+			F.sub(c)
+			F.norm()
+		}
+		k--
+	}
+}
+
+/* z=x^2 */
+func ff_sqr(x *FF) *FF {
+	n := x.length
+	z := NewFFint(2 * n)
+	t := NewFFint(2 * n)
+	z.karsqr(0, x, 0, t, 0, n)
+	return z
+}
+
+/* return This mod modulus, N is modulus, ND is Montgomery Constant */
+func (F *FF) reduce(N *FF, ND *FF) *FF { /* fast karatsuba Montgomery reduction */
+	n := N.length
+	t := NewFFint(2 * n)
+	r := NewFFint(n)
+	m := NewFFint(n)
+
+	r.sducopy(F)
+	m.karmul_lower(0, F, 0, ND, 0, t, 0, n)
+	F.karmul_upper(N, m, t, n)
+	m.sducopy(F)
+
+	r.add(N)
+	r.sub(m)
+	r.norm()
+
+	return r
+
+}
+
+/* Set r=this mod b */
+/* this is of length - 2*n */
+/* r,b is of length - n */
+func (F *FF) dmod(b *FF) *FF {
+	n := b.length
+	m := NewFFint(2 * n)
+	x := NewFFint(2 * n)
+	r := NewFFint(n)
+
+	x.copy(F)
+	x.norm()
+	m.dsucopy(b)
+	k := 256 * n
+
+	for k > 0 {
+		m.shr()
+
+		if ff_comp(x, m) >= 0 {
+			x.sub(m)
+			x.norm()
+		}
+		k--
+	}
+
+	r.copy(x)
+	r.mod(b)
+	return r
+}
+
+/* Set return=1/this mod p. Binary method - a<p on entry */
+
+func (F *FF) invmodp(p *FF) {
+	n := p.length
+
+	u := NewFFint(n)
+	v := NewFFint(n)
+	x1 := NewFFint(n)
+	x2 := NewFFint(n)
+	t := NewFFint(n)
+	one := NewFFint(n)
+
+	one.one()
+	u.copy(F)
+	v.copy(p)
+	x1.copy(one)
+	x2.zero()
+
+	// reduce n in here as well!
+	for ff_comp(u, one) != 0 && ff_comp(v, one) != 0 {
+		for u.parity() == 0 {
+			u.shr()
+			if x1.parity() != 0 {
+				x1.add(p)
+				x1.norm()
+			}
+			x1.shr()
+		}
+		for v.parity() == 0 {
+			v.shr()
+			if x2.parity() != 0 {
+				x2.add(p)
+				x2.norm()
+			}
+			x2.shr()
+		}
+		if ff_comp(u, v) >= 0 {
+			u.sub(v)
+			u.norm()
+			if ff_comp(x1, x2) >= 0 {
+				x1.sub(x2)
+			} else {
+				t.copy(p)
+				t.sub(x2)
+				x1.add(t)
+			}
+			x1.norm()
+		} else {
+			v.sub(u)
+			v.norm()
+			if ff_comp(x2, x1) >= 0 {
+				x2.sub(x1)
+			} else {
+				t.copy(p)
+				t.sub(x1)
+				x2.add(t)
+			}
+			x2.norm()
+		}
+	}
+	if ff_comp(u, one) == 0 {
+		F.copy(x1)
+	} else {
+		F.copy(x2)
+	}
+}
+
+/* nresidue mod m */
+func (F *FF) nres(m *FF) {
+	n := m.length
+	d := NewFFint(2 * n)
+	d.dsucopy(F)
+	F.copy(d.dmod(m))
+}
+
+func (F *FF) redc(m *FF, ND *FF) {
+	n := m.length
+	d := NewFFint(2 * n)
+	F.mod(m)
+	d.dscopy(F)
+	F.copy(d.reduce(m, ND))
+	F.mod(m)
+}
+
+func (F *FF) mod2m(m int) {
+	for i := m; i < F.length; i++ {
+		F.v[i].zero()
+	}
+}
+
+/* U=1/a mod 2^m - Arazi & Qi */
+func (F *FF) invmod2m() *FF {
+	n := F.length
+
+	b := NewFFint(n)
+	c := NewFFint(n)
+	U := NewFFint(n)
+
+	U.zero()
+	U.v[0].copy(F.v[0])
+	U.v[0].invmod2m()
+
+	for i := 1; i < n; i <<= 1 {
+		b.copy(F)
+		b.mod2m(i)
+		t := ff_mul(U, b)
+		t.shrw(i)
+		b.copy(t)
+		c.copy(F)
+		c.shrw(i)
+		c.mod2m(i)
+		c.lmul(U)
+		c.mod2m(i)
+
+		b.add(c)
+		b.norm()
+		b.lmul(U)
+		b.mod2m(i)
+
+		c.one()
+		c.shlw(i)
+		b.revsub(c)
+		b.norm()
+		b.shlw(i)
+		U.add(b)
+	}
+	U.norm()
+	return U
+}
+
+func (F *FF) random(rng *RAND) {
+	n := F.length
+	for i := 0; i < n; i++ {
+		F.v[i].copy(random(rng))
+	}
+	/* make sure top bit is 1 */
+	for F.v[n-1].nbits() < int(MODBYTES*8) {
+		F.v[n-1].copy(random(rng))
+	}
+}
+
+/* generate random x less than p */
+func (F *FF) randomnum(p *FF, rng *RAND) {
+	n := F.length
+	d := NewFFint(2 * n)
+
+	for i := 0; i < 2*n; i++ {
+		d.v[i].copy(random(rng))
+	}
+	F.copy(d.dmod(p))
+}
+
+/* this*=y mod p */
+func (F *FF) modmul(y *FF, p *FF, nd *FF) {
+	ex := F.P_EXCESS()
+	ey := y.P_EXCESS()
+	if (ex+1)*(ey+1)+1 >= P_FEXCESS {
+		F.mod(p)
+	}
+	d := ff_mul(F, y)
+	F.copy(d.reduce(p, nd))
+}
+
+/* this*=y mod p */
+func (F *FF) modsqr(p *FF, nd *FF) {
+	ex := F.P_EXCESS()
+	if (ex+1)*(ex+1)+1 >= P_FEXCESS {
+		F.mod(p)
+	}
+	d := ff_sqr(F)
+	F.copy(d.reduce(p, nd))
+}
+
+/* this=this^e mod p using side-channel resistant Montgomery Ladder, for large e */
+func (F *FF) skpow(e *FF, p *FF) {
+	n := p.length
+	R0 := NewFFint(n)
+	R1 := NewFFint(n)
+	ND := p.invmod2m()
+
+	F.mod(p)
+	R0.one()
+	R1.copy(F)
+	R0.nres(p)
+	R1.nres(p)
+
+	for i := int(8*MODBYTES)*n - 1; i >= 0; i-- {
+		b := int32(e.v[i/256].bit(i % 256))
+		F.copy(R0)
+		F.modmul(R1, p, ND)
+
+		ff_cswap(R0, R1, b)
+		R0.modsqr(p, ND)
+
+		R1.copy(F)
+		ff_cswap(R0, R1, b)
+	}
+	F.copy(R0)
+	F.redc(p, ND)
+}
+
+/* this =this^e mod p using side-channel resistant Montgomery Ladder, for short e */
+func (F *FF) skpows(e *BIG, p *FF) {
+	n := p.length
+	R0 := NewFFint(n)
+	R1 := NewFFint(n)
+	ND := p.invmod2m()
+
+	F.mod(p)
+	R0.one()
+	R1.copy(F)
+	R0.nres(p)
+	R1.nres(p)
+
+	for i := int(8*MODBYTES) - 1; i >= 0; i-- {
+		b := int32(e.bit(i))
+		F.copy(R0)
+		F.modmul(R1, p, ND)
+
+		ff_cswap(R0, R1, b)
+		R0.modsqr(p, ND)
+
+		R1.copy(F)
+		ff_cswap(R0, R1, b)
+	}
+	F.copy(R0)
+	F.redc(p, ND)
+}
+
+/* raise to an integer power - right-to-left method */
+func (F *FF) power(e int, p *FF) {
+	n := p.length
+	w := NewFFint(n)
+	ND := p.invmod2m()
+	f := true
+
+	w.copy(F)
+	w.nres(p)
+
+	if e == 2 {
+		F.copy(w)
+		F.modsqr(p, ND)
+	} else {
+		for true {
+			if e%2 == 1 {
+				if f {
+					F.copy(w)
+				} else {
+					F.modmul(w, p, ND)
+				}
+				f = false
+			}
+			e >>= 1
+			if e == 0 {
+				break
+			}
+			w.modsqr(p, ND)
+		}
+	}
+	F.redc(p, ND)
+}
+
+/* this=this^e mod p, faster but not side channel resistant */
+func (F *FF) pow(e *FF, p *FF) {
+	n := p.length
+	w := NewFFint(n)
+	ND := p.invmod2m()
+
+	w.copy(F)
+	F.one()
+	F.nres(p)
+	w.nres(p)
+	for i := int(8*MODBYTES)*n - 1; i >= 0; i-- {
+		F.modsqr(p, ND)
+		b := e.v[i/256].bit(i % 256)
+		if b == 1 {
+			F.modmul(w, p, ND)
+		}
+	}
+	F.redc(p, ND)
+}
+
+/* double exponentiation r=x^e.y^f mod p */
+func (F *FF) pow2(e *BIG, y *FF, f *BIG, p *FF) {
+	n := p.length
+	xn := NewFFint(n)
+	yn := NewFFint(n)
+	xy := NewFFint(n)
+	ND := p.invmod2m()
+
+	xn.copy(F)
+	yn.copy(y)
+	xn.nres(p)
+	yn.nres(p)
+	xy.copy(xn)
+	xy.modmul(yn, p, ND)
+	F.one()
+	F.nres(p)
+
+	for i := int(8*MODBYTES) - 1; i >= 0; i-- {
+		eb := e.bit(i)
+		fb := f.bit(i)
+		F.modsqr(p, ND)
+		if eb == 1 {
+			if fb == 1 {
+				F.modmul(xy, p, ND)
+			} else {
+				F.modmul(xn, p, ND)
+			}
+		} else {
+			if fb == 1 {
+				F.modmul(yn, p, ND)
+			}
+		}
+	}
+	F.redc(p, ND)
+}
+
+func igcd(x int, y int) int { /* integer GCD, returns GCD of x and y */
+	var r int
+	if y == 0 {
+		return x
+	}
+	for true {
+		r = x % y
+		if r == 0 {
+			break
+		}
+		x = y
+		y = r
+	}
+	return y
+}
+
+/* quick and dirty check for common factor with n */
+func (F *FF) cfactor(s int) bool {
+	n := F.length
+
+	x := NewFFint(n)
+	y := NewFFint(n)
+
+	y.set(s)
+	x.copy(F)
+	x.norm()
+
+	x.sub(y)
+	x.norm()
+
+	for !x.iszilch() && x.parity() == 0 {
+		x.shr()
+	}
+
+	for ff_comp(x, y) > 0 {
+		x.sub(y)
+		x.norm()
+		for !x.iszilch() && x.parity() == 0 {
+			x.shr()
+		}
+	}
+
+	g := int(x.v[0].get(0))
+	r := igcd(s, g)
+	if r > 1 {
+		return true
+	}
+	return false
+}
+
+/* Miller-Rabin test for primality. Slow. */
+func prime(p *FF, rng *RAND) bool {
+	s := 0
+	n := p.length
+	d := NewFFint(n)
+	x := NewFFint(n)
+	unity := NewFFint(n)
+	nm1 := NewFFint(n)
+
+	sf := 4849845 /* 3*5*.. *19 */
+	p.norm()
+
+	if p.cfactor(sf) {
+		return false
+	}
+	unity.one()
+	nm1.copy(p)
+	nm1.sub(unity)
+	nm1.norm()
+	d.copy(nm1)
+
+	for d.parity() == 0 {
+		d.shr()
+		s++
+	}
+	if s == 0 {
+		return false
+	}
+	for i := 0; i < 10; i++ {
+		x.randomnum(p, rng)
+		x.pow(d, p)
+		if ff_comp(x, unity) == 0 || ff_comp(x, nm1) == 0 {
+			continue
+		}
+		loop := false
+		for j := 1; j < s; j++ {
+			x.power(2, p)
+			if ff_comp(x, unity) == 0 {
+				return false
+			}
+			if ff_comp(x, nm1) == 0 {
+				loop = true
+				break
+			}
+		}
+		if loop {
+			continue
+		}
+		return false
+	}
+	return true
+}
+
+/*
+func main() {
+
+	var P = [4][5]int64 {{0xAD19A781670957,0x76A79C00965796,0xDEFCC5FC9A9717,0xF02F2940E20E9,0xBF59E34F},{0x6894F31844C908,0x8DADA70E82C79F,0xFD29F3836046F6,0x8C1D874D314DD0,0x46D077B},{0x3C515217813331,0x56680FD1CE935B,0xE55C53EEA8838E,0x92C2F7E14A4A95,0xD945E5B1},{0xACF673E919F5EF,0x6723E7E7DAB446,0x6B6FA69B36EB1B,0xF7D13920ECA300,0xB5FC2165}}
+
+	fmt.Printf("Testing FF\n")
+	var raw [100]byte
+	rng:=NewRAND()
+
+	rng.Clean()
+	for i:=0;i<100;i++ {
+		raw[i]=byte(i)
+	}
+
+	rng.Seed(100,raw[:])
+
+	n:=4
+
+	x:=NewFFint(n)
+	x.set(3)
+
+	p:=NewFFints(P[:],n)
+
+	if prime(p,rng) {fmt.Printf("p is a prime\n"); fmt.Printf("\n")}
+
+	e:=NewFFint(n)
+	e.copy(p)
+	e.dec(1); e.norm()
+
+	fmt.Printf("e= "+e.toString())
+	fmt.Printf("\n")
+	x.skpow(e,p)
+	fmt.Printf("x= "+x.toString())
+	fmt.Printf("\n")
+}
+*/

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/85fabaa6/go/amcl-go/FP.go
----------------------------------------------------------------------
diff --git a/go/amcl-go/FP.go b/go/amcl-go/FP.go
new file mode 100644
index 0000000..c8a4d62
--- /dev/null
+++ b/go/amcl-go/FP.go
@@ -0,0 +1,288 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* Finite Field arithmetic */
+/* CLINT mod p functions */
+
+package amcl
+
+//import "fmt"
+
+var p BIG = BIG{w: [NLEN]int64(Modulus)}
+
+type FP struct {
+	x *BIG
+}
+
+/* Constructors */
+func NewFPint(a int) *FP {
+	F := new(FP)
+	F.x = NewBIGint(a)
+	F.nres()
+	return F
+}
+
+func NewFPbig(a *BIG) *FP {
+	F := new(FP)
+	F.x = NewBIGcopy(a)
+	F.nres()
+	return F
+}
+
+func NewFPcopy(a *FP) *FP {
+	F := new(FP)
+	F.x = NewBIGcopy(a.x)
+	return F
+}
+
+func (F *FP) toString() string {
+	return F.redc().toString()
+}
+
+/* convert to Montgomery n-residue form */
+func (F *FP) nres() {
+	if MODTYPE != PSEUDO_MERSENNE {
+		d := NewDBIGscopy(F.x)
+		d.shl(uint(NLEN) * BASEBITS)
+		F.x.copy(d.mod(&p))
+	}
+}
+
+/* convert back to regular form */
+func (F *FP) redc() *BIG {
+	if MODTYPE != PSEUDO_MERSENNE {
+		d := NewDBIGscopy(F.x)
+		return mod(d)
+	} else {
+		r := NewBIGcopy(F.x)
+		return r
+	}
+}
+
+/* reduce this mod Modulus */
+func (F *FP) reduce() {
+	F.x.mod(&p)
+}
+
+/* test this=0? */
+func (F *FP) iszilch() bool {
+	F.reduce()
+	return F.x.iszilch()
+}
+
+/* copy from FP b */
+func (F *FP) copy(b *FP) {
+	F.x.copy(b.x)
+}
+
+/* set this=0 */
+func (F *FP) zero() {
+	F.x.zero()
+}
+
+/* set this=1 */
+func (F *FP) one() {
+	F.x.one()
+	F.nres()
+}
+
+/* normalise this */
+func (F *FP) norm() {
+	F.x.norm()
+}
+
+/* swap FPs depending on d */
+func (F *FP) cswap(b *FP, d int32) {
+	F.x.cswap(b.x, d)
+}
+
+/* copy FPs depending on d */
+func (F *FP) cmove(b *FP, d int32) {
+	F.x.cmove(b.x, d)
+}
+
+/* this*=b mod Modulus */
+func (F *FP) mul(b *FP) {
+	ea := EXCESS(F.x)
+	eb := EXCESS(b.x)
+
+	if (ea+1)*(eb+1)+1 >= FEXCESS {
+		F.reduce()
+	}
+
+	d := mul(F.x, b.x)
+	F.x.copy(mod(d))
+}
+
+/* this = -this mod Modulus */
+func (F *FP) neg() {
+	m := NewBIGcopy(&p)
+
+	F.norm()
+
+	ov := EXCESS(F.x)
+	sb := uint(1)
+	for ov != 0 {
+		sb++
+		ov >>= 1
+	}
+
+	m.fshl(sb)
+	F.x.rsub(m)
+
+	if EXCESS(F.x) >= FEXCESS {
+		F.reduce()
+	}
+}
+
+/* this*=c mod Modulus, where c is a small int */
+func (F *FP) imul(c int) {
+	F.norm()
+	s := false
+	if c < 0 {
+		c = -c
+		s = true
+	}
+	afx := (EXCESS(F.x)+1)*(int64(c)+1) + 1
+	if c < NEXCESS && afx < FEXCESS {
+		F.x.imul(c)
+	} else {
+		if afx < FEXCESS {
+			F.x.pmul(c)
+		} else {
+			d := F.x.pxmul(c)
+			F.x.copy(d.mod(&p))
+		}
+	}
+	if s {
+		F.neg()
+	}
+	F.norm()
+}
+
+/* this*=this mod Modulus */
+func (F *FP) sqr() {
+	ea := EXCESS(F.x)
+	if (ea+1)*(ea+1)+1 >= FEXCESS {
+		F.reduce()
+	}
+
+	d := sqr(F.x)
+
+	F.x.copy(mod(d))
+}
+
+/* this+=b */
+func (F *FP) add(b *FP) {
+	F.x.add(b.x)
+	if EXCESS(F.x)+2 >= FEXCESS {
+		F.reduce()
+	}
+}
+
+/* this-=b */
+func (F *FP) sub(b *FP) {
+	n := NewFPcopy(b)
+	n.neg()
+	F.add(n)
+}
+
+/* this/=2 mod Modulus */
+func (F *FP) div2() {
+	F.x.norm()
+	if F.x.parity() == 0 {
+		F.x.fshr(1)
+	} else {
+		F.x.add(&p)
+		F.x.norm()
+		F.x.fshr(1)
+	}
+}
+
+/* this=1/this mod Modulus */
+func (F *FP) inverse() {
+	r := F.redc()
+	r.invmodp(&p)
+	F.x.copy(r)
+	F.nres()
+}
+
+/* return TRUE if this==a */
+func (F *FP) equals(a *FP) bool {
+	a.reduce()
+	F.reduce()
+	if comp(a.x, F.x) == 0 {
+		return true
+	}
+	return false
+}
+
+/* return this^e mod Modulus */
+func (F *FP) pow(e *BIG) *FP {
+	r := NewFPint(1)
+	e.norm()
+	F.x.norm()
+	m := NewFPcopy(F)
+	for true {
+		bt := e.parity()
+		e.fshr(1)
+		if bt == 1 {
+			r.mul(m)
+		}
+		if e.iszilch() {
+			break
+		}
+		m.sqr()
+	}
+	r.x.mod(&p)
+	return r
+}
+
+/* return sqrt(this) mod Modulus */
+func (F *FP) sqrt() *FP {
+	F.reduce()
+	b := NewBIGcopy(&p)
+	if MOD8 == 5 {
+		b.dec(5)
+		b.norm()
+		b.shr(3)
+		i := NewFPcopy(F)
+		i.x.shl(1)
+		v := i.pow(b)
+		i.mul(v)
+		i.mul(v)
+		i.x.dec(1)
+		r := NewFPcopy(F)
+		r.mul(v)
+		r.mul(i)
+		r.reduce()
+		return r
+	} else {
+		b.inc(1)
+		b.norm()
+		b.shr(2)
+		return F.pow(b)
+	}
+}
+
+/* return jacobi symbol (this/Modulus) */
+func (F *FP) jacobi() int {
+	w := F.redc()
+	return w.jacobi(&p)
+}