Posted to commits@milagro.apache.org by br...@apache.org on 2018/11/08 00:12:57 UTC

[37/51] [partial] incubator-milagro-crypto-c git commit: update code

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto-c/blob/8d28d2c3/src/fp2.c.in
----------------------------------------------------------------------
diff --git a/src/fp2.c.in b/src/fp2.c.in
new file mode 100644
index 0000000..93c2cf0
--- /dev/null
+++ b/src/fp2.c.in
@@ -0,0 +1,469 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL Fp^2 functions */
+/* SU=m, m is Stack Usage (no lazy reduction) */
+
+/* FP2 elements are of the form a+ib, where i is sqrt(-1) */
+
+#include "fp2_YYY.h"
+
+/* test x==0 ? */
+/* SU= 8 */
+int FP2_YYY_iszilch(FP2_YYY *x)
+{
+    FP2_YYY_reduce(x);
+    if (FP_YYY_iszilch(&(x->a)) && FP_YYY_iszilch(&(x->b))) return 1;
+    return 0;
+}
+
+/* Conditional move of g to f if d=1 */
+void FP2_YYY_cmove(FP2_YYY *f,FP2_YYY *g,int d)
+{
+    FP_YYY_cmove(&(f->a),&(g->a),d);
+    FP_YYY_cmove(&(f->b),&(g->b),d);
+}
+
+/* test x==1 ? */
+/* SU= 48 */
+int FP2_YYY_isunity(FP2_YYY *x)
+{
+    FP_YYY one;
+    FP_YYY_one(&one);
+    FP2_YYY_reduce(x);
+    if (FP_YYY_equals(&(x->a),&one) && FP_YYY_iszilch(&(x->b))) return 1;
+    return 0;
+}
+
+/* SU= 8 */
+/* Fully reduce a and b mod Modulus */
+void FP2_YYY_reduce(FP2_YYY *w)
+{
+    FP_YYY_reduce(&(w->a));
+    FP_YYY_reduce(&(w->b));
+}
+
+/* return 1 if x==y, else 0 */
+/* SU= 16 */
+int FP2_YYY_equals(FP2_YYY *x,FP2_YYY *y)
+{
+    FP2_YYY_reduce(x);
+    FP2_YYY_reduce(y);
+    if (FP_YYY_equals(&(x->a),&(y->a)) && FP_YYY_equals(&(x->b),&(y->b)))
+        return 1;
+    return 0;
+}
+
+/* Create FP2 from two FPs */
+/* SU= 16 */
+void FP2_YYY_from_FPs(FP2_YYY *w,FP_YYY *x,FP_YYY *y)
+{
+    FP_YYY_copy(&(w->a),x);
+    FP_YYY_copy(&(w->b),y);
+}
+
+/* Create FP2 from two BIGS */
+/* SU= 16 */
+void FP2_YYY_from_BIGs(FP2_YYY *w,BIG_XXX x,BIG_XXX y)
+{
+    FP_YYY_nres(&(w->a),x);
+    FP_YYY_nres(&(w->b),y);
+}
+
+/* Create FP2 from FP */
+/* SU= 8 */
+void FP2_YYY_from_FP(FP2_YYY *w,FP_YYY *x)
+{
+    FP_YYY_copy(&(w->a),x);
+    FP_YYY_zero(&(w->b));
+}
+
+/* Create FP2 from BIG */
+/* SU= 8 */
+void FP2_YYY_from_BIG(FP2_YYY *w,BIG_XXX x)
+{
+    FP_YYY_nres(&(w->a),x);
+    FP_YYY_zero(&(w->b));
+}
+
+/* FP2 copy w=x */
+/* SU= 16 */
+void FP2_YYY_copy(FP2_YYY *w,FP2_YYY *x)
+{
+    if (w==x) return;
+    FP_YYY_copy(&(w->a),&(x->a));
+    FP_YYY_copy(&(w->b),&(x->b));
+}
+
+/* FP2 set w=0 */
+/* SU= 8 */
+void FP2_YYY_zero(FP2_YYY *w)
+{
+    FP_YYY_zero(&(w->a));
+    FP_YYY_zero(&(w->b));
+}
+
+/* FP2 set w=1 */
+/* SU= 48 */
+void FP2_YYY_one(FP2_YYY *w)
+{
+    FP_YYY one;
+    FP_YYY_one(&one);
+    FP2_YYY_from_FP(w,&one);
+}
+
+/* Set w=-x */
+/* SU= 88 */
+void FP2_YYY_neg(FP2_YYY *w,FP2_YYY *x)
+{
+    /* Just one neg! */
+    FP_YYY m,t;
+    FP_YYY_add(&m,&(x->a),&(x->b));
+    FP_YYY_neg(&m,&m);
+    FP_YYY_add(&t,&m,&(x->b));
+    FP_YYY_add(&(w->b),&m,&(x->a));
+    FP_YYY_copy(&(w->a),&t);
+
+}
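+
+/* Illustrative derivation (added comment): with m = -(a+b),
+       w->a = m + b = -a,   w->b = m + a = -b
+   so the whole FP2 negation costs a single base-field negation */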
+
+/* Set w=conj(x) */
+/* SU= 16 */
+void FP2_YYY_conj(FP2_YYY *w,FP2_YYY *x)
+{
+    FP_YYY_copy(&(w->a),&(x->a));
+    FP_YYY_neg(&(w->b),&(x->b));
+    FP_YYY_norm(&(w->b));
+}
+
+/* Set w=x+y */
+/* SU= 16 */
+void FP2_YYY_add(FP2_YYY *w,FP2_YYY *x,FP2_YYY *y)
+{
+    FP_YYY_add(&(w->a),&(x->a),&(y->a));
+    FP_YYY_add(&(w->b),&(x->b),&(y->b));
+}
+
+/* Set w=x-y */
+/* Input y MUST be normed */
+void FP2_YYY_sub(FP2_YYY *w,FP2_YYY *x,FP2_YYY *y)
+{
+    FP2_YYY m;
+    FP2_YYY_neg(&m,y);
+    FP2_YYY_add(w,x,&m);
+}
+
+/* Set w=s*x, where s is FP */
+/* SU= 16 */
+void FP2_YYY_pmul(FP2_YYY *w,FP2_YYY *x,FP_YYY *s)
+{
+    FP_YYY_mul(&(w->a),&(x->a),s);
+    FP_YYY_mul(&(w->b),&(x->b),s);
+}
+
+/* SU= 16 */
+/* Set w=s*x, where s is int */
+void FP2_YYY_imul(FP2_YYY *w,FP2_YYY *x,int s)
+{
+    FP_YYY_imul(&(w->a),&(x->a),s);
+    FP_YYY_imul(&(w->b),&(x->b),s);
+}
+
+/* Set w=x^2 */
+/* SU= 128 */
+void FP2_YYY_sqr(FP2_YYY *w,FP2_YYY *x)
+{
+    FP_YYY w1,w3,mb;
+
+    FP_YYY_add(&w1,&(x->a),&(x->b));
+    FP_YYY_neg(&mb,&(x->b));
+
+    FP_YYY_add(&w3,&(x->a),&(x->a));
+    FP_YYY_norm(&w3);
+    FP_YYY_mul(&(w->b),&w3,&(x->b));
+
+    FP_YYY_add(&(w->a),&(x->a),&mb);
+
+    FP_YYY_norm(&w1);
+    FP_YYY_norm(&(w->a));
+
+    FP_YYY_mul(&(w->a),&w1,&(w->a));     /* w->a#2 w->a=1 w1&w2=6 w1*w2=2 */
+}
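+
+/* Sketch of the identity used above (assuming i^2 = -1):
+       (a+ib)^2 = (a+b)(a-b) + i.2ab
+   i.e. one complex squaring for the price of two base-field muls */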
+
+
+/* Set w=x*y */
+/* Inputs MUST be normed  */
+/* Now uses Lazy reduction */
+void FP2_YYY_mul(FP2_YYY *w,FP2_YYY *x,FP2_YYY *y)
+{
+    DBIG_XXX A,B,E,F,pR;
+    BIG_XXX C,D,p;
+
+    BIG_XXX_rcopy(p,Modulus_YYY);
+    BIG_XXX_dsucopy(pR,p);
+
+    // reduce excesses of a and b as required (so product < pR)
+
+    if ((sign64)(x->a.XES+x->b.XES)*(y->a.XES+y->b.XES)>(sign64)FEXCESS_YYY)
+    {
+#ifdef DEBUG_REDUCE
+        printf("FP2 Product too large - reducing it\n");
+#endif
+        if (x->a.XES>1) FP_YYY_reduce(&(x->a));
+        if (x->b.XES>1) FP_YYY_reduce(&(x->b));
+    }
+
+    BIG_XXX_mul(A,x->a.g,y->a.g);
+    BIG_XXX_mul(B,x->b.g,y->b.g);
+
+    BIG_XXX_add(C,x->a.g,x->b.g);
+    BIG_XXX_norm(C);
+    BIG_XXX_add(D,y->a.g,y->b.g);
+    BIG_XXX_norm(D);
+
+    BIG_XXX_mul(E,C,D);
+    BIG_XXX_dadd(F,A,B);
+    BIG_XXX_dsub(B,pR,B);
+
+    BIG_XXX_dadd(A,A,B);    // A<pR? Not necessarily, but <2pR
+    BIG_XXX_dsub(E,E,F);    // E<pR ? Yes
+
+    BIG_XXX_dnorm(A);
+    FP_YYY_mod(w->a.g,A);
+    w->a.XES=3;// may drift above 2p...
+    BIG_XXX_dnorm(E);
+    FP_YYY_mod(w->b.g,E);
+    w->b.XES=2;
+
+}
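+
+/* Illustrative note (added comment): this is Karatsuba with lazy
+   reduction. With A=xa.ya, B=xb.yb and E=(xa+xb)(ya+yb),
+       real part = A - B    (formed as A + (pR - B) to stay positive)
+       imag part = E - A - B
+   so only the two final FP_YYY_mod reductions are required */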
+
+/* output FP2 in hex format [a,b] */
+/* SU= 16 */
+void FP2_YYY_output(FP2_YYY *w)
+{
+    BIG_XXX bx,by;
+    FP2_YYY_reduce(w);
+    FP_YYY_redc(bx,&(w->a));
+    FP_YYY_redc(by,&(w->b));
+    printf("[");
+    BIG_XXX_output(bx);
+    printf(",");
+    BIG_XXX_output(by);
+    printf("]");
+    FP_YYY_nres(&(w->a),bx);
+    FP_YYY_nres(&(w->b),by);
+}
+
+/* SU= 8 */
+void FP2_YYY_rawoutput(FP2_YYY *w)
+{
+    printf("[");
+    BIG_XXX_rawoutput(w->a.g);
+    printf(",");
+    BIG_XXX_rawoutput(w->b.g);
+    printf("]");
+}
+
+
+/* Set w=1/x */
+/* SU= 128 */
+void FP2_YYY_inv(FP2_YYY *w,FP2_YYY *x)
+{
+    FP_YYY w1,w2;
+
+    FP2_YYY_norm(x);
+    FP_YYY_sqr(&w1,&(x->a));
+    FP_YYY_sqr(&w2,&(x->b));
+    FP_YYY_add(&w1,&w1,&w2);
+
+    FP_YYY_inv(&w1,&w1);
+
+    FP_YYY_mul(&(w->a),&(x->a),&w1);
+    FP_YYY_neg(&w1,&w1);
+    FP_YYY_norm(&w1);
+    FP_YYY_mul(&(w->b),&(x->b),&w1);
+}
+
+
+/* Set w=x/2 */
+/* SU= 16 */
+void FP2_YYY_div2(FP2_YYY *w,FP2_YYY *x)
+{
+    FP_YYY_div2(&(w->a),&(x->a));
+    FP_YYY_div2(&(w->b),&(x->b));
+}
+
+/* Set w*=(1+sqrt(-1)) */
+/* where X^2-(1+sqrt(-1)) is irreducible for FP4, assumes p=3 mod 8 */
+
+/* Input MUST be normed */
+void FP2_YYY_mul_ip(FP2_YYY *w)
+{
+    FP_YYY z;
+    FP2_YYY t;
+
+    FP2_YYY_copy(&t,w);
+
+    FP_YYY_copy(&z,&(w->a));
+    FP_YYY_neg(&(w->a),&(w->b));
+    FP_YYY_copy(&(w->b),&z);
+
+    FP2_YYY_add(w,&t,w);
+    // Output NOT normed, so use with care
+}
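+
+/* Expansion of the above (illustrative): (a+ib)(1+i) = (a-b) + i(a+b),
+   computed as w + i.w with one swap/negate and one FP2 addition */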
+
+
+void FP2_YYY_div_ip2(FP2_YYY *w)
+{
+    FP2_YYY t;
+    FP2_YYY_norm(w);
+    FP_YYY_add(&(t.a),&(w->a),&(w->b));
+    FP_YYY_sub(&(t.b),&(w->b),&(w->a));
+    FP2_YYY_norm(&t);
+    FP2_YYY_copy(w,&t);
+}
+
+/* Set w/=(1+sqrt(-1)) */
+/* SU= 88 */
+void FP2_YYY_div_ip(FP2_YYY *w)
+{
+    FP2_YYY t;
+    FP2_YYY_norm(w);
+    FP_YYY_add(&t.a,&(w->a),&(w->b));
+    FP_YYY_sub(&t.b,&(w->b),&(w->a));
+    FP2_YYY_norm(&t);
+    FP2_YYY_div2(w,&t);
+}
+
+/* SU= 8 */
+/* normalise a and b components of w */
+void FP2_YYY_norm(FP2_YYY *w)
+{
+    FP_YYY_norm(&(w->a));
+    FP_YYY_norm(&(w->b));
+}
+
+/* Set w=a^b mod m */
+/* SU= 208 */
+void FP2_YYY_pow(FP2_YYY *r,FP2_YYY* a,BIG_XXX b)
+{
+    FP2_YYY w;
+    FP_YYY one;
+    BIG_XXX z,zilch;
+    int bt;
+
+    BIG_XXX_norm(b);
+    BIG_XXX_copy(z,b);
+    FP2_YYY_copy(&w,a);
+    FP_YYY_one(&one);
+    BIG_XXX_zero(zilch);
+    FP2_YYY_from_FP(r,&one);
+    while(1)
+    {
+        bt=BIG_XXX_parity(z);
+        BIG_XXX_shr(z,1);
+        if (bt) FP2_YYY_mul(r,r,&w);
+        if (BIG_XXX_comp(z,zilch)==0) break;
+        FP2_YYY_sqr(&w,&w);
+    }
+    FP2_YYY_reduce(r);
+}
+
+/* sqrt(a+ib) = sqrt((a+sqrt(a*a-n*b*b))/2) + i.b/(2*sqrt((a+sqrt(a*a-n*b*b))/2)) */
+/* returns true if u is QR */
+
+int FP2_YYY_sqrt(FP2_YYY *w,FP2_YYY *u)
+{
+    FP_YYY w1,w2;
+    FP2_YYY_copy(w,u);
+    if (FP2_YYY_iszilch(w)) return 1;
+
+    FP_YYY_sqr(&w1,&(w->b));
+    FP_YYY_sqr(&w2,&(w->a));
+    FP_YYY_add(&w1,&w1,&w2);
+    if (!FP_YYY_qr(&w1))
+    {
+        FP2_YYY_zero(w);
+        return 0;
+    }
+    FP_YYY_sqrt(&w1,&w1);
+    FP_YYY_add(&w2,&(w->a),&w1);
+    FP_YYY_norm(&w2);
+    FP_YYY_div2(&w2,&w2);
+    if (!FP_YYY_qr(&w2))
+    {
+        FP_YYY_sub(&w2,&(w->a),&w1);
+        FP_YYY_norm(&w2);
+        FP_YYY_div2(&w2,&w2);
+        if (!FP_YYY_qr(&w2))
+        {
+            FP2_YYY_zero(w);
+            return 0;
+        }
+    }
+    FP_YYY_sqrt(&w2,&w2);
+    FP_YYY_copy(&(w->a),&w2);
+    FP_YYY_add(&w2,&w2,&w2);
+
+    FP_YYY_inv(&w2,&w2);
+
+    FP_YYY_mul(&(w->b),&(w->b),&w2);
+    return 1;
+}
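+
+/* Usage sketch (hypothetical caller, added for illustration):
+
+       FP2_YYY r,x;
+       // ... set x ...
+       if (FP2_YYY_sqrt(&r,&x))
+       {
+           // here r*r == x
+       }
+       else
+       {
+           // x is a non-residue and r has been set to zero
+       }
+*/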
+
+/*
+int main()
+{
+	int i;
+	FP2_YYY w,z;
+	BIG_XXX a,b,e;
+	BIG_XXX pp1,pm1;
+	BIG_XXX_one(a); BIG_XXX_one(b);
+	FP2_YYY_from_BIGs(&w,a,b);
+//	for (i=0;i<100;i++)
+//	{
+//		BIG_XXX_randomnum(a); BIG_XXX_randomnum(b);
+//		BIG_XXX_mod(a,Modulus_YYY); BIG_XXX_mod(b,Modulus_YYY);
+//		FP2_YYY_from_BIGs(&w,a,b);
+//		FP2_YYY_output(&w);
+//		FP2_YYY_inv(&z,&w);
+//				FP2_YYY_output(&z);
+//		FP2_YYY_inv(&z,&z);
+//				FP2_YYY_output(&z);
+//				FP2_YYY_output(&w);
+//		if (!FP2_YYY_equals(&w,&z)) printf("error \n");
+//		else printf("OK \n");
+//	}
+//exit(0);
+	printf("w= "); FP2_YYY_output(&w); printf("\n");
+	BIG_XXX_zero(e); BIG_XXX_inc(e,27);
+	FP2_YYY_pow(&w,&w,e);
+	FP2_YYY_output(&w);
+exit(0);
+	BIG_XXX_rcopy(pp1,Modulus_YYY);
+	BIG_XXX_rcopy(pm1,Modulus_YYY);
+	BIG_XXX_inc(pp1,1);
+	BIG_XXX_dec(pm1,1);
+	BIG_XXX_norm(pp1);
+	BIG_XXX_norm(pm1);
+	FP2_YYY_pow(&w,&w,pp1);
+	FP2_YYY_pow(&w,&w,pm1);
+	FP2_YYY_output(&w);
+}
+
+*/

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto-c/blob/8d28d2c3/src/fp24.c.in
----------------------------------------------------------------------
diff --git a/src/fp24.c.in b/src/fp24.c.in
new file mode 100644
index 0000000..19fb52f
--- /dev/null
+++ b/src/fp24.c.in
@@ -0,0 +1,940 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL Fp^24 functions */
+/* SU=m, m is Stack Usage (no lazy reduction) */
+/* FP24 elements are of the form a+i.b+i^2.c */
+
+#include "fp24_YYY.h"
+
+/* return 1 if b==c, no branching */
+static int teq(sign32 b,sign32 c)
+{
+    sign32 x=b^c;
+    x-=1;  // if x=0, x now -1
+    return (int)((x>>31)&1);
+}
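+
+/* Illustrative note: for the small non-negative window values used here,
+   x=b^c is zero exactly when b==c; then x-1 = -1 sets the sign bit and
+   (x>>31)&1 returns 1, otherwise 0 - no data-dependent branch */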
+
+
+/* Constant time select from pre-computed table */
+static void FP24_YYY_select(FP24_YYY *f,FP24_YYY g[],sign32 b)
+{
+    FP24_YYY invf;
+    sign32 m=b>>31;
+    sign32 babs=(b^m)-m;
+
+    babs=(babs-1)/2;
+
+    FP24_YYY_cmove(f,&g[0],teq(babs,0));  // conditional move
+    FP24_YYY_cmove(f,&g[1],teq(babs,1));
+    FP24_YYY_cmove(f,&g[2],teq(babs,2));
+    FP24_YYY_cmove(f,&g[3],teq(babs,3));
+    FP24_YYY_cmove(f,&g[4],teq(babs,4));
+    FP24_YYY_cmove(f,&g[5],teq(babs,5));
+    FP24_YYY_cmove(f,&g[6],teq(babs,6));
+    FP24_YYY_cmove(f,&g[7],teq(babs,7));
+
+    FP24_YYY_copy(&invf,f);
+    FP24_YYY_conj(&invf,&invf);  // 1/f
+    FP24_YYY_cmove(f,&invf,(int)(m&1));
+}
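+
+/* Illustrative note: b is an odd signed digit; babs maps |b| to a table
+   index 0..7, every slot is touched by a conditional move so the memory
+   access pattern is independent of b, and the sign is applied by a final
+   conditional conjugation (the inverse of a unitary element) */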
+
+/* test x==0 ? */
+/* SU= 8 */
+int FP24_YYY_iszilch(FP24_YYY *x)
+{
+    if (FP8_YYY_iszilch(&(x->a)) && FP8_YYY_iszilch(&(x->b)) && FP8_YYY_iszilch(&(x->c))) return 1;
+    return 0;
+}
+
+/* test x==1 ? */
+/* SU= 8 */
+int FP24_YYY_isunity(FP24_YYY *x)
+{
+    if (FP8_YYY_isunity(&(x->a)) && FP8_YYY_iszilch(&(x->b)) && FP8_YYY_iszilch(&(x->c))) return 1;
+    return 0;
+}
+
+/* FP24 copy w=x */
+/* SU= 16 */
+void FP24_YYY_copy(FP24_YYY *w,FP24_YYY *x)
+{
+    if (x==w) return;
+    FP8_YYY_copy(&(w->a),&(x->a));
+    FP8_YYY_copy(&(w->b),&(x->b));
+    FP8_YYY_copy(&(w->c),&(x->c));
+}
+
+/* FP24 w=1 */
+/* SU= 8 */
+void FP24_YYY_one(FP24_YYY *w)
+{
+    FP8_YYY_one(&(w->a));
+    FP8_YYY_zero(&(w->b));
+    FP8_YYY_zero(&(w->c));
+}
+
+/* return 1 if x==y, else 0 */
+/* SU= 16 */
+int FP24_YYY_equals(FP24_YYY *x,FP24_YYY *y)
+{
+    if (FP8_YYY_equals(&(x->a),&(y->a)) && FP8_YYY_equals(&(x->b),&(y->b)) && FP8_YYY_equals(&(x->c),&(y->c)))
+        return 1;
+    return 0;
+}
+
+/* Set w=conj(x) */
+/* SU= 8 */
+void FP24_YYY_conj(FP24_YYY *w,FP24_YYY *x)
+{
+    FP24_YYY_copy(w,x);
+    FP8_YYY_conj(&(w->a),&(w->a));
+    FP8_YYY_nconj(&(w->b),&(w->b));
+    FP8_YYY_conj(&(w->c),&(w->c));
+}
+
+/* Create FP24 from FP8 */
+/* SU= 8 */
+void FP24_YYY_from_FP8(FP24_YYY *w,FP8_YYY *a)
+{
+    FP8_YYY_copy(&(w->a),a);
+    FP8_YYY_zero(&(w->b));
+    FP8_YYY_zero(&(w->c));
+}
+
+/* Create FP24 from 3 FP8's */
+/* SU= 16 */
+void FP24_YYY_from_FP8s(FP24_YYY *w,FP8_YYY *a,FP8_YYY *b,FP8_YYY *c)
+{
+    FP8_YYY_copy(&(w->a),a);
+    FP8_YYY_copy(&(w->b),b);
+    FP8_YYY_copy(&(w->c),c);
+}
+
+/* Granger-Scott Unitary Squaring. This does not benefit from lazy reduction */
+/* SU= 600 */
+void FP24_YYY_usqr(FP24_YYY *w,FP24_YYY *x)
+{
+    FP8_YYY A,B,C,D;
+
+    FP8_YYY_copy(&A,&(x->a));
+
+    FP8_YYY_sqr(&(w->a),&(x->a));
+    FP8_YYY_add(&D,&(w->a),&(w->a));
+    FP8_YYY_add(&(w->a),&D,&(w->a));
+
+    FP8_YYY_norm(&(w->a));
+    FP8_YYY_nconj(&A,&A);
+
+    FP8_YYY_add(&A,&A,&A);
+    FP8_YYY_add(&(w->a),&(w->a),&A);
+    FP8_YYY_sqr(&B,&(x->c));
+    FP8_YYY_times_i(&B);
+
+    FP8_YYY_add(&D,&B,&B);
+    FP8_YYY_add(&B,&B,&D);
+    FP8_YYY_norm(&B);
+
+    FP8_YYY_sqr(&C,&(x->b));
+
+    FP8_YYY_add(&D,&C,&C);
+    FP8_YYY_add(&C,&C,&D);
+
+    FP8_YYY_norm(&C);
+    FP8_YYY_conj(&(w->b),&(x->b));
+    FP8_YYY_add(&(w->b),&(w->b),&(w->b));
+    FP8_YYY_nconj(&(w->c),&(x->c));
+
+    FP8_YYY_add(&(w->c),&(w->c),&(w->c));
+    FP8_YYY_add(&(w->b),&B,&(w->b));
+    FP8_YYY_add(&(w->c),&C,&(w->c));
+
+    FP24_YYY_reduce(w);	    /* reduce here as in pow function repeated squarings would trigger multiple reductions */
+}
+
+/* FP24 squaring w=x^2 */
+/* SU= 600 */
+void FP24_YYY_sqr(FP24_YYY *w,FP24_YYY *x)
+{
+    /* Use Chung-Hasan SQR2 method from http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf */
+
+    FP8_YYY A,B,C,D;
+
+    FP8_YYY_sqr(&A,&(x->a));
+    FP8_YYY_mul(&B,&(x->b),&(x->c));
+    FP8_YYY_add(&B,&B,&B);
+    FP8_YYY_norm(&B);
+    FP8_YYY_sqr(&C,&(x->c));
+
+    FP8_YYY_mul(&D,&(x->a),&(x->b));
+    FP8_YYY_add(&D,&D,&D);
+
+    FP8_YYY_add(&(w->c),&(x->a),&(x->c));
+    FP8_YYY_add(&(w->c),&(x->b),&(w->c));
+    FP8_YYY_norm(&(w->c));
+
+    FP8_YYY_sqr(&(w->c),&(w->c));
+
+    FP8_YYY_copy(&(w->a),&A);
+    FP8_YYY_add(&A,&A,&B);
+
+    FP8_YYY_norm(&A);
+
+    FP8_YYY_add(&A,&A,&C);
+    FP8_YYY_add(&A,&A,&D);
+
+    FP8_YYY_norm(&A);
+
+    FP8_YYY_neg(&A,&A);
+    FP8_YYY_times_i(&B);
+    FP8_YYY_times_i(&C);
+
+    FP8_YYY_add(&(w->a),&(w->a),&B);
+    FP8_YYY_add(&(w->b),&C,&D);
+    FP8_YYY_add(&(w->c),&(w->c),&A);
+
+    FP24_YYY_norm(w);
+}
+
+/* FP24 full multiplication w=w*y */
+/* SU= 896 */
+void FP24_YYY_mul(FP24_YYY *w,FP24_YYY *y)
+{
+    FP8_YYY z0,z1,z2,z3,t0,t1;
+
+    FP8_YYY_mul(&z0,&(w->a),&(y->a));
+    FP8_YYY_mul(&z2,&(w->b),&(y->b));  //
+
+    FP8_YYY_add(&t0,&(w->a),&(w->b));
+    FP8_YYY_add(&t1,&(y->a),&(y->b));  //
+
+    FP8_YYY_norm(&t0);
+    FP8_YYY_norm(&t1);
+
+    FP8_YYY_mul(&z1,&t0,&t1);
+    FP8_YYY_add(&t0,&(w->b),&(w->c));
+    FP8_YYY_add(&t1,&(y->b),&(y->c));  //
+
+    FP8_YYY_norm(&t0);
+    FP8_YYY_norm(&t1);
+
+    FP8_YYY_mul(&z3,&t0,&t1);
+
+    FP8_YYY_neg(&t0,&z0);
+    FP8_YYY_neg(&t1,&z2);
+
+    FP8_YYY_add(&z1,&z1,&t0);   // z1=z1-z0
+//    FP8_YYY_norm(&z1);
+    FP8_YYY_add(&(w->b),&z1,&t1);    // z1=z1-z2
+    FP8_YYY_add(&z3,&z3,&t1);        // z3=z3-z2
+    FP8_YYY_add(&z2,&z2,&t0);        // z2=z2-z0
+
+    FP8_YYY_add(&t0,&(w->a),&(w->c));
+    FP8_YYY_add(&t1,&(y->a),&(y->c));
+
+    FP8_YYY_norm(&t0);
+    FP8_YYY_norm(&t1);
+
+    FP8_YYY_mul(&t0,&t1,&t0);
+    FP8_YYY_add(&z2,&z2,&t0);
+
+    FP8_YYY_mul(&t0,&(w->c),&(y->c));
+    FP8_YYY_neg(&t1,&t0);
+
+    FP8_YYY_add(&(w->c),&z2,&t1);
+    FP8_YYY_add(&z3,&z3,&t1);
+    FP8_YYY_times_i(&t0);
+    FP8_YYY_add(&(w->b),&(w->b),&t0);
+    FP8_YYY_norm(&z3);
+    FP8_YYY_times_i(&z3);
+    FP8_YYY_add(&(w->a),&z0,&z3);
+
+    FP24_YYY_norm(w);
+}
+
+/* FP24 multiplication w=w*y */
+/* SU= 744 */
+/* catering for special case that arises from special form of ATE pairing line function */
+void FP24_YYY_smul(FP24_YYY *w,FP24_YYY *y,int type)
+{
+    FP8_YYY z0,z1,z2,z3,t0,t1;
+
+    if (type==D_TYPE)
+    {
+        // y->c is 0
+
+        FP8_YYY_copy(&z3,&(w->b));
+        FP8_YYY_mul(&z0,&(w->a),&(y->a));
+
+        FP8_YYY_pmul(&z2,&(w->b),&(y->b).a);
+        FP8_YYY_add(&(w->b),&(w->a),&(w->b));
+        FP8_YYY_copy(&t1,&(y->a));
+        FP4_YYY_add(&t1.a,&t1.a,&(y->b).a);
+
+        FP8_YYY_norm(&t1);
+        FP8_YYY_norm(&(w->b));
+
+        FP8_YYY_mul(&(w->b),&(w->b),&t1);
+        FP8_YYY_add(&z3,&z3,&(w->c));
+        FP8_YYY_norm(&z3);
+        FP8_YYY_pmul(&z3,&z3,&(y->b).a);
+        FP8_YYY_neg(&t0,&z0);
+        FP8_YYY_neg(&t1,&z2);
+
+        FP8_YYY_add(&(w->b),&(w->b),&t0);   // z1=z1-z0
+        FP8_YYY_add(&(w->b),&(w->b),&t1);   // z1=z1-z2
+
+        FP8_YYY_add(&z3,&z3,&t1);        // z3=z3-z2
+        FP8_YYY_add(&z2,&z2,&t0);        // z2=z2-z0
+
+        FP8_YYY_add(&t0,&(w->a),&(w->c));
+
+        FP8_YYY_norm(&t0);
+        FP8_YYY_norm(&z3);
+
+        FP8_YYY_mul(&t0,&(y->a),&t0);
+        FP8_YYY_add(&(w->c),&z2,&t0);
+
+        FP8_YYY_times_i(&z3);
+        FP8_YYY_add(&(w->a),&z0,&z3);
+    }
+
+    if (type==M_TYPE)
+    {
+        // y->b is zero
+        FP8_YYY_mul(&z0,&(w->a),&(y->a));
+        FP8_YYY_add(&t0,&(w->a),&(w->b));
+        FP8_YYY_norm(&t0);
+
+        FP8_YYY_mul(&z1,&t0,&(y->a));
+        FP8_YYY_add(&t0,&(w->b),&(w->c));
+        FP8_YYY_norm(&t0);
+
+        FP8_YYY_pmul(&z3,&t0,&(y->c).b);
+        FP8_YYY_times_i(&z3);
+
+        FP8_YYY_neg(&t0,&z0);
+        FP8_YYY_add(&z1,&z1,&t0);   // z1=z1-z0
+
+        FP8_YYY_copy(&(w->b),&z1);
+
+        FP8_YYY_copy(&z2,&t0);
+
+        FP8_YYY_add(&t0,&(w->a),&(w->c));
+        FP8_YYY_add(&t1,&(y->a),&(y->c));
+
+        FP8_YYY_norm(&t0);
+        FP8_YYY_norm(&t1);
+
+        FP8_YYY_mul(&t0,&t1,&t0);
+        FP8_YYY_add(&z2,&z2,&t0);
+
+        FP8_YYY_pmul(&t0,&(w->c),&(y->c).b);
+        FP8_YYY_times_i(&t0);
+        FP8_YYY_neg(&t1,&t0);
+        FP8_YYY_times_i(&t0);
+
+        FP8_YYY_add(&(w->c),&z2,&t1);
+        FP8_YYY_add(&z3,&z3,&t1);
+
+        FP8_YYY_add(&(w->b),&(w->b),&t0);
+        FP8_YYY_norm(&z3);
+        FP8_YYY_times_i(&z3);
+        FP8_YYY_add(&(w->a),&z0,&z3);
+    }
+    FP24_YYY_norm(w);
+}
+
+/* Set w=1/x */
+/* SU= 600 */
+void FP24_YYY_inv(FP24_YYY *w,FP24_YYY *x)
+{
+    FP8_YYY f0,f1,f2,f3;
+
+    FP8_YYY_sqr(&f0,&(x->a));
+    FP8_YYY_mul(&f1,&(x->b),&(x->c));
+    FP8_YYY_times_i(&f1);
+    FP8_YYY_sub(&f0,&f0,&f1);  /* y.a */
+    FP8_YYY_norm(&f0);
+
+    FP8_YYY_sqr(&f1,&(x->c));
+    FP8_YYY_times_i(&f1);
+    FP8_YYY_mul(&f2,&(x->a),&(x->b));
+    FP8_YYY_sub(&f1,&f1,&f2);  /* y.b */
+    FP8_YYY_norm(&f1);
+
+    FP8_YYY_sqr(&f2,&(x->b));
+    FP8_YYY_mul(&f3,&(x->a),&(x->c));
+    FP8_YYY_sub(&f2,&f2,&f3);  /* y.c */
+    FP8_YYY_norm(&f2);
+
+    FP8_YYY_mul(&f3,&(x->b),&f2);
+    FP8_YYY_times_i(&f3);
+    FP8_YYY_mul(&(w->a),&f0,&(x->a));
+    FP8_YYY_add(&f3,&(w->a),&f3);
+    FP8_YYY_mul(&(w->c),&f1,&(x->c));
+    FP8_YYY_times_i(&(w->c));
+
+    FP8_YYY_add(&f3,&(w->c),&f3);
+    FP8_YYY_norm(&f3);
+
+    FP8_YYY_inv(&f3,&f3);
+    FP8_YYY_mul(&(w->a),&f0,&f3);
+    FP8_YYY_mul(&(w->b),&f1,&f3);
+    FP8_YYY_mul(&(w->c),&f2,&f3);
+
+}
+
+/* constant time powering by small integer of max length bts */
+
+void FP24_YYY_pinpow(FP24_YYY *r,int e,int bts)
+{
+    int i,b;
+    FP24_YYY R[2];
+
+    FP24_YYY_one(&R[0]);
+    FP24_YYY_copy(&R[1],r);
+
+    for (i=bts-1; i>=0; i--)
+    {
+        b=(e>>i)&1;
+        FP24_YYY_mul(&R[1-b],&R[b]);
+        FP24_YYY_usqr(&R[b],&R[b]);
+    }
+    FP24_YYY_copy(r,&R[0]);
+}
+
+/* Compressed powering of unitary elements y=x^(e mod r) */
+
+void FP24_YYY_compow(FP8_YYY *c,FP24_YYY *x,BIG_XXX e,BIG_XXX r)
+{
+    FP24_YYY g1,g2;
+    FP8_YYY cp,cpm1,cpm2;
+    FP2_YYY f;
+    BIG_XXX q,a,b,m;
+
+    BIG_XXX_rcopy(a,Fra_YYY);
+    BIG_XXX_rcopy(b,Frb_YYY);
+    FP2_YYY_from_BIGs(&f,a,b);
+
+    BIG_XXX_rcopy(q,Modulus_YYY);
+
+    FP24_YYY_copy(&g1,x);
+    FP24_YYY_copy(&g2,x);
+
+    BIG_XXX_copy(m,q);
+    BIG_XXX_mod(m,r);
+
+    BIG_XXX_copy(a,e);
+    BIG_XXX_mod(a,m);
+
+    BIG_XXX_copy(b,e);
+    BIG_XXX_sdiv(b,m);
+
+    FP24_YYY_trace(c,&g1);
+
+    if (BIG_XXX_iszilch(b))
+    {
+        FP8_YYY_xtr_pow(c,c,e);
+        return;
+    }
+
+    FP24_YYY_frob(&g2,&f,1);
+    FP24_YYY_trace(&cp,&g2);
+    FP24_YYY_conj(&g1,&g1);
+    FP24_YYY_mul(&g2,&g1);
+    FP24_YYY_trace(&cpm1,&g2);
+    FP24_YYY_mul(&g2,&g1);
+
+    FP24_YYY_trace(&cpm2,&g2);
+
+    FP8_YYY_xtr_pow2(c,&cp,c,&cpm1,&cpm2,a,b);
+
+}
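+
+/* Illustrative outline (added comment): write e = b.m + a with m = q mod r;
+   since x has order dividing r, x^m = x^q = frob(x), so
+       x^e = frob(x)^b . x^a
+   which xtr_pow2 evaluates entirely on FP8 traces */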
+
+/* Note this is simple square and multiply, so not side-channel safe */
+
+void FP24_YYY_pow(FP24_YYY *r,FP24_YYY *a,BIG_XXX b)
+{
+    FP24_YYY w;
+    BIG_XXX b3;
+    int i,nb,bt;
+    BIG_XXX_norm(b);
+    BIG_XXX_pmul(b3,b,3);
+    BIG_XXX_norm(b3);
+
+    FP24_YYY_copy(&w,a);
+
+    nb=BIG_XXX_nbits(b3);
+    for (i=nb-2; i>=1; i--)
+    {
+        FP24_YYY_usqr(&w,&w);
+        bt=BIG_XXX_bit(b3,i)-BIG_XXX_bit(b,i);
+        if (bt==1)
+            FP24_YYY_mul(&w,a);
+        if (bt==-1)
+        {
+            FP24_YYY_conj(a,a);
+            FP24_YYY_mul(&w,a);
+            FP24_YYY_conj(a,a);
+        }
+    }
+
+    FP24_YYY_copy(r,&w);
+    FP24_YYY_reduce(r);
+}
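+
+/* Why b3=3b (illustrative): bit(3b,i)-bit(b,i) gives signed digits
+   bt in {-1,0,1}, a NAF-like recoding with fewer non-zero digits than
+   plain binary. E.g. b=7: 3b=21=10101b and the digits yield a^8.a^-1 = a^7 */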
+
+/* p=q0^u0.q1^u1.q2^u2.q3^u3... */
+/* Side channel attack secure */
+// Bos & Costello https://eprint.iacr.org/2013/458.pdf
+// Faz-Hernandez & Longa & Sanchez  https://eprint.iacr.org/2013/158.pdf
+
+void FP24_YYY_pow8(FP24_YYY *p,FP24_YYY *q,BIG_XXX u[8])
+{
+    int i,j,k,nb,pb1,pb2,bt;
+    FP24_YYY g1[8],g2[8],r;
+    BIG_XXX t[8],mt;
+    sign8 w1[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s1[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 w2[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s2[NLEN_XXX*BASEBITS_XXX+1];
+    FP_YYY fx,fy;
+    FP2_YYY X;
+
+    FP_YYY_rcopy(&fx,Fra_YYY);
+    FP_YYY_rcopy(&fy,Frb_YYY);
+    FP2_YYY_from_FPs(&X,&fx,&fy);
+
+    for (i=0; i<8; i++)
+        BIG_XXX_copy(t[i],u[i]);
+
+    // Precomputed table
+    FP24_YYY_copy(&g1[0],&q[0]); // q[0]
+    FP24_YYY_copy(&g1[1],&g1[0]);
+    FP24_YYY_mul(&g1[1],&q[1]);	// q[0].q[1]
+    FP24_YYY_copy(&g1[2],&g1[0]);
+    FP24_YYY_mul(&g1[2],&q[2]);	// q[0].q[2]
+    FP24_YYY_copy(&g1[3],&g1[1]);
+    FP24_YYY_mul(&g1[3],&q[2]);	// q[0].q[1].q[2]
+    FP24_YYY_copy(&g1[4],&g1[0]);
+    FP24_YYY_mul(&g1[4],&q[3]);  // q[0].q[3]
+    FP24_YYY_copy(&g1[5],&g1[1]);
+    FP24_YYY_mul(&g1[5],&q[3]);	// q[0].q[1].q[3]
+    FP24_YYY_copy(&g1[6],&g1[2]);
+    FP24_YYY_mul(&g1[6],&q[3]);	// q[0].q[2].q[3]
+    FP24_YYY_copy(&g1[7],&g1[3]);
+    FP24_YYY_mul(&g1[7],&q[3]);	// q[0].q[1].q[2].q[3]
+
+    // Use Frobenius
+
+    for (i=0; i<8; i++)
+    {
+        FP24_YYY_copy(&g2[i],&g1[i]);
+        FP24_YYY_frob(&g2[i],&X,4);
+    }
+
+    // Make it odd
+    pb1=1-BIG_XXX_parity(t[0]);
+    BIG_XXX_inc(t[0],pb1);
+    BIG_XXX_norm(t[0]);
+
+    pb2=1-BIG_XXX_parity(t[4]);
+    BIG_XXX_inc(t[4],pb2);
+    BIG_XXX_norm(t[4]);
+
+    // Number of bits
+    BIG_XXX_zero(mt);
+    for (i=0; i<8; i++)
+    {
+        BIG_XXX_or(mt,mt,t[i]);
+    }
+    nb=1+BIG_XXX_nbits(mt);
+
+    // Sign pivot
+    s1[nb-1]=1;
+    s2[nb-1]=1;
+    for (i=0; i<nb-1; i++)
+    {
+        BIG_XXX_fshr(t[0],1);
+        s1[i]=2*BIG_XXX_parity(t[0])-1;
+        BIG_XXX_fshr(t[4],1);
+        s2[i]=2*BIG_XXX_parity(t[4])-1;
+    }
+
+    // Recoded exponents
+    for (i=0; i<nb; i++)
+    {
+        w1[i]=0;
+        k=1;
+        for (j=1; j<4; j++)
+        {
+            bt=s1[i]*BIG_XXX_parity(t[j]);
+            BIG_XXX_fshr(t[j],1);
+
+            BIG_XXX_dec(t[j],(bt>>1));
+            BIG_XXX_norm(t[j]);
+            w1[i]+=bt*k;
+            k*=2;
+        }
+
+        w2[i]=0;
+        k=1;
+        for (j=5; j<8; j++)
+        {
+            bt=s2[i]*BIG_XXX_parity(t[j]);
+            BIG_XXX_fshr(t[j],1);
+
+            BIG_XXX_dec(t[j],(bt>>1));
+            BIG_XXX_norm(t[j]);
+            w2[i]+=bt*k;
+            k*=2;
+        }
+    }
+
+    // Main loop
+    FP24_YYY_select(p,g1,2*w1[nb-1]+1);
+    FP24_YYY_select(&r,g2,2*w2[nb-1]+1);
+    FP24_YYY_mul(p,&r);
+    for (i=nb-2; i>=0; i--)
+    {
+        FP24_YYY_usqr(p,p);
+        FP24_YYY_select(&r,g1,2*w1[i]+s1[i]);
+        FP24_YYY_mul(p,&r);
+        FP24_YYY_select(&r,g2,2*w2[i]+s2[i]);
+        FP24_YYY_mul(p,&r);
+    }
+
+    // apply correction
+    FP24_YYY_conj(&r,&q[0]);
+    FP24_YYY_mul(&r,p);
+    FP24_YYY_cmove(p,&r,pb1);
+    FP24_YYY_conj(&r,&q[4]);
+    FP24_YYY_mul(&r,p);
+    FP24_YYY_cmove(p,&r,pb2);
+
+    FP24_YYY_reduce(p);
+}
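+
+/* Usage note (illustrative assumption): the caller supplies
+   q[i] = x^(p^i), computed via Frobenius, and u[] the eight GLS
+   sub-scalars of e, so that p = q[0]^u[0]...q[7]^u[7] = x^e, evaluated
+   as two interleaved 4-dimensional signed fixed-window ladders */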
+
+/* Set w=w^p using Frobenius */
+/* SU= 160 */
+void FP24_YYY_frob(FP24_YYY *w,FP2_YYY *f,int n)
+{
+    int i;
+    FP2_YYY f3,f2;				// f=(1+i)^(p-7)/12
+    FP2_YYY_sqr(&f2,f);     //
+    FP2_YYY_mul(&f3,&f2,f); // f3=f^3=(1+i)^(p-7)/4
+
+    FP2_YYY_mul_ip(&f3);    // f3 = (1+i).f3 = (1+i)^(p-3)/4
+    FP2_YYY_norm(&f3);
+
+    for (i=0; i<n; i++)
+    {
+        FP8_YYY_frob(&(w->a),&f3);   // a=a^p
+        FP8_YYY_frob(&(w->b),&f3);   // b=b^p
+        FP8_YYY_frob(&(w->c),&f3);   // c=c^p
+
+        FP8_YYY_qmul(&(w->b),&(w->b),f);
+        FP8_YYY_times_i2(&(w->b));
+        FP8_YYY_qmul(&(w->c),&(w->c),&f2);
+        FP8_YYY_times_i2(&(w->c));
+        FP8_YYY_times_i2(&(w->c));
+    }
+}
+
+/* SU= 8 */
+/* normalise all components of w */
+void FP24_YYY_norm(FP24_YYY *w)
+{
+    FP8_YYY_norm(&(w->a));
+    FP8_YYY_norm(&(w->b));
+    FP8_YYY_norm(&(w->c));
+}
+
+/* SU= 8 */
+/* reduce all components of w */
+void FP24_YYY_reduce(FP24_YYY *w)
+{
+    FP8_YYY_reduce(&(w->a));
+    FP8_YYY_reduce(&(w->b));
+    FP8_YYY_reduce(&(w->c));
+}
+
+/* trace function w=trace(x) */
+/* SU= 8 */
+void FP24_YYY_trace(FP8_YYY *w,FP24_YYY *x)
+{
+    FP8_YYY_imul(w,&(x->a),3);
+    FP8_YYY_reduce(w);
+}
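+
+/* Illustrative note: the trace down to FP8 sums the three conjugates of
+   x = a+i.b+i^2.c; the i and i^2 parts cancel while the a parts add, so
+   trace(x) = 3a */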
+
+/* SU= 8 */
+/* Output w in hex */
+void FP24_YYY_output(FP24_YYY *w)
+{
+    printf("[");
+    FP8_YYY_output(&(w->a));
+    printf(",");
+    FP8_YYY_output(&(w->b));
+    printf(",");
+    FP8_YYY_output(&(w->c));
+    printf("]");
+}
+
+/* SU= 64 */
+/* Convert g to octet string w */
+void FP24_YYY_toOctet(octet *W,FP24_YYY *g)
+{
+    BIG_XXX a;
+    W->len=24*MODBYTES_XXX;
+
+    FP_YYY_redc(a,&(g->a.a.a.a));
+    BIG_XXX_toBytes(&(W->val[0]),a);
+    FP_YYY_redc(a,&(g->a.a.a.b));
+    BIG_XXX_toBytes(&(W->val[MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.a.b.a));
+    BIG_XXX_toBytes(&(W->val[2*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.a.b.b));
+    BIG_XXX_toBytes(&(W->val[3*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.a.a));
+    BIG_XXX_toBytes(&(W->val[4*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.a.b));
+    BIG_XXX_toBytes(&(W->val[5*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.b.a));
+    BIG_XXX_toBytes(&(W->val[6*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.b.b));
+    BIG_XXX_toBytes(&(W->val[7*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.a.a.a));
+    BIG_XXX_toBytes(&(W->val[8*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.a.a.b));
+    BIG_XXX_toBytes(&(W->val[9*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.a.b.a));
+    BIG_XXX_toBytes(&(W->val[10*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.a.b.b));
+    BIG_XXX_toBytes(&(W->val[11*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.a.a));
+    BIG_XXX_toBytes(&(W->val[12*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.a.b));
+    BIG_XXX_toBytes(&(W->val[13*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.b.a));
+    BIG_XXX_toBytes(&(W->val[14*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.b.b));
+    BIG_XXX_toBytes(&(W->val[15*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.a.a.a));
+    BIG_XXX_toBytes(&(W->val[16*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.a.a.b));
+    BIG_XXX_toBytes(&(W->val[17*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.a.b.a));
+    BIG_XXX_toBytes(&(W->val[18*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.a.b.b));
+    BIG_XXX_toBytes(&(W->val[19*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.a.a));
+    BIG_XXX_toBytes(&(W->val[20*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.a.b));
+    BIG_XXX_toBytes(&(W->val[21*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.b.a));
+    BIG_XXX_toBytes(&(W->val[22*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.b.b));
+    BIG_XXX_toBytes(&(W->val[23*MODBYTES_XXX]),a);
+}
+
+/* SU= 24 */
+/* Restore g from octet string w */
+void FP24_YYY_fromOctet(FP24_YYY *g,octet *W)
+{
+    BIG_XXX b;
+
+    BIG_XXX_fromBytes(b,&W->val[0]);
+    FP_YYY_nres(&(g->a.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.a.b),b);
+    BIG_XXX_fromBytes(b,&W->val[2*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[3*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.b.b),b);
+    BIG_XXX_fromBytes(b,&W->val[4*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[5*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.a.b),b);
+    BIG_XXX_fromBytes(b,&W->val[6*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[7*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[8*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[9*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.a.b),b);
+    BIG_XXX_fromBytes(b,&W->val[10*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[11*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.b.b),b);
+    BIG_XXX_fromBytes(b,&W->val[12*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[13*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.a.b),b);
+    BIG_XXX_fromBytes(b,&W->val[14*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[15*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[16*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[17*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.a.b),b);
+    BIG_XXX_fromBytes(b,&W->val[18*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[19*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.b.b),b);
+    BIG_XXX_fromBytes(b,&W->val[20*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[21*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.a.b),b);
+    BIG_XXX_fromBytes(b,&W->val[22*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[23*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.b.b),b);
+}
+
+/* Conditional move of g to f if d=1 */
+void FP24_YYY_cmove(FP24_YYY *f,FP24_YYY *g,int d)
+{
+    FP8_YYY_cmove(&(f->a),&(g->a),d);
+    FP8_YYY_cmove(&(f->b),&(g->b),d);
+    FP8_YYY_cmove(&(f->c),&(g->c),d);
+}
+
+#ifdef HAS_MAIN
+
+int main()
+{
+    int i;
+    FP2_YYY f,w0,w1,X;
+    FP4_YYY f0,f1;
+    FP8_YYY t0,t1,t2;
+    FP24_YYY w,t,lv;
+    BIG_XXX a,b;
+    BIG_XXX p;
+
+    char raw[100];
+    csprng RNG;                // Crypto Strong RNG
+
+    for (i=0; i<100; i++) raw[i]=i;
+
+    BIG_XXX_rcopy(a,Fra_YYY);
+    BIG_XXX_rcopy(b,Frb_YYY);
+    FP2_YYY_from_BIGs(&X,a,b);
+
+    RAND_seed(&RNG,100,raw);   // initialise strong RNG
+
+    BIG_XXX_rcopy(p,Modulus_YYY);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f0,&w0,&w1);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f1,&w0,&w1);
+    FP8_YYY_from_FP4s(&t0,&f0,&f1);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f0,&w0,&w1);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f1,&w0,&w1);
+    FP8_YYY_from_FP4s(&t1,&f0,&f1);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f0,&w0,&w1);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f1,&w0,&w1);
+    FP8_YYY_from_FP4s(&t2,&f0,&f1);
+
+    FP24_YYY_from_FP8s(&w,&t0,&t1,&t2);
+
+    FP24_YYY_copy(&t,&w);
+
+    printf("w= ");
+    FP24_YYY_output(&w);
+    printf("\n");
+
+    FP24_YYY_norm(&w);
+
+    printf("w^p= ");
+    FP24_YYY_frob(&w,&X,1);
+    FP24_YYY_output(&w);
+    printf("\n");
+
+    printf("1/w= ");
+    FP24_YYY_inv(&t,&w);
+    FP24_YYY_output(&t);
+    printf("\n");
+
+    printf("w= ");
+    FP24_YYY_inv(&w,&t);
+    FP24_YYY_output(&w);
+    printf("\n");
+
+    return 0;
+}
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto-c/blob/8d28d2c3/src/fp4.c.in
----------------------------------------------------------------------
diff --git a/src/fp4.c.in b/src/fp4.c.in
new file mode 100644
index 0000000..52a8e31
--- /dev/null
+++ b/src/fp4.c.in
@@ -0,0 +1,785 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL Fp^4 functions */
+/* SU=m, m is Stack Usage (no lazy reduction) */
+
+/* FP4 elements are of the form a+ib, where i is sqrt(1+sqrt(-1)) */
+
+#include "fp4_YYY.h"
+
+/* test x==0 ? */
+/* SU= 8 */
+int FP4_YYY_iszilch(FP4_YYY *x)
+{
+    if (FP2_YYY_iszilch(&(x->a)) && FP2_YYY_iszilch(&(x->b))) return 1;
+    return 0;
+}
+
+/* test x==1 ? */
+/* SU= 8 */
+int FP4_YYY_isunity(FP4_YYY *x)
+{
+    if (FP2_YYY_isunity(&(x->a)) && FP2_YYY_iszilch(&(x->b))) return 1;
+    return 0;
+}
+
+/* test whether w is real, i.e. for w=a+ib test whether b is zero */
+int FP4_YYY_isreal(FP4_YYY *w)
+{
+    return FP2_YYY_iszilch(&(w->b));
+}
+
+/* return 1 if x==y, else 0 */
+/* SU= 16 */
+int FP4_YYY_equals(FP4_YYY *x,FP4_YYY *y)
+{
+    if (FP2_YYY_equals(&(x->a),&(y->a)) && FP2_YYY_equals(&(x->b),&(y->b)))
+        return 1;
+    return 0;
+}
+
+/* set FP4 from two FP2s */
+/* SU= 16 */
+void FP4_YYY_from_FP2s(FP4_YYY *w,FP2_YYY * x,FP2_YYY* y)
+{
+    FP2_YYY_copy(&(w->a), x);
+    FP2_YYY_copy(&(w->b), y);
+}
+
+/* set FP4 from FP2 */
+/* SU= 8 */
+void FP4_YYY_from_FP2(FP4_YYY *w,FP2_YYY *x)
+{
+    FP2_YYY_copy(&(w->a), x);
+    FP2_YYY_zero(&(w->b));
+}
+
+/* set high part of FP4 from FP2 */
+/* SU= 8 */
+void FP4_YYY_from_FP2H(FP4_YYY *w,FP2_YYY *x)
+{
+    FP2_YYY_copy(&(w->b), x);
+    FP2_YYY_zero(&(w->a));
+}
+
+/* FP4 copy w=x */
+/* SU= 16 */
+void FP4_YYY_copy(FP4_YYY *w,FP4_YYY *x)
+{
+    if (w==x) return;
+    FP2_YYY_copy(&(w->a), &(x->a));
+    FP2_YYY_copy(&(w->b), &(x->b));
+}
+
+/* FP4 w=0 */
+/* SU= 8 */
+void FP4_YYY_zero(FP4_YYY *w)
+{
+    FP2_YYY_zero(&(w->a));
+    FP2_YYY_zero(&(w->b));
+}
+
+/* FP4 w=1 */
+/* SU= 8 */
+void FP4_YYY_one(FP4_YYY *w)
+{
+    FP2_YYY_one(&(w->a));
+    FP2_YYY_zero(&(w->b));
+}
+
+/* Set w=-x */
+/* SU= 160 */
+void FP4_YYY_neg(FP4_YYY *w,FP4_YYY *x)
+{
+    /* Just one field neg */
+    FP2_YYY m,t;
+    FP4_YYY_norm(x);
+    FP2_YYY_add(&m,&(x->a),&(x->b));
+    FP2_YYY_neg(&m,&m);
+    FP2_YYY_add(&t,&m,&(x->b));
+    FP2_YYY_add(&(w->b),&m,&(x->a));
+    FP2_YYY_copy(&(w->a),&t);
+    FP4_YYY_norm(w);
+}
+
+/* Set w=conj(x) */
+/* SU= 16 */
+void FP4_YYY_conj(FP4_YYY *w,FP4_YYY *x)
+{
+    FP2_YYY_copy(&(w->a), &(x->a));
+    FP2_YYY_neg(&(w->b), &(x->b));
+    FP4_YYY_norm(w);
+}
+
+/* Set w=-conj(x) */
+/* SU= 16 */
+void FP4_YYY_nconj(FP4_YYY *w,FP4_YYY *x)
+{
+    FP2_YYY_copy(&(w->b),&(x->b));
+    FP2_YYY_neg(&(w->a), &(x->a));
+    FP4_YYY_norm(w);
+}
+
+/* Set w=x+y */
+/* SU= 16 */
+void FP4_YYY_add(FP4_YYY *w,FP4_YYY *x,FP4_YYY *y)
+{
+    FP2_YYY_add(&(w->a), &(x->a), &(y->a));
+    FP2_YYY_add(&(w->b), &(x->b), &(y->b));
+}
+
+/* Set w=x-y */
+/* Input y MUST be normed */
+void FP4_YYY_sub(FP4_YYY *w,FP4_YYY *x,FP4_YYY *y)
+{
+    FP4_YYY my;
+
+    FP4_YYY_neg(&my, y);
+    FP4_YYY_add(w, x, &my);
+
+}
+/* SU= 8 */
+/* reduce all components of w mod Modulus */
+void FP4_YYY_reduce(FP4_YYY *w)
+{
+    FP2_YYY_reduce(&(w->a));
+    FP2_YYY_reduce(&(w->b));
+}
+
+/* SU= 8 */
+/* normalise all elements of w */
+void FP4_YYY_norm(FP4_YYY *w)
+{
+    FP2_YYY_norm(&(w->a));
+    FP2_YYY_norm(&(w->b));
+}
+
+/* Set w=s*x, where s is FP2 */
+/* SU= 16 */
+void FP4_YYY_pmul(FP4_YYY *w,FP4_YYY *x,FP2_YYY *s)
+{
+    FP2_YYY_mul(&(w->a),&(x->a),s);
+    FP2_YYY_mul(&(w->b),&(x->b),s);
+}
+
+/* Set w=s*x, where s is FP */
+void FP4_YYY_qmul(FP4_YYY *w,FP4_YYY *x,FP_YYY *s)
+{
+    FP2_YYY_pmul(&(w->a),&(x->a),s);
+    FP2_YYY_pmul(&(w->b),&(x->b),s);
+}
+
+/* SU= 16 */
+/* Set w=s*x, where s is int */
+void FP4_YYY_imul(FP4_YYY *w,FP4_YYY *x,int s)
+{
+    FP2_YYY_imul(&(w->a),&(x->a),s);
+    FP2_YYY_imul(&(w->b),&(x->b),s);
+}
+
+/* Set w=x^2 */
+/* Input MUST be normed  */
+void FP4_YYY_sqr(FP4_YYY *w,FP4_YYY *x)
+{
+    FP2_YYY t1,t2,t3;
+
+    FP2_YYY_mul(&t3,&(x->a),&(x->b)); /* norms x */
+    FP2_YYY_copy(&t2,&(x->b));
+    FP2_YYY_add(&t1,&(x->a),&(x->b));
+    FP2_YYY_mul_ip(&t2);
+
+    FP2_YYY_add(&t2,&(x->a),&t2);
+
+    FP2_YYY_norm(&t1);  // 2
+    FP2_YYY_norm(&t2);  // 2
+
+    FP2_YYY_mul(&(w->a),&t1,&t2);
+
+    FP2_YYY_copy(&t2,&t3);
+    FP2_YYY_mul_ip(&t2);
+
+    FP2_YYY_add(&t2,&t2,&t3);
+
+    FP2_YYY_norm(&t2);  // 2
+    FP2_YYY_neg(&t2,&t2);
+    FP2_YYY_add(&(w->a),&(w->a),&t2);  /* a=(a+b)(a+i^2.b)-i^2.ab-ab = a*a+ib*ib */
+    FP2_YYY_add(&(w->b),&t3,&t3);  /* b=2ab */
+
+    FP4_YYY_norm(w);
+}
+
+/* Set w=x*y */
+/* Inputs MUST be normed  */
+void FP4_YYY_mul(FP4_YYY *w,FP4_YYY *x,FP4_YYY *y)
+{
+
+    FP2_YYY t1,t2,t3,t4;
+    FP2_YYY_mul(&t1,&(x->a),&(y->a));
+    FP2_YYY_mul(&t2,&(x->b),&(y->b));
+
+    FP2_YYY_add(&t3,&(y->b),&(y->a));
+    FP2_YYY_add(&t4,&(x->b),&(x->a));
+
+    FP2_YYY_norm(&t4); // 2
+    FP2_YYY_norm(&t3); // 2
+
+    FP2_YYY_mul(&t4,&t4,&t3); /* (xa+xb)(ya+yb) */
+
+    FP2_YYY_neg(&t3,&t1);  // 1
+    FP2_YYY_add(&t4,&t4,&t3);  //t4E=3
+    FP2_YYY_norm(&t4);
+
+    FP2_YYY_neg(&t3,&t2);  // 1
+    FP2_YYY_add(&(w->b),&t4,&t3); //wbE=3
+
+    FP2_YYY_mul_ip(&t2);
+    FP2_YYY_add(&(w->a),&t2,&t1);
+
+    FP4_YYY_norm(w);
+}
+
+/* output FP4 in format [a,b] */
+/* SU= 8 */
+void FP4_YYY_output(FP4_YYY *w)
+{
+    printf("[");
+    FP2_YYY_output(&(w->a));
+    printf(",");
+    FP2_YYY_output(&(w->b));
+    printf("]");
+}
+
+/* SU= 8 */
+void FP4_YYY_rawoutput(FP4_YYY *w)
+{
+    printf("[");
+    FP2_YYY_rawoutput(&(w->a));
+    printf(",");
+    FP2_YYY_rawoutput(&(w->b));
+    printf("]");
+}
+
+/* Set w=1/x */
+/* SU= 160 */
+void FP4_YYY_inv(FP4_YYY *w,FP4_YYY *x)
+{
+    FP2_YYY t1,t2;
+    FP2_YYY_sqr(&t1,&(x->a));
+    FP2_YYY_sqr(&t2,&(x->b));
+    FP2_YYY_mul_ip(&t2);
+    FP2_YYY_norm(&t2);
+    FP2_YYY_sub(&t1,&t1,&t2);
+    FP2_YYY_inv(&t1,&t1);
+    FP2_YYY_mul(&(w->a),&t1,&(x->a));
+    FP2_YYY_neg(&t1,&t1);
+    FP2_YYY_norm(&t1);
+    FP2_YYY_mul(&(w->b),&t1,&(x->b));
+}
+
+/* w*=i where i = sqrt(1+sqrt(-1)) */
+/* SU= 200 */
+void FP4_YYY_times_i(FP4_YYY *w)
+{
+    FP_YYY z;
+    FP2_YYY s,t;
+
+    FP2_YYY_copy(&t,&(w->b));
+
+    FP2_YYY_copy(&s,&t);
+
+    FP_YYY_copy(&z,&(s.a));
+    FP_YYY_neg(&(s.a),&(s.b));
+    FP_YYY_copy(&(s.b),&z);
+
+    FP2_YYY_add(&t,&t,&s);
+
+    FP2_YYY_copy(&(w->b),&(w->a));
+    FP2_YYY_copy(&(w->a),&t);
+    FP4_YYY_norm(w);
+}
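+
+/* Illustrative expansion: with i^2 = 1+sqrt(-1),
+       (a+ib).i = (1+sqrt(-1)).b + i.a
+   s above computes sqrt(-1).b, so t = b+s = (1+sqrt(-1)).b */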
+
+/* Set w=w^p using Frobenius */
+/* SU= 16 */
+void FP4_YYY_frob(FP4_YYY *w,FP2_YYY *f)
+{
+    FP2_YYY_conj(&(w->a),&(w->a));
+    FP2_YYY_conj(&(w->b),&(w->b));
+    FP2_YYY_mul( &(w->b),f,&(w->b));
+}
+
+/* Set r=a^b mod m */
+/* SU= 240 */
+void FP4_YYY_pow(FP4_YYY *r,FP4_YYY* a,BIG_XXX b)
+{
+    FP4_YYY w;
+    BIG_XXX z,zilch;
+    int bt;
+
+    BIG_XXX_zero(zilch);
+    BIG_XXX_norm(b);
+    BIG_XXX_copy(z,b);
+    FP4_YYY_copy(&w,a);
+    FP4_YYY_one(r);
+
+    while(1)
+    {
+        bt=BIG_XXX_parity(z);
+        BIG_XXX_shr(z,1);
+        if (bt) FP4_YYY_mul(r,r,&w);
+        if (BIG_XXX_comp(z,zilch)==0) break;
+        FP4_YYY_sqr(&w,&w);
+    }
+    FP4_YYY_reduce(r);
+}
+
+/* SU= 304 */
+/* XTR xtr_a function */
+void FP4_YYY_xtr_A(FP4_YYY *r,FP4_YYY *w,FP4_YYY *x,FP4_YYY *y,FP4_YYY *z)
+{
+    FP4_YYY t1,t2;
+
+    FP4_YYY_copy(r,x);
+    FP4_YYY_sub(&t1,w,y);
+    FP4_YYY_norm(&t1);
+    FP4_YYY_pmul(&t1,&t1,&(r->a));
+    FP4_YYY_add(&t2,w,y);
+    FP4_YYY_norm(&t2);
+    FP4_YYY_pmul(&t2,&t2,&(r->b));
+    FP4_YYY_times_i(&t2);
+
+    FP4_YYY_add(r,&t1,&t2);
+    FP4_YYY_add(r,r,z);
+
+    FP4_YYY_reduce(r);
+}
+
+/* SU= 152 */
+/* XTR xtr_d function */
+void FP4_YYY_xtr_D(FP4_YYY *r,FP4_YYY *x)
+{
+    FP4_YYY w;
+    FP4_YYY_copy(r,x);
+    FP4_YYY_conj(&w,r);
+    FP4_YYY_add(&w,&w,&w);
+    FP4_YYY_sqr(r,r);
+    FP4_YYY_norm(&w);
+    FP4_YYY_sub(r,r,&w);
+    FP4_YYY_reduce(r);    /* reduce here as multiple calls trigger automatic reductions */
+}
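+
+/* XTR doubling identity (see the Stam thesis reference below; illustrative):
+       c_2n = c_n^2 - 2.conj(c_n)
+   which is exactly r = r^2 - 2.conj(r) as computed above */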
+
+/* SU= 728 */
+/* r=x^n using XTR method on traces of FP12s */
+void FP4_YYY_xtr_pow(FP4_YYY *r,FP4_YYY *x,BIG_XXX n)
+{
+    int i,par,nb;
+    BIG_XXX v;
+    FP2_YYY w;
+    FP4_YYY t,a,b,c;
+
+    BIG_XXX_zero(v);
+    BIG_XXX_inc(v,3);
+    BIG_XXX_norm(v);
+    FP2_YYY_from_BIG(&w,v);
+    FP4_YYY_from_FP2(&a,&w);
+
+    FP4_YYY_copy(&b,x);
+    FP4_YYY_xtr_D(&c,x);
+
+    BIG_XXX_norm(n);
+    par=BIG_XXX_parity(n);
+    BIG_XXX_copy(v,n);
+    BIG_XXX_shr(v,1);
+    if (par==0)
+    {
+        BIG_XXX_dec(v,1);
+        BIG_XXX_norm(v);
+    }
+
+    nb=BIG_XXX_nbits(v);
+    for (i=nb-1; i>=0; i--)
+    {
+        if (!BIG_XXX_bit(v,i))
+        {
+            FP4_YYY_copy(&t,&b);
+            FP4_YYY_conj(x,x);
+            FP4_YYY_conj(&c,&c);
+            FP4_YYY_xtr_A(&b,&a,&b,x,&c);
+            FP4_YYY_conj(x,x);
+            FP4_YYY_xtr_D(&c,&t);
+            FP4_YYY_xtr_D(&a,&a);
+        }
+        else
+        {
+            FP4_YYY_conj(&t,&a);
+            FP4_YYY_xtr_D(&a,&b);
+            FP4_YYY_xtr_A(&b,&c,&b,x,&t);
+            FP4_YYY_xtr_D(&c,&c);
+        }
+    }
+
+    if (par==0) FP4_YYY_copy(r,&c);
+    else FP4_YYY_copy(r,&b);
+    FP4_YYY_reduce(r);
+}
+
+/* SU= 872 */
+/* r=ck^a.cl^b using XTR double exponentiation method on traces of FP12s. See Stam thesis. */
+void FP4_YYY_xtr_pow2(FP4_YYY *r,FP4_YYY *ck,FP4_YYY *cl,FP4_YYY *ckml,FP4_YYY *ckm2l,BIG_XXX a,BIG_XXX b)
+{
+    int i,f2;
+    BIG_XXX d,e,w;
+    FP4_YYY t,cu,cv,cumv,cum2v;
+
+    BIG_XXX_norm(a);
+    BIG_XXX_norm(b);
+    BIG_XXX_copy(e,a);
+    BIG_XXX_copy(d,b);
+    FP4_YYY_copy(&cu,ck);
+    FP4_YYY_copy(&cv,cl);
+    FP4_YYY_copy(&cumv,ckml);
+    FP4_YYY_copy(&cum2v,ckm2l);
+
+    f2=0;
+    while (BIG_XXX_parity(d)==0 && BIG_XXX_parity(e)==0)
+    {
+        BIG_XXX_shr(d,1);
+        BIG_XXX_shr(e,1);
+        f2++;
+    }
+    while (BIG_XXX_comp(d,e)!=0)
+    {
+        if (BIG_XXX_comp(d,e)>0)
+        {
+            BIG_XXX_imul(w,e,4);
+            BIG_XXX_norm(w);
+            if (BIG_XXX_comp(d,w)<=0)
+            {
+                BIG_XXX_copy(w,d);
+                BIG_XXX_copy(d,e);
+                BIG_XXX_sub(e,w,e);
+                BIG_XXX_norm(e);
+                FP4_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
+                FP4_YYY_conj(&cum2v,&cumv);
+                FP4_YYY_copy(&cumv,&cv);
+                FP4_YYY_copy(&cv,&cu);
+                FP4_YYY_copy(&cu,&t);
+            }
+            else if (BIG_XXX_parity(d)==0)
+            {
+                BIG_XXX_shr(d,1);
+                FP4_YYY_conj(r,&cum2v);
+                FP4_YYY_xtr_A(&t,&cu,&cumv,&cv,r);
+                FP4_YYY_xtr_D(&cum2v,&cumv);
+                FP4_YYY_copy(&cumv,&t);
+                FP4_YYY_xtr_D(&cu,&cu);
+            }
+            else if (BIG_XXX_parity(e)==1)
+            {
+                BIG_XXX_sub(d,d,e);
+                BIG_XXX_norm(d);
+                BIG_XXX_shr(d,1);
+                FP4_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
+                FP4_YYY_xtr_D(&cu,&cu);
+                FP4_YYY_xtr_D(&cum2v,&cv);
+                FP4_YYY_conj(&cum2v,&cum2v);
+                FP4_YYY_copy(&cv,&t);
+            }
+            else
+            {
+                BIG_XXX_copy(w,d);
+                BIG_XXX_copy(d,e);
+                BIG_XXX_shr(d,1);
+                BIG_XXX_copy(e,w);
+                FP4_YYY_xtr_D(&t,&cumv);
+                FP4_YYY_conj(&cumv,&cum2v);
+                FP4_YYY_conj(&cum2v,&t);
+                FP4_YYY_xtr_D(&t,&cv);
+                FP4_YYY_copy(&cv,&cu);
+                FP4_YYY_copy(&cu,&t);
+            }
+        }
+        if (BIG_XXX_comp(d,e)<0)
+        {
+            BIG_XXX_imul(w,d,4);
+            BIG_XXX_norm(w);
+            if (BIG_XXX_comp(e,w)<=0)
+            {
+                BIG_XXX_sub(e,e,d);
+                BIG_XXX_norm(e);
+                FP4_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
+                FP4_YYY_copy(&cum2v,&cumv);
+                FP4_YYY_copy(&cumv,&cu);
+                FP4_YYY_copy(&cu,&t);
+            }
+            else if (BIG_XXX_parity(e)==0)
+            {
+                BIG_XXX_copy(w,d);
+                BIG_XXX_copy(d,e);
+                BIG_XXX_shr(d,1);
+                BIG_XXX_copy(e,w);
+                FP4_YYY_xtr_D(&t,&cumv);
+                FP4_YYY_conj(&cumv,&cum2v);
+                FP4_YYY_conj(&cum2v,&t);
+                FP4_YYY_xtr_D(&t,&cv);
+                FP4_YYY_copy(&cv,&cu);
+                FP4_YYY_copy(&cu,&t);
+            }
+            else if (BIG_XXX_parity(d)==1)
+            {
+                BIG_XXX_copy(w,e);
+                BIG_XXX_copy(e,d);
+                BIG_XXX_sub(w,w,d);
+                BIG_XXX_norm(w);
+                BIG_XXX_copy(d,w);
+                BIG_XXX_shr(d,1);
+                FP4_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
+                FP4_YYY_conj(&cumv,&cumv);
+                FP4_YYY_xtr_D(&cum2v,&cu);
+                FP4_YYY_conj(&cum2v,&cum2v);
+                FP4_YYY_xtr_D(&cu,&cv);
+                FP4_YYY_copy(&cv,&t);
+            }
+            else
+            {
+                BIG_XXX_shr(d,1);
+                FP4_YYY_conj(r,&cum2v);
+                FP4_YYY_xtr_A(&t,&cu,&cumv,&cv,r);
+                FP4_YYY_xtr_D(&cum2v,&cumv);
+                FP4_YYY_copy(&cumv,&t);
+                FP4_YYY_xtr_D(&cu,&cu);
+            }
+        }
+    }
+    FP4_YYY_xtr_A(r,&cu,&cv,&cumv,&cum2v);
+    for (i=0; i<f2; i++)	FP4_YYY_xtr_D(r,r);
+    FP4_YYY_xtr_pow(r,r,d);
+}
+
+/* Conditional move of g to f if d=1 */
+void FP4_YYY_cmove(FP4_YYY *f,FP4_YYY *g,int d)
+{
+    FP2_YYY_cmove(&(f->a),&(g->a),d);
+    FP2_YYY_cmove(&(f->b),&(g->b),d);
+}
+
+/* New stuff for ECp4 support */
+
+/* Set w=x/2 */
+void FP4_YYY_div2(FP4_YYY *w,FP4_YYY *x)
+{
+    FP2_YYY_div2(&(w->a),&(x->a));
+    FP2_YYY_div2(&(w->b),&(x->b));
+}
+
+#if CURVE_SECURITY_ZZZ >= 192
+
+/* sqrt(a+xb) = sqrt((a+sqrt(a*a-n*b*b))/2)+x.b/(2*sqrt((a+sqrt(a*a-n*b*b))/2)) */
+/* returns true if x is QR */
+int FP4_YYY_sqrt(FP4_YYY *r,FP4_YYY* x)
+{
+    FP2_YYY a,s,t;
+
+    FP4_YYY_copy(r,x);
+    if (FP4_YYY_iszilch(x))
+        return 1;
+
+    FP2_YYY_copy(&a,&(x->a));
+    FP2_YYY_copy(&s,&(x->b));
+
+    if (FP2_YYY_iszilch(&s))
+    {
+        if (FP2_YYY_sqrt(&t,&a))
+        {
+            FP4_YYY_from_FP2(r,&t);
+        }
+        else
+        {
+            FP2_YYY_div_ip(&a);
+            FP2_YYY_sqrt(&t,&a);
+            FP4_YYY_from_FP2H(r,&t);
+        }
+        return 1;
+    }
+
+    FP2_YYY_sqr(&s,&s);  // s*=s
+    FP2_YYY_sqr(&a,&a);  // a*=a
+    FP2_YYY_mul_ip(&s);
+    FP2_YYY_norm(&s);
+    FP2_YYY_sub(&a,&a,&s); // a-=txx(s)
+
+    if (!FP2_YYY_sqrt(&s,&a)) return 0;
+
+    FP2_YYY_copy(&t,&(x->a));
+    FP2_YYY_add(&a,&t,&s);
+    FP2_YYY_norm(&a);
+    FP2_YYY_div2(&a,&a);
+
+    if (!FP2_YYY_sqrt(&a,&a))
+    {
+        FP2_YYY_sub(&a,&t,&s);
+        FP2_YYY_norm(&a);
+        FP2_YYY_div2(&a,&a);
+        if (!FP2_YYY_sqrt(&a,&a)) return 0;
+    }
+
+    FP2_YYY_copy(&t,&(x->b));
+    FP2_YYY_add(&s,&a,&a);
+    FP2_YYY_inv(&s,&s);
+
+    FP2_YYY_mul(&t,&t,&s);
+    FP4_YYY_from_FP2s(r,&a,&t);
+
+    return 1;
+}
+
+void FP4_YYY_div_i(FP4_YYY *f)
+{
+    FP2_YYY u,v;
+    FP2_YYY_copy(&u,&(f->a));
+    FP2_YYY_copy(&v,&(f->b));
+    FP2_YYY_div_ip(&u);
+    FP2_YYY_copy(&(f->a),&v);
+    FP2_YYY_copy(&(f->b),&u);
+}
+
+void FP4_YYY_div_2i(FP4_YYY *f)
+{
+    FP2_YYY u,v;
+    FP2_YYY_copy(&u,&(f->a));
+    FP2_YYY_copy(&v,&(f->b));
+    FP2_YYY_div_ip2(&u);
+    FP2_YYY_add(&v,&v,&v);
+    FP2_YYY_norm(&v);
+    FP2_YYY_copy(&(f->a),&v);
+    FP2_YYY_copy(&(f->b),&u);
+}
+
+#endif
+
+/*
+int main(){
+		FP2_YYY w0,w1,f;
+		FP4_YYY w,t;
+		FP4_YYY c1,c2,c3,c4,cr;
+		BIG_XXX a,b;
+		BIG_XXX e,e1,e2;
+		BIG_XXX p,md;
+
+
+		BIG_XXX_rcopy(md,Modulus_YYY);
+		//Test w^(P^4) = w mod p^2
+		BIG_XXX_zero(a); BIG_XXX_inc(a,27);
+		BIG_XXX_zero(b); BIG_XXX_inc(b,45);
+		FP2_YYY_from_BIGs(&w0,a,b);
+
+		BIG_XXX_zero(a); BIG_XXX_inc(a,33);
+		BIG_XXX_zero(b); BIG_XXX_inc(b,54);
+		FP2_YYY_from_BIGs(&w1,a,b);
+
+		FP4_YYY_from_FP2s(&w,&w0,&w1);
+		FP4_YYY_reduce(&w);
+
+		printf("w= ");
+		FP4_YYY_output(&w);
+		printf("\n");
+
+
+		FP4_YYY_copy(&t,&w);
+
+
+		BIG_XXX_copy(p,md);
+		FP4_YYY_pow(&w,&w,p);
+
+		printf("w^p= ");
+		FP4_YYY_output(&w);
+		printf("\n");
+//exit(0);
+
+		BIG_XXX_rcopy(a,Fra_YYY);
+		BIG_XXX_rcopy(b,Frb_YYY);
+		FP2_YYY_from_BIGs(&f,a,b);
+
+		FP4_YYY_frob(&t,&f);
+		printf("w^p= ");
+		FP4_YYY_output(&t);
+		printf("\n");
+
+		FP4_YYY_pow(&w,&w,p);
+		FP4_YYY_pow(&w,&w,p);
+		FP4_YYY_pow(&w,&w,p);
+		printf("w^p4= ");
+		FP4_YYY_output(&w);
+		printf("\n");
+
+// Test 1/(1/x) = x mod p^4
+		FP4_YYY_from_FP2s(&w,&w0,&w1);
+		printf("Test Inversion \nw= ");
+		FP4_YYY_output(&w);
+		printf("\n");
+
+		FP4_YYY_inv(&w,&w);
+		printf("1/w mod p^4 = ");
+		FP4_YYY_output(&w);
+		printf("\n");
+
+		FP4_YYY_inv(&w,&w);
+		printf("1/(1/w) mod p^4 = ");
+		FP4_YYY_output(&w);
+		printf("\n");
+
+		BIG_XXX_zero(e); BIG_XXX_inc(e,12);
+
+
+
+	//	FP4_YYY_xtr_A(&w,&t,&w,&t,&t);
+		FP4_YYY_xtr_pow(&w,&w,e);
+
+		printf("w^e= ");
+		FP4_YYY_output(&w);
+		printf("\n");
+
+
+		BIG_XXX_zero(a); BIG_XXX_inc(a,37);
+		BIG_XXX_zero(b); BIG_XXX_inc(b,17);
+		FP2_YYY_from_BIGs(&w0,a,b);
+
+		BIG_XXX_zero(a); BIG_XXX_inc(a,49);
+		BIG_XXX_zero(b); BIG_XXX_inc(b,31);
+		FP2_YYY_from_BIGs(&w1,a,b);
+
+		FP4_YYY_from_FP2s(&c1,&w0,&w1);
+		FP4_YYY_from_FP2s(&c2,&w0,&w1);
+		FP4_YYY_from_FP2s(&c3,&w0,&w1);
+		FP4_YYY_from_FP2s(&c4,&w0,&w1);
+
+		BIG_XXX_zero(e1); BIG_XXX_inc(e1,3331);
+		BIG_XXX_zero(e2); BIG_XXX_inc(e2,3372);
+
+		FP4_YYY_xtr_pow2(&w,&c1,&w,&c2,&c3,e1,e2);
+
+		printf("c^e= ");
+		FP4_YYY_output(&w);
+		printf("\n");
+
+
+		return 0;
+}
+*/
+

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto-c/blob/8d28d2c3/src/fp48.c.in
----------------------------------------------------------------------
diff --git a/src/fp48.c.in b/src/fp48.c.in
new file mode 100644
index 0000000..a3a67f8
--- /dev/null
+++ b/src/fp48.c.in
@@ -0,0 +1,1143 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL Fp^48 functions */
+/* SU=m, m is Stack Usage (no lazy reduction) */
+/* FP48 elements are of the form a+i.b+i^2.c */
+
+#include "fp48_YYY.h"
+
+/* return 1 if b==c, no branching */
+static int teq(sign32 b,sign32 c)
+{
+    sign32 x=b^c;
+    x-=1;  // if x=0, x now -1
+    return (int)((x>>31)&1);
+}
+
+/* Constant time select from pre-computed table */
+static void FP48_YYY_select(FP48_YYY *f,FP48_YYY g[],sign32 b)
+{
+    FP48_YYY invf;
+    sign32 m=b>>31;
+    sign32 babs=(b^m)-m;
+
+    babs=(babs-1)/2;
+
+    FP48_YYY_cmove(f,&g[0],teq(babs,0));  // conditional move
+    FP48_YYY_cmove(f,&g[1],teq(babs,1));
+    FP48_YYY_cmove(f,&g[2],teq(babs,2));
+    FP48_YYY_cmove(f,&g[3],teq(babs,3));
+    FP48_YYY_cmove(f,&g[4],teq(babs,4));
+    FP48_YYY_cmove(f,&g[5],teq(babs,5));
+    FP48_YYY_cmove(f,&g[6],teq(babs,6));
+    FP48_YYY_cmove(f,&g[7],teq(babs,7));
+
+    FP48_YYY_copy(&invf,f);
+    FP48_YYY_conj(&invf,&invf);  // 1/f
+    FP48_YYY_cmove(f,&invf,(int)(m&1));
+}
+
+/* test x==0 ? */
+/* SU= 8 */
+int FP48_YYY_iszilch(FP48_YYY *x)
+{
+    if (FP16_YYY_iszilch(&(x->a)) && FP16_YYY_iszilch(&(x->b)) && FP16_YYY_iszilch(&(x->c))) return 1;
+    return 0;
+}
+
+/* test x==1 ? */
+/* SU= 8 */
+int FP48_YYY_isunity(FP48_YYY *x)
+{
+    if (FP16_YYY_isunity(&(x->a)) && FP16_YYY_iszilch(&(x->b)) && FP16_YYY_iszilch(&(x->c))) return 1;
+    return 0;
+}
+
+/* FP48 copy w=x */
+/* SU= 16 */
+void FP48_YYY_copy(FP48_YYY *w,FP48_YYY *x)
+{
+    if (x==w) return;
+    FP16_YYY_copy(&(w->a),&(x->a));
+    FP16_YYY_copy(&(w->b),&(x->b));
+    FP16_YYY_copy(&(w->c),&(x->c));
+}
+
+/* FP48 w=1 */
+/* SU= 8 */
+void FP48_YYY_one(FP48_YYY *w)
+{
+    FP16_YYY_one(&(w->a));
+    FP16_YYY_zero(&(w->b));
+    FP16_YYY_zero(&(w->c));
+}
+
+/* return 1 if x==y, else 0 */
+/* SU= 16 */
+int FP48_YYY_equals(FP48_YYY *x,FP48_YYY *y)
+{
+    if (FP16_YYY_equals(&(x->a),&(y->a)) && FP16_YYY_equals(&(x->b),&(y->b)) && FP16_YYY_equals(&(x->c),&(y->c)))
+        return 1;
+    return 0;
+}
+
+/* Set w=conj(x) */
+/* SU= 8 */
+void FP48_YYY_conj(FP48_YYY *w,FP48_YYY *x)
+{
+    FP48_YYY_copy(w,x);
+    FP16_YYY_conj(&(w->a),&(w->a));
+    FP16_YYY_nconj(&(w->b),&(w->b));
+    FP16_YYY_conj(&(w->c),&(w->c));
+}
+
+/* Create FP48 from FP16 */
+/* SU= 8 */
+void FP48_YYY_from_FP16(FP48_YYY *w,FP16_YYY *a)
+{
+    FP16_YYY_copy(&(w->a),a);
+    FP16_YYY_zero(&(w->b));
+    FP16_YYY_zero(&(w->c));
+}
+
+/* Create FP48 from 3 FP16's */
+/* SU= 16 */
+void FP48_YYY_from_FP16s(FP48_YYY *w,FP16_YYY *a,FP16_YYY *b,FP16_YYY *c)
+{
+    FP16_YYY_copy(&(w->a),a);
+    FP16_YYY_copy(&(w->b),b);
+    FP16_YYY_copy(&(w->c),c);
+}
+
+/* Granger-Scott Unitary Squaring. This does not benefit from lazy reduction */
+/* SU= 600 */
+void FP48_YYY_usqr(FP48_YYY *w,FP48_YYY *x)
+{
+    FP16_YYY A,B,C,D;
+
+    FP16_YYY_copy(&A,&(x->a));
+
+    FP16_YYY_sqr(&(w->a),&(x->a));
+    FP16_YYY_add(&D,&(w->a),&(w->a));
+    FP16_YYY_add(&(w->a),&D,&(w->a));
+
+    FP16_YYY_norm(&(w->a));
+    FP16_YYY_nconj(&A,&A);
+
+    FP16_YYY_add(&A,&A,&A);
+    FP16_YYY_add(&(w->a),&(w->a),&A);
+    FP16_YYY_sqr(&B,&(x->c));
+    FP16_YYY_times_i(&B);
+
+    FP16_YYY_add(&D,&B,&B);
+    FP16_YYY_add(&B,&B,&D);
+    FP16_YYY_norm(&B);
+
+    FP16_YYY_sqr(&C,&(x->b));
+
+    FP16_YYY_add(&D,&C,&C);
+    FP16_YYY_add(&C,&C,&D);
+
+    FP16_YYY_norm(&C);
+    FP16_YYY_conj(&(w->b),&(x->b));
+    FP16_YYY_add(&(w->b),&(w->b),&(w->b));
+    FP16_YYY_nconj(&(w->c),&(x->c));
+
+    FP16_YYY_add(&(w->c),&(w->c),&(w->c));
+    FP16_YYY_add(&(w->b),&B,&(w->b));
+    FP16_YYY_add(&(w->c),&C,&(w->c));
+
+    FP48_YYY_reduce(w);	    /* reduce here; in the pow function, repeated squarings would otherwise trigger multiple reductions */
+}
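+
+/* Example (illustrative sketch): usqr is only valid for unitary
+   elements, e.g. pairing outputs, for which conj(x)=1/x. For such x it
+   agrees with the generic squaring:
+
+    FP48_YYY s1,s2;
+    FP48_YYY_usqr(&s1,&x);   // Granger-Scott, cheaper
+    FP48_YYY_sqr(&s2,&x);    // generic Chung-Hasan squaring
+    FP48_YYY_reduce(&s2);
+    // FP48_YYY_equals(&s1,&s2) should return 1 when x is unitary
+*/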
+
+/* FP48 squaring w=x^2 */
+/* SU= 600 */
+void FP48_YYY_sqr(FP48_YYY *w,FP48_YYY *x)
+{
+    /* Use Chung-Hasan SQR2 method from http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf */
+
+    FP16_YYY A,B,C,D;
+
+    FP16_YYY_sqr(&A,&(x->a));
+    FP16_YYY_mul(&B,&(x->b),&(x->c));
+    FP16_YYY_add(&B,&B,&B);
+    FP16_YYY_norm(&B);
+    FP16_YYY_sqr(&C,&(x->c));
+
+    FP16_YYY_mul(&D,&(x->a),&(x->b));
+    FP16_YYY_add(&D,&D,&D);
+
+    FP16_YYY_add(&(w->c),&(x->a),&(x->c));
+    FP16_YYY_add(&(w->c),&(x->b),&(w->c));
+    FP16_YYY_norm(&(w->c));
+
+    FP16_YYY_sqr(&(w->c),&(w->c));
+
+    FP16_YYY_copy(&(w->a),&A);
+    FP16_YYY_add(&A,&A,&B);
+
+    FP16_YYY_norm(&A);
+
+    FP16_YYY_add(&A,&A,&C);
+    FP16_YYY_add(&A,&A,&D);
+
+    FP16_YYY_norm(&A);
+
+    FP16_YYY_neg(&A,&A);
+    FP16_YYY_times_i(&B);
+    FP16_YYY_times_i(&C);
+
+    FP16_YYY_add(&(w->a),&(w->a),&B);
+    FP16_YYY_add(&(w->b),&C,&D);
+    FP16_YYY_add(&(w->c),&(w->c),&A);
+
+    FP48_YYY_norm(w);
+}
+
+/* FP48 full multiplication w=w*y */
+/* SU= 896 */
+void FP48_YYY_mul(FP48_YYY *w,FP48_YYY *y)
+{
+    FP16_YYY z0,z1,z2,z3,t0,t1;
+
+    FP16_YYY_mul(&z0,&(w->a),&(y->a));
+    FP16_YYY_mul(&z2,&(w->b),&(y->b));  //
+
+    FP16_YYY_add(&t0,&(w->a),&(w->b));
+    FP16_YYY_add(&t1,&(y->a),&(y->b));  //
+
+    FP16_YYY_norm(&t0);
+    FP16_YYY_norm(&t1);
+
+    FP16_YYY_mul(&z1,&t0,&t1);
+    FP16_YYY_add(&t0,&(w->b),&(w->c));
+    FP16_YYY_add(&t1,&(y->b),&(y->c));  //
+
+    FP16_YYY_norm(&t0);
+    FP16_YYY_norm(&t1);
+
+    FP16_YYY_mul(&z3,&t0,&t1);
+
+    FP16_YYY_neg(&t0,&z0);
+    FP16_YYY_neg(&t1,&z2);
+
+    FP16_YYY_add(&z1,&z1,&t0);   // z1=z1-z0
+    FP16_YYY_add(&(w->b),&z1,&t1);    // z1=z1-z2
+    FP16_YYY_add(&z3,&z3,&t1);        // z3=z3-z2
+    FP16_YYY_add(&z2,&z2,&t0);        // z2=z2-z0
+
+    FP16_YYY_add(&t0,&(w->a),&(w->c));
+    FP16_YYY_add(&t1,&(y->a),&(y->c));
+
+    FP16_YYY_norm(&t0);
+    FP16_YYY_norm(&t1);
+
+    FP16_YYY_mul(&t0,&t1,&t0);
+    FP16_YYY_add(&z2,&z2,&t0);
+
+    FP16_YYY_mul(&t0,&(w->c),&(y->c));
+    FP16_YYY_neg(&t1,&t0);
+
+    FP16_YYY_add(&(w->c),&z2,&t1);
+    FP16_YYY_add(&z3,&z3,&t1);
+    FP16_YYY_times_i(&t0);
+    FP16_YYY_add(&(w->b),&(w->b),&t0);
+    FP16_YYY_norm(&z3);
+    FP16_YYY_times_i(&z3);
+    FP16_YYY_add(&(w->a),&z0,&z3);
+
+    FP48_YYY_norm(w);
+}
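+
+/* Example (illustrative sketch): note the in-place convention, w <- w*y:
+
+    FP48_YYY acc;
+    FP48_YYY_one(&acc);
+    FP48_YYY_mul(&acc,&x);   // acc = x
+    FP48_YYY_mul(&acc,&x);   // acc = x^2
+*/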
+
+/* FP48 multiplication w=w*y */
+/* SU= 744 */
+/* caters for the special case that arises from the sparse form of the ATE pairing line function */
+void FP48_YYY_smul(FP48_YYY *w,FP48_YYY *y,int type)
+{
+    FP16_YYY z0,z1,z2,z3,t0,t1;
+
+    if (type==D_TYPE)
+    {
+        // y->c is 0
+
+        FP16_YYY_copy(&z3,&(w->b));
+        FP16_YYY_mul(&z0,&(w->a),&(y->a));
+
+        FP16_YYY_pmul(&z2,&(w->b),&(y->b).a);
+        FP16_YYY_add(&(w->b),&(w->a),&(w->b));
+        FP16_YYY_copy(&t1,&(y->a));
+        FP8_YYY_add(&t1.a,&t1.a,&(y->b).a);
+
+        FP16_YYY_norm(&t1);
+        FP16_YYY_norm(&(w->b));
+
+        FP16_YYY_mul(&(w->b),&(w->b),&t1);
+        FP16_YYY_add(&z3,&z3,&(w->c));
+        FP16_YYY_norm(&z3);
+        FP16_YYY_pmul(&z3,&z3,&(y->b).a);
+        FP16_YYY_neg(&t0,&z0);
+        FP16_YYY_neg(&t1,&z2);
+
+        FP16_YYY_add(&(w->b),&(w->b),&t0);   // z1=z1-z0
+        FP16_YYY_add(&(w->b),&(w->b),&t1);   // z1=z1-z2
+
+        FP16_YYY_add(&z3,&z3,&t1);        // z3=z3-z2
+        FP16_YYY_add(&z2,&z2,&t0);        // z2=z2-z0
+
+        FP16_YYY_add(&t0,&(w->a),&(w->c));
+
+        FP16_YYY_norm(&t0);
+        FP16_YYY_norm(&z3);
+
+        FP16_YYY_mul(&t0,&(y->a),&t0);
+        FP16_YYY_add(&(w->c),&z2,&t0);
+
+        FP16_YYY_times_i(&z3);
+        FP16_YYY_add(&(w->a),&z0,&z3);
+    }
+
+    if (type==M_TYPE)
+    {
+        // y->b is zero
+        FP16_YYY_mul(&z0,&(w->a),&(y->a));
+        FP16_YYY_add(&t0,&(w->a),&(w->b));
+        FP16_YYY_norm(&t0);
+
+        FP16_YYY_mul(&z1,&t0,&(y->a));
+        FP16_YYY_add(&t0,&(w->b),&(w->c));
+        FP16_YYY_norm(&t0);
+
+        FP16_YYY_pmul(&z3,&t0,&(y->c).b);
+        FP16_YYY_times_i(&z3);
+
+        FP16_YYY_neg(&t0,&z0);
+        FP16_YYY_add(&z1,&z1,&t0);   // z1=z1-z0
+
+        FP16_YYY_copy(&(w->b),&z1);
+
+        FP16_YYY_copy(&z2,&t0);
+
+        FP16_YYY_add(&t0,&(w->a),&(w->c));
+        FP16_YYY_add(&t1,&(y->a),&(y->c));
+
+        FP16_YYY_norm(&t0);
+        FP16_YYY_norm(&t1);
+
+        FP16_YYY_mul(&t0,&t1,&t0);
+        FP16_YYY_add(&z2,&z2,&t0);
+
+        FP16_YYY_pmul(&t0,&(w->c),&(y->c).b);
+        FP16_YYY_times_i(&t0);
+        FP16_YYY_neg(&t1,&t0);
+        FP16_YYY_times_i(&t0);
+
+        FP16_YYY_add(&(w->c),&z2,&t1);
+        FP16_YYY_add(&z3,&z3,&t1);
+
+        FP16_YYY_add(&(w->b),&(w->b),&t0);
+        FP16_YYY_norm(&z3);
+        FP16_YYY_times_i(&z3);
+        FP16_YYY_add(&(w->a),&z0,&z3);
+    }
+    FP48_YYY_norm(w);
+}
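+
+/* Note (illustrative): smul exploits the sparsity of the line-function
+   output. For D_TYPE, y->c is zero and only the first FP8 half of y->b
+   is used; for M_TYPE, y->b is zero and only the second FP8 half of
+   y->c is used, so several full FP16 multiplications collapse into
+   cheaper pmul calls. Calling smul with a dense y gives wrong results;
+   use FP48_YYY_mul for that. */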
+
+/* Set w=1/x */
+/* SU= 600 */
+void FP48_YYY_inv(FP48_YYY *w,FP48_YYY *x)
+{
+    FP16_YYY f0,f1,f2,f3;
+//    FP48_YYY_norm(x);
+
+    FP16_YYY_sqr(&f0,&(x->a));
+    FP16_YYY_mul(&f1,&(x->b),&(x->c));
+    FP16_YYY_times_i(&f1);
+    FP16_YYY_sub(&f0,&f0,&f1);  /* y.a */
+    FP16_YYY_norm(&f0);
+
+    FP16_YYY_sqr(&f1,&(x->c));
+    FP16_YYY_times_i(&f1);
+    FP16_YYY_mul(&f2,&(x->a),&(x->b));
+    FP16_YYY_sub(&f1,&f1,&f2);  /* y.b */
+    FP16_YYY_norm(&f1);
+
+    FP16_YYY_sqr(&f2,&(x->b));
+    FP16_YYY_mul(&f3,&(x->a),&(x->c));
+    FP16_YYY_sub(&f2,&f2,&f3);  /* y.c */
+    FP16_YYY_norm(&f2);
+
+    FP16_YYY_mul(&f3,&(x->b),&f2);
+    FP16_YYY_times_i(&f3);
+    FP16_YYY_mul(&(w->a),&f0,&(x->a));
+    FP16_YYY_add(&f3,&(w->a),&f3);
+    FP16_YYY_mul(&(w->c),&f1,&(x->c));
+    FP16_YYY_times_i(&(w->c));
+
+    FP16_YYY_add(&f3,&(w->c),&f3);
+    FP16_YYY_norm(&f3);
+
+    FP16_YYY_inv(&f3,&f3);
+    FP16_YYY_mul(&(w->a),&f0,&f3);
+    FP16_YYY_mul(&(w->b),&f1,&f3);
+    FP16_YYY_mul(&(w->c),&f2,&f3);
+}
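+
+/* Example (illustrative sketch): a quick self-check of the inversion:
+
+    FP48_YYY y;
+    FP48_YYY_inv(&y,&x);
+    FP48_YYY_mul(&y,&x);     // y = (1/x)*x
+    FP48_YYY_reduce(&y);
+    // FP48_YYY_isunity(&y) should return 1 for any invertible x
+*/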
+
+/* constant-time powering by a small integer of at most bts bits */
+
+void FP48_YYY_pinpow(FP48_YYY *r,int e,int bts)
+{
+    int i,b;
+    FP48_YYY R[2];
+
+    FP48_YYY_one(&R[0]);
+    FP48_YYY_copy(&R[1],r);
+
+    for (i=bts-1; i>=0; i--)
+    {
+        b=(e>>i)&1;
+        FP48_YYY_mul(&R[1-b],&R[b]);
+        FP48_YYY_usqr(&R[b],&R[b]);
+    }
+    FP48_YYY_copy(r,&R[0]);
+}
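+
+/* Example (illustrative sketch): raise r to a small exponent of known
+   maximum bit length; the ladder always scans exactly bts bits:
+
+    FP48_YYY_pinpow(&r,10,4);   // r <- r^10, in constant time
+*/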
+
+/* Compressed powering of unitary elements: c=trace(x^(e mod r)) */
+
+void FP48_YYY_compow(FP16_YYY *c,FP48_YYY *x,BIG_XXX e,BIG_XXX r)
+{
+    FP48_YYY g1,g2;
+    FP16_YYY cp,cpm1,cpm2;
+    FP2_YYY  f;
+    BIG_XXX q,a,b,m;
+
+    BIG_XXX_rcopy(a,Fra_YYY);
+    BIG_XXX_rcopy(b,Frb_YYY);
+    FP2_YYY_from_BIGs(&f,a,b);
+
+    BIG_XXX_rcopy(q,Modulus_YYY);
+
+    FP48_YYY_copy(&g1,x);
+    FP48_YYY_copy(&g2,x);
+
+    BIG_XXX_copy(m,q);
+    BIG_XXX_mod(m,r);
+
+    BIG_XXX_copy(a,e);
+    BIG_XXX_mod(a,m);
+
+    BIG_XXX_copy(b,e);
+    BIG_XXX_sdiv(b,m);
+
+    FP48_YYY_trace(c,&g1);
+
+    if (BIG_XXX_iszilch(b))
+    {
+        FP16_YYY_xtr_pow(c,c,e);
+        return;
+    }
+
+    FP48_YYY_frob(&g2,&f,1);
+    FP48_YYY_trace(&cp,&g2);
+    FP48_YYY_conj(&g1,&g1);
+    FP48_YYY_mul(&g2,&g1);
+    FP48_YYY_trace(&cpm1,&g2);
+    FP48_YYY_mul(&g2,&g1);
+
+    FP48_YYY_trace(&cpm2,&g2);
+
+    FP16_YYY_xtr_pow2(c,&cp,c,&cpm1,&cpm2,a,b);
+}
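+
+/* Example (illustrative sketch): compow returns only the FP16 trace of
+   x^(e mod r), which suffices for protocols that compare pairing
+   values without materialising the full FP48 result:
+
+    FP16_YYY c;
+    FP48_YYY_compow(&c,&x,e,r);   // c = trace(x^(e mod r)), x unitary
+*/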
+
+/* Note this is simple square and multiply, so not side-channel safe */
+
+void FP48_YYY_pow(FP48_YYY *r,FP48_YYY *a,BIG_XXX b)
+{
+    FP48_YYY w;
+    BIG_XXX b3;
+    int i,nb,bt;
+    BIG_XXX_norm(b);
+    BIG_XXX_pmul(b3,b,3);
+    BIG_XXX_norm(b3);
+
+    FP48_YYY_copy(&w,a);
+
+    nb=BIG_XXX_nbits(b3);
+    for (i=nb-2; i>=1; i--)
+    {
+        FP48_YYY_usqr(&w,&w);
+        bt=BIG_XXX_bit(b3,i)-BIG_XXX_bit(b,i);
+        if (bt==1)
+            FP48_YYY_mul(&w,a);
+        if (bt==-1)
+        {
+            FP48_YYY_conj(a,a);
+            FP48_YYY_mul(&w,a);
+            FP48_YYY_conj(a,a);
+        }
+    }
+
+    FP48_YYY_copy(r,&w);
+    FP48_YYY_reduce(r);
+}
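+
+/* Worked example (illustrative): scanning bits i=nb-2..1 of b3=3b
+   against b yields signed digits bt in {-1,0,1}, a NAF-style recoding;
+   bit 0 always gives digit 0, which is why the loop stops at i=1.
+   For b=5: b=101b, b3=1111b, nb=4. Starting from w=a, i=2 squares
+   (bt=0) giving a^2, then i=1 squares and multiplies (bt=1) giving
+   a^5, as required. */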
+
+/* p=q0^u0.q1^u1.q2^u2.q3^u3... */
+/* Side channel attack secure */
+// Bos & Costello https://eprint.iacr.org/2013/458.pdf
+// Faz-Hernandez & Longa & Sanchez  https://eprint.iacr.org/2013/158.pdf
+
+void FP48_YYY_pow16(FP48_YYY *p,FP48_YYY *q,BIG_XXX u[16])
+{
+    int i,j,k,nb,pb1,pb2,pb3,pb4,bt;
+    FP48_YYY g1[8],g2[8],g3[8],g4[8],r;
+    BIG_XXX t[16],mt;
+    sign8 w1[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s1[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 w2[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s2[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 w3[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s3[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 w4[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s4[NLEN_XXX*BASEBITS_XXX+1];
+    FP_YYY fx,fy;
+    FP2_YYY  X;
+
+    FP_YYY_rcopy(&fx,Fra_YYY);
+    FP_YYY_rcopy(&fy,Frb_YYY);
+    FP2_YYY_from_FPs(&X,&fx,&fy);
+
+    for (i=0; i<16; i++)
+        BIG_XXX_copy(t[i],u[i]);
+
+    // Precomputed table
+    FP48_YYY_copy(&g1[0],&q[0]); // q[0]
+    FP48_YYY_copy(&g1[1],&g1[0]);
+    FP48_YYY_mul(&g1[1],&q[1]);	// q[0].q[1]
+    FP48_YYY_copy(&g1[2],&g1[0]);
+    FP48_YYY_mul(&g1[2],&q[2]);	// q[0].q[2]
+    FP48_YYY_copy(&g1[3],&g1[1]);
+    FP48_YYY_mul(&g1[3],&q[2]);	// q[0].q[1].q[2]
+    FP48_YYY_copy(&g1[4],&g1[0]);
+    FP48_YYY_mul(&g1[4],&q[3]);  // q[0].q[3]
+    FP48_YYY_copy(&g1[5],&g1[1]);
+    FP48_YYY_mul(&g1[5],&q[3]);	// q[0].q[1].q[3]
+    FP48_YYY_copy(&g1[6],&g1[2]);
+    FP48_YYY_mul(&g1[6],&q[3]);	// q[0].q[2].q[3]
+    FP48_YYY_copy(&g1[7],&g1[3]);
+    FP48_YYY_mul(&g1[7],&q[3]);	// q[0].q[1].q[2].q[3]
+
+    // Use Frobenius
+
+    for (i=0; i<8; i++)
+    {
+        FP48_YYY_copy(&g2[i],&g1[i]);
+        FP48_YYY_frob(&g2[i],&X,4);
+
+        FP48_YYY_copy(&g3[i],&g2[i]);
+        FP48_YYY_frob(&g3[i],&X,4);
+
+        FP48_YYY_copy(&g4[i],&g3[i]);
+        FP48_YYY_frob(&g4[i],&X,4);
+    }
+
+    // Make them odd
+    pb1=1-BIG_XXX_parity(t[0]);
+    BIG_XXX_inc(t[0],pb1);
+    BIG_XXX_norm(t[0]);
+
+    pb2=1-BIG_XXX_parity(t[4]);
+    BIG_XXX_inc(t[4],pb2);
+    BIG_XXX_norm(t[4]);
+
+    pb3=1-BIG_XXX_parity(t[8]);
+    BIG_XXX_inc(t[8],pb3);
+    BIG_XXX_norm(t[8]);
+
+    pb4=1-BIG_XXX_parity(t[12]);
+    BIG_XXX_inc(t[12],pb4);
+    BIG_XXX_norm(t[12]);
+
+    // Number of bits
+    BIG_XXX_zero(mt);
+    for (i=0; i<16; i++)
+    {
+        BIG_XXX_or(mt,mt,t[i]);
+    }
+    nb=1+BIG_XXX_nbits(mt);
+
+    // Sign pivot
+    s1[nb-1]=1;
+    s2[nb-1]=1;
+    s3[nb-1]=1;
+    s4[nb-1]=1;
+    for (i=0; i<nb-1; i++)
+    {
+        BIG_XXX_fshr(t[0],1);
+        s1[i]=2*BIG_XXX_parity(t[0])-1;
+        BIG_XXX_fshr(t[4],1);
+        s2[i]=2*BIG_XXX_parity(t[4])-1;
+        BIG_XXX_fshr(t[8],1);
+        s3[i]=2*BIG_XXX_parity(t[8])-1;
+        BIG_XXX_fshr(t[12],1);
+        s4[i]=2*BIG_XXX_parity(t[12])-1;
+    }
+
+    // Recoded exponents
+    for (i=0; i<nb; i++)
+    {
+        w1[i]=0;
+        k=1;
+        for (j=1; j<4; j++)
+        {
+            bt=s1[i]*BIG_XXX_parity(t[j]);
+            BIG_XXX_fshr(t[j],1);
+
+            BIG_XXX_dec(t[j],(bt>>1));
+            BIG_XXX_norm(t[j]);
+            w1[i]+=bt*k;
+            k*=2;
+        }
+
+        w2[i]=0;
+        k=1;
+        for (j=5; j<8; j++)
+        {
+            bt=s2[i]*BIG_XXX_parity(t[j]);
+            BIG_XXX_fshr(t[j],1);
+
+            BIG_XXX_dec(t[j],(bt>>1));
+            BIG_XXX_norm(t[j]);
+            w2[i]+=bt*k;
+            k*=2;
+        }
+
+        w3[i]=0;
+        k=1;
+        for (j=9; j<12; j++)
+        {
+            bt=s3[i]*BIG_XXX_parity(t[j]);
+            BIG_XXX_fshr(t[j],1);
+
+            BIG_XXX_dec(t[j],(bt>>1));
+            BIG_XXX_norm(t[j]);
+            w3[i]+=bt*k;
+            k*=2;
+        }
+
+        w4[i]=0;
+        k=1;
+        for (j=13; j<16; j++)
+        {
+            bt=s4[i]*BIG_XXX_parity(t[j]);
+            BIG_XXX_fshr(t[j],1);
+
+            BIG_XXX_dec(t[j],(bt>>1));
+            BIG_XXX_norm(t[j]);
+            w4[i]+=bt*k;
+            k*=2;
+        }
+    }
+
+    // Main loop
+    FP48_YYY_select(p,g1,2*w1[nb-1]+1);
+    FP48_YYY_select(&r,g2,2*w2[nb-1]+1);
+    FP48_YYY_mul(p,&r);
+    FP48_YYY_select(&r,g3,2*w3[nb-1]+1);
+    FP48_YYY_mul(p,&r);
+    FP48_YYY_select(&r,g4,2*w4[nb-1]+1);
+    FP48_YYY_mul(p,&r);
+    for (i=nb-2; i>=0; i--)
+    {
+        FP48_YYY_usqr(p,p);
+        FP48_YYY_select(&r,g1,2*w1[i]+s1[i]);
+        FP48_YYY_mul(p,&r);
+        FP48_YYY_select(&r,g2,2*w2[i]+s2[i]);
+        FP48_YYY_mul(p,&r);
+        FP48_YYY_select(&r,g3,2*w3[i]+s3[i]);
+        FP48_YYY_mul(p,&r);
+        FP48_YYY_select(&r,g4,2*w4[i]+s4[i]);
+        FP48_YYY_mul(p,&r);
+    }
+
+    // apply correction
+    FP48_YYY_conj(&r,&q[0]);
+    FP48_YYY_mul(&r,p);
+    FP48_YYY_cmove(p,&r,pb1);
+    FP48_YYY_conj(&r,&q[4]);
+    FP48_YYY_mul(&r,p);
+    FP48_YYY_cmove(p,&r,pb2);
+
+    FP48_YYY_conj(&r,&q[8]);
+    FP48_YYY_mul(&r,p);
+    FP48_YYY_cmove(p,&r,pb3);
+    FP48_YYY_conj(&r,&q[12]);
+    FP48_YYY_mul(&r,p);
+    FP48_YYY_cmove(p,&r,pb4);
+
+    FP48_YYY_reduce(p);
+}
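+
+/* Example (illustrative sketch): as used with a Galbraith-Scott
+   decomposition, q[] is assumed to hold q[0..3] and their successive
+   4-fold Frobenius images in q[4..7], q[8..11] and q[12..15], which is
+   why g2, g3 and g4 above can be derived from g1 with FP48_YYY_frob:
+
+    FP48_YYY p;
+    FP48_YYY_pow16(&p,q,u);   // p = q[0]^u[0] * q[1]^u[1] * ... * q[15]^u[15]
+*/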
+
+/* Set w=w^p using Frobenius */
+/* SU= 160 */
+void FP48_YYY_frob(FP48_YYY *w,FP2_YYY  *f,int n)
+{
+    int i;
+    FP2_YYY f3,f2;				// f=(1+i)^(p-19)/24
+    FP2_YYY_sqr(&f2,f);     //
+    FP2_YYY_mul(&f3,&f2,f); // f3=f^3=(1+i)^(p-19)/8
+
+    FP2_YYY_mul_ip(&f3);
+    FP2_YYY_norm(&f3);
+    FP2_YYY_mul_ip(&f3);    // f3 = (1+i)^16/8.(1+i)^(p-19)/8 = (1+i)^(p-3)/8
+    FP2_YYY_norm(&f3);
+
+    for (i=0; i<n; i++)
+    {
+        FP16_YYY_frob(&(w->a),&f3);   // a=a^p
+        FP16_YYY_frob(&(w->b),&f3);   // b=b^p
+        FP16_YYY_frob(&(w->c),&f3);   // c=c^p
+
+        FP16_YYY_qmul(&(w->b),&(w->b),f);
+        FP16_YYY_times_i4(&(w->b));
+        FP16_YYY_times_i2(&(w->b));
+        FP16_YYY_qmul(&(w->c),&(w->c),&f2);
+        FP16_YYY_times_i4(&(w->c));
+        FP16_YYY_times_i4(&(w->c));
+        FP16_YYY_times_i4(&(w->c));
+    }
+}
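+
+/* Example (illustrative sketch): f is the precomputed Frobenius
+   constant built from the field constants Fra/Frb, as in pow16 above:
+
+    FP_YYY fx,fy;
+    FP2_YYY X;
+    FP_YYY_rcopy(&fx,Fra_YYY);
+    FP_YYY_rcopy(&fy,Frb_YYY);
+    FP2_YYY_from_FPs(&X,&fx,&fy);
+    FP48_YYY_frob(&w,&X,3);   // w <- w^(p^3)
+*/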
+
+/* SU= 8 */
+/* normalise all components of w */
+void FP48_YYY_norm(FP48_YYY *w)
+{
+    FP16_YYY_norm(&(w->a));
+    FP16_YYY_norm(&(w->b));
+    FP16_YYY_norm(&(w->c));
+}
+
+/* SU= 8 */
+/* reduce all components of w */
+void FP48_YYY_reduce(FP48_YYY *w)
+{
+    FP16_YYY_reduce(&(w->a));
+    FP16_YYY_reduce(&(w->b));
+    FP16_YYY_reduce(&(w->c));
+}
+
+/* trace function w=trace(x) */
+/* SU= 8 */
+void FP48_YYY_trace(FP16_YYY *w,FP48_YYY *x)
+{
+    FP16_YYY_imul(w,&(x->a),3);
+    FP16_YYY_reduce(w);
+}
+
+/* SU= 8 */
+/* Output w in hex */
+void FP48_YYY_output(FP48_YYY *w)
+{
+    printf("[");
+    FP16_YYY_output(&(w->a));
+    printf(",");
+    FP16_YYY_output(&(w->b));
+    printf(",");
+    FP16_YYY_output(&(w->c));
+    printf("]");
+}
+
+/* Convert g to octet string w */
+void FP48_YYY_toOctet(octet *W,FP48_YYY *g)
+{
+    BIG_XXX a;
+    W->len=48*MODBYTES_XXX;
+
+    FP_YYY_redc(a,&(g->a.a.a.a.a));
+    BIG_XXX_toBytes(&(W->val[0]),a);
+    FP_YYY_redc(a,&(g->a.a.a.a.b));
+    BIG_XXX_toBytes(&(W->val[MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.a.a.b.a));
+    BIG_XXX_toBytes(&(W->val[2*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.a.a.b.b));
+    BIG_XXX_toBytes(&(W->val[3*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.a.b.a.a));
+    BIG_XXX_toBytes(&(W->val[4*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.a.b.a.b));
+    BIG_XXX_toBytes(&(W->val[5*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.a.b.b.a));
+    BIG_XXX_toBytes(&(W->val[6*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.a.b.b.b));
+    BIG_XXX_toBytes(&(W->val[7*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.b.a.a.a));
+    BIG_XXX_toBytes(&(W->val[8*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.a.a.b));
+    BIG_XXX_toBytes(&(W->val[9*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.b.a.b.a));
+    BIG_XXX_toBytes(&(W->val[10*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.a.b.b));
+    BIG_XXX_toBytes(&(W->val[11*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.b.b.a.a));
+    BIG_XXX_toBytes(&(W->val[12*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.b.a.b));
+    BIG_XXX_toBytes(&(W->val[13*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.b.b.b.a));
+    BIG_XXX_toBytes(&(W->val[14*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.b.b.b));
+    BIG_XXX_toBytes(&(W->val[15*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.a.a.a.a));
+    BIG_XXX_toBytes(&(W->val[16*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.a.a.a.b));
+    BIG_XXX_toBytes(&(W->val[17*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.a.a.b.a));
+    BIG_XXX_toBytes(&(W->val[18*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.a.a.b.b));
+    BIG_XXX_toBytes(&(W->val[19*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.a.b.a.a));
+    BIG_XXX_toBytes(&(W->val[20*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.a.b.a.b));
+    BIG_XXX_toBytes(&(W->val[21*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.a.b.b.a));
+    BIG_XXX_toBytes(&(W->val[22*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.a.b.b.b));
+    BIG_XXX_toBytes(&(W->val[23*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.b.a.a.a));
+    BIG_XXX_toBytes(&(W->val[24*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.a.a.b));
+    BIG_XXX_toBytes(&(W->val[25*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.b.a.b.a));
+    BIG_XXX_toBytes(&(W->val[26*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.a.b.b));
+    BIG_XXX_toBytes(&(W->val[27*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.b.b.a.a));
+    BIG_XXX_toBytes(&(W->val[28*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.b.a.b));
+    BIG_XXX_toBytes(&(W->val[29*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.b.b.b.a));
+    BIG_XXX_toBytes(&(W->val[30*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.b.b.b));
+    BIG_XXX_toBytes(&(W->val[31*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.a.a.a.a));
+    BIG_XXX_toBytes(&(W->val[32*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.a.a.a.b));
+    BIG_XXX_toBytes(&(W->val[33*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.a.a.b.a));
+    BIG_XXX_toBytes(&(W->val[34*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.a.a.b.b));
+    BIG_XXX_toBytes(&(W->val[35*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.a.b.a.a));
+    BIG_XXX_toBytes(&(W->val[36*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.a.b.a.b));
+    BIG_XXX_toBytes(&(W->val[37*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.a.b.b.a));
+    BIG_XXX_toBytes(&(W->val[38*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.a.b.b.b));
+    BIG_XXX_toBytes(&(W->val[39*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.b.a.a.a));
+    BIG_XXX_toBytes(&(W->val[40*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.a.a.b));
+    BIG_XXX_toBytes(&(W->val[41*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.b.a.b.a));
+    BIG_XXX_toBytes(&(W->val[42*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.a.b.b));
+    BIG_XXX_toBytes(&(W->val[43*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.b.b.a.a));
+    BIG_XXX_toBytes(&(W->val[44*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.b.a.b));
+    BIG_XXX_toBytes(&(W->val[45*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.b.b.b.a));
+    BIG_XXX_toBytes(&(W->val[46*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.b.b.b));
+    BIG_XXX_toBytes(&(W->val[47*MODBYTES_XXX]),a);
+
+}
+
+/* Restore g from octet string w */
+void FP48_YYY_fromOctet(FP48_YYY *g,octet *W)
+{
+    BIG_XXX b;
+
+    BIG_XXX_fromBytes(b,&W->val[0]);
+    FP_YYY_nres(&(g->a.a.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[2*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[3*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[4*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[5*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[6*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[7*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[8*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[9*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[10*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[11*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[12*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[13*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[14*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[15*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[16*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[17*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[18*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[19*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[20*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[21*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[22*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[23*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[24*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[25*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[26*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[27*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[28*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[29*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[30*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[31*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[32*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[33*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[34*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[35*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[36*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[37*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[38*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[39*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[40*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[41*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[42*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[43*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[44*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[45*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[46*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[47*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.b.b.b),b);
+
+}
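+
+/* Example (illustrative sketch): serialisation round trip, assuming the
+   usual AMCL octet layout {len,max,val}:
+
+    char buf[48*MODBYTES_XXX];
+    octet O= {0,sizeof(buf),buf};
+    FP48_YYY h;
+    FP48_YYY_toOctet(&O,&g);      // sets O.len = 48*MODBYTES_XXX
+    FP48_YYY_fromOctet(&h,&O);
+    // FP48_YYY_equals(&g,&h) should return 1
+*/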
+
+/* Move b to a if d=1 */
+void FP48_YYY_cmove(FP48_YYY *f,FP48_YYY *g,int d)
+{
+    FP16_YYY_cmove(&(f->a),&(g->a),d);
+    FP16_YYY_cmove(&(f->b),&(g->b),d);
+    FP16_YYY_cmove(&(f->c),&(g->c),d);
+}
+
+#ifdef HAS_MAIN
+int main()
+{
+    int i;
+    FP2_YYY w0,w1,X;
+    FP4_YYY f0,f1;
+    FP16_YYY t0,t1,t2;
+    FP48_YYY w,t;
+    BIG_XXX a,b;
+    BIG_XXX p;
+
+    char raw[100];
+    csprng RNG;                // Crypto Strong RNG
+
+    for (i=0; i<100; i++) raw[i]=i;
+
+    BIG_XXX_rcopy(a,Fra_YYY);
+    BIG_XXX_rcopy(b,Frb_YYY);
+    FP2_YYY_from_BIGs(&X,a,b);
+
+    RAND_seed(&RNG,100,raw);   // initialise strong RNG
+
+    BIG_XXX_rcopy(p,Modulus_YYY);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f0,&w0,&w1);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f1,&w0,&w1);
+    FP16_YYY_from_FP4s(&t0,&f0,&f1);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f0,&w0,&w1);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f1,&w0,&w1);
+    FP16_YYY_from_FP4s(&t1,&f0,&f1);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f0,&w0,&w1);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w0,a,b);
+
+    BIG_XXX_randomnum(a,p,&RNG);
+    BIG_XXX_randomnum(b,p,&RNG);
+    FP2_YYY_from_BIGs(&w1,a,b);
+
+    FP4_YYY_from_FP2s(&f1,&w0,&w1);
+    FP16_YYY_from_FP4s(&t2,&f0,&f1);
+
+    FP48_YYY_from_FP16s(&w,&t0,&t1,&t2);
+
+
+    FP48_YYY_copy(&t,&w);
+
+    printf("w= ");
+    FP48_YYY_output(&w);
+    printf("\n");
+
+    FP48_YYY_norm(&w);
+
+    printf("w^p= ");
+    FP48_YYY_frob(&w,&X,1);
+    FP48_YYY_output(&w);
+    printf("\n");
+
+    printf("1/w= ");
+    FP48_YYY_inv(&t,&w);
+    FP48_YYY_output(&t);
+    printf("\n");
+
+    printf("w= ");
+    FP48_YYY_inv(&w,&t);
+    FP48_YYY_output(&w);
+    printf("\n");
+
+    return 0;
+}
+#endif