diff --git a/app/src/main/java/com/wireguard/crypto/Curve25519.java b/app/src/main/java/com/wireguard/crypto/Curve25519.java index fdc8963..5d27c3e 100644 --- a/app/src/main/java/com/wireguard/crypto/Curve25519.java +++ b/app/src/main/java/com/wireguard/crypto/Curve25519.java @@ -41,495 +41,495 @@ import java.util.Arrays; @SuppressWarnings("ALL") public final class Curve25519 { - // Numbers modulo 2^255 - 19 are broken up into ten 26-bit words. - private static final int NUM_LIMBS_255BIT = 10; - private static final int NUM_LIMBS_510BIT = 20; - private int[] x_1; - private int[] x_2; - private int[] x_3; - private int[] z_2; - private int[] z_3; - private int[] A; - private int[] B; - private int[] C; - private int[] D; - private int[] E; - private int[] AA; - private int[] BB; - private int[] DA; - private int[] CB; - private long[] t1; - private int[] t2; - - /** - * Constructs the temporary state holder for Curve25519 evaluation. - */ - private Curve25519() - { - // Allocate memory for all of the temporary variables we will need. - x_1 = new int [NUM_LIMBS_255BIT]; - x_2 = new int [NUM_LIMBS_255BIT]; - x_3 = new int [NUM_LIMBS_255BIT]; - z_2 = new int [NUM_LIMBS_255BIT]; - z_3 = new int [NUM_LIMBS_255BIT]; - A = new int [NUM_LIMBS_255BIT]; - B = new int [NUM_LIMBS_255BIT]; - C = new int [NUM_LIMBS_255BIT]; - D = new int [NUM_LIMBS_255BIT]; - E = new int [NUM_LIMBS_255BIT]; - AA = new int [NUM_LIMBS_255BIT]; - BB = new int [NUM_LIMBS_255BIT]; - DA = new int [NUM_LIMBS_255BIT]; - CB = new int [NUM_LIMBS_255BIT]; - t1 = new long [NUM_LIMBS_510BIT]; - t2 = new int [NUM_LIMBS_510BIT]; - } - - - /** - * Destroy all sensitive data in this object. - */ - private void destroy() { - // Destroy all temporary variables. - Arrays.fill(x_1, 0); - Arrays.fill(x_2, 0); - Arrays.fill(x_3, 0); - Arrays.fill(z_2, 0); - Arrays.fill(z_3, 0); - Arrays.fill(A, 0); - Arrays.fill(B, 0); - Arrays.fill(C, 0); - Arrays.fill(D, 0); - Arrays.fill(E, 0); - Arrays.fill(AA, 0); - Arrays.fill(BB, 0); - Arrays.fill(DA, 0); - Arrays.fill(CB, 0); - Arrays.fill(t1, 0L); - Arrays.fill(t2, 0); - } - - /** - * Reduces a number modulo 2^255 - 19 where it is known that the - * number can be reduced with only 1 trial subtraction. - * - * @param x The number to reduce, and the result. - */ - private void reduceQuick(int[] x) - { - int index, carry; - - // Perform a trial subtraction of (2^255 - 19) from "x" which is - // equivalent to adding 19 and subtracting 2^255. We add 19 here; - // the subtraction of 2^255 occurs in the next step. - carry = 19; - for (index = 0; index < NUM_LIMBS_255BIT; ++index) { - carry += x[index]; - t2[index] = carry & 0x03FFFFFF; - carry >>= 26; - } - - // If there was a borrow, then the original "x" is the correct answer. - // If there was no borrow, then "t2" is the correct answer. Select the - // correct answer but do it in a way that instruction timing will not - // reveal which value was selected. Borrow will occur if bit 21 of - // "t2" is zero. Turn the bit into a selection mask. - int mask = -((t2[NUM_LIMBS_255BIT - 1] >> 21) & 0x01); - int nmask = ~mask; - t2[NUM_LIMBS_255BIT - 1] &= 0x001FFFFF; - for (index = 0; index < NUM_LIMBS_255BIT; ++index) - x[index] = (x[index] & nmask) | (t2[index] & mask); - } - - /** - * Reduce a number modulo 2^255 - 19. - * - * @param result The result. - * @param x The value to be reduced. This array will be - * modified during the reduction. - * @param size The number of limbs in the high order half of x. - */ - private void reduce(int[] result, int[] x, int size) - { - int index, limb, carry; - - // Calculate (x mod 2^255) + ((x / 2^255) * 19) which will - // either produce the answer we want or it will produce a - // value of the form "answer + j * (2^255 - 19)". There are - // 5 left-over bits in the top-most limb of the bottom half. - carry = 0; - limb = x[NUM_LIMBS_255BIT - 1] >> 21; - x[NUM_LIMBS_255BIT - 1] &= 0x001FFFFF; - for (index = 0; index < size; ++index) { - limb += x[NUM_LIMBS_255BIT + index] << 5; - carry += (limb & 0x03FFFFFF) * 19 + x[index]; - x[index] = carry & 0x03FFFFFF; - limb >>= 26; - carry >>= 26; - } - if (size < NUM_LIMBS_255BIT) { - // The high order half of the number is short; e.g. for mulA24(). - // Propagate the carry through the rest of the low order part. - for (index = size; index < NUM_LIMBS_255BIT; ++index) { - carry += x[index]; - x[index] = carry & 0x03FFFFFF; - carry >>= 26; - } - } - - // The "j" value may still be too large due to the final carry-out. - // We must repeat the reduction. If we already have the answer, - // then this won't do any harm but we must still do the calculation - // to preserve the overall timing. The "j" value will be between - // 0 and 19, which means that the carry we care about is in the - // top 5 bits of the highest limb of the bottom half. - carry = (x[NUM_LIMBS_255BIT - 1] >> 21) * 19; - x[NUM_LIMBS_255BIT - 1] &= 0x001FFFFF; - for (index = 0; index < NUM_LIMBS_255BIT; ++index) { - carry += x[index]; - result[index] = carry & 0x03FFFFFF; - carry >>= 26; - } - - // At this point "x" will either be the answer or it will be the - // answer plus (2^255 - 19). Perform a trial subtraction to - // complete the reduction process. - reduceQuick(result); - } - - /** - * Multiplies two numbers modulo 2^255 - 19. - * - * @param result The result. - * @param x The first number to multiply. - * @param y The second number to multiply. - */ - private void mul(int[] result, int[] x, int[] y) - { - int i, j; - - // Multiply the two numbers to create the intermediate result. - long v = x[0]; - for (i = 0; i < NUM_LIMBS_255BIT; ++i) { - t1[i] = v * y[i]; - } - for (i = 1; i < NUM_LIMBS_255BIT; ++i) { - v = x[i]; - for (j = 0; j < (NUM_LIMBS_255BIT - 1); ++j) { - t1[i + j] += v * y[j]; - } - t1[i + NUM_LIMBS_255BIT - 1] = v * y[NUM_LIMBS_255BIT - 1]; - } - - // Propagate carries and convert back into 26-bit words. - v = t1[0]; - t2[0] = ((int)v) & 0x03FFFFFF; - for (i = 1; i < NUM_LIMBS_510BIT; ++i) { - v = (v >> 26) + t1[i]; - t2[i] = ((int)v) & 0x03FFFFFF; - } - - // Reduce the result modulo 2^255 - 19. - reduce(result, t2, NUM_LIMBS_255BIT); - } - - /** - * Squares a number modulo 2^255 - 19. - * - * @param result The result. - * @param x The number to square. - */ - private void square(int[] result, int[] x) - { - mul(result, x, x); - } - - /** - * Multiplies a number by the a24 constant, modulo 2^255 - 19. - * - * @param result The result. - * @param x The number to multiply by a24. - */ - private void mulA24(int[] result, int[] x) - { - long a24 = 121665; - long carry = 0; - int index; - for (index = 0; index < NUM_LIMBS_255BIT; ++index) { - carry += a24 * x[index]; - t2[index] = ((int)carry) & 0x03FFFFFF; - carry >>= 26; - } - t2[NUM_LIMBS_255BIT] = ((int)carry) & 0x03FFFFFF; - reduce(result, t2, 1); - } - - /** - * Adds two numbers modulo 2^255 - 19. - * - * @param result The result. - * @param x The first number to add. - * @param y The second number to add. - */ - private void add(int[] result, int[] x, int[] y) - { - int index, carry; - carry = x[0] + y[0]; - result[0] = carry & 0x03FFFFFF; - for (index = 1; index < NUM_LIMBS_255BIT; ++index) { - carry = (carry >> 26) + x[index] + y[index]; - result[index] = carry & 0x03FFFFFF; - } - reduceQuick(result); - } - - /** - * Subtracts two numbers modulo 2^255 - 19. - * - * @param result The result. - * @param x The first number to subtract. - * @param y The second number to subtract. - */ - private void sub(int[] result, int[] x, int[] y) - { - int index, borrow; - - // Subtract y from x to generate the intermediate result. - borrow = 0; - for (index = 0; index < NUM_LIMBS_255BIT; ++index) { - borrow = x[index] - y[index] - ((borrow >> 26) & 0x01); - result[index] = borrow & 0x03FFFFFF; - } - - // If we had a borrow, then the result has gone negative and we - // have to add 2^255 - 19 to the result to make it positive again. - // The top bits of "borrow" will be all 1's if there is a borrow - // or it will be all 0's if there was no borrow. Easiest is to - // conditionally subtract 19 and then mask off the high bits. - borrow = result[0] - ((-((borrow >> 26) & 0x01)) & 19); - result[0] = borrow & 0x03FFFFFF; - for (index = 1; index < NUM_LIMBS_255BIT; ++index) { - borrow = result[index] - ((borrow >> 26) & 0x01); - result[index] = borrow & 0x03FFFFFF; - } - result[NUM_LIMBS_255BIT - 1] &= 0x001FFFFF; - } - - /** - * Conditional swap of two values. - * - * @param select Set to 1 to swap, 0 to leave as-is. - * @param x The first value. - * @param y The second value. - */ - private static void cswap(int select, int[] x, int[] y) - { - int dummy; - select = -select; - for (int index = 0; index < NUM_LIMBS_255BIT; ++index) { - dummy = select & (x[index] ^ y[index]); - x[index] ^= dummy; - y[index] ^= dummy; - } - } - - /** - * Raise x to the power of (2^250 - 1). - * - * @param result The result. Must not overlap with x. - * @param x The argument. - */ - private void pow250(int[] result, int[] x) - { - int i, j; - - // The big-endian hexadecimal expansion of (2^250 - 1) is: - // 03FFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF - // - // The naive implementation needs to do 2 multiplications per 1 bit and - // 1 multiplication per 0 bit. We can improve upon this by creating a - // pattern 0000000001 ... 0000000001. If we square and multiply the - // pattern by itself we can turn the pattern into the partial results - // 0000000011 ... 0000000011, 0000000111 ... 0000000111, etc. - // This averages out to about 1.1 multiplications per 1 bit instead of 2. - - // Build a pattern of 250 bits in length of repeated copies of 0000000001. - square(A, x); - for (j = 0; j < 9; ++j) - square(A, A); - mul(result, A, x); - for (i = 0; i < 23; ++i) { - for (j = 0; j < 10; ++j) - square(A, A); - mul(result, result, A); - } - - // Multiply bit-shifted versions of the 0000000001 pattern into - // the result to "fill in" the gaps in the pattern. - square(A, result); - mul(result, result, A); - for (j = 0; j < 8; ++j) { - square(A, A); - mul(result, result, A); - } - } - - /** - * Computes the reciprocal of a number modulo 2^255 - 19. - * - * @param result The result. Must not overlap with x. - * @param x The argument. - */ - private void recip(int[] result, int[] x) - { - // The reciprocal is the same as x ^ (p - 2) where p = 2^255 - 19. - // The big-endian hexadecimal expansion of (p - 2) is: - // 7FFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFEB - // Start with the 250 upper bits of the expansion of (p - 2). - pow250(result, x); - - // Deal with the 5 lowest bits of (p - 2), 01011, from highest to lowest. - square(result, result); - square(result, result); - mul(result, result, x); - square(result, result); - square(result, result); - mul(result, result, x); - square(result, result); - mul(result, result, x); - } - - /** - * Evaluates the curve for every bit in a secret key. - * - * @param s The 32-byte secret key. - */ - private void evalCurve(byte[] s) - { - int sposn = 31; - int sbit = 6; - int svalue = s[sposn] | 0x40; - int swap = 0; - int select; - - // Iterate over all 255 bits of "s" from the highest to the lowest. - // We ignore the high bit of the 256-bit representation of "s". - for (;;) { - // Conditional swaps on entry to this bit but only if we - // didn't swap on the previous bit. - select = (svalue >> sbit) & 0x01; - swap ^= select; - cswap(swap, x_2, x_3); - cswap(swap, z_2, z_3); - swap = select; - - // Evaluate the curve. - add(A, x_2, z_2); // A = x_2 + z_2 - square(AA, A); // AA = A^2 - sub(B, x_2, z_2); // B = x_2 - z_2 - square(BB, B); // BB = B^2 - sub(E, AA, BB); // E = AA - BB - add(C, x_3, z_3); // C = x_3 + z_3 - sub(D, x_3, z_3); // D = x_3 - z_3 - mul(DA, D, A); // DA = D * A - mul(CB, C, B); // CB = C * B - add(x_3, DA, CB); // x_3 = (DA + CB)^2 - square(x_3, x_3); - sub(z_3, DA, CB); // z_3 = x_1 * (DA - CB)^2 - square(z_3, z_3); - mul(z_3, z_3, x_1); - mul(x_2, AA, BB); // x_2 = AA * BB - mulA24(z_2, E); // z_2 = E * (AA + a24 * E) - add(z_2, z_2, AA); - mul(z_2, z_2, E); - - // Move onto the next lower bit of "s". - if (sbit > 0) { - --sbit; - } else if (sposn == 0) { - break; - } else if (sposn == 1) { - --sposn; - svalue = s[sposn] & 0xF8; - sbit = 7; - } else { - --sposn; - svalue = s[sposn]; - sbit = 7; - } - } - - // Final conditional swaps. - cswap(swap, x_2, x_3); - cswap(swap, z_2, z_3); - } - - /** - * Evaluates the Curve25519 curve. - * - * @param result Buffer to place the result of the evaluation into. - * @param offset Offset into the result buffer. - * @param privateKey The private key to use in the evaluation. - * @param publicKey The public key to use in the evaluation, or null - * if the base point of the curve should be used. - */ - public static void eval(byte[] result, int offset, byte[] privateKey, byte[] publicKey) - { - Curve25519 state = new Curve25519(); - try { - // Unpack the public key value. If null, use 9 as the base point. - Arrays.fill(state.x_1, 0); - if (publicKey != null) { - // Convert the input value from little-endian into 26-bit limbs. - for (int index = 0; index < 32; ++index) { - int bit = (index * 8) % 26; - int word = (index * 8) / 26; - int value = publicKey[index] & 0xFF; - if (bit <= (26 - 8)) { - state.x_1[word] |= value << bit; - } else { - state.x_1[word] |= value << bit; - state.x_1[word] &= 0x03FFFFFF; - state.x_1[word + 1] |= value >> (26 - bit); - } - } - - // Just in case, we reduce the number modulo 2^255 - 19 to - // make sure that it is in range of the field before we start. - // This eliminates values between 2^255 - 19 and 2^256 - 1. - state.reduceQuick(state.x_1); - state.reduceQuick(state.x_1); - } else { - state.x_1[0] = 9; - } - - // Initialize the other temporary variables. - Arrays.fill(state.x_2, 0); // x_2 = 1 - state.x_2[0] = 1; - Arrays.fill(state.z_2, 0); // z_2 = 0 - System.arraycopy(state.x_1, 0, state.x_3, 0, state.x_1.length); // x_3 = x_1 - Arrays.fill(state.z_3, 0); // z_3 = 1 - state.z_3[0] = 1; - - // Evaluate the curve for every bit of the private key. - state.evalCurve(privateKey); - - // Compute x_2 * (z_2 ^ (p - 2)) where p = 2^255 - 19. - state.recip(state.z_3, state.z_2); - state.mul(state.x_2, state.x_2, state.z_3); - - // Convert x_2 into little-endian in the result buffer. - for (int index = 0; index < 32; ++index) { - int bit = (index * 8) % 26; - int word = (index * 8) / 26; - if (bit <= (26 - 8)) - result[offset + index] = (byte)(state.x_2[word] >> bit); - else - result[offset + index] = (byte)((state.x_2[word] >> bit) | (state.x_2[word + 1] << (26 - bit))); - } - } finally { - // Clean up all temporary state before we exit. - state.destroy(); - } - } + // Numbers modulo 2^255 - 19 are broken up into ten 26-bit words. + private static final int NUM_LIMBS_255BIT = 10; + private static final int NUM_LIMBS_510BIT = 20; + private int[] x_1; + private int[] x_2; + private int[] x_3; + private int[] z_2; + private int[] z_3; + private int[] A; + private int[] B; + private int[] C; + private int[] D; + private int[] E; + private int[] AA; + private int[] BB; + private int[] DA; + private int[] CB; + private long[] t1; + private int[] t2; + + /** + * Constructs the temporary state holder for Curve25519 evaluation. + */ + private Curve25519() + { + // Allocate memory for all of the temporary variables we will need. + x_1 = new int [NUM_LIMBS_255BIT]; + x_2 = new int [NUM_LIMBS_255BIT]; + x_3 = new int [NUM_LIMBS_255BIT]; + z_2 = new int [NUM_LIMBS_255BIT]; + z_3 = new int [NUM_LIMBS_255BIT]; + A = new int [NUM_LIMBS_255BIT]; + B = new int [NUM_LIMBS_255BIT]; + C = new int [NUM_LIMBS_255BIT]; + D = new int [NUM_LIMBS_255BIT]; + E = new int [NUM_LIMBS_255BIT]; + AA = new int [NUM_LIMBS_255BIT]; + BB = new int [NUM_LIMBS_255BIT]; + DA = new int [NUM_LIMBS_255BIT]; + CB = new int [NUM_LIMBS_255BIT]; + t1 = new long [NUM_LIMBS_510BIT]; + t2 = new int [NUM_LIMBS_510BIT]; + } + + + /** + * Destroy all sensitive data in this object. + */ + private void destroy() { + // Destroy all temporary variables. + Arrays.fill(x_1, 0); + Arrays.fill(x_2, 0); + Arrays.fill(x_3, 0); + Arrays.fill(z_2, 0); + Arrays.fill(z_3, 0); + Arrays.fill(A, 0); + Arrays.fill(B, 0); + Arrays.fill(C, 0); + Arrays.fill(D, 0); + Arrays.fill(E, 0); + Arrays.fill(AA, 0); + Arrays.fill(BB, 0); + Arrays.fill(DA, 0); + Arrays.fill(CB, 0); + Arrays.fill(t1, 0L); + Arrays.fill(t2, 0); + } + + /** + * Reduces a number modulo 2^255 - 19 where it is known that the + * number can be reduced with only 1 trial subtraction. + * + * @param x The number to reduce, and the result. + */ + private void reduceQuick(int[] x) + { + int index, carry; + + // Perform a trial subtraction of (2^255 - 19) from "x" which is + // equivalent to adding 19 and subtracting 2^255. We add 19 here; + // the subtraction of 2^255 occurs in the next step. + carry = 19; + for (index = 0; index < NUM_LIMBS_255BIT; ++index) { + carry += x[index]; + t2[index] = carry & 0x03FFFFFF; + carry >>= 26; + } + + // If there was a borrow, then the original "x" is the correct answer. + // If there was no borrow, then "t2" is the correct answer. Select the + // correct answer but do it in a way that instruction timing will not + // reveal which value was selected. Borrow will occur if bit 21 of + // "t2" is zero. Turn the bit into a selection mask. + int mask = -((t2[NUM_LIMBS_255BIT - 1] >> 21) & 0x01); + int nmask = ~mask; + t2[NUM_LIMBS_255BIT - 1] &= 0x001FFFFF; + for (index = 0; index < NUM_LIMBS_255BIT; ++index) + x[index] = (x[index] & nmask) | (t2[index] & mask); + } + + /** + * Reduce a number modulo 2^255 - 19. + * + * @param result The result. + * @param x The value to be reduced. This array will be + * modified during the reduction. + * @param size The number of limbs in the high order half of x. + */ + private void reduce(int[] result, int[] x, int size) + { + int index, limb, carry; + + // Calculate (x mod 2^255) + ((x / 2^255) * 19) which will + // either produce the answer we want or it will produce a + // value of the form "answer + j * (2^255 - 19)". There are + // 5 left-over bits in the top-most limb of the bottom half. + carry = 0; + limb = x[NUM_LIMBS_255BIT - 1] >> 21; + x[NUM_LIMBS_255BIT - 1] &= 0x001FFFFF; + for (index = 0; index < size; ++index) { + limb += x[NUM_LIMBS_255BIT + index] << 5; + carry += (limb & 0x03FFFFFF) * 19 + x[index]; + x[index] = carry & 0x03FFFFFF; + limb >>= 26; + carry >>= 26; + } + if (size < NUM_LIMBS_255BIT) { + // The high order half of the number is short; e.g. for mulA24(). + // Propagate the carry through the rest of the low order part. + for (index = size; index < NUM_LIMBS_255BIT; ++index) { + carry += x[index]; + x[index] = carry & 0x03FFFFFF; + carry >>= 26; + } + } + + // The "j" value may still be too large due to the final carry-out. + // We must repeat the reduction. If we already have the answer, + // then this won't do any harm but we must still do the calculation + // to preserve the overall timing. The "j" value will be between + // 0 and 19, which means that the carry we care about is in the + // top 5 bits of the highest limb of the bottom half. + carry = (x[NUM_LIMBS_255BIT - 1] >> 21) * 19; + x[NUM_LIMBS_255BIT - 1] &= 0x001FFFFF; + for (index = 0; index < NUM_LIMBS_255BIT; ++index) { + carry += x[index]; + result[index] = carry & 0x03FFFFFF; + carry >>= 26; + } + + // At this point "x" will either be the answer or it will be the + // answer plus (2^255 - 19). Perform a trial subtraction to + // complete the reduction process. + reduceQuick(result); + } + + /** + * Multiplies two numbers modulo 2^255 - 19. + * + * @param result The result. + * @param x The first number to multiply. + * @param y The second number to multiply. + */ + private void mul(int[] result, int[] x, int[] y) + { + int i, j; + + // Multiply the two numbers to create the intermediate result. + long v = x[0]; + for (i = 0; i < NUM_LIMBS_255BIT; ++i) { + t1[i] = v * y[i]; + } + for (i = 1; i < NUM_LIMBS_255BIT; ++i) { + v = x[i]; + for (j = 0; j < (NUM_LIMBS_255BIT - 1); ++j) { + t1[i + j] += v * y[j]; + } + t1[i + NUM_LIMBS_255BIT - 1] = v * y[NUM_LIMBS_255BIT - 1]; + } + + // Propagate carries and convert back into 26-bit words. + v = t1[0]; + t2[0] = ((int)v) & 0x03FFFFFF; + for (i = 1; i < NUM_LIMBS_510BIT; ++i) { + v = (v >> 26) + t1[i]; + t2[i] = ((int)v) & 0x03FFFFFF; + } + + // Reduce the result modulo 2^255 - 19. + reduce(result, t2, NUM_LIMBS_255BIT); + } + + /** + * Squares a number modulo 2^255 - 19. + * + * @param result The result. + * @param x The number to square. + */ + private void square(int[] result, int[] x) + { + mul(result, x, x); + } + + /** + * Multiplies a number by the a24 constant, modulo 2^255 - 19. + * + * @param result The result. + * @param x The number to multiply by a24. + */ + private void mulA24(int[] result, int[] x) + { + long a24 = 121665; + long carry = 0; + int index; + for (index = 0; index < NUM_LIMBS_255BIT; ++index) { + carry += a24 * x[index]; + t2[index] = ((int)carry) & 0x03FFFFFF; + carry >>= 26; + } + t2[NUM_LIMBS_255BIT] = ((int)carry) & 0x03FFFFFF; + reduce(result, t2, 1); + } + + /** + * Adds two numbers modulo 2^255 - 19. + * + * @param result The result. + * @param x The first number to add. + * @param y The second number to add. + */ + private void add(int[] result, int[] x, int[] y) + { + int index, carry; + carry = x[0] + y[0]; + result[0] = carry & 0x03FFFFFF; + for (index = 1; index < NUM_LIMBS_255BIT; ++index) { + carry = (carry >> 26) + x[index] + y[index]; + result[index] = carry & 0x03FFFFFF; + } + reduceQuick(result); + } + + /** + * Subtracts two numbers modulo 2^255 - 19. + * + * @param result The result. + * @param x The first number to subtract. + * @param y The second number to subtract. + */ + private void sub(int[] result, int[] x, int[] y) + { + int index, borrow; + + // Subtract y from x to generate the intermediate result. + borrow = 0; + for (index = 0; index < NUM_LIMBS_255BIT; ++index) { + borrow = x[index] - y[index] - ((borrow >> 26) & 0x01); + result[index] = borrow & 0x03FFFFFF; + } + + // If we had a borrow, then the result has gone negative and we + // have to add 2^255 - 19 to the result to make it positive again. + // The top bits of "borrow" will be all 1's if there is a borrow + // or it will be all 0's if there was no borrow. Easiest is to + // conditionally subtract 19 and then mask off the high bits. + borrow = result[0] - ((-((borrow >> 26) & 0x01)) & 19); + result[0] = borrow & 0x03FFFFFF; + for (index = 1; index < NUM_LIMBS_255BIT; ++index) { + borrow = result[index] - ((borrow >> 26) & 0x01); + result[index] = borrow & 0x03FFFFFF; + } + result[NUM_LIMBS_255BIT - 1] &= 0x001FFFFF; + } + + /** + * Conditional swap of two values. + * + * @param select Set to 1 to swap, 0 to leave as-is. + * @param x The first value. + * @param y The second value. + */ + private static void cswap(int select, int[] x, int[] y) + { + int dummy; + select = -select; + for (int index = 0; index < NUM_LIMBS_255BIT; ++index) { + dummy = select & (x[index] ^ y[index]); + x[index] ^= dummy; + y[index] ^= dummy; + } + } + + /** + * Raise x to the power of (2^250 - 1). + * + * @param result The result. Must not overlap with x. + * @param x The argument. + */ + private void pow250(int[] result, int[] x) + { + int i, j; + + // The big-endian hexadecimal expansion of (2^250 - 1) is: + // 03FFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF + // + // The naive implementation needs to do 2 multiplications per 1 bit and + // 1 multiplication per 0 bit. We can improve upon this by creating a + // pattern 0000000001 ... 0000000001. If we square and multiply the + // pattern by itself we can turn the pattern into the partial results + // 0000000011 ... 0000000011, 0000000111 ... 0000000111, etc. + // This averages out to about 1.1 multiplications per 1 bit instead of 2. + + // Build a pattern of 250 bits in length of repeated copies of 0000000001. + square(A, x); + for (j = 0; j < 9; ++j) + square(A, A); + mul(result, A, x); + for (i = 0; i < 23; ++i) { + for (j = 0; j < 10; ++j) + square(A, A); + mul(result, result, A); + } + + // Multiply bit-shifted versions of the 0000000001 pattern into + // the result to "fill in" the gaps in the pattern. + square(A, result); + mul(result, result, A); + for (j = 0; j < 8; ++j) { + square(A, A); + mul(result, result, A); + } + } + + /** + * Computes the reciprocal of a number modulo 2^255 - 19. + * + * @param result The result. Must not overlap with x. + * @param x The argument. + */ + private void recip(int[] result, int[] x) + { + // The reciprocal is the same as x ^ (p - 2) where p = 2^255 - 19. + // The big-endian hexadecimal expansion of (p - 2) is: + // 7FFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFEB + // Start with the 250 upper bits of the expansion of (p - 2). + pow250(result, x); + + // Deal with the 5 lowest bits of (p - 2), 01011, from highest to lowest. + square(result, result); + square(result, result); + mul(result, result, x); + square(result, result); + square(result, result); + mul(result, result, x); + square(result, result); + mul(result, result, x); + } + + /** + * Evaluates the curve for every bit in a secret key. + * + * @param s The 32-byte secret key. + */ + private void evalCurve(byte[] s) + { + int sposn = 31; + int sbit = 6; + int svalue = s[sposn] | 0x40; + int swap = 0; + int select; + + // Iterate over all 255 bits of "s" from the highest to the lowest. + // We ignore the high bit of the 256-bit representation of "s". + for (;;) { + // Conditional swaps on entry to this bit but only if we + // didn't swap on the previous bit. + select = (svalue >> sbit) & 0x01; + swap ^= select; + cswap(swap, x_2, x_3); + cswap(swap, z_2, z_3); + swap = select; + + // Evaluate the curve. + add(A, x_2, z_2); // A = x_2 + z_2 + square(AA, A); // AA = A^2 + sub(B, x_2, z_2); // B = x_2 - z_2 + square(BB, B); // BB = B^2 + sub(E, AA, BB); // E = AA - BB + add(C, x_3, z_3); // C = x_3 + z_3 + sub(D, x_3, z_3); // D = x_3 - z_3 + mul(DA, D, A); // DA = D * A + mul(CB, C, B); // CB = C * B + add(x_3, DA, CB); // x_3 = (DA + CB)^2 + square(x_3, x_3); + sub(z_3, DA, CB); // z_3 = x_1 * (DA - CB)^2 + square(z_3, z_3); + mul(z_3, z_3, x_1); + mul(x_2, AA, BB); // x_2 = AA * BB + mulA24(z_2, E); // z_2 = E * (AA + a24 * E) + add(z_2, z_2, AA); + mul(z_2, z_2, E); + + // Move onto the next lower bit of "s". + if (sbit > 0) { + --sbit; + } else if (sposn == 0) { + break; + } else if (sposn == 1) { + --sposn; + svalue = s[sposn] & 0xF8; + sbit = 7; + } else { + --sposn; + svalue = s[sposn]; + sbit = 7; + } + } + + // Final conditional swaps. + cswap(swap, x_2, x_3); + cswap(swap, z_2, z_3); + } + + /** + * Evaluates the Curve25519 curve. + * + * @param result Buffer to place the result of the evaluation into. + * @param offset Offset into the result buffer. + * @param privateKey The private key to use in the evaluation. + * @param publicKey The public key to use in the evaluation, or null + * if the base point of the curve should be used. + */ + public static void eval(byte[] result, int offset, byte[] privateKey, byte[] publicKey) + { + Curve25519 state = new Curve25519(); + try { + // Unpack the public key value. If null, use 9 as the base point. + Arrays.fill(state.x_1, 0); + if (publicKey != null) { + // Convert the input value from little-endian into 26-bit limbs. + for (int index = 0; index < 32; ++index) { + int bit = (index * 8) % 26; + int word = (index * 8) / 26; + int value = publicKey[index] & 0xFF; + if (bit <= (26 - 8)) { + state.x_1[word] |= value << bit; + } else { + state.x_1[word] |= value << bit; + state.x_1[word] &= 0x03FFFFFF; + state.x_1[word + 1] |= value >> (26 - bit); + } + } + + // Just in case, we reduce the number modulo 2^255 - 19 to + // make sure that it is in range of the field before we start. + // This eliminates values between 2^255 - 19 and 2^256 - 1. + state.reduceQuick(state.x_1); + state.reduceQuick(state.x_1); + } else { + state.x_1[0] = 9; + } + + // Initialize the other temporary variables. + Arrays.fill(state.x_2, 0); // x_2 = 1 + state.x_2[0] = 1; + Arrays.fill(state.z_2, 0); // z_2 = 0 + System.arraycopy(state.x_1, 0, state.x_3, 0, state.x_1.length); // x_3 = x_1 + Arrays.fill(state.z_3, 0); // z_3 = 1 + state.z_3[0] = 1; + + // Evaluate the curve for every bit of the private key. + state.evalCurve(privateKey); + + // Compute x_2 * (z_2 ^ (p - 2)) where p = 2^255 - 19. + state.recip(state.z_3, state.z_2); + state.mul(state.x_2, state.x_2, state.z_3); + + // Convert x_2 into little-endian in the result buffer. + for (int index = 0; index < 32; ++index) { + int bit = (index * 8) % 26; + int word = (index * 8) / 26; + if (bit <= (26 - 8)) + result[offset + index] = (byte)(state.x_2[word] >> bit); + else + result[offset + index] = (byte)((state.x_2[word] >> bit) | (state.x_2[word + 1] << (26 - bit))); + } + } finally { + // Clean up all temporary state before we exit. + state.destroy(); + } + } }