num_bigint_dig/
monty.rs

1#![allow(clippy::many_single_char_names)]
2
3use alloc::vec::Vec;
4use core::ops::Shl;
5use num_traits::{One, Zero};
6
7use crate::big_digit::{self, BigDigit, DoubleBigDigit, SignedDoubleBigDigit};
8use crate::biguint::BigUint;
9
10struct MontyReducer {
11    n0inv: BigDigit,
12}
13
14// k0 = -m**-1 mod 2**BITS. Algorithm from: Dumas, J.G. "On Newton–Raphson
15// Iteration for Multiplicative Inverses Modulo Prime Powers".
16fn inv_mod_alt(b: BigDigit) -> BigDigit {
17    assert_ne!(b & 1, 0);
18
19    let mut k0 = 2 - b as SignedDoubleBigDigit;
20    let mut t = (b - 1) as SignedDoubleBigDigit;
21    let mut i = 1;
22    while i < big_digit::BITS {
23        t = t.wrapping_mul(t);
24        k0 = k0.wrapping_mul(t + 1);
25
26        i <<= 1;
27    }
28    -k0 as BigDigit
29}
30
31impl MontyReducer {
32    fn new(n: &BigUint) -> Self {
33        let n0inv = inv_mod_alt(n.data[0]);
34        MontyReducer { n0inv }
35    }
36}
37
38/// Computes z mod m = x * y * 2 ** (-n*_W) mod m
39/// assuming k = -1/m mod 2**_W
40/// See Gueron, "Efficient Software Implementations of Modular Exponentiation".
41/// https://eprint.iacr.org/2011/239.pdf
42/// In the terminology of that paper, this is an "Almost Montgomery Multiplication":
43/// x and y are required to satisfy 0 <= z < 2**(n*_W) and then the result
44/// z is guaranteed to satisfy 0 <= z < 2**(n*_W), but it may not be < m.
45fn montgomery(z: &mut BigUint, x: &BigUint, y: &BigUint, m: &BigUint, k: BigDigit, n: usize) {
46    // This code assumes x, y, m are all the same length, n.
47    // (required by addMulVVW and the for loop).
48    // It also assumes that x, y are already reduced mod m,
49    // or else the result will not be properly reduced.
50    assert!(
51        x.data.len() == n && y.data.len() == n && m.data.len() == n,
52        "{:?} {:?} {:?} {}",
53        x,
54        y,
55        m,
56        n
57    );
58
59    z.data.clear();
60    z.data.resize(n * 2, 0);
61
62    let mut c: BigDigit = 0;
63
64    for i in 0..n {
65        let c2 = add_mul_vvw(&mut z.data[i..n + i], &x.data, y.data[i]);
66        let t = z.data[i].wrapping_mul(k);
67        let c3 = add_mul_vvw(&mut z.data[i..n + i], &m.data, t);
68        let cx = c.wrapping_add(c2);
69        let cy = cx.wrapping_add(c3);
70        z.data[n + i] = cy;
71        c = if cx < c2 || cy < c3 { 1 } else { 0 };
72    }
73
74    if c == 0 {
75        let (first, second) = z.data.split_at_mut(n);
76        first.swap_with_slice(&mut second[..]);
77    } else {
78        let (mut first, second) = z.data.split_at_mut(n);
79        sub_vv(&mut first, &second, &m.data);
80    }
81    z.data.truncate(n);
82}
83
84#[inline]
85fn add_mul_vvw(z: &mut [BigDigit], x: &[BigDigit], y: BigDigit) -> BigDigit {
86    let mut c = 0;
87    for (zi, xi) in z.iter_mut().zip(x.iter()) {
88        let (z1, z0) = mul_add_www(*xi, y, *zi);
89        let (c_, zi_) = add_ww(z0, c, 0);
90        *zi = zi_;
91        c = c_ + z1;
92    }
93
94    c
95}
96
97/// The resulting carry c is either 0 or 1.
98#[inline]
99fn sub_vv(z: &mut [BigDigit], x: &[BigDigit], y: &[BigDigit]) -> BigDigit {
100    let mut c = 0;
101    for (i, (xi, yi)) in x.iter().zip(y.iter()).enumerate().take(z.len()) {
102        let zi = xi.wrapping_sub(*yi).wrapping_sub(c);
103        z[i] = zi;
104        // see "Hacker's Delight", section 2-12 (overflow detection)
105        c = ((yi & !xi) | ((yi | !xi) & zi)) >> (big_digit::BITS - 1)
106    }
107
108    c
109}
110
111/// z1<<_W + z0 = x+y+c, with c == 0 or 1
112#[inline]
113fn add_ww(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) {
114    let yc = y.wrapping_add(c);
115    let z0 = x.wrapping_add(yc);
116    let z1 = if z0 < x || yc < y { 1 } else { 0 };
117
118    (z1, z0)
119}
120
121/// z1 << _W + z0 = x * y + c
122#[inline]
123fn mul_add_www(x: BigDigit, y: BigDigit, c: BigDigit) -> (BigDigit, BigDigit) {
124    let z = x as DoubleBigDigit * y as DoubleBigDigit + c as DoubleBigDigit;
125    ((z >> big_digit::BITS) as BigDigit, z as BigDigit)
126}
127
128/// Calculates x ** y mod m using a fixed, 4-bit window.
129pub fn monty_modpow(x: &BigUint, y: &BigUint, m: &BigUint) -> BigUint {
130    assert!(m.data[0] & 1 == 1);
131    let mr = MontyReducer::new(m);
132    let num_words = m.data.len();
133
134    let mut x = x.clone();
135
136    // We want the lengths of x and m to be equal.
137    // It is OK if x >= m as long as len(x) == len(m).
138    if x.data.len() > num_words {
139        x %= m;
140        // Note: now len(x) <= numWords, not guaranteed ==.
141    }
142    if x.data.len() < num_words {
143        x.data.resize(num_words, 0);
144    }
145
146    // rr = 2**(2*_W*len(m)) mod m
147    let mut rr = BigUint::one();
148    rr = (rr.shl(2 * num_words * big_digit::BITS)) % m;
149    if rr.data.len() < num_words {
150        rr.data.resize(num_words, 0);
151    }
152    // one = 1, with equal length to that of m
153    let mut one = BigUint::one();
154    one.data.resize(num_words, 0);
155
156    let n = 4;
157    // powers[i] contains x^i
158    let mut powers = Vec::with_capacity(1 << n);
159
160    let mut v1 = BigUint::zero();
161    montgomery(&mut v1, &one, &rr, m, mr.n0inv, num_words);
162    powers.push(v1);
163    let mut v2 = BigUint::zero();
164    montgomery(&mut v2, &x, &rr, m, mr.n0inv, num_words);
165    powers.push(v2);
166    for i in 2..1 << n {
167        let mut r = BigUint::zero();
168        montgomery(&mut r, &powers[i - 1], &powers[1], m, mr.n0inv, num_words);
169        powers.push(r);
170    }
171
172    // initialize z = 1 (Montgomery 1)
173    let mut z = powers[0].clone();
174    z.data.resize(num_words, 0);
175    let mut zz = BigUint::zero();
176    zz.data.resize(num_words, 0);
177
178    // same windowed exponent, but with Montgomery multiplications
179    for i in (0..y.data.len()).rev() {
180        let mut yi = y.data[i];
181        let mut j = 0;
182        while j < big_digit::BITS {
183            if i != y.data.len() - 1 || j != 0 {
184                montgomery(&mut zz, &z, &z, m, mr.n0inv, num_words);
185                montgomery(&mut z, &zz, &zz, m, mr.n0inv, num_words);
186                montgomery(&mut zz, &z, &z, m, mr.n0inv, num_words);
187                montgomery(&mut z, &zz, &zz, m, mr.n0inv, num_words);
188            }
189            montgomery(
190                &mut zz,
191                &z,
192                &powers[(yi >> (big_digit::BITS - n)) as usize],
193                m,
194                mr.n0inv,
195                num_words,
196            );
197            core::mem::swap(&mut z, &mut zz);
198            yi <<= n;
199            j += n;
200        }
201    }
202
203    // convert to regular number
204    montgomery(&mut zz, &z, &one, m, mr.n0inv, num_words);
205
206    zz.normalize();
207    // One last reduction, just in case.
208    // See golang.org/issue/13907.
209    if &zz >= m {
210        // Common case is m has high bit set; in that case,
211        // since zz is the same length as m, there can be just
212        // one multiple of m to remove. Just subtract.
213        // We think that the subtract should be sufficient in general,
214        // so do that unconditionally, but double-check,
215        // in case our beliefs are wrong.
216        // The div is not expected to be reached.
217        zz -= m;
218        if &zz >= m {
219            zz %= m;
220        }
221    }
222
223    zz.normalize();
224    zz
225}