You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: content/english/hpc/algorithms/factorization.md
+18-15Lines changed: 18 additions & 15 deletions
Display the source diff
Display the rich diff
Original file line number
Diff line number
Diff line change
@@ -271,12 +271,13 @@ u64 diff(u64 a, u64 b) {
271
271
return a > b ? a - b : b - a;
272
272
}
273
273
274
+
const u64 SEED = 42;
275
+
274
276
u64 find_factor(u64 n) {
275
-
u64 x = x0, y = x0, g = 1;
277
+
u64 x = SEED, y = SEED, g = 1;
276
278
while (g == 1) {
277
-
x = f(x, a, n);
278
-
y = f(y, a, n);
279
-
y = f(y, a, n);
279
+
x = f(f(x, n), n); // advance x twice
280
+
y = f(y, n); // advance y once
280
281
g = gcd(diff(x, y));
281
282
}
282
283
return g;
@@ -290,13 +291,13 @@ While it processes 25k 30-bit numbers — almost 15 times slower than the fastes
290
291
Floyd's cycle-finding algorithm has the drawback that it performs more iterator increments than necessary. One way to fix this is to store the values that the faster iterator visits and compute the gcd using the difference of $x_i$ and $x_{\lfloor i / 2 \rfloor}$, but it can also be done without extra memory using this trick:
291
292
292
293
```c++
293
-
u64 find_factor(u64 n, u64 x0 = 2, u64 a = 1) {
294
-
u64 x = x0, y = x0;
294
+
u64 find_factor(u64 n) {
295
+
u64 x = SEED;
295
296
296
297
for (int l = 256; l < (1 << 20); l *= 2) {
297
-
x = y;
298
+
u64 y = x;
298
299
for (int i = 0; i < l; i++) {
299
-
y = f(y, a, n);
300
+
x = f(x, n);
300
301
if (u64 g = gcd(diff(x, y), n); g != 1)
301
302
return g;
302
303
}
@@ -313,14 +314,14 @@ We can remove the logarithm from the asymptotic using the fact that if one of $a
313
314
```c++
314
315
const int M = 1024;
315
316
316
-
u64 find_factor(u64 n, u64 x0 = 2, u64 a = 1) {
317
-
u64 x = x0, y = x0, p = 1;
317
+
u64 find_factor(u64 n) {
318
+
u64 x = SEED;
318
319
319
320
for (int l = M; l < (1 << 20); l *= 2) {
320
-
x = y;
321
+
u64 y = x, p = 1;
321
322
for (int i = 0; i < l; i += M) {
322
323
for (int j = 0; j < M; j++) {
323
-
y = f(y, a, n);
324
+
y = f(y, n);
324
325
p = (u128) p * diff(x, y) % n;
325
326
}
326
327
if (u64 g = gcd(p, n); g != 1)
@@ -340,6 +341,8 @@ The next step is to actually apply [Montgomery Multiplication](/hpc/number-theor
340
341
341
342
This is exactly the type of problem where we need specific knowledge: we are computing 64-bit remainders modulo a value that is not a compile-time constant, and the compiler can't really do much to optimize it.
342
343
344
+
We do not need to convert numbers out of Montgomery representation before computing the GCD.
0 commit comments