為什麼礦機除了asic用的都是amd顯卡而不是nvidia顯卡?


就拿bitcoin來說。用OpenCL來實現的話,核心代碼是這樣的(來自 phoenix2/phoenix)

// This file is in the public domain

// Working word type for the kernel. The build selects the vector width:
// VECTORS4 -> four 32-bit lanes, VECTORS -> two lanes, neither -> scalar.
#if defined(VECTORS4)
typedef uint4 u;
#elif defined(VECTORS)
typedef uint2 u;
#else
typedef uint u;
#endif

// Table of 64 32-bit constants indexed by round number in the kernel below
// (K[4]..K[63] in the first pass, K[0]..K[60] in the second). The values
// match the SHA-256 round constants published in FIPS 180-4.
__constant uint K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

// rotr(x, y): rotate the 32-bit word(s) x right by y bits.
//
// With BITALIGN defined, the cl_amd_media_ops builtin amd_bitalign() is used;
// passing the same value as both source operands turns its 64-bit funnel
// shift into a rotate-right. Otherwise the standard OpenCL rotate() builtin
// (a rotate-left) is used with the complementary count 32 - y.
//
// Every macro argument is parenthesized so that compound expressions such as
// rotr(a ^ b, n + 1) bind as the caller intends.
#ifdef BITALIGN
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#define rotr(x, y) amd_bitalign((u)(x), (u)(x), (u)(y))
#else
#define rotr(x, y) rotate((u)(x), (u)(32 - (y)))
#endif

// Ch(x, y, z) and Ma(x, y, z): the SHA-256 "choose" and "majority" functions.
//   Ch: each result bit is taken from y where x is 1 and from z where x is 0.
//   Ma: each result bit is the majority vote of the three input bits.
//
// Some AMD devices have a BFI_INT opcode, which behaves exactly like the
// SHA-256 Ch function, but provides it in exactly one instruction. If
// BFI_INT is defined, use it for Ch (and Ma). Otherwise, fall back to plain
// bitwise logic.

#ifdef BFI_INT
// Well, slight problem... It turns out BFI_INT isn't actually exposed to
// OpenCL (or CAL IL for that matter) in any way. However, there is
// a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
// amd_bytealign, takes the same inputs, and provides the same output.
// We can use that as a placeholder for BFI_INT and have the application
// patch it after compilation.

// This is the BFI_INT function
#define Ch(x, y, z) amd_bytealign((u)(x), (u)(y), (u)(z))
// Ma can also be implemented in terms of BFI_INT...
#define Ma(x, y, z) amd_bytealign(((u)(z) ^ (u)(x)), (u)(y), (u)(x))
#else
// Portable forms: Ch = z ^ (x & (y ^ z)), Ma = (x & z) | (y & (x | z)).
#define Ch(x, y, z) ((u)(z) ^ ((u)(x) & ((u)(y) ^ (u)(z))))
#define Ma(x, y, z) (((u)(x) & (u)(z)) | ((u)(y) & ((u)(x) | (u)(z))))
#endif

// search: test one nonce (or 2 / 4 nonces when VECTORS / VECTORS4 is defined)
// per work-item by running two fully unrolled SHA-256-style compression
// passes and reporting any nonce whose final H word is zero.
//
// Parameters (all values are supplied by the host; the notes below describe
// only how this kernel uses them):
//   state0..state7   added to A..H after the first pass to form W0..W7, the
//                    input words of the second pass.
//   B1,C1,D1,F1,G1,H1  starting values for the working variables of the
//                    first pass; the first round computed here uses K[4], so
//                    the earlier rounds are presumably precomputed on the
//                    host -- TODO confirm against the host code.
//   base             added to get_global_id(0) to form the nonce when
//                    GOFFSET is not defined.
//   fW0,fW1,fW2,fW3,fW15,fW01r,fcty_e,fcty_e2
//                    host-precomputed terms folded into the message schedule
//                    and the first rounds.
//   output           result buffer; a hit stores the nonce at index
//                    get_local_id(0) and at index WORKSIZE (a build-time
//                    macro defined outside this block).
__kernel void search( const uint state0, const uint state1, const uint state2, const uint state3,
                      const uint state4, const uint state5, const uint state6, const uint state7,
                      const uint B1, const uint C1, const uint D1,
                      const uint F1, const uint G1, const uint H1,
                      const uint base,
                      const uint fW0, const uint fW1, const uint fW2, const uint fW3, const uint fW15, const uint fW01r, const uint fcty_e, const uint fcty_e2,
                      __global uint * output)
{
    u W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15;
    u A,B,C,D,E,F,G,H;
    u nonce;

    // Derive the nonce(s) for this work-item. In the vector builds each
    // work-item covers 4 (or 2) consecutive nonces, hence the shift.
#ifdef VECTORS4
#ifdef GOFFSET
    nonce = (get_global_id(0)<<2) + (u)(0, 1, 2, 3);
#else
    nonce = ((base + get_global_id(0))<<2) + (u)(0, 1, 2, 3);
#endif
#elif defined VECTORS
#ifdef GOFFSET
    nonce = (get_global_id(0)<<1) + (u)(0, 1);
#else
    nonce = ((base + get_global_id(0))<<1) + (u)(0, 1);
#endif
#else
#ifdef GOFFSET
    nonce = get_global_id(0);
#else
    nonce = base + get_global_id(0);
#endif
#endif

    // ---- First pass: rounds using K[4]..K[63] ----
    W3 = nonce + fW3;
    E = fcty_e + nonce; A = state0 + E; E = E + fcty_e2;
    D = D1 + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B1, C1) + K[ 4] + 0x80000000; H = H1 + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G1, E, F1);
    C = C1 + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B1) + K[ 5]; G = G1 + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F1, D, E);
    B = B1 + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[ 6]; F = F1 + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[ 7]; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[ 8]; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[ 9]; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[10]; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[11]; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[12]; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[13]; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[14]; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[15] + 0x00000280U; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[16] + fW0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[17] + fW1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    W2 = (rotr(nonce, 7) ^ rotr(nonce, 18) ^ (nonce >> 3U)) + fW2;
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[18] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[19] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    W4 = (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U)) + 0x80000000;
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[20] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    W5 = (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[21] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    W6 = (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U)) + 0x00000280U;
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[22] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    W7 = (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U)) + fW0;
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[23] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    W8 = (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U)) + fW1;
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[24] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    W9 = W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[25] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    W10 = W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[26] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    W11 = W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[27] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    W12 = W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[28] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    W13 = W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10U));
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[29] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    W14 = 0x00a00055U + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10U));
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[30] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    W15 = fW15 + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10U));
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[31] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    W0 = fW01r + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10U));
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[32] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    W1 = fW1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3U)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10U));
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[33] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3U)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10U));
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[34] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3U)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10U));
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[35] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3U)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U));
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[36] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3U)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[37] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3U)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U));
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[38] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3U)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U));
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[39] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3U)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U));
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[40] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3U)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[41] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3U)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[42] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3U)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[43] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3U)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[44] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3U)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10U));
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[45] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3U)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10U));
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[46] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3U)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10U));
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[47] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3U)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10U));
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[48] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3U)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10U));
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[49] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3U)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10U));
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[50] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3U)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10U));
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[51] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3U)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U));
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[52] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3U)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[53] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3U)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U));
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[54] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3U)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U));
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[55] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3U)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U));
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[56] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3U)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[57] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3U)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[58] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3U)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[59] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3U)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[60] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3U)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10U));
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[61] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3U)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10U));
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[62] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3U)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10U));
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[63] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);

    // Add the caller-supplied state to the first-pass result; W0..W7 become
    // the first eight message words of the second pass.
    W0 = A + state0; W1 = B + state1;
    W2 = C + state2; W3 = D + state3;
    W4 = E + state4; W5 = F + state5;
    W6 = G + state6; W7 = H + state7;

    // ---- Second pass: rounds using K[0]..K[60] ----
    // The first rounds start from literal constants, so parts of them are
    // folded into the immediates below.
    H = 0xb0edbdd0 + K[ 0] + W0; D = 0xa54ff53a + H; H = H + 0x08909ae5U;
    // Ch(D, 0x510e527f, 0x9b05688c) written out by hand:
    // 0xca0b3af3 == 0x510e527f ^ 0x9b05688c.
    G = 0x1f83d9abU + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (0x9b05688cU ^ (D & 0xca0b3af3U)) + K[ 1] + W1; C = 0x3c6ef372U + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(0xbb67ae85U, H, 0x6a09e667U);
    F = 0x9b05688cU + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, 0x510e527fU) + K[ 2] + W2; B = 0xbb67ae85U + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(0x6a09e667U, G, H);
    E = 0x510e527fU + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[ 3] + W3; A = 0x6a09e667U + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[ 4] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[ 5] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[ 6] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[ 7] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[ 8] + 0x80000000; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[ 9]; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[10]; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[11]; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[12]; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[13]; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[14]; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[15] + 0x00000100U; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3U));
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[16] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3U)) + 0x00a00000U;
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[17] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3U)) + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10U));
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[18] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3U)) + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10U));
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[19] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3U)) + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U));
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[20] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3U)) + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[21] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3U)) + 0x00000100U + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U));
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[22] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    W7 = W7 + 0x11002000U + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U));
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[23] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    W8 = 0x80000000 + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U));
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[24] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    W9 = W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[25] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    W10 = W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[26] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    W11 = W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[27] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    W12 = W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[28] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    W13 = W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10U));
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[29] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    W14 = 0x00400022U + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10U));
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[30] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    W15 = 0x00000100U + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3U)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10U));
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[31] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3U)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10U));
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[32] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3U)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10U));
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[33] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3U)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10U));
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[34] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3U)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10U));
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[35] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3U)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U));
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[36] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3U)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[37] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3U)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U));
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[38] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3U)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U));
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[39] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3U)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U));
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[40] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3U)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[41] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3U)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[42] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3U)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[43] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3U)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[44] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3U)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10U));
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[45] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3U)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10U));
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[46] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3U)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10U));
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[47] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3U)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10U));
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[48] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3U)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10U));
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[49] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
    W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3U)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10U));
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[50] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
    W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3U)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10U));
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[51] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
    W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3U)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U));
    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[52] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
    W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3U)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[53] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
    W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3U)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U));
    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[54] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
    W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3U)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U));
    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[55] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
    W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3U)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U));
    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[56] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
    // From here on only the value that ends up in H is needed, so the
    // second half of each round (the Ma / rotr 2,13,22 update) is dropped.
    W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3U)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[57] + W9; C = C + G;
    W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3U)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[58] + W10; B = B + F;
    W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3U)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[59] + W11; A = A + E;
    W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3U)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
    H = H + D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[60] + W12;

    // 0x5be0cd19 matches the last SHA-256 initial hash word in FIPS 180-4.
    H+=0x5be0cd19U;

    // Report a hit: the first lane whose H is zero has its nonce written to
    // this work-item's slot and to the shared slot at index WORKSIZE.
#ifdef VECTORS4
    if (H.x == 0)
    {
        output[WORKSIZE] = output[get_local_id(0)] = nonce.x;
    }
    else if (H.y == 0)
    {
        output[WORKSIZE] = output[get_local_id(0)] = nonce.y;
    }
    else if (H.z == 0)
    {
        output[WORKSIZE] = output[get_local_id(0)] = nonce.z;
    }
    else if (H.w == 0)
    {
        output[WORKSIZE] = output[get_local_id(0)] = nonce.w;
    }
#elif defined VECTORS
    if (H.x == 0)
    {
        output[WORKSIZE] = output[get_local_id(0)] = nonce.x;
    }
    else if (H.y == 0)
    {
        output[WORKSIZE] = output[get_local_id(0)] = nonce.y;
    }
#else
    if (H == 0)
    {
        output[WORKSIZE] = output[get_local_id(0)] = nonce;
    }
#endif
}

很明顯可以看出

  1. 全是整型運算
  2. 幾乎都是硬算
  3. IO非常少
  4. 分支流程非常少

這種傻大粗的程序,需要的是滿是ALU的硬體,對於分支預測、IO帶寬等常規來說重要的能力,倒是無所謂了。ASIC/FPGA上,這個可以堆晶體管實現。AMD的顯卡,同價位的情況下ALU遠多於NV的顯卡,所以也更適合。我用CUDA實現過類似的挖礦,在NV卡上跑,只有AMD的1/3到1/2速度,連電錢都賺不回來。


老規矩,先問是不是,再問為什麼。

利益相關:計算機硬體愛好者,閑余時間參與挖礦。老舊的設備就不獻醜了,新設備見圖。

簡略版:

Q: 是不是礦機除了asic用的都是amd顯卡而不是nvidia顯卡?

A: 不是。

詳細版:

目前挖礦最流行的大概是四個幣種,BTC、LTC、ETH、ZEC。前兩種都有了專門的ASIC礦機,顯卡挖礦收益為負(考慮到設備折舊、電費等),後兩種是顯卡挖礦的主要陣地。

ETH挖礦是典型的IO密集型場景,其演算法為 DaggerHashimoto 。HashRate(下文稱為算力)極大依賴於顯存的延遲(也就是參數上的時序)。網上流傳著很多Polaris架構的「雞血」BIOS,查看後發現都是通過修改顯存頻率和時序實現的。此種演算法是Polaris的主陣地,因為Polaris的GDDR5顯存延時較低,算力出色。同時,Pascal的GTX1060和GTX1070在此種演算法里也有較好的表現。但是Fiji和GTX1080等顯卡在此種演算法下表現很差,因為HBM/GDDR5X的延遲比GDDR5高得多。至於每瓦收益,我沒有做比較,原因會在下面說明。

ZEC挖礦是典型的運算密集型場景,其演算法為Equihash。此種演算法對顯存頻率和時序的要求不高,同款顯卡的HashRate和主頻基本是線性相關。在這種演算法下,RX480的算力大約是300Sol/s,GTX1080的算力大約是520Sol/s,GTX1080Ti的算力大約是730Sol/s。Pascal架構大約是2.6Sol/Watt,而Polaris架構大約是2.0Sol/Watt。

結合目前的幣價,A卡幾乎都在挖ETH,1080/1080Ti幾乎都在挖ZEC。為什麼?因為收益高啊。A卡挖ZEC收益驟減,1080/1080Ti不適合挖ETH。比較值得關注的是1070,在目前的時間點上挖ETH和ZEC收益幾乎一致(ZEC略高),可以根據幣價選擇幣種。有一點需要注意的是,RX480的六卡礦機,工作功耗是高於1000W的,而1070的六卡礦機,工作功耗不到900W,公版甚至不到800W。關於功耗這個數據請不要和我辯,您自己組了就知道了。

撕逼前請先亮出ETH/ZEC虛擬貨幣地址、礦池鏈接並向我轉0.001個以太幣,大約摺合1.52元人民幣,我會通過支付寶向您轉回2元。非礦工請勿在此閑聊。

我的以太幣地址: 0x35e9787ca1f52a574cb1b2ce105c2fbf3337345f

禁止轉載,一旦轉載即視為同意按照每百字1比特幣計算稿酬。我的比特幣地址: 184teDEU52LePd32VvjrQeDcQwkYjC8uH5

順便求個贊。


假設同價位的卡圖形渲染性能基本持平,而AMD的流處理器數量大約是NV的兩倍,原因在於基礎架構設計缺陷導致大多數情形GCN的核心不能高負荷運轉(做到這個是很難的,也是NVIDIA在圖形領域的傳統優勢),計算單元空閑周期多導致利用率低,而在做比圖形渲染簡單得多的挖礦操作時,核心多還是有優勢的,因為沒有利用率的問題,演算法並行化很直接,但這不說明GCN更好,而是真的落後。。


比特幣挖礦的SHA256運算是整型運算,和浮點沒關係!!!

不懂的不要強答!!!

那為什麼A卡和N卡的差距如此之大呢?比特幣挖掘器採用的是SHA-256,這是由美國國家安全局發明的一種安全散列函數,一般用於密碼加密與解密。這種演算法會進行大量32位整數循環右移運算,這個操作在AMD GPU那裡可以通過單一硬體指令實現,而在NVIDIA GPU那裡需要三次硬體指令來模擬(2移+1加),僅這一條就為AMD帶來額外的1.7倍運算效率優勢(大約1900指令來執行SHA-256壓縮操作,而不是NVIDIA的大約3250指令)。

轉碼/解密/挖礦!顯卡計算能力大對比_顯卡評測-泡泡網

另一方面目前同檔次的顯卡,A卡理論性能仍然是N卡的差不多兩倍,也有 Yubo Zhang 所說的因素。


感覺那個nv圖形架構師說的話好奇怪啊……不同架構流處理器效率不同很正常啊,費米單流處理器效率完爆開普勒,難道開普勒比費米落後?搞不懂什麼邏輯。泰坦流處理器數量比7870多不少,頻率也沒低多少,挖礦依舊不如7870,是不是n卡效率低呀……如果不同架構流處理器直接對比,那n卡比a卡少那麼多流處理器數量核心面積還差不多是不是工藝有點問題(手動眼斜),依照樓上二位的描述,一個n家流處理器頂2個a家流處理器,可是同性能a卡挖礦效率是n卡3倍啊……所以您二位要是故意的我也就不說啥了。他說的某些地方有一定道理,a卡相對來說注重流處理器數量不追求效率,因此在無腦暴力的挖礦演算法面前有一定優勢。但和gcn架構是否比帕斯卡馬克思韋開普勒落後沒有直接關係,最近一代rx580性能功耗比遠低於1070和a家用的gf的辣雞工藝有一定關係,具體我還是不太了解,只是您要挖礦,買a卡沒錯,您要玩遊戲,選適合自己的就好


可以看一下zcash的equihash,計算協議是內存依賴的所以沒法兒做asic。另外n卡挖zcash也很吊。


因為AMD垃圾啊^_^

在圖形計算領域,AMD要比Nvidia多使用接近兩倍的核心單元才能夠在效能上和Nvidia打成平手。

而在挖礦專註於浮點運算速度項目上,核心單元數量又是最重要的影響因素。so~

,挖礦用AMD沒毛病!


本人並非晶片與演算法相關專業人士,僅以小額投資者、研究加密貨幣的身份回答

所知SHA-256與Scrypt演算法都是屬於ASIC能建構的演算方案,連去What to mine的官網看了一下,標註為ASIC挖礦的演算法還有X11(達世)與Quark以及Qubit等共五種,不過後者這兩種我長期來看沒有特別突出的幣種,暫時列入然並卵的行列有待觀察。

至於樓主標題有點小題大作了,A卡N卡都可挖,相對效能不同,有那麼一點標題殺人的意味,否則我兩位朋友就不會大量進貨N卡的P106做為礦場的主角了,因為N卡相對於A卡有更容易「管理」的特色,所謂管理不僅是安裝上的難度問題,也有著後續溫度控制上N卡明顯優於A卡的對比在(我個人各有N1060 6GB與A470 4GB的設備對照)。

A卡當然具備較高效能的優點,這點當然是無可厚非的,同時使用Claymore的挖礦軟體對照下,A卡具備在雙挖同時算力不降反能微幅上升的補正效益(當然dcri值需要微調,同時特別需要注意溫度管理,寧可不毀卡,莫要貪心),所以稍具電腦管理知識經驗的人常常選擇A卡,然則我仍對N卡具備一定的信賴度(穩定性)。大致我是這麼對兩種卡別的認定的。

也可能樓主發文的時候基於P106尚未濫觴的同時,所以在評估上稍有差異,那麼就是歷史技術設備的差異了,畢竟加密貨幣真正紅起來也仍不過三四年時間,能挺得住多久?雖然我很想看好他,也真的在漸進投入,真要問十年後,個人還是覺得且行且觀察吧。


不聊什麼硬體GPU品牌和理論知識了,目前純硬礦工周期維護成本都太高,只談談如何賺錢。幣圈無非兩種,炒幣 or 挖礦,我個人是同時進行,一個短期可以利滾利,一個長期穩賺不賠。現在都是雲挖礦了,我主挖比特幣,直接網站購買完算力坐等每日收益,有興趣的看下去吧。

個人一年的炒幣情況

都是今年的事,有被套超過4個月的,有盈利,別說什麼入行晚,我來的夠晚了,只要你拿得住,注意止盈止損就能賺錢,大投入幣種如下,其餘波段的,中期來來回回的小幣就不說了

BTC 比特幣,我2W2買的 + 挖礦的到現在還拿著 : )

LTC 萊特幣,358 買入,1600拋了,12月最新價格2200左右

NEO 螞蟻,19追高買入,280賣出,12月最高價350左右

EOS 9塊買入,28賣出,31接回來被套了很久,直到12月份55又賣出,12月最高價58左右

推薦幣:TNB / KNC /BTS / VEN / ETC / ETH / RCN / VEN / RXP / MANA / QTUM

註:推薦只是建議,不構成投資指點,不過這上面的幣你自己看看一周的漲幅

—————————————————————————————

2017-12-09更新

上面這些幣又他媽新高了,漲幅200%+

炒幣和挖礦一定要同時進行,確保不會踏空,交易平台地址如下

最新國內靠譜比特幣交易平台大全

—————————————————————————————

挖礦

(1)普通挖礦

CPU挖礦 -> GPU挖礦 -> FPGA挖礦 -> ASIC挖礦 -> 大規模集群挖礦

背景資料:當然CPU挖礦時代早已過去了,算力不足,為什麼你想配電腦買不到正常價位的顯卡?因為都被平民級礦工囤走了,普通挖礦就是指購買物理設備自行安放進行挖礦牽扯到(供電費,主板,顯卡,配件,噪音,散熱,佔地)或者你配置好礦機託管到天價機房裡

挖礦速度,專業的說法叫算力,就是計算機每秒產生hash碰撞的能力。也就是說,我們手裡的礦機每秒能做多少次hash碰撞,就是算力。算力就是挖比特幣的能力,算力越高,挖得比特幣越多,回報越高

註:礦機投資回本後剩下的收益都是穩定的,沒有炒幣風險大,個人推薦挖礦和炒幣同時進行

(2)雲挖礦

這個就一聽名字就比較高端了,屬於購買後只要填寫好貨幣錢包地址就行,不用你操心

背景資料:雲礦機是唯一一個提供進入成本低,風險低,費用低的挖礦方式,有別於傳統的需要採購高配置的硬體並進行專業的維護的挖礦模型。由於挖礦的複雜性和外部因素依賴性(如開採困難,比特幣匯率,設備成本等),幾乎不可能預測個人的收益的多少,故雲挖礦是一種成本低,風險低,費用低的挖礦方式,不想自己組礦機就可以在該平台購買挖礦算力,挖掘自己的加密貨幣

推薦的海外雲挖礦平台,(需翻牆後才能註冊)

收益指數:

操作難度:容易

1)Hashflare 【強烈推薦,收益高,頁面簡潔,目前已穩定挖了6個比特幣】

  • 註冊網址:https://hashflare.io

簡介:HashFlare是由HashCoins團隊的挖礦演算法專家帶來的一種新領域的雲礦機服務。團隊3年前從比特幣開始參與數字貨幣挖掘領域,希望有助於礦機服務的開發並在隨後發展、建立和採用比特幣的匯率。不難預測的是比特幣作為主要的顛覆性技術存在了幾十年。Hashflare為挖礦算力販售平台可以挖掘比特幣、以太幣、達世幣等等

購買流程:先選購算力,比特幣或信用卡支付然後選擇雲挖礦幣種就行了,目前該網站支持的挖礦演算法有:SHA256,Scrypt,X11等5種,能夠挖BTC,LTC,DASH,ZEC等等。Hashflare目前建議購買 SHA-256 比特幣挖礦合約,每天收益最高!! 約為0.5% !! 月收益約為15%,我自己也用的是這個,目前挖了6個btc

註:信用卡支付每日購買限額好像是3000USDT美元(2W人民幣),BTC支付沒有支付限額,自己選擇合適的方式吧,價格差不多

個人收益情況

購買算力:100TH/s

24小時比特幣收益:0.01565671個,按照現價128000計算,每天2004RMB

(很多人私信給我說漲價了,的確漲了,只能怪你們下手慢了,撐死膽大的餓死膽小的)

(每日收益和比特幣現價是有關係的,但是長期看漲的話不要關注現價,關注有多少個幣)

後言:凡是承諾每天巨額回報的雲挖礦網站都是騙子

2)Genesis Mining

收益指數:

操作難度:一般

  • 註冊網站:https://www.genesis-mining.com

3%優惠折扣代碼:NIbQCY

簡介:Genesis Mining創建於2013年,是全球最大的雲算力供應商之一。聯合創始人Marco是比特幣的早期用戶,致力於發展雲算力服務市場,為廣大客戶提供一個公開透明的平台。 Genesis Mining同時也積極參加各類比特幣相關的會議活動。Genesis mining公司挖礦算力販售可以挖掘比特幣, 以太幣 , 門羅幣等

購買流程:先選定你要選購挖礦的算力的大小,然後比特幣支付雲挖礦費用,之後就可以自動挖礦了。支持挖礦的幣種有btc,Zec,ltc,dash,eth和門羅6個幣種。輸入我的推廣碼: IWkGaC 購買任何挖礦算力都會折扣3%。 9月8號才過2天預售的比特幣挖礦合約就被賣光了!下次請早,合約開始販售時,一樣會在第一時間郵件通知

後言:凡是承諾每天巨額回報的雲挖礦網站都是騙子

推廣閱讀:想去交易所炒幣的下面是最齊的

最新國內靠譜比特幣交易平台大全

更多乾貨歡迎加入收費小密圈:微信掃二維碼加群


一句大白話 a卡更擅長大量簡單的運算 n卡擅長少量複雜的運算


划算,另外有全新570礦機出手


首先說,我是所答非所問。僅占坑。

挖礦永遠是幣價、算力和設備的最優選擇。目前,從算力和幣價來看,比特幣是回本比較慢的選擇,用最好的S9礦機,最快150天回本,而且有專業礦機被淘汰變垃圾的風險,但幣價還是比較穩應該會最後崩盤;萊特幣是回本最快的,用最快的L3+,大概40天回本,但同樣是要預定,有變垃圾風險,但幣價穩;以太幣在rx470,480是1500塊的時候是不錯,但現在580要3000RMB也買不到,回本周期也大概80天往上,優勢就是顯卡挖不了礦還有一定殘值,風險就是以太以後很可能宣布要不能挖了;零幣ZCASH在目前1070都過3000的現狀下,最好的選擇就是1080Ti,一個是還能買到,二是價格在5500左右還行,100多天回本,優點就是零幣2016年底才出來,鑒於影子經紀人都認可,這個東西還是有前途的,短期沒有不能挖的隱患,但缺點是儘管設計宣稱演算法抵制ASIC礦機,但也不排除比1080Ti更經濟礦機的出現,到時候算力增長過快,擠壓挖礦空間。

事實上,現在這個時間不論是專業礦機還是顯卡都統統很難買到,大家都知道無論什麼投資能在半年到一年就回本簡直就是暴利了,所以不要買期貨,不要猶豫,在幣價最高的時候抓緊投資,回本,之後再說。誰知道S9在9月份到貨的時候幣價還能不能在這個位置了。

目前一個6*1080Ti礦機,成本32K,挖了不到一個月,日平均收益260RMB(這幾天跌了),哪位大神幫助設計一個比1080Ti挖ZCASH更經濟礦機!優化演算法也行!跪謝! @叛逆者


推薦閱讀:

到底 CPU 的 64 位技術指的是什麼?64 位系統是否是指對此的支持?
個人電腦領域:為什麼現在AMD CPU主頻高、核心多,而實際性能卻低於intel CPU呢?
如何評價AMD織女星(Vega)新顯卡?
怎麼看待 AMD 在北美的 Ryzen 7 發布會?接下來AMD的CPU市場趨勢會如何?
組裝新電腦有沒有必要用 AMD?

TAG:AMD | 圖形處理器GPU | CUDA | 比特幣Bitcoin | 比特幣礦機 |