21 #include "runtime.hpp"
27 #include "algorithm.hpp"
31 namespace STL = STL_NAMESPACE;
/* SHA-1 round helper macros (FIPS 180-1).
 *
 * W is the 16-word circular message schedule and buffer_ the current
 * input block; rotlFixed is the project's left-rotate helper.
 * All macro parameters are parenthesized so the macros remain correct
 * even if a caller ever passes a compound expression.
 */

// Schedule word for rounds 0-15: taken straight from the message block.
#define blk0(i) (W[(i)] = buffer_[(i)])
// Schedule word for rounds 16-79: XOR of four earlier words, rotated
// left by 1, stored back into the 16-entry circular buffer.
#define blk1(i) (W[(i)&15] = \
    rotlFixed(W[((i)+13)&15]^W[((i)+8)&15]^W[((i)+2)&15]^W[(i)&15],1))

// Round logical functions f_t: Ch, Parity, Maj, Parity (FIPS 180-1).
#define f1(x,y,z) ((z)^((x)&((y)^(z))))
#define f2(x,y,z) ((x)^(y)^(z))
#define f3(x,y,z) (((x)&(y))|((z)&((x)|(y))))
#define f4(x,y,z) ((x)^(y)^(z))

// One SHA-1 round: z += f(w,x,y) + W[t] + K_t + rotl(v,5); w = rotl(w,30).
// Each R* pairs the right schedule macro with its round constant K_t.
#define R0(v,w,x,y,z,i) (z) += f1((w),(x),(y)) + blk0(i) + 0x5A827999 + \
    rotlFixed((v),5); (w) = rotlFixed((w),30);
#define R1(v,w,x,y,z,i) (z) += f1((w),(x),(y)) + blk1(i) + 0x5A827999 + \
    rotlFixed((v),5); (w) = rotlFixed((w),30);
#define R2(v,w,x,y,z,i) (z) += f2((w),(x),(y)) + blk1(i) + 0x6ED9EBA1 + \
    rotlFixed((v),5); (w) = rotlFixed((w),30);
#define R3(v,w,x,y,z,i) (z) += f3((w),(x),(y)) + blk1(i) + 0x8F1BBCDC + \
    rotlFixed((v),5); (w) = rotlFixed((w),30);
#define R4(v,w,x,y,z,i) (z) += f4((w),(x),(y)) + blk1(i) + 0xCA62C1D6 + \
    rotlFixed((v),5); (w) = rotlFixed((w),30);
// SHA-1 initial hash state H0..H4 (FIPS 180-1).
// NOTE(review): the enclosing Init() signature/braces are not visible in
// this extract.
61 digest_[0] = 0x67452301L;
62 digest_[1] = 0xEFCDAB89L;
63 digest_[2] = 0x98BADCFEL;
64 digest_[3] = 0x10325476L;
65 digest_[4] = 0xC3D2E1F0L;
// SHA-256 initial hash state H0..H7 (FIPS 180-2).
74 digest_[0] = 0x6A09E667L;
75 digest_[1] = 0xBB67AE85L;
76 digest_[2] = 0x3C6EF372L;
77 digest_[3] = 0xA54FF53AL;
78 digest_[4] = 0x510E527FL;
79 digest_[5] = 0x9B05688CL;
80 digest_[6] = 0x1F83D9ABL;
81 digest_[7] = 0x5BE0CD19L;
// SHA-224 initial hash state H0..H7 (FIPS 180-2 change notice / RFC 3874).
91 digest_[0] = 0xc1059ed8;
92 digest_[1] = 0x367cd507;
93 digest_[2] = 0x3070dd17;
94 digest_[3] = 0xf70e5939;
95 digest_[4] = 0xffc00b31;
96 digest_[5] = 0x68581511;
97 digest_[6] = 0x64f98fa7;
98 digest_[7] = 0xbefa4fa4;
106 #ifdef WORD64_AVAILABLE
// SHA-512 initial hash state H0..H7 (FIPS 180-2); 64-bit words.
110 digest_[0] = W64LIT(0x6a09e667f3bcc908);
111 digest_[1] = W64LIT(0xbb67ae8584caa73b);
112 digest_[2] = W64LIT(0x3c6ef372fe94f82b);
113 digest_[3] = W64LIT(0xa54ff53a5f1d36f1);
114 digest_[4] = W64LIT(0x510e527fade682d1);
115 digest_[5] = W64LIT(0x9b05688c2b3e6c1f);
116 digest_[6] = W64LIT(0x1f83d9abfb41bd6b);
117 digest_[7] = W64LIT(0x5be0cd19137e2179);
// SHA-384 initial hash state H0..H7 (FIPS 180-2); output is later
// truncated to 384 bits.
127 digest_[0] = W64LIT(0xcbbb9d5dc1059ed8);
128 digest_[1] = W64LIT(0x629a292a367cd507);
129 digest_[2] = W64LIT(0x9159015a3070dd17);
130 digest_[3] = W64LIT(0x152fecd8f70e5939);
131 digest_[4] = W64LIT(0x67332667ffc00b31);
132 digest_[5] = W64LIT(0x8eb44a8768581511);
133 digest_[6] = W64LIT(0xdb0c2e0d64f98fa7);
134 digest_[7] = W64LIT(0x47b5481dbefa4fa4);
141 #endif // WORD64_AVAILABLE
// Copy constructor: duplicate the full hash state -- length counters,
// chaining value (digest_) and any buffered partial block.
144 SHA::SHA(
const SHA& that) : HASHwithTransform(DIGEST_SIZE / sizeof(word32),
147 buffLen_ = that.buffLen_;
148 loLen_ = that.loLen_;
149 hiLen_ = that.hiLen_;
151 memcpy(digest_, that.digest_, DIGEST_SIZE);
152 memcpy(buffer_, that.buffer_, BLOCK_SIZE);
// Copy constructor: duplicate length counters, chaining value and
// buffered input of the source SHA-256 object.
156 SHA256::SHA256(
const SHA256& that) : HASHwithTransform(DIGEST_SIZE /
157 sizeof(word32), BLOCK_SIZE)
159 buffLen_ = that.buffLen_;
160 loLen_ = that.loLen_;
161 hiLen_ = that.hiLen_;
163 memcpy(digest_, that.digest_, DIGEST_SIZE);
164 memcpy(buffer_, that.buffer_, BLOCK_SIZE);
// Copy constructor for SHA-224; the base is sized with the full
// SHA256::DIGEST_SIZE because SHA-224 keeps eight words of internal state.
168 SHA224::SHA224(
const SHA224& that) : HASHwithTransform(SHA256::DIGEST_SIZE /
169 sizeof(word32), BLOCK_SIZE)
171 buffLen_ = that.buffLen_;
172 loLen_ = that.loLen_;
173 hiLen_ = that.hiLen_;
// NOTE(review): only DIGEST_SIZE (the truncated 224-bit size) is copied
// here although the base was sized with SHA256::DIGEST_SIZE -- the eighth
// state word appears not to be copied; confirm against the class header.
175 memcpy(digest_, that.digest_, DIGEST_SIZE);
176 memcpy(buffer_, that.buffer_, BLOCK_SIZE);
180 #ifdef WORD64_AVAILABLE
// Copy constructor: duplicate length counters, chaining value and
// buffered input of the source SHA-512 object (64-bit words).
182 SHA512::SHA512(
const SHA512& that) : HASH64withTransform(DIGEST_SIZE /
183 sizeof(word64), BLOCK_SIZE)
185 buffLen_ = that.buffLen_;
186 loLen_ = that.loLen_;
187 hiLen_ = that.hiLen_;
189 memcpy(digest_, that.digest_, DIGEST_SIZE);
190 memcpy(buffer_, that.buffer_, BLOCK_SIZE);
// Copy constructor for SHA-384; base sized with SHA512::DIGEST_SIZE since
// SHA-384 keeps eight 64-bit words of internal state.
194 SHA384::SHA384(
const SHA384& that) : HASH64withTransform(SHA512::DIGEST_SIZE /
195 sizeof(word64), BLOCK_SIZE)
197 buffLen_ = that.buffLen_;
198 loLen_ = that.loLen_;
199 hiLen_ = that.hiLen_;
// NOTE(review): like SHA224 above, only the truncated DIGEST_SIZE is
// copied although the state holds SHA512::DIGEST_SIZE bytes -- confirm.
201 memcpy(digest_, that.digest_, DIGEST_SIZE);
202 memcpy(buffer_, that.buffer_, BLOCK_SIZE);
205 #endif // WORD64_AVAILABLE
// Assignment operators for all digest classes. Bodies are elided in this
// extract; presumably copy-and-swap using the Swap() helpers defined
// below -- TODO confirm against the full source.
208 SHA& SHA::operator= (
const SHA& that)
217 SHA256& SHA256::operator= (
const SHA256& that)
226 SHA224& SHA224::operator= (
const SHA224& that)
235 #ifdef WORD64_AVAILABLE
237 SHA512& SHA512::operator= (
const SHA512& that)
246 SHA384& SHA384::operator= (
const SHA384& that)
254 #endif // WORD64_AVAILABLE
// Exchange the length/buffer counters with `other`.
257 void SHA::Swap(SHA& other)
259 STL::swap(loLen_, other.loLen_);
260 STL::swap(hiLen_, other.hiLen_);
261 STL::swap(buffLen_, other.buffLen_);
// NOTE(review): digest_ and buffer_ are copied FROM other, not exchanged.
// That is adequate for copy-and-swap assignment (the temporary is
// discarded) but this is not a symmetric swap -- confirm intent.
263 memcpy(digest_, other.digest_, DIGEST_SIZE);
264 memcpy(buffer_, other.buffer_, BLOCK_SIZE);
// Exchange length/buffer counters; digest_/buffer_ are copied from
// `other` rather than exchanged (see NOTE on SHA::Swap -- same pattern).
268 void SHA256::Swap(SHA256& other)
270 STL::swap(loLen_, other.loLen_);
271 STL::swap(hiLen_, other.hiLen_);
272 STL::swap(buffLen_, other.buffLen_);
274 memcpy(digest_, other.digest_, DIGEST_SIZE);
275 memcpy(buffer_, other.buffer_, BLOCK_SIZE);
// Same one-way copy pattern for SHA-224; note only the truncated
// DIGEST_SIZE bytes of the eight-word state are copied -- confirm.
279 void SHA224::Swap(SHA224& other)
281 STL::swap(loLen_, other.loLen_);
282 STL::swap(hiLen_, other.hiLen_);
283 STL::swap(buffLen_, other.buffLen_);
285 memcpy(digest_, other.digest_, DIGEST_SIZE);
286 memcpy(buffer_, other.buffer_, BLOCK_SIZE);
290 #ifdef WORD64_AVAILABLE
// 64-bit variants: exchange counters; digest_/buffer_ are copied from
// `other` rather than exchanged (same pattern as SHA::Swap above).
292 void SHA512::Swap(SHA512& other)
294 STL::swap(loLen_, other.loLen_);
295 STL::swap(hiLen_, other.hiLen_);
296 STL::swap(buffLen_, other.buffLen_);
298 memcpy(digest_, other.digest_, DIGEST_SIZE);
299 memcpy(buffer_, other.buffer_, BLOCK_SIZE);
303 void SHA384::Swap(SHA384& other)
305 STL::swap(loLen_, other.loLen_);
306 STL::swap(hiLen_, other.hiLen_);
307 STL::swap(buffLen_, other.buffLen_);
309 memcpy(digest_, other.digest_, DIGEST_SIZE);
310 memcpy(buffer_, other.buffer_, BLOCK_SIZE);
313 #endif // WORD64_AVAILABLE
// Absorb `len` bytes of input. One path simply defers to the portable
// HASHwithTransform::Update; the other (asm) path buffers and hands whole
// blocks to AsmTransform. The #if/#else structure separating the two
// paths is elided in this extract -- confirm against the full source.
319 void SHA::Update(
const byte* data, word32 len)
322 HASHwithTransform::Update(data, len);
// Asm path: view the word buffer as raw bytes for partial-block fills.
326 byte* local =
reinterpret_cast<byte*
>(buffer_);
// Top up any partially filled block first.
330 word32 add = min(len, BLOCK_SIZE - buffLen_);
331 memcpy(&local[buffLen_], data, add);
337 if (buffLen_ == BLOCK_SIZE) {
// Byte-reverse for big-endian word order before compressing.
338 ByteReverse(local, local, BLOCK_SIZE);
340 AddLength(BLOCK_SIZE);
// Feed all remaining whole blocks straight to the assembly transform.
347 word32 times = len / BLOCK_SIZE;
349 AsmTransform(data, times);
350 const word32 add = BLOCK_SIZE * times;
// Stash the trailing partial block for the next call.
359 memcpy(&local[buffLen_], data, len);
// SHA-1 compression: process one 64-byte block from buffer_ into the
// five-word chaining value digest_ (80 rounds, FIPS 180-1).
367 void SHA::Transform()
// 16-word circular message schedule, indexed mod 16 by blk0/blk1.
369 word32 W[BLOCK_SIZE /
sizeof(word32)];
// Working variables seeded from the current chaining value.
372 word32 a = digest_[0];
373 word32 b = digest_[1];
374 word32 c = digest_[2];
375 word32 d = digest_[3];
376 word32 e = digest_[4];
// Four groups of 20 rounds. The register roles rotate via argument
// order, so no explicit rotation of a..e is needed between rounds.
379 R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
380 R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
381 R0(c,d,e,a,b, 8); R0(b,c,d,e,a, 9); R0(a,b,c,d,e,10); R0(e,a,b,c,d,11);
382 R0(d,e,a,b,c,12); R0(c,d,e,a,b,13); R0(b,c,d,e,a,14); R0(a,b,c,d,e,15);
384 R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
386 R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
387 R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
388 R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
389 R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
390 R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
392 R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
393 R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
394 R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
395 R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
396 R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
398 R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
399 R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
400 R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
401 R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
402 R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
// NOTE(review): the feed-forward additions (digest_[i] += a..e) are not
// visible in this extract -- confirm they precede the cleanup below.
// Scrub round state so intermediate values don't linger on the stack.
412 a = b = c = d = e = 0;
413 memset(W, 0,
sizeof(W));
// SHA-256 message schedule: rounds 16+ mix four earlier words through
// the small-sigma functions, stored in a 16-entry circular buffer.
417 #define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
// Ch (choose) and Maj (majority) round functions (FIPS 180-2).
419 #define Ch(x,y,z) (z^(x&(y^z)))
420 #define Maj(x,y,z) ((x&y)|(z&(x|y)))
// Rotating aliases into the working-variable array T[8]: for round i,
// a(i)..h(i) pick the right slots so the variables never physically move.
422 #define a(i) T[(0-i)&7]
423 #define b(i) T[(1-i)&7]
424 #define c(i) T[(2-i)&7]
425 #define d(i) T[(3-i)&7]
426 #define e(i) T[(4-i)&7]
427 #define f(i) T[(5-i)&7]
428 #define g(i) T[(6-i)&7]
429 #define h(i) T[(7-i)&7]
// One SHA-256/512 round; the first 16 rounds (j==0) read the raw block
// via blk0, later rounds run the schedule via blk2.
431 #define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk0(i));\
432 d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
// SHA-256 big-sigma (Sigma0/Sigma1) and small-sigma (sigma0/sigma1)
// functions; 32-bit rotate amounts per FIPS 180-2.
435 #define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
436 #define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
437 #define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
438 #define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
// SHA-256 round constants K_t: first 32 bits of the fractional parts of
// the cube roots of the first 64 primes (FIPS 180-2).
441 static const word32 K256[64] = {
442 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
443 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
444 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
445 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
446 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
447 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
448 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
449 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
450 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
451 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
452 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
453 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
454 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
455 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
456 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
457 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
// Shared SHA-256/SHA-224 compression of one 64-byte block. File-static:
// both classes delegate here with their own digest_/buffer_ pointers.
461 static void Transform256(word32* digest_, word32* buffer_)
463 const word32* K = K256;
// T holds the eight working variables, seeded from the chaining value.
// (The declarations of W and T are elided in this extract.)
469 memcpy(T, digest_,
sizeof(T));
// 64 rounds, 16 per iteration; j offsets the K/W window used by R(i).
472 for (
unsigned int j = 0; j < 64; j += 16) {
473 R( 0); R( 1); R( 2); R( 3);
474 R( 4); R( 5); R( 6); R( 7);
475 R( 8); R( 9); R(10); R(11);
476 R(12); R(13); R(14); R(15);
// NOTE(review): the feed-forward into digest_ is elided in this extract.
// Scrub the schedule and working variables.
490 memset(W, 0,
sizeof(W));
491 memset(T, 0,
sizeof(T));
// SHA-256 and SHA-224 share the same compression function; they differ
// only in their initial state and in output truncation.
502 void SHA256::Transform()
504 Transform256(digest_, buffer_);
508 void SHA224::Transform()
510 Transform256(digest_, buffer_);
514 #ifdef WORD64_AVAILABLE
// SHA-512 round constants K_t: first 64 bits of the fractional parts of
// the cube roots of the first 80 primes (FIPS 180-2).
516 static const word64 K512[80] = {
517 W64LIT(0x428a2f98d728ae22), W64LIT(0x7137449123ef65cd),
518 W64LIT(0xb5c0fbcfec4d3b2f), W64LIT(0xe9b5dba58189dbbc),
519 W64LIT(0x3956c25bf348b538), W64LIT(0x59f111f1b605d019),
520 W64LIT(0x923f82a4af194f9b), W64LIT(0xab1c5ed5da6d8118),
521 W64LIT(0xd807aa98a3030242), W64LIT(0x12835b0145706fbe),
522 W64LIT(0x243185be4ee4b28c), W64LIT(0x550c7dc3d5ffb4e2),
523 W64LIT(0x72be5d74f27b896f), W64LIT(0x80deb1fe3b1696b1),
524 W64LIT(0x9bdc06a725c71235), W64LIT(0xc19bf174cf692694),
525 W64LIT(0xe49b69c19ef14ad2), W64LIT(0xefbe4786384f25e3),
526 W64LIT(0x0fc19dc68b8cd5b5), W64LIT(0x240ca1cc77ac9c65),
527 W64LIT(0x2de92c6f592b0275), W64LIT(0x4a7484aa6ea6e483),
528 W64LIT(0x5cb0a9dcbd41fbd4), W64LIT(0x76f988da831153b5),
529 W64LIT(0x983e5152ee66dfab), W64LIT(0xa831c66d2db43210),
530 W64LIT(0xb00327c898fb213f), W64LIT(0xbf597fc7beef0ee4),
531 W64LIT(0xc6e00bf33da88fc2), W64LIT(0xd5a79147930aa725),
532 W64LIT(0x06ca6351e003826f), W64LIT(0x142929670a0e6e70),
533 W64LIT(0x27b70a8546d22ffc), W64LIT(0x2e1b21385c26c926),
534 W64LIT(0x4d2c6dfc5ac42aed), W64LIT(0x53380d139d95b3df),
535 W64LIT(0x650a73548baf63de), W64LIT(0x766a0abb3c77b2a8),
536 W64LIT(0x81c2c92e47edaee6), W64LIT(0x92722c851482353b),
537 W64LIT(0xa2bfe8a14cf10364), W64LIT(0xa81a664bbc423001),
538 W64LIT(0xc24b8b70d0f89791), W64LIT(0xc76c51a30654be30),
539 W64LIT(0xd192e819d6ef5218), W64LIT(0xd69906245565a910),
540 W64LIT(0xf40e35855771202a), W64LIT(0x106aa07032bbd1b8),
541 W64LIT(0x19a4c116b8d2d0c8), W64LIT(0x1e376c085141ab53),
542 W64LIT(0x2748774cdf8eeb99), W64LIT(0x34b0bcb5e19b48a8),
543 W64LIT(0x391c0cb3c5c95a63), W64LIT(0x4ed8aa4ae3418acb),
544 W64LIT(0x5b9cca4f7763e373), W64LIT(0x682e6ff3d6b2b8a3),
545 W64LIT(0x748f82ee5defb2fc), W64LIT(0x78a5636f43172f60),
546 W64LIT(0x84c87814a1f0ab72), W64LIT(0x8cc702081a6439ec),
547 W64LIT(0x90befffa23631e28), W64LIT(0xa4506cebde82bde9),
548 W64LIT(0xbef9a3f7b2c67915), W64LIT(0xc67178f2e372532b),
549 W64LIT(0xca273eceea26619c), W64LIT(0xd186b8c721c0c207),
550 W64LIT(0xeada7dd6cde0eb1e), W64LIT(0xf57d4f7fee6ed178),
551 W64LIT(0x06f067aa72176fba), W64LIT(0x0a637dc5a2c898a6),
552 W64LIT(0x113f9804bef90dae), W64LIT(0x1b710b35131c471b),
553 W64LIT(0x28db77f523047d84), W64LIT(0x32caab7b40c72493),
554 W64LIT(0x3c9ebe0a15c9bebc), W64LIT(0x431d67c49c100d4c),
555 W64LIT(0x4cc5d4becb3e42b6), W64LIT(0x597f299cfc657e2a),
556 W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
// SHA-512 big/small sigma functions: same names as the SHA-256 versions
// above but with 64-bit rotate/shift amounts (FIPS 180-2). The #undef
// lines separating the two definitions are elided in this extract.
561 #define S0(x) (rotrFixed(x,28)^rotrFixed(x,34)^rotrFixed(x,39))
562 #define S1(x) (rotrFixed(x,14)^rotrFixed(x,18)^rotrFixed(x,41))
563 #define s0(x) (rotrFixed(x,1)^rotrFixed(x,8)^(x>>7))
564 #define s1(x) (rotrFixed(x,19)^rotrFixed(x,61)^(x>>6))
// Shared SHA-512/SHA-384 compression of one 128-byte block, using the
// 64-bit sigma macros and K512 constants. File-static; both classes
// delegate here with their own digest_/buffer_ pointers.
567 static void Transform512(word64* digest_, word64* buffer_)
569 const word64* K = K512;
// T holds the eight 64-bit working variables, seeded from the chaining
// value. (Declarations of W and T are elided in this extract.)
575 memcpy(T, digest_,
sizeof(T));
// 80 rounds, 16 per iteration; j offsets the K/W window used by R(i).
578 for (
unsigned int j = 0; j < 80; j += 16) {
579 R( 0); R( 1); R( 2); R( 3);
580 R( 4); R( 5); R( 6); R( 7);
581 R( 8); R( 9); R(10); R(11);
582 R(12); R(13); R(14); R(15);
// NOTE(review): the feed-forward into digest_ is elided in this extract.
// Scrub the schedule and working variables.
597 memset(W, 0,
sizeof(W));
598 memset(T, 0,
sizeof(T));
// SHA-512 and SHA-384 share the same compression function; they differ
// only in their initial state and in output truncation.
602 void SHA512::Transform()
604 Transform512(digest_, buffer_);
608 void SHA384::Transform()
610 Transform512(digest_, buffer_);
613 #endif // WORD64_AVAILABLE
// x86 assembly SHA-1 round macros for AsmTransform. The message schedule
// W lives on the stack at [esp + index*4]; edi is the scratch register
// for the schedule word; ASMR0 handles rounds 0-15 (word already stored),
// ASMR1..ASMR4 compute blk1-style schedule updates from W1..W4 and use
// the K constants 0x5A827999 / 0x6ED9EBA1 / 0x8F1BBCDC / 0xCA62C1D6.
// Many interior lines (rotates, f-function arithmetic) are elided in this
// extract. ASMR3 appears to use mm0 as extra scratch around the Maj
// computation -- confirm against the full source.
620 #define ASMf1(x,y,z) \
635 #define ASMR0(v,w,x,y,z,i) \
637 AS2( mov edi, [esp + i * 4] ) \
640 AS2( lea z, [edi + z + 0x5A827999] ) \
677 #define ASMR1(v,w,x,y,z,i,W1,W2,W3,W4) \
678 AS2( mov edi, [esp + W1 * 4] ) \
680 AS2( xor edi, [esp + W2 * 4] ) \
682 AS2( xor edi, [esp + W3 * 4] ) \
684 AS2( xor edi, [esp + W4 * 4] ) \
687 AS2( mov [esp + W1 * 4], edi ) \
688 AS2( lea z, [edi + z + 0x5A827999] ) \
698 #define ASMR2(v,w,x,y,z,i,W1,W2,W3,W4) \
699 AS2( mov edi, [esp + W1 * 4] ) \
701 AS2( xor edi, [esp + W2 * 4] ) \
703 AS2( xor edi, [esp + W3 * 4] ) \
705 AS2( xor edi, [esp + W4 * 4] ) \
708 AS2( mov [esp + W1 * 4], edi ) \
709 AS2( lea z, [edi + z + 0x6ED9EBA1] ) \
719 #define ASMR3(v,w,x,y,z,i,W1,W2,W3,W4) \
720 AS2( mov edi, [esp + W1 * 4] ) \
722 AS2( xor edi, [esp + W2 * 4] ) \
724 AS2( xor edi, [esp + W3 * 4] ) \
726 AS2( xor edi, [esp + W4 * 4] ) \
727 AS2( movd mm0, esi ) \
730 AS2( mov [esp + W1 * 4], edi ) \
732 AS2( lea z, [edi + z + 0x8F1BBCDC] ) \
733 AS2( movd edi, mm0 ) \
744 #define ASMR4(v,w,x,y,z,i,W1,W2,W3,W4) \
745 AS2( mov edi, [esp + W1 * 4] ) \
747 AS2( xor edi, [esp + W2 * 4] ) \
749 AS2( xor edi, [esp + W3 * 4] ) \
751 AS2( xor edi, [esp + W4 * 4] ) \
754 AS2( mov [esp + W1 * 4], edi ) \
755 AS2( lea z, [edi + z + 0xCA62C1D6] ) \
// Hand-written x86 (MMX) SHA-1 transform: compresses `times` consecutive
// 64-byte blocks from `data` into digest_. Compiler-specific AS1/AS2
// macros spell one- and two-operand instructions; the #ifdef separating
// the GCC and MSVC variants is elided in this extract.
765 void SHA::AsmTransform(
const byte* data, word32 times)
// GCC path: emit Intel-syntax inline asm strings.
768 #define AS1(x) asm(#x);
769 #define AS2(x, y) asm(#x ", " #y);
// GCC prologue: save callee-saved edi/ebx/esi/ebp in MMX registers
// (mm3..mm6), then load the arguments from the frame.
772 asm(".intel_syntax noprefix"); \
773 AS2( movd mm3, edi ) \
774 AS2( movd mm4, ebx ) \
775 AS2( movd mm5, esi ) \
776 AS2( movd mm6, ebp ) \
777 AS2( mov ecx, DWORD PTR [ebp + 8] ) \
778 AS2( mov edi, DWORD PTR [ebp + 12] ) \
779 AS2( mov eax, DWORD PTR [ebp + 16] )
// GCC epilogue: restore the saved registers (esp via the saved ebp).
782 AS2( movd ebp, mm6 ) \
783 AS2( movd esi, mm5 ) \
784 AS2( movd ebx, mm4 ) \
785 AS2( mov esp, ebp ) \
786 AS2( movd edi, mm3 ) \
// MSVC path: native __asm keyword.
790 #define AS1(x) __asm x
791 #define AS2(x, y) __asm x, y
// MSVC prologue: same register saves; arguments referenced by name.
795 AS2( mov ebp, esp ) \
796 AS2( movd mm3, edi ) \
797 AS2( movd mm4, ebx ) \
798 AS2( movd mm5, esi ) \
799 AS2( movd mm6, ebp ) \
800 AS2( mov edi, data ) \
801 AS2( mov eax, times )
// MSVC epilogue: restore callee-saved registers from MMX.
804 AS2( movd ebp, mm6 ) \
805 AS2( movd esi, mm5 ) \
806 AS2( movd ebx, mm4 ) \
807 AS2( movd edi, mm3 ) \
808 AS2( mov esp, ebp ) \
818 #ifdef OLD_GCC_OFFSET
// Copy the 64-byte input block (edi = data pointer) onto the stack W
// area at [esp], four words at a time. NOTE(review): the byte-swap
// (endianness reversal) instructions between each load and store group
// are elided in this extract -- confirm against the full source.
834 AS2( mov eax, [edi] )
835 AS2( mov ebx, [edi + 4] )
836 AS2( mov ecx, [edi + 8] )
837 AS2( mov edx, [edi + 12] )
844 AS2( mov [esp], eax )
845 AS2( mov [esp + 4], ebx )
846 AS2( mov [esp + 8], ecx )
847 AS2( mov [esp + 12], edx )
850 AS2( mov eax, [edi + 16] )
851 AS2( mov ebx, [edi + 20] )
852 AS2( mov ecx, [edi + 24] )
853 AS2( mov edx, [edi + 28] )
860 AS2( mov [esp + 16], eax )
861 AS2( mov [esp + 20], ebx )
862 AS2( mov [esp + 24], ecx )
863 AS2( mov [esp + 28], edx )
867 AS2( mov eax, [edi + 32] )
868 AS2( mov ebx, [edi + 36] )
869 AS2( mov ecx, [edi + 40] )
870 AS2( mov edx, [edi + 44] )
877 AS2( mov [esp + 32], eax )
878 AS2( mov [esp + 36], ebx )
879 AS2( mov [esp + 40], ecx )
880 AS2( mov [esp + 44], edx )
884 AS2( mov eax, [edi + 48] )
885 AS2( mov ebx, [edi + 52] )
886 AS2( mov ecx, [edi + 56] )
887 AS2( mov edx, [edi + 60] )
894 AS2( mov [esp + 48], eax )
895 AS2( mov [esp + 52], ebx )
896 AS2( mov [esp + 56], ecx )
897 AS2( mov [esp + 60], edx )
// Park the data pointer just past the 16-word schedule area.
899 AS2( mov [esp + 64], edi )
// Load the five-word chaining value (esi = digest_) into the working
// registers: a=eax, b=ebx, c=ecx, d=edx, e=ebp.
902 AS2( mov eax, [esi] )
903 AS2( mov ebx, [esi + 4] )
904 AS2( mov ecx, [esi + 8] )
905 AS2( mov edx, [esi + 12] )
906 AS2( mov ebp, [esi + 16] )
// 80 SHA-1 rounds on registers (a..e = eax,ebx,ecx,edx,ebp), register
// roles rotating by argument order exactly as in the C Transform().
// Rounds 0-15: schedule words already on the stack.
909 ASMR0(eax, ebx, ecx, edx, ebp, 0)
910 ASMR0(ebp, eax, ebx, ecx, edx, 1)
911 ASMR0(edx, ebp, eax, ebx, ecx, 2)
912 ASMR0(ecx, edx, ebp, eax, ebx, 3)
913 ASMR0(ebx, ecx, edx, ebp, eax, 4)
914 ASMR0(eax, ebx, ecx, edx, ebp, 5)
915 ASMR0(ebp, eax, ebx, ecx, edx, 6)
916 ASMR0(edx, ebp, eax, ebx, ecx, 7)
917 ASMR0(ecx, edx, ebp, eax, ebx, 8)
918 ASMR0(ebx, ecx, edx, ebp, eax, 9)
919 ASMR0(eax, ebx, ecx, edx, ebp, 10)
920 ASMR0(ebp, eax, ebx, ecx, edx, 11)
921 ASMR0(edx, ebp, eax, ebx, ecx, 12)
922 ASMR0(ecx, edx, ebp, eax, ebx, 13)
923 ASMR0(ebx, ecx, edx, ebp, eax, 14)
924 ASMR0(eax, ebx, ecx, edx, ebp, 15)
// Rounds 16-19: schedule computed from words W1^W2^W3^W4 (mod-16 slots).
926 ASMR1(ebp, eax, ebx, ecx, edx, 16, 0, 2, 8, 13)
927 ASMR1(edx, ebp, eax, ebx, ecx, 17, 1, 3, 9, 14)
928 ASMR1(ecx, edx, ebp, eax, ebx, 18, 2, 4, 10, 15)
929 ASMR1(ebx, ecx, edx, ebp, eax, 19, 3, 5, 11, 0)
// Rounds 20-39 (K = 0x6ED9EBA1).
931 ASMR2(eax, ebx, ecx, edx, ebp, 20, 4, 6, 12, 1)
932 ASMR2(ebp, eax, ebx, ecx, edx, 21, 5, 7, 13, 2)
933 ASMR2(edx, ebp, eax, ebx, ecx, 22, 6, 8, 14, 3)
934 ASMR2(ecx, edx, ebp, eax, ebx, 23, 7, 9, 15, 4)
935 ASMR2(ebx, ecx, edx, ebp, eax, 24, 8, 10, 0, 5)
936 ASMR2(eax, ebx, ecx, edx, ebp, 25, 9, 11, 1, 6)
937 ASMR2(ebp, eax, ebx, ecx, edx, 26, 10, 12, 2, 7)
938 ASMR2(edx, ebp, eax, ebx, ecx, 27, 11, 13, 3, 8)
939 ASMR2(ecx, edx, ebp, eax, ebx, 28, 12, 14, 4, 9)
940 ASMR2(ebx, ecx, edx, ebp, eax, 29, 13, 15, 5, 10)
941 ASMR2(eax, ebx, ecx, edx, ebp, 30, 14, 0, 6, 11)
942 ASMR2(ebp, eax, ebx, ecx, edx, 31, 15, 1, 7, 12)
943 ASMR2(edx, ebp, eax, ebx, ecx, 32, 0, 2, 8, 13)
944 ASMR2(ecx, edx, ebp, eax, ebx, 33, 1, 3, 9, 14)
945 ASMR2(ebx, ecx, edx, ebp, eax, 34, 2, 4, 10, 15)
946 ASMR2(eax, ebx, ecx, edx, ebp, 35, 3, 5, 11, 0)
947 ASMR2(ebp, eax, ebx, ecx, edx, 36, 4, 6, 12, 1)
948 ASMR2(edx, ebp, eax, ebx, ecx, 37, 5, 7, 13, 2)
949 ASMR2(ecx, edx, ebp, eax, ebx, 38, 6, 8, 14, 3)
950 ASMR2(ebx, ecx, edx, ebp, eax, 39, 7, 9, 15, 4)
// Rounds 40-59 (K = 0x8F1BBCDC, Maj function).
953 ASMR3(eax, ebx, ecx, edx, ebp, 40, 8, 10, 0, 5)
954 ASMR3(ebp, eax, ebx, ecx, edx, 41, 9, 11, 1, 6)
955 ASMR3(edx, ebp, eax, ebx, ecx, 42, 10, 12, 2, 7)
956 ASMR3(ecx, edx, ebp, eax, ebx, 43, 11, 13, 3, 8)
957 ASMR3(ebx, ecx, edx, ebp, eax, 44, 12, 14, 4, 9)
958 ASMR3(eax, ebx, ecx, edx, ebp, 45, 13, 15, 5, 10)
959 ASMR3(ebp, eax, ebx, ecx, edx, 46, 14, 0, 6, 11)
960 ASMR3(edx, ebp, eax, ebx, ecx, 47, 15, 1, 7, 12)
961 ASMR3(ecx, edx, ebp, eax, ebx, 48, 0, 2, 8, 13)
962 ASMR3(ebx, ecx, edx, ebp, eax, 49, 1, 3, 9, 14)
963 ASMR3(eax, ebx, ecx, edx, ebp, 50, 2, 4, 10, 15)
964 ASMR3(ebp, eax, ebx, ecx, edx, 51, 3, 5, 11, 0)
965 ASMR3(edx, ebp, eax, ebx, ecx, 52, 4, 6, 12, 1)
966 ASMR3(ecx, edx, ebp, eax, ebx, 53, 5, 7, 13, 2)
967 ASMR3(ebx, ecx, edx, ebp, eax, 54, 6, 8, 14, 3)
968 ASMR3(eax, ebx, ecx, edx, ebp, 55, 7, 9, 15, 4)
969 ASMR3(ebp, eax, ebx, ecx, edx, 56, 8, 10, 0, 5)
970 ASMR3(edx, ebp, eax, ebx, ecx, 57, 9, 11, 1, 6)
971 ASMR3(ecx, edx, ebp, eax, ebx, 58, 10, 12, 2, 7)
972 ASMR3(ebx, ecx, edx, ebp, eax, 59, 11, 13, 3, 8)
// Rounds 60-79 (K = 0xCA62C1D6).
974 ASMR4(eax, ebx, ecx, edx, ebp, 60, 12, 14, 4, 9)
975 ASMR4(ebp, eax, ebx, ecx, edx, 61, 13, 15, 5, 10)
976 ASMR4(edx, ebp, eax, ebx, ecx, 62, 14, 0, 6, 11)
977 ASMR4(ecx, edx, ebp, eax, ebx, 63, 15, 1, 7, 12)
978 ASMR4(ebx, ecx, edx, ebp, eax, 64, 0, 2, 8, 13)
979 ASMR4(eax, ebx, ecx, edx, ebp, 65, 1, 3, 9, 14)
980 ASMR4(ebp, eax, ebx, ecx, edx, 66, 2, 4, 10, 15)
981 ASMR4(edx, ebp, eax, ebx, ecx, 67, 3, 5, 11, 0)
982 ASMR4(ecx, edx, ebp, eax, ebx, 68, 4, 6, 12, 1)
983 ASMR4(ebx, ecx, edx, ebp, eax, 69, 5, 7, 13, 2)
984 ASMR4(eax, ebx, ecx, edx, ebp, 70, 6, 8, 14, 3)
985 ASMR4(ebp, eax, ebx, ecx, edx, 71, 7, 9, 15, 4)
986 ASMR4(edx, ebp, eax, ebx, ecx, 72, 8, 10, 0, 5)
987 ASMR4(ecx, edx, ebp, eax, ebx, 73, 9, 11, 1, 6)
988 ASMR4(ebx, ecx, edx, ebp, eax, 74, 10, 12, 2, 7)
989 ASMR4(eax, ebx, ecx, edx, ebp, 75, 11, 13, 3, 8)
990 ASMR4(ebp, eax, ebx, ecx, edx, 76, 12, 14, 4, 9)
991 ASMR4(edx, ebp, eax, ebx, ecx, 77, 13, 15, 5, 10)
992 ASMR4(ecx, edx, ebp, eax, ebx, 78, 14, 0, 6, 11)
993 ASMR4(ebx, ecx, edx, ebp, eax, 79, 15, 1, 7, 12)
// Feed-forward: add the round results back into digest_ (esi).
998 AS2( add [esi], eax )
999 AS2( add [esi + 4], ebx )
1000 AS2( add [esi + 8], ecx )
1001 AS2( add [esi + 12], edx )
1002 AS2( add [esi + 16], ebp )
// Loop bookkeeping: the block counter appears to be kept in mm2 and the
// data pointer saved at [esp + 64]; the decrement/advance instructions
// between these lines are elided in this extract -- confirm.
1005 AS2( movd ebp, mm2 )
1007 AS2( mov edi, DWORD PTR [esp + 64] )
1010 AS2( mov [esp + 64], edi )
1013 AS2( movd mm2, ebp )
// Repeat until all `times` blocks are processed.
1014 AS1( jnz loopStart )
1021 #endif // DO_SHA_ASM