From e2967396ac73cb7410787886cdaf072a184ffc49 Mon Sep 17 00:00:00 2001 From: irungentoo Date: Tue, 2 Jul 2013 09:53:34 -0400 Subject: Added NaCl crypto library. --- nacl/MACROS | 56 + nacl/OPERATIONS | 11 + nacl/PROTOTYPES.c | 26 + nacl/PROTOTYPES.cpp | 17 + nacl/commandline/nacl-sha256.c | 64 + nacl/commandline/nacl-sha512.c | 64 + nacl/cpucycles/alpha.c | 80 + nacl/cpucycles/alpha.h | 27 + nacl/cpucycles/amd64cpuinfo.c | 16 + nacl/cpucycles/amd64cpuinfo.h | 27 + nacl/cpucycles/amd64cpuspeed.c | 25 + nacl/cpucycles/amd64cpuspeed.h | 27 + nacl/cpucycles/amd64tscfreq.c | 18 + nacl/cpucycles/amd64tscfreq.h | 27 + nacl/cpucycles/celllinux.c | 83 + nacl/cpucycles/celllinux.h | 27 + nacl/cpucycles/cortex.c | 73 + nacl/cpucycles/cortex.h | 27 + nacl/cpucycles/dev4ns.c | 62 + nacl/cpucycles/dev4ns.h | 27 + nacl/cpucycles/do | 105 + nacl/cpucycles/gettimeofday.c | 32 + nacl/cpucycles/gettimeofday.h | 27 + nacl/cpucycles/hppapstat.c | 26 + nacl/cpucycles/hppapstat.h | 27 + nacl/cpucycles/ia64cpuinfo.c | 15 + nacl/cpucycles/ia64cpuinfo.h | 27 + nacl/cpucycles/mips.c | 65 + nacl/cpucycles/mips.h | 27 + nacl/cpucycles/monotonic.c | 34 + nacl/cpucycles/monotonic.h | 27 + nacl/cpucycles/monotoniccpuinfo.c | 33 + nacl/cpucycles/monotoniccpuinfo.h | 27 + nacl/cpucycles/osfreq.c | 65 + nacl/cpucycles/powerpccpuinfo.c | 95 + nacl/cpucycles/powerpccpuinfo.h | 27 + nacl/cpucycles/powerpcmacos.c | 42 + nacl/cpucycles/powerpcmacos.h | 27 + nacl/cpucycles/sgi.c | 38 + nacl/cpucycles/sgi.h | 27 + nacl/cpucycles/sparc32cpuinfo.c | 16 + nacl/cpucycles/sparc32cpuinfo.h | 27 + nacl/cpucycles/sparccpuinfo.c | 15 + nacl/cpucycles/sparccpuinfo.h | 27 + nacl/cpucycles/test.c | 77 + nacl/cpucycles/x86cpuinfo.c | 15 + nacl/cpucycles/x86cpuinfo.h | 27 + nacl/cpucycles/x86cpuspeed.c | 24 + nacl/cpucycles/x86cpuspeed.h | 27 + nacl/cpucycles/x86estimate.c | 59 + nacl/cpucycles/x86estimate.h | 27 + nacl/cpucycles/x86tscfreq.c | 17 + nacl/cpucycles/x86tscfreq.h | 27 + nacl/cpuid/cbytes.c | 16 + 
nacl/cpuid/cpuid.c | 41 + nacl/cpuid/do | 37 + nacl/cpuid/unknown.c | 7 + nacl/cpuid/x86.c | 41 + nacl/crypto_auth/hmacsha256/checksum | 1 + nacl/crypto_auth/hmacsha256/ref/api.h | 2 + nacl/crypto_auth/hmacsha256/ref/hmac.c | 83 + nacl/crypto_auth/hmacsha256/ref/verify.c | 9 + nacl/crypto_auth/hmacsha256/used | 0 nacl/crypto_auth/hmacsha512256/checksum | 1 + nacl/crypto_auth/hmacsha512256/ref/api.h | 2 + nacl/crypto_auth/hmacsha512256/ref/hmac.c | 86 + nacl/crypto_auth/hmacsha512256/ref/verify.c | 9 + nacl/crypto_auth/hmacsha512256/selected | 0 nacl/crypto_auth/hmacsha512256/used | 0 nacl/crypto_auth/measure.c | 69 + nacl/crypto_auth/try.c | 119 + nacl/crypto_auth/wrapper-auth.cpp | 11 + nacl/crypto_auth/wrapper-verify.cpp | 14 + .../crypto_box/curve25519xsalsa20poly1305/checksum | 1 + .../curve25519xsalsa20poly1305/ref/after.c | 22 + .../curve25519xsalsa20poly1305/ref/api.h | 6 + .../curve25519xsalsa20poly1305/ref/before.c | 17 + .../curve25519xsalsa20poly1305/ref/box.c | 27 + .../curve25519xsalsa20poly1305/ref/keypair.c | 12 + .../crypto_box/curve25519xsalsa20poly1305/selected | 0 nacl/crypto_box/curve25519xsalsa20poly1305/used | 0 nacl/crypto_box/measure.c | 137 + nacl/crypto_box/try.c | 195 + nacl/crypto_box/wrapper-box.cpp | 24 + nacl/crypto_box/wrapper-keypair.cpp | 12 + nacl/crypto_box/wrapper-open.cpp | 27 + nacl/crypto_core/hsalsa20/checksum | 1 + nacl/crypto_core/hsalsa20/ref/api.h | 4 + nacl/crypto_core/hsalsa20/ref/core.c | 135 + nacl/crypto_core/hsalsa20/ref/implementors | 1 + nacl/crypto_core/hsalsa20/ref2/api.h | 4 + nacl/crypto_core/hsalsa20/ref2/core.c | 108 + nacl/crypto_core/hsalsa20/ref2/implementors | 1 + nacl/crypto_core/hsalsa20/used | 0 nacl/crypto_core/measure.c | 18 + nacl/crypto_core/salsa20/checksum | 1 + nacl/crypto_core/salsa20/ref/api.h | 4 + nacl/crypto_core/salsa20/ref/core.c | 134 + nacl/crypto_core/salsa20/ref/implementors | 1 + nacl/crypto_core/salsa20/used | 0 nacl/crypto_core/salsa2012/checksum | 1 + 
nacl/crypto_core/salsa2012/ref/api.h | 4 + nacl/crypto_core/salsa2012/ref/core.c | 134 + nacl/crypto_core/salsa2012/ref/implementors | 1 + nacl/crypto_core/salsa2012/used | 0 nacl/crypto_core/salsa208/checksum | 1 + nacl/crypto_core/salsa208/ref/api.h | 4 + nacl/crypto_core/salsa208/ref/core.c | 134 + nacl/crypto_core/salsa208/ref/implementors | 1 + nacl/crypto_core/salsa208/used | 0 nacl/crypto_core/try.c | 116 + nacl/crypto_core/wrapper-empty.cpp | 0 nacl/crypto_hash/measure.c | 66 + nacl/crypto_hash/sha256/checksum | 1 + nacl/crypto_hash/sha256/ref/api.h | 1 + nacl/crypto_hash/sha256/ref/hash.c | 69 + nacl/crypto_hash/sha256/ref/implementors | 1 + nacl/crypto_hash/sha256/used | 0 nacl/crypto_hash/sha512/checksum | 1 + nacl/crypto_hash/sha512/ref/api.h | 1 + nacl/crypto_hash/sha512/ref/hash.c | 71 + nacl/crypto_hash/sha512/ref/implementors | 1 + nacl/crypto_hash/sha512/selected | 0 nacl/crypto_hash/sha512/used | 0 nacl/crypto_hash/try.c | 77 + nacl/crypto_hash/wrapper-hash.cpp | 10 + nacl/crypto_hashblocks/measure.c | 18 + nacl/crypto_hashblocks/sha256/checksum | 1 + nacl/crypto_hashblocks/sha256/inplace/api.h | 2 + nacl/crypto_hashblocks/sha256/inplace/blocks.c | 228 + nacl/crypto_hashblocks/sha256/inplace/implementors | 1 + nacl/crypto_hashblocks/sha256/ref/api.h | 2 + nacl/crypto_hashblocks/sha256/ref/blocks.c | 212 + nacl/crypto_hashblocks/sha256/ref/implementors | 1 + nacl/crypto_hashblocks/sha256/used | 0 nacl/crypto_hashblocks/sha512/checksum | 1 + nacl/crypto_hashblocks/sha512/inplace/api.h | 2 + nacl/crypto_hashblocks/sha512/inplace/blocks.c | 256 + nacl/crypto_hashblocks/sha512/inplace/implementors | 1 + nacl/crypto_hashblocks/sha512/ref/api.h | 2 + nacl/crypto_hashblocks/sha512/ref/blocks.c | 239 + nacl/crypto_hashblocks/sha512/ref/implementors | 1 + nacl/crypto_hashblocks/sha512/selected | 0 nacl/crypto_hashblocks/sha512/used | 0 nacl/crypto_hashblocks/try.c | 79 + nacl/crypto_hashblocks/wrapper-empty.cpp | 0 nacl/crypto_onetimeauth/measure.c | 69 + 
nacl/crypto_onetimeauth/poly1305/53/api.h | 2 + nacl/crypto_onetimeauth/poly1305/53/auth.c | 1616 +++ nacl/crypto_onetimeauth/poly1305/53/verify.c | 9 + nacl/crypto_onetimeauth/poly1305/amd64/api.h | 2 + nacl/crypto_onetimeauth/poly1305/amd64/auth.s | 2787 ++++ nacl/crypto_onetimeauth/poly1305/amd64/constants.s | 85 + nacl/crypto_onetimeauth/poly1305/amd64/verify.c | 9 + nacl/crypto_onetimeauth/poly1305/checksum | 1 + nacl/crypto_onetimeauth/poly1305/ref/api.h | 2 + nacl/crypto_onetimeauth/poly1305/ref/auth.c | 104 + nacl/crypto_onetimeauth/poly1305/ref/verify.c | 9 + nacl/crypto_onetimeauth/poly1305/selected | 0 nacl/crypto_onetimeauth/poly1305/used | 0 nacl/crypto_onetimeauth/poly1305/x86/api.h | 2 + nacl/crypto_onetimeauth/poly1305/x86/auth.s | 2779 ++++ nacl/crypto_onetimeauth/poly1305/x86/constants.s | 85 + nacl/crypto_onetimeauth/poly1305/x86/verify.c | 9 + nacl/crypto_onetimeauth/try.c | 119 + nacl/crypto_onetimeauth/wrapper-auth.cpp | 11 + nacl/crypto_onetimeauth/wrapper-verify.cpp | 14 + nacl/crypto_scalarmult/curve25519/athlon/api.h | 2 + nacl/crypto_scalarmult/curve25519/athlon/base.c | 8 + nacl/crypto_scalarmult/curve25519/athlon/const.s | 114 + .../curve25519/athlon/fromdouble.s | 195 + .../curve25519/athlon/implementors | 1 + nacl/crypto_scalarmult/curve25519/athlon/init.s | 13 + .../crypto_scalarmult/curve25519/athlon/mainloop.s | 3990 ++++++ nacl/crypto_scalarmult/curve25519/athlon/mult.s | 410 + nacl/crypto_scalarmult/curve25519/athlon/smult.c | 91 + nacl/crypto_scalarmult/curve25519/athlon/square.s | 298 + .../crypto_scalarmult/curve25519/athlon/todouble.s | 144 + nacl/crypto_scalarmult/curve25519/checksum | 1 + nacl/crypto_scalarmult/curve25519/donna_c64/api.h | 2 + nacl/crypto_scalarmult/curve25519/donna_c64/base.c | 8 + .../curve25519/donna_c64/implementors | 1 + .../crypto_scalarmult/curve25519/donna_c64/smult.c | 477 + nacl/crypto_scalarmult/curve25519/ref/api.h | 2 + nacl/crypto_scalarmult/curve25519/ref/base.c | 16 + 
nacl/crypto_scalarmult/curve25519/ref/implementors | 1 + nacl/crypto_scalarmult/curve25519/ref/smult.c | 265 + nacl/crypto_scalarmult/curve25519/used | 0 nacl/crypto_scalarmult/measure.c | 61 + nacl/crypto_scalarmult/try.c | 126 + nacl/crypto_scalarmult/wrapper-base.cpp | 11 + nacl/crypto_scalarmult/wrapper-mult.cpp | 12 + nacl/crypto_secretbox/measure.c | 75 + nacl/crypto_secretbox/try.c | 129 + nacl/crypto_secretbox/wrapper-box.cpp | 19 + nacl/crypto_secretbox/wrapper-open.cpp | 22 + nacl/crypto_secretbox/xsalsa20poly1305/checksum | 1 + nacl/crypto_secretbox/xsalsa20poly1305/ref/api.h | 4 + nacl/crypto_secretbox/xsalsa20poly1305/ref/box.c | 35 + nacl/crypto_secretbox/xsalsa20poly1305/selected | 0 nacl/crypto_secretbox/xsalsa20poly1305/used | 0 nacl/crypto_sign/edwards25519sha512batch/ref/api.h | 3 + .../edwards25519sha512batch/ref/fe25519.c | 345 + .../edwards25519sha512batch/ref/fe25519.h | 54 + .../edwards25519sha512batch/ref/ge25519.c | 227 + .../edwards25519sha512batch/ref/ge25519.h | 34 + .../edwards25519sha512batch/ref/sc25519.c | 146 + .../edwards25519sha512batch/ref/sc25519.h | 51 + .../crypto_sign/edwards25519sha512batch/ref/sign.c | 103 + nacl/crypto_sign/edwards25519sha512batch/selected | 0 nacl/crypto_sign/edwards25519sha512batch/used | 0 nacl/crypto_sign/measure.c | 83 + nacl/crypto_sign/try.c | 86 + nacl/crypto_sign/wrapper-keypair.cpp | 12 + nacl/crypto_sign/wrapper-sign-open.cpp | 24 + nacl/crypto_sign/wrapper-sign.cpp | 23 + nacl/crypto_stream/aes128ctr/checksum | 1 + nacl/crypto_stream/aes128ctr/core2/afternm.s | 12308 +++++++++++++++++ nacl/crypto_stream/aes128ctr/core2/api.h | 3 + nacl/crypto_stream/aes128ctr/core2/beforenm.s | 13694 +++++++++++++++++++ nacl/crypto_stream/aes128ctr/core2/stream.c | 14 + nacl/crypto_stream/aes128ctr/core2/xor.c | 15 + nacl/crypto_stream/aes128ctr/core2/xor_afternm.s | 12407 +++++++++++++++++ nacl/crypto_stream/aes128ctr/portable/afternm.c | 158 + nacl/crypto_stream/aes128ctr/portable/api.h | 3 + 
nacl/crypto_stream/aes128ctr/portable/beforenm.c | 59 + nacl/crypto_stream/aes128ctr/portable/common.c | 64 + nacl/crypto_stream/aes128ctr/portable/common.h | 788 ++ nacl/crypto_stream/aes128ctr/portable/consts.c | 14 + nacl/crypto_stream/aes128ctr/portable/consts.h | 28 + nacl/crypto_stream/aes128ctr/portable/int128.c | 128 + nacl/crypto_stream/aes128ctr/portable/int128.h | 47 + nacl/crypto_stream/aes128ctr/portable/stream.c | 28 + nacl/crypto_stream/aes128ctr/portable/types.h | 10 + .../crypto_stream/aes128ctr/portable/xor_afternm.c | 180 + nacl/crypto_stream/aes128ctr/used | 0 nacl/crypto_stream/measure.c | 73 + nacl/crypto_stream/salsa20/amd64_xmm6/api.h | 2 + nacl/crypto_stream/salsa20/amd64_xmm6/implementors | 1 + nacl/crypto_stream/salsa20/amd64_xmm6/stream.s | 4823 +++++++ nacl/crypto_stream/salsa20/checksum | 1 + nacl/crypto_stream/salsa20/ref/api.h | 2 + nacl/crypto_stream/salsa20/ref/implementors | 1 + nacl/crypto_stream/salsa20/ref/stream.c | 49 + nacl/crypto_stream/salsa20/ref/xor.c | 52 + nacl/crypto_stream/salsa20/used | 0 nacl/crypto_stream/salsa20/x86_xmm5/api.h | 2 + nacl/crypto_stream/salsa20/x86_xmm5/implementors | 1 + nacl/crypto_stream/salsa20/x86_xmm5/stream.s | 5078 +++++++ nacl/crypto_stream/salsa2012/amd64_xmm6/api.h | 2 + .../salsa2012/amd64_xmm6/implementors | 1 + nacl/crypto_stream/salsa2012/amd64_xmm6/stream.s | 4823 +++++++ nacl/crypto_stream/salsa2012/checksum | 1 + nacl/crypto_stream/salsa2012/ref/api.h | 2 + nacl/crypto_stream/salsa2012/ref/implementors | 1 + nacl/crypto_stream/salsa2012/ref/stream.c | 49 + nacl/crypto_stream/salsa2012/ref/xor.c | 52 + nacl/crypto_stream/salsa2012/used | 0 nacl/crypto_stream/salsa2012/x86_xmm5/api.h | 2 + nacl/crypto_stream/salsa2012/x86_xmm5/implementors | 1 + nacl/crypto_stream/salsa2012/x86_xmm5/stream.s | 5078 +++++++ nacl/crypto_stream/salsa208/amd64_xmm6/api.h | 2 + .../crypto_stream/salsa208/amd64_xmm6/implementors | 1 + nacl/crypto_stream/salsa208/amd64_xmm6/stream.s | 4823 +++++++ 
nacl/crypto_stream/salsa208/checksum | 1 + nacl/crypto_stream/salsa208/ref/api.h | 2 + nacl/crypto_stream/salsa208/ref/implementors | 1 + nacl/crypto_stream/salsa208/ref/stream.c | 49 + nacl/crypto_stream/salsa208/ref/xor.c | 52 + nacl/crypto_stream/salsa208/used | 0 nacl/crypto_stream/salsa208/x86_xmm5/api.h | 2 + nacl/crypto_stream/salsa208/x86_xmm5/implementors | 1 + nacl/crypto_stream/salsa208/x86_xmm5/stream.s | 5078 +++++++ nacl/crypto_stream/try.c | 124 + nacl/crypto_stream/wrapper-stream.cpp | 12 + nacl/crypto_stream/wrapper-xor.cpp | 17 + nacl/crypto_stream/xsalsa20/checksum | 1 + nacl/crypto_stream/xsalsa20/ref/api.h | 2 + nacl/crypto_stream/xsalsa20/ref/implementors | 1 + nacl/crypto_stream/xsalsa20/ref/stream.c | 22 + nacl/crypto_stream/xsalsa20/ref/xor.c | 23 + nacl/crypto_stream/xsalsa20/selected | 0 nacl/crypto_stream/xsalsa20/used | 0 nacl/crypto_verify/16/checksum | 1 + nacl/crypto_verify/16/ref/api.h | 1 + nacl/crypto_verify/16/ref/verify.c | 24 + nacl/crypto_verify/16/used | 0 nacl/crypto_verify/32/checksum | 1 + nacl/crypto_verify/32/ref/api.h | 1 + nacl/crypto_verify/32/ref/verify.c | 40 + nacl/crypto_verify/32/used | 0 nacl/crypto_verify/measure.c | 18 + nacl/crypto_verify/try.c | 75 + nacl/crypto_verify/wrapper-empty.cpp | 0 nacl/curvecp/LIBS | 31 + nacl/curvecp/README | 10 + nacl/curvecp/SOURCES | 36 + nacl/curvecp/TARGETS | 5 + nacl/curvecp/blocking.c | 12 + nacl/curvecp/blocking.h | 7 + nacl/curvecp/byte.h | 8 + nacl/curvecp/byte_copy.c | 8 + nacl/curvecp/byte_isequal.c | 10 + nacl/curvecp/byte_zero.c | 7 + nacl/curvecp/crypto_block.c | 35 + nacl/curvecp/crypto_block.h | 4 + nacl/curvecp/curvecpclient.c | 476 + nacl/curvecp/curvecpmakekey.c | 57 + nacl/curvecp/curvecpmessage.c | 654 + nacl/curvecp/curvecpprintkey.c | 46 + nacl/curvecp/curvecpserver.c | 497 + nacl/curvecp/die.c | 42 + nacl/curvecp/die.h | 16 + nacl/curvecp/e.c | 106 + nacl/curvecp/e.h | 438 + nacl/curvecp/hexparse.c | 25 + nacl/curvecp/hexparse.h | 6 + nacl/curvecp/load.c | 
33 + nacl/curvecp/load.h | 6 + nacl/curvecp/nameparse.c | 19 + nacl/curvecp/nameparse.h | 6 + nacl/curvecp/nanoseconds.c | 12 + nacl/curvecp/nanoseconds.h | 6 + nacl/curvecp/open.h | 10 + nacl/curvecp/open_cwd.c | 6 + nacl/curvecp/open_lock.c | 19 + nacl/curvecp/open_pipe.c | 15 + nacl/curvecp/open_read.c | 17 + nacl/curvecp/open_write.c | 17 + nacl/curvecp/portparse.c | 14 + nacl/curvecp/portparse.h | 6 + nacl/curvecp/randommod.c | 14 + nacl/curvecp/randommod.h | 6 + nacl/curvecp/safenonce.c | 74 + nacl/curvecp/safenonce.h | 6 + nacl/curvecp/savesync.c | 24 + nacl/curvecp/savesync.h | 6 + nacl/curvecp/socket.h | 9 + nacl/curvecp/socket_bind.c | 15 + nacl/curvecp/socket_recv.c | 23 + nacl/curvecp/socket_send.c | 19 + nacl/curvecp/socket_udp.c | 36 + nacl/curvecp/uint16_pack.c | 7 + nacl/curvecp/uint16_pack.h | 8 + nacl/curvecp/uint16_unpack.c | 9 + nacl/curvecp/uint16_unpack.h | 8 + nacl/curvecp/uint32_pack.c | 9 + nacl/curvecp/uint32_pack.h | 8 + nacl/curvecp/uint32_unpack.c | 11 + nacl/curvecp/uint32_unpack.h | 8 + nacl/curvecp/uint64_pack.c | 13 + nacl/curvecp/uint64_pack.h | 8 + nacl/curvecp/uint64_unpack.c | 15 + nacl/curvecp/uint64_unpack.h | 8 + nacl/curvecp/writeall.c | 27 + nacl/curvecp/writeall.h | 6 + nacl/do | 468 + nacl/inttypes/crypto_int16.c | 3 + nacl/inttypes/crypto_int32.c | 3 + nacl/inttypes/crypto_int64.c | 3 + nacl/inttypes/crypto_int8.c | 3 + nacl/inttypes/crypto_uint16.c | 3 + nacl/inttypes/crypto_uint32.c | 3 + nacl/inttypes/crypto_uint64.c | 3 + nacl/inttypes/crypto_uint8.c | 3 + nacl/inttypes/do | 47 + nacl/inttypes/signed.h | 17 + nacl/inttypes/unsigned.h | 17 + nacl/measure-anything.c | 225 + nacl/okcompilers/abiname.c | 45 + nacl/okcompilers/archivers | 2 + nacl/okcompilers/c | 8 + nacl/okcompilers/cpp | 8 + nacl/okcompilers/do | 196 + nacl/okcompilers/lib.c | 29 + nacl/okcompilers/lib.cpp | 19 + nacl/okcompilers/main.c | 25 + nacl/okcompilers/main.cpp | 22 + nacl/randombytes/devurandom.c | 34 + nacl/randombytes/devurandom.h | 24 + 
nacl/randombytes/do | 43 + nacl/randombytes/test.c | 15 + nacl/tests/auth.c | 19 + nacl/tests/auth.out | 4 + nacl/tests/auth2.c | 34 + nacl/tests/auth2.out | 4 + nacl/tests/auth3.c | 34 + nacl/tests/auth3.out | 1 + nacl/tests/auth4.cpp | 44 + nacl/tests/auth4.out | 1 + nacl/tests/auth5.c | 36 + nacl/tests/auth5.out | 0 nacl/tests/auth6.cpp | 46 + nacl/tests/auth6.out | 0 nacl/tests/box.c | 63 + nacl/tests/box.out | 19 + nacl/tests/box2.c | 64 + nacl/tests/box2.out | 17 + nacl/tests/box3.cpp | 60 + nacl/tests/box3.out | 19 + nacl/tests/box4.cpp | 66 + nacl/tests/box4.out | 17 + nacl/tests/box5.cpp | 30 + nacl/tests/box5.out | 0 nacl/tests/box6.cpp | 43 + nacl/tests/box6.out | 0 nacl/tests/box7.c | 36 + nacl/tests/box7.out | 0 nacl/tests/box8.c | 41 + nacl/tests/box8.out | 0 nacl/tests/core1.c | 30 + nacl/tests/core1.out | 4 + nacl/tests/core2.c | 33 + nacl/tests/core2.out | 4 + nacl/tests/core3.c | 41 + nacl/tests/core3.out | 1 + nacl/tests/core4.c | 33 + nacl/tests/core4.out | 8 + nacl/tests/core5.c | 32 + nacl/tests/core5.out | 4 + nacl/tests/core6.c | 47 + nacl/tests/core6.out | 4 + nacl/tests/hash.c | 14 + nacl/tests/hash.out | 1 + nacl/tests/hash2.cpp | 18 + nacl/tests/hash2.out | 1 + nacl/tests/hash3.c | 14 + nacl/tests/hash3.out | 1 + nacl/tests/hash4.cpp | 18 + nacl/tests/hash4.out | 1 + nacl/tests/onetimeauth.c | 42 + nacl/tests/onetimeauth.out | 2 + nacl/tests/onetimeauth2.c | 40 + nacl/tests/onetimeauth2.out | 1 + nacl/tests/onetimeauth5.cpp | 46 + nacl/tests/onetimeauth5.out | 2 + nacl/tests/onetimeauth6.cpp | 50 + nacl/tests/onetimeauth6.out | 1 + nacl/tests/onetimeauth7.c | 36 + nacl/tests/onetimeauth7.out | 0 nacl/tests/onetimeauth8.cpp | 46 + nacl/tests/onetimeauth8.out | 0 nacl/tests/scalarmult.c | 23 + nacl/tests/scalarmult.out | 4 + nacl/tests/scalarmult2.c | 23 + nacl/tests/scalarmult2.out | 4 + nacl/tests/scalarmult3.cpp | 31 + nacl/tests/scalarmult3.out | 4 + nacl/tests/scalarmult4.cpp | 31 + nacl/tests/scalarmult4.out | 4 + 
nacl/tests/scalarmult5.c | 30 + nacl/tests/scalarmult5.out | 4 + nacl/tests/scalarmult6.c | 30 + nacl/tests/scalarmult6.out | 4 + nacl/tests/scalarmult7.cpp | 32 + nacl/tests/scalarmult7.out | 4 + nacl/tests/secretbox.c | 56 + nacl/tests/secretbox.out | 19 + nacl/tests/secretbox2.c | 57 + nacl/tests/secretbox2.out | 17 + nacl/tests/secretbox3.cpp | 52 + nacl/tests/secretbox3.out | 19 + nacl/tests/secretbox4.cpp | 54 + nacl/tests/secretbox4.out | 17 + nacl/tests/secretbox5.cpp | 29 + nacl/tests/secretbox5.out | 0 nacl/tests/secretbox6.cpp | 42 + nacl/tests/secretbox6.out | 0 nacl/tests/secretbox7.c | 32 + nacl/tests/secretbox7.out | 0 nacl/tests/secretbox8.c | 37 + nacl/tests/secretbox8.out | 0 nacl/tests/stream.c | 29 + nacl/tests/stream.out | 1 + nacl/tests/stream2.c | 27 + nacl/tests/stream2.out | 1 + nacl/tests/stream3.c | 28 + nacl/tests/stream3.out | 4 + nacl/tests/stream4.c | 53 + nacl/tests/stream4.out | 17 + nacl/tests/stream5.cpp | 29 + nacl/tests/stream5.out | 1 + nacl/tests/stream6.cpp | 27 + nacl/tests/stream6.out | 1 + nacl/tests/stream7.cpp | 30 + nacl/tests/stream7.out | 4 + nacl/tests/stream8.cpp | 56 + nacl/tests/stream8.out | 17 + nacl/try-anything.c | 173 + nacl/version | 1 + 490 files changed, 97801 insertions(+) create mode 100644 nacl/MACROS create mode 100644 nacl/OPERATIONS create mode 100644 nacl/PROTOTYPES.c create mode 100644 nacl/PROTOTYPES.cpp create mode 100644 nacl/commandline/nacl-sha256.c create mode 100644 nacl/commandline/nacl-sha512.c create mode 100644 nacl/cpucycles/alpha.c create mode 100644 nacl/cpucycles/alpha.h create mode 100644 nacl/cpucycles/amd64cpuinfo.c create mode 100644 nacl/cpucycles/amd64cpuinfo.h create mode 100644 nacl/cpucycles/amd64cpuspeed.c create mode 100644 nacl/cpucycles/amd64cpuspeed.h create mode 100644 nacl/cpucycles/amd64tscfreq.c create mode 100644 nacl/cpucycles/amd64tscfreq.h create mode 100644 nacl/cpucycles/celllinux.c create mode 100644 nacl/cpucycles/celllinux.h create mode 100644 
nacl/cpucycles/cortex.c create mode 100644 nacl/cpucycles/cortex.h create mode 100644 nacl/cpucycles/dev4ns.c create mode 100644 nacl/cpucycles/dev4ns.h create mode 100755 nacl/cpucycles/do create mode 100644 nacl/cpucycles/gettimeofday.c create mode 100644 nacl/cpucycles/gettimeofday.h create mode 100644 nacl/cpucycles/hppapstat.c create mode 100644 nacl/cpucycles/hppapstat.h create mode 100644 nacl/cpucycles/ia64cpuinfo.c create mode 100644 nacl/cpucycles/ia64cpuinfo.h create mode 100644 nacl/cpucycles/mips.c create mode 100644 nacl/cpucycles/mips.h create mode 100644 nacl/cpucycles/monotonic.c create mode 100644 nacl/cpucycles/monotonic.h create mode 100644 nacl/cpucycles/monotoniccpuinfo.c create mode 100644 nacl/cpucycles/monotoniccpuinfo.h create mode 100644 nacl/cpucycles/osfreq.c create mode 100644 nacl/cpucycles/powerpccpuinfo.c create mode 100644 nacl/cpucycles/powerpccpuinfo.h create mode 100644 nacl/cpucycles/powerpcmacos.c create mode 100644 nacl/cpucycles/powerpcmacos.h create mode 100644 nacl/cpucycles/sgi.c create mode 100644 nacl/cpucycles/sgi.h create mode 100644 nacl/cpucycles/sparc32cpuinfo.c create mode 100644 nacl/cpucycles/sparc32cpuinfo.h create mode 100644 nacl/cpucycles/sparccpuinfo.c create mode 100644 nacl/cpucycles/sparccpuinfo.h create mode 100644 nacl/cpucycles/test.c create mode 100644 nacl/cpucycles/x86cpuinfo.c create mode 100644 nacl/cpucycles/x86cpuinfo.h create mode 100644 nacl/cpucycles/x86cpuspeed.c create mode 100644 nacl/cpucycles/x86cpuspeed.h create mode 100644 nacl/cpucycles/x86estimate.c create mode 100644 nacl/cpucycles/x86estimate.h create mode 100644 nacl/cpucycles/x86tscfreq.c create mode 100644 nacl/cpucycles/x86tscfreq.h create mode 100644 nacl/cpuid/cbytes.c create mode 100644 nacl/cpuid/cpuid.c create mode 100755 nacl/cpuid/do create mode 100644 nacl/cpuid/unknown.c create mode 100644 nacl/cpuid/x86.c create mode 100644 nacl/crypto_auth/hmacsha256/checksum create mode 100644 nacl/crypto_auth/hmacsha256/ref/api.h 
create mode 100644 nacl/crypto_auth/hmacsha256/ref/hmac.c create mode 100644 nacl/crypto_auth/hmacsha256/ref/verify.c create mode 100644 nacl/crypto_auth/hmacsha256/used create mode 100644 nacl/crypto_auth/hmacsha512256/checksum create mode 100644 nacl/crypto_auth/hmacsha512256/ref/api.h create mode 100644 nacl/crypto_auth/hmacsha512256/ref/hmac.c create mode 100644 nacl/crypto_auth/hmacsha512256/ref/verify.c create mode 100644 nacl/crypto_auth/hmacsha512256/selected create mode 100644 nacl/crypto_auth/hmacsha512256/used create mode 100644 nacl/crypto_auth/measure.c create mode 100644 nacl/crypto_auth/try.c create mode 100644 nacl/crypto_auth/wrapper-auth.cpp create mode 100644 nacl/crypto_auth/wrapper-verify.cpp create mode 100644 nacl/crypto_box/curve25519xsalsa20poly1305/checksum create mode 100644 nacl/crypto_box/curve25519xsalsa20poly1305/ref/after.c create mode 100644 nacl/crypto_box/curve25519xsalsa20poly1305/ref/api.h create mode 100644 nacl/crypto_box/curve25519xsalsa20poly1305/ref/before.c create mode 100644 nacl/crypto_box/curve25519xsalsa20poly1305/ref/box.c create mode 100644 nacl/crypto_box/curve25519xsalsa20poly1305/ref/keypair.c create mode 100644 nacl/crypto_box/curve25519xsalsa20poly1305/selected create mode 100644 nacl/crypto_box/curve25519xsalsa20poly1305/used create mode 100644 nacl/crypto_box/measure.c create mode 100644 nacl/crypto_box/try.c create mode 100644 nacl/crypto_box/wrapper-box.cpp create mode 100644 nacl/crypto_box/wrapper-keypair.cpp create mode 100644 nacl/crypto_box/wrapper-open.cpp create mode 100644 nacl/crypto_core/hsalsa20/checksum create mode 100644 nacl/crypto_core/hsalsa20/ref/api.h create mode 100644 nacl/crypto_core/hsalsa20/ref/core.c create mode 100644 nacl/crypto_core/hsalsa20/ref/implementors create mode 100644 nacl/crypto_core/hsalsa20/ref2/api.h create mode 100644 nacl/crypto_core/hsalsa20/ref2/core.c create mode 100644 nacl/crypto_core/hsalsa20/ref2/implementors create mode 100644 nacl/crypto_core/hsalsa20/used 
create mode 100644 nacl/crypto_core/measure.c create mode 100644 nacl/crypto_core/salsa20/checksum create mode 100644 nacl/crypto_core/salsa20/ref/api.h create mode 100644 nacl/crypto_core/salsa20/ref/core.c create mode 100644 nacl/crypto_core/salsa20/ref/implementors create mode 100644 nacl/crypto_core/salsa20/used create mode 100644 nacl/crypto_core/salsa2012/checksum create mode 100644 nacl/crypto_core/salsa2012/ref/api.h create mode 100644 nacl/crypto_core/salsa2012/ref/core.c create mode 100644 nacl/crypto_core/salsa2012/ref/implementors create mode 100644 nacl/crypto_core/salsa2012/used create mode 100644 nacl/crypto_core/salsa208/checksum create mode 100644 nacl/crypto_core/salsa208/ref/api.h create mode 100644 nacl/crypto_core/salsa208/ref/core.c create mode 100644 nacl/crypto_core/salsa208/ref/implementors create mode 100644 nacl/crypto_core/salsa208/used create mode 100644 nacl/crypto_core/try.c create mode 100644 nacl/crypto_core/wrapper-empty.cpp create mode 100644 nacl/crypto_hash/measure.c create mode 100644 nacl/crypto_hash/sha256/checksum create mode 100644 nacl/crypto_hash/sha256/ref/api.h create mode 100644 nacl/crypto_hash/sha256/ref/hash.c create mode 100644 nacl/crypto_hash/sha256/ref/implementors create mode 100644 nacl/crypto_hash/sha256/used create mode 100644 nacl/crypto_hash/sha512/checksum create mode 100644 nacl/crypto_hash/sha512/ref/api.h create mode 100644 nacl/crypto_hash/sha512/ref/hash.c create mode 100644 nacl/crypto_hash/sha512/ref/implementors create mode 100644 nacl/crypto_hash/sha512/selected create mode 100644 nacl/crypto_hash/sha512/used create mode 100644 nacl/crypto_hash/try.c create mode 100644 nacl/crypto_hash/wrapper-hash.cpp create mode 100644 nacl/crypto_hashblocks/measure.c create mode 100644 nacl/crypto_hashblocks/sha256/checksum create mode 100644 nacl/crypto_hashblocks/sha256/inplace/api.h create mode 100644 nacl/crypto_hashblocks/sha256/inplace/blocks.c create mode 100644 
nacl/crypto_hashblocks/sha256/inplace/implementors create mode 100644 nacl/crypto_hashblocks/sha256/ref/api.h create mode 100644 nacl/crypto_hashblocks/sha256/ref/blocks.c create mode 100644 nacl/crypto_hashblocks/sha256/ref/implementors create mode 100644 nacl/crypto_hashblocks/sha256/used create mode 100644 nacl/crypto_hashblocks/sha512/checksum create mode 100644 nacl/crypto_hashblocks/sha512/inplace/api.h create mode 100644 nacl/crypto_hashblocks/sha512/inplace/blocks.c create mode 100644 nacl/crypto_hashblocks/sha512/inplace/implementors create mode 100644 nacl/crypto_hashblocks/sha512/ref/api.h create mode 100644 nacl/crypto_hashblocks/sha512/ref/blocks.c create mode 100644 nacl/crypto_hashblocks/sha512/ref/implementors create mode 100644 nacl/crypto_hashblocks/sha512/selected create mode 100644 nacl/crypto_hashblocks/sha512/used create mode 100644 nacl/crypto_hashblocks/try.c create mode 100644 nacl/crypto_hashblocks/wrapper-empty.cpp create mode 100644 nacl/crypto_onetimeauth/measure.c create mode 100644 nacl/crypto_onetimeauth/poly1305/53/api.h create mode 100644 nacl/crypto_onetimeauth/poly1305/53/auth.c create mode 100644 nacl/crypto_onetimeauth/poly1305/53/verify.c create mode 100644 nacl/crypto_onetimeauth/poly1305/amd64/api.h create mode 100644 nacl/crypto_onetimeauth/poly1305/amd64/auth.s create mode 100644 nacl/crypto_onetimeauth/poly1305/amd64/constants.s create mode 100644 nacl/crypto_onetimeauth/poly1305/amd64/verify.c create mode 100644 nacl/crypto_onetimeauth/poly1305/checksum create mode 100644 nacl/crypto_onetimeauth/poly1305/ref/api.h create mode 100644 nacl/crypto_onetimeauth/poly1305/ref/auth.c create mode 100644 nacl/crypto_onetimeauth/poly1305/ref/verify.c create mode 100644 nacl/crypto_onetimeauth/poly1305/selected create mode 100644 nacl/crypto_onetimeauth/poly1305/used create mode 100644 nacl/crypto_onetimeauth/poly1305/x86/api.h create mode 100644 nacl/crypto_onetimeauth/poly1305/x86/auth.s create mode 100644 
nacl/crypto_onetimeauth/poly1305/x86/constants.s create mode 100644 nacl/crypto_onetimeauth/poly1305/x86/verify.c create mode 100644 nacl/crypto_onetimeauth/try.c create mode 100644 nacl/crypto_onetimeauth/wrapper-auth.cpp create mode 100644 nacl/crypto_onetimeauth/wrapper-verify.cpp create mode 100644 nacl/crypto_scalarmult/curve25519/athlon/api.h create mode 100644 nacl/crypto_scalarmult/curve25519/athlon/base.c create mode 100644 nacl/crypto_scalarmult/curve25519/athlon/const.s create mode 100644 nacl/crypto_scalarmult/curve25519/athlon/fromdouble.s create mode 100644 nacl/crypto_scalarmult/curve25519/athlon/implementors create mode 100644 nacl/crypto_scalarmult/curve25519/athlon/init.s create mode 100644 nacl/crypto_scalarmult/curve25519/athlon/mainloop.s create mode 100644 nacl/crypto_scalarmult/curve25519/athlon/mult.s create mode 100644 nacl/crypto_scalarmult/curve25519/athlon/smult.c create mode 100644 nacl/crypto_scalarmult/curve25519/athlon/square.s create mode 100644 nacl/crypto_scalarmult/curve25519/athlon/todouble.s create mode 100644 nacl/crypto_scalarmult/curve25519/checksum create mode 100644 nacl/crypto_scalarmult/curve25519/donna_c64/api.h create mode 100644 nacl/crypto_scalarmult/curve25519/donna_c64/base.c create mode 100644 nacl/crypto_scalarmult/curve25519/donna_c64/implementors create mode 100644 nacl/crypto_scalarmult/curve25519/donna_c64/smult.c create mode 100644 nacl/crypto_scalarmult/curve25519/ref/api.h create mode 100644 nacl/crypto_scalarmult/curve25519/ref/base.c create mode 100644 nacl/crypto_scalarmult/curve25519/ref/implementors create mode 100644 nacl/crypto_scalarmult/curve25519/ref/smult.c create mode 100644 nacl/crypto_scalarmult/curve25519/used create mode 100644 nacl/crypto_scalarmult/measure.c create mode 100644 nacl/crypto_scalarmult/try.c create mode 100644 nacl/crypto_scalarmult/wrapper-base.cpp create mode 100644 nacl/crypto_scalarmult/wrapper-mult.cpp create mode 100644 nacl/crypto_secretbox/measure.c create mode 
100644 nacl/crypto_secretbox/try.c create mode 100644 nacl/crypto_secretbox/wrapper-box.cpp create mode 100644 nacl/crypto_secretbox/wrapper-open.cpp create mode 100644 nacl/crypto_secretbox/xsalsa20poly1305/checksum create mode 100644 nacl/crypto_secretbox/xsalsa20poly1305/ref/api.h create mode 100644 nacl/crypto_secretbox/xsalsa20poly1305/ref/box.c create mode 100644 nacl/crypto_secretbox/xsalsa20poly1305/selected create mode 100644 nacl/crypto_secretbox/xsalsa20poly1305/used create mode 100644 nacl/crypto_sign/edwards25519sha512batch/ref/api.h create mode 100644 nacl/crypto_sign/edwards25519sha512batch/ref/fe25519.c create mode 100644 nacl/crypto_sign/edwards25519sha512batch/ref/fe25519.h create mode 100644 nacl/crypto_sign/edwards25519sha512batch/ref/ge25519.c create mode 100644 nacl/crypto_sign/edwards25519sha512batch/ref/ge25519.h create mode 100644 nacl/crypto_sign/edwards25519sha512batch/ref/sc25519.c create mode 100644 nacl/crypto_sign/edwards25519sha512batch/ref/sc25519.h create mode 100644 nacl/crypto_sign/edwards25519sha512batch/ref/sign.c create mode 100644 nacl/crypto_sign/edwards25519sha512batch/selected create mode 100644 nacl/crypto_sign/edwards25519sha512batch/used create mode 100644 nacl/crypto_sign/measure.c create mode 100644 nacl/crypto_sign/try.c create mode 100644 nacl/crypto_sign/wrapper-keypair.cpp create mode 100644 nacl/crypto_sign/wrapper-sign-open.cpp create mode 100644 nacl/crypto_sign/wrapper-sign.cpp create mode 100644 nacl/crypto_stream/aes128ctr/checksum create mode 100644 nacl/crypto_stream/aes128ctr/core2/afternm.s create mode 100644 nacl/crypto_stream/aes128ctr/core2/api.h create mode 100644 nacl/crypto_stream/aes128ctr/core2/beforenm.s create mode 100644 nacl/crypto_stream/aes128ctr/core2/stream.c create mode 100644 nacl/crypto_stream/aes128ctr/core2/xor.c create mode 100644 nacl/crypto_stream/aes128ctr/core2/xor_afternm.s create mode 100644 nacl/crypto_stream/aes128ctr/portable/afternm.c create mode 100644 
nacl/crypto_stream/aes128ctr/portable/api.h create mode 100644 nacl/crypto_stream/aes128ctr/portable/beforenm.c create mode 100644 nacl/crypto_stream/aes128ctr/portable/common.c create mode 100644 nacl/crypto_stream/aes128ctr/portable/common.h create mode 100644 nacl/crypto_stream/aes128ctr/portable/consts.c create mode 100644 nacl/crypto_stream/aes128ctr/portable/consts.h create mode 100644 nacl/crypto_stream/aes128ctr/portable/int128.c create mode 100644 nacl/crypto_stream/aes128ctr/portable/int128.h create mode 100644 nacl/crypto_stream/aes128ctr/portable/stream.c create mode 100644 nacl/crypto_stream/aes128ctr/portable/types.h create mode 100644 nacl/crypto_stream/aes128ctr/portable/xor_afternm.c create mode 100644 nacl/crypto_stream/aes128ctr/used create mode 100644 nacl/crypto_stream/measure.c create mode 100644 nacl/crypto_stream/salsa20/amd64_xmm6/api.h create mode 100644 nacl/crypto_stream/salsa20/amd64_xmm6/implementors create mode 100644 nacl/crypto_stream/salsa20/amd64_xmm6/stream.s create mode 100644 nacl/crypto_stream/salsa20/checksum create mode 100644 nacl/crypto_stream/salsa20/ref/api.h create mode 100644 nacl/crypto_stream/salsa20/ref/implementors create mode 100644 nacl/crypto_stream/salsa20/ref/stream.c create mode 100644 nacl/crypto_stream/salsa20/ref/xor.c create mode 100644 nacl/crypto_stream/salsa20/used create mode 100644 nacl/crypto_stream/salsa20/x86_xmm5/api.h create mode 100644 nacl/crypto_stream/salsa20/x86_xmm5/implementors create mode 100644 nacl/crypto_stream/salsa20/x86_xmm5/stream.s create mode 100644 nacl/crypto_stream/salsa2012/amd64_xmm6/api.h create mode 100644 nacl/crypto_stream/salsa2012/amd64_xmm6/implementors create mode 100644 nacl/crypto_stream/salsa2012/amd64_xmm6/stream.s create mode 100644 nacl/crypto_stream/salsa2012/checksum create mode 100644 nacl/crypto_stream/salsa2012/ref/api.h create mode 100644 nacl/crypto_stream/salsa2012/ref/implementors create mode 100644 nacl/crypto_stream/salsa2012/ref/stream.c create 
mode 100644 nacl/crypto_stream/salsa2012/ref/xor.c create mode 100644 nacl/crypto_stream/salsa2012/used create mode 100644 nacl/crypto_stream/salsa2012/x86_xmm5/api.h create mode 100644 nacl/crypto_stream/salsa2012/x86_xmm5/implementors create mode 100644 nacl/crypto_stream/salsa2012/x86_xmm5/stream.s create mode 100644 nacl/crypto_stream/salsa208/amd64_xmm6/api.h create mode 100644 nacl/crypto_stream/salsa208/amd64_xmm6/implementors create mode 100644 nacl/crypto_stream/salsa208/amd64_xmm6/stream.s create mode 100644 nacl/crypto_stream/salsa208/checksum create mode 100644 nacl/crypto_stream/salsa208/ref/api.h create mode 100644 nacl/crypto_stream/salsa208/ref/implementors create mode 100644 nacl/crypto_stream/salsa208/ref/stream.c create mode 100644 nacl/crypto_stream/salsa208/ref/xor.c create mode 100644 nacl/crypto_stream/salsa208/used create mode 100644 nacl/crypto_stream/salsa208/x86_xmm5/api.h create mode 100644 nacl/crypto_stream/salsa208/x86_xmm5/implementors create mode 100644 nacl/crypto_stream/salsa208/x86_xmm5/stream.s create mode 100644 nacl/crypto_stream/try.c create mode 100644 nacl/crypto_stream/wrapper-stream.cpp create mode 100644 nacl/crypto_stream/wrapper-xor.cpp create mode 100644 nacl/crypto_stream/xsalsa20/checksum create mode 100644 nacl/crypto_stream/xsalsa20/ref/api.h create mode 100644 nacl/crypto_stream/xsalsa20/ref/implementors create mode 100644 nacl/crypto_stream/xsalsa20/ref/stream.c create mode 100644 nacl/crypto_stream/xsalsa20/ref/xor.c create mode 100644 nacl/crypto_stream/xsalsa20/selected create mode 100644 nacl/crypto_stream/xsalsa20/used create mode 100644 nacl/crypto_verify/16/checksum create mode 100644 nacl/crypto_verify/16/ref/api.h create mode 100644 nacl/crypto_verify/16/ref/verify.c create mode 100644 nacl/crypto_verify/16/used create mode 100644 nacl/crypto_verify/32/checksum create mode 100644 nacl/crypto_verify/32/ref/api.h create mode 100644 nacl/crypto_verify/32/ref/verify.c create mode 100644 
nacl/crypto_verify/32/used create mode 100644 nacl/crypto_verify/measure.c create mode 100644 nacl/crypto_verify/try.c create mode 100644 nacl/crypto_verify/wrapper-empty.cpp create mode 100644 nacl/curvecp/LIBS create mode 100644 nacl/curvecp/README create mode 100644 nacl/curvecp/SOURCES create mode 100644 nacl/curvecp/TARGETS create mode 100644 nacl/curvecp/blocking.c create mode 100644 nacl/curvecp/blocking.h create mode 100644 nacl/curvecp/byte.h create mode 100644 nacl/curvecp/byte_copy.c create mode 100644 nacl/curvecp/byte_isequal.c create mode 100644 nacl/curvecp/byte_zero.c create mode 100644 nacl/curvecp/crypto_block.c create mode 100644 nacl/curvecp/crypto_block.h create mode 100644 nacl/curvecp/curvecpclient.c create mode 100644 nacl/curvecp/curvecpmakekey.c create mode 100644 nacl/curvecp/curvecpmessage.c create mode 100644 nacl/curvecp/curvecpprintkey.c create mode 100644 nacl/curvecp/curvecpserver.c create mode 100644 nacl/curvecp/die.c create mode 100644 nacl/curvecp/die.h create mode 100644 nacl/curvecp/e.c create mode 100644 nacl/curvecp/e.h create mode 100644 nacl/curvecp/hexparse.c create mode 100644 nacl/curvecp/hexparse.h create mode 100644 nacl/curvecp/load.c create mode 100644 nacl/curvecp/load.h create mode 100644 nacl/curvecp/nameparse.c create mode 100644 nacl/curvecp/nameparse.h create mode 100644 nacl/curvecp/nanoseconds.c create mode 100644 nacl/curvecp/nanoseconds.h create mode 100644 nacl/curvecp/open.h create mode 100644 nacl/curvecp/open_cwd.c create mode 100644 nacl/curvecp/open_lock.c create mode 100644 nacl/curvecp/open_pipe.c create mode 100644 nacl/curvecp/open_read.c create mode 100644 nacl/curvecp/open_write.c create mode 100644 nacl/curvecp/portparse.c create mode 100644 nacl/curvecp/portparse.h create mode 100644 nacl/curvecp/randommod.c create mode 100644 nacl/curvecp/randommod.h create mode 100644 nacl/curvecp/safenonce.c create mode 100644 nacl/curvecp/safenonce.h create mode 100644 nacl/curvecp/savesync.c create mode 
100644 nacl/curvecp/savesync.h create mode 100644 nacl/curvecp/socket.h create mode 100644 nacl/curvecp/socket_bind.c create mode 100644 nacl/curvecp/socket_recv.c create mode 100644 nacl/curvecp/socket_send.c create mode 100644 nacl/curvecp/socket_udp.c create mode 100644 nacl/curvecp/uint16_pack.c create mode 100644 nacl/curvecp/uint16_pack.h create mode 100644 nacl/curvecp/uint16_unpack.c create mode 100644 nacl/curvecp/uint16_unpack.h create mode 100644 nacl/curvecp/uint32_pack.c create mode 100644 nacl/curvecp/uint32_pack.h create mode 100644 nacl/curvecp/uint32_unpack.c create mode 100644 nacl/curvecp/uint32_unpack.h create mode 100644 nacl/curvecp/uint64_pack.c create mode 100644 nacl/curvecp/uint64_pack.h create mode 100644 nacl/curvecp/uint64_unpack.c create mode 100644 nacl/curvecp/uint64_unpack.h create mode 100644 nacl/curvecp/writeall.c create mode 100644 nacl/curvecp/writeall.h create mode 100755 nacl/do create mode 100644 nacl/inttypes/crypto_int16.c create mode 100644 nacl/inttypes/crypto_int32.c create mode 100644 nacl/inttypes/crypto_int64.c create mode 100644 nacl/inttypes/crypto_int8.c create mode 100644 nacl/inttypes/crypto_uint16.c create mode 100644 nacl/inttypes/crypto_uint32.c create mode 100644 nacl/inttypes/crypto_uint64.c create mode 100644 nacl/inttypes/crypto_uint8.c create mode 100644 nacl/inttypes/do create mode 100644 nacl/inttypes/signed.h create mode 100644 nacl/inttypes/unsigned.h create mode 100644 nacl/measure-anything.c create mode 100644 nacl/okcompilers/abiname.c create mode 100644 nacl/okcompilers/archivers create mode 100644 nacl/okcompilers/c create mode 100644 nacl/okcompilers/cpp create mode 100755 nacl/okcompilers/do create mode 100644 nacl/okcompilers/lib.c create mode 100644 nacl/okcompilers/lib.cpp create mode 100644 nacl/okcompilers/main.c create mode 100644 nacl/okcompilers/main.cpp create mode 100644 nacl/randombytes/devurandom.c create mode 100644 nacl/randombytes/devurandom.h create mode 100644 
nacl/randombytes/do create mode 100644 nacl/randombytes/test.c create mode 100644 nacl/tests/auth.c create mode 100644 nacl/tests/auth.out create mode 100644 nacl/tests/auth2.c create mode 100644 nacl/tests/auth2.out create mode 100644 nacl/tests/auth3.c create mode 100644 nacl/tests/auth3.out create mode 100644 nacl/tests/auth4.cpp create mode 100644 nacl/tests/auth4.out create mode 100644 nacl/tests/auth5.c create mode 100644 nacl/tests/auth5.out create mode 100644 nacl/tests/auth6.cpp create mode 100644 nacl/tests/auth6.out create mode 100644 nacl/tests/box.c create mode 100644 nacl/tests/box.out create mode 100644 nacl/tests/box2.c create mode 100644 nacl/tests/box2.out create mode 100644 nacl/tests/box3.cpp create mode 100644 nacl/tests/box3.out create mode 100644 nacl/tests/box4.cpp create mode 100644 nacl/tests/box4.out create mode 100644 nacl/tests/box5.cpp create mode 100644 nacl/tests/box5.out create mode 100644 nacl/tests/box6.cpp create mode 100644 nacl/tests/box6.out create mode 100644 nacl/tests/box7.c create mode 100644 nacl/tests/box7.out create mode 100644 nacl/tests/box8.c create mode 100644 nacl/tests/box8.out create mode 100644 nacl/tests/core1.c create mode 100644 nacl/tests/core1.out create mode 100644 nacl/tests/core2.c create mode 100644 nacl/tests/core2.out create mode 100644 nacl/tests/core3.c create mode 100644 nacl/tests/core3.out create mode 100644 nacl/tests/core4.c create mode 100644 nacl/tests/core4.out create mode 100644 nacl/tests/core5.c create mode 100644 nacl/tests/core5.out create mode 100644 nacl/tests/core6.c create mode 100644 nacl/tests/core6.out create mode 100644 nacl/tests/hash.c create mode 100644 nacl/tests/hash.out create mode 100644 nacl/tests/hash2.cpp create mode 100644 nacl/tests/hash2.out create mode 100644 nacl/tests/hash3.c create mode 100644 nacl/tests/hash3.out create mode 100644 nacl/tests/hash4.cpp create mode 100644 nacl/tests/hash4.out create mode 100644 nacl/tests/onetimeauth.c create mode 100644 
nacl/tests/onetimeauth.out create mode 100644 nacl/tests/onetimeauth2.c create mode 100644 nacl/tests/onetimeauth2.out create mode 100644 nacl/tests/onetimeauth5.cpp create mode 100644 nacl/tests/onetimeauth5.out create mode 100644 nacl/tests/onetimeauth6.cpp create mode 100644 nacl/tests/onetimeauth6.out create mode 100644 nacl/tests/onetimeauth7.c create mode 100644 nacl/tests/onetimeauth7.out create mode 100644 nacl/tests/onetimeauth8.cpp create mode 100644 nacl/tests/onetimeauth8.out create mode 100644 nacl/tests/scalarmult.c create mode 100644 nacl/tests/scalarmult.out create mode 100644 nacl/tests/scalarmult2.c create mode 100644 nacl/tests/scalarmult2.out create mode 100644 nacl/tests/scalarmult3.cpp create mode 100644 nacl/tests/scalarmult3.out create mode 100644 nacl/tests/scalarmult4.cpp create mode 100644 nacl/tests/scalarmult4.out create mode 100644 nacl/tests/scalarmult5.c create mode 100644 nacl/tests/scalarmult5.out create mode 100644 nacl/tests/scalarmult6.c create mode 100644 nacl/tests/scalarmult6.out create mode 100644 nacl/tests/scalarmult7.cpp create mode 100644 nacl/tests/scalarmult7.out create mode 100644 nacl/tests/secretbox.c create mode 100644 nacl/tests/secretbox.out create mode 100644 nacl/tests/secretbox2.c create mode 100644 nacl/tests/secretbox2.out create mode 100644 nacl/tests/secretbox3.cpp create mode 100644 nacl/tests/secretbox3.out create mode 100644 nacl/tests/secretbox4.cpp create mode 100644 nacl/tests/secretbox4.out create mode 100644 nacl/tests/secretbox5.cpp create mode 100644 nacl/tests/secretbox5.out create mode 100644 nacl/tests/secretbox6.cpp create mode 100644 nacl/tests/secretbox6.out create mode 100644 nacl/tests/secretbox7.c create mode 100644 nacl/tests/secretbox7.out create mode 100644 nacl/tests/secretbox8.c create mode 100644 nacl/tests/secretbox8.out create mode 100644 nacl/tests/stream.c create mode 100644 nacl/tests/stream.out create mode 100644 nacl/tests/stream2.c create mode 100644 nacl/tests/stream2.out 
create mode 100644 nacl/tests/stream3.c create mode 100644 nacl/tests/stream3.out create mode 100644 nacl/tests/stream4.c create mode 100644 nacl/tests/stream4.out create mode 100644 nacl/tests/stream5.cpp create mode 100644 nacl/tests/stream5.out create mode 100644 nacl/tests/stream6.cpp create mode 100644 nacl/tests/stream6.out create mode 100644 nacl/tests/stream7.cpp create mode 100644 nacl/tests/stream7.out create mode 100644 nacl/tests/stream8.cpp create mode 100644 nacl/tests/stream8.out create mode 100644 nacl/try-anything.c create mode 100644 nacl/version (limited to 'nacl') diff --git a/nacl/MACROS b/nacl/MACROS new file mode 100644 index 00000000..26f6cd02 --- /dev/null +++ b/nacl/MACROS @@ -0,0 +1,56 @@ +crypto_verify +crypto_verify_BYTES +crypto_core +crypto_core_OUTPUTBYTES +crypto_core_INPUTBYTES +crypto_core_KEYBYTES +crypto_core_CONSTBYTES +crypto_hashblocks +crypto_hashblocks_STATEBYTES +crypto_hashblocks_BLOCKBYTES +crypto_hash +crypto_hash_BYTES +crypto_stream +crypto_stream_xor +crypto_stream_beforenm +crypto_stream_afternm +crypto_stream_xor_afternm +crypto_stream_KEYBYTES +crypto_stream_NONCEBYTES +crypto_stream_BEFORENMBYTES +crypto_onetimeauth +crypto_onetimeauth_verify +crypto_onetimeauth_BYTES +crypto_onetimeauth_KEYBYTES +crypto_auth +crypto_auth_verify +crypto_auth_BYTES +crypto_auth_KEYBYTES +crypto_secretbox +crypto_secretbox_open +crypto_secretbox_KEYBYTES +crypto_secretbox_NONCEBYTES +crypto_secretbox_ZEROBYTES +crypto_secretbox_BOXZEROBYTES +crypto_scalarmult +crypto_scalarmult_base +crypto_scalarmult_BYTES +crypto_scalarmult_SCALARBYTES +crypto_box +crypto_box_open +crypto_box_keypair +crypto_box_beforenm +crypto_box_afternm +crypto_box_open_afternm +crypto_box_PUBLICKEYBYTES +crypto_box_SECRETKEYBYTES +crypto_box_BEFORENMBYTES +crypto_box_NONCEBYTES +crypto_box_ZEROBYTES +crypto_box_BOXZEROBYTES +crypto_sign +crypto_sign_open +crypto_sign_keypair +crypto_sign_BYTES +crypto_sign_PUBLICKEYBYTES +crypto_sign_SECRETKEYBYTES diff 
--git a/nacl/OPERATIONS b/nacl/OPERATIONS new file mode 100644 index 00000000..5fc25d35 --- /dev/null +++ b/nacl/OPERATIONS @@ -0,0 +1,11 @@ +crypto_verify +crypto_core +crypto_hashblocks +crypto_hash +crypto_stream +crypto_onetimeauth +crypto_auth +crypto_secretbox +crypto_scalarmult +crypto_box +crypto_sign diff --git a/nacl/PROTOTYPES.c b/nacl/PROTOTYPES.c new file mode 100644 index 00000000..bc8ca531 --- /dev/null +++ b/nacl/PROTOTYPES.c @@ -0,0 +1,26 @@ +extern int crypto_verify(const unsigned char *,const unsigned char *); +extern int crypto_core(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *); +extern int crypto_hashblocks(unsigned char *,const unsigned char *,unsigned long long); +extern int crypto_hash(unsigned char *,const unsigned char *,unsigned long long); +extern int crypto_stream(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_stream_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_stream_beforenm(unsigned char *,const unsigned char *); +extern int crypto_stream_afternm(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_stream_xor_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_onetimeauth(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *); +extern int crypto_onetimeauth_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *); +extern int crypto_auth(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *); +extern int crypto_auth_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *); +extern int crypto_secretbox(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int 
crypto_secretbox_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_scalarmult(unsigned char *,const unsigned char *,const unsigned char *); +extern int crypto_scalarmult_base(unsigned char *,const unsigned char *); +extern int crypto_box(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *); +extern int crypto_box_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *); +extern int crypto_box_keypair(unsigned char *,unsigned char *); +extern int crypto_box_beforenm(unsigned char *,const unsigned char *,const unsigned char *); +extern int crypto_box_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_box_open_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_sign(unsigned char *,unsigned long long *,const unsigned char *,unsigned long long,const unsigned char *); +extern int crypto_sign_open(unsigned char *,unsigned long long *,const unsigned char *,unsigned long long,const unsigned char *); +extern int crypto_sign_keypair(unsigned char *,unsigned char *); diff --git a/nacl/PROTOTYPES.cpp b/nacl/PROTOTYPES.cpp new file mode 100644 index 00000000..4318a049 --- /dev/null +++ b/nacl/PROTOTYPES.cpp @@ -0,0 +1,17 @@ +extern std::string crypto_auth(const std::string &,const std::string &); +extern void crypto_auth_verify(const std::string &,const std::string &,const std::string &); +extern std::string crypto_box(const std::string &,const std::string &,const std::string &,const std::string &); +extern std::string crypto_box_open(const std::string &,const std::string &,const std::string &,const std::string &); +extern std::string crypto_box_keypair(std::string *); +extern std::string crypto_hash(const 
std::string &); +extern std::string crypto_onetimeauth(const std::string &,const std::string &); +extern void crypto_onetimeauth_verify(const std::string &,const std::string &,const std::string &); +extern std::string crypto_scalarmult(const std::string &,const std::string &); +extern std::string crypto_scalarmult_base(const std::string &); +extern std::string crypto_secretbox(const std::string &,const std::string &,const std::string &); +extern std::string crypto_secretbox_open(const std::string &,const std::string &,const std::string &); +extern std::string crypto_stream(size_t,const std::string &,const std::string &); +extern std::string crypto_stream_xor(const std::string &,const std::string &,const std::string &); +extern std::string crypto_sign(const std::string &,const std::string &); +extern std::string crypto_sign_open(const std::string &,const std::string &); +extern std::string crypto_sign_keypair(std::string *); diff --git a/nacl/commandline/nacl-sha256.c b/nacl/commandline/nacl-sha256.c new file mode 100644 index 00000000..8e0df453 --- /dev/null +++ b/nacl/commandline/nacl-sha256.c @@ -0,0 +1,64 @@ +/* +commandline/nacl-sha256.c version 20080713 +D. J. Bernstein +Public domain. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include "crypto_hash_sha256.h" + +unsigned char *input; +unsigned long long inputalloc; +unsigned long long inputlen; + +unsigned char h[crypto_hash_sha256_BYTES]; + +void h_print(void) +{ + int i; + for (i = 0;i < crypto_hash_sha256_BYTES;++i) printf("%02x",255 & (int) h[i]); + printf("\n"); +} + +int main() +{ + struct stat st; + int ch; + + if (fstat(0,&st) == 0) { + input = mmap(0,st.st_size,PROT_READ,MAP_SHARED,0,0); + if (input != MAP_FAILED) { + crypto_hash_sha256(h,input,st.st_size); + h_print(); + return 0; + } + } + + input = 0; + inputalloc = 0; + inputlen = 0; + + while ((ch = getchar()) != EOF) { + if (inputlen >= inputalloc) { + void *newinput; + while (inputlen >= inputalloc) + inputalloc = inputalloc * 2 + 1; + if (posix_memalign(&newinput,16,inputalloc) != 0) return 111; + memcpy(newinput,input,inputlen); + free(input); + input = newinput; + } + input[inputlen++] = ch; + } + + crypto_hash_sha256(h,input,inputlen); + h_print(); + + return 0; +} diff --git a/nacl/commandline/nacl-sha512.c b/nacl/commandline/nacl-sha512.c new file mode 100644 index 00000000..6864c76a --- /dev/null +++ b/nacl/commandline/nacl-sha512.c @@ -0,0 +1,64 @@ +/* +commandline/nacl-sha512.c version 20080713 +D. J. Bernstein +Public domain. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include "crypto_hash_sha512.h" + +unsigned char *input; +unsigned long long inputalloc; +unsigned long long inputlen; + +unsigned char h[crypto_hash_sha512_BYTES]; + +void h_print(void) +{ + int i; + for (i = 0;i < crypto_hash_sha512_BYTES;++i) printf("%02x",255 & (int) h[i]); + printf("\n"); +} + +int main() +{ + struct stat st; + int ch; + + if (fstat(0,&st) == 0) { + input = mmap(0,st.st_size,PROT_READ,MAP_SHARED,0,0); + if (input != MAP_FAILED) { + crypto_hash_sha512(h,input,st.st_size); + h_print(); + return 0; + } + } + + input = 0; + inputalloc = 0; + inputlen = 0; + + while ((ch = getchar()) != EOF) { + if (inputlen >= inputalloc) { + void *newinput; + while (inputlen >= inputalloc) + inputalloc = inputalloc * 2 + 1; + if (posix_memalign(&newinput,16,inputalloc) != 0) return 111; + memcpy(newinput,input,inputlen); + free(input); + input = newinput; + } + input[inputlen++] = ch; + } + + crypto_hash_sha512(h,input,inputlen); + h_print(); + + return 0; +} diff --git a/nacl/cpucycles/alpha.c b/nacl/cpucycles/alpha.c new file mode 100644 index 00000000..ef497999 --- /dev/null +++ b/nacl/cpucycles/alpha.c @@ -0,0 +1,80 @@ +/* +cpucycles/alpha.c version 20060316 +D. J. Bernstein +Public domain. 
+*/ + +#include +#include +#include + +static long long tod(void) +{ + struct timeval t; + gettimeofday(&t,(struct timezone *) 0); + return t.tv_sec * (long long) 1000000 + t.tv_usec; +} + +static long long rpcc(void) +{ + unsigned long long t; + asm volatile("rpcc %0" : "=r"(t)); + return t & 0xffffffff; +} + +static long long firstrpcc; +static long long firsttod; +static long long lastrpcc; +static long long lasttod; +static double mhz = 0; + +static void init(void) +{ + firstrpcc = rpcc(); + firsttod = tod(); + + do { + lastrpcc = rpcc(); + lasttod = tod(); + } while (lasttod - firsttod < 10000); + + lastrpcc -= firstrpcc; lastrpcc &= 0xffffffff; + lasttod -= firsttod; + + mhz = (double) lastrpcc / (double) lasttod; +} + +long long cpucycles_alpha(void) +{ + double x; + long long y; + + if (!mhz) init(); + + lastrpcc = rpcc(); + lasttod = tod(); + + lastrpcc -= firstrpcc; lastrpcc &= 0xffffffff; + lasttod -= firsttod; + + /* Number of cycles since firstrpcc is lastrpcc + 2^32 y for unknown y. */ + /* Number of microseconds since firsttod is lasttod. */ + + x = (lasttod * mhz - lastrpcc) * 0.00000000023283064365386962890625; + y = x; + while (x > y + 0.5) y += 1; + while (x < y - 0.5) y -= 1; + + y *= 4294967296ULL; + lastrpcc += y; + + mhz = (double) lastrpcc / (double) lasttod; + + return firstrpcc + lastrpcc; +} + +long long cpucycles_alpha_persecond(void) +{ + if (!mhz) init(); + return 1000000.0 * mhz; +} diff --git a/nacl/cpucycles/alpha.h b/nacl/cpucycles/alpha.h new file mode 100644 index 00000000..c97672af --- /dev/null +++ b/nacl/cpucycles/alpha.h @@ -0,0 +1,27 @@ +/* +cpucycles alpha.h version 20060318 +D. J. Bernstein +Public domain. 
+*/ + +#ifndef CPUCYCLES_alpha_h +#define CPUCYCLES_alpha_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_alpha(void); +extern long long cpucycles_alpha_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "alpha" +#define cpucycles cpucycles_alpha +#define cpucycles_persecond cpucycles_alpha_persecond +#endif + +#endif diff --git a/nacl/cpucycles/amd64cpuinfo.c b/nacl/cpucycles/amd64cpuinfo.c new file mode 100644 index 00000000..729f2612 --- /dev/null +++ b/nacl/cpucycles/amd64cpuinfo.c @@ -0,0 +1,16 @@ +#include +#include +#include "osfreq.c" + +long long cpucycles_amd64cpuinfo(void) +{ + unsigned long long result; + asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" + : "=a" (result) :: "%rdx"); + return result; +} + +long long cpucycles_amd64cpuinfo_persecond(void) +{ + return osfreq(); +} diff --git a/nacl/cpucycles/amd64cpuinfo.h b/nacl/cpucycles/amd64cpuinfo.h new file mode 100644 index 00000000..8f858ae7 --- /dev/null +++ b/nacl/cpucycles/amd64cpuinfo.h @@ -0,0 +1,27 @@ +/* +cpucycles amd64cpuinfo.h version 20100803 +D. J. Bernstein +Public domain. 
+*/ + +#ifndef CPUCYCLES_amd64cpuinfo_h +#define CPUCYCLES_amd64cpuinfo_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_amd64cpuinfo(void); +extern long long cpucycles_amd64cpuinfo_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "amd64cpuinfo" +#define cpucycles cpucycles_amd64cpuinfo +#define cpucycles_persecond cpucycles_amd64cpuinfo_persecond +#endif + +#endif diff --git a/nacl/cpucycles/amd64cpuspeed.c b/nacl/cpucycles/amd64cpuspeed.c new file mode 100644 index 00000000..7e89511c --- /dev/null +++ b/nacl/cpucycles/amd64cpuspeed.c @@ -0,0 +1,25 @@ +#include +#include +#include +#include + +long long cpucycles_amd64cpuspeed(void) +{ + unsigned long long result; + asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" + : "=a" (result) :: "%rdx"); + return result; +} + +long long cpucycles_amd64cpuspeed_persecond(void) +{ + int oid[2]; + int val; + size_t size; + oid[0] = CTL_HW; + oid[1] = HW_CPUSPEED; + size = sizeof val; + if (sysctl(oid,2,&val,&size,0,0) == -1) return 0; + if (size != sizeof val) return 0; + return val * 1000000LL; +} diff --git a/nacl/cpucycles/amd64cpuspeed.h b/nacl/cpucycles/amd64cpuspeed.h new file mode 100644 index 00000000..1f6ed54d --- /dev/null +++ b/nacl/cpucycles/amd64cpuspeed.h @@ -0,0 +1,27 @@ +/* +cpucycles amd64cpuspeed.h version 20090716 +Matthew Dempsky +Public domain. 
+*/ + +#ifndef CPUCYCLES_amd64cpuspeed_h +#define CPUCYCLES_amd64cpuspeed_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_amd64cpuspeed(void); +extern long long cpucycles_amd64cpuspeed_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "amd64cpuspeed" +#define cpucycles cpucycles_amd64cpuspeed +#define cpucycles_persecond cpucycles_amd64cpuspeed_persecond +#endif + +#endif diff --git a/nacl/cpucycles/amd64tscfreq.c b/nacl/cpucycles/amd64tscfreq.c new file mode 100644 index 00000000..ef182c1b --- /dev/null +++ b/nacl/cpucycles/amd64tscfreq.c @@ -0,0 +1,18 @@ +#include +#include + +long long cpucycles_amd64tscfreq(void) +{ + unsigned long long result; + asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax" + : "=a" (result) :: "%rdx"); + return result; +} + +long long cpucycles_amd64tscfreq_persecond(void) +{ + long result = 0; + size_t resultlen = sizeof(long); + sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0); + return result; +} diff --git a/nacl/cpucycles/amd64tscfreq.h b/nacl/cpucycles/amd64tscfreq.h new file mode 100644 index 00000000..a3c7aa6f --- /dev/null +++ b/nacl/cpucycles/amd64tscfreq.h @@ -0,0 +1,27 @@ +/* +cpucycles amd64tscfreq.h version 20060318 +D. J. Bernstein +Public domain. 
+*/ + +#ifndef CPUCYCLES_amd64tscfreq_h +#define CPUCYCLES_amd64tscfreq_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_amd64tscfreq(void); +extern long long cpucycles_amd64tscfreq_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "amd64tscfreq" +#define cpucycles cpucycles_amd64tscfreq +#define cpucycles_persecond cpucycles_amd64tscfreq_persecond +#endif + +#endif diff --git a/nacl/cpucycles/celllinux.c b/nacl/cpucycles/celllinux.c new file mode 100644 index 00000000..83a0c38a --- /dev/null +++ b/nacl/cpucycles/celllinux.c @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include + +static long myround(double u) +{ + long result = u; + while (result + 0.5 < u) result += 1; + while (result - 0.5 > u) result -= 1; + return result; +} + +static long long microseconds(void) +{ + struct timeval t; + gettimeofday(&t,(struct timezone *) 0); + return t.tv_sec * (long long) 1000000 + t.tv_usec; +} + +static long long timebase(void) +{ + unsigned long long result; + result = -spu_read_decrementer(); + return 0xffffffff & result; +} + +static double cpufrequency = 0; +static long tbcycles = 0; + +static double guesstbcycles(void) +{ + long long tb0; long long us0; + long long tb1; long long us1; + + tb0 = timebase(); + us0 = microseconds(); + do { + tb1 = timebase(); + us1 = microseconds(); + } while (us1 - us0 < 10000 || tb1 - tb0 < 1000); + if (tb1 <= tb0) return 0; + tb1 -= tb0; + us1 -= us0; + return (cpufrequency * 0.000001 * (double) us1) / (double) tb1; +} + +static void init(void) +{ + int loop; + double guess1; + double guess2; + + spu_write_decrementer(0xffffffff); + + cpufrequency = 3192000000.0; + + for (loop = 0;loop < 100;++loop) { + guess1 = guesstbcycles(); + guess2 = guesstbcycles(); + tbcycles = myround(guess1); + if (guess1 - tbcycles > 0.1) continue; + if (tbcycles - guess1 > 0.1) continue; + if (guess2 - tbcycles > 0.1) continue; + if 
(tbcycles - guess2 > 0.1) continue; + return; + } + tbcycles = 0; +} + +long long cpucycles_celllinux(void) +{ + if (!tbcycles) init(); + return timebase() * tbcycles; +} + +long long cpucycles_celllinux_persecond(void) +{ + if (!tbcycles) init(); + return cpufrequency; +} diff --git a/nacl/cpucycles/celllinux.h b/nacl/cpucycles/celllinux.h new file mode 100644 index 00000000..75a5a3f2 --- /dev/null +++ b/nacl/cpucycles/celllinux.h @@ -0,0 +1,27 @@ +/* +cpucycles celllinux.h version 20081201 +D. J. Bernstein +Public domain. +*/ + +#ifndef CPUCYCLES_celllinux_h +#define CPUCYCLES_celllinux_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_celllinux(void); +extern long long cpucycles_celllinux_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "celllinux" +#define cpucycles cpucycles_celllinux +#define cpucycles_persecond cpucycles_celllinux_persecond +#endif + +#endif diff --git a/nacl/cpucycles/cortex.c b/nacl/cpucycles/cortex.c new file mode 100644 index 00000000..07e2fa02 --- /dev/null +++ b/nacl/cpucycles/cortex.c @@ -0,0 +1,73 @@ +/* +cpucycles/cortex.c version 20101203 +D. J. Bernstein +Public domain. 
+*/ + +#define SCALE 1 +#include +#include +#include + +static int enabled = 0; + +static int prev[3]; +static unsigned long long prevcycles = 0; +static int now[3]; +static long long cyclespersec = 0; + +static void readticks(unsigned int *result) +{ + struct timeval t; + unsigned int cc; + if (!enabled) { + asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(17)); + asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f)); + asm volatile("mcr p15, 0, %0, c9, c12, 3" :: "r"(0x8000000f)); + enabled = 1; + } + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc)); + gettimeofday(&t,(struct timezone *) 0); + result[0] = cc; + result[1] = t.tv_usec; + result[2] = t.tv_sec; +} + +long long cpucycles_cortex(void) +{ + unsigned long long delta4; + int deltan; + int deltas; + unsigned long long guesscycles; + + readticks(now); + delta4 = (unsigned int) (now[0] - prev[0]); /* unsigned change in number of cycles mod 2^32 */ + deltan = now[1] - prev[1]; /* signed change in number of nanoseconds mod 10^9 */ + deltas = now[2] - prev[2]; /* signed change in number of seconds */ + if ((deltas == 0 && deltan < 200000) || (deltas == 1 && deltan < -800000)) + return (prevcycles + delta4) * SCALE; + + prev[0] = now[0]; + prev[1] = now[1]; + prev[2] = now[2]; + + if ((deltas == 0 && deltan < 300000) || (deltas == 1 && deltan < -700000)) { + // actual number of cycles cannot have increased by 2^32 in <0.3ms + cyclespersec = 1000000 * (unsigned long long) delta4; + cyclespersec /= deltan + 1000000 * (long long) deltas; + } else { + guesscycles = deltas * cyclespersec; + guesscycles += (deltan * cyclespersec) / 1000000; + while (delta4 + 2147483648ULL < guesscycles) delta4 += 4294967296ULL; + /* XXX: could do longer-term extrapolation here */ + } + + prevcycles += delta4; + return prevcycles * SCALE; +} + +long long cpucycles_cortex_persecond(void) +{ + while (!cyclespersec) cpucycles_cortex(); + return cyclespersec * SCALE; +} diff --git a/nacl/cpucycles/cortex.h 
b/nacl/cpucycles/cortex.h new file mode 100644 index 00000000..e622f132 --- /dev/null +++ b/nacl/cpucycles/cortex.h @@ -0,0 +1,27 @@ +/* +cpucycles cortex.h version 20100912 +D. J. Bernstein +Public domain. +*/ + +#ifndef CPUCYCLES_cortex_h +#define CPUCYCLES_cortex_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_cortex(void); +extern long long cpucycles_cortex_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "cortex" +#define cpucycles cpucycles_cortex +#define cpucycles_persecond cpucycles_cortex_persecond +#endif + +#endif diff --git a/nacl/cpucycles/dev4ns.c b/nacl/cpucycles/dev4ns.c new file mode 100644 index 00000000..73ff5755 --- /dev/null +++ b/nacl/cpucycles/dev4ns.c @@ -0,0 +1,62 @@ +#include +#include +#include +#include +#include +#include + +static int fddev = -1; +static int prev[3]; +static unsigned long long prevcycles = 0; +static int now[3]; +static long long cyclespersec = 0; + +static void readdev(unsigned int *result) +{ + if (read(fddev,result,12) == 12) return; + result[0] = result[1] = result[2] = 0; +} + +long long cpucycles_dev4ns(void) +{ + unsigned long long delta4; + int deltan; + int deltas; + unsigned long long guesscycles; + + if (fddev == -1) { + fddev = open("/dev/cpucycles4ns",O_RDONLY); + readdev(prev); + } + + readdev(now); + delta4 = (unsigned int) (now[0] - prev[0]); /* unsigned change in number of cycles mod 2^32 */ + deltan = now[1] - prev[1]; /* signed change in number of nanoseconds mod 10^9 */ + deltas = now[2] - prev[2]; /* signed change in number of seconds */ + if ((deltas == 0 && deltan < 200000000) || (deltas == 1 && deltan < -800000000)) + return prevcycles + delta4; + + prev[0] = now[0]; + prev[1] = now[1]; + prev[2] = now[2]; + + if ((deltas == 0 && deltan < 300000000) || (deltas == 1 && deltan < -700000000)) { + // actual number of cycles cannot have increased by 2^32 in <0.3ms + cyclespersec = 1000000000 * 
(unsigned long long) delta4; + cyclespersec /= deltan + 1000000000 * (long long) deltas; + } else { + guesscycles = deltas * cyclespersec; + guesscycles += (deltan * cyclespersec) / 1000000000; + while (delta4 + 2147483648ULL < guesscycles) delta4 += 4294967296ULL; + /* XXX: could do longer-term extrapolation here */ + } + + prevcycles += delta4; + return prevcycles; +} + +long long cpucycles_dev4ns_persecond(void) +{ + while (!cyclespersec) cpucycles_dev4ns(); + return cyclespersec; +} diff --git a/nacl/cpucycles/dev4ns.h b/nacl/cpucycles/dev4ns.h new file mode 100644 index 00000000..1d99639a --- /dev/null +++ b/nacl/cpucycles/dev4ns.h @@ -0,0 +1,27 @@ +/* +cpucycles dev4ns.h version 20100803 +D. J. Bernstein +Public domain. +*/ + +#ifndef CPUCYCLES_dev4ns_h +#define CPUCYCLES_dev4ns_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_dev4ns(void); +extern long long cpucycles_dev4ns_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "dev4ns" +#define cpucycles cpucycles_dev4ns +#define cpucycles_persecond cpucycles_dev4ns_persecond +#endif + +#endif diff --git a/nacl/cpucycles/do b/nacl/cpucycles/do new file mode 100755 index 00000000..efc063de --- /dev/null +++ b/nacl/cpucycles/do @@ -0,0 +1,105 @@ +#!/bin/sh -e + +okabi | ( + while read abi + do + + rm -f cpucycles.o cpucycles.h + + ( + case "$abi" in + ppc*) + echo powerpccpuinfo + echo powerpcmacos + ;; + amd64*) + echo amd64tscfreq + echo amd64cpuinfo + echo amd64cpuspeed + ;; + x86*) + echo x86tscfreq + echo x86cpuinfo + echo x86cpuspeed + echo x86estimate + ;; + cell*) + echo celllinux + ;; + sparc*) + echo sparccpuinfo + echo sparc32cpuinfo + ;; + mips*) + echo mips + ;; + hppa*) + echo hppapstat + ;; + alpha*) + echo alpha + ;; + sgi*) + echo sgi + ;; + arm*) + echo cortex + echo dev4ns + ;; + esac + + echo amd64tscfreq + echo amd64cpuinfo + echo amd64cpuspeed + echo x86tscfreq + echo x86cpuinfo + echo x86cpuspeed 
+ echo x86estimate + echo ia64cpuinfo + echo powerpccpuinfo + echo powerpcmacos + echo celllinux + echo sparccpuinfo + echo sparc32cpuinfo + echo mips + echo hppapstat + echo alpha + echo sgi + echo cortex + echo dev4ns + echo monotoniccpuinfo + echo monotonic + echo gettimeofday + ) | ( + while read n + do + okc-$abi | ( + while read c + do + echo "=== `date` === Trying $n.c with $c..." >&2 + rm -f test cpucycles-impl.o cpucycles-impl.h cpucycles-impl.c + cp $n.c cpucycles-impl.c || continue + cp $n.h cpucycles-impl.h || continue + $c -c cpucycles-impl.c || continue + $c -o test test.c cpucycles-impl.o || continue + ./test || continue + echo "=== `date` === Success. Using $n.c." >&2 + mkdir -p lib/$abi + mv cpucycles-impl.o lib/$abi/cpucycles.o + mkdir -p include/$abi + mv cpucycles-impl.h include/$abi/cpucycles.h + exit 0 + done + exit 111 + ) && exit 0 + done + exit 111 + ) || ( + echo ===== Giving up. >&2 + rm -f test cpucycles-impl.o cpucycles-impl.h cpucycles-impl.c + exit 111 + ) || exit 0 + + done + exit 0 +) || exit 111 diff --git a/nacl/cpucycles/gettimeofday.c b/nacl/cpucycles/gettimeofday.c new file mode 100644 index 00000000..0bf5e03c --- /dev/null +++ b/nacl/cpucycles/gettimeofday.c @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include +#include "osfreq.c" + +static double cpufrequency = 0; + +static void init(void) +{ + cpufrequency = osfreq(); +} + +long long cpucycles_gettimeofday(void) +{ + double result; + struct timeval t; + if (!cpufrequency) init(); + gettimeofday(&t,(struct timezone *) 0); + result = t.tv_usec; + result *= 0.000001; + result += (double) t.tv_sec; + result *= cpufrequency; + return result; +} + +long long cpucycles_gettimeofday_persecond(void) +{ + if (!cpufrequency) init(); + return cpufrequency; +} diff --git a/nacl/cpucycles/gettimeofday.h b/nacl/cpucycles/gettimeofday.h new file mode 100644 index 00000000..147b127b --- /dev/null +++ b/nacl/cpucycles/gettimeofday.h @@ -0,0 +1,27 @@ +/* +cpucycles gettimeofday.h 
version 20060318 +D. J. Bernstein +Public domain. +*/ + +#ifndef CPUCYCLES_gettimeofday_h +#define CPUCYCLES_gettimeofday_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_gettimeofday(void); +extern long long cpucycles_gettimeofday_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "gettimeofday" +#define cpucycles cpucycles_gettimeofday +#define cpucycles_persecond cpucycles_gettimeofday_persecond +#endif + +#endif diff --git a/nacl/cpucycles/hppapstat.c b/nacl/cpucycles/hppapstat.c new file mode 100644 index 00000000..5ae1e843 --- /dev/null +++ b/nacl/cpucycles/hppapstat.c @@ -0,0 +1,26 @@ +#include +#include +#include +#include +#include +#include + +long long cpucycles_hppapstat(void) +{ + register long long result; + _MFCTL(16,result); + return result; +} + +long long cpucycles_hppapstat_persecond(void) +{ + struct pst_processor pst; + union pstun pu; + double result; + + pu.pst_processor = &pst; + if (pstat(PSTAT_PROCESSOR,pu,sizeof(pst),1,0) < 0) return 0; + result = pst.psp_iticksperclktick; + result *= (double) sysconf(_SC_CLK_TCK); + return result; +} diff --git a/nacl/cpucycles/hppapstat.h b/nacl/cpucycles/hppapstat.h new file mode 100644 index 00000000..721814bb --- /dev/null +++ b/nacl/cpucycles/hppapstat.h @@ -0,0 +1,27 @@ +/* +cpucycles hppapstat.h version 20060319 +D. J. Bernstein +Public domain. 
+*/ + +#ifndef CPUCYCLES_hppapstat_h +#define CPUCYCLES_hppapstat_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_hppapstat(void); +extern long long cpucycles_hppapstat_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "hppapstat" +#define cpucycles cpucycles_hppapstat +#define cpucycles_persecond cpucycles_hppapstat_persecond +#endif + +#endif diff --git a/nacl/cpucycles/ia64cpuinfo.c b/nacl/cpucycles/ia64cpuinfo.c new file mode 100644 index 00000000..580c6cee --- /dev/null +++ b/nacl/cpucycles/ia64cpuinfo.c @@ -0,0 +1,15 @@ +#include +#include +#include "osfreq.c" + +long long cpucycles_ia64cpuinfo(void) +{ + long long result; + asm volatile("mov %0=ar.itc" : "=r"(result)); + return result; +} + +long long cpucycles_ia64cpuinfo_persecond(void) +{ + return osfreq(); +} diff --git a/nacl/cpucycles/ia64cpuinfo.h b/nacl/cpucycles/ia64cpuinfo.h new file mode 100644 index 00000000..a6bcf47d --- /dev/null +++ b/nacl/cpucycles/ia64cpuinfo.h @@ -0,0 +1,27 @@ +/* +cpucycles ia64cpuinfo.h version 20100803 +D. J. Bernstein +Public domain. +*/ + +#ifndef CPUCYCLES_ia64cpuinfo_h +#define CPUCYCLES_ia64cpuinfo_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_ia64cpuinfo(void); +extern long long cpucycles_ia64cpuinfo_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "ia64cpuinfo" +#define cpucycles cpucycles_ia64cpuinfo +#define cpucycles_persecond cpucycles_ia64cpuinfo_persecond +#endif + +#endif diff --git a/nacl/cpucycles/mips.c b/nacl/cpucycles/mips.c new file mode 100644 index 00000000..8b75f824 --- /dev/null +++ b/nacl/cpucycles/mips.c @@ -0,0 +1,65 @@ +/* +cpucycles/mips.c version 20100803 +D. J. Bernstein +Public domain. 
+*/ + +#define SCALE 2 +#include +#include +#include + +static int prev[3]; +static unsigned long long prevcycles = 0; +static int now[3]; +static long long cyclespersec = 0; + +static void readticks(unsigned int *result) +{ + struct timeval t; + unsigned int cc; + asm volatile(".byte 59; .byte 16; .byte 2; .byte 124; move %0,$2" : "=r"(cc) : : "$2"); + gettimeofday(&t,(struct timezone *) 0); + result[0] = cc; + result[1] = t.tv_usec; + result[2] = t.tv_sec; +} + +long long cpucycles_mips(void) +{ + unsigned long long delta4; + int deltan; + int deltas; + unsigned long long guesscycles; + + readticks(now); + delta4 = (unsigned int) (now[0] - prev[0]); /* unsigned change in number of cycles mod 2^32 */ + deltan = now[1] - prev[1]; /* signed change in number of nanoseconds mod 10^9 */ + deltas = now[2] - prev[2]; /* signed change in number of seconds */ + if ((deltas == 0 && deltan < 200000) || (deltas == 1 && deltan < -800000)) + return (prevcycles + delta4) * SCALE; + + prev[0] = now[0]; + prev[1] = now[1]; + prev[2] = now[2]; + + if ((deltas == 0 && deltan < 300000) || (deltas == 1 && deltan < -700000)) { + // actual number of cycles cannot have increased by 2^32 in <0.3ms + cyclespersec = 1000000 * (unsigned long long) delta4; + cyclespersec /= deltan + 1000000 * (long long) deltas; + } else { + guesscycles = deltas * cyclespersec; + guesscycles += (deltan * cyclespersec) / 1000000; + while (delta4 + 2147483648ULL < guesscycles) delta4 += 4294967296ULL; + /* XXX: could do longer-term extrapolation here */ + } + + prevcycles += delta4; + return prevcycles * SCALE; +} + +long long cpucycles_mips_persecond(void) +{ + while (!cyclespersec) cpucycles_mips(); + return cyclespersec * SCALE; +} diff --git a/nacl/cpucycles/mips.h b/nacl/cpucycles/mips.h new file mode 100644 index 00000000..6f1b26c3 --- /dev/null +++ b/nacl/cpucycles/mips.h @@ -0,0 +1,27 @@ +/* +cpucycles mips.h version 20100802 +D. J. Bernstein +Public domain. 
+*/ + +#ifndef CPUCYCLES_mips_h +#define CPUCYCLES_mips_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_mips(void); +extern long long cpucycles_mips_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "mips" +#define cpucycles cpucycles_mips +#define cpucycles_persecond cpucycles_mips_persecond +#endif + +#endif diff --git a/nacl/cpucycles/monotonic.c b/nacl/cpucycles/monotonic.c new file mode 100644 index 00000000..412a44fb --- /dev/null +++ b/nacl/cpucycles/monotonic.c @@ -0,0 +1,34 @@ +#include +#include +#include +#include +#include +#include + +static double cpufrequency = 0; + +static void init(void) +{ + long result = 0; size_t resultlen = sizeof(long); + sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0); + cpufrequency = result; +} + +long long cpucycles_monotonic(void) +{ + double result; + struct timespec t; + if (!cpufrequency) init(); + clock_gettime(CLOCK_MONOTONIC,&t); + result = t.tv_nsec; + result *= 0.000000001; + result += (double) t.tv_sec; + result *= cpufrequency; + return result; +} + +long long cpucycles_monotonic_persecond(void) +{ + if (!cpufrequency) init(); + return cpufrequency; +} diff --git a/nacl/cpucycles/monotonic.h b/nacl/cpucycles/monotonic.h new file mode 100644 index 00000000..9070860b --- /dev/null +++ b/nacl/cpucycles/monotonic.h @@ -0,0 +1,27 @@ +/* +cpucycles monotonic.h version 20100803 +D. J. Bernstein +Public domain. 
+*/ + +#ifndef CPUCYCLES_monotonic_h +#define CPUCYCLES_monotonic_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_monotonic(void); +extern long long cpucycles_monotonic_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "monotonic" +#define cpucycles cpucycles_monotonic +#define cpucycles_persecond cpucycles_monotonic_persecond +#endif + +#endif diff --git a/nacl/cpucycles/monotoniccpuinfo.c b/nacl/cpucycles/monotoniccpuinfo.c new file mode 100644 index 00000000..609c6305 --- /dev/null +++ b/nacl/cpucycles/monotoniccpuinfo.c @@ -0,0 +1,33 @@ +#include +#include +#include +#include +#include +#include +#include "osfreq.c" + +static double cpufrequency = 0; + +static void init(void) +{ + cpufrequency = osfreq(); +} + +long long cpucycles_monotoniccpuinfo(void) +{ + double result; + struct timespec t; + if (!cpufrequency) init(); + clock_gettime(CLOCK_MONOTONIC,&t); + result = t.tv_nsec; + result *= 0.000000001; + result += (double) t.tv_sec; + result *= cpufrequency; + return result; +} + +long long cpucycles_monotoniccpuinfo_persecond(void) +{ + if (!cpufrequency) init(); + return cpufrequency; +} diff --git a/nacl/cpucycles/monotoniccpuinfo.h b/nacl/cpucycles/monotoniccpuinfo.h new file mode 100644 index 00000000..d4ba7ea8 --- /dev/null +++ b/nacl/cpucycles/monotoniccpuinfo.h @@ -0,0 +1,27 @@ +/* +cpucycles monotoniccpuinfo.h version 20100804 +D. J. Bernstein +Public domain. 
+*/ + +#ifndef CPUCYCLES_monotoniccpuinfo_h +#define CPUCYCLES_monotoniccpuinfo_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_monotoniccpuinfo(void); +extern long long cpucycles_monotoniccpuinfo_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "monotoniccpuinfo" +#define cpucycles cpucycles_monotoniccpuinfo +#define cpucycles_persecond cpucycles_monotoniccpuinfo_persecond +#endif + +#endif diff --git a/nacl/cpucycles/osfreq.c b/nacl/cpucycles/osfreq.c new file mode 100644 index 00000000..4e106a23 --- /dev/null +++ b/nacl/cpucycles/osfreq.c @@ -0,0 +1,65 @@ +static double osfreq(void) +{ + FILE *f; + double result; + int s; + + f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq", "r"); + if (f) { + s = fscanf(f,"%lf",&result); + fclose(f); + if (s > 0) return 1000.0 * result; + } + + f = fopen("/sys/devices/system/cpu/cpu0/clock_tick", "r"); + if (f) { + s = fscanf(f,"%lf",&result); + fclose(f); + if (s > 0) return result; + } + + f = fopen("/proc/cpuinfo","r"); + if (f) { + for (;;) { + s = fscanf(f,"cpu MHz : %lf",&result); + if (s > 0) break; + if (s == 0) s = fscanf(f,"%*[^\n]\n"); + if (s < 0) { result = 0; break; } + } + fclose(f); + if (result) return 1000000.0 * result; + } + + f = fopen("/proc/cpuinfo","r"); + if (f) { + for (;;) { + s = fscanf(f,"clock : %lf",&result); + if (s > 0) break; + if (s == 0) s = fscanf(f,"%*[^\n]\n"); + if (s < 0) { result = 0; break; } + } + fclose(f); + if (result) return 1000000.0 * result; + } + + f = popen("/usr/sbin/lsattr -E -l proc0 -a frequency 2>/dev/null","r"); + if (f) { + s = fscanf(f,"frequency %lf",&result); + pclose(f); + if (s > 0) return result; + } + + f = popen("/usr/sbin/psrinfo -v 2>/dev/null","r"); + if (f) { + for (;;) { + s = fscanf(f," The %*s processor operates at %lf MHz",&result); + if (s > 0) break; + if (s == 0) s = fscanf(f,"%*[^\n]\n"); + if (s < 0) { result = 0; break; } + } + 
pclose(f); + if (result) return 1000000.0 * result; + } + + return 0; +} diff --git a/nacl/cpucycles/powerpccpuinfo.c b/nacl/cpucycles/powerpccpuinfo.c new file mode 100644 index 00000000..b70c745a --- /dev/null +++ b/nacl/cpucycles/powerpccpuinfo.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include +#include "osfreq.c" + +static long myround(double u) +{ + long result = u; + while (result + 0.5 < u) result += 1; + while (result - 0.5 > u) result -= 1; + return result; +} + +static long long microseconds(void) +{ + struct timeval t; + gettimeofday(&t,(struct timezone *) 0); + return t.tv_sec * (long long) 1000000 + t.tv_usec; +} + +static int tbshift = 0; + +static long long timebase(void) +{ + unsigned long high; + unsigned long low; + unsigned long newhigh; + unsigned long long result; + asm volatile( + "7:mftbu %0;mftb %1;mftbu %2;cmpw %0,%2;bne 7b" + : "=r" (high), "=r" (low), "=r" (newhigh) + ); + result = high; + result <<= 32; + result |= low; + return result >> tbshift; +} + +static double cpufrequency = 0; +static long tbcycles = 0; + +static double guesstbcycles(void) +{ + long long tb0; long long us0; + long long tb1; long long us1; + + tb0 = timebase(); + us0 = microseconds(); + do { + tb1 = timebase(); + us1 = microseconds(); + } while (us1 - us0 < 10000 || tb1 - tb0 < 1000); + if (tb1 <= tb0) return 0; + tb1 -= tb0; + us1 -= us0; + return (cpufrequency * 0.000001 * (double) us1) / (double) tb1; +} + +static void init(void) +{ + int loop; + double guess1; + double guess2; + + cpufrequency = osfreq(); + if (!cpufrequency) return; + + for (tbshift = 0;tbshift < 10;++tbshift) { + for (loop = 0;loop < 100;++loop) { + guess1 = guesstbcycles(); + guess2 = guesstbcycles(); + tbcycles = myround(guess1); + if (guess1 - tbcycles > 0.1) continue; + if (tbcycles - guess1 > 0.1) continue; + if (guess2 - tbcycles > 0.1) continue; + if (tbcycles - guess2 > 0.1) continue; + return; + } + } + tbcycles = 0; +} + +long long cpucycles_powerpccpuinfo(void) 
+{ + if (!tbcycles) init(); + return timebase() * tbcycles; +} + +long long cpucycles_powerpccpuinfo_persecond(void) +{ + if (!tbcycles) init(); + return cpufrequency; +} diff --git a/nacl/cpucycles/powerpccpuinfo.h b/nacl/cpucycles/powerpccpuinfo.h new file mode 100644 index 00000000..c763a1b4 --- /dev/null +++ b/nacl/cpucycles/powerpccpuinfo.h @@ -0,0 +1,27 @@ +/* +cpucycles powerpccpuinfo.h version 20100803 +D. J. Bernstein +Public domain. +*/ + +#ifndef CPUCYCLES_powerpccpuinfo_h +#define CPUCYCLES_powerpccpuinfo_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_powerpccpuinfo(void); +extern long long cpucycles_powerpccpuinfo_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "powerpccpuinfo" +#define cpucycles cpucycles_powerpccpuinfo +#define cpucycles_persecond cpucycles_powerpccpuinfo_persecond +#endif + +#endif diff --git a/nacl/cpucycles/powerpcmacos.c b/nacl/cpucycles/powerpcmacos.c new file mode 100644 index 00000000..ab0be1ea --- /dev/null +++ b/nacl/cpucycles/powerpcmacos.c @@ -0,0 +1,42 @@ +#include +#include +#include + +#define timebase mach_absolute_time + +static int cpumib[2] = { CTL_HW, HW_CPU_FREQ } ; +static int tbmib[2] = { CTL_HW, HW_TB_FREQ } ; + +static long myround(double u) +{ + long result = u; + while (result + 0.5 < u) result += 1; + while (result - 0.5 > u) result -= 1; + return result; +} + +static long tbcycles = 0; + +static void init(void) +{ + unsigned int cpufrequency = 0; size_t cpufrequencylen = sizeof(unsigned int); + unsigned int tbfrequency = 0; size_t tbfrequencylen = sizeof(unsigned int); + sysctl(cpumib,2,&cpufrequency,&cpufrequencylen,0,0); + sysctl(tbmib,2,&tbfrequency,&tbfrequencylen,0,0); + if (tbfrequency > 0) + tbcycles = myround((double) (unsigned long long) cpufrequency + / (double) (unsigned long long) tbfrequency); +} + +long long cpucycles_powerpcmacos(void) +{ + if (!tbcycles) init(); + return timebase() * 
tbcycles; +} + +long long cpucycles_powerpcmacos_persecond(void) +{ + unsigned int result = 0; size_t resultlen = sizeof(unsigned int); + sysctl(cpumib,2,&result,&resultlen,0,0); + return (unsigned long long) result; +} diff --git a/nacl/cpucycles/powerpcmacos.h b/nacl/cpucycles/powerpcmacos.h new file mode 100644 index 00000000..f66c0e36 --- /dev/null +++ b/nacl/cpucycles/powerpcmacos.h @@ -0,0 +1,27 @@ +/* +cpucycles powerpcmacos.h version 20060319 +D. J. Bernstein +Public domain. +*/ + +#ifndef CPUCYCLES_powerpcmacos_h +#define CPUCYCLES_powerpcmacos_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_powerpcmacos(void); +extern long long cpucycles_powerpcmacos_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "powerpcmacos" +#define cpucycles cpucycles_powerpcmacos +#define cpucycles_persecond cpucycles_powerpcmacos_persecond +#endif + +#endif diff --git a/nacl/cpucycles/sgi.c b/nacl/cpucycles/sgi.c new file mode 100644 index 00000000..c232af09 --- /dev/null +++ b/nacl/cpucycles/sgi.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include +#include + +static double cpufrequency = 0; + +static void init(void) +{ + FILE *f; + + f = popen("hinv -c processor | awk '{if ($3==\"MHZ\") print $2*1000000}'","r"); + if (!f) return; + if (fscanf(f,"%lf",&cpufrequency) < 1) cpufrequency = 0; + pclose(f); + if (!cpufrequency) return; +} + +long long cpucycles_sgi(void) +{ + double result; + struct timespec t; + if (!cpufrequency) init(); + clock_gettime(CLOCK_SGI_CYCLE,&t); + result = t.tv_nsec; + result *= 0.000000001; + result += (double) t.tv_sec; + result *= cpufrequency; + return result; +} + +long long cpucycles_sgi_persecond(void) +{ + if (!cpufrequency) init(); + return cpufrequency; +} diff --git a/nacl/cpucycles/sgi.h b/nacl/cpucycles/sgi.h new file mode 100644 index 00000000..56bad976 --- /dev/null +++ b/nacl/cpucycles/sgi.h @@ -0,0 +1,27 @@ +/* +cpucycles 
sgi.h version 20070916 +D. J. Bernstein +Public domain. +*/ + +#ifndef CPUCYCLES_sgi_h +#define CPUCYCLES_sgi_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_sgi(void); +extern long long cpucycles_sgi_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "sgi" +#define cpucycles cpucycles_sgi +#define cpucycles_persecond cpucycles_sgi_persecond +#endif + +#endif diff --git a/nacl/cpucycles/sparc32cpuinfo.c b/nacl/cpucycles/sparc32cpuinfo.c new file mode 100644 index 00000000..1fc53d06 --- /dev/null +++ b/nacl/cpucycles/sparc32cpuinfo.c @@ -0,0 +1,16 @@ +#include +#include +#include "osfreq.c" + +long long cpucycles_sparc32cpuinfo(void) +{ + long long result; + asm volatile(".word 2202075136; .word 2570088480; srl %%g1,0,%L0; mov %%o4,%H0" + : "=r" (result) : : "g1","o4"); + return result; +} + +long long cpucycles_sparc32cpuinfo_persecond(void) +{ + return osfreq(); +} diff --git a/nacl/cpucycles/sparc32cpuinfo.h b/nacl/cpucycles/sparc32cpuinfo.h new file mode 100644 index 00000000..9d39dc65 --- /dev/null +++ b/nacl/cpucycles/sparc32cpuinfo.h @@ -0,0 +1,27 @@ +/* +cpucycles sparc32cpuinfo.h version 20100804 +D. J. Bernstein +Public domain. 
+*/ + +#ifndef CPUCYCLES_sparc32cpuinfo_h +#define CPUCYCLES_sparc32cpuinfo_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_sparc32cpuinfo(void); +extern long long cpucycles_sparc32cpuinfo_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "sparc32cpuinfo" +#define cpucycles cpucycles_sparc32cpuinfo +#define cpucycles_persecond cpucycles_sparc32cpuinfo_persecond +#endif + +#endif diff --git a/nacl/cpucycles/sparccpuinfo.c b/nacl/cpucycles/sparccpuinfo.c new file mode 100644 index 00000000..d07aafec --- /dev/null +++ b/nacl/cpucycles/sparccpuinfo.c @@ -0,0 +1,15 @@ +#include +#include +#include "osfreq.c" + +long long cpucycles_sparccpuinfo(void) +{ + long long result; + asm volatile("rd %%tick,%0" : "=r" (result)); + return result; +} + +long long cpucycles_sparccpuinfo_persecond(void) +{ + return osfreq(); +} diff --git a/nacl/cpucycles/sparccpuinfo.h b/nacl/cpucycles/sparccpuinfo.h new file mode 100644 index 00000000..badb2144 --- /dev/null +++ b/nacl/cpucycles/sparccpuinfo.h @@ -0,0 +1,27 @@ +/* +cpucycles sparccpuinfo.h version 20100803 +D. J. Bernstein +Public domain. 
+*/ + +#ifndef CPUCYCLES_sparccpuinfo_h +#define CPUCYCLES_sparccpuinfo_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_sparccpuinfo(void); +extern long long cpucycles_sparccpuinfo_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "sparccpuinfo" +#define cpucycles cpucycles_sparccpuinfo +#define cpucycles_persecond cpucycles_sparccpuinfo_persecond +#endif + +#endif diff --git a/nacl/cpucycles/test.c b/nacl/cpucycles/test.c new file mode 100644 index 00000000..bc43d719 --- /dev/null +++ b/nacl/cpucycles/test.c @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include "cpucycles-impl.h" + +static long long tod(void) +{ + struct timeval t; + gettimeofday(&t,(struct timezone *) 0); + return t.tv_sec * (long long) 1000000 + t.tv_usec; +} + +long long todstart; +long long todend; +long long cpustart; +long long cpuend; + +long long cyclespersecond; +long long cyclespertod; + +long long t[1001]; + +int main() +{ + int j; + int i; + + if (!cpucycles()) { + fprintf(stderr,"cpucycles() = %lld\n",cpucycles()); + return 100; + } + for (i = 0;i <= 1000;++i) t[i] = cpucycles(); + for (i = 0;i < 1000;++i) if (t[i] > t[i + 1]) { + fprintf(stderr,"t[%d] = %lld\n",i,t[i]); + fprintf(stderr,"t[%d] = %lld\n",i + 1,t[i + 1]); + fprintf(stderr,"cpucycles_persecond() = %lld\n",cpucycles_persecond()); + return 100; + } + if (t[0] == t[1000]) { + fprintf(stderr,"t[%d] = %lld\n",0,t[0]); + fprintf(stderr,"t[%d] = %lld\n",1000,t[1000]); + fprintf(stderr,"cpucycles_persecond() = %lld\n",cpucycles_persecond()); + return 100; + } + + cyclespersecond = cpucycles_persecond(); + + if (cyclespersecond <= 0) { + fprintf(stderr,"cpucycles_persecond() = %lld\n",cyclespersecond); + return 100; + } + + todstart = tod(); + cpustart = cpucycles(); + for (j = 0;j < 1000;++j) for (i = 0;i <= 1000;++i) t[i] = t[i] + i + j; + todend = tod(); + cpuend = cpucycles(); + + todend -= todstart; + cpuend -= 
cpustart; + + cyclespertod = (long long) (((double) cpuend) * 1000000.0 / (double) todend); + + if (cyclespertod > 10 * cyclespersecond) { + fprintf(stderr,"cyclespertod = %lld, cyclespersecond = %lld\n",cyclespertod,cyclespersecond); + return 100; + } + + for (i = 0;i <= 1000;++i) t[i] = cpucycles(); + printf("%s",cpucycles_implementation); + printf(" %lld",cyclespersecond); + printf(" %lld",cyclespertod); + for (i = 0;i < 64;++i) printf(" %lld",t[i + 1] - t[i]); + printf("\n"); + return 0; +} diff --git a/nacl/cpucycles/x86cpuinfo.c b/nacl/cpucycles/x86cpuinfo.c new file mode 100644 index 00000000..3fb0a1b0 --- /dev/null +++ b/nacl/cpucycles/x86cpuinfo.c @@ -0,0 +1,15 @@ +#include +#include +#include "osfreq.c" + +long long cpucycles_x86cpuinfo(void) +{ + long long result; + asm volatile(".byte 15;.byte 49" : "=A" (result)); + return result; +} + +long long cpucycles_x86cpuinfo_persecond(void) +{ + return osfreq(); +} diff --git a/nacl/cpucycles/x86cpuinfo.h b/nacl/cpucycles/x86cpuinfo.h new file mode 100644 index 00000000..88f151dd --- /dev/null +++ b/nacl/cpucycles/x86cpuinfo.h @@ -0,0 +1,27 @@ +/* +cpucycles x86cpuinfo.h version 20100803 +D. J. Bernstein +Public domain. 
+*/ + +#ifndef CPUCYCLES_x86cpuinfo_h +#define CPUCYCLES_x86cpuinfo_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_x86cpuinfo(void); +extern long long cpucycles_x86cpuinfo_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "x86cpuinfo" +#define cpucycles cpucycles_x86cpuinfo +#define cpucycles_persecond cpucycles_x86cpuinfo_persecond +#endif + +#endif diff --git a/nacl/cpucycles/x86cpuspeed.c b/nacl/cpucycles/x86cpuspeed.c new file mode 100644 index 00000000..34222565 --- /dev/null +++ b/nacl/cpucycles/x86cpuspeed.c @@ -0,0 +1,24 @@ +#include +#include +#include +#include + +long long cpucycles_x86cpuspeed(void) +{ + long long result; + asm volatile(".byte 15;.byte 49" : "=A" (result)); + return result; +} + +long long cpucycles_x86cpuspeed_persecond(void) +{ + int oid[2]; + int val; + size_t size; + oid[0] = CTL_HW; + oid[1] = HW_CPUSPEED; + size = sizeof val; + if (sysctl(oid,2,&val,&size,0,0) == -1) return 0; + if (size != sizeof val) return 0; + return val * 1000000LL; +} diff --git a/nacl/cpucycles/x86cpuspeed.h b/nacl/cpucycles/x86cpuspeed.h new file mode 100644 index 00000000..43005cda --- /dev/null +++ b/nacl/cpucycles/x86cpuspeed.h @@ -0,0 +1,27 @@ +/* +cpucycles x86cpuspeed.h version 20090716 +Matthew Dempsky +Public domain. 
+*/ + +#ifndef CPUCYCLES_x86cpuspeed_h +#define CPUCYCLES_x86cpuspeed_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_x86cpuspeed(void); +extern long long cpucycles_x86cpuspeed_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "x86cpuspeed" +#define cpucycles cpucycles_x86cpuspeed +#define cpucycles_persecond cpucycles_x86cpuspeed_persecond +#endif + +#endif diff --git a/nacl/cpucycles/x86estimate.c b/nacl/cpucycles/x86estimate.c new file mode 100644 index 00000000..e5ae66cf --- /dev/null +++ b/nacl/cpucycles/x86estimate.c @@ -0,0 +1,59 @@ +#include +#include +#include +#include + +long long cpucycles_x86estimate(void) +{ + long long result; + asm volatile(".byte 15;.byte 49" : "=A" (result)); + return result; +} + +static long long microseconds(void) +{ + struct timeval t; + gettimeofday(&t,(struct timezone *) 0); + return t.tv_sec * (long long) 1000000 + t.tv_usec; +} + +static double guessfreq(void) +{ + long long tb0; long long us0; + long long tb1; long long us1; + + tb0 = cpucycles_x86estimate(); + us0 = microseconds(); + do { + tb1 = cpucycles_x86estimate(); + us1 = microseconds(); + } while (us1 - us0 < 10000 || tb1 - tb0 < 1000); + if (tb1 <= tb0) return 0; + tb1 -= tb0; + us1 -= us0; + return ((double) tb1) / (0.000001 * (double) us1); +} + +static double cpufrequency = 0; + +static void init(void) +{ + double guess1; + double guess2; + int loop; + + for (loop = 0;loop < 100;++loop) { + guess1 = guessfreq(); + guess2 = guessfreq(); + if (guess1 > 1.01 * guess2) continue; + if (guess2 > 1.01 * guess1) continue; + cpufrequency = 0.5 * (guess1 + guess2); + break; + } +} + +long long cpucycles_x86estimate_persecond(void) +{ + if (!cpufrequency) init(); + return cpufrequency; +} diff --git a/nacl/cpucycles/x86estimate.h b/nacl/cpucycles/x86estimate.h new file mode 100644 index 00000000..98f2dd15 --- /dev/null +++ b/nacl/cpucycles/x86estimate.h @@ -0,0 +1,27 @@ +/* 
+cpucycles x86estimate.h version 20070121 +D. J. Bernstein +Public domain. +*/ + +#ifndef CPUCYCLES_x86estimate_h +#define CPUCYCLES_x86estimate_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_x86estimate(void); +extern long long cpucycles_x86estimate_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "x86estimate" +#define cpucycles cpucycles_x86estimate +#define cpucycles_persecond cpucycles_x86estimate_persecond +#endif + +#endif diff --git a/nacl/cpucycles/x86tscfreq.c b/nacl/cpucycles/x86tscfreq.c new file mode 100644 index 00000000..a1b94b62 --- /dev/null +++ b/nacl/cpucycles/x86tscfreq.c @@ -0,0 +1,17 @@ +#include +#include + +long long cpucycles_x86tscfreq(void) +{ + long long result; + asm volatile(".byte 15;.byte 49" : "=A" (result)); + return result; +} + +long long cpucycles_x86tscfreq_persecond(void) +{ + long result = 0; + size_t resultlen = sizeof(long); + sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0); + return result; +} diff --git a/nacl/cpucycles/x86tscfreq.h b/nacl/cpucycles/x86tscfreq.h new file mode 100644 index 00000000..abf616e5 --- /dev/null +++ b/nacl/cpucycles/x86tscfreq.h @@ -0,0 +1,27 @@ +/* +cpucycles x86tscfreq.h version 20060318 +D. J. Bernstein +Public domain. 
+*/ + +#ifndef CPUCYCLES_x86tscfreq_h +#define CPUCYCLES_x86tscfreq_h + +#ifdef __cplusplus +extern "C" { +#endif + +extern long long cpucycles_x86tscfreq(void); +extern long long cpucycles_x86tscfreq_persecond(void); + +#ifdef __cplusplus +} +#endif + +#ifndef cpucycles_implementation +#define cpucycles_implementation "x86tscfreq" +#define cpucycles cpucycles_x86tscfreq +#define cpucycles_persecond cpucycles_x86tscfreq_persecond +#endif + +#endif diff --git a/nacl/cpuid/cbytes.c b/nacl/cpuid/cbytes.c new file mode 100644 index 00000000..bd5d1444 --- /dev/null +++ b/nacl/cpuid/cbytes.c @@ -0,0 +1,16 @@ +#include + +int main() +{ + char ch; + int loop = 0; + while (scanf("%c",&ch) == 1) { + printf("0x%02x,",255 & (int) ch); + if (++loop == 16) { + loop = 0; + printf("\n"); + } + } + printf("0x00\n"); + return 0; +} diff --git a/nacl/cpuid/cpuid.c b/nacl/cpuid/cpuid.c new file mode 100644 index 00000000..1a5c6b8e --- /dev/null +++ b/nacl/cpuid/cpuid.c @@ -0,0 +1,41 @@ +#include +#include +#include +#include + +void nope() +{ + exit(1); +} + +int main() +{ + unsigned long x[4]; + unsigned long y[4]; + int i; + int j; + char c; + + signal(SIGILL,nope); + + x[0] = 0; + x[1] = 0; + x[2] = 0; + x[3] = 0; + + asm volatile(".byte 15;.byte 162" : "=a"(x[0]),"=b"(x[1]),"=c"(x[3]),"=d"(x[2]) : "0"(0) ); + if (!x[0]) return 0; + asm volatile(".byte 15;.byte 162" : "=a"(y[0]),"=b"(y[1]),"=c"(y[2]),"=d"(y[3]) : "0"(1) ); + + for (i = 1;i < 4;++i) + for (j = 0;j < 4;++j) { + c = x[i] >> (8 * j); + if (c < 32) c = 32; + if (c > 126) c = 126; + putchar(c); + } + + printf("-%08x-%08x\n",y[0],y[3]); + + return 0; +} diff --git a/nacl/cpuid/do b/nacl/cpuid/do new file mode 100755 index 00000000..8a3a6f7b --- /dev/null +++ b/nacl/cpuid/do @@ -0,0 +1,37 @@ +#!/bin/sh -e + +mkdir include + +( + echo x86 + echo unknown +) | ( + while read n + do + okabi | ( + while read abi + do + okc-$abi | ( + while read c + do + echo "=== `date` === Trying $n.c with $c..." 
>&2 + rm -f cpuid.c + cp $n.c cpuid.c || continue + $c -o cpuid cpuid.c || continue + $c -o cbytes cbytes.c || continue + ./cpuid > cpuid.out || continue + echo 'static const char cpuid[] = {' > cpuid.h || continue + ./cbytes < cpuid.out >> cpuid.h || continue + echo '} ;' >> cpuid.h || continue + cp cpuid.h include/cpuid.h || continue + cat cpuid.out + exit 0 + done + exit 111 + ) && exit 0 + done + exit 111 + ) && exit 0 + done + exit 111 +) diff --git a/nacl/cpuid/unknown.c b/nacl/cpuid/unknown.c new file mode 100644 index 00000000..786a5e72 --- /dev/null +++ b/nacl/cpuid/unknown.c @@ -0,0 +1,7 @@ +#include + +main() +{ + printf("unknown CPU ID\n"); + return 0; +} diff --git a/nacl/cpuid/x86.c b/nacl/cpuid/x86.c new file mode 100644 index 00000000..99e6a0c4 --- /dev/null +++ b/nacl/cpuid/x86.c @@ -0,0 +1,41 @@ +#include +#include +#include +#include + +void nope() +{ + exit(1); +} + +int main() +{ + unsigned long x[4]; + unsigned long y[4]; + int i; + int j; + char c; + + signal(SIGILL,nope); + + x[0] = 0; + x[1] = 0; + x[2] = 0; + x[3] = 0; + + asm volatile(".byte 15;.byte 162" : "=a"(x[0]),"=b"(x[1]),"=c"(x[3]),"=d"(x[2]) : "0"(0) ); + if (!x[0]) return 0; + asm volatile(".byte 15;.byte 162" : "=a"(y[0]),"=b"(y[1]),"=c"(y[2]),"=d"(y[3]) : "0"(1) ); + + for (i = 1;i < 4;++i) + for (j = 0;j < 4;++j) { + c = x[i] >> (8 * j); + if (c < 32) c = 32; + if (c > 126) c = 126; + putchar(c); + } + + printf("-%08x-%08x\n",(unsigned int) y[0],(unsigned int) y[3]); + + return 0; +} diff --git a/nacl/crypto_auth/hmacsha256/checksum b/nacl/crypto_auth/hmacsha256/checksum new file mode 100644 index 00000000..2fa9604b --- /dev/null +++ b/nacl/crypto_auth/hmacsha256/checksum @@ -0,0 +1 @@ +3bd7abd4f4dce04396f2ac7cb1cff70607f692411c49a1563b037d31e1662632 diff --git a/nacl/crypto_auth/hmacsha256/ref/api.h b/nacl/crypto_auth/hmacsha256/ref/api.h new file mode 100644 index 00000000..c224d9d5 --- /dev/null +++ b/nacl/crypto_auth/hmacsha256/ref/api.h @@ -0,0 +1,2 @@ +#define 
CRYPTO_BYTES 32 +#define CRYPTO_KEYBYTES 32 diff --git a/nacl/crypto_auth/hmacsha256/ref/hmac.c b/nacl/crypto_auth/hmacsha256/ref/hmac.c new file mode 100644 index 00000000..8ab30bb4 --- /dev/null +++ b/nacl/crypto_auth/hmacsha256/ref/hmac.c @@ -0,0 +1,83 @@ +/* + * 20080913 + * D. J. Bernstein + * Public domain. + * */ + +#include "crypto_hashblocks_sha256.h" +#include "crypto_auth.h" + +#define blocks crypto_hashblocks_sha256 + +typedef unsigned int uint32; + +static const char iv[32] = { + 0x6a,0x09,0xe6,0x67, + 0xbb,0x67,0xae,0x85, + 0x3c,0x6e,0xf3,0x72, + 0xa5,0x4f,0xf5,0x3a, + 0x51,0x0e,0x52,0x7f, + 0x9b,0x05,0x68,0x8c, + 0x1f,0x83,0xd9,0xab, + 0x5b,0xe0,0xcd,0x19, +} ; + +int crypto_auth(unsigned char *out,const unsigned char *in,unsigned long long inlen,const unsigned char *k) +{ + unsigned char h[32]; + unsigned char padded[128]; + int i; + unsigned long long bits = 512 + (inlen << 3); + + for (i = 0;i < 32;++i) h[i] = iv[i]; + + for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x36; + for (i = 32;i < 64;++i) padded[i] = 0x36; + + blocks(h,padded,64); + blocks(h,in,inlen); + in += inlen; + inlen &= 63; + in -= inlen; + + for (i = 0;i < inlen;++i) padded[i] = in[i]; + padded[inlen] = 0x80; + + if (inlen < 56) { + for (i = inlen + 1;i < 56;++i) padded[i] = 0; + padded[56] = bits >> 56; + padded[57] = bits >> 48; + padded[58] = bits >> 40; + padded[59] = bits >> 32; + padded[60] = bits >> 24; + padded[61] = bits >> 16; + padded[62] = bits >> 8; + padded[63] = bits; + blocks(h,padded,64); + } else { + for (i = inlen + 1;i < 120;++i) padded[i] = 0; + padded[120] = bits >> 56; + padded[121] = bits >> 48; + padded[122] = bits >> 40; + padded[123] = bits >> 32; + padded[124] = bits >> 24; + padded[125] = bits >> 16; + padded[126] = bits >> 8; + padded[127] = bits; + blocks(h,padded,128); + } + + for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x5c; + for (i = 32;i < 64;++i) padded[i] = 0x5c; + for (i = 0;i < 32;++i) padded[64 + i] = h[i]; + + for (i = 0;i < 32;++i) out[i] = 
iv[i]; + + for (i = 32;i < 64;++i) padded[64 + i] = 0; + padded[64 + 32] = 0x80; + padded[64 + 62] = 3; + + blocks(out,padded,128); + + return 0; +} diff --git a/nacl/crypto_auth/hmacsha256/ref/verify.c b/nacl/crypto_auth/hmacsha256/ref/verify.c new file mode 100644 index 00000000..96ff0ea8 --- /dev/null +++ b/nacl/crypto_auth/hmacsha256/ref/verify.c @@ -0,0 +1,9 @@ +#include "crypto_verify_32.h" +#include "crypto_auth.h" + +int crypto_auth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k) +{ + unsigned char correct[32]; + crypto_auth(correct,in,inlen,k); + return crypto_verify_32(h,correct); +} diff --git a/nacl/crypto_auth/hmacsha256/used b/nacl/crypto_auth/hmacsha256/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_auth/hmacsha512256/checksum b/nacl/crypto_auth/hmacsha512256/checksum new file mode 100644 index 00000000..1c037f2d --- /dev/null +++ b/nacl/crypto_auth/hmacsha512256/checksum @@ -0,0 +1 @@ +2f5e8a6a0cac012d8d001351d7d583e69f91390df46305c3608e0c2893491886 diff --git a/nacl/crypto_auth/hmacsha512256/ref/api.h b/nacl/crypto_auth/hmacsha512256/ref/api.h new file mode 100644 index 00000000..c224d9d5 --- /dev/null +++ b/nacl/crypto_auth/hmacsha512256/ref/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 32 +#define CRYPTO_KEYBYTES 32 diff --git a/nacl/crypto_auth/hmacsha512256/ref/hmac.c b/nacl/crypto_auth/hmacsha512256/ref/hmac.c new file mode 100644 index 00000000..56ebfa6b --- /dev/null +++ b/nacl/crypto_auth/hmacsha512256/ref/hmac.c @@ -0,0 +1,86 @@ +/* + * 20080913 + * D. J. Bernstein + * Public domain. 
+ * */ + +#include "crypto_hashblocks_sha512.h" +#include "crypto_auth.h" + +#define blocks crypto_hashblocks_sha512 + +typedef unsigned long long uint64; + +static const unsigned char iv[64] = { + 0x6a,0x09,0xe6,0x67,0xf3,0xbc,0xc9,0x08, + 0xbb,0x67,0xae,0x85,0x84,0xca,0xa7,0x3b, + 0x3c,0x6e,0xf3,0x72,0xfe,0x94,0xf8,0x2b, + 0xa5,0x4f,0xf5,0x3a,0x5f,0x1d,0x36,0xf1, + 0x51,0x0e,0x52,0x7f,0xad,0xe6,0x82,0xd1, + 0x9b,0x05,0x68,0x8c,0x2b,0x3e,0x6c,0x1f, + 0x1f,0x83,0xd9,0xab,0xfb,0x41,0xbd,0x6b, + 0x5b,0xe0,0xcd,0x19,0x13,0x7e,0x21,0x79 +} ; + +int crypto_auth(unsigned char *out,const unsigned char *in,unsigned long long inlen,const unsigned char *k) +{ + unsigned char h[64]; + unsigned char padded[256]; + int i; + unsigned long long bytes = 128 + inlen; + + for (i = 0;i < 64;++i) h[i] = iv[i]; + + for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x36; + for (i = 32;i < 128;++i) padded[i] = 0x36; + + blocks(h,padded,128); + blocks(h,in,inlen); + in += inlen; + inlen &= 127; + in -= inlen; + + for (i = 0;i < inlen;++i) padded[i] = in[i]; + padded[inlen] = 0x80; + + if (inlen < 112) { + for (i = inlen + 1;i < 119;++i) padded[i] = 0; + padded[119] = bytes >> 61; + padded[120] = bytes >> 53; + padded[121] = bytes >> 45; + padded[122] = bytes >> 37; + padded[123] = bytes >> 29; + padded[124] = bytes >> 21; + padded[125] = bytes >> 13; + padded[126] = bytes >> 5; + padded[127] = bytes << 3; + blocks(h,padded,128); + } else { + for (i = inlen + 1;i < 247;++i) padded[i] = 0; + padded[247] = bytes >> 61; + padded[248] = bytes >> 53; + padded[249] = bytes >> 45; + padded[250] = bytes >> 37; + padded[251] = bytes >> 29; + padded[252] = bytes >> 21; + padded[253] = bytes >> 13; + padded[254] = bytes >> 5; + padded[255] = bytes << 3; + blocks(h,padded,256); + } + + for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x5c; + for (i = 32;i < 128;++i) padded[i] = 0x5c; + + for (i = 0;i < 64;++i) padded[128 + i] = h[i]; + for (i = 0;i < 64;++i) h[i] = iv[i]; + + for (i = 64;i < 128;++i) padded[128 + i] 
= 0; + padded[128 + 64] = 0x80; + padded[128 + 126] = 6; + + blocks(h,padded,256); + for (i = 0;i < 32;++i) out[i] = h[i]; + + return 0; +} diff --git a/nacl/crypto_auth/hmacsha512256/ref/verify.c b/nacl/crypto_auth/hmacsha512256/ref/verify.c new file mode 100644 index 00000000..96ff0ea8 --- /dev/null +++ b/nacl/crypto_auth/hmacsha512256/ref/verify.c @@ -0,0 +1,9 @@ +#include "crypto_verify_32.h" +#include "crypto_auth.h" + +int crypto_auth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k) +{ + unsigned char correct[32]; + crypto_auth(correct,in,inlen,k); + return crypto_verify_32(h,correct); +} diff --git a/nacl/crypto_auth/hmacsha512256/selected b/nacl/crypto_auth/hmacsha512256/selected new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_auth/hmacsha512256/used b/nacl/crypto_auth/hmacsha512256/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_auth/measure.c b/nacl/crypto_auth/measure.c new file mode 100644 index 00000000..e5209903 --- /dev/null +++ b/nacl/crypto_auth/measure.c @@ -0,0 +1,69 @@ +#include "crypto_auth.h" +#include "randombytes.h" +#include "cpucycles.h" + +extern void printentry(long long,const char *,long long *,long long); +extern unsigned char *alignedcalloc(unsigned long long); +extern const char *primitiveimplementation; +extern const char *implementationversion; +extern const char *sizenames[]; +extern const long long sizes[]; +extern void allocate(void); +extern void measure(void); + +const char *primitiveimplementation = crypto_auth_IMPLEMENTATION; +const char *implementationversion = crypto_auth_VERSION; +const char *sizenames[] = { "outputbytes", "keybytes", 0 }; +const long long sizes[] = { crypto_auth_BYTES, crypto_auth_KEYBYTES }; + +#define MAXTEST_BYTES 4096 +#ifdef SUPERCOP +#define MGAP 8192 +#else +#define MGAP 8 +#endif + +static unsigned char *k; +static unsigned char *m; +static unsigned char *h; + +void preallocate(void) +{ +} + 
+void allocate(void) +{ + k = alignedcalloc(crypto_auth_KEYBYTES); + m = alignedcalloc(MAXTEST_BYTES); + h = alignedcalloc(crypto_auth_BYTES); +} + +#define TIMINGS 15 +static long long cycles[TIMINGS + 1]; + +void measure(void) +{ + int i; + int loop; + int mlen; + + for (loop = 0;loop < LOOPS;++loop) { + for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / MGAP) { + randombytes(k,crypto_auth_KEYBYTES); + randombytes(m,mlen); + randombytes(h,crypto_auth_BYTES); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_auth(h,m,mlen,k); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"cycles",cycles,TIMINGS); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_auth_verify(h,m,mlen,k); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"verify_cycles",cycles,TIMINGS); + } + } +} diff --git a/nacl/crypto_auth/try.c b/nacl/crypto_auth/try.c new file mode 100644 index 00000000..6f855dc9 --- /dev/null +++ b/nacl/crypto_auth/try.c @@ -0,0 +1,119 @@ +/* + * crypto_auth/try.c version 20090118 + * D. J. Bernstein + * Public domain. 
+ */ + +#include "crypto_hash_sha256.h" +#include "crypto_auth.h" + +extern unsigned char *alignedcalloc(unsigned long long); + +const char *primitiveimplementation = crypto_auth_IMPLEMENTATION; + +#define MAXTEST_BYTES 10000 +#define CHECKSUM_BYTES 4096 +#define TUNE_BYTES 1536 + +static unsigned char *h; +static unsigned char *m; +static unsigned char *k; +static unsigned char *h2; +static unsigned char *m2; +static unsigned char *k2; + +void preallocate(void) +{ +} + +void allocate(void) +{ + h = alignedcalloc(crypto_auth_BYTES); + m = alignedcalloc(MAXTEST_BYTES); + k = alignedcalloc(crypto_auth_KEYBYTES); + h2 = alignedcalloc(crypto_auth_BYTES); + m2 = alignedcalloc(MAXTEST_BYTES + crypto_auth_BYTES); + k2 = alignedcalloc(crypto_auth_KEYBYTES + crypto_auth_BYTES); +} + +void predoit(void) +{ +} + +void doit(void) +{ + crypto_auth(h,m,TUNE_BYTES,k); + crypto_auth_verify(h,m,TUNE_BYTES,k); +} + +char checksum[crypto_auth_BYTES * 2 + 1]; + +const char *checksum_compute(void) +{ + long long i; + long long j; + + for (i = 0;i < CHECKSUM_BYTES;++i) { + long long mlen = i; + long long klen = crypto_auth_KEYBYTES; + long long hlen = crypto_auth_BYTES; + + for (j = -16;j < 0;++j) h[j] = random(); + for (j = -16;j < 0;++j) k[j] = random(); + for (j = -16;j < 0;++j) m[j] = random(); + for (j = hlen;j < hlen + 16;++j) h[j] = random(); + for (j = klen;j < klen + 16;++j) k[j] = random(); + for (j = mlen;j < mlen + 16;++j) m[j] = random(); + for (j = -16;j < hlen + 16;++j) h2[j] = h[j]; + for (j = -16;j < klen + 16;++j) k2[j] = k[j]; + for (j = -16;j < mlen + 16;++j) m2[j] = m[j]; + + if (crypto_auth(h,m,mlen,k) != 0) return "crypto_auth returns nonzero"; + + for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_auth overwrites k"; + for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_auth overwrites m"; + for (j = -16;j < 0;++j) if (h[j] != h2[j]) return "crypto_auth writes before output"; + for (j = hlen;j < hlen + 16;++j) if (h[j] != h2[j]) 
return "crypto_auth writes after output"; + + for (j = -16;j < 0;++j) h[j] = random(); + for (j = -16;j < 0;++j) k[j] = random(); + for (j = -16;j < 0;++j) m[j] = random(); + for (j = hlen;j < hlen + 16;++j) h[j] = random(); + for (j = klen;j < klen + 16;++j) k[j] = random(); + for (j = mlen;j < mlen + 16;++j) m[j] = random(); + for (j = -16;j < hlen + 16;++j) h2[j] = h[j]; + for (j = -16;j < klen + 16;++j) k2[j] = k[j]; + for (j = -16;j < mlen + 16;++j) m2[j] = m[j]; + + if (crypto_auth(m2,m2,mlen,k) != 0) return "crypto_auth returns nonzero"; + for (j = 0;j < hlen;++j) if (m2[j] != h[j]) return "crypto_auth does not handle m overlap"; + for (j = 0;j < hlen;++j) m2[j] = m[j]; + if (crypto_auth(k2,m2,mlen,k2) != 0) return "crypto_auth returns nonzero"; + for (j = 0;j < hlen;++j) if (k2[j] != h[j]) return "crypto_auth does not handle k overlap"; + for (j = 0;j < hlen;++j) k2[j] = k[j]; + + if (crypto_auth_verify(h,m,mlen,k) != 0) return "crypto_auth_verify returns nonzero"; + + for (j = -16;j < hlen + 16;++j) if (h[j] != h2[j]) return "crypto_auth overwrites h"; + for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_auth overwrites k"; + for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_auth overwrites m"; + + crypto_hash_sha256(h2,h,hlen); + for (j = 0;j < klen;++j) k[j] ^= h2[j % 32]; + if (crypto_auth(h,m,mlen,k) != 0) return "crypto_auth returns nonzero"; + if (crypto_auth_verify(h,m,mlen,k) != 0) return "crypto_auth_verify returns nonzero"; + + crypto_hash_sha256(h2,h,hlen); + for (j = 0;j < mlen;++j) m[j] ^= h2[j % 32]; + m[mlen] = h2[0]; + } + if (crypto_auth(h,m,CHECKSUM_BYTES,k) != 0) return "crypto_auth returns nonzero"; + if (crypto_auth_verify(h,m,CHECKSUM_BYTES,k) != 0) return "crypto_auth_verify returns nonzero"; + + for (i = 0;i < crypto_auth_BYTES;++i) { + checksum[2 * i] = "0123456789abcdef"[15 & (h[i] >> 4)]; + checksum[2 * i + 1] = "0123456789abcdef"[15 & h[i]]; + } + checksum[2 * i] = 0; + return 0; +} diff --git 
a/nacl/crypto_auth/wrapper-auth.cpp b/nacl/crypto_auth/wrapper-auth.cpp new file mode 100644 index 00000000..2108aa31 --- /dev/null +++ b/nacl/crypto_auth/wrapper-auth.cpp @@ -0,0 +1,11 @@ +#include +using std::string; +#include "crypto_auth.h" + +string crypto_auth(const string &m,const string &k) +{ + if (k.size() != crypto_auth_KEYBYTES) throw "incorrect key length"; + unsigned char a[crypto_auth_BYTES]; + crypto_auth(a,(const unsigned char *) m.c_str(),m.size(),(const unsigned char *) k.c_str()); + return string((char *) a,crypto_auth_BYTES); +} diff --git a/nacl/crypto_auth/wrapper-verify.cpp b/nacl/crypto_auth/wrapper-verify.cpp new file mode 100644 index 00000000..57e25a26 --- /dev/null +++ b/nacl/crypto_auth/wrapper-verify.cpp @@ -0,0 +1,14 @@ +#include +using std::string; +#include "crypto_auth.h" + +void crypto_auth_verify(const string &a,const string &m,const string &k) +{ + if (k.size() != crypto_auth_KEYBYTES) throw "incorrect key length"; + if (a.size() != crypto_auth_BYTES) throw "incorrect authenticator length"; + if (crypto_auth_verify( + (const unsigned char *) a.c_str(), + (const unsigned char *) m.c_str(),m.size(), + (const unsigned char *) k.c_str()) == 0) return; + throw "invalid authenticator"; +} diff --git a/nacl/crypto_box/curve25519xsalsa20poly1305/checksum b/nacl/crypto_box/curve25519xsalsa20poly1305/checksum new file mode 100644 index 00000000..56a20083 --- /dev/null +++ b/nacl/crypto_box/curve25519xsalsa20poly1305/checksum @@ -0,0 +1 @@ +5fac7400caabc14a99c5c0bc13fb1df5e468e870382a3a1c diff --git a/nacl/crypto_box/curve25519xsalsa20poly1305/ref/after.c b/nacl/crypto_box/curve25519xsalsa20poly1305/ref/after.c new file mode 100644 index 00000000..eb243e22 --- /dev/null +++ b/nacl/crypto_box/curve25519xsalsa20poly1305/ref/after.c @@ -0,0 +1,22 @@ +#include "crypto_secretbox_xsalsa20poly1305.h" +#include "crypto_box.h" + +int crypto_box_afternm( + unsigned char *c, + const unsigned char *m,unsigned long long mlen, + const unsigned char *n, 
+ const unsigned char *k +) +{ + return crypto_secretbox_xsalsa20poly1305(c,m,mlen,n,k); +} + +int crypto_box_open_afternm( + unsigned char *m, + const unsigned char *c,unsigned long long clen, + const unsigned char *n, + const unsigned char *k +) +{ + return crypto_secretbox_xsalsa20poly1305_open(m,c,clen,n,k); +} diff --git a/nacl/crypto_box/curve25519xsalsa20poly1305/ref/api.h b/nacl/crypto_box/curve25519xsalsa20poly1305/ref/api.h new file mode 100644 index 00000000..ce7762df --- /dev/null +++ b/nacl/crypto_box/curve25519xsalsa20poly1305/ref/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_PUBLICKEYBYTES 32 +#define CRYPTO_SECRETKEYBYTES 32 +#define CRYPTO_BEFORENMBYTES 32 +#define CRYPTO_NONCEBYTES 24 +#define CRYPTO_ZEROBYTES 32 +#define CRYPTO_BOXZEROBYTES 16 diff --git a/nacl/crypto_box/curve25519xsalsa20poly1305/ref/before.c b/nacl/crypto_box/curve25519xsalsa20poly1305/ref/before.c new file mode 100644 index 00000000..279bb12a --- /dev/null +++ b/nacl/crypto_box/curve25519xsalsa20poly1305/ref/before.c @@ -0,0 +1,17 @@ +#include "crypto_core_hsalsa20.h" +#include "crypto_scalarmult_curve25519.h" +#include "crypto_box.h" + +static const unsigned char sigma[16] = "expand 32-byte k"; +static const unsigned char n[16] = {0}; + +int crypto_box_beforenm( + unsigned char *k, + const unsigned char *pk, + const unsigned char *sk +) +{ + unsigned char s[32]; + crypto_scalarmult_curve25519(s,sk,pk); + return crypto_core_hsalsa20(k,n,s,sigma); +} diff --git a/nacl/crypto_box/curve25519xsalsa20poly1305/ref/box.c b/nacl/crypto_box/curve25519xsalsa20poly1305/ref/box.c new file mode 100644 index 00000000..81ff72e2 --- /dev/null +++ b/nacl/crypto_box/curve25519xsalsa20poly1305/ref/box.c @@ -0,0 +1,27 @@ +#include "crypto_box.h" + +int crypto_box( + unsigned char *c, + const unsigned char *m,unsigned long long mlen, + const unsigned char *n, + const unsigned char *pk, + const unsigned char *sk +) +{ + unsigned char k[crypto_box_BEFORENMBYTES]; + crypto_box_beforenm(k,pk,sk); + return 
crypto_box_afternm(c,m,mlen,n,k); +} + +int crypto_box_open( + unsigned char *m, + const unsigned char *c,unsigned long long clen, + const unsigned char *n, + const unsigned char *pk, + const unsigned char *sk +) +{ + unsigned char k[crypto_box_BEFORENMBYTES]; + crypto_box_beforenm(k,pk,sk); + return crypto_box_open_afternm(m,c,clen,n,k); +} diff --git a/nacl/crypto_box/curve25519xsalsa20poly1305/ref/keypair.c b/nacl/crypto_box/curve25519xsalsa20poly1305/ref/keypair.c new file mode 100644 index 00000000..233bc950 --- /dev/null +++ b/nacl/crypto_box/curve25519xsalsa20poly1305/ref/keypair.c @@ -0,0 +1,12 @@ +#include "crypto_scalarmult_curve25519.h" +#include "crypto_box.h" +#include "randombytes.h" + +int crypto_box_keypair( + unsigned char *pk, + unsigned char *sk +) +{ + randombytes(sk,32); + return crypto_scalarmult_curve25519_base(pk,sk); +} diff --git a/nacl/crypto_box/curve25519xsalsa20poly1305/selected b/nacl/crypto_box/curve25519xsalsa20poly1305/selected new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_box/curve25519xsalsa20poly1305/used b/nacl/crypto_box/curve25519xsalsa20poly1305/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_box/measure.c b/nacl/crypto_box/measure.c new file mode 100644 index 00000000..08df1e39 --- /dev/null +++ b/nacl/crypto_box/measure.c @@ -0,0 +1,137 @@ +#include +#include "randombytes.h" +#include "cpucycles.h" +#include "crypto_box.h" + +extern void printentry(long long,const char *,long long *,long long); +extern unsigned char *alignedcalloc(unsigned long long); +extern const char *primitiveimplementation; +extern const char *implementationversion; +extern const char *sizenames[]; +extern const long long sizes[]; +extern void allocate(void); +extern void measure(void); + +const char *primitiveimplementation = crypto_box_IMPLEMENTATION; +const char *implementationversion = crypto_box_VERSION; +const char *sizenames[] = { "publickeybytes", "secretkeybytes", "beforenmbytes", 
"noncebytes", "zerobytes", "boxzerobytes", 0 }; +const long long sizes[] = { crypto_box_PUBLICKEYBYTES, crypto_box_SECRETKEYBYTES, crypto_box_BEFORENMBYTES, crypto_box_NONCEBYTES, crypto_box_ZEROBYTES, crypto_box_BOXZEROBYTES }; + +#define MAXTEST_BYTES 4096 + +static unsigned char *ska; +static unsigned char *pka; +static unsigned char *skb; +static unsigned char *pkb; +static unsigned char *n; +static unsigned char *m; +static unsigned char *c; +static unsigned char *sa; +static unsigned char *sb; + +void preallocate(void) +{ +} + +void allocate(void) +{ + ska = alignedcalloc(crypto_box_SECRETKEYBYTES); + pka = alignedcalloc(crypto_box_PUBLICKEYBYTES); + skb = alignedcalloc(crypto_box_SECRETKEYBYTES); + pkb = alignedcalloc(crypto_box_PUBLICKEYBYTES); + n = alignedcalloc(crypto_box_NONCEBYTES); + m = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES); + c = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES); + sa = alignedcalloc(crypto_box_BEFORENMBYTES); + sb = alignedcalloc(crypto_box_BEFORENMBYTES); +} + +#define TIMINGS 15 +static long long cycles[TIMINGS + 1]; + +void measure(void) +{ + int i; + int loop; + int mlen; + + for (loop = 0;loop < LOOPS;++loop) { + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_box_keypair(pka,ska); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(-1,"keypair_cycles",cycles,TIMINGS); + + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_box_keypair(pkb,skb); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(-1,"keypair_cycles",cycles,TIMINGS); + + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_box_beforenm(sa,pkb,ska); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(-1,"beforenm_cycles",cycles,TIMINGS); + + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_box_beforenm(sb,pka,skb); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - 
cycles[i]; + printentry(-1,"beforenm_cycles",cycles,TIMINGS); + + for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / 8) { + randombytes(n,crypto_box_NONCEBYTES); + randombytes(m + crypto_box_ZEROBYTES,mlen); + randombytes(c,mlen + crypto_box_ZEROBYTES); + + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_box(c,m,mlen + crypto_box_ZEROBYTES,n,pka,skb); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"cycles",cycles,TIMINGS); + + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_box_open(m,c,mlen + crypto_box_ZEROBYTES,n,pkb,ska); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"open_cycles",cycles,TIMINGS); + + ++c[crypto_box_ZEROBYTES]; + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_box_open(m,c,mlen + crypto_box_ZEROBYTES,n,pkb,ska); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"forgery_open_cycles",cycles,TIMINGS); + + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_box_afternm(c,m,mlen + crypto_box_ZEROBYTES,n,sb); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"afternm_cycles",cycles,TIMINGS); + + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_box_open_afternm(m,c,mlen + crypto_box_ZEROBYTES,n,sa); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"open_afternm_cycles",cycles,TIMINGS); + + ++c[crypto_box_ZEROBYTES]; + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_box_open_afternm(m,c,mlen + crypto_box_ZEROBYTES,n,sa); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"forgery_open_afternm_cycles",cycles,TIMINGS); + } + } +} diff --git a/nacl/crypto_box/try.c b/nacl/crypto_box/try.c new file mode 100644 index 00000000..f7029909 --- /dev/null +++ b/nacl/crypto_box/try.c @@ -0,0 +1,195 @@ 
+/* + * crypto_box/try.c version 20090118 + * D. J. Bernstein + * Public domain. + */ + +#include "crypto_box.h" + +extern unsigned char *alignedcalloc(unsigned long long); + +const char *primitiveimplementation = crypto_box_IMPLEMENTATION; + +#define MAXTEST_BYTES 10000 +#define CHECKSUM_BYTES 4096 +#define TUNE_BYTES 1536 + +static unsigned char *ska; +static unsigned char *pka; +static unsigned char *skb; +static unsigned char *pkb; +static unsigned char *s; +static unsigned char *n; +static unsigned char *m; +static unsigned char *c; +static unsigned char *t; +static unsigned char *ska2; +static unsigned char *pka2; +static unsigned char *skb2; +static unsigned char *pkb2; +static unsigned char *s2; +static unsigned char *n2; +static unsigned char *m2; +static unsigned char *c2; +static unsigned char *t2; + +#define sklen crypto_box_SECRETKEYBYTES +#define pklen crypto_box_PUBLICKEYBYTES +#define nlen crypto_box_NONCEBYTES +#define slen crypto_box_BEFORENMBYTES + +void preallocate(void) +{ +} + +void allocate(void) +{ + ska = alignedcalloc(sklen); + pka = alignedcalloc(pklen); + skb = alignedcalloc(sklen); + pkb = alignedcalloc(pklen); + n = alignedcalloc(nlen); + m = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES); + c = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES); + t = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES); + s = alignedcalloc(slen); + ska2 = alignedcalloc(sklen); + pka2 = alignedcalloc(pklen); + skb2 = alignedcalloc(sklen); + pkb2 = alignedcalloc(pklen); + n2 = alignedcalloc(nlen); + m2 = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES); + c2 = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES); + t2 = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES); + s2 = alignedcalloc(slen); +} + +void predoit(void) +{ +} + +void doit(void) +{ + crypto_box(c,m,TUNE_BYTES + crypto_box_ZEROBYTES,n,pka,skb); + crypto_box_open(t,c,TUNE_BYTES + crypto_box_ZEROBYTES,n,pkb,ska); +} + +char checksum[nlen * 2 + 1]; + +const char 
*checksum_compute(void) +{ + long long i; + long long j; + + if (crypto_box_keypair(pka,ska) != 0) return "crypto_box_keypair returns nonzero"; + if (crypto_box_keypair(pkb,skb) != 0) return "crypto_box_keypair returns nonzero"; + + for (j = 0;j < crypto_box_ZEROBYTES;++j) m[j] = 0; + + for (i = 0;i < CHECKSUM_BYTES;++i) { + long long mlen = i + crypto_box_ZEROBYTES; + long long tlen = i + crypto_box_ZEROBYTES; + long long clen = i + crypto_box_ZEROBYTES; + + for (j = -16;j < 0;++j) ska[j] = random(); + for (j = -16;j < 0;++j) skb[j] = random(); + for (j = -16;j < 0;++j) pka[j] = random(); + for (j = -16;j < 0;++j) pkb[j] = random(); + for (j = -16;j < 0;++j) m[j] = random(); + for (j = -16;j < 0;++j) n[j] = random(); + + for (j = sklen;j < sklen + 16;++j) ska[j] = random(); + for (j = sklen;j < sklen + 16;++j) skb[j] = random(); + for (j = pklen;j < pklen + 16;++j) pka[j] = random(); + for (j = pklen;j < pklen + 16;++j) pkb[j] = random(); + for (j = mlen;j < mlen + 16;++j) m[j] = random(); + for (j = nlen;j < nlen + 16;++j) n[j] = random(); + + for (j = -16;j < sklen + 16;++j) ska2[j] = ska[j]; + for (j = -16;j < sklen + 16;++j) skb2[j] = skb[j]; + for (j = -16;j < pklen + 16;++j) pka2[j] = pka[j]; + for (j = -16;j < pklen + 16;++j) pkb2[j] = pkb[j]; + for (j = -16;j < mlen + 16;++j) m2[j] = m[j]; + for (j = -16;j < nlen + 16;++j) n2[j] = n[j]; + for (j = -16;j < clen + 16;++j) c2[j] = c[j] = random(); + + if (crypto_box(c,m,mlen,n,pkb,ska) != 0) return "crypto_box returns nonzero"; + + for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_box overwrites m"; + for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_box overwrites n"; + for (j = -16;j < 0;++j) if (c2[j] != c[j]) return "crypto_box writes before output"; + for (j = clen;j < clen + 16;++j) if (c2[j] != c[j]) return "crypto_box writes after output"; + for (j = 0;j < crypto_box_BOXZEROBYTES;++j) + if (c[j] != 0) return "crypto_box does not clear extra bytes"; + + for (j = -16;j < 
sklen + 16;++j) if (ska2[j] != ska[j]) return "crypto_box overwrites ska"; + for (j = -16;j < sklen + 16;++j) if (skb2[j] != skb[j]) return "crypto_box overwrites skb"; + for (j = -16;j < pklen + 16;++j) if (pka2[j] != pka[j]) return "crypto_box overwrites pka"; + for (j = -16;j < pklen + 16;++j) if (pkb2[j] != pkb[j]) return "crypto_box overwrites pkb"; + + for (j = -16;j < 0;++j) c[j] = random(); + for (j = clen;j < clen + 16;++j) c[j] = random(); + for (j = -16;j < clen + 16;++j) c2[j] = c[j]; + for (j = -16;j < tlen + 16;++j) t2[j] = t[j] = random(); + + if (crypto_box_open(t,c,clen,n,pka,skb) != 0) return "crypto_box_open returns nonzero"; + + for (j = -16;j < clen + 16;++j) if (c2[j] != c[j]) return "crypto_box_open overwrites c"; + for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_box_open overwrites n"; + for (j = -16;j < 0;++j) if (t2[j] != t[j]) return "crypto_box_open writes before output"; + for (j = tlen;j < tlen + 16;++j) if (t2[j] != t[j]) return "crypto_box_open writes after output"; + for (j = 0;j < crypto_box_ZEROBYTES;++j) + if (t[j] != 0) return "crypto_box_open does not clear extra bytes"; + + for (j = -16;j < sklen + 16;++j) if (ska2[j] != ska[j]) return "crypto_box_open overwrites ska"; + for (j = -16;j < sklen + 16;++j) if (skb2[j] != skb[j]) return "crypto_box_open overwrites skb"; + for (j = -16;j < pklen + 16;++j) if (pka2[j] != pka[j]) return "crypto_box_open overwrites pka"; + for (j = -16;j < pklen + 16;++j) if (pkb2[j] != pkb[j]) return "crypto_box_open overwrites pkb"; + + for (j = 0;j < mlen;++j) if (t[j] != m[j]) return "plaintext does not match"; + + for (j = -16;j < slen + 16;++j) s2[j] = s[j] = random(); + if (crypto_box_beforenm(s,pkb,ska) != 0) return "crypto_box_beforenm returns nonzero"; + for (j = -16;j < pklen + 16;++j) if (pka2[j] != pka[j]) return "crypto_box_open overwrites pk"; + for (j = -16;j < sklen + 16;++j) if (skb2[j] != skb[j]) return "crypto_box_open overwrites sk"; + for (j = -16;j < 0;++j) if 
(s2[j] != s[j]) return "crypto_box_beforenm writes before output"; + for (j = slen;j < slen + 16;++j) if (s2[j] != s[j]) return "crypto_box_beforenm writes after output"; + + for (j = -16;j < slen + 16;++j) s2[j] = s[j]; + for (j = -16;j < tlen + 16;++j) t2[j] = t[j] = random(); + if (crypto_box_afternm(t,m,mlen,n,s) != 0) return "crypto_box_afternm returns nonzero"; + for (j = -16;j < slen + 16;++j) if (s2[j] != s[j]) return "crypto_box_afternm overwrites s"; + for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_box_afternm overwrites m"; + for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_box_afternm overwrites n"; + for (j = -16;j < 0;++j) if (t2[j] != t[j]) return "crypto_box_afternm writes before output"; + for (j = tlen;j < tlen + 16;++j) if (t2[j] != t[j]) return "crypto_box_afternm writes after output"; + for (j = 0;j < crypto_box_BOXZEROBYTES;++j) + if (t[j] != 0) return "crypto_box_afternm does not clear extra bytes"; + for (j = 0;j < mlen;++j) if (t[j] != c[j]) return "crypto_box_afternm does not match crypto_box"; + + if (crypto_box_beforenm(s,pka,skb) != 0) return "crypto_box_beforenm returns nonzero"; + + for (j = -16;j < tlen + 16;++j) t2[j] = t[j] = random(); + if (crypto_box_open_afternm(t,c,clen,n,s) != 0) return "crypto_box_open_afternm returns nonzero"; + for (j = -16;j < slen + 16;++j) if (s2[j] != s[j]) return "crypto_box_open_afternm overwrites s"; + for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_box_open_afternm overwrites m"; + for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_box_open_afternm overwrites n"; + for (j = -16;j < 0;++j) if (t2[j] != t[j]) return "crypto_box_open_afternm writes before output"; + for (j = tlen;j < tlen + 16;++j) if (t2[j] != t[j]) return "crypto_box_open_afternm writes after output"; + for (j = 0;j < crypto_box_ZEROBYTES;++j) + if (t[j] != 0) return "crypto_box_open_afternm does not clear extra bytes"; + for (j = 0;j < mlen;++j) if (t[j] != m[j]) 
return "crypto_box_open_afternm does not match crypto_box_open"; + + for (j = 0;j < i;++j) n[j % nlen] ^= c[j + crypto_box_BOXZEROBYTES]; + if (i == 0) m[crypto_box_ZEROBYTES] = 0; + m[i + crypto_box_ZEROBYTES] = m[crypto_box_ZEROBYTES]; + for (j = 0;j < i;++j) m[j + crypto_box_ZEROBYTES] ^= c[j + crypto_box_BOXZEROBYTES]; + } + + for (i = 0;i < nlen;++i) { + checksum[2 * i] = "0123456789abcdef"[15 & (n[i] >> 4)]; + checksum[2 * i + 1] = "0123456789abcdef"[15 & n[i]]; + } + checksum[2 * i] = 0; + return 0; +} diff --git a/nacl/crypto_box/wrapper-box.cpp b/nacl/crypto_box/wrapper-box.cpp new file mode 100644 index 00000000..f0429295 --- /dev/null +++ b/nacl/crypto_box/wrapper-box.cpp @@ -0,0 +1,24 @@ +#include +using std::string; +#include "crypto_box.h" + +string crypto_box(const string &m,const string &n,const string &pk,const string &sk) +{ + if (pk.size() != crypto_box_PUBLICKEYBYTES) throw "incorrect public-key length"; + if (sk.size() != crypto_box_SECRETKEYBYTES) throw "incorrect secret-key length"; + if (n.size() != crypto_box_NONCEBYTES) throw "incorrect nonce length"; + size_t mlen = m.size() + crypto_box_ZEROBYTES; + unsigned char mpad[mlen]; + for (int i = 0;i < crypto_box_ZEROBYTES;++i) mpad[i] = 0; + for (int i = crypto_box_ZEROBYTES;i < mlen;++i) mpad[i] = m[i - crypto_box_ZEROBYTES]; + unsigned char cpad[mlen]; + crypto_box(cpad,mpad,mlen, + (const unsigned char *) n.c_str(), + (const unsigned char *) pk.c_str(), + (const unsigned char *) sk.c_str() + ); + return string( + (char *) cpad + crypto_box_BOXZEROBYTES, + mlen - crypto_box_BOXZEROBYTES + ); +} diff --git a/nacl/crypto_box/wrapper-keypair.cpp b/nacl/crypto_box/wrapper-keypair.cpp new file mode 100644 index 00000000..b59f92d9 --- /dev/null +++ b/nacl/crypto_box/wrapper-keypair.cpp @@ -0,0 +1,12 @@ +#include +using std::string; +#include "crypto_box.h" + +string crypto_box_keypair(string *sk_string) +{ + unsigned char pk[crypto_box_PUBLICKEYBYTES]; + unsigned char 
sk[crypto_box_SECRETKEYBYTES]; + crypto_box_keypair(pk,sk); + *sk_string = string((char *) sk,sizeof sk); + return string((char *) pk,sizeof pk); +} diff --git a/nacl/crypto_box/wrapper-open.cpp b/nacl/crypto_box/wrapper-open.cpp new file mode 100644 index 00000000..67663a21 --- /dev/null +++ b/nacl/crypto_box/wrapper-open.cpp @@ -0,0 +1,27 @@ +#include +using std::string; +#include "crypto_box.h" + +string crypto_box_open(const string &c,const string &n,const string &pk,const string &sk) +{ + if (pk.size() != crypto_box_PUBLICKEYBYTES) throw "incorrect public-key length"; + if (sk.size() != crypto_box_SECRETKEYBYTES) throw "incorrect secret-key length"; + if (n.size() != crypto_box_NONCEBYTES) throw "incorrect nonce length"; + size_t clen = c.size() + crypto_box_BOXZEROBYTES; + unsigned char cpad[clen]; + for (int i = 0;i < crypto_box_BOXZEROBYTES;++i) cpad[i] = 0; + for (int i = crypto_box_BOXZEROBYTES;i < clen;++i) cpad[i] = c[i - crypto_box_BOXZEROBYTES]; + unsigned char mpad[clen]; + if (crypto_box_open(mpad,cpad,clen, + (const unsigned char *) n.c_str(), + (const unsigned char *) pk.c_str(), + (const unsigned char *) sk.c_str() + ) != 0) + throw "ciphertext fails verification"; + if (clen < crypto_box_ZEROBYTES) + throw "ciphertext too short"; // should have been caught by _open + return string( + (char *) mpad + crypto_box_ZEROBYTES, + clen - crypto_box_ZEROBYTES + ); +} diff --git a/nacl/crypto_core/hsalsa20/checksum b/nacl/crypto_core/hsalsa20/checksum new file mode 100644 index 00000000..f67bb2e2 --- /dev/null +++ b/nacl/crypto_core/hsalsa20/checksum @@ -0,0 +1 @@ +28ebe700b5878570702a68740aa131e6fa907e58a3f6915cd183c6db3f7afd7a diff --git a/nacl/crypto_core/hsalsa20/ref/api.h b/nacl/crypto_core/hsalsa20/ref/api.h new file mode 100644 index 00000000..73bd8541 --- /dev/null +++ b/nacl/crypto_core/hsalsa20/ref/api.h @@ -0,0 +1,4 @@ +#define CRYPTO_OUTPUTBYTES 32 +#define CRYPTO_INPUTBYTES 16 +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_CONSTBYTES 16 diff 
--git a/nacl/crypto_core/hsalsa20/ref/core.c b/nacl/crypto_core/hsalsa20/ref/core.c new file mode 100644 index 00000000..36118da0 --- /dev/null +++ b/nacl/crypto_core/hsalsa20/ref/core.c @@ -0,0 +1,135 @@ +/* +version 20080912 +D. J. Bernstein +Public domain. +*/ + +#include "crypto_core.h" + +#define ROUNDS 20 + +typedef unsigned int uint32; + +static uint32 rotate(uint32 u,int c) +{ + return (u << c) | (u >> (32 - c)); +} + +static uint32 load_littleendian(const unsigned char *x) +{ + return + (uint32) (x[0]) \ + | (((uint32) (x[1])) << 8) \ + | (((uint32) (x[2])) << 16) \ + | (((uint32) (x[3])) << 24) + ; +} + +static void store_littleendian(unsigned char *x,uint32 u) +{ + x[0] = u; u >>= 8; + x[1] = u; u >>= 8; + x[2] = u; u >>= 8; + x[3] = u; +} + +int crypto_core( + unsigned char *out, + const unsigned char *in, + const unsigned char *k, + const unsigned char *c +) +{ + uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + int i; + + j0 = x0 = load_littleendian(c + 0); + j1 = x1 = load_littleendian(k + 0); + j2 = x2 = load_littleendian(k + 4); + j3 = x3 = load_littleendian(k + 8); + j4 = x4 = load_littleendian(k + 12); + j5 = x5 = load_littleendian(c + 4); + j6 = x6 = load_littleendian(in + 0); + j7 = x7 = load_littleendian(in + 4); + j8 = x8 = load_littleendian(in + 8); + j9 = x9 = load_littleendian(in + 12); + j10 = x10 = load_littleendian(c + 8); + j11 = x11 = load_littleendian(k + 16); + j12 = x12 = load_littleendian(k + 20); + j13 = x13 = load_littleendian(k + 24); + j14 = x14 = load_littleendian(k + 28); + j15 = x15 = load_littleendian(c + 12); + + for (i = ROUNDS;i > 0;i -= 2) { + x4 ^= rotate( x0+x12, 7); + x8 ^= rotate( x4+ x0, 9); + x12 ^= rotate( x8+ x4,13); + x0 ^= rotate(x12+ x8,18); + x9 ^= rotate( x5+ x1, 7); + x13 ^= rotate( x9+ x5, 9); + x1 ^= rotate(x13+ x9,13); + x5 ^= rotate( x1+x13,18); + x14 ^= rotate(x10+ x6, 7); + x2 ^= 
rotate(x14+x10, 9); + x6 ^= rotate( x2+x14,13); + x10 ^= rotate( x6+ x2,18); + x3 ^= rotate(x15+x11, 7); + x7 ^= rotate( x3+x15, 9); + x11 ^= rotate( x7+ x3,13); + x15 ^= rotate(x11+ x7,18); + x1 ^= rotate( x0+ x3, 7); + x2 ^= rotate( x1+ x0, 9); + x3 ^= rotate( x2+ x1,13); + x0 ^= rotate( x3+ x2,18); + x6 ^= rotate( x5+ x4, 7); + x7 ^= rotate( x6+ x5, 9); + x4 ^= rotate( x7+ x6,13); + x5 ^= rotate( x4+ x7,18); + x11 ^= rotate(x10+ x9, 7); + x8 ^= rotate(x11+x10, 9); + x9 ^= rotate( x8+x11,13); + x10 ^= rotate( x9+ x8,18); + x12 ^= rotate(x15+x14, 7); + x13 ^= rotate(x12+x15, 9); + x14 ^= rotate(x13+x12,13); + x15 ^= rotate(x14+x13,18); + } + + x0 += j0; + x1 += j1; + x2 += j2; + x3 += j3; + x4 += j4; + x5 += j5; + x6 += j6; + x7 += j7; + x8 += j8; + x9 += j9; + x10 += j10; + x11 += j11; + x12 += j12; + x13 += j13; + x14 += j14; + x15 += j15; + + x0 -= load_littleendian(c + 0); + x5 -= load_littleendian(c + 4); + x10 -= load_littleendian(c + 8); + x15 -= load_littleendian(c + 12); + x6 -= load_littleendian(in + 0); + x7 -= load_littleendian(in + 4); + x8 -= load_littleendian(in + 8); + x9 -= load_littleendian(in + 12); + + store_littleendian(out + 0,x0); + store_littleendian(out + 4,x5); + store_littleendian(out + 8,x10); + store_littleendian(out + 12,x15); + store_littleendian(out + 16,x6); + store_littleendian(out + 20,x7); + store_littleendian(out + 24,x8); + store_littleendian(out + 28,x9); + + return 0; +} diff --git a/nacl/crypto_core/hsalsa20/ref/implementors b/nacl/crypto_core/hsalsa20/ref/implementors new file mode 100644 index 00000000..f6fb3c73 --- /dev/null +++ b/nacl/crypto_core/hsalsa20/ref/implementors @@ -0,0 +1 @@ +Daniel J. 
Bernstein diff --git a/nacl/crypto_core/hsalsa20/ref2/api.h b/nacl/crypto_core/hsalsa20/ref2/api.h new file mode 100644 index 00000000..73bd8541 --- /dev/null +++ b/nacl/crypto_core/hsalsa20/ref2/api.h @@ -0,0 +1,4 @@ +#define CRYPTO_OUTPUTBYTES 32 +#define CRYPTO_INPUTBYTES 16 +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_CONSTBYTES 16 diff --git a/nacl/crypto_core/hsalsa20/ref2/core.c b/nacl/crypto_core/hsalsa20/ref2/core.c new file mode 100644 index 00000000..9a9a8c7c --- /dev/null +++ b/nacl/crypto_core/hsalsa20/ref2/core.c @@ -0,0 +1,108 @@ +/* +version 20080912 +D. J. Bernstein +Public domain. +*/ + +#include "crypto_core.h" + +#define ROUNDS 20 + +typedef unsigned int uint32; + +static uint32 rotate(uint32 u,int c) +{ + return (u << c) | (u >> (32 - c)); +} + +static uint32 load_littleendian(const unsigned char *x) +{ + return + (uint32) (x[0]) \ + | (((uint32) (x[1])) << 8) \ + | (((uint32) (x[2])) << 16) \ + | (((uint32) (x[3])) << 24) + ; +} + +static void store_littleendian(unsigned char *x,uint32 u) +{ + x[0] = u; u >>= 8; + x[1] = u; u >>= 8; + x[2] = u; u >>= 8; + x[3] = u; +} + +int crypto_core( + unsigned char *out, + const unsigned char *in, + const unsigned char *k, + const unsigned char *c +) +{ + uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + int i; + + x0 = load_littleendian(c + 0); + x1 = load_littleendian(k + 0); + x2 = load_littleendian(k + 4); + x3 = load_littleendian(k + 8); + x4 = load_littleendian(k + 12); + x5 = load_littleendian(c + 4); + x6 = load_littleendian(in + 0); + x7 = load_littleendian(in + 4); + x8 = load_littleendian(in + 8); + x9 = load_littleendian(in + 12); + x10 = load_littleendian(c + 8); + x11 = load_littleendian(k + 16); + x12 = load_littleendian(k + 20); + x13 = load_littleendian(k + 24); + x14 = load_littleendian(k + 28); + x15 = load_littleendian(c + 12); + + for (i = ROUNDS;i > 0;i -= 2) { + x4 ^= rotate( x0+x12, 7); + x8 ^= rotate( x4+ x0, 9); + x12 ^= rotate( x8+ x4,13); + x0 ^= 
rotate(x12+ x8,18); + x9 ^= rotate( x5+ x1, 7); + x13 ^= rotate( x9+ x5, 9); + x1 ^= rotate(x13+ x9,13); + x5 ^= rotate( x1+x13,18); + x14 ^= rotate(x10+ x6, 7); + x2 ^= rotate(x14+x10, 9); + x6 ^= rotate( x2+x14,13); + x10 ^= rotate( x6+ x2,18); + x3 ^= rotate(x15+x11, 7); + x7 ^= rotate( x3+x15, 9); + x11 ^= rotate( x7+ x3,13); + x15 ^= rotate(x11+ x7,18); + x1 ^= rotate( x0+ x3, 7); + x2 ^= rotate( x1+ x0, 9); + x3 ^= rotate( x2+ x1,13); + x0 ^= rotate( x3+ x2,18); + x6 ^= rotate( x5+ x4, 7); + x7 ^= rotate( x6+ x5, 9); + x4 ^= rotate( x7+ x6,13); + x5 ^= rotate( x4+ x7,18); + x11 ^= rotate(x10+ x9, 7); + x8 ^= rotate(x11+x10, 9); + x9 ^= rotate( x8+x11,13); + x10 ^= rotate( x9+ x8,18); + x12 ^= rotate(x15+x14, 7); + x13 ^= rotate(x12+x15, 9); + x14 ^= rotate(x13+x12,13); + x15 ^= rotate(x14+x13,18); + } + + store_littleendian(out + 0,x0); + store_littleendian(out + 4,x5); + store_littleendian(out + 8,x10); + store_littleendian(out + 12,x15); + store_littleendian(out + 16,x6); + store_littleendian(out + 20,x7); + store_littleendian(out + 24,x8); + store_littleendian(out + 28,x9); + + return 0; +} diff --git a/nacl/crypto_core/hsalsa20/ref2/implementors b/nacl/crypto_core/hsalsa20/ref2/implementors new file mode 100644 index 00000000..f6fb3c73 --- /dev/null +++ b/nacl/crypto_core/hsalsa20/ref2/implementors @@ -0,0 +1 @@ +Daniel J. 
Bernstein diff --git a/nacl/crypto_core/hsalsa20/used b/nacl/crypto_core/hsalsa20/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_core/measure.c b/nacl/crypto_core/measure.c new file mode 100644 index 00000000..dd7bac81 --- /dev/null +++ b/nacl/crypto_core/measure.c @@ -0,0 +1,18 @@ +#include "crypto_core.h" + +const char *primitiveimplementation = crypto_core_IMPLEMENTATION; +const char *implementationversion = crypto_core_VERSION; +const char *sizenames[] = { "outputbytes", "inputbytes", "keybytes", "constbytes", 0 }; +const long long sizes[] = { crypto_core_OUTPUTBYTES, crypto_core_INPUTBYTES, crypto_core_KEYBYTES, crypto_core_CONSTBYTES }; + +void preallocate(void) +{ +} + +void allocate(void) +{ +} + +void measure(void) +{ +} diff --git a/nacl/crypto_core/salsa20/checksum b/nacl/crypto_core/salsa20/checksum new file mode 100644 index 00000000..fcf56186 --- /dev/null +++ b/nacl/crypto_core/salsa20/checksum @@ -0,0 +1 @@ +9d1ee8d84b974e648507ffd93829376c5b4420751710e44f6593abd8769378011d85ecda51ceb8f43661d3c65ef5b57c4f5bf8df76c8202784c8df8def61e6a6 diff --git a/nacl/crypto_core/salsa20/ref/api.h b/nacl/crypto_core/salsa20/ref/api.h new file mode 100644 index 00000000..2a387b6d --- /dev/null +++ b/nacl/crypto_core/salsa20/ref/api.h @@ -0,0 +1,4 @@ +#define CRYPTO_OUTPUTBYTES 64 +#define CRYPTO_INPUTBYTES 16 +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_CONSTBYTES 16 diff --git a/nacl/crypto_core/salsa20/ref/core.c b/nacl/crypto_core/salsa20/ref/core.c new file mode 100644 index 00000000..910a0056 --- /dev/null +++ b/nacl/crypto_core/salsa20/ref/core.c @@ -0,0 +1,134 @@ +/* +version 20080912 +D. J. Bernstein +Public domain. 
+*/ + +#include "crypto_core.h" + +#define ROUNDS 20 + +typedef unsigned int uint32; + +static uint32 rotate(uint32 u,int c) +{ + return (u << c) | (u >> (32 - c)); +} + +static uint32 load_littleendian(const unsigned char *x) +{ + return + (uint32) (x[0]) \ + | (((uint32) (x[1])) << 8) \ + | (((uint32) (x[2])) << 16) \ + | (((uint32) (x[3])) << 24) + ; +} + +static void store_littleendian(unsigned char *x,uint32 u) +{ + x[0] = u; u >>= 8; + x[1] = u; u >>= 8; + x[2] = u; u >>= 8; + x[3] = u; +} + +int crypto_core( + unsigned char *out, + const unsigned char *in, + const unsigned char *k, + const unsigned char *c +) +{ + uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + int i; + + j0 = x0 = load_littleendian(c + 0); + j1 = x1 = load_littleendian(k + 0); + j2 = x2 = load_littleendian(k + 4); + j3 = x3 = load_littleendian(k + 8); + j4 = x4 = load_littleendian(k + 12); + j5 = x5 = load_littleendian(c + 4); + j6 = x6 = load_littleendian(in + 0); + j7 = x7 = load_littleendian(in + 4); + j8 = x8 = load_littleendian(in + 8); + j9 = x9 = load_littleendian(in + 12); + j10 = x10 = load_littleendian(c + 8); + j11 = x11 = load_littleendian(k + 16); + j12 = x12 = load_littleendian(k + 20); + j13 = x13 = load_littleendian(k + 24); + j14 = x14 = load_littleendian(k + 28); + j15 = x15 = load_littleendian(c + 12); + + for (i = ROUNDS;i > 0;i -= 2) { + x4 ^= rotate( x0+x12, 7); + x8 ^= rotate( x4+ x0, 9); + x12 ^= rotate( x8+ x4,13); + x0 ^= rotate(x12+ x8,18); + x9 ^= rotate( x5+ x1, 7); + x13 ^= rotate( x9+ x5, 9); + x1 ^= rotate(x13+ x9,13); + x5 ^= rotate( x1+x13,18); + x14 ^= rotate(x10+ x6, 7); + x2 ^= rotate(x14+x10, 9); + x6 ^= rotate( x2+x14,13); + x10 ^= rotate( x6+ x2,18); + x3 ^= rotate(x15+x11, 7); + x7 ^= rotate( x3+x15, 9); + x11 ^= rotate( x7+ x3,13); + x15 ^= rotate(x11+ x7,18); + x1 ^= rotate( x0+ x3, 7); + x2 ^= rotate( x1+ x0, 9); + x3 ^= rotate( x2+ 
x1,13); + x0 ^= rotate( x3+ x2,18); + x6 ^= rotate( x5+ x4, 7); + x7 ^= rotate( x6+ x5, 9); + x4 ^= rotate( x7+ x6,13); + x5 ^= rotate( x4+ x7,18); + x11 ^= rotate(x10+ x9, 7); + x8 ^= rotate(x11+x10, 9); + x9 ^= rotate( x8+x11,13); + x10 ^= rotate( x9+ x8,18); + x12 ^= rotate(x15+x14, 7); + x13 ^= rotate(x12+x15, 9); + x14 ^= rotate(x13+x12,13); + x15 ^= rotate(x14+x13,18); + } + + x0 += j0; + x1 += j1; + x2 += j2; + x3 += j3; + x4 += j4; + x5 += j5; + x6 += j6; + x7 += j7; + x8 += j8; + x9 += j9; + x10 += j10; + x11 += j11; + x12 += j12; + x13 += j13; + x14 += j14; + x15 += j15; + + store_littleendian(out + 0,x0); + store_littleendian(out + 4,x1); + store_littleendian(out + 8,x2); + store_littleendian(out + 12,x3); + store_littleendian(out + 16,x4); + store_littleendian(out + 20,x5); + store_littleendian(out + 24,x6); + store_littleendian(out + 28,x7); + store_littleendian(out + 32,x8); + store_littleendian(out + 36,x9); + store_littleendian(out + 40,x10); + store_littleendian(out + 44,x11); + store_littleendian(out + 48,x12); + store_littleendian(out + 52,x13); + store_littleendian(out + 56,x14); + store_littleendian(out + 60,x15); + + return 0; +} diff --git a/nacl/crypto_core/salsa20/ref/implementors b/nacl/crypto_core/salsa20/ref/implementors new file mode 100644 index 00000000..f6fb3c73 --- /dev/null +++ b/nacl/crypto_core/salsa20/ref/implementors @@ -0,0 +1 @@ +Daniel J. 
Bernstein diff --git a/nacl/crypto_core/salsa20/used b/nacl/crypto_core/salsa20/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_core/salsa2012/checksum b/nacl/crypto_core/salsa2012/checksum new file mode 100644 index 00000000..2f99a8d6 --- /dev/null +++ b/nacl/crypto_core/salsa2012/checksum @@ -0,0 +1 @@ +f36d643f798efc0fca888d3ac4bdcc54c98a968c2da16bd5b8bfe9fe9025a6ca3a207e9362dc7cf17ddfc7477ee754d3f521b1df91640093754f7275b1a54293 diff --git a/nacl/crypto_core/salsa2012/ref/api.h b/nacl/crypto_core/salsa2012/ref/api.h new file mode 100644 index 00000000..2a387b6d --- /dev/null +++ b/nacl/crypto_core/salsa2012/ref/api.h @@ -0,0 +1,4 @@ +#define CRYPTO_OUTPUTBYTES 64 +#define CRYPTO_INPUTBYTES 16 +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_CONSTBYTES 16 diff --git a/nacl/crypto_core/salsa2012/ref/core.c b/nacl/crypto_core/salsa2012/ref/core.c new file mode 100644 index 00000000..d4b59e48 --- /dev/null +++ b/nacl/crypto_core/salsa2012/ref/core.c @@ -0,0 +1,134 @@ +/* +version 20080913 +D. J. Bernstein +Public domain. 
+*/ + +#include "crypto_core.h" + +#define ROUNDS 12 + +typedef unsigned int uint32; + +static uint32 rotate(uint32 u,int c) +{ + return (u << c) | (u >> (32 - c)); +} + +static uint32 load_littleendian(const unsigned char *x) +{ + return + (uint32) (x[0]) \ + | (((uint32) (x[1])) << 8) \ + | (((uint32) (x[2])) << 16) \ + | (((uint32) (x[3])) << 24) + ; +} + +static void store_littleendian(unsigned char *x,uint32 u) +{ + x[0] = u; u >>= 8; + x[1] = u; u >>= 8; + x[2] = u; u >>= 8; + x[3] = u; +} + +int crypto_core( + unsigned char *out, + const unsigned char *in, + const unsigned char *k, + const unsigned char *c +) +{ + uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + int i; + + j0 = x0 = load_littleendian(c + 0); + j1 = x1 = load_littleendian(k + 0); + j2 = x2 = load_littleendian(k + 4); + j3 = x3 = load_littleendian(k + 8); + j4 = x4 = load_littleendian(k + 12); + j5 = x5 = load_littleendian(c + 4); + j6 = x6 = load_littleendian(in + 0); + j7 = x7 = load_littleendian(in + 4); + j8 = x8 = load_littleendian(in + 8); + j9 = x9 = load_littleendian(in + 12); + j10 = x10 = load_littleendian(c + 8); + j11 = x11 = load_littleendian(k + 16); + j12 = x12 = load_littleendian(k + 20); + j13 = x13 = load_littleendian(k + 24); + j14 = x14 = load_littleendian(k + 28); + j15 = x15 = load_littleendian(c + 12); + + for (i = ROUNDS;i > 0;i -= 2) { + x4 ^= rotate( x0+x12, 7); + x8 ^= rotate( x4+ x0, 9); + x12 ^= rotate( x8+ x4,13); + x0 ^= rotate(x12+ x8,18); + x9 ^= rotate( x5+ x1, 7); + x13 ^= rotate( x9+ x5, 9); + x1 ^= rotate(x13+ x9,13); + x5 ^= rotate( x1+x13,18); + x14 ^= rotate(x10+ x6, 7); + x2 ^= rotate(x14+x10, 9); + x6 ^= rotate( x2+x14,13); + x10 ^= rotate( x6+ x2,18); + x3 ^= rotate(x15+x11, 7); + x7 ^= rotate( x3+x15, 9); + x11 ^= rotate( x7+ x3,13); + x15 ^= rotate(x11+ x7,18); + x1 ^= rotate( x0+ x3, 7); + x2 ^= rotate( x1+ x0, 9); + x3 ^= rotate( x2+ 
x1,13); + x0 ^= rotate( x3+ x2,18); + x6 ^= rotate( x5+ x4, 7); + x7 ^= rotate( x6+ x5, 9); + x4 ^= rotate( x7+ x6,13); + x5 ^= rotate( x4+ x7,18); + x11 ^= rotate(x10+ x9, 7); + x8 ^= rotate(x11+x10, 9); + x9 ^= rotate( x8+x11,13); + x10 ^= rotate( x9+ x8,18); + x12 ^= rotate(x15+x14, 7); + x13 ^= rotate(x12+x15, 9); + x14 ^= rotate(x13+x12,13); + x15 ^= rotate(x14+x13,18); + } + + x0 += j0; + x1 += j1; + x2 += j2; + x3 += j3; + x4 += j4; + x5 += j5; + x6 += j6; + x7 += j7; + x8 += j8; + x9 += j9; + x10 += j10; + x11 += j11; + x12 += j12; + x13 += j13; + x14 += j14; + x15 += j15; + + store_littleendian(out + 0,x0); + store_littleendian(out + 4,x1); + store_littleendian(out + 8,x2); + store_littleendian(out + 12,x3); + store_littleendian(out + 16,x4); + store_littleendian(out + 20,x5); + store_littleendian(out + 24,x6); + store_littleendian(out + 28,x7); + store_littleendian(out + 32,x8); + store_littleendian(out + 36,x9); + store_littleendian(out + 40,x10); + store_littleendian(out + 44,x11); + store_littleendian(out + 48,x12); + store_littleendian(out + 52,x13); + store_littleendian(out + 56,x14); + store_littleendian(out + 60,x15); + + return 0; +} diff --git a/nacl/crypto_core/salsa2012/ref/implementors b/nacl/crypto_core/salsa2012/ref/implementors new file mode 100644 index 00000000..f6fb3c73 --- /dev/null +++ b/nacl/crypto_core/salsa2012/ref/implementors @@ -0,0 +1 @@ +Daniel J. 
Bernstein diff --git a/nacl/crypto_core/salsa2012/used b/nacl/crypto_core/salsa2012/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_core/salsa208/checksum b/nacl/crypto_core/salsa208/checksum new file mode 100644 index 00000000..a16cb52f --- /dev/null +++ b/nacl/crypto_core/salsa208/checksum @@ -0,0 +1 @@ +1e13ea9e74cb36989f7cbf4abc80b29154e1a8b150bd5244951318abea002a93ae9fe2abbcf7217526ac2a85b66c256ba9374b1257eda0c01816da328edfa11a diff --git a/nacl/crypto_core/salsa208/ref/api.h b/nacl/crypto_core/salsa208/ref/api.h new file mode 100644 index 00000000..2a387b6d --- /dev/null +++ b/nacl/crypto_core/salsa208/ref/api.h @@ -0,0 +1,4 @@ +#define CRYPTO_OUTPUTBYTES 64 +#define CRYPTO_INPUTBYTES 16 +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_CONSTBYTES 16 diff --git a/nacl/crypto_core/salsa208/ref/core.c b/nacl/crypto_core/salsa208/ref/core.c new file mode 100644 index 00000000..921e7a86 --- /dev/null +++ b/nacl/crypto_core/salsa208/ref/core.c @@ -0,0 +1,134 @@ +/* +version 20080913 +D. J. Bernstein +Public domain. 
+*/ + +#include "crypto_core.h" + +#define ROUNDS 8 + +typedef unsigned int uint32; + +static uint32 rotate(uint32 u,int c) +{ + return (u << c) | (u >> (32 - c)); +} + +static uint32 load_littleendian(const unsigned char *x) +{ + return + (uint32) (x[0]) \ + | (((uint32) (x[1])) << 8) \ + | (((uint32) (x[2])) << 16) \ + | (((uint32) (x[3])) << 24) + ; +} + +static void store_littleendian(unsigned char *x,uint32 u) +{ + x[0] = u; u >>= 8; + x[1] = u; u >>= 8; + x[2] = u; u >>= 8; + x[3] = u; +} + +int crypto_core( + unsigned char *out, + const unsigned char *in, + const unsigned char *k, + const unsigned char *c +) +{ + uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + int i; + + j0 = x0 = load_littleendian(c + 0); + j1 = x1 = load_littleendian(k + 0); + j2 = x2 = load_littleendian(k + 4); + j3 = x3 = load_littleendian(k + 8); + j4 = x4 = load_littleendian(k + 12); + j5 = x5 = load_littleendian(c + 4); + j6 = x6 = load_littleendian(in + 0); + j7 = x7 = load_littleendian(in + 4); + j8 = x8 = load_littleendian(in + 8); + j9 = x9 = load_littleendian(in + 12); + j10 = x10 = load_littleendian(c + 8); + j11 = x11 = load_littleendian(k + 16); + j12 = x12 = load_littleendian(k + 20); + j13 = x13 = load_littleendian(k + 24); + j14 = x14 = load_littleendian(k + 28); + j15 = x15 = load_littleendian(c + 12); + + for (i = ROUNDS;i > 0;i -= 2) { + x4 ^= rotate( x0+x12, 7); + x8 ^= rotate( x4+ x0, 9); + x12 ^= rotate( x8+ x4,13); + x0 ^= rotate(x12+ x8,18); + x9 ^= rotate( x5+ x1, 7); + x13 ^= rotate( x9+ x5, 9); + x1 ^= rotate(x13+ x9,13); + x5 ^= rotate( x1+x13,18); + x14 ^= rotate(x10+ x6, 7); + x2 ^= rotate(x14+x10, 9); + x6 ^= rotate( x2+x14,13); + x10 ^= rotate( x6+ x2,18); + x3 ^= rotate(x15+x11, 7); + x7 ^= rotate( x3+x15, 9); + x11 ^= rotate( x7+ x3,13); + x15 ^= rotate(x11+ x7,18); + x1 ^= rotate( x0+ x3, 7); + x2 ^= rotate( x1+ x0, 9); + x3 ^= rotate( x2+ x1,13); 
+ x0 ^= rotate( x3+ x2,18); + x6 ^= rotate( x5+ x4, 7); + x7 ^= rotate( x6+ x5, 9); + x4 ^= rotate( x7+ x6,13); + x5 ^= rotate( x4+ x7,18); + x11 ^= rotate(x10+ x9, 7); + x8 ^= rotate(x11+x10, 9); + x9 ^= rotate( x8+x11,13); + x10 ^= rotate( x9+ x8,18); + x12 ^= rotate(x15+x14, 7); + x13 ^= rotate(x12+x15, 9); + x14 ^= rotate(x13+x12,13); + x15 ^= rotate(x14+x13,18); + } + + x0 += j0; + x1 += j1; + x2 += j2; + x3 += j3; + x4 += j4; + x5 += j5; + x6 += j6; + x7 += j7; + x8 += j8; + x9 += j9; + x10 += j10; + x11 += j11; + x12 += j12; + x13 += j13; + x14 += j14; + x15 += j15; + + store_littleendian(out + 0,x0); + store_littleendian(out + 4,x1); + store_littleendian(out + 8,x2); + store_littleendian(out + 12,x3); + store_littleendian(out + 16,x4); + store_littleendian(out + 20,x5); + store_littleendian(out + 24,x6); + store_littleendian(out + 28,x7); + store_littleendian(out + 32,x8); + store_littleendian(out + 36,x9); + store_littleendian(out + 40,x10); + store_littleendian(out + 44,x11); + store_littleendian(out + 48,x12); + store_littleendian(out + 52,x13); + store_littleendian(out + 56,x14); + store_littleendian(out + 60,x15); + + return 0; +} diff --git a/nacl/crypto_core/salsa208/ref/implementors b/nacl/crypto_core/salsa208/ref/implementors new file mode 100644 index 00000000..f6fb3c73 --- /dev/null +++ b/nacl/crypto_core/salsa208/ref/implementors @@ -0,0 +1 @@ +Daniel J. Bernstein diff --git a/nacl/crypto_core/salsa208/used b/nacl/crypto_core/salsa208/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_core/try.c b/nacl/crypto_core/try.c new file mode 100644 index 00000000..7eb1c677 --- /dev/null +++ b/nacl/crypto_core/try.c @@ -0,0 +1,116 @@ +/* + * crypto_core/try.c version 20090118 + * D. J. Bernstein + * Public domain. 
+ */ + +#include +#include "crypto_core.h" + +extern unsigned char *alignedcalloc(unsigned long long); + +const char *primitiveimplementation = crypto_core_IMPLEMENTATION; + +static unsigned char *h; +static unsigned char *n; +static unsigned char *k; +static unsigned char *c; +static unsigned char *h2; +static unsigned char *n2; +static unsigned char *k2; +static unsigned char *c2; + +#define hlen crypto_core_OUTPUTBYTES +#define nlen crypto_core_INPUTBYTES +#define klen crypto_core_KEYBYTES +#define clen crypto_core_CONSTBYTES + +void preallocate(void) +{ +} + +void allocate(void) +{ + h = alignedcalloc(hlen); + n = alignedcalloc(nlen); + k = alignedcalloc(klen); + c = alignedcalloc(clen); + h2 = alignedcalloc(hlen); + n2 = alignedcalloc(nlen + crypto_core_OUTPUTBYTES); + k2 = alignedcalloc(klen + crypto_core_OUTPUTBYTES); + c2 = alignedcalloc(clen + crypto_core_OUTPUTBYTES); +} + +void predoit(void) +{ +} + +void doit(void) +{ + crypto_core(h,n,k,c); +} + +static unsigned char newbyte(void) +{ + unsigned long long x; + long long j; + x = 8675309; + for (j = 0;j < hlen;++j) { x += h[j]; x *= x; x += (x >> 31); } + for (j = 0;j < nlen;++j) { x += n[j]; x *= x; x += (x >> 31); } + for (j = 0;j < klen;++j) { x += k[j]; x *= x; x += (x >> 31); } + for (j = 0;j < clen;++j) { x += c[j]; x *= x; x += (x >> 31); } + for (j = 0;j < 100;++j) { x += j ; x *= x; x += (x >> 31); } + return x; +} + +char checksum[hlen * 2 + 1]; + +const char *checksum_compute(void) +{ + long long i; + long long j; + + for (i = 0;i < 100;++i) { + for (j = -16;j < 0;++j) h[j] = random(); + for (j = hlen;j < hlen + 16;++j) h[j] = random(); + for (j = -16;j < hlen + 16;++j) h2[j] = h[j]; + for (j = -16;j < 0;++j) n[j] = random(); + for (j = nlen;j < nlen + 16;++j) n[j] = random(); + for (j = -16;j < nlen + 16;++j) n2[j] = n[j]; + for (j = -16;j < 0;++j) k[j] = random(); + for (j = klen;j < klen + 16;++j) k[j] = random(); + for (j = -16;j < klen + 16;++j) k2[j] = k[j]; + for (j = -16;j < 0;++j) 
c[j] = random(); + for (j = clen;j < clen + 16;++j) c[j] = random(); + for (j = -16;j < clen + 16;++j) c2[j] = c[j]; + if (crypto_core(h,n,k,c) != 0) return "crypto_core returns nonzero"; + for (j = -16;j < 0;++j) if (h2[j] != h[j]) return "crypto_core writes before output"; + for (j = hlen;j < hlen + 16;++j) if (h2[j] != h[j]) return "crypto_core writes after output"; + for (j = -16;j < klen + 16;++j) if (k2[j] != k[j]) return "crypto_core writes to k"; + for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_core writes to n"; + for (j = -16;j < clen + 16;++j) if (c2[j] != c[j]) return "crypto_core writes to c"; + + if (crypto_core(n2,n2,k,c) != 0) return "crypto_core returns nonzero"; + for (j = 0;j < hlen;++j) if (h[j] != n2[j]) return "crypto_core does not handle n overlap"; + for (j = 0;j < hlen;++j) n2[j] = n[j]; + if (crypto_core(k2,n2,k2,c) != 0) return "crypto_core returns nonzero"; + for (j = 0;j < hlen;++j) if (h[j] != k2[j]) return "crypto_core does not handle k overlap"; + for (j = 0;j < hlen;++j) k2[j] = k[j]; + if (crypto_core(c2,n2,k2,c2) != 0) return "crypto_core returns nonzero"; + for (j = 0;j < hlen;++j) if (h[j] != c2[j]) return "crypto_core does not handle c overlap"; + for (j = 0;j < hlen;++j) c2[j] = c[j]; + + for (j = 0;j < nlen;++j) n[j] = newbyte(); + if (crypto_core(h,n,k,c) != 0) return "crypto_core returns nonzero"; + for (j = 0;j < klen;++j) k[j] = newbyte(); + if (crypto_core(h,n,k,c) != 0) return "crypto_core returns nonzero"; + for (j = 0;j < clen;++j) c[j] = newbyte(); + } + + for (i = 0;i < hlen;++i) { + checksum[2 * i] = "0123456789abcdef"[15 & (h[i] >> 4)]; + checksum[2 * i + 1] = "0123456789abcdef"[15 & h[i]]; + } + checksum[2 * i] = 0; + return 0; +} diff --git a/nacl/crypto_core/wrapper-empty.cpp b/nacl/crypto_core/wrapper-empty.cpp new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_hash/measure.c b/nacl/crypto_hash/measure.c new file mode 100644 index 00000000..cec0404d --- /dev/null +++ 
b/nacl/crypto_hash/measure.c @@ -0,0 +1,66 @@ +#include +#include "randombytes.h" +#include "cpucycles.h" +#include "crypto_hash.h" + +extern void printentry(long long,const char *,long long *,long long); +extern unsigned char *alignedcalloc(unsigned long long); +extern const char *primitiveimplementation; +extern const char *implementationversion; +extern const char *sizenames[]; +extern const long long sizes[]; +extern void allocate(void); +extern void measure(void); + +const char *primitiveimplementation = crypto_hash_IMPLEMENTATION; +const char *implementationversion = crypto_hash_VERSION; +const char *sizenames[] = { "outputbytes", 0 }; +const long long sizes[] = { crypto_hash_BYTES }; + +#define MAXTEST_BYTES 4096 +#ifdef SUPERCOP +#define MGAP 8192 +#else +#define MGAP 8 +#endif + +static unsigned char *h; +static unsigned char *m; + +void preallocate(void) +{ +} + +void allocate(void) +{ + h = alignedcalloc(crypto_hash_BYTES); + m = alignedcalloc(MAXTEST_BYTES); +} + +#define TIMINGS 15 +static long long cycles[TIMINGS + 1]; + +static void printcycles(long long mlen) +{ + int i; + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"cycles",cycles,TIMINGS); +} + +void measure(void) +{ + int i; + int loop; + int mlen; + + for (loop = 0;loop < LOOPS;++loop) { + for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / MGAP) { + randombytes(m,mlen); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_hash(h,m,mlen); + } + printcycles(mlen); + } + } +} diff --git a/nacl/crypto_hash/sha256/checksum b/nacl/crypto_hash/sha256/checksum new file mode 100644 index 00000000..ee52aa30 --- /dev/null +++ b/nacl/crypto_hash/sha256/checksum @@ -0,0 +1 @@ +86df8bd202b2a2b5fdc04a7f50a591e43a345849c12fef08d487109648a08e05 diff --git a/nacl/crypto_hash/sha256/ref/api.h b/nacl/crypto_hash/sha256/ref/api.h new file mode 100644 index 00000000..ae8c7f6a --- /dev/null +++ b/nacl/crypto_hash/sha256/ref/api.h @@ -0,0 +1 @@ +#define 
CRYPTO_BYTES 32 diff --git a/nacl/crypto_hash/sha256/ref/hash.c b/nacl/crypto_hash/sha256/ref/hash.c new file mode 100644 index 00000000..21ce68a0 --- /dev/null +++ b/nacl/crypto_hash/sha256/ref/hash.c @@ -0,0 +1,69 @@ +/* +20080913 +D. J. Bernstein +Public domain. +*/ + +#include "crypto_hashblocks_sha256.h" +#include "crypto_hash.h" + +#define blocks crypto_hashblocks_sha256 + +typedef unsigned int uint32; + +static const char iv[32] = { + 0x6a,0x09,0xe6,0x67, + 0xbb,0x67,0xae,0x85, + 0x3c,0x6e,0xf3,0x72, + 0xa5,0x4f,0xf5,0x3a, + 0x51,0x0e,0x52,0x7f, + 0x9b,0x05,0x68,0x8c, + 0x1f,0x83,0xd9,0xab, + 0x5b,0xe0,0xcd,0x19, +} ; + +int crypto_hash(unsigned char *out,const unsigned char *in,unsigned long long inlen) +{ + unsigned char h[32]; + unsigned char padded[128]; + int i; + unsigned long long bits = inlen << 3; + + for (i = 0;i < 32;++i) h[i] = iv[i]; + + blocks(h,in,inlen); + in += inlen; + inlen &= 63; + in -= inlen; + + for (i = 0;i < inlen;++i) padded[i] = in[i]; + padded[inlen] = 0x80; + + if (inlen < 56) { + for (i = inlen + 1;i < 56;++i) padded[i] = 0; + padded[56] = bits >> 56; + padded[57] = bits >> 48; + padded[58] = bits >> 40; + padded[59] = bits >> 32; + padded[60] = bits >> 24; + padded[61] = bits >> 16; + padded[62] = bits >> 8; + padded[63] = bits; + blocks(h,padded,64); + } else { + for (i = inlen + 1;i < 120;++i) padded[i] = 0; + padded[120] = bits >> 56; + padded[121] = bits >> 48; + padded[122] = bits >> 40; + padded[123] = bits >> 32; + padded[124] = bits >> 24; + padded[125] = bits >> 16; + padded[126] = bits >> 8; + padded[127] = bits; + blocks(h,padded,128); + } + + for (i = 0;i < 32;++i) out[i] = h[i]; + + return 0; +} diff --git a/nacl/crypto_hash/sha256/ref/implementors b/nacl/crypto_hash/sha256/ref/implementors new file mode 100644 index 00000000..962e7d8e --- /dev/null +++ b/nacl/crypto_hash/sha256/ref/implementors @@ -0,0 +1 @@ +Daniel J. 
Bernstein (wrapper around crypto_hashblocks/sha256) diff --git a/nacl/crypto_hash/sha256/used b/nacl/crypto_hash/sha256/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_hash/sha512/checksum b/nacl/crypto_hash/sha512/checksum new file mode 100644 index 00000000..edf714e9 --- /dev/null +++ b/nacl/crypto_hash/sha512/checksum @@ -0,0 +1 @@ +9a2a989e136a02c3362c98e6e1e0b52fab980a1dafbebe4dd5e44d15d061742e35fb686befd4e33c608d251c96e26c020f90d92bb7ec8a657f79bb8e0b00a473 diff --git a/nacl/crypto_hash/sha512/ref/api.h b/nacl/crypto_hash/sha512/ref/api.h new file mode 100644 index 00000000..de9380d7 --- /dev/null +++ b/nacl/crypto_hash/sha512/ref/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 64 diff --git a/nacl/crypto_hash/sha512/ref/hash.c b/nacl/crypto_hash/sha512/ref/hash.c new file mode 100644 index 00000000..fc4347bb --- /dev/null +++ b/nacl/crypto_hash/sha512/ref/hash.c @@ -0,0 +1,71 @@ +/* +20080913 +D. J. Bernstein +Public domain. +*/ + +#include "crypto_hashblocks_sha512.h" +#include "crypto_hash.h" + +#define blocks crypto_hashblocks_sha512 + +static const unsigned char iv[64] = { + 0x6a,0x09,0xe6,0x67,0xf3,0xbc,0xc9,0x08, + 0xbb,0x67,0xae,0x85,0x84,0xca,0xa7,0x3b, + 0x3c,0x6e,0xf3,0x72,0xfe,0x94,0xf8,0x2b, + 0xa5,0x4f,0xf5,0x3a,0x5f,0x1d,0x36,0xf1, + 0x51,0x0e,0x52,0x7f,0xad,0xe6,0x82,0xd1, + 0x9b,0x05,0x68,0x8c,0x2b,0x3e,0x6c,0x1f, + 0x1f,0x83,0xd9,0xab,0xfb,0x41,0xbd,0x6b, + 0x5b,0xe0,0xcd,0x19,0x13,0x7e,0x21,0x79 +} ; + +typedef unsigned long long uint64; + +int crypto_hash(unsigned char *out,const unsigned char *in,unsigned long long inlen) +{ + unsigned char h[64]; + unsigned char padded[256]; + int i; + unsigned long long bytes = inlen; + + for (i = 0;i < 64;++i) h[i] = iv[i]; + + blocks(h,in,inlen); + in += inlen; + inlen &= 127; + in -= inlen; + + for (i = 0;i < inlen;++i) padded[i] = in[i]; + padded[inlen] = 0x80; + + if (inlen < 112) { + for (i = inlen + 1;i < 119;++i) padded[i] = 0; + padded[119] = bytes >> 61; + padded[120] = 
bytes >> 53; + padded[121] = bytes >> 45; + padded[122] = bytes >> 37; + padded[123] = bytes >> 29; + padded[124] = bytes >> 21; + padded[125] = bytes >> 13; + padded[126] = bytes >> 5; + padded[127] = bytes << 3; + blocks(h,padded,128); + } else { + for (i = inlen + 1;i < 247;++i) padded[i] = 0; + padded[247] = bytes >> 61; + padded[248] = bytes >> 53; + padded[249] = bytes >> 45; + padded[250] = bytes >> 37; + padded[251] = bytes >> 29; + padded[252] = bytes >> 21; + padded[253] = bytes >> 13; + padded[254] = bytes >> 5; + padded[255] = bytes << 3; + blocks(h,padded,256); + } + + for (i = 0;i < 64;++i) out[i] = h[i]; + + return 0; +} diff --git a/nacl/crypto_hash/sha512/ref/implementors b/nacl/crypto_hash/sha512/ref/implementors new file mode 100644 index 00000000..40afca09 --- /dev/null +++ b/nacl/crypto_hash/sha512/ref/implementors @@ -0,0 +1 @@ +Daniel J. Bernstein (wrapper around crypto_hashblocks/sha512) diff --git a/nacl/crypto_hash/sha512/selected b/nacl/crypto_hash/sha512/selected new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_hash/sha512/used b/nacl/crypto_hash/sha512/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_hash/try.c b/nacl/crypto_hash/try.c new file mode 100644 index 00000000..fab49c99 --- /dev/null +++ b/nacl/crypto_hash/try.c @@ -0,0 +1,77 @@ +/* + * crypto_hash/try.c version 20090118 + * D. J. Bernstein + * Public domain. 
+ */ + +#include +#include "crypto_hash.h" + +extern unsigned char *alignedcalloc(unsigned long long); + +const char *primitiveimplementation = crypto_hash_IMPLEMENTATION; + +#define MAXTEST_BYTES (10000 + crypto_hash_BYTES) +#define CHECKSUM_BYTES 4096 +#define TUNE_BYTES 1536 + +static unsigned char *h; +static unsigned char *h2; +static unsigned char *m; +static unsigned char *m2; + +void preallocate(void) +{ +} + +void allocate(void) +{ + h = alignedcalloc(crypto_hash_BYTES); + h2 = alignedcalloc(crypto_hash_BYTES); + m = alignedcalloc(MAXTEST_BYTES); + m2 = alignedcalloc(MAXTEST_BYTES); +} + +void predoit(void) +{ +} + +void doit(void) +{ + crypto_hash(h,m,TUNE_BYTES); +} + +char checksum[crypto_hash_BYTES * 2 + 1]; + +const char *checksum_compute(void) +{ + long long i; + long long j; + + for (i = 0;i < CHECKSUM_BYTES;++i) { + long long hlen = crypto_hash_BYTES; + long long mlen = i; + for (j = -16;j < 0;++j) h[j] = random(); + for (j = hlen;j < hlen + 16;++j) h[j] = random(); + for (j = -16;j < hlen + 16;++j) h2[j] = h[j]; + for (j = -16;j < 0;++j) m[j] = random(); + for (j = mlen;j < mlen + 16;++j) m[j] = random(); + for (j = -16;j < mlen + 16;++j) m2[j] = m[j]; + if (crypto_hash(h,m,mlen) != 0) return "crypto_hash returns nonzero"; + for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_hash writes to input"; + for (j = -16;j < 0;++j) if (h2[j] != h[j]) return "crypto_hash writes before output"; + for (j = hlen;j < hlen + 16;++j) if (h2[j] != h[j]) return "crypto_hash writes after output"; + if (crypto_hash(m2,m2,mlen) != 0) return "crypto_hash returns nonzero"; + for (j = 0;j < hlen;++j) if (m2[j] != h[j]) return "crypto_hash does not handle overlap"; + for (j = 0;j < mlen;++j) m[j] ^= h[j % hlen]; + m[mlen] = h[0]; + } + if (crypto_hash(h,m,CHECKSUM_BYTES) != 0) return "crypto_hash returns nonzero"; + + for (i = 0;i < crypto_hash_BYTES;++i) { + checksum[2 * i] = "0123456789abcdef"[15 & (h[i] >> 4)]; + checksum[2 * i + 1] = 
/* crypto_hashblocks/sha256/inplace/blocks.c
 *
 * SHA-256 compression function (FIPS 180-4): absorbs every complete
 * 64-byte block of `in` into the 32-byte big-endian state, leaves any
 * trailing partial block untouched, and returns 0.  The message
 * schedule is expanded in place in a rolling 16-word window.
 */

/* The project header only renames crypto_hashblocks for the build;
   guard the include so this file also compiles standalone.  In-tree
   behavior is unchanged (the header is found and included). */
#if defined(__has_include)
# if __has_include("crypto_hashblocks.h")
#  include "crypto_hashblocks.h"
# endif
#else
# include "crypto_hashblocks.h"
#endif

typedef unsigned int uint32;

/* Read a 32-bit big-endian word. */
static uint32 load_bigendian(const unsigned char *x)
{
  return (uint32) (x[3])
       | (((uint32) (x[2])) << 8)
       | (((uint32) (x[1])) << 16)
       | (((uint32) (x[0])) << 24);
}

/* Write a 32-bit word in big-endian order. */
static void store_bigendian(unsigned char *x,uint32 u)
{
  x[3] = u; u >>= 8;
  x[2] = u; u >>= 8;
  x[1] = u; u >>= 8;
  x[0] = u;
}

#define SHR(x,c) ((x) >> (c))
#define ROTR(x,c) (((x) >> (c)) | ((x) << (32 - (c))))

#define Ch(x,y,z) ((x & y) ^ (~x & z))
#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
#define Sigma0(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22))
#define Sigma1(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25))
#define sigma0(x) (ROTR(x, 7) ^ ROTR(x,18) ^ SHR(x, 3))
#define sigma1(x) (ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10))

/* FIPS 180-4 SHA-256 round constants. */
static const uint32 round_k[64] = {
  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
{
  uint32 state[8];
  uint32 v[8];    /* working variables a..h */
  uint32 w[16];   /* rolling message schedule window */
  int i;
  int t;

  for (i = 0;i < 8;++i) state[i] = load_bigendian(statebytes + 4 * i);

  while (inlen >= 64) {
    for (i = 0;i < 16;++i) w[i] = load_bigendian(in + 4 * i);
    for (i = 0;i < 8;++i) v[i] = state[i];

    /* 64 rounds; W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]. */
    for (t = 0;t < 64;++t) {
      uint32 wt;
      uint32 T1;
      uint32 T2;
      if (t < 16) {
        wt = w[t];
      } else {
        wt = sigma1(w[(t - 2) & 15]) + w[(t - 7) & 15]
           + sigma0(w[(t - 15) & 15]) + w[t & 15];
        w[t & 15] = wt;
      }
      T1 = v[7] + Sigma1(v[4]) + Ch(v[4],v[5],v[6]) + round_k[t] + wt;
      T2 = Sigma0(v[0]) + Maj(v[0],v[1],v[2]);
      v[7] = v[6];
      v[6] = v[5];
      v[5] = v[4];
      v[4] = v[3] + T1;
      v[3] = v[2];
      v[2] = v[1];
      v[1] = v[0];
      v[0] = T1 + T2;
    }

    for (i = 0;i < 8;++i) state[i] += v[i];

    in += 64;
    inlen -= 64;
  }

  for (i = 0;i < 8;++i) store_bigendian(statebytes + 4 * i,state[i]);

  return 0;
}
/* crypto_hashblocks/sha256/ref/blocks.c
 *
 * SHA-256 compression function (FIPS 180-4): absorbs every complete
 * 64-byte block of `in` into the 32-byte big-endian state, leaves any
 * trailing partial block untouched, and returns 0.  The full 64-word
 * message schedule is expanded up front.
 */

/* The project header only renames crypto_hashblocks for the build;
   guard the include so this file also compiles standalone.  In-tree
   behavior is unchanged (the header is found and included). */
#if defined(__has_include)
# if __has_include("crypto_hashblocks.h")
#  include "crypto_hashblocks.h"
# endif
#else
# include "crypto_hashblocks.h"
#endif

typedef unsigned int uint32;

/* Read a 32-bit big-endian word. */
static uint32 load_bigendian(const unsigned char *x)
{
  return (uint32) (x[3])
       | (((uint32) (x[2])) << 8)
       | (((uint32) (x[1])) << 16)
       | (((uint32) (x[0])) << 24);
}

/* Write a 32-bit word in big-endian order. */
static void store_bigendian(unsigned char *x,uint32 u)
{
  x[3] = u; u >>= 8;
  x[2] = u; u >>= 8;
  x[1] = u; u >>= 8;
  x[0] = u;
}

#define SHR(x,c) ((x) >> (c))
#define ROTR(x,c) (((x) >> (c)) | ((x) << (32 - (c))))

#define Ch(x,y,z) ((x & y) ^ (~x & z))
#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
#define Sigma0(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22))
#define Sigma1(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25))
#define sigma0(x) (ROTR(x, 7) ^ ROTR(x,18) ^ SHR(x, 3))
#define sigma1(x) (ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10))

/* FIPS 180-4 SHA-256 round constants. */
static const uint32 K256[64] = {
  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
{
  uint32 state[8];
  int i;

  for (i = 0;i < 8;++i) state[i] = load_bigendian(statebytes + 4 * i);

  while (inlen >= 64) {
    uint32 w[64];
    uint32 a, b, c, d, e, f, g, h;
    int t;

    /* Expand the 16 input words into the full 64-word schedule:
       W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]. */
    for (t = 0;t < 16;++t) w[t] = load_bigendian(in + 4 * t);
    for (t = 16;t < 64;++t)
      w[t] = sigma1(w[t - 2]) + w[t - 7] + sigma0(w[t - 15]) + w[t - 16];

    a = state[0]; b = state[1]; c = state[2]; d = state[3];
    e = state[4]; f = state[5]; g = state[6]; h = state[7];

    for (t = 0;t < 64;++t) {
      uint32 T1 = h + Sigma1(e) + Ch(e,f,g) + K256[t] + w[t];
      uint32 T2 = Sigma0(a) + Maj(a,b,c);
      h = g;
      g = f;
      f = e;
      e = d + T1;
      d = c;
      c = b;
      b = a;
      a = T1 + T2;
    }

    state[0] += a; state[1] += b; state[2] += c; state[3] += d;
    state[4] += e; state[5] += f; state[6] += g; state[7] += h;

    in += 64;
    inlen -= 64;
  }

  for (i = 0;i < 8;++i) store_bigendian(statebytes + 4 * i,state[i]);

  return 0;
}
/* crypto_hashblocks/sha512/inplace/blocks.c
 *
 * SHA-512 compression function (FIPS 180-4): absorbs every complete
 * 128-byte block of `in` into the 64-byte big-endian state, leaves any
 * trailing partial block untouched, and returns 0.  The message
 * schedule is expanded in place in a rolling 16-word window.
 */

/* The project header only renames crypto_hashblocks for the build;
   guard the include so this file also compiles standalone.  In-tree
   behavior is unchanged (the header is found and included). */
#if defined(__has_include)
# if __has_include("crypto_hashblocks.h")
#  include "crypto_hashblocks.h"
# endif
#else
# include "crypto_hashblocks.h"
#endif

typedef unsigned long long uint64;

/* Read a 64-bit big-endian word. */
static uint64 load_bigendian(const unsigned char *x)
{
  return (uint64) (x[7])
       | (((uint64) (x[6])) << 8)
       | (((uint64) (x[5])) << 16)
       | (((uint64) (x[4])) << 24)
       | (((uint64) (x[3])) << 32)
       | (((uint64) (x[2])) << 40)
       | (((uint64) (x[1])) << 48)
       | (((uint64) (x[0])) << 56);
}

/* Write a 64-bit word in big-endian order. */
static void store_bigendian(unsigned char *x,uint64 u)
{
  x[7] = u; u >>= 8;
  x[6] = u; u >>= 8;
  x[5] = u; u >>= 8;
  x[4] = u; u >>= 8;
  x[3] = u; u >>= 8;
  x[2] = u; u >>= 8;
  x[1] = u; u >>= 8;
  x[0] = u;
}

#define SHR(x,c) ((x) >> (c))
#define ROTR(x,c) (((x) >> (c)) | ((x) << (64 - (c))))

#define Ch(x,y,z) ((x & y) ^ (~x & z))
#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
#define Sigma0(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39))
#define Sigma1(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41))
#define sigma0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x,7))
#define sigma1(x) (ROTR(x,19) ^ ROTR(x,61) ^ SHR(x,6))

/* FIPS 180-4 SHA-512 round constants. */
static const uint64 round_k[80] = {
  0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
  0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
  0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
  0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
  0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
  0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
  0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
  0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
  0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
  0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
  0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
  0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
  0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
  0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
  0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
  0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
  0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
  0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
  0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
  0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
};

int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
{
  uint64 state[8];
  uint64 v[8];    /* working variables a..h */
  uint64 w[16];   /* rolling message schedule window */
  int i;
  int t;

  for (i = 0;i < 8;++i) state[i] = load_bigendian(statebytes + 8 * i);

  while (inlen >= 128) {
    for (i = 0;i < 16;++i) w[i] = load_bigendian(in + 8 * i);
    for (i = 0;i < 8;++i) v[i] = state[i];

    /* 80 rounds; W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]. */
    for (t = 0;t < 80;++t) {
      uint64 wt;
      uint64 T1;
      uint64 T2;
      if (t < 16) {
        wt = w[t];
      } else {
        wt = sigma1(w[(t - 2) & 15]) + w[(t - 7) & 15]
           + sigma0(w[(t - 15) & 15]) + w[t & 15];
        w[t & 15] = wt;
      }
      T1 = v[7] + Sigma1(v[4]) + Ch(v[4],v[5],v[6]) + round_k[t] + wt;
      T2 = Sigma0(v[0]) + Maj(v[0],v[1],v[2]);
      v[7] = v[6];
      v[6] = v[5];
      v[5] = v[4];
      v[4] = v[3] + T1;
      v[3] = v[2];
      v[2] = v[1];
      v[1] = v[0];
      v[0] = T1 + T2;
    }

    for (i = 0;i < 8;++i) state[i] += v[i];

    in += 128;
    inlen -= 128;
  }

  for (i = 0;i < 8;++i) store_bigendian(statebytes + 8 * i,state[i]);

  return 0;
}
/* crypto_hashblocks/sha512/ref/blocks.c
 *
 * SHA-512 compression function (FIPS 180-4): absorbs every complete
 * 128-byte block of `in` into the 64-byte big-endian state, leaves any
 * trailing partial block untouched, and returns 0.  The full 80-word
 * message schedule is expanded up front.
 */

/* The project header only renames crypto_hashblocks for the build;
   guard the include so this file also compiles standalone.  In-tree
   behavior is unchanged (the header is found and included). */
#if defined(__has_include)
# if __has_include("crypto_hashblocks.h")
#  include "crypto_hashblocks.h"
# endif
#else
# include "crypto_hashblocks.h"
#endif

typedef unsigned long long uint64;

/* Read a 64-bit big-endian word. */
static uint64 load_bigendian(const unsigned char *x)
{
  return (uint64) (x[7])
       | (((uint64) (x[6])) << 8)
       | (((uint64) (x[5])) << 16)
       | (((uint64) (x[4])) << 24)
       | (((uint64) (x[3])) << 32)
       | (((uint64) (x[2])) << 40)
       | (((uint64) (x[1])) << 48)
       | (((uint64) (x[0])) << 56);
}

/* Write a 64-bit word in big-endian order. */
static void store_bigendian(unsigned char *x,uint64 u)
{
  x[7] = u; u >>= 8;
  x[6] = u; u >>= 8;
  x[5] = u; u >>= 8;
  x[4] = u; u >>= 8;
  x[3] = u; u >>= 8;
  x[2] = u; u >>= 8;
  x[1] = u; u >>= 8;
  x[0] = u;
}

#define SHR(x,c) ((x) >> (c))
#define ROTR(x,c) (((x) >> (c)) | ((x) << (64 - (c))))

#define Ch(x,y,z) ((x & y) ^ (~x & z))
#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
#define Sigma0(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39))
#define Sigma1(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41))
#define sigma0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x,7))
#define sigma1(x) (ROTR(x,19) ^ ROTR(x,61) ^ SHR(x,6))

/* FIPS 180-4 SHA-512 round constants. */
static const uint64 K512[80] = {
  0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
  0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
  0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
  0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
  0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
  0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
  0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
  0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
  0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
  0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
  0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
  0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
  0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
  0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
  0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
  0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
  0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
  0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
  0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
  0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
};

int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
{
  uint64 state[8];
  int i;

  for (i = 0;i < 8;++i) state[i] = load_bigendian(statebytes + 8 * i);

  while (inlen >= 128) {
    uint64 w[80];
    uint64 a, b, c, d, e, f, g, h;
    int t;

    /* Expand the 16 input words into the full 80-word schedule:
       W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16]. */
    for (t = 0;t < 16;++t) w[t] = load_bigendian(in + 8 * t);
    for (t = 16;t < 80;++t)
      w[t] = sigma1(w[t - 2]) + w[t - 7] + sigma0(w[t - 15]) + w[t - 16];

    a = state[0]; b = state[1]; c = state[2]; d = state[3];
    e = state[4]; f = state[5]; g = state[6]; h = state[7];

    for (t = 0;t < 80;++t) {
      uint64 T1 = h + Sigma1(e) + Ch(e,f,g) + K512[t] + w[t];
      uint64 T2 = Sigma0(a) + Maj(a,b,c);
      h = g;
      g = f;
      f = e;
      e = d + T1;
      d = c;
      c = b;
      b = a;
      a = T1 + T2;
    }

    state[0] += a; state[1] += b; state[2] += c; state[3] += d;
    state[4] += e; state[5] += f; state[6] += g; state[7] += h;

    in += 128;
    inlen -= 128;
  }

  for (i = 0;i < 8;++i) store_bigendian(statebytes + 8 * i,state[i]);

  return 0;
}
+ */ + +#include <stdlib.h> +#include "crypto_hashblocks.h" + +extern unsigned char *alignedcalloc(unsigned long long); + +const char *primitiveimplementation = crypto_hashblocks_IMPLEMENTATION; + +#define MAXTEST_BYTES (10000 + crypto_hashblocks_STATEBYTES) +#define CHECKSUM_BYTES 4096 +#define TUNE_BYTES 1536 + +static unsigned char *h; +static unsigned char *h2; +static unsigned char *m; +static unsigned char *m2; + +void preallocate(void) +{ +} + +void allocate(void) +{ + h = alignedcalloc(crypto_hashblocks_STATEBYTES); + h2 = alignedcalloc(crypto_hashblocks_STATEBYTES); + m = alignedcalloc(MAXTEST_BYTES); + m2 = alignedcalloc(MAXTEST_BYTES); +} + +void predoit(void) +{ +} + +void doit(void) +{ + crypto_hashblocks(h,m,TUNE_BYTES); +} + +char checksum[crypto_hashblocks_STATEBYTES * 2 + 1]; + +const char *checksum_compute(void) +{ + long long i; + long long j; + + for (i = 0;i < CHECKSUM_BYTES;++i) { + long long hlen = crypto_hashblocks_STATEBYTES; + long long mlen = i; + for (j = -16;j < 0;++j) h[j] = random(); + for (j = hlen;j < hlen + 16;++j) h[j] = random(); + for (j = -16;j < hlen + 16;++j) h2[j] = h[j]; + for (j = -16;j < 0;++j) m[j] = random(); + for (j = mlen;j < mlen + 16;++j) m[j] = random(); + for (j = -16;j < mlen + 16;++j) m2[j] = m[j]; + if (crypto_hashblocks(h,m,mlen) != 0) return "crypto_hashblocks returns nonzero"; + for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_hashblocks writes to input"; + for (j = -16;j < 0;++j) if (h2[j] != h[j]) return "crypto_hashblocks writes before output"; + for (j = hlen;j < hlen + 16;++j) if (h2[j] != h[j]) return "crypto_hashblocks writes after output"; + for (j = 0;j < hlen;++j) m2[j] = h2[j]; + if (crypto_hashblocks(h2,m2,mlen) != 0) return "crypto_hashblocks returns nonzero"; + if (crypto_hashblocks(m2,m2,mlen) != 0) return "crypto_hashblocks returns nonzero"; + for (j = 0;j < hlen;++j) if (m2[j] != h2[j]) return "crypto_hashblocks does not handle overlap"; + for (j = 0;j < mlen;++j) m[j] ^= h[j % hlen]; 
+ m[mlen] = h[0]; + } + if (crypto_hashblocks(h,m,CHECKSUM_BYTES) != 0) return "crypto_hashblocks returns nonzero"; + + for (i = 0;i < crypto_hashblocks_STATEBYTES;++i) { + checksum[2 * i] = "0123456789abcdef"[15 & (h[i] >> 4)]; + checksum[2 * i + 1] = "0123456789abcdef"[15 & h[i]]; + } + checksum[2 * i] = 0; + return 0; +} diff --git a/nacl/crypto_hashblocks/wrapper-empty.cpp b/nacl/crypto_hashblocks/wrapper-empty.cpp new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_onetimeauth/measure.c b/nacl/crypto_onetimeauth/measure.c new file mode 100644 index 00000000..6d3ddfd5 --- /dev/null +++ b/nacl/crypto_onetimeauth/measure.c @@ -0,0 +1,69 @@ +#include "crypto_onetimeauth.h" +#include "randombytes.h" +#include "cpucycles.h" + +extern void printentry(long long,const char *,long long *,long long); +extern unsigned char *alignedcalloc(unsigned long long); +extern const char *primitiveimplementation; +extern const char *implementationversion; +extern const char *sizenames[]; +extern const long long sizes[]; +extern void allocate(void); +extern void measure(void); + +const char *primitiveimplementation = crypto_onetimeauth_IMPLEMENTATION; +const char *implementationversion = crypto_onetimeauth_VERSION; +const char *sizenames[] = { "outputbytes", "keybytes", 0 }; +const long long sizes[] = { crypto_onetimeauth_BYTES, crypto_onetimeauth_KEYBYTES }; + +#define MAXTEST_BYTES 4096 +#ifdef SUPERCOP +#define MGAP 8192 +#else +#define MGAP 8 +#endif + +static unsigned char *k; +static unsigned char *m; +static unsigned char *h; + +void preallocate(void) +{ +} + +void allocate(void) +{ + k = alignedcalloc(crypto_onetimeauth_KEYBYTES); + m = alignedcalloc(MAXTEST_BYTES); + h = alignedcalloc(crypto_onetimeauth_BYTES); +} + +#define TIMINGS 15 +static long long cycles[TIMINGS + 1]; + +void measure(void) +{ + int i; + int loop; + int mlen; + + for (loop = 0;loop < LOOPS;++loop) { + for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / MGAP) { + 
randombytes(k,crypto_onetimeauth_KEYBYTES); + randombytes(m,mlen); + randombytes(h,crypto_onetimeauth_BYTES); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_onetimeauth(h,m,mlen,k); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"cycles",cycles,TIMINGS); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_onetimeauth_verify(h,m,mlen,k); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"verify_cycles",cycles,TIMINGS); + } + } +} diff --git a/nacl/crypto_onetimeauth/poly1305/53/api.h b/nacl/crypto_onetimeauth/poly1305/53/api.h new file mode 100644 index 00000000..acc133ed --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/53/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 16 +#define CRYPTO_KEYBYTES 32 diff --git a/nacl/crypto_onetimeauth/poly1305/53/auth.c b/nacl/crypto_onetimeauth/poly1305/53/auth.c new file mode 100644 index 00000000..a4a9c3f6 --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/53/auth.c @@ -0,0 +1,1616 @@ +/* +20080910 +D. J. Bernstein +Public domain. 
+*/ + +#include "crypto_onetimeauth.h" + +typedef unsigned char uchar; +typedef int int32; +typedef unsigned int uint32; +typedef long long int64; +typedef unsigned long long uint64; + +static const double poly1305_53_constants[] = { + 0.00000000558793544769287109375 /* alpham80 = 3 2^(-29) */ +, 24.0 /* alpham48 = 3 2^3 */ +, 103079215104.0 /* alpham16 = 3 2^35 */ +, 6755399441055744.0 /* alpha0 = 3 2^51 */ +, 1770887431076116955136.0 /* alpha18 = 3 2^69 */ +, 29014219670751100192948224.0 /* alpha32 = 3 2^83 */ +, 7605903601369376408980219232256.0 /* alpha50 = 3 2^101 */ +, 124615124604835863084731911901282304.0 /* alpha64 = 3 2^115 */ +, 32667107224410092492483962313449748299776.0 /* alpha82 = 3 2^133 */ +, 535217884764734955396857238543560676143529984.0 /* alpha96 = 3 2^147 */ +, 35076039295941670036888435985190792471742381031424.0 /* alpha112 = 3 2^163 */ +, 9194973245195333150150082162901855101712434733101613056.0 /* alpha130 = 3 2^181 */ +, 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125 /* scale = 5 2^(-130) */ +, 6755408030990331.0 /* offset0 = alpha0 + 2^33 - 5 */ +, 29014256564239239022116864.0 /* offset1 = alpha32 + 2^65 - 2^33 */ +, 124615283061160854719918951570079744.0 /* offset2 = alpha64 + 2^97 - 2^65 */ +, 535219245894202480694386063513315216128475136.0 /* offset3 = alpha96 + 2^130 - 2^97 */ +} ; + +int crypto_onetimeauth(unsigned char *out,const unsigned char *m,unsigned long long l,const unsigned char *k) +{ + register const unsigned char *r = k; + register const unsigned char *s = k + 16; + double r0high_stack; + double r1high_stack; + double r1low_stack; + double sr1high_stack; + double r2low_stack; + double sr2high_stack; + double r0low_stack; + double sr1low_stack; + double r2high_stack; + double sr2low_stack; + double r3high_stack; + double sr3high_stack; + double r3low_stack; + double sr3low_stack; + int64 d0; + int64 d1; + int64 d2; + int64 d3; + register 
double scale; + register double alpha0; + register double alpha32; + register double alpha64; + register double alpha96; + register double alpha130; + register double h0; + register double h1; + register double h2; + register double h3; + register double h4; + register double h5; + register double h6; + register double h7; + register double y7; + register double y6; + register double y1; + register double y0; + register double y5; + register double y4; + register double x7; + register double x6; + register double x1; + register double x0; + register double y3; + register double y2; + register double r3low; + register double r0low; + register double r3high; + register double r0high; + register double sr1low; + register double x5; + register double r3lowx0; + register double sr1high; + register double x4; + register double r0lowx6; + register double r1low; + register double x3; + register double r3highx0; + register double r1high; + register double x2; + register double r0highx6; + register double sr2low; + register double r0lowx0; + register double sr2high; + register double sr1lowx6; + register double r2low; + register double r0highx0; + register double r2high; + register double sr1highx6; + register double sr3low; + register double r1lowx0; + register double sr3high; + register double sr2lowx6; + register double r1highx0; + register double sr2highx6; + register double r2lowx0; + register double sr3lowx6; + register double r2highx0; + register double sr3highx6; + register double r1highx4; + register double r1lowx4; + register double r0highx4; + register double r0lowx4; + register double sr3highx4; + register double sr3lowx4; + register double sr2highx4; + register double sr2lowx4; + register double r0lowx2; + register double r0highx2; + register double r1lowx2; + register double r1highx2; + register double r2lowx2; + register double r2highx2; + register double sr3lowx2; + register double sr3highx2; + register double z0; + register double z1; + register double z2; + 
register double z3; + register int64 r0; + register int64 r1; + register int64 r2; + register int64 r3; + register uint32 r00; + register uint32 r01; + register uint32 r02; + register uint32 r03; + register uint32 r10; + register uint32 r11; + register uint32 r12; + register uint32 r13; + register uint32 r20; + register uint32 r21; + register uint32 r22; + register uint32 r23; + register uint32 r30; + register uint32 r31; + register uint32 r32; + register uint32 r33; + register int64 m0; + register int64 m1; + register int64 m2; + register int64 m3; + register uint32 m00; + register uint32 m01; + register uint32 m02; + register uint32 m03; + register uint32 m10; + register uint32 m11; + register uint32 m12; + register uint32 m13; + register uint32 m20; + register uint32 m21; + register uint32 m22; + register uint32 m23; + register uint32 m30; + register uint32 m31; + register uint32 m32; + register uint64 m33; + register char *constants; + register int32 lbelow2; + register int32 lbelow3; + register int32 lbelow4; + register int32 lbelow5; + register int32 lbelow6; + register int32 lbelow7; + register int32 lbelow8; + register int32 lbelow9; + register int32 lbelow10; + register int32 lbelow11; + register int32 lbelow12; + register int32 lbelow13; + register int32 lbelow14; + register int32 lbelow15; + register double alpham80; + register double alpham48; + register double alpham16; + register double alpha18; + register double alpha50; + register double alpha82; + register double alpha112; + register double offset0; + register double offset1; + register double offset2; + register double offset3; + register uint32 s00; + register uint32 s01; + register uint32 s02; + register uint32 s03; + register uint32 s10; + register uint32 s11; + register uint32 s12; + register uint32 s13; + register uint32 s20; + register uint32 s21; + register uint32 s22; + register uint32 s23; + register uint32 s30; + register uint32 s31; + register uint32 s32; + register uint32 s33; + 
register uint64 bits32; + register uint64 f; + register uint64 f0; + register uint64 f1; + register uint64 f2; + register uint64 f3; + register uint64 f4; + register uint64 g; + register uint64 g0; + register uint64 g1; + register uint64 g2; + register uint64 g3; + register uint64 g4; + + r00 = *(uchar *) (r + 0); + constants = (char *) &poly1305_53_constants; + + r01 = *(uchar *) (r + 1); + + r02 = *(uchar *) (r + 2); + r0 = 2151; + + r03 = *(uchar *) (r + 3); r03 &= 15; + r0 <<= 51; + + r10 = *(uchar *) (r + 4); r10 &= 252; + r01 <<= 8; + r0 += r00; + + r11 = *(uchar *) (r + 5); + r02 <<= 16; + r0 += r01; + + r12 = *(uchar *) (r + 6); + r03 <<= 24; + r0 += r02; + + r13 = *(uchar *) (r + 7); r13 &= 15; + r1 = 2215; + r0 += r03; + + d0 = r0; + r1 <<= 51; + r2 = 2279; + + r20 = *(uchar *) (r + 8); r20 &= 252; + r11 <<= 8; + r1 += r10; + + r21 = *(uchar *) (r + 9); + r12 <<= 16; + r1 += r11; + + r22 = *(uchar *) (r + 10); + r13 <<= 24; + r1 += r12; + + r23 = *(uchar *) (r + 11); r23 &= 15; + r2 <<= 51; + r1 += r13; + + d1 = r1; + r21 <<= 8; + r2 += r20; + + r30 = *(uchar *) (r + 12); r30 &= 252; + r22 <<= 16; + r2 += r21; + + r31 = *(uchar *) (r + 13); + r23 <<= 24; + r2 += r22; + + r32 = *(uchar *) (r + 14); + r2 += r23; + r3 = 2343; + + d2 = r2; + r3 <<= 51; + alpha32 = *(double *) (constants + 40); + + r33 = *(uchar *) (r + 15); r33 &= 15; + r31 <<= 8; + r3 += r30; + + r32 <<= 16; + r3 += r31; + + r33 <<= 24; + r3 += r32; + + r3 += r33; + h0 = alpha32 - alpha32; + + d3 = r3; + h1 = alpha32 - alpha32; + + alpha0 = *(double *) (constants + 24); + h2 = alpha32 - alpha32; + + alpha64 = *(double *) (constants + 56); + h3 = alpha32 - alpha32; + + alpha18 = *(double *) (constants + 32); + h4 = alpha32 - alpha32; + + r0low = *(double *) &d0; + h5 = alpha32 - alpha32; + + r1low = *(double *) &d1; + h6 = alpha32 - alpha32; + + r2low = *(double *) &d2; + h7 = alpha32 - alpha32; + + alpha50 = *(double *) (constants + 48); + r0low -= alpha0; + + alpha82 = *(double *) 
(constants + 64); + r1low -= alpha32; + + scale = *(double *) (constants + 96); + r2low -= alpha64; + + alpha96 = *(double *) (constants + 72); + r0high = r0low + alpha18; + + r3low = *(double *) &d3; + + alpham80 = *(double *) (constants + 0); + r1high = r1low + alpha50; + sr1low = scale * r1low; + + alpham48 = *(double *) (constants + 8); + r2high = r2low + alpha82; + sr2low = scale * r2low; + + r0high -= alpha18; + r0high_stack = r0high; + + r3low -= alpha96; + + r1high -= alpha50; + r1high_stack = r1high; + + sr1high = sr1low + alpham80; + + alpha112 = *(double *) (constants + 80); + r0low -= r0high; + + alpham16 = *(double *) (constants + 16); + r2high -= alpha82; + sr3low = scale * r3low; + + alpha130 = *(double *) (constants + 88); + sr2high = sr2low + alpham48; + + r1low -= r1high; + r1low_stack = r1low; + + sr1high -= alpham80; + sr1high_stack = sr1high; + + r2low -= r2high; + r2low_stack = r2low; + + sr2high -= alpham48; + sr2high_stack = sr2high; + + r3high = r3low + alpha112; + r0low_stack = r0low; + + sr1low -= sr1high; + sr1low_stack = sr1low; + + sr3high = sr3low + alpham16; + r2high_stack = r2high; + + sr2low -= sr2high; + sr2low_stack = sr2low; + + r3high -= alpha112; + r3high_stack = r3high; + + + sr3high -= alpham16; + sr3high_stack = sr3high; + + + r3low -= r3high; + r3low_stack = r3low; + + + sr3low -= sr3high; + sr3low_stack = sr3low; + +if (l < 16) goto addatmost15bytes; + + m00 = *(uchar *) (m + 0); + m0 = 2151; + + m0 <<= 51; + m1 = 2215; + m01 = *(uchar *) (m + 1); + + m1 <<= 51; + m2 = 2279; + m02 = *(uchar *) (m + 2); + + m2 <<= 51; + m3 = 2343; + m03 = *(uchar *) (m + 3); + + m10 = *(uchar *) (m + 4); + m01 <<= 8; + m0 += m00; + + m11 = *(uchar *) (m + 5); + m02 <<= 16; + m0 += m01; + + m12 = *(uchar *) (m + 6); + m03 <<= 24; + m0 += m02; + + m13 = *(uchar *) (m + 7); + m3 <<= 51; + m0 += m03; + + m20 = *(uchar *) (m + 8); + m11 <<= 8; + m1 += m10; + + m21 = *(uchar *) (m + 9); + m12 <<= 16; + m1 += m11; + + m22 = *(uchar *) (m + 10); + 
m13 <<= 24; + m1 += m12; + + m23 = *(uchar *) (m + 11); + m1 += m13; + + m30 = *(uchar *) (m + 12); + m21 <<= 8; + m2 += m20; + + m31 = *(uchar *) (m + 13); + m22 <<= 16; + m2 += m21; + + m32 = *(uchar *) (m + 14); + m23 <<= 24; + m2 += m22; + + m33 = *(uchar *) (m + 15); + m2 += m23; + + d0 = m0; + m31 <<= 8; + m3 += m30; + + d1 = m1; + m32 <<= 16; + m3 += m31; + + d2 = m2; + m33 += 256; + + m33 <<= 24; + m3 += m32; + + m3 += m33; + d3 = m3; + + m += 16; + l -= 16; + + z0 = *(double *) &d0; + + z1 = *(double *) &d1; + + z2 = *(double *) &d2; + + z3 = *(double *) &d3; + + z0 -= alpha0; + + z1 -= alpha32; + + z2 -= alpha64; + + z3 -= alpha96; + + h0 += z0; + + h1 += z1; + + h3 += z2; + + h5 += z3; + +if (l < 16) goto multiplyaddatmost15bytes; + +multiplyaddatleast16bytes:; + + m2 = 2279; + m20 = *(uchar *) (m + 8); + y7 = h7 + alpha130; + + m2 <<= 51; + m3 = 2343; + m21 = *(uchar *) (m + 9); + y6 = h6 + alpha130; + + m3 <<= 51; + m0 = 2151; + m22 = *(uchar *) (m + 10); + y1 = h1 + alpha32; + + m0 <<= 51; + m1 = 2215; + m23 = *(uchar *) (m + 11); + y0 = h0 + alpha32; + + m1 <<= 51; + m30 = *(uchar *) (m + 12); + y7 -= alpha130; + + m21 <<= 8; + m2 += m20; + m31 = *(uchar *) (m + 13); + y6 -= alpha130; + + m22 <<= 16; + m2 += m21; + m32 = *(uchar *) (m + 14); + y1 -= alpha32; + + m23 <<= 24; + m2 += m22; + m33 = *(uchar *) (m + 15); + y0 -= alpha32; + + m2 += m23; + m00 = *(uchar *) (m + 0); + y5 = h5 + alpha96; + + m31 <<= 8; + m3 += m30; + m01 = *(uchar *) (m + 1); + y4 = h4 + alpha96; + + m32 <<= 16; + m02 = *(uchar *) (m + 2); + x7 = h7 - y7; + y7 *= scale; + + m33 += 256; + m03 = *(uchar *) (m + 3); + x6 = h6 - y6; + y6 *= scale; + + m33 <<= 24; + m3 += m31; + m10 = *(uchar *) (m + 4); + x1 = h1 - y1; + + m01 <<= 8; + m3 += m32; + m11 = *(uchar *) (m + 5); + x0 = h0 - y0; + + m3 += m33; + m0 += m00; + m12 = *(uchar *) (m + 6); + y5 -= alpha96; + + m02 <<= 16; + m0 += m01; + m13 = *(uchar *) (m + 7); + y4 -= alpha96; + + m03 <<= 24; + m0 += m02; + d2 = m2; + x1 += 
y7; + + m0 += m03; + d3 = m3; + x0 += y6; + + m11 <<= 8; + m1 += m10; + d0 = m0; + x7 += y5; + + m12 <<= 16; + m1 += m11; + x6 += y4; + + m13 <<= 24; + m1 += m12; + y3 = h3 + alpha64; + + m1 += m13; + d1 = m1; + y2 = h2 + alpha64; + + x0 += x1; + + x6 += x7; + + y3 -= alpha64; + r3low = r3low_stack; + + y2 -= alpha64; + r0low = r0low_stack; + + x5 = h5 - y5; + r3lowx0 = r3low * x0; + r3high = r3high_stack; + + x4 = h4 - y4; + r0lowx6 = r0low * x6; + r0high = r0high_stack; + + x3 = h3 - y3; + r3highx0 = r3high * x0; + sr1low = sr1low_stack; + + x2 = h2 - y2; + r0highx6 = r0high * x6; + sr1high = sr1high_stack; + + x5 += y3; + r0lowx0 = r0low * x0; + r1low = r1low_stack; + + h6 = r3lowx0 + r0lowx6; + sr1lowx6 = sr1low * x6; + r1high = r1high_stack; + + x4 += y2; + r0highx0 = r0high * x0; + sr2low = sr2low_stack; + + h7 = r3highx0 + r0highx6; + sr1highx6 = sr1high * x6; + sr2high = sr2high_stack; + + x3 += y1; + r1lowx0 = r1low * x0; + r2low = r2low_stack; + + h0 = r0lowx0 + sr1lowx6; + sr2lowx6 = sr2low * x6; + r2high = r2high_stack; + + x2 += y0; + r1highx0 = r1high * x0; + sr3low = sr3low_stack; + + h1 = r0highx0 + sr1highx6; + sr2highx6 = sr2high * x6; + sr3high = sr3high_stack; + + x4 += x5; + r2lowx0 = r2low * x0; + z2 = *(double *) &d2; + + h2 = r1lowx0 + sr2lowx6; + sr3lowx6 = sr3low * x6; + + x2 += x3; + r2highx0 = r2high * x0; + z3 = *(double *) &d3; + + h3 = r1highx0 + sr2highx6; + sr3highx6 = sr3high * x6; + + r1highx4 = r1high * x4; + z2 -= alpha64; + + h4 = r2lowx0 + sr3lowx6; + r1lowx4 = r1low * x4; + + r0highx4 = r0high * x4; + z3 -= alpha96; + + h5 = r2highx0 + sr3highx6; + r0lowx4 = r0low * x4; + + h7 += r1highx4; + sr3highx4 = sr3high * x4; + + h6 += r1lowx4; + sr3lowx4 = sr3low * x4; + + h5 += r0highx4; + sr2highx4 = sr2high * x4; + + h4 += r0lowx4; + sr2lowx4 = sr2low * x4; + + h3 += sr3highx4; + r0lowx2 = r0low * x2; + + h2 += sr3lowx4; + r0highx2 = r0high * x2; + + h1 += sr2highx4; + r1lowx2 = r1low * x2; + + h0 += sr2lowx4; + r1highx2 = r1high 
* x2; + + h2 += r0lowx2; + r2lowx2 = r2low * x2; + + h3 += r0highx2; + r2highx2 = r2high * x2; + + h4 += r1lowx2; + sr3lowx2 = sr3low * x2; + + h5 += r1highx2; + sr3highx2 = sr3high * x2; + alpha0 = *(double *) (constants + 24); + + m += 16; + h6 += r2lowx2; + + l -= 16; + h7 += r2highx2; + + z1 = *(double *) &d1; + h0 += sr3lowx2; + + z0 = *(double *) &d0; + h1 += sr3highx2; + + z1 -= alpha32; + + z0 -= alpha0; + + h5 += z3; + + h3 += z2; + + h1 += z1; + + h0 += z0; + +if (l >= 16) goto multiplyaddatleast16bytes; + +multiplyaddatmost15bytes:; + + y7 = h7 + alpha130; + + y6 = h6 + alpha130; + + y1 = h1 + alpha32; + + y0 = h0 + alpha32; + + y7 -= alpha130; + + y6 -= alpha130; + + y1 -= alpha32; + + y0 -= alpha32; + + y5 = h5 + alpha96; + + y4 = h4 + alpha96; + + x7 = h7 - y7; + y7 *= scale; + + x6 = h6 - y6; + y6 *= scale; + + x1 = h1 - y1; + + x0 = h0 - y0; + + y5 -= alpha96; + + y4 -= alpha96; + + x1 += y7; + + x0 += y6; + + x7 += y5; + + x6 += y4; + + y3 = h3 + alpha64; + + y2 = h2 + alpha64; + + x0 += x1; + + x6 += x7; + + y3 -= alpha64; + r3low = r3low_stack; + + y2 -= alpha64; + r0low = r0low_stack; + + x5 = h5 - y5; + r3lowx0 = r3low * x0; + r3high = r3high_stack; + + x4 = h4 - y4; + r0lowx6 = r0low * x6; + r0high = r0high_stack; + + x3 = h3 - y3; + r3highx0 = r3high * x0; + sr1low = sr1low_stack; + + x2 = h2 - y2; + r0highx6 = r0high * x6; + sr1high = sr1high_stack; + + x5 += y3; + r0lowx0 = r0low * x0; + r1low = r1low_stack; + + h6 = r3lowx0 + r0lowx6; + sr1lowx6 = sr1low * x6; + r1high = r1high_stack; + + x4 += y2; + r0highx0 = r0high * x0; + sr2low = sr2low_stack; + + h7 = r3highx0 + r0highx6; + sr1highx6 = sr1high * x6; + sr2high = sr2high_stack; + + x3 += y1; + r1lowx0 = r1low * x0; + r2low = r2low_stack; + + h0 = r0lowx0 + sr1lowx6; + sr2lowx6 = sr2low * x6; + r2high = r2high_stack; + + x2 += y0; + r1highx0 = r1high * x0; + sr3low = sr3low_stack; + + h1 = r0highx0 + sr1highx6; + sr2highx6 = sr2high * x6; + sr3high = sr3high_stack; + + x4 += x5; + 
r2lowx0 = r2low * x0; + + h2 = r1lowx0 + sr2lowx6; + sr3lowx6 = sr3low * x6; + + x2 += x3; + r2highx0 = r2high * x0; + + h3 = r1highx0 + sr2highx6; + sr3highx6 = sr3high * x6; + + r1highx4 = r1high * x4; + + h4 = r2lowx0 + sr3lowx6; + r1lowx4 = r1low * x4; + + r0highx4 = r0high * x4; + + h5 = r2highx0 + sr3highx6; + r0lowx4 = r0low * x4; + + h7 += r1highx4; + sr3highx4 = sr3high * x4; + + h6 += r1lowx4; + sr3lowx4 = sr3low * x4; + + h5 += r0highx4; + sr2highx4 = sr2high * x4; + + h4 += r0lowx4; + sr2lowx4 = sr2low * x4; + + h3 += sr3highx4; + r0lowx2 = r0low * x2; + + h2 += sr3lowx4; + r0highx2 = r0high * x2; + + h1 += sr2highx4; + r1lowx2 = r1low * x2; + + h0 += sr2lowx4; + r1highx2 = r1high * x2; + + h2 += r0lowx2; + r2lowx2 = r2low * x2; + + h3 += r0highx2; + r2highx2 = r2high * x2; + + h4 += r1lowx2; + sr3lowx2 = sr3low * x2; + + h5 += r1highx2; + sr3highx2 = sr3high * x2; + + h6 += r2lowx2; + + h7 += r2highx2; + + h0 += sr3lowx2; + + h1 += sr3highx2; + +addatmost15bytes:; + +if (l == 0) goto nomorebytes; + + lbelow2 = l - 2; + + lbelow3 = l - 3; + + lbelow2 >>= 31; + lbelow4 = l - 4; + + m00 = *(uchar *) (m + 0); + lbelow3 >>= 31; + m += lbelow2; + + m01 = *(uchar *) (m + 1); + lbelow4 >>= 31; + m += lbelow3; + + m02 = *(uchar *) (m + 2); + m += lbelow4; + m0 = 2151; + + m03 = *(uchar *) (m + 3); + m0 <<= 51; + m1 = 2215; + + m0 += m00; + m01 &= ~lbelow2; + + m02 &= ~lbelow3; + m01 -= lbelow2; + + m01 <<= 8; + m03 &= ~lbelow4; + + m0 += m01; + lbelow2 -= lbelow3; + + m02 += lbelow2; + lbelow3 -= lbelow4; + + m02 <<= 16; + m03 += lbelow3; + + m03 <<= 24; + m0 += m02; + + m0 += m03; + lbelow5 = l - 5; + + lbelow6 = l - 6; + lbelow7 = l - 7; + + lbelow5 >>= 31; + lbelow8 = l - 8; + + lbelow6 >>= 31; + m += lbelow5; + + m10 = *(uchar *) (m + 4); + lbelow7 >>= 31; + m += lbelow6; + + m11 = *(uchar *) (m + 5); + lbelow8 >>= 31; + m += lbelow7; + + m12 = *(uchar *) (m + 6); + m1 <<= 51; + m += lbelow8; + + m13 = *(uchar *) (m + 7); + m10 &= ~lbelow5; + lbelow4 -= 
lbelow5; + + m10 += lbelow4; + lbelow5 -= lbelow6; + + m11 &= ~lbelow6; + m11 += lbelow5; + + m11 <<= 8; + m1 += m10; + + m1 += m11; + m12 &= ~lbelow7; + + lbelow6 -= lbelow7; + m13 &= ~lbelow8; + + m12 += lbelow6; + lbelow7 -= lbelow8; + + m12 <<= 16; + m13 += lbelow7; + + m13 <<= 24; + m1 += m12; + + m1 += m13; + m2 = 2279; + + lbelow9 = l - 9; + m3 = 2343; + + lbelow10 = l - 10; + lbelow11 = l - 11; + + lbelow9 >>= 31; + lbelow12 = l - 12; + + lbelow10 >>= 31; + m += lbelow9; + + m20 = *(uchar *) (m + 8); + lbelow11 >>= 31; + m += lbelow10; + + m21 = *(uchar *) (m + 9); + lbelow12 >>= 31; + m += lbelow11; + + m22 = *(uchar *) (m + 10); + m2 <<= 51; + m += lbelow12; + + m23 = *(uchar *) (m + 11); + m20 &= ~lbelow9; + lbelow8 -= lbelow9; + + m20 += lbelow8; + lbelow9 -= lbelow10; + + m21 &= ~lbelow10; + m21 += lbelow9; + + m21 <<= 8; + m2 += m20; + + m2 += m21; + m22 &= ~lbelow11; + + lbelow10 -= lbelow11; + m23 &= ~lbelow12; + + m22 += lbelow10; + lbelow11 -= lbelow12; + + m22 <<= 16; + m23 += lbelow11; + + m23 <<= 24; + m2 += m22; + + m3 <<= 51; + lbelow13 = l - 13; + + lbelow13 >>= 31; + lbelow14 = l - 14; + + lbelow14 >>= 31; + m += lbelow13; + lbelow15 = l - 15; + + m30 = *(uchar *) (m + 12); + lbelow15 >>= 31; + m += lbelow14; + + m31 = *(uchar *) (m + 13); + m += lbelow15; + m2 += m23; + + m32 = *(uchar *) (m + 14); + m30 &= ~lbelow13; + lbelow12 -= lbelow13; + + m30 += lbelow12; + lbelow13 -= lbelow14; + + m3 += m30; + m31 &= ~lbelow14; + + m31 += lbelow13; + m32 &= ~lbelow15; + + m31 <<= 8; + lbelow14 -= lbelow15; + + m3 += m31; + m32 += lbelow14; + d0 = m0; + + m32 <<= 16; + m33 = lbelow15 + 1; + d1 = m1; + + m33 <<= 24; + m3 += m32; + d2 = m2; + + m3 += m33; + d3 = m3; + + alpha0 = *(double *) (constants + 24); + + z3 = *(double *) &d3; + + z2 = *(double *) &d2; + + z1 = *(double *) &d1; + + z0 = *(double *) &d0; + + z3 -= alpha96; + + z2 -= alpha64; + + z1 -= alpha32; + + z0 -= alpha0; + + h5 += z3; + + h3 += z2; + + h1 += z1; + + h0 += z0; + + y7 = h7 
+ alpha130; + + y6 = h6 + alpha130; + + y1 = h1 + alpha32; + + y0 = h0 + alpha32; + + y7 -= alpha130; + + y6 -= alpha130; + + y1 -= alpha32; + + y0 -= alpha32; + + y5 = h5 + alpha96; + + y4 = h4 + alpha96; + + x7 = h7 - y7; + y7 *= scale; + + x6 = h6 - y6; + y6 *= scale; + + x1 = h1 - y1; + + x0 = h0 - y0; + + y5 -= alpha96; + + y4 -= alpha96; + + x1 += y7; + + x0 += y6; + + x7 += y5; + + x6 += y4; + + y3 = h3 + alpha64; + + y2 = h2 + alpha64; + + x0 += x1; + + x6 += x7; + + y3 -= alpha64; + r3low = r3low_stack; + + y2 -= alpha64; + r0low = r0low_stack; + + x5 = h5 - y5; + r3lowx0 = r3low * x0; + r3high = r3high_stack; + + x4 = h4 - y4; + r0lowx6 = r0low * x6; + r0high = r0high_stack; + + x3 = h3 - y3; + r3highx0 = r3high * x0; + sr1low = sr1low_stack; + + x2 = h2 - y2; + r0highx6 = r0high * x6; + sr1high = sr1high_stack; + + x5 += y3; + r0lowx0 = r0low * x0; + r1low = r1low_stack; + + h6 = r3lowx0 + r0lowx6; + sr1lowx6 = sr1low * x6; + r1high = r1high_stack; + + x4 += y2; + r0highx0 = r0high * x0; + sr2low = sr2low_stack; + + h7 = r3highx0 + r0highx6; + sr1highx6 = sr1high * x6; + sr2high = sr2high_stack; + + x3 += y1; + r1lowx0 = r1low * x0; + r2low = r2low_stack; + + h0 = r0lowx0 + sr1lowx6; + sr2lowx6 = sr2low * x6; + r2high = r2high_stack; + + x2 += y0; + r1highx0 = r1high * x0; + sr3low = sr3low_stack; + + h1 = r0highx0 + sr1highx6; + sr2highx6 = sr2high * x6; + sr3high = sr3high_stack; + + x4 += x5; + r2lowx0 = r2low * x0; + + h2 = r1lowx0 + sr2lowx6; + sr3lowx6 = sr3low * x6; + + x2 += x3; + r2highx0 = r2high * x0; + + h3 = r1highx0 + sr2highx6; + sr3highx6 = sr3high * x6; + + r1highx4 = r1high * x4; + + h4 = r2lowx0 + sr3lowx6; + r1lowx4 = r1low * x4; + + r0highx4 = r0high * x4; + + h5 = r2highx0 + sr3highx6; + r0lowx4 = r0low * x4; + + h7 += r1highx4; + sr3highx4 = sr3high * x4; + + h6 += r1lowx4; + sr3lowx4 = sr3low * x4; + + h5 += r0highx4; + sr2highx4 = sr2high * x4; + + h4 += r0lowx4; + sr2lowx4 = sr2low * x4; + + h3 += sr3highx4; + r0lowx2 = r0low * 
x2; + + h2 += sr3lowx4; + r0highx2 = r0high * x2; + + h1 += sr2highx4; + r1lowx2 = r1low * x2; + + h0 += sr2lowx4; + r1highx2 = r1high * x2; + + h2 += r0lowx2; + r2lowx2 = r2low * x2; + + h3 += r0highx2; + r2highx2 = r2high * x2; + + h4 += r1lowx2; + sr3lowx2 = sr3low * x2; + + h5 += r1highx2; + sr3highx2 = sr3high * x2; + + h6 += r2lowx2; + + h7 += r2highx2; + + h0 += sr3lowx2; + + h1 += sr3highx2; + + +nomorebytes:; + + offset0 = *(double *) (constants + 104); + y7 = h7 + alpha130; + + offset1 = *(double *) (constants + 112); + y0 = h0 + alpha32; + + offset2 = *(double *) (constants + 120); + y1 = h1 + alpha32; + + offset3 = *(double *) (constants + 128); + y2 = h2 + alpha64; + + y7 -= alpha130; + + y3 = h3 + alpha64; + + y4 = h4 + alpha96; + + y5 = h5 + alpha96; + + x7 = h7 - y7; + y7 *= scale; + + y0 -= alpha32; + + y1 -= alpha32; + + y2 -= alpha64; + + h6 += x7; + + y3 -= alpha64; + + y4 -= alpha96; + + y5 -= alpha96; + + y6 = h6 + alpha130; + + x0 = h0 - y0; + + x1 = h1 - y1; + + x2 = h2 - y2; + + y6 -= alpha130; + + x0 += y7; + + x3 = h3 - y3; + + x4 = h4 - y4; + + x5 = h5 - y5; + + x6 = h6 - y6; + + y6 *= scale; + + x2 += y0; + + x3 += y1; + + x4 += y2; + + x0 += y6; + + x5 += y3; + + x6 += y4; + + x2 += x3; + + x0 += x1; + + x4 += x5; + + x6 += y5; + + x2 += offset1; + *(double *) &d1 = x2; + + x0 += offset0; + *(double *) &d0 = x0; + + x4 += offset2; + *(double *) &d2 = x4; + + x6 += offset3; + *(double *) &d3 = x6; + + + + + f0 = d0; + + f1 = d1; + bits32 = -1; + + f2 = d2; + bits32 >>= 32; + + f3 = d3; + f = f0 >> 32; + + f0 &= bits32; + f &= 255; + + f1 += f; + g0 = f0 + 5; + + g = g0 >> 32; + g0 &= bits32; + + f = f1 >> 32; + f1 &= bits32; + + f &= 255; + g1 = f1 + g; + + g = g1 >> 32; + f2 += f; + + f = f2 >> 32; + g1 &= bits32; + + f2 &= bits32; + f &= 255; + + f3 += f; + g2 = f2 + g; + + g = g2 >> 32; + g2 &= bits32; + + f4 = f3 >> 32; + f3 &= bits32; + + f4 &= 255; + g3 = f3 + g; + + g = g3 >> 32; + g3 &= bits32; + + g4 = f4 + g; + + g4 = g4 - 4; 
+ s00 = *(uchar *) (s + 0); + + f = (int64) g4 >> 63; + s01 = *(uchar *) (s + 1); + + f0 &= f; + g0 &= ~f; + s02 = *(uchar *) (s + 2); + + f1 &= f; + f0 |= g0; + s03 = *(uchar *) (s + 3); + + g1 &= ~f; + f2 &= f; + s10 = *(uchar *) (s + 4); + + f3 &= f; + g2 &= ~f; + s11 = *(uchar *) (s + 5); + + g3 &= ~f; + f1 |= g1; + s12 = *(uchar *) (s + 6); + + f2 |= g2; + f3 |= g3; + s13 = *(uchar *) (s + 7); + + s01 <<= 8; + f0 += s00; + s20 = *(uchar *) (s + 8); + + s02 <<= 16; + f0 += s01; + s21 = *(uchar *) (s + 9); + + s03 <<= 24; + f0 += s02; + s22 = *(uchar *) (s + 10); + + s11 <<= 8; + f1 += s10; + s23 = *(uchar *) (s + 11); + + s12 <<= 16; + f1 += s11; + s30 = *(uchar *) (s + 12); + + s13 <<= 24; + f1 += s12; + s31 = *(uchar *) (s + 13); + + f0 += s03; + f1 += s13; + s32 = *(uchar *) (s + 14); + + s21 <<= 8; + f2 += s20; + s33 = *(uchar *) (s + 15); + + s22 <<= 16; + f2 += s21; + + s23 <<= 24; + f2 += s22; + + s31 <<= 8; + f3 += s30; + + s32 <<= 16; + f3 += s31; + + s33 <<= 24; + f3 += s32; + + f2 += s23; + f3 += s33; + + *(uchar *) (out + 0) = f0; + f0 >>= 8; + *(uchar *) (out + 1) = f0; + f0 >>= 8; + *(uchar *) (out + 2) = f0; + f0 >>= 8; + *(uchar *) (out + 3) = f0; + f0 >>= 8; + f1 += f0; + + *(uchar *) (out + 4) = f1; + f1 >>= 8; + *(uchar *) (out + 5) = f1; + f1 >>= 8; + *(uchar *) (out + 6) = f1; + f1 >>= 8; + *(uchar *) (out + 7) = f1; + f1 >>= 8; + f2 += f1; + + *(uchar *) (out + 8) = f2; + f2 >>= 8; + *(uchar *) (out + 9) = f2; + f2 >>= 8; + *(uchar *) (out + 10) = f2; + f2 >>= 8; + *(uchar *) (out + 11) = f2; + f2 >>= 8; + f3 += f2; + + *(uchar *) (out + 12) = f3; + f3 >>= 8; + *(uchar *) (out + 13) = f3; + f3 >>= 8; + *(uchar *) (out + 14) = f3; + f3 >>= 8; + *(uchar *) (out + 15) = f3; + + return 0; +} diff --git a/nacl/crypto_onetimeauth/poly1305/53/verify.c b/nacl/crypto_onetimeauth/poly1305/53/verify.c new file mode 100644 index 00000000..c7e063f1 --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/53/verify.c @@ -0,0 +1,9 @@ +#include 
"crypto_verify_16.h" +#include "crypto_onetimeauth.h" + +int crypto_onetimeauth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k) +{ + unsigned char correct[16]; + crypto_onetimeauth(correct,in,inlen,k); + return crypto_verify_16(h,correct); +} diff --git a/nacl/crypto_onetimeauth/poly1305/amd64/api.h b/nacl/crypto_onetimeauth/poly1305/amd64/api.h new file mode 100644 index 00000000..acc133ed --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/amd64/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 16 +#define CRYPTO_KEYBYTES 32 diff --git a/nacl/crypto_onetimeauth/poly1305/amd64/auth.s b/nacl/crypto_onetimeauth/poly1305/amd64/auth.s new file mode 100644 index 00000000..5212a3e7 --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/amd64/auth.s @@ -0,0 +1,2787 @@ + +# qhasm: int64 r11_caller + +# qhasm: int64 r12_caller + +# qhasm: int64 r13_caller + +# qhasm: int64 r14_caller + +# qhasm: int64 r15_caller + +# qhasm: int64 rbx_caller + +# qhasm: int64 rbp_caller + +# qhasm: caller r11_caller + +# qhasm: caller r12_caller + +# qhasm: caller r13_caller + +# qhasm: caller r14_caller + +# qhasm: caller r15_caller + +# qhasm: caller rbx_caller + +# qhasm: caller rbp_caller + +# qhasm: stack64 r11_stack + +# qhasm: stack64 r12_stack + +# qhasm: stack64 r13_stack + +# qhasm: stack64 r14_stack + +# qhasm: stack64 r15_stack + +# qhasm: stack64 rbx_stack + +# qhasm: stack64 rbp_stack + +# qhasm: int64 out + +# qhasm: stack64 out_stack + +# qhasm: int64 m + +# qhasm: int64 l + +# qhasm: int64 k + +# qhasm: stack64 k_stack + +# qhasm: int64 m0 + +# qhasm: int64 m1 + +# qhasm: int64 m2 + +# qhasm: int64 m3 + +# qhasm: float80 a0 + +# qhasm: float80 a1 + +# qhasm: float80 a2 + +# qhasm: float80 a3 + +# qhasm: float80 h0 + +# qhasm: float80 h1 + +# qhasm: float80 h2 + +# qhasm: float80 h3 + +# qhasm: float80 x0 + +# qhasm: float80 x1 + +# qhasm: float80 x2 + +# qhasm: float80 x3 + +# qhasm: float80 y0 + +# qhasm: float80 y1 + +# 
qhasm: float80 y2 + +# qhasm: float80 y3 + +# qhasm: float80 r0x0 + +# qhasm: float80 r1x0 + +# qhasm: float80 r2x0 + +# qhasm: float80 r3x0 + +# qhasm: float80 r0x1 + +# qhasm: float80 r1x1 + +# qhasm: float80 r2x1 + +# qhasm: float80 sr3x1 + +# qhasm: float80 r0x2 + +# qhasm: float80 r1x2 + +# qhasm: float80 sr2x2 + +# qhasm: float80 sr3x2 + +# qhasm: float80 r0x3 + +# qhasm: float80 sr1x3 + +# qhasm: float80 sr2x3 + +# qhasm: float80 sr3x3 + +# qhasm: stack64 d0 + +# qhasm: stack64 d1 + +# qhasm: stack64 d2 + +# qhasm: stack64 d3 + +# qhasm: stack64 r0 + +# qhasm: stack64 r1 + +# qhasm: stack64 r2 + +# qhasm: stack64 r3 + +# qhasm: stack64 sr1 + +# qhasm: stack64 sr2 + +# qhasm: stack64 sr3 + +# qhasm: enter crypto_onetimeauth_poly1305_amd64 +.text +.p2align 5 +.globl _crypto_onetimeauth_poly1305_amd64 +.globl crypto_onetimeauth_poly1305_amd64 +_crypto_onetimeauth_poly1305_amd64: +crypto_onetimeauth_poly1305_amd64: +mov %rsp,%r11 +and $31,%r11 +add $192,%r11 +sub %r11,%rsp + +# qhasm: input out + +# qhasm: input m + +# qhasm: input l + +# qhasm: input k + +# qhasm: r11_stack = r11_caller +# asm 1: movq r11_stack=stack64#1 +# asm 2: movq r11_stack=32(%rsp) +movq %r11,32(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=40(%rsp) +movq %r12,40(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=48(%rsp) +movq %r13,48(%rsp) + +# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=56(%rsp) +movq %r14,56(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=64(%rsp) +movq %r15,64(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=72(%rsp) +movq %rbx,72(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=80(%rsp) +movq %rbp,80(%rsp) + +# qhasm: round *(uint16 *) 
&crypto_onetimeauth_poly1305_amd64_rounding +fldcw crypto_onetimeauth_poly1305_amd64_rounding(%rip) + +# qhasm: m0 = *(uint32 *) (k + 0) +# asm 1: movl 0(m0=int64#5d +# asm 2: movl 0(m0=%r8d +movl 0(%rcx),%r8d + +# qhasm: m1 = *(uint32 *) (k + 4) +# asm 1: movl 4(m1=int64#6d +# asm 2: movl 4(m1=%r9d +movl 4(%rcx),%r9d + +# qhasm: m2 = *(uint32 *) (k + 8) +# asm 1: movl 8(m2=int64#7d +# asm 2: movl 8(m2=%eax +movl 8(%rcx),%eax + +# qhasm: m3 = *(uint32 *) (k + 12) +# asm 1: movl 12(m3=int64#8d +# asm 2: movl 12(m3=%r10d +movl 12(%rcx),%r10d + +# qhasm: out_stack = out +# asm 1: movq out_stack=stack64#8 +# asm 2: movq out_stack=88(%rsp) +movq %rdi,88(%rsp) + +# qhasm: k_stack = k +# asm 1: movq k_stack=stack64#9 +# asm 2: movq k_stack=96(%rsp) +movq %rcx,96(%rsp) + +# qhasm: d0 top = 0x43300000 +# asm 1: movl $0x43300000,>d0=stack64#10 +# asm 2: movl $0x43300000,>d0=108(%rsp) +movl $0x43300000,108(%rsp) + +# qhasm: d1 top = 0x45300000 +# asm 1: movl $0x45300000,>d1=stack64#11 +# asm 2: movl $0x45300000,>d1=116(%rsp) +movl $0x45300000,116(%rsp) + +# qhasm: d2 top = 0x47300000 +# asm 1: movl $0x47300000,>d2=stack64#12 +# asm 2: movl $0x47300000,>d2=124(%rsp) +movl $0x47300000,124(%rsp) + +# qhasm: d3 top = 0x49300000 +# asm 1: movl $0x49300000,>d3=stack64#13 +# asm 2: movl $0x49300000,>d3=132(%rsp) +movl $0x49300000,132(%rsp) + +# qhasm: (uint32) m0 &= 0x0fffffff +# asm 1: and $0x0fffffff,r0=stack64#14 +# asm 2: fstpl >r0=136(%rsp) +fstpl 136(%rsp) +# comment:fpstackfrombottom:r1=stack64#15 +# asm 2: fstl >r1=144(%rsp) +fstl 144(%rsp) +# comment:fpstackfrombottom:sr1=stack64#16 +# asm 2: fstpl >sr1=152(%rsp) +fstpl 152(%rsp) +# comment:fpstackfrombottom:r2=stack64#17 +# asm 2: fstl >r2=160(%rsp) +fstl 160(%rsp) +# comment:fpstackfrombottom:sr2=stack64#18 +# asm 2: fstpl >sr2=168(%rsp) +fstpl 168(%rsp) +# comment:fpstackfrombottom:r3=stack64#19 +# asm 2: fstl >r3=176(%rsp) +fstl 176(%rsp) +# comment:fpstackfrombottom:sr3=stack64#20 +# asm 2: fstpl >sr3=184(%rsp) +fstpl 
184(%rsp) +# comment:fpstackfrombottom: + +# qhasm: h3 = 0 +fldz +# comment:fpstackfrombottom:m3=int64#1d +# asm 2: movl 12(m3=%edi +movl 12(%rsi),%edi +# comment:fpstackfrombottom:m2=int64#4d +# asm 2: movl 8(m2=%ecx +movl 8(%rsi),%ecx +# comment:fpstackfrombottom:m1=int64#5d +# asm 2: movl 4(m1=%r8d +movl 4(%rsi),%r8d +# comment:fpstackfrombottom:m0=int64#6d +# asm 2: movl 0(m0=%r9d +movl 0(%rsi),%r9d +# comment:fpstackfrombottom:m3=int64#1d +# asm 2: movl 12(m3=%edi +movl 12(%rsi),%edi +# comment:fpstackfrombottom:m2=int64#4d +# asm 2: movl 8(m2=%ecx +movl 8(%rsi),%ecx +# comment:fpstackfrombottom:m1=int64#5d +# asm 2: movl 4(m1=%r8d +movl 4(%rsi),%r8d +# comment:fpstackfrombottom:m0=int64#6d +# asm 2: movl 0(m0=%r9d +movl 0(%rsi),%r9d +# comment:fpstackfrombottom:lastchunk=stack128#1 +# asm 2: movl $0,>lastchunk=0(%rsp) +movl $0,0(%rsp) +# comment:fpstackfrombottom:destination=int64#1 +# asm 2: leaq destination=%rdi +leaq 0(%rsp),%rdi +# comment:fpstackfrombottom:numbytes=int64#4 +# asm 2: mov numbytes=%rcx +mov %rdx,%rcx +# comment:fpstackfrombottom:m3=int64#1d +# asm 2: movl 12+m3=%edi +movl 12+0(%rsp),%edi +# comment:fpstackfrombottom:m2=int64#2d +# asm 2: movl 8+m2=%esi +movl 8+0(%rsp),%esi +# comment:fpstackfrombottom:m1=int64#3d +# asm 2: movl 4+m1=%edx +movl 4+0(%rsp),%edx +# comment:fpstackfrombottom:m0=int64#4d +# asm 2: movl m0=%ecx +movl 0(%rsp),%ecx +# comment:fpstackfrombottom:d0=stack64#10 +# asm 2: fstpl >d0=104(%rsp) +fstpl 104(%rsp) +# comment:fpstackfrombottom:d1=stack64#11 +# asm 2: fstpl >d1=112(%rsp) +fstpl 112(%rsp) +# comment:fpstackfrombottom:d2=stack64#12 +# asm 2: fstpl >d2=120(%rsp) +fstpl 120(%rsp) +# comment:fpstackfrombottom:d3=stack64#13 +# asm 2: fstpl >d3=128(%rsp) +fstpl 128(%rsp) +# comment:fpstackfrombottom: + +# qhasm: int64 f0 + +# qhasm: int64 f1 + +# qhasm: int64 f2 + +# qhasm: int64 f3 + +# qhasm: int64 f4 + +# qhasm: int64 g0 + +# qhasm: int64 g1 + +# qhasm: int64 g2 + +# qhasm: int64 g3 + +# qhasm: int64 f + +# qhasm: 
int64 notf + +# qhasm: stack64 f1_stack + +# qhasm: stack64 f2_stack + +# qhasm: stack64 f3_stack + +# qhasm: stack64 f4_stack + +# qhasm: stack64 g0_stack + +# qhasm: stack64 g1_stack + +# qhasm: stack64 g2_stack + +# qhasm: stack64 g3_stack + +# qhasm: g0 = top d0 +# asm 1: movl g0=int64#1d +# asm 2: movl g0=%edi +movl 108(%rsp),%edi + +# qhasm: (uint32) g0 &= 63 +# asm 1: and $63,g1=int64#2d +# asm 2: movl g1=%esi +movl 116(%rsp),%esi + +# qhasm: (uint32) g1 &= 63 +# asm 1: and $63,g2=int64#3d +# asm 2: movl g2=%edx +movl 124(%rsp),%edx + +# qhasm: (uint32) g2 &= 63 +# asm 1: and $63,g3=int64#4d +# asm 2: movl g3=%ecx +movl 132(%rsp),%ecx + +# qhasm: (uint32) g3 &= 63 +# asm 1: and $63,f1=int64#5d +# asm 2: movl f1=%r8d +movl 112(%rsp),%r8d + +# qhasm: carry? (uint32) f1 += g0 +# asm 1: add f1_stack=stack64#11 +# asm 2: movq f1_stack=112(%rsp) +movq %r8,112(%rsp) + +# qhasm: f2 = bottom d2 +# asm 1: movl f2=int64#1d +# asm 2: movl f2=%edi +movl 120(%rsp),%edi + +# qhasm: carry? (uint32) f2 += g1 + carry +# asm 1: adc f2_stack=stack64#12 +# asm 2: movq f2_stack=120(%rsp) +movq %rdi,120(%rsp) + +# qhasm: f3 = bottom d3 +# asm 1: movl f3=int64#1d +# asm 2: movl f3=%edi +movl 128(%rsp),%edi + +# qhasm: carry? (uint32) f3 += g2 + carry +# asm 1: adc f3_stack=stack64#13 +# asm 2: movq f3_stack=128(%rsp) +movq %rdi,128(%rsp) + +# qhasm: f4 = 0 +# asm 1: mov $0,>f4=int64#1 +# asm 2: mov $0,>f4=%rdi +mov $0,%rdi + +# qhasm: carry? (uint32) f4 += g3 + carry +# asm 1: adc f4_stack=stack64#14 +# asm 2: movq f4_stack=136(%rsp) +movq %rdi,136(%rsp) + +# qhasm: g0 = 5 +# asm 1: mov $5,>g0=int64#1 +# asm 2: mov $5,>g0=%rdi +mov $5,%rdi + +# qhasm: f0 = bottom d0 +# asm 1: movl f0=int64#2d +# asm 2: movl f0=%esi +movl 104(%rsp),%esi + +# qhasm: carry? 
(uint32) g0 += f0 +# asm 1: add g0_stack=stack64#10 +# asm 2: movq g0_stack=104(%rsp) +movq %rdi,104(%rsp) + +# qhasm: g1 = 0 +# asm 1: mov $0,>g1=int64#1 +# asm 2: mov $0,>g1=%rdi +mov $0,%rdi + +# qhasm: f1 = f1_stack +# asm 1: movq f1=int64#3 +# asm 2: movq f1=%rdx +movq 112(%rsp),%rdx + +# qhasm: carry? (uint32) g1 += f1 + carry +# asm 1: adc g1_stack=stack64#11 +# asm 2: movq g1_stack=112(%rsp) +movq %rdi,112(%rsp) + +# qhasm: g2 = 0 +# asm 1: mov $0,>g2=int64#1 +# asm 2: mov $0,>g2=%rdi +mov $0,%rdi + +# qhasm: f2 = f2_stack +# asm 1: movq f2=int64#4 +# asm 2: movq f2=%rcx +movq 120(%rsp),%rcx + +# qhasm: carry? (uint32) g2 += f2 + carry +# asm 1: adc g2_stack=stack64#12 +# asm 2: movq g2_stack=120(%rsp) +movq %rdi,120(%rsp) + +# qhasm: g3 = 0 +# asm 1: mov $0,>g3=int64#1 +# asm 2: mov $0,>g3=%rdi +mov $0,%rdi + +# qhasm: f3 = f3_stack +# asm 1: movq f3=int64#5 +# asm 2: movq f3=%r8 +movq 128(%rsp),%r8 + +# qhasm: carry? (uint32) g3 += f3 + carry +# asm 1: adc g3_stack=stack64#13 +# asm 2: movq g3_stack=128(%rsp) +movq %rdi,128(%rsp) + +# qhasm: f = 0xfffffffc +# asm 1: mov $0xfffffffc,>f=int64#1 +# asm 2: mov $0xfffffffc,>f=%rdi +mov $0xfffffffc,%rdi + +# qhasm: f4 = f4_stack +# asm 1: movq f4=int64#6 +# asm 2: movq f4=%r9 +movq 136(%rsp),%r9 + +# qhasm: carry? (uint32) f += f4 + carry +# asm 1: adc >= 16 +# asm 1: sar $16,notf=int64#6 +# asm 2: mov notf=%r9 +mov %rdi,%r9 + +# qhasm: (uint32) notf ^= 0xffffffff +# asm 1: xor $0xffffffff,g0=int64#7 +# asm 2: movq g0=%rax +movq 104(%rsp),%rax + +# qhasm: g0 &= notf +# asm 1: and g1=int64#7 +# asm 2: movq g1=%rax +movq 112(%rsp),%rax + +# qhasm: g1 &= notf +# asm 1: and g2=int64#7 +# asm 2: movq g2=%rax +movq 120(%rsp),%rax + +# qhasm: g2 &= notf +# asm 1: and g3=int64#1 +# asm 2: movq g3=%rdi +movq 128(%rsp),%rdi + +# qhasm: g3 &= notf +# asm 1: and out=int64#1 +# asm 2: movq out=%rdi +movq 88(%rsp),%rdi + +# qhasm: k = k_stack +# asm 1: movq k=int64#6 +# asm 2: movq k=%r9 +movq 96(%rsp),%r9 + +# qhasm: carry? 
(uint32) f0 += *(uint32 *) (k + 16) +# asm 1: addl 16(r11_caller=int64#9 +# asm 2: movq r11_caller=%r11 +movq 32(%rsp),%r11 + +# qhasm: r12_caller = r12_stack +# asm 1: movq r12_caller=int64#10 +# asm 2: movq r12_caller=%r12 +movq 40(%rsp),%r12 + +# qhasm: r13_caller = r13_stack +# asm 1: movq r13_caller=int64#11 +# asm 2: movq r13_caller=%r13 +movq 48(%rsp),%r13 + +# qhasm: r14_caller = r14_stack +# asm 1: movq r14_caller=int64#12 +# asm 2: movq r14_caller=%r14 +movq 56(%rsp),%r14 + +# qhasm: r15_caller = r15_stack +# asm 1: movq r15_caller=int64#13 +# asm 2: movq r15_caller=%r15 +movq 64(%rsp),%r15 + +# qhasm: rbx_caller = rbx_stack +# asm 1: movq rbx_caller=int64#14 +# asm 2: movq rbx_caller=%rbx +movq 72(%rsp),%rbx + +# qhasm: rbp_caller = rbp_stack +# asm 1: movq rbp_caller=int64#15 +# asm 2: movq rbp_caller=%rbp +movq 80(%rsp),%rbp + +# qhasm: leave +add %r11,%rsp +xor %rax,%rax +xor %rdx,%rdx +ret diff --git a/nacl/crypto_onetimeauth/poly1305/amd64/constants.s b/nacl/crypto_onetimeauth/poly1305/amd64/constants.s new file mode 100644 index 00000000..1bfb0be9 --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/amd64/constants.s @@ -0,0 +1,85 @@ +# version 20080913 +# D. J. Bernstein +# Public domain. 
+ +.data +.section .rodata +.p2align 5 + +.globl _crypto_onetimeauth_poly1305_amd64_constants +.globl crypto_onetimeauth_poly1305_amd64_constants +.globl crypto_onetimeauth_poly1305_amd64_scale +.globl crypto_onetimeauth_poly1305_amd64_two32 +.globl crypto_onetimeauth_poly1305_amd64_two64 +.globl crypto_onetimeauth_poly1305_amd64_two96 +.globl crypto_onetimeauth_poly1305_amd64_alpha32 +.globl crypto_onetimeauth_poly1305_amd64_alpha64 +.globl crypto_onetimeauth_poly1305_amd64_alpha96 +.globl crypto_onetimeauth_poly1305_amd64_alpha130 +.globl crypto_onetimeauth_poly1305_amd64_doffset0 +.globl crypto_onetimeauth_poly1305_amd64_doffset1 +.globl crypto_onetimeauth_poly1305_amd64_doffset2 +.globl crypto_onetimeauth_poly1305_amd64_doffset3 +.globl crypto_onetimeauth_poly1305_amd64_doffset3minustwo128 +.globl crypto_onetimeauth_poly1305_amd64_hoffset0 +.globl crypto_onetimeauth_poly1305_amd64_hoffset1 +.globl crypto_onetimeauth_poly1305_amd64_hoffset2 +.globl crypto_onetimeauth_poly1305_amd64_hoffset3 +.globl crypto_onetimeauth_poly1305_amd64_rounding + +_crypto_onetimeauth_poly1305_amd64_constants: +crypto_onetimeauth_poly1305_amd64_constants: +crypto_onetimeauth_poly1305_amd64_scale: +.long 0x0,0x37f40000 + +crypto_onetimeauth_poly1305_amd64_two32: +.long 0x0,0x41f00000 + +crypto_onetimeauth_poly1305_amd64_two64: +.long 0x0,0x43f00000 + +crypto_onetimeauth_poly1305_amd64_two96: +.long 0x0,0x45f00000 + +crypto_onetimeauth_poly1305_amd64_alpha32: +.long 0x0,0x45e80000 + +crypto_onetimeauth_poly1305_amd64_alpha64: +.long 0x0,0x47e80000 + +crypto_onetimeauth_poly1305_amd64_alpha96: +.long 0x0,0x49e80000 + +crypto_onetimeauth_poly1305_amd64_alpha130: +.long 0x0,0x4c080000 + +crypto_onetimeauth_poly1305_amd64_doffset0: +.long 0x0,0x43300000 + +crypto_onetimeauth_poly1305_amd64_doffset1: +.long 0x0,0x45300000 + +crypto_onetimeauth_poly1305_amd64_doffset2: +.long 0x0,0x47300000 + +crypto_onetimeauth_poly1305_amd64_doffset3: +.long 0x0,0x49300000 + 
+crypto_onetimeauth_poly1305_amd64_doffset3minustwo128: +.long 0x0,0x492ffffe + +crypto_onetimeauth_poly1305_amd64_hoffset0: +.long 0xfffffffb,0x43300001 + +crypto_onetimeauth_poly1305_amd64_hoffset1: +.long 0xfffffffe,0x45300001 + +crypto_onetimeauth_poly1305_amd64_hoffset2: +.long 0xfffffffe,0x47300001 + +crypto_onetimeauth_poly1305_amd64_hoffset3: +.long 0xfffffffe,0x49300003 + +crypto_onetimeauth_poly1305_amd64_rounding: +.byte 0x7f +.byte 0x13 diff --git a/nacl/crypto_onetimeauth/poly1305/amd64/verify.c b/nacl/crypto_onetimeauth/poly1305/amd64/verify.c new file mode 100644 index 00000000..c7e063f1 --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/amd64/verify.c @@ -0,0 +1,9 @@ +#include "crypto_verify_16.h" +#include "crypto_onetimeauth.h" + +int crypto_onetimeauth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k) +{ + unsigned char correct[16]; + crypto_onetimeauth(correct,in,inlen,k); + return crypto_verify_16(h,correct); +} diff --git a/nacl/crypto_onetimeauth/poly1305/checksum b/nacl/crypto_onetimeauth/poly1305/checksum new file mode 100644 index 00000000..a713ea40 --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/checksum @@ -0,0 +1 @@ +e836d5ca58cf673fca2b4910f23f3990 diff --git a/nacl/crypto_onetimeauth/poly1305/ref/api.h b/nacl/crypto_onetimeauth/poly1305/ref/api.h new file mode 100644 index 00000000..acc133ed --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/ref/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 16 +#define CRYPTO_KEYBYTES 32 diff --git a/nacl/crypto_onetimeauth/poly1305/ref/auth.c b/nacl/crypto_onetimeauth/poly1305/ref/auth.c new file mode 100644 index 00000000..06cf115d --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/ref/auth.c @@ -0,0 +1,104 @@ +/* +20080912 +D. J. Bernstein +Public domain. 
+*/ + +#include "crypto_onetimeauth.h" + +static void add(unsigned int h[17],const unsigned int c[17]) +{ + unsigned int j; + unsigned int u; + u = 0; + for (j = 0;j < 17;++j) { u += h[j] + c[j]; h[j] = u & 255; u >>= 8; } +} + +static void squeeze(unsigned int h[17]) +{ + unsigned int j; + unsigned int u; + u = 0; + for (j = 0;j < 16;++j) { u += h[j]; h[j] = u & 255; u >>= 8; } + u += h[16]; h[16] = u & 3; + u = 5 * (u >> 2); + for (j = 0;j < 16;++j) { u += h[j]; h[j] = u & 255; u >>= 8; } + u += h[16]; h[16] = u; +} + +static const unsigned int minusp[17] = { + 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252 +} ; + +static void freeze(unsigned int h[17]) +{ + unsigned int horig[17]; + unsigned int j; + unsigned int negative; + for (j = 0;j < 17;++j) horig[j] = h[j]; + add(h,minusp); + negative = -(h[16] >> 7); + for (j = 0;j < 17;++j) h[j] ^= negative & (horig[j] ^ h[j]); +} + +static void mulmod(unsigned int h[17],const unsigned int r[17]) +{ + unsigned int hr[17]; + unsigned int i; + unsigned int j; + unsigned int u; + + for (i = 0;i < 17;++i) { + u = 0; + for (j = 0;j <= i;++j) u += h[j] * r[i - j]; + for (j = i + 1;j < 17;++j) u += 320 * h[j] * r[i + 17 - j]; + hr[i] = u; + } + for (i = 0;i < 17;++i) h[i] = hr[i]; + squeeze(h); +} + +int crypto_onetimeauth(unsigned char *out,const unsigned char *in,unsigned long long inlen,const unsigned char *k) +{ + unsigned int j; + unsigned int r[17]; + unsigned int h[17]; + unsigned int c[17]; + + r[0] = k[0]; + r[1] = k[1]; + r[2] = k[2]; + r[3] = k[3] & 15; + r[4] = k[4] & 252; + r[5] = k[5]; + r[6] = k[6]; + r[7] = k[7] & 15; + r[8] = k[8] & 252; + r[9] = k[9]; + r[10] = k[10]; + r[11] = k[11] & 15; + r[12] = k[12] & 252; + r[13] = k[13]; + r[14] = k[14]; + r[15] = k[15] & 15; + r[16] = 0; + + for (j = 0;j < 17;++j) h[j] = 0; + + while (inlen > 0) { + for (j = 0;j < 17;++j) c[j] = 0; + for (j = 0;(j < 16) && (j < inlen);++j) c[j] = in[j]; + c[j] = 1; + in += j; inlen -= j; + add(h,c); + mulmod(h,r); + } + + 
freeze(h); + + for (j = 0;j < 16;++j) c[j] = k[j + 16]; + c[16] = 0; + add(h,c); + for (j = 0;j < 16;++j) out[j] = h[j]; + return 0; +} diff --git a/nacl/crypto_onetimeauth/poly1305/ref/verify.c b/nacl/crypto_onetimeauth/poly1305/ref/verify.c new file mode 100644 index 00000000..c7e063f1 --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/ref/verify.c @@ -0,0 +1,9 @@ +#include "crypto_verify_16.h" +#include "crypto_onetimeauth.h" + +int crypto_onetimeauth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k) +{ + unsigned char correct[16]; + crypto_onetimeauth(correct,in,inlen,k); + return crypto_verify_16(h,correct); +} diff --git a/nacl/crypto_onetimeauth/poly1305/selected b/nacl/crypto_onetimeauth/poly1305/selected new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_onetimeauth/poly1305/used b/nacl/crypto_onetimeauth/poly1305/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_onetimeauth/poly1305/x86/api.h b/nacl/crypto_onetimeauth/poly1305/x86/api.h new file mode 100644 index 00000000..acc133ed --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/x86/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 16 +#define CRYPTO_KEYBYTES 32 diff --git a/nacl/crypto_onetimeauth/poly1305/x86/auth.s b/nacl/crypto_onetimeauth/poly1305/x86/auth.s new file mode 100644 index 00000000..acb8c51c --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/x86/auth.s @@ -0,0 +1,2779 @@ + +# qhasm: stack32 arg_out + +# qhasm: stack32 arg_m + +# qhasm: stack32 arg_l + +# qhasm: stack32 arg_ltop + +# qhasm: stack32 arg_k + +# qhasm: input arg_out + +# qhasm: input arg_m + +# qhasm: input arg_l + +# qhasm: input arg_ltop + +# qhasm: input arg_k + +# qhasm: int32 eax + +# qhasm: int32 ebx + +# qhasm: int32 esi + +# qhasm: int32 edi + +# qhasm: int32 ebp + +# qhasm: caller eax + +# qhasm: caller ebx + +# qhasm: caller esi + +# qhasm: caller edi + +# qhasm: caller ebp + +# qhasm: stack32 eax_stack + +# qhasm: 
stack32 ebx_stack + +# qhasm: stack32 esi_stack + +# qhasm: stack32 edi_stack + +# qhasm: stack32 ebp_stack + +# qhasm: int32 out + +# qhasm: stack32 out_stack + +# qhasm: int32 k + +# qhasm: stack32 k_stack + +# qhasm: int32 m + +# qhasm: int32 l + +# qhasm: int32 m0 + +# qhasm: int32 m1 + +# qhasm: int32 m2 + +# qhasm: int32 m3 + +# qhasm: float80 a0 + +# qhasm: float80 a1 + +# qhasm: float80 a2 + +# qhasm: float80 a3 + +# qhasm: float80 h0 + +# qhasm: float80 h1 + +# qhasm: float80 h2 + +# qhasm: float80 h3 + +# qhasm: float80 x0 + +# qhasm: float80 x1 + +# qhasm: float80 x2 + +# qhasm: float80 x3 + +# qhasm: float80 y0 + +# qhasm: float80 y1 + +# qhasm: float80 y2 + +# qhasm: float80 y3 + +# qhasm: float80 r0x0 + +# qhasm: float80 r1x0 + +# qhasm: float80 r2x0 + +# qhasm: float80 r3x0 + +# qhasm: float80 r0x1 + +# qhasm: float80 r1x1 + +# qhasm: float80 r2x1 + +# qhasm: float80 sr3x1 + +# qhasm: float80 r0x2 + +# qhasm: float80 r1x2 + +# qhasm: float80 sr2x2 + +# qhasm: float80 sr3x2 + +# qhasm: float80 r0x3 + +# qhasm: float80 sr1x3 + +# qhasm: float80 sr2x3 + +# qhasm: float80 sr3x3 + +# qhasm: stack64 d0 + +# qhasm: stack64 d1 + +# qhasm: stack64 d2 + +# qhasm: stack64 d3 + +# qhasm: stack64 r0 + +# qhasm: stack64 r1 + +# qhasm: stack64 r2 + +# qhasm: stack64 r3 + +# qhasm: stack64 sr1 + +# qhasm: stack64 sr2 + +# qhasm: stack64 sr3 + +# qhasm: enter crypto_onetimeauth_poly1305_x86 stackaligned4096 crypto_onetimeauth_poly1305_x86_constants +.text +.p2align 5 +.globl _crypto_onetimeauth_poly1305_x86 +.globl crypto_onetimeauth_poly1305_x86 +_crypto_onetimeauth_poly1305_x86: +crypto_onetimeauth_poly1305_x86: +mov %esp,%eax +sub $crypto_onetimeauth_poly1305_x86_constants,%eax +and $4095,%eax +add $192,%eax +sub %eax,%esp + +# qhasm: eax_stack = eax +# asm 1: movl eax_stack=stack32#1 +# asm 2: movl eax_stack=0(%esp) +movl %eax,0(%esp) + +# qhasm: ebx_stack = ebx +# asm 1: movl ebx_stack=stack32#2 +# asm 2: movl ebx_stack=4(%esp) +movl %ebx,4(%esp) + +# qhasm: 
esi_stack = esi +# asm 1: movl esi_stack=stack32#3 +# asm 2: movl esi_stack=8(%esp) +movl %esi,8(%esp) + +# qhasm: edi_stack = edi +# asm 1: movl edi_stack=stack32#4 +# asm 2: movl edi_stack=12(%esp) +movl %edi,12(%esp) + +# qhasm: ebp_stack = ebp +# asm 1: movl ebp_stack=stack32#5 +# asm 2: movl ebp_stack=16(%esp) +movl %ebp,16(%esp) + +# qhasm: round *(uint16 *) &crypto_onetimeauth_poly1305_x86_rounding +fldcw crypto_onetimeauth_poly1305_x86_rounding + +# qhasm: k = arg_k +# asm 1: movl k=int32#3 +# asm 2: movl k=%edx +movl 20(%esp,%eax),%edx + +# qhasm: m0 = *(uint32 *) (k + 0) +# asm 1: movl 0(m0=int32#2 +# asm 2: movl 0(m0=%ecx +movl 0(%edx),%ecx + +# qhasm: m1 = *(uint32 *) (k + 4) +# asm 1: movl 4(m1=int32#4 +# asm 2: movl 4(m1=%ebx +movl 4(%edx),%ebx + +# qhasm: m2 = *(uint32 *) (k + 8) +# asm 1: movl 8(m2=int32#5 +# asm 2: movl 8(m2=%esi +movl 8(%edx),%esi + +# qhasm: m3 = *(uint32 *) (k + 12) +# asm 1: movl 12(m3=int32#6 +# asm 2: movl 12(m3=%edi +movl 12(%edx),%edi + +# qhasm: d0 top = 0x43300000 +# asm 1: movl $0x43300000,>d0=stack64#1 +# asm 2: movl $0x43300000,>d0=100(%esp) +movl $0x43300000,100(%esp) + +# qhasm: d1 top = 0x45300000 +# asm 1: movl $0x45300000,>d1=stack64#2 +# asm 2: movl $0x45300000,>d1=108(%esp) +movl $0x45300000,108(%esp) + +# qhasm: d2 top = 0x47300000 +# asm 1: movl $0x47300000,>d2=stack64#3 +# asm 2: movl $0x47300000,>d2=116(%esp) +movl $0x47300000,116(%esp) + +# qhasm: d3 top = 0x49300000 +# asm 1: movl $0x49300000,>d3=stack64#4 +# asm 2: movl $0x49300000,>d3=124(%esp) +movl $0x49300000,124(%esp) + +# qhasm: m0 &= 0x0fffffff +# asm 1: and $0x0fffffff,r0=stack64#5 +# asm 2: fstpl >r0=128(%esp) +fstpl 128(%esp) +# comment:fpstackfrombottom:r1=stack64#6 +# asm 2: fstl >r1=136(%esp) +fstl 136(%esp) +# comment:fpstackfrombottom:sr1=stack64#7 +# asm 2: fstpl >sr1=144(%esp) +fstpl 144(%esp) +# comment:fpstackfrombottom:r2=stack64#8 +# asm 2: fstl >r2=152(%esp) +fstl 152(%esp) +# comment:fpstackfrombottom:sr2=stack64#9 +# asm 2: fstpl 
>sr2=160(%esp) +fstpl 160(%esp) +# comment:fpstackfrombottom:r3=stack64#10 +# asm 2: fstl >r3=168(%esp) +fstl 168(%esp) +# comment:fpstackfrombottom:sr3=stack64#11 +# asm 2: fstpl >sr3=176(%esp) +fstpl 176(%esp) +# comment:fpstackfrombottom: + +# qhasm: out = arg_out +# asm 1: movl out=int32#4 +# asm 2: movl out=%ebx +movl 4(%esp,%eax),%ebx + +# qhasm: m = arg_m +# asm 1: movl m=int32#5 +# asm 2: movl m=%esi +movl 8(%esp,%eax),%esi + +# qhasm: l = arg_l +# asm 1: movl l=int32#2 +# asm 2: movl l=%ecx +movl 12(%esp,%eax),%ecx + +# qhasm: h3 = 0 +fldz +# comment:fpstackfrombottom:k_stack=stack32#6 +# asm 2: movl k_stack=20(%esp) +movl %edx,20(%esp) +# comment:fpstackfrombottom:out_stack=stack32#7 +# asm 2: movl out_stack=24(%esp) +movl %ebx,24(%esp) +# comment:fpstackfrombottom:m3=int32#1 +# asm 2: movl 12(m3=%eax +movl 12(%esi),%eax +# comment:fpstackfrombottom:m2=int32#3 +# asm 2: movl 8(m2=%edx +movl 8(%esi),%edx +# comment:fpstackfrombottom:m1=int32#4 +# asm 2: movl 4(m1=%ebx +movl 4(%esi),%ebx +# comment:fpstackfrombottom:m0=int32#6 +# asm 2: movl 0(m0=%edi +movl 0(%esi),%edi +# comment:fpstackfrombottom:m3=int32#1 +# asm 2: movl 12(m3=%eax +movl 12(%esi),%eax +# comment:fpstackfrombottom:m2=int32#3 +# asm 2: movl 8(m2=%edx +movl 8(%esi),%edx +# comment:fpstackfrombottom:m1=int32#4 +# asm 2: movl 4(m1=%ebx +movl 4(%esi),%ebx +# comment:fpstackfrombottom:m0=int32#6 +# asm 2: movl 0(m0=%edi +movl 0(%esi),%edi +# comment:fpstackfrombottom:lastchunk=stack128#1 +# asm 2: movl $0,>lastchunk=64(%esp) +movl $0,64(%esp) +# comment:fpstackfrombottom:destination=int32#6 +# asm 2: leal destination=%edi +leal 64(%esp),%edi +# comment:fpstackfrombottom:m3=int32#1 +# asm 2: movl 12+m3=%eax +movl 12+64(%esp),%eax +# comment:fpstackfrombottom:m2=int32#2 +# asm 2: movl 8+m2=%ecx +movl 8+64(%esp),%ecx +# comment:fpstackfrombottom:m1=int32#3 +# asm 2: movl 4+m1=%edx +movl 4+64(%esp),%edx +# comment:fpstackfrombottom:m0=int32#4 +# asm 2: movl m0=%ebx +movl 64(%esp),%ebx +# 
comment:fpstackfrombottom:d0=stack64#1 +# asm 2: fstpl >d0=96(%esp) +fstpl 96(%esp) +# comment:fpstackfrombottom:d1=stack64#2 +# asm 2: fstpl >d1=104(%esp) +fstpl 104(%esp) +# comment:fpstackfrombottom:d2=stack64#3 +# asm 2: fstpl >d2=112(%esp) +fstpl 112(%esp) +# comment:fpstackfrombottom:d3=stack64#4 +# asm 2: fstpl >d3=120(%esp) +fstpl 120(%esp) +# comment:fpstackfrombottom: + +# qhasm: int32 f0 + +# qhasm: int32 f1 + +# qhasm: int32 f2 + +# qhasm: int32 f3 + +# qhasm: int32 f4 + +# qhasm: int32 g0 + +# qhasm: int32 g1 + +# qhasm: int32 g2 + +# qhasm: int32 g3 + +# qhasm: int32 f + +# qhasm: int32 notf + +# qhasm: stack32 f1_stack + +# qhasm: stack32 f2_stack + +# qhasm: stack32 f3_stack + +# qhasm: stack32 f4_stack + +# qhasm: stack32 g0_stack + +# qhasm: stack32 g1_stack + +# qhasm: stack32 g2_stack + +# qhasm: stack32 g3_stack + +# qhasm: g0 = top d0 +# asm 1: movl g0=int32#1 +# asm 2: movl g0=%eax +movl 100(%esp),%eax + +# qhasm: g0 &= 63 +# asm 1: and $63,g1=int32#2 +# asm 2: movl g1=%ecx +movl 108(%esp),%ecx + +# qhasm: g1 &= 63 +# asm 1: and $63,g2=int32#3 +# asm 2: movl g2=%edx +movl 116(%esp),%edx + +# qhasm: g2 &= 63 +# asm 1: and $63,g3=int32#4 +# asm 2: movl g3=%ebx +movl 124(%esp),%ebx + +# qhasm: g3 &= 63 +# asm 1: and $63,f1=int32#5 +# asm 2: movl f1=%esi +movl 104(%esp),%esi + +# qhasm: carry? f1 += g0 +# asm 1: addl f1_stack=stack32#8 +# asm 2: movl f1_stack=28(%esp) +movl %esi,28(%esp) + +# qhasm: f2 = bottom d2 +# asm 1: movl f2=int32#1 +# asm 2: movl f2=%eax +movl 112(%esp),%eax + +# qhasm: carry? f2 += g1 + carry +# asm 1: adcl f2_stack=stack32#9 +# asm 2: movl f2_stack=32(%esp) +movl %eax,32(%esp) + +# qhasm: f3 = bottom d3 +# asm 1: movl f3=int32#1 +# asm 2: movl f3=%eax +movl 120(%esp),%eax + +# qhasm: carry? f3 += g2 + carry +# asm 1: adcl f3_stack=stack32#10 +# asm 2: movl f3_stack=36(%esp) +movl %eax,36(%esp) + +# qhasm: f4 = 0 +# asm 1: mov $0,>f4=int32#1 +# asm 2: mov $0,>f4=%eax +mov $0,%eax + +# qhasm: carry? 
f4 += g3 + carry +# asm 1: adcl f4_stack=stack32#11 +# asm 2: movl f4_stack=40(%esp) +movl %eax,40(%esp) + +# qhasm: g0 = 5 +# asm 1: mov $5,>g0=int32#1 +# asm 2: mov $5,>g0=%eax +mov $5,%eax + +# qhasm: f0 = bottom d0 +# asm 1: movl f0=int32#2 +# asm 2: movl f0=%ecx +movl 96(%esp),%ecx + +# qhasm: carry? g0 += f0 +# asm 1: addl g0_stack=stack32#12 +# asm 2: movl g0_stack=44(%esp) +movl %eax,44(%esp) + +# qhasm: g1 = 0 +# asm 1: mov $0,>g1=int32#1 +# asm 2: mov $0,>g1=%eax +mov $0,%eax + +# qhasm: f1 = f1_stack +# asm 1: movl f1=int32#3 +# asm 2: movl f1=%edx +movl 28(%esp),%edx + +# qhasm: carry? g1 += f1 + carry +# asm 1: adcl g1_stack=stack32#8 +# asm 2: movl g1_stack=28(%esp) +movl %eax,28(%esp) + +# qhasm: g2 = 0 +# asm 1: mov $0,>g2=int32#1 +# asm 2: mov $0,>g2=%eax +mov $0,%eax + +# qhasm: f2 = f2_stack +# asm 1: movl f2=int32#4 +# asm 2: movl f2=%ebx +movl 32(%esp),%ebx + +# qhasm: carry? g2 += f2 + carry +# asm 1: adcl g2_stack=stack32#9 +# asm 2: movl g2_stack=32(%esp) +movl %eax,32(%esp) + +# qhasm: g3 = 0 +# asm 1: mov $0,>g3=int32#1 +# asm 2: mov $0,>g3=%eax +mov $0,%eax + +# qhasm: f3 = f3_stack +# asm 1: movl f3=int32#5 +# asm 2: movl f3=%esi +movl 36(%esp),%esi + +# qhasm: carry? g3 += f3 + carry +# asm 1: adcl g3_stack=stack32#10 +# asm 2: movl g3_stack=36(%esp) +movl %eax,36(%esp) + +# qhasm: f = 0xfffffffc +# asm 1: mov $0xfffffffc,>f=int32#1 +# asm 2: mov $0xfffffffc,>f=%eax +mov $0xfffffffc,%eax + +# qhasm: f4 = f4_stack +# asm 1: movl f4=int32#6 +# asm 2: movl f4=%edi +movl 40(%esp),%edi + +# qhasm: carry? 
f += f4 + carry +# asm 1: adcl >= 16 +# asm 1: sar $16,notf=int32#6 +# asm 2: mov notf=%edi +mov %eax,%edi + +# qhasm: notf ^= 0xffffffff +# asm 1: xor $0xffffffff,g0=int32#7 +# asm 2: movl g0=%ebp +movl 44(%esp),%ebp + +# qhasm: g0 &= notf +# asm 1: andl g1=int32#7 +# asm 2: movl g1=%ebp +movl 28(%esp),%ebp + +# qhasm: g1 &= notf +# asm 1: andl g2=int32#7 +# asm 2: movl g2=%ebp +movl 32(%esp),%ebp + +# qhasm: g2 &= notf +# asm 1: andl g3=int32#1 +# asm 2: movl g3=%eax +movl 36(%esp),%eax + +# qhasm: g3 &= notf +# asm 1: andl k=int32#1 +# asm 2: movl k=%eax +movl 20(%esp),%eax + +# qhasm: carry? f0 += *(uint32 *) (k + 16) +# asm 1: addl 16(out=int32#1 +# asm 2: movl out=%eax +movl 24(%esp),%eax + +# qhasm: *(uint32 *) (out + 0) = f0 +# asm 1: movl eax=int32#1 +# asm 2: movl eax=%eax +movl 0(%esp),%eax + +# qhasm: ebx = ebx_stack +# asm 1: movl ebx=int32#4 +# asm 2: movl ebx=%ebx +movl 4(%esp),%ebx + +# qhasm: esi = esi_stack +# asm 1: movl esi=int32#5 +# asm 2: movl esi=%esi +movl 8(%esp),%esi + +# qhasm: edi = edi_stack +# asm 1: movl edi=int32#6 +# asm 2: movl edi=%edi +movl 12(%esp),%edi + +# qhasm: ebp = ebp_stack +# asm 1: movl ebp=int32#7 +# asm 2: movl ebp=%ebp +movl 16(%esp),%ebp + +# qhasm: leave +add %eax,%esp +xor %eax,%eax +ret diff --git a/nacl/crypto_onetimeauth/poly1305/x86/constants.s b/nacl/crypto_onetimeauth/poly1305/x86/constants.s new file mode 100644 index 00000000..ab2456c3 --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/x86/constants.s @@ -0,0 +1,85 @@ +# version 20080912 +# D. J. Bernstein +# Public domain. 
+ +.data +.section .rodata +.p2align 5 + +.globl _crypto_onetimeauth_poly1305_x86_constants +.globl crypto_onetimeauth_poly1305_x86_constants +.globl crypto_onetimeauth_poly1305_x86_scale +.globl crypto_onetimeauth_poly1305_x86_two32 +.globl crypto_onetimeauth_poly1305_x86_two64 +.globl crypto_onetimeauth_poly1305_x86_two96 +.globl crypto_onetimeauth_poly1305_x86_alpha32 +.globl crypto_onetimeauth_poly1305_x86_alpha64 +.globl crypto_onetimeauth_poly1305_x86_alpha96 +.globl crypto_onetimeauth_poly1305_x86_alpha130 +.globl crypto_onetimeauth_poly1305_x86_doffset0 +.globl crypto_onetimeauth_poly1305_x86_doffset1 +.globl crypto_onetimeauth_poly1305_x86_doffset2 +.globl crypto_onetimeauth_poly1305_x86_doffset3 +.globl crypto_onetimeauth_poly1305_x86_doffset3minustwo128 +.globl crypto_onetimeauth_poly1305_x86_hoffset0 +.globl crypto_onetimeauth_poly1305_x86_hoffset1 +.globl crypto_onetimeauth_poly1305_x86_hoffset2 +.globl crypto_onetimeauth_poly1305_x86_hoffset3 +.globl crypto_onetimeauth_poly1305_x86_rounding + +_crypto_onetimeauth_poly1305_x86_constants: +crypto_onetimeauth_poly1305_x86_constants: +crypto_onetimeauth_poly1305_x86_scale: +.long 0x0,0x37f40000 + +crypto_onetimeauth_poly1305_x86_two32: +.long 0x0,0x41f00000 + +crypto_onetimeauth_poly1305_x86_two64: +.long 0x0,0x43f00000 + +crypto_onetimeauth_poly1305_x86_two96: +.long 0x0,0x45f00000 + +crypto_onetimeauth_poly1305_x86_alpha32: +.long 0x0,0x45e80000 + +crypto_onetimeauth_poly1305_x86_alpha64: +.long 0x0,0x47e80000 + +crypto_onetimeauth_poly1305_x86_alpha96: +.long 0x0,0x49e80000 + +crypto_onetimeauth_poly1305_x86_alpha130: +.long 0x0,0x4c080000 + +crypto_onetimeauth_poly1305_x86_doffset0: +.long 0x0,0x43300000 + +crypto_onetimeauth_poly1305_x86_doffset1: +.long 0x0,0x45300000 + +crypto_onetimeauth_poly1305_x86_doffset2: +.long 0x0,0x47300000 + +crypto_onetimeauth_poly1305_x86_doffset3: +.long 0x0,0x49300000 + +crypto_onetimeauth_poly1305_x86_doffset3minustwo128: +.long 0x0,0x492ffffe + 
+crypto_onetimeauth_poly1305_x86_hoffset0: +.long 0xfffffffb,0x43300001 + +crypto_onetimeauth_poly1305_x86_hoffset1: +.long 0xfffffffe,0x45300001 + +crypto_onetimeauth_poly1305_x86_hoffset2: +.long 0xfffffffe,0x47300001 + +crypto_onetimeauth_poly1305_x86_hoffset3: +.long 0xfffffffe,0x49300003 + +crypto_onetimeauth_poly1305_x86_rounding: +.byte 0x7f +.byte 0x13 diff --git a/nacl/crypto_onetimeauth/poly1305/x86/verify.c b/nacl/crypto_onetimeauth/poly1305/x86/verify.c new file mode 100644 index 00000000..c7e063f1 --- /dev/null +++ b/nacl/crypto_onetimeauth/poly1305/x86/verify.c @@ -0,0 +1,9 @@ +#include "crypto_verify_16.h" +#include "crypto_onetimeauth.h" + +int crypto_onetimeauth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k) +{ + unsigned char correct[16]; + crypto_onetimeauth(correct,in,inlen,k); + return crypto_verify_16(h,correct); +} diff --git a/nacl/crypto_onetimeauth/try.c b/nacl/crypto_onetimeauth/try.c new file mode 100644 index 00000000..54f4396d --- /dev/null +++ b/nacl/crypto_onetimeauth/try.c @@ -0,0 +1,119 @@ +/* + * crypto_onetimeauth/try.c version 20090118 + * D. J. Bernstein + * Public domain. 
+ */ + +#include "crypto_hash_sha256.h" +#include "crypto_onetimeauth.h" + +extern unsigned char *alignedcalloc(unsigned long long); + +const char *primitiveimplementation = crypto_onetimeauth_IMPLEMENTATION; + +#define MAXTEST_BYTES 10000 +#define CHECKSUM_BYTES 4096 +#define TUNE_BYTES 1536 + +static unsigned char *h; +static unsigned char *m; +static unsigned char *k; +static unsigned char *h2; +static unsigned char *m2; +static unsigned char *k2; + +void preallocate(void) +{ +} + +void allocate(void) +{ + h = alignedcalloc(crypto_onetimeauth_BYTES); + m = alignedcalloc(MAXTEST_BYTES); + k = alignedcalloc(crypto_onetimeauth_KEYBYTES); + h2 = alignedcalloc(crypto_onetimeauth_BYTES); + m2 = alignedcalloc(MAXTEST_BYTES + crypto_onetimeauth_BYTES); + k2 = alignedcalloc(crypto_onetimeauth_KEYBYTES + crypto_onetimeauth_BYTES); +} + +void predoit(void) +{ +} + +void doit(void) +{ + crypto_onetimeauth(h,m,TUNE_BYTES,k); + crypto_onetimeauth_verify(h,m,TUNE_BYTES,k); +} + +char checksum[crypto_onetimeauth_BYTES * 2 + 1]; + +const char *checksum_compute(void) +{ + long long i; + long long j; + + for (i = 0;i < CHECKSUM_BYTES;++i) { + long long mlen = i; + long long klen = crypto_onetimeauth_KEYBYTES; + long long hlen = crypto_onetimeauth_BYTES; + + for (j = -16;j < 0;++j) h[j] = random(); + for (j = -16;j < 0;++j) k[j] = random(); + for (j = -16;j < 0;++j) m[j] = random(); + for (j = hlen;j < hlen + 16;++j) h[j] = random(); + for (j = klen;j < klen + 16;++j) k[j] = random(); + for (j = mlen;j < mlen + 16;++j) m[j] = random(); + for (j = -16;j < hlen + 16;++j) h2[j] = h[j]; + for (j = -16;j < klen + 16;++j) k2[j] = k[j]; + for (j = -16;j < mlen + 16;++j) m2[j] = m[j]; + + if (crypto_onetimeauth(h,m,mlen,k) != 0) return "crypto_onetimeauth returns nonzero"; + + for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_onetimeauth overwrites k"; + for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_onetimeauth overwrites m"; + for (j = -16;j < 0;++j) if 
(h[j] != h2[j]) return "crypto_onetimeauth writes before output"; + for (j = hlen;j < hlen + 16;++j) if (h[j] != h2[j]) return "crypto_onetimeauth writes after output"; + + for (j = -16;j < 0;++j) h[j] = random(); + for (j = -16;j < 0;++j) k[j] = random(); + for (j = -16;j < 0;++j) m[j] = random(); + for (j = hlen;j < hlen + 16;++j) h[j] = random(); + for (j = klen;j < klen + 16;++j) k[j] = random(); + for (j = mlen;j < mlen + 16;++j) m[j] = random(); + for (j = -16;j < hlen + 16;++j) h2[j] = h[j]; + for (j = -16;j < klen + 16;++j) k2[j] = k[j]; + for (j = -16;j < mlen + 16;++j) m2[j] = m[j]; + + if (crypto_onetimeauth(m2,m2,mlen,k) != 0) return "crypto_onetimeauth returns nonzero"; + for (j = 0;j < hlen;++j) if (m2[j] != h[j]) return "crypto_onetimeauth does not handle m overlap"; + for (j = 0;j < hlen;++j) m2[j] = m[j]; + if (crypto_onetimeauth(k2,m2,mlen,k2) != 0) return "crypto_onetimeauth returns nonzero"; + for (j = 0;j < hlen;++j) if (k2[j] != h[j]) return "crypto_onetimeauth does not handle k overlap"; + for (j = 0;j < hlen;++j) k2[j] = k[j]; + + if (crypto_onetimeauth_verify(h,m,mlen,k) != 0) return "crypto_onetimeauth_verify returns nonzero"; + + for (j = -16;j < hlen + 16;++j) if (h[j] != h2[j]) return "crypto_onetimeauth overwrites h"; + for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_onetimeauth overwrites k"; + for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_onetimeauth overwrites m"; + + crypto_hash_sha256(h2,h,hlen); + for (j = 0;j < klen;++j) k[j] ^= h2[j % 32]; + if (crypto_onetimeauth(h,m,mlen,k) != 0) return "crypto_onetimeauth returns nonzero"; + if (crypto_onetimeauth_verify(h,m,mlen,k) != 0) return "crypto_onetimeauth_verify returns nonzero"; + + crypto_hash_sha256(h2,h,hlen); + for (j = 0;j < mlen;++j) m[j] ^= h2[j % 32]; + m[mlen] = h2[0]; + } + if (crypto_onetimeauth(h,m,CHECKSUM_BYTES,k) != 0) return "crypto_onetimeauth returns nonzero"; + if (crypto_onetimeauth_verify(h,m,CHECKSUM_BYTES,k) != 0) return 
"crypto_onetimeauth_verify returns nonzero"; + + for (i = 0;i < crypto_onetimeauth_BYTES;++i) { + checksum[2 * i] = "0123456789abcdef"[15 & (h[i] >> 4)]; + checksum[2 * i + 1] = "0123456789abcdef"[15 & h[i]]; + } + checksum[2 * i] = 0; + return 0; +} diff --git a/nacl/crypto_onetimeauth/wrapper-auth.cpp b/nacl/crypto_onetimeauth/wrapper-auth.cpp new file mode 100644 index 00000000..f4279607 --- /dev/null +++ b/nacl/crypto_onetimeauth/wrapper-auth.cpp @@ -0,0 +1,11 @@ +#include +using std::string; +#include "crypto_onetimeauth.h" + +string crypto_onetimeauth(const string &m,const string &k) +{ + if (k.size() != crypto_onetimeauth_KEYBYTES) throw "incorrect key length"; + unsigned char a[crypto_onetimeauth_BYTES]; + crypto_onetimeauth(a,(const unsigned char *) m.c_str(),m.size(),(const unsigned char *) k.c_str()); + return string((char *) a,crypto_onetimeauth_BYTES); +} diff --git a/nacl/crypto_onetimeauth/wrapper-verify.cpp b/nacl/crypto_onetimeauth/wrapper-verify.cpp new file mode 100644 index 00000000..248239ee --- /dev/null +++ b/nacl/crypto_onetimeauth/wrapper-verify.cpp @@ -0,0 +1,14 @@ +#include +using std::string; +#include "crypto_onetimeauth.h" + +void crypto_onetimeauth_verify(const string &a,const string &m,const string &k) +{ + if (k.size() != crypto_onetimeauth_KEYBYTES) throw "incorrect key length"; + if (a.size() != crypto_onetimeauth_BYTES) throw "incorrect authenticator length"; + if (crypto_onetimeauth_verify( + (const unsigned char *) a.c_str(), + (const unsigned char *) m.c_str(),m.size(), + (const unsigned char *) k.c_str()) == 0) return; + throw "invalid authenticator"; +} diff --git a/nacl/crypto_scalarmult/curve25519/athlon/api.h b/nacl/crypto_scalarmult/curve25519/athlon/api.h new file mode 100644 index 00000000..60339596 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/athlon/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 32 +#define CRYPTO_SCALARBYTES 32 diff --git a/nacl/crypto_scalarmult/curve25519/athlon/base.c 
b/nacl/crypto_scalarmult/curve25519/athlon/base.c new file mode 100644 index 00000000..dde929ec --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/athlon/base.c @@ -0,0 +1,8 @@ +#include "crypto_scalarmult.h" + +static char basepoint[32] = {9}; + +int crypto_scalarmult_base(unsigned char *q,const unsigned char *n) +{ + return crypto_scalarmult(q,n,basepoint); +} diff --git a/nacl/crypto_scalarmult/curve25519/athlon/const.s b/nacl/crypto_scalarmult/curve25519/athlon/const.s new file mode 100644 index 00000000..9042c2fb --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/athlon/const.s @@ -0,0 +1,114 @@ +.data +.section .rodata +.p2align 5 + +.globl crypto_scalarmult_curve25519_athlon_scale +.globl crypto_scalarmult_curve25519_athlon_121665 +.globl crypto_scalarmult_curve25519_athlon_alpha26 +.globl crypto_scalarmult_curve25519_athlon_alpha51 +.globl crypto_scalarmult_curve25519_athlon_alpha77 +.globl crypto_scalarmult_curve25519_athlon_alpha102 +.globl crypto_scalarmult_curve25519_athlon_alpha128 +.globl crypto_scalarmult_curve25519_athlon_alpha153 +.globl crypto_scalarmult_curve25519_athlon_alpha179 +.globl crypto_scalarmult_curve25519_athlon_alpha204 +.globl crypto_scalarmult_curve25519_athlon_alpha230 +.globl crypto_scalarmult_curve25519_athlon_alpha255 +.globl crypto_scalarmult_curve25519_athlon_in0offset +.globl crypto_scalarmult_curve25519_athlon_in1offset +.globl crypto_scalarmult_curve25519_athlon_in2offset +.globl crypto_scalarmult_curve25519_athlon_in3offset +.globl crypto_scalarmult_curve25519_athlon_in4offset +.globl crypto_scalarmult_curve25519_athlon_in5offset +.globl crypto_scalarmult_curve25519_athlon_in6offset +.globl crypto_scalarmult_curve25519_athlon_in7offset +.globl crypto_scalarmult_curve25519_athlon_in8offset +.globl crypto_scalarmult_curve25519_athlon_in9offset +.globl crypto_scalarmult_curve25519_athlon_out0offset +.globl crypto_scalarmult_curve25519_athlon_out1offset +.globl crypto_scalarmult_curve25519_athlon_out2offset +.globl 
crypto_scalarmult_curve25519_athlon_out3offset +.globl crypto_scalarmult_curve25519_athlon_out4offset +.globl crypto_scalarmult_curve25519_athlon_out5offset +.globl crypto_scalarmult_curve25519_athlon_out6offset +.globl crypto_scalarmult_curve25519_athlon_out7offset +.globl crypto_scalarmult_curve25519_athlon_out8offset +.globl crypto_scalarmult_curve25519_athlon_out9offset +.globl crypto_scalarmult_curve25519_athlon_two0 +.globl crypto_scalarmult_curve25519_athlon_two1 +.globl crypto_scalarmult_curve25519_athlon_zero +.globl crypto_scalarmult_curve25519_athlon_rounding + +crypto_scalarmult_curve25519_athlon_scale: + .long 0x0,0x30430000 +crypto_scalarmult_curve25519_athlon_121665: + .long 0x0,0x40fdb410 +crypto_scalarmult_curve25519_athlon_in0offset: + .long 0x0,0x43300000 +crypto_scalarmult_curve25519_athlon_in1offset: + .long 0x0,0x45300000 +crypto_scalarmult_curve25519_athlon_in2offset: + .long 0x0,0x46b00000 +crypto_scalarmult_curve25519_athlon_in3offset: + .long 0x0,0x48300000 +crypto_scalarmult_curve25519_athlon_in4offset: + .long 0x0,0x49b00000 +crypto_scalarmult_curve25519_athlon_in5offset: + .long 0x0,0x4b300000 +crypto_scalarmult_curve25519_athlon_in6offset: + .long 0x0,0x4d300000 +crypto_scalarmult_curve25519_athlon_in7offset: + .long 0x0,0x4eb00000 +crypto_scalarmult_curve25519_athlon_in8offset: + .long 0x0,0x50300000 +crypto_scalarmult_curve25519_athlon_in9offset: + .long 0x0,0x51b00000 +crypto_scalarmult_curve25519_athlon_alpha26: + .long 0x0,0x45880000 +crypto_scalarmult_curve25519_athlon_alpha51: + .long 0x0,0x47180000 +crypto_scalarmult_curve25519_athlon_alpha77: + .long 0x0,0x48b80000 +crypto_scalarmult_curve25519_athlon_alpha102: + .long 0x0,0x4a480000 +crypto_scalarmult_curve25519_athlon_alpha128: + .long 0x0,0x4be80000 +crypto_scalarmult_curve25519_athlon_alpha153: + .long 0x0,0x4d780000 +crypto_scalarmult_curve25519_athlon_alpha179: + .long 0x0,0x4f180000 +crypto_scalarmult_curve25519_athlon_alpha204: + .long 0x0,0x50a80000 
+crypto_scalarmult_curve25519_athlon_alpha230: + .long 0x0,0x52480000 +crypto_scalarmult_curve25519_athlon_alpha255: + .long 0x0,0x53d80000 +crypto_scalarmult_curve25519_athlon_two0: + .long 0x0,0x3ff00000 +crypto_scalarmult_curve25519_athlon_two1: + .long 0x0,0x40000000 +crypto_scalarmult_curve25519_athlon_zero: + .long 0x0,0x0 +crypto_scalarmult_curve25519_athlon_out0offset: + .long 0x1fffffed,0x43380000 +crypto_scalarmult_curve25519_athlon_out1offset: + .long 0xffffff8,0x44d80000 +crypto_scalarmult_curve25519_athlon_out2offset: + .long 0x1ffffff8,0x46680000 +crypto_scalarmult_curve25519_athlon_out3offset: + .long 0xffffff8,0x48080000 +crypto_scalarmult_curve25519_athlon_out4offset: + .long 0x1ffffff8,0x49980000 +crypto_scalarmult_curve25519_athlon_out5offset: + .long 0xffffff8,0x4b380000 +crypto_scalarmult_curve25519_athlon_out6offset: + .long 0x1ffffff8,0x4cc80000 +crypto_scalarmult_curve25519_athlon_out7offset: + .long 0xffffff8,0x4e680000 +crypto_scalarmult_curve25519_athlon_out8offset: + .long 0x1ffffff8,0x4ff80000 +crypto_scalarmult_curve25519_athlon_out9offset: + .long 0x1fffff8,0x51980000 +crypto_scalarmult_curve25519_athlon_rounding: + .byte 0x7f + .byte 0x13 diff --git a/nacl/crypto_scalarmult/curve25519/athlon/fromdouble.s b/nacl/crypto_scalarmult/curve25519/athlon/fromdouble.s new file mode 100644 index 00000000..221ca35f --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/athlon/fromdouble.s @@ -0,0 +1,195 @@ +.text +.p2align 5 +.globl _crypto_scalarmult_curve25519_athlon_fromdouble +.globl crypto_scalarmult_curve25519_athlon_fromdouble +_crypto_scalarmult_curve25519_athlon_fromdouble: +crypto_scalarmult_curve25519_athlon_fromdouble: +mov %esp,%eax +and $31,%eax +add $192,%eax +sub %eax,%esp +movl %ebp,0(%esp) +movl 8(%esp,%eax),%ecx +fldl 0(%ecx) +faddl crypto_scalarmult_curve25519_athlon_out0offset +fstpl 96(%esp) +fldl 8(%ecx) +faddl crypto_scalarmult_curve25519_athlon_out1offset +fstpl 104(%esp) +fldl 16(%ecx) +faddl 
crypto_scalarmult_curve25519_athlon_out2offset +fstpl 112(%esp) +fldl 24(%ecx) +faddl crypto_scalarmult_curve25519_athlon_out3offset +fstpl 120(%esp) +fldl 32(%ecx) +faddl crypto_scalarmult_curve25519_athlon_out4offset +fstpl 128(%esp) +fldl 40(%ecx) +faddl crypto_scalarmult_curve25519_athlon_out5offset +fstpl 136(%esp) +fldl 48(%ecx) +faddl crypto_scalarmult_curve25519_athlon_out6offset +fstpl 144(%esp) +fldl 56(%ecx) +faddl crypto_scalarmult_curve25519_athlon_out7offset +fstpl 152(%esp) +fldl 64(%ecx) +faddl crypto_scalarmult_curve25519_athlon_out8offset +fstpl 160(%esp) +fldl 72(%ecx) +faddl crypto_scalarmult_curve25519_athlon_out9offset +fstpl 168(%esp) +movl 96(%esp),%ecx +movl %ecx,4(%esp) +movl 104(%esp),%ecx +shl $26,%ecx +movl %ecx,40(%esp) +movl 104(%esp),%ecx +shr $6,%ecx +movl %ecx,8(%esp) +movl 112(%esp),%ecx +shl $19,%ecx +movl %ecx,44(%esp) +movl 112(%esp),%ecx +shr $13,%ecx +movl %ecx,12(%esp) +movl 120(%esp),%ecx +shl $13,%ecx +movl %ecx,48(%esp) +movl 120(%esp),%ecx +shr $19,%ecx +movl %ecx,16(%esp) +movl 128(%esp),%ecx +shl $6,%ecx +movl %ecx,52(%esp) +movl 128(%esp),%ecx +shr $26,%ecx +movl 136(%esp),%edx +add %edx,%ecx +movl %ecx,20(%esp) +movl 144(%esp),%ecx +shl $25,%ecx +movl %ecx,56(%esp) +movl 144(%esp),%ecx +shr $7,%ecx +movl %ecx,24(%esp) +movl 152(%esp),%ecx +shl $19,%ecx +movl %ecx,60(%esp) +movl 152(%esp),%ecx +shr $13,%ecx +movl %ecx,28(%esp) +movl 160(%esp),%ecx +shl $12,%ecx +movl %ecx,64(%esp) +movl 160(%esp),%ecx +shr $20,%ecx +movl %ecx,32(%esp) +movl 168(%esp),%ecx +shl $6,%ecx +movl %ecx,68(%esp) +movl 168(%esp),%ecx +shr $26,%ecx +movl %ecx,36(%esp) +mov $0,%ecx +movl %ecx,72(%esp) +movl 4(%esp),%ecx +addl 40(%esp),%ecx +movl %ecx,4(%esp) +movl 8(%esp),%ecx +adcl 44(%esp),%ecx +movl %ecx,8(%esp) +movl 12(%esp),%ecx +adcl 48(%esp),%ecx +movl %ecx,12(%esp) +movl 16(%esp),%ecx +adcl 52(%esp),%ecx +movl %ecx,16(%esp) +movl 20(%esp),%ecx +adcl 56(%esp),%ecx +movl %ecx,20(%esp) +movl 24(%esp),%ecx +adcl 60(%esp),%ecx +movl 
%ecx,24(%esp) +movl 28(%esp),%ecx +adcl 64(%esp),%ecx +movl %ecx,28(%esp) +movl 32(%esp),%ecx +adcl 68(%esp),%ecx +movl %ecx,32(%esp) +movl 36(%esp),%ecx +adcl 72(%esp),%ecx +movl %ecx,36(%esp) +movl 4(%esp),%ecx +adc $0x13,%ecx +movl %ecx,40(%esp) +movl 8(%esp),%ecx +adc $0,%ecx +movl %ecx,44(%esp) +movl 12(%esp),%ecx +adc $0,%ecx +movl %ecx,48(%esp) +movl 16(%esp),%ecx +adc $0,%ecx +movl %ecx,52(%esp) +movl 20(%esp),%ecx +adc $0,%ecx +movl %ecx,56(%esp) +movl 24(%esp),%ecx +adc $0,%ecx +movl %ecx,60(%esp) +movl 28(%esp),%ecx +adc $0,%ecx +movl %ecx,64(%esp) +movl 32(%esp),%ecx +adc $0x80000000,%ecx +movl %ecx,68(%esp) +movl 36(%esp),%ebp +adc $0xffffffff,%ebp +and $0x80000000,%ebp +sar $31,%ebp +movl 4(%esp,%eax),%ecx +movl 4(%esp),%edx +xorl 40(%esp),%edx +and %ebp,%edx +xorl 40(%esp),%edx +movl %edx,0(%ecx) +movl 8(%esp),%edx +xorl 44(%esp),%edx +and %ebp,%edx +xorl 44(%esp),%edx +movl %edx,4(%ecx) +movl 12(%esp),%edx +xorl 48(%esp),%edx +and %ebp,%edx +xorl 48(%esp),%edx +movl %edx,8(%ecx) +movl 16(%esp),%edx +xorl 52(%esp),%edx +and %ebp,%edx +xorl 52(%esp),%edx +movl %edx,12(%ecx) +movl 20(%esp),%edx +xorl 56(%esp),%edx +and %ebp,%edx +xorl 56(%esp),%edx +movl %edx,16(%ecx) +movl 24(%esp),%edx +xorl 60(%esp),%edx +and %ebp,%edx +xorl 60(%esp),%edx +movl %edx,20(%ecx) +movl 28(%esp),%edx +xorl 64(%esp),%edx +and %ebp,%edx +xorl 64(%esp),%edx +movl %edx,24(%ecx) +movl 32(%esp),%edx +xorl 68(%esp),%edx +and %ebp,%edx +xorl 68(%esp),%edx +movl %edx,28(%ecx) +movl 0(%esp),%ebp +add %eax,%esp +ret diff --git a/nacl/crypto_scalarmult/curve25519/athlon/implementors b/nacl/crypto_scalarmult/curve25519/athlon/implementors new file mode 100644 index 00000000..f6fb3c73 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/athlon/implementors @@ -0,0 +1 @@ +Daniel J. 
Bernstein diff --git a/nacl/crypto_scalarmult/curve25519/athlon/init.s b/nacl/crypto_scalarmult/curve25519/athlon/init.s new file mode 100644 index 00000000..edd3c589 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/athlon/init.s @@ -0,0 +1,13 @@ +.text +.p2align 5 +.globl _crypto_scalarmult_curve25519_athlon_init +.globl crypto_scalarmult_curve25519_athlon_init +_crypto_scalarmult_curve25519_athlon_init: +crypto_scalarmult_curve25519_athlon_init: +mov %esp,%eax +and $31,%eax +add $0,%eax +sub %eax,%esp +fldcw crypto_scalarmult_curve25519_athlon_rounding +add %eax,%esp +ret diff --git a/nacl/crypto_scalarmult/curve25519/athlon/mainloop.s b/nacl/crypto_scalarmult/curve25519/athlon/mainloop.s new file mode 100644 index 00000000..47412905 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/athlon/mainloop.s @@ -0,0 +1,3990 @@ +.text +.p2align 5 +.globl _crypto_scalarmult_curve25519_athlon_mainloop +.globl crypto_scalarmult_curve25519_athlon_mainloop +_crypto_scalarmult_curve25519_athlon_mainloop: +crypto_scalarmult_curve25519_athlon_mainloop: +mov %esp,%eax +and $31,%eax +add $704,%eax +sub %eax,%esp +lea 256(%esp),%edx +lea 512(%esp),%ecx +fldl crypto_scalarmult_curve25519_athlon_two0 +fldl crypto_scalarmult_curve25519_athlon_zero +movl %eax,160(%ecx) +movl %ebx,164(%ecx) +movl %esi,168(%ecx) +movl %edi,172(%ecx) +movl %ebp,176(%ecx) +movl 4(%esp,%eax),%ebx +fxch %st(1) +fstl 0(%esp) +fxch %st(1) +fstl 8(%esp) +fstl 16(%esp) +fstl 24(%esp) +fstl 32(%esp) +fstl 40(%esp) +fstl 48(%esp) +fstl -120(%edx) +fstl -112(%edx) +fstl -104(%edx) +fstl -96(%edx) +fstl -88(%edx) +fstl -80(%edx) +fstl -72(%edx) +fstl -64(%edx) +fstl -56(%edx) +fstl -48(%edx) +fstl -40(%edx) +fstl -32(%edx) +fstl -24(%edx) +fxch %st(1) +fstpl 64(%edx) +fstl 72(%edx) +fstl 80(%edx) +fstl 88(%edx) +fstl 96(%edx) +fstl 104(%edx) +fstl 112(%edx) +fstl 120(%edx) +fstl -128(%ecx) +fstpl -120(%ecx) +fldl 0(%ebx) +fldl 8(%ebx) +fldl 16(%ebx) +fldl 24(%ebx) +fxch %st(3) +fstl -16(%edx) +fstpl 56(%esp) 
+fldl 32(%ebx) +fxch %st(2) +fstl -8(%edx) +fstpl 64(%esp) +fldl 40(%ebx) +fxch %st(1) +fstl 0(%edx) +fstpl 72(%esp) +fldl 48(%ebx) +fxch %st(3) +fstl 8(%edx) +fstpl 80(%esp) +fldl 56(%ebx) +fxch %st(2) +fstl 16(%edx) +fstpl 88(%esp) +fldl 64(%ebx) +fxch %st(1) +fstl 24(%edx) +fstpl 96(%esp) +fldl 72(%ebx) +fxch %st(3) +fstl 32(%edx) +fstpl 104(%esp) +fxch %st(1) +fstl 40(%edx) +fstpl 112(%esp) +fstl 48(%edx) +fstpl 120(%esp) +fstl 56(%edx) +fstpl -128(%edx) +movl 8(%esp,%eax),%ebx +mov $28,%edi +mov $31,%ebp +movl 28(%ebx),%esi +rol $1,%esi +._morebytes: +movl %edi,188(%ecx) +._morebits: +rol $1,%esi +movl %esi,180(%ecx) +movl %ebp,184(%ecx) +and $1,%esi +movl $0x43300000,-108(%ecx) +movl %esi,-112(%ecx) +fldl -96(%edx) +fldl 0(%esp) +fadd %st(0),%st(1) +fsubl -96(%edx) +fldl 64(%edx) +fldl -16(%edx) +fadd %st(0),%st(1) +fsubl 64(%edx) +fldl -88(%edx) +fldl 8(%esp) +fadd %st(0),%st(1) +fsubl -88(%edx) +fxch %st(5) +fstpl 0(%esp) +fxch %st(3) +fstpl -96(%edx) +fldl 72(%edx) +fldl -8(%edx) +fadd %st(0),%st(1) +fsubl 72(%edx) +fxch %st(3) +fstpl -16(%edx) +fxch %st(1) +fstpl 64(%edx) +fldl -80(%edx) +fldl 16(%esp) +fadd %st(0),%st(1) +fsubl -80(%edx) +fxch %st(4) +fstpl 8(%esp) +fxch %st(4) +fstpl -88(%edx) +fldl 80(%edx) +fldl 0(%edx) +fadd %st(0),%st(1) +fsubl 80(%edx) +fxch %st(2) +fstpl -8(%edx) +fxch %st(2) +fstpl 72(%edx) +fldl -72(%edx) +fldl 24(%esp) +fadd %st(0),%st(1) +fsubl -72(%edx) +fxch %st(5) +fstpl 16(%esp) +fxch %st(3) +fstpl -80(%edx) +fldl 88(%edx) +fldl 8(%edx) +fadd %st(0),%st(1) +fsubl 88(%edx) +fxch %st(3) +fstpl 0(%edx) +fxch %st(1) +fstpl 80(%edx) +fldl -64(%edx) +fldl 32(%esp) +fadd %st(0),%st(1) +fsubl -64(%edx) +fxch %st(4) +fstpl 24(%esp) +fxch %st(4) +fstpl -72(%edx) +fldl 96(%edx) +fldl 16(%edx) +fadd %st(0),%st(1) +fsubl 96(%edx) +fxch %st(2) +fstpl 8(%edx) +fxch %st(2) +fstpl 88(%edx) +fldl -56(%edx) +fldl 40(%esp) +fadd %st(0),%st(1) +fsubl -56(%edx) +fxch %st(5) +fstpl 32(%esp) +fxch %st(3) +fstpl -64(%edx) +fldl 104(%edx) +fldl 
24(%edx) +fadd %st(0),%st(1) +fsubl 104(%edx) +fxch %st(3) +fstpl 16(%edx) +fxch %st(1) +fstpl 96(%edx) +fldl -48(%edx) +fldl 48(%esp) +fadd %st(0),%st(1) +fsubl -48(%edx) +fxch %st(4) +fstpl 40(%esp) +fxch %st(4) +fstpl -56(%edx) +fldl 112(%edx) +fldl 32(%edx) +fadd %st(0),%st(1) +fsubl 112(%edx) +fxch %st(2) +fstpl 24(%edx) +fxch %st(2) +fstpl 104(%edx) +fldl -40(%edx) +fldl -120(%edx) +fadd %st(0),%st(1) +fsubl -40(%edx) +fxch %st(5) +fstpl 48(%esp) +fxch %st(3) +fstpl -48(%edx) +fldl 120(%edx) +fldl 40(%edx) +fadd %st(0),%st(1) +fsubl 120(%edx) +fxch %st(3) +fstpl 32(%edx) +fxch %st(1) +fstpl 112(%edx) +fldl -32(%edx) +fldl -112(%edx) +fadd %st(0),%st(1) +fsubl -32(%edx) +fxch %st(4) +fstpl -120(%edx) +fxch %st(4) +fstpl -40(%edx) +fldl -128(%ecx) +fldl 48(%edx) +fadd %st(0),%st(1) +fsubl -128(%ecx) +fxch %st(2) +fstpl 40(%edx) +fxch %st(2) +fstpl 120(%edx) +fldl -24(%edx) +fldl -104(%edx) +fadd %st(0),%st(1) +fsubl -24(%edx) +fxch %st(5) +fstpl -112(%edx) +fxch %st(3) +fstpl -32(%edx) +fldl -120(%ecx) +fldl 56(%edx) +fadd %st(0),%st(1) +fsubl -120(%ecx) +fxch %st(3) +fstpl 48(%edx) +fxch %st(1) +fstpl -128(%ecx) +fldl -112(%ecx) +fsubl crypto_scalarmult_curve25519_athlon_in0offset +fldl crypto_scalarmult_curve25519_athlon_two0 +fsub %st(1),%st(0) +fxch %st(4) +fstpl -104(%edx) +fxch %st(4) +fstpl -24(%edx) +fstpl 56(%edx) +fstpl -120(%ecx) +fxch %st(1) +fstl 136(%ecx) +fldl 0(%esp) +fmul %st(2),%st(0) +fldl -16(%edx) +fmul %st(2),%st(0) +faddp %st(0),%st(1) +fldl 8(%esp) +fmul %st(3),%st(0) +fldl -8(%edx) +fmul %st(3),%st(0) +faddp %st(0),%st(1) +fldl 16(%esp) +fmul %st(4),%st(0) +fldl 0(%edx) +fmul %st(4),%st(0) +faddp %st(0),%st(1) +fldl 24(%esp) +fmul %st(5),%st(0) +fldl 8(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(3) +fstpl -112(%ecx) +fldl 32(%esp) +fmul %st(5),%st(0) +fldl 16(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(2) +fstpl -104(%ecx) +fldl 40(%esp) +fmul %st(5),%st(0) +fldl 24(%edx) +fmul %st(5),%st(0) +faddp 
%st(0),%st(1) +fxch %st(1) +fstpl -96(%ecx) +fldl 48(%esp) +fmul %st(5),%st(0) +fldl 32(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(3) +fstpl -88(%ecx) +fldl -120(%edx) +fmul %st(5),%st(0) +fldl 40(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(2) +fstpl -80(%ecx) +fldl -112(%edx) +fmul %st(5),%st(0) +fldl 48(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(1) +fstpl -72(%ecx) +fldl -104(%edx) +fmul %st(5),%st(0) +fldl 56(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(3) +fstpl -64(%ecx) +fldl -96(%edx) +fmul %st(5),%st(0) +fldl 64(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(2) +fstpl -56(%ecx) +fldl -88(%edx) +fmul %st(5),%st(0) +fldl 72(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(1) +fstpl -48(%ecx) +fldl -80(%edx) +fmul %st(5),%st(0) +fldl 80(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(3) +fstpl -40(%ecx) +fldl -72(%edx) +fmul %st(5),%st(0) +fldl 88(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(2) +fstpl -32(%ecx) +fldl -64(%edx) +fmul %st(5),%st(0) +fldl 96(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(1) +fstpl -24(%ecx) +fldl -56(%edx) +fmul %st(5),%st(0) +fldl 104(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(3) +fstpl -16(%ecx) +fldl -48(%edx) +fmul %st(5),%st(0) +fldl 112(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(2) +fstpl -8(%ecx) +fldl -40(%edx) +fmul %st(5),%st(0) +fldl 120(%edx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(1) +fstpl 0(%ecx) +fldl -32(%edx) +fmul %st(5),%st(0) +fldl -128(%ecx) +fmul %st(5),%st(0) +faddp %st(0),%st(1) +fxch %st(3) +fstpl 8(%ecx) +fldl -24(%edx) +fmulp %st(0),%st(5) +fldl -120(%ecx) +fmulp %st(0),%st(4) +fxch %st(3) +faddp %st(0),%st(4) +fstpl 16(%ecx) +fxch %st(1) +fstpl 24(%ecx) +fstpl 32(%ecx) +fstpl 40(%ecx) +fldl -24(%edx) +fmull 56(%edx) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -96(%edx) +fmull 48(%edx) +faddp %st(0),%st(1) +fldl -88(%edx) +fmull 40(%edx) +faddp 
%st(0),%st(1) +fldl -96(%edx) +fmull 56(%edx) +fldl -80(%edx) +fmull 32(%edx) +faddp %st(0),%st(2) +fldl -88(%edx) +fmull 48(%edx) +faddp %st(0),%st(1) +fldl -72(%edx) +fmull 24(%edx) +faddp %st(0),%st(2) +fldl -80(%edx) +fmull 40(%edx) +faddp %st(0),%st(1) +fldl -64(%edx) +fmull 16(%edx) +faddp %st(0),%st(2) +fldl -72(%edx) +fmull 32(%edx) +faddp %st(0),%st(1) +fldl -88(%edx) +fmull 56(%edx) +fldl -56(%edx) +fmull 8(%edx) +faddp %st(0),%st(3) +fldl -64(%edx) +fmull 24(%edx) +faddp %st(0),%st(2) +fldl -80(%edx) +fmull 48(%edx) +faddp %st(0),%st(1) +fldl -48(%edx) +fmull 0(%edx) +faddp %st(0),%st(3) +fldl -56(%edx) +fmull 16(%edx) +faddp %st(0),%st(2) +fldl -72(%edx) +fmull 40(%edx) +faddp %st(0),%st(1) +fldl -40(%edx) +fmull -8(%edx) +faddp %st(0),%st(3) +fldl -48(%edx) +fmull 8(%edx) +faddp %st(0),%st(2) +fldl -64(%edx) +fmull 32(%edx) +faddp %st(0),%st(1) +fldl -32(%edx) +fmull -16(%edx) +faddp %st(0),%st(3) +fldl -40(%edx) +fmull 0(%edx) +faddp %st(0),%st(2) +fldl -56(%edx) +fmull 24(%edx) +faddp %st(0),%st(1) +fldl -80(%edx) +fmull 56(%edx) +fldl -48(%edx) +fmull 16(%edx) +faddp %st(0),%st(2) +fldl -32(%edx) +fmull -8(%edx) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(4),%st(0) +fldl -72(%edx) +fmull 48(%edx) +faddp %st(0),%st(2) +fldl -40(%edx) +fmull 8(%edx) +faddp %st(0),%st(3) +fldl -24(%edx) +fmull -16(%edx) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fldl -64(%edx) +fmull 40(%edx) +faddp %st(0),%st(2) +fldl -72(%edx) +fmull 56(%edx) +fldl -32(%edx) +fmull 0(%edx) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fldl -56(%edx) +fmull 32(%edx) +faddp %st(0),%st(3) +fldl -64(%edx) +fmull 48(%edx) +faddp %st(0),%st(2) +fsubrp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(4),%st(0) +fldl -48(%edx) +fmull 24(%edx) +faddp %st(0),%st(3) +fldl -56(%edx) +fmull 40(%edx) +faddp %st(0),%st(2) +fldl -24(%edx) +fmull -8(%edx) +faddp %st(0),%st(4) +fsubl 
crypto_scalarmult_curve25519_athlon_alpha255 +fldl -40(%edx) +fmull 16(%edx) +faddp %st(0),%st(3) +fldl -64(%edx) +fmull 56(%edx) +fldl -48(%edx) +fmull 32(%edx) +faddp %st(0),%st(3) +fldl -32(%edx) +fmull 8(%edx) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fsubrp %st(0),%st(5) +fxch %st(5) +fstpl 64(%ecx) +fldl -56(%edx) +fmull 48(%edx) +faddp %st(0),%st(5) +fldl -40(%edx) +fmull 24(%edx) +faddp %st(0),%st(1) +fldl -24(%edx) +fmull 0(%edx) +faddp %st(0),%st(2) +fxch %st(2) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -48(%edx) +fmull 40(%edx) +faddp %st(0),%st(5) +fldl -32(%edx) +fmull 16(%edx) +faddp %st(0),%st(3) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -96(%edx) +fmull -16(%edx) +faddp %st(0),%st(2) +fxch %st(3) +fstpl 72(%ecx) +fldl -56(%edx) +fmull 56(%edx) +fldl -40(%edx) +fmull 32(%edx) +faddp %st(0),%st(5) +fldl -24(%edx) +fmull 8(%edx) +faddp %st(0),%st(3) +fldl -96(%edx) +fmull -8(%edx) +faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(2),%st(0) +fldl -48(%edx) +fmull 48(%edx) +faddp %st(0),%st(2) +fldl -32(%edx) +fmull 24(%edx) +faddp %st(0),%st(6) +fxch %st(3) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -88(%edx) +fmull -16(%edx) +faddp %st(0),%st(5) +fxch %st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fldl -40(%edx) +fmull 40(%edx) +faddp %st(0),%st(2) +fldl -24(%edx) +fmull 16(%edx) +faddp %st(0),%st(6) +fldl -96(%edx) +fmull 0(%edx) +faddp %st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(2) +fldl -48(%edx) +fmull 56(%edx) +fldl -32(%edx) +fmull 32(%edx) +faddp %st(0),%st(2) +fxch %st(5) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -88(%edx) +fmull -8(%edx) +faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(5),%st(0) +fldl -40(%edx) +fmull 48(%edx) +faddp %st(0),%st(7) +fldl -24(%edx) +fmull 24(%edx) +faddp %st(0),%st(3) +fldl -96(%edx) +fmull 8(%edx) +faddp %st(0),%st(2) +fldl -80(%edx) +fmull -16(%edx) 
+faddp %st(0),%st(5) +fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fxch %st(3) +fstpl 48(%ecx) +fldl -32(%edx) +fmull 40(%edx) +faddp %st(0),%st(6) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -88(%edx) +fmull 0(%edx) +faddp %st(0),%st(2) +fxch %st(2) +fadd %st(0),%st(3) +fsubrp %st(0),%st(4) +fldl -40(%edx) +fmull 56(%edx) +fldl -24(%edx) +fmull 32(%edx) +faddp %st(0),%st(6) +fldl -96(%edx) +fmull 16(%edx) +faddp %st(0),%st(3) +fldl -80(%edx) +fmull -8(%edx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(4),%st(0) +fldl -32(%edx) +fmull 48(%edx) +faddp %st(0),%st(2) +fxch %st(6) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -88(%edx) +fmull 8(%edx) +faddp %st(0),%st(4) +fldl -72(%edx) +fmull -16(%edx) +faddp %st(0),%st(3) +fxch %st(6) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fxch %st(5) +fstpl 56(%ecx) +fldl -24(%edx) +fmull 40(%edx) +faddp %st(0),%st(1) +fldl -96(%edx) +fmull 24(%edx) +faddp %st(0),%st(6) +fldl -80(%edx) +fmull 0(%edx) +faddp %st(0),%st(3) +fxch %st(4) +fadd %st(0),%st(1) +fsubrp %st(0),%st(3) +fldl -32(%edx) +fmull 56(%edx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -88(%edx) +fmull 16(%edx) +faddp %st(0),%st(6) +fldl -72(%edx) +fmull -8(%edx) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(2),%st(0) +fldl -24(%edx) +fmull 48(%edx) +faddp %st(0),%st(6) +fldl -96(%edx) +fmull 32(%edx) +faddp %st(0),%st(2) +fldl -80(%edx) +fmull 8(%edx) +faddp %st(0),%st(7) +fldl -64(%edx) +fmull -16(%edx) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fxch %st(4) +fstpl -24(%edx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -88(%edx) +fmull 24(%edx) +faddp %st(0),%st(5) +fldl -72(%edx) +fmull 0(%edx) +faddp %st(0),%st(6) +fxch %st(3) +fadd %st(0),%st(2) +fsubrp %st(0),%st(1) +fldl -96(%edx) +fmull 40(%edx) +faddp %st(0),%st(3) +fldl -80(%edx) +fmull 16(%edx) +faddp 
%st(0),%st(4) +fldl -64(%edx) +fmull -8(%edx) +faddp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(2),%st(0) +fldl -88(%edx) +fmull 32(%edx) +faddp %st(0),%st(4) +fldl -72(%edx) +fmull 8(%edx) +faddp %st(0),%st(5) +fldl -56(%edx) +fmull -16(%edx) +faddp %st(0),%st(6) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fxch %st(1) +fstpl -96(%edx) +fldl -80(%edx) +fmull 24(%edx) +faddp %st(0),%st(3) +fldl -64(%edx) +fmull 0(%edx) +faddp %st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(1) +fstpl -88(%edx) +fldl -72(%edx) +fmull 16(%edx) +faddp %st(0),%st(1) +fldl -56(%edx) +fmull -8(%edx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(3),%st(0) +fldl -64(%edx) +fmull 8(%edx) +faddp %st(0),%st(2) +fldl -48(%edx) +fmull -16(%edx) +faddp %st(0),%st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha153 +fldl -56(%edx) +fmull 0(%edx) +faddp %st(0),%st(2) +fadd %st(0),%st(2) +fsubrp %st(0),%st(3) +fxch %st(2) +fstpl -80(%edx) +fldl -48(%edx) +fmull -8(%edx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fldl -40(%edx) +fmull -16(%edx) +faddp %st(0),%st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fldl 64(%ecx) +fldl 72(%ecx) +fxch %st(2) +fadd %st(0),%st(4) +fsubrp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(4),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(0),%st(1) +fsubrp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +faddp %st(0),%st(2) +fxch %st(2) +fstpl -72(%edx) +fxch %st(2) +fstpl -64(%edx) +fstpl -56(%edx) +fstpl -48(%edx) +fldl -104(%edx) +fmull -120(%ecx) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -128(%ecx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull 120(%edx) +faddp %st(0),%st(1) +fldl 0(%esp) +fmull -120(%ecx) +fldl 16(%esp) +fmull 112(%edx) 
+faddp %st(0),%st(2) +fldl 8(%esp) +fmull -128(%ecx) +faddp %st(0),%st(1) +fldl 24(%esp) +fmull 104(%edx) +faddp %st(0),%st(2) +fldl 16(%esp) +fmull 120(%edx) +faddp %st(0),%st(1) +fldl 32(%esp) +fmull 96(%edx) +faddp %st(0),%st(2) +fldl 24(%esp) +fmull 112(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -120(%ecx) +fldl 40(%esp) +fmull 88(%edx) +faddp %st(0),%st(3) +fldl 32(%esp) +fmull 104(%edx) +faddp %st(0),%st(2) +fldl 16(%esp) +fmull -128(%ecx) +faddp %st(0),%st(1) +fldl 48(%esp) +fmull 80(%edx) +faddp %st(0),%st(3) +fldl 40(%esp) +fmull 96(%edx) +faddp %st(0),%st(2) +fldl 24(%esp) +fmull 120(%edx) +faddp %st(0),%st(1) +fldl -120(%edx) +fmull 72(%edx) +faddp %st(0),%st(3) +fldl 48(%esp) +fmull 88(%edx) +faddp %st(0),%st(2) +fldl 32(%esp) +fmull 112(%edx) +faddp %st(0),%st(1) +fldl -112(%edx) +fmull 64(%edx) +faddp %st(0),%st(3) +fldl -120(%edx) +fmull 80(%edx) +faddp %st(0),%st(2) +fldl 40(%esp) +fmull 104(%edx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull -120(%ecx) +fldl 48(%esp) +fmull 96(%edx) +faddp %st(0),%st(2) +fldl -112(%edx) +fmull 72(%edx) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(4),%st(0) +fldl 24(%esp) +fmull -128(%ecx) +faddp %st(0),%st(2) +fldl -120(%edx) +fmull 88(%edx) +faddp %st(0),%st(3) +fldl -104(%edx) +fmull 64(%edx) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fldl 32(%esp) +fmull 120(%edx) +faddp %st(0),%st(2) +fldl 24(%esp) +fmull -120(%ecx) +fldl -112(%edx) +fmull 80(%edx) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fldl 40(%esp) +fmull 112(%edx) +faddp %st(0),%st(3) +fldl 32(%esp) +fmull -128(%ecx) +faddp %st(0),%st(2) +fsubrp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(4),%st(0) +fldl 48(%esp) +fmull 104(%edx) +faddp %st(0),%st(3) +fldl 40(%esp) +fmull 120(%edx) +faddp %st(0),%st(2) +fldl -104(%edx) +fmull 72(%edx) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fldl -120(%edx) +fmull 96(%edx) 
+faddp %st(0),%st(3) +fldl 32(%esp) +fmull -120(%ecx) +fldl 48(%esp) +fmull 112(%edx) +faddp %st(0),%st(3) +fldl -112(%edx) +fmull 88(%edx) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fsubrp %st(0),%st(5) +fxch %st(5) +fstpl 8(%edx) +fldl 40(%esp) +fmull -128(%ecx) +faddp %st(0),%st(5) +fldl -120(%edx) +fmull 104(%edx) +faddp %st(0),%st(1) +fldl -104(%edx) +fmull 80(%edx) +faddp %st(0),%st(2) +fxch %st(2) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 48(%esp) +fmull 120(%edx) +faddp %st(0),%st(5) +fldl -112(%edx) +fmull 96(%edx) +faddp %st(0),%st(3) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 64(%edx) +faddp %st(0),%st(2) +fxch %st(3) +fstpl 16(%edx) +fldl 40(%esp) +fmull -120(%ecx) +fldl -120(%edx) +fmull 112(%edx) +faddp %st(0),%st(5) +fldl -104(%edx) +fmull 88(%edx) +faddp %st(0),%st(3) +fldl 0(%esp) +fmull 72(%edx) +faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(2),%st(0) +fldl 48(%esp) +fmull -128(%ecx) +faddp %st(0),%st(2) +fldl -112(%edx) +fmull 104(%edx) +faddp %st(0),%st(6) +fxch %st(3) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%esp) +fmull 64(%edx) +faddp %st(0),%st(5) +fxch %st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fldl -120(%edx) +fmull 120(%edx) +faddp %st(0),%st(2) +fldl -104(%edx) +fmull 96(%edx) +faddp %st(0),%st(6) +fldl 0(%esp) +fmull 80(%edx) +faddp %st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(2) +fldl 48(%esp) +fmull -120(%ecx) +fldl -112(%edx) +fmull 112(%edx) +faddp %st(0),%st(2) +fxch %st(5) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%esp) +fmull 72(%edx) +faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(5),%st(0) +fldl -120(%edx) +fmull -128(%ecx) +faddp %st(0),%st(7) +fldl -104(%edx) +fmull 104(%edx) +faddp %st(0),%st(3) +fldl 0(%esp) +fmull 88(%edx) +faddp %st(0),%st(2) +fldl 16(%esp) +fmull 64(%edx) +faddp %st(0),%st(5) +fsubl 
crypto_scalarmult_curve25519_athlon_alpha51 +fxch %st(3) +fstpl -40(%edx) +fldl -112(%edx) +fmull 120(%edx) +faddp %st(0),%st(6) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%esp) +fmull 80(%edx) +faddp %st(0),%st(2) +fxch %st(2) +fadd %st(0),%st(3) +fsubrp %st(0),%st(4) +fldl -120(%edx) +fmull -120(%ecx) +fldl -104(%edx) +fmull 112(%edx) +faddp %st(0),%st(6) +fldl 0(%esp) +fmull 96(%edx) +faddp %st(0),%st(3) +fldl 16(%esp) +fmull 72(%edx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(4),%st(0) +fldl -112(%edx) +fmull -128(%ecx) +faddp %st(0),%st(2) +fxch %st(6) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%esp) +fmull 88(%edx) +faddp %st(0),%st(4) +fldl 24(%esp) +fmull 64(%edx) +faddp %st(0),%st(3) +fxch %st(6) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fxch %st(5) +fstpl -32(%edx) +fldl -104(%edx) +fmull 120(%edx) +faddp %st(0),%st(1) +fldl 0(%esp) +fmull 104(%edx) +faddp %st(0),%st(6) +fldl 16(%esp) +fmull 80(%edx) +faddp %st(0),%st(3) +fxch %st(4) +fadd %st(0),%st(1) +fsubrp %st(0),%st(3) +fldl -112(%edx) +fmull -120(%ecx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%esp) +fmull 96(%edx) +faddp %st(0),%st(6) +fldl 24(%esp) +fmull 72(%edx) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(2),%st(0) +fldl -104(%edx) +fmull -128(%ecx) +faddp %st(0),%st(6) +fldl 0(%esp) +fmull 112(%edx) +faddp %st(0),%st(2) +fldl 16(%esp) +fmull 88(%edx) +faddp %st(0),%st(7) +fldl 32(%esp) +fmull 64(%edx) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fxch %st(4) +fstpl -104(%edx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%esp) +fmull 104(%edx) +faddp %st(0),%st(5) +fldl 24(%esp) +fmull 80(%edx) +faddp %st(0),%st(6) +fxch %st(3) +fadd %st(0),%st(2) +fsubrp %st(0),%st(1) +fldl 0(%esp) +fmull 120(%edx) +faddp %st(0),%st(3) +fldl 16(%esp) +fmull 96(%edx) +faddp %st(0),%st(4) +fldl 32(%esp) 
+fmull 72(%edx) +faddp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(2),%st(0) +fldl 8(%esp) +fmull 112(%edx) +faddp %st(0),%st(4) +fldl 24(%esp) +fmull 88(%edx) +faddp %st(0),%st(5) +fldl 40(%esp) +fmull 64(%edx) +faddp %st(0),%st(6) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fxch %st(1) +fstpl -16(%edx) +fldl 16(%esp) +fmull 104(%edx) +faddp %st(0),%st(3) +fldl 32(%esp) +fmull 80(%edx) +faddp %st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(1) +fstpl -8(%edx) +fldl 24(%esp) +fmull 96(%edx) +faddp %st(0),%st(1) +fldl 40(%esp) +fmull 72(%edx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(3),%st(0) +fldl 32(%esp) +fmull 88(%edx) +faddp %st(0),%st(2) +fldl 48(%esp) +fmull 64(%edx) +faddp %st(0),%st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha153 +fldl 40(%esp) +fmull 80(%edx) +faddp %st(0),%st(2) +fadd %st(0),%st(2) +fsubrp %st(0),%st(3) +fxch %st(2) +fstpl 0(%edx) +fldl 48(%esp) +fmull 72(%edx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fldl -120(%edx) +fmull 64(%edx) +faddp %st(0),%st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fldl 8(%edx) +fldl 16(%edx) +fxch %st(2) +fadd %st(0),%st(4) +fsubrp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(4),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(0),%st(1) +fsubrp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +faddp %st(0),%st(2) +fxch %st(2) +fstpl 8(%edx) +fxch %st(2) +fstpl 16(%edx) +fstpl 24(%edx) +fstpl 32(%edx) +fldl -40(%ecx) +fmul %st(0),%st(0) +fldl -112(%ecx) +fadd %st(0),%st(0) +fldl -104(%ecx) +fadd %st(0),%st(0) +fldl -96(%ecx) +fadd %st(0),%st(0) +fldl -56(%ecx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -40(%ecx) +fmul %st(4),%st(0) +fldl -48(%ecx) +fmul %st(4),%st(0) +faddp 
%st(0),%st(1) +fxch %st(4) +fstl 0(%esp) +fxch %st(3) +fstl 8(%esp) +fxch %st(3) +fmull -48(%ecx) +faddp %st(0),%st(1) +fldl -64(%ecx) +fxch %st(5) +fmul %st(0),%st(3) +fxch %st(3) +faddp %st(0),%st(1) +fxch %st(2) +fadd %st(0),%st(0) +fldl -56(%ecx) +fmul %st(2),%st(0) +faddp %st(0),%st(4) +fxch %st(1) +fstl 16(%esp) +fldl -72(%ecx) +fxch %st(5) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(3) +fadd %st(0),%st(0) +fstpl 48(%esp) +fldl -88(%ecx) +fadd %st(0),%st(0) +fstl 24(%esp) +fldl -64(%ecx) +fmul %st(1),%st(0) +faddp %st(0),%st(4) +fmul %st(4),%st(0) +faddp %st(0),%st(2) +fxch %st(3) +fadd %st(0),%st(0) +fstpl 40(%esp) +fldl -80(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +fldl 8(%esp) +fldl -40(%ecx) +fmul %st(0),%st(1) +fldl 16(%esp) +fmul %st(0),%st(1) +fldl -48(%ecx) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(3) +fldl 24(%esp) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(2) +fldl -80(%ecx) +fadd %st(0),%st(0) +fstl 32(%esp) +fmull -72(%ecx) +faddp %st(0),%st(6) +fxch %st(3) +faddp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(5),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fsubr %st(0),%st(5) +fldl -56(%ecx) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(3) +fldl 32(%esp) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(2) +fldl -64(%ecx) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(3) +fxch %st(3) +fmull 40(%esp) +faddp %st(0),%st(1) +fxch %st(3) +fstpl -120(%edx) +fldl -72(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fxch %st(2) +fmull crypto_scalarmult_curve25519_athlon_scale +fxch %st(3) +fstpl -112(%edx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 24(%esp) +fmull -40(%ecx) +fldl -112(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(2) +fldl 32(%esp) +fmull -48(%ecx) +faddp %st(0),%st(1) +fldl 0(%esp) 
+fmull -104(%ecx) +faddp %st(0),%st(3) +fldl 40(%esp) +fmull -56(%ecx) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(2),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fsubr %st(0),%st(2) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(3),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fsubr %st(0),%st(3) +fldl -64(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(2) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -96(%ecx) +faddp %st(0),%st(1) +fldl -104(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fsubr %st(0),%st(1) +fxch %st(2) +fstpl 64(%edx) +fldl 32(%esp) +fmull -40(%ecx) +fldl 40(%esp) +fmull -48(%ecx) +faddp %st(0),%st(1) +fldl 48(%esp) +fmull -56(%ecx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -88(%ecx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -96(%ecx) +faddp %st(0),%st(1) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(2),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fsubr %st(0),%st(2) +fxch %st(3) +fstpl 72(%edx) +fldl 40(%esp) +fmull -40(%ecx) +fldl 48(%esp) +fmull -48(%ecx) +faddp %st(0),%st(1) +fldl -56(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -80(%ecx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -88(%ecx) +faddp %st(0),%st(1) +fldl -96(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(3),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fsubr %st(0),%st(3) +fxch %st(1) +fstpl 80(%edx) +fldl 48(%esp) +fldl -40(%ecx) +fmul %st(0),%st(1) +fmul %st(5),%st(0) +fxch %st(5) +fmull -48(%ecx) +faddp %st(0),%st(1) +fmull 
crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -72(%ecx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -80(%ecx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull -88(%ecx) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha153 +fsubr %st(0),%st(1) +fxch %st(2) +fstpl 88(%edx) +fldl -48(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(4) +fxch %st(3) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -64(%ecx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -72(%ecx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull -80(%ecx) +faddp %st(0),%st(1) +fldl -88(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fsubr %st(0),%st(1) +fldl -48(%ecx) +fadd %st(0),%st(0) +fmull -40(%ecx) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -56(%ecx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -64(%ecx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull -72(%ecx) +faddp %st(0),%st(1) +fldl 24(%esp) +fmull -80(%ecx) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fsubr %st(0),%st(1) +fldl -120(%edx) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fldl -112(%edx) +fxch %st(1) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(2) +faddp %st(0),%st(1) +fxch %st(4) +fstpl 96(%edx) +fxch %st(4) +fstpl 104(%edx) +fxch %st(1) +fstpl 112(%edx) +fstpl 120(%edx) +fxch %st(1) +fstpl -128(%ecx) +fstpl -120(%ecx) +fldl 40(%ecx) +fmul %st(0),%st(0) +fldl -32(%ecx) +fadd %st(0),%st(0) +fldl -24(%ecx) +fadd %st(0),%st(0) +fldl -16(%ecx) +fadd %st(0),%st(0) +fldl 24(%ecx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 40(%ecx) +fmul 
%st(4),%st(0) +fldl 32(%ecx) +fmul %st(4),%st(0) +faddp %st(0),%st(1) +fxch %st(4) +fstl 0(%esp) +fxch %st(3) +fstl 8(%esp) +fxch %st(3) +fmull 32(%ecx) +faddp %st(0),%st(1) +fldl 16(%ecx) +fxch %st(5) +fmul %st(0),%st(3) +fxch %st(3) +faddp %st(0),%st(1) +fxch %st(2) +fadd %st(0),%st(0) +fldl 24(%ecx) +fmul %st(2),%st(0) +faddp %st(0),%st(4) +fxch %st(1) +fstl 16(%esp) +fldl 8(%ecx) +fxch %st(5) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(3) +fadd %st(0),%st(0) +fstpl 48(%esp) +fldl -8(%ecx) +fadd %st(0),%st(0) +fstl 24(%esp) +fldl 16(%ecx) +fmul %st(1),%st(0) +faddp %st(0),%st(4) +fmul %st(4),%st(0) +faddp %st(0),%st(2) +fxch %st(3) +fadd %st(0),%st(0) +fstpl 40(%esp) +fldl 0(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +fldl 8(%esp) +fldl 40(%ecx) +fmul %st(0),%st(1) +fldl 16(%esp) +fmul %st(0),%st(1) +fldl 32(%ecx) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(3) +fldl 24(%esp) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(2) +fldl 0(%ecx) +fadd %st(0),%st(0) +fstl 32(%esp) +fmull 8(%ecx) +faddp %st(0),%st(6) +fxch %st(3) +faddp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(5),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fsubr %st(0),%st(5) +fldl 24(%ecx) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(3) +fldl 32(%esp) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(2) +fldl 16(%ecx) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(3) +fxch %st(3) +fmull 40(%esp) +faddp %st(0),%st(1) +fxch %st(3) +fstpl -120(%edx) +fldl 8(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fxch %st(2) +fmull crypto_scalarmult_curve25519_athlon_scale +fxch %st(3) +fstpl -112(%edx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 24(%esp) +fmull 40(%ecx) +fldl -32(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(2) +fldl 32(%esp) +fmull 32(%ecx) 
+faddp %st(0),%st(1) +fldl 0(%esp) +fmull -24(%ecx) +faddp %st(0),%st(3) +fldl 40(%esp) +fmull 24(%ecx) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(2),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fsubr %st(0),%st(2) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(3),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fsubr %st(0),%st(3) +fldl 16(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(2) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -16(%ecx) +faddp %st(0),%st(1) +fldl -24(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fsubr %st(0),%st(1) +fxch %st(2) +fstpl -112(%ecx) +fldl 32(%esp) +fmull 40(%ecx) +fldl 40(%esp) +fmull 32(%ecx) +faddp %st(0),%st(1) +fldl 48(%esp) +fmull 24(%ecx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -8(%ecx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -16(%ecx) +faddp %st(0),%st(1) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(2),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fsubr %st(0),%st(2) +fxch %st(3) +fstpl -104(%ecx) +fldl 40(%esp) +fmull 40(%ecx) +fldl 48(%esp) +fmull 32(%ecx) +faddp %st(0),%st(1) +fldl 24(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 0(%ecx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -8(%ecx) +faddp %st(0),%st(1) +fldl -16(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(3),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fsubr %st(0),%st(3) +fxch %st(1) +fstpl -96(%ecx) +fldl 48(%esp) +fldl 40(%ecx) +fmul %st(0),%st(1) +fmul %st(5),%st(0) +fxch %st(5) +fmull 32(%ecx) +faddp %st(0),%st(1) 
+fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 8(%ecx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull 0(%ecx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull -8(%ecx) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha153 +fsubr %st(0),%st(1) +fxch %st(2) +fstpl -88(%ecx) +fldl 32(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(4) +fxch %st(3) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 16(%ecx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull 8(%ecx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull 0(%ecx) +faddp %st(0),%st(1) +fldl -8(%ecx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fsubr %st(0),%st(1) +fldl 32(%ecx) +fadd %st(0),%st(0) +fmull 40(%ecx) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 24(%ecx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull 16(%ecx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull 8(%ecx) +faddp %st(0),%st(1) +fldl 24(%esp) +fmull 0(%ecx) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fsubr %st(0),%st(1) +fldl -120(%edx) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fldl -112(%edx) +fxch %st(1) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(2) +faddp %st(0),%st(1) +fxch %st(4) +fstpl -80(%ecx) +fxch %st(4) +fstpl -72(%ecx) +fxch %st(1) +fstpl -64(%ecx) +fstpl -56(%ecx) +fxch %st(1) +fstpl -48(%ecx) +fstpl -40(%ecx) +fldl -40(%edx) +fldl 48(%ecx) +fadd %st(0),%st(1) +fsubl -40(%edx) +fxch %st(1) +fstpl -120(%edx) +fstpl -40(%edx) +fldl -32(%edx) +fldl 56(%ecx) +fadd %st(0),%st(1) +fsubl -32(%edx) +fxch %st(1) +fstpl -112(%edx) +fstpl -32(%edx) +fldl -104(%edx) +fldl 
-24(%edx) +fadd %st(0),%st(1) +fsubl -104(%edx) +fxch %st(1) +fstpl -104(%edx) +fstpl -24(%edx) +fldl -16(%edx) +fldl -96(%edx) +fadd %st(0),%st(1) +fsubl -16(%edx) +fxch %st(1) +fstpl -96(%edx) +fstpl -16(%edx) +fldl -8(%edx) +fldl -88(%edx) +fadd %st(0),%st(1) +fsubl -8(%edx) +fxch %st(1) +fstpl -88(%edx) +fstpl -8(%edx) +fldl 0(%edx) +fldl -80(%edx) +fadd %st(0),%st(1) +fsubl 0(%edx) +fxch %st(1) +fstpl -80(%edx) +fstpl 0(%edx) +fldl 8(%edx) +fldl -72(%edx) +fadd %st(0),%st(1) +fsubl 8(%edx) +fxch %st(1) +fstpl -72(%edx) +fstpl 8(%edx) +fldl 16(%edx) +fldl -64(%edx) +fadd %st(0),%st(1) +fsubl 16(%edx) +fxch %st(1) +fstpl -64(%edx) +fstpl 16(%edx) +fldl 24(%edx) +fldl -56(%edx) +fadd %st(0),%st(1) +fsubl 24(%edx) +fxch %st(1) +fstpl -56(%edx) +fstpl 24(%edx) +fldl 32(%edx) +fldl -48(%edx) +fadd %st(0),%st(1) +fsubl 32(%edx) +fxch %st(1) +fstpl -48(%edx) +fstpl 32(%edx) +fldl 64(%edx) +fsubl -112(%ecx) +fstpl -32(%ecx) +fldl 72(%edx) +fsubl -104(%ecx) +fstpl -24(%ecx) +fldl 80(%edx) +fsubl -96(%ecx) +fstpl -16(%ecx) +fldl 88(%edx) +fsubl -88(%ecx) +fstpl -8(%ecx) +fldl 96(%edx) +fsubl -80(%ecx) +fstpl 0(%ecx) +fldl 104(%edx) +fsubl -72(%ecx) +fstpl 8(%ecx) +fldl 112(%edx) +fsubl -64(%ecx) +fstpl 16(%ecx) +fldl 120(%edx) +fsubl -56(%ecx) +fstpl 24(%ecx) +fldl -128(%ecx) +fsubl -48(%ecx) +fstpl 32(%ecx) +fldl -120(%ecx) +fsubl -40(%ecx) +fstpl 40(%ecx) +fldl -48(%edx) +fmul %st(0),%st(0) +fldl -120(%edx) +fadd %st(0),%st(0) +fldl -112(%edx) +fadd %st(0),%st(0) +fldl -104(%edx) +fadd %st(0),%st(0) +fldl -64(%edx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -48(%edx) +fmul %st(4),%st(0) +fldl -56(%edx) +fmul %st(4),%st(0) +faddp %st(0),%st(1) +fxch %st(4) +fstl 0(%esp) +fxch %st(3) +fstl 8(%esp) +fxch %st(3) +fmull -56(%edx) +faddp %st(0),%st(1) +fldl -72(%edx) +fxch %st(5) +fmul %st(0),%st(3) +fxch %st(3) +faddp %st(0),%st(1) +fxch %st(2) +fadd %st(0),%st(0) +fldl -64(%edx) +fmul %st(2),%st(0) +faddp %st(0),%st(4) +fxch %st(1) +fstl 16(%esp) 
+fldl -80(%edx) +fxch %st(5) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(3) +fadd %st(0),%st(0) +fstpl 48(%esp) +fldl -96(%edx) +fadd %st(0),%st(0) +fstl 24(%esp) +fldl -72(%edx) +fmul %st(1),%st(0) +faddp %st(0),%st(4) +fmul %st(4),%st(0) +faddp %st(0),%st(2) +fxch %st(3) +fadd %st(0),%st(0) +fstpl 40(%esp) +fldl -88(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +fldl 8(%esp) +fldl -48(%edx) +fmul %st(0),%st(1) +fldl 16(%esp) +fmul %st(0),%st(1) +fldl -56(%edx) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(3) +fldl 24(%esp) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(2) +fldl -88(%edx) +fadd %st(0),%st(0) +fstl 32(%esp) +fmull -80(%edx) +faddp %st(0),%st(6) +fxch %st(3) +faddp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(5),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fsubr %st(0),%st(5) +fldl -64(%edx) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(3) +fldl 32(%esp) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(2) +fldl -72(%edx) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(3) +fxch %st(3) +fmull 40(%esp) +faddp %st(0),%st(1) +fxch %st(3) +fstpl 48(%edx) +fldl -80(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fxch %st(2) +fmull crypto_scalarmult_curve25519_athlon_scale +fxch %st(3) +fstpl 56(%edx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 24(%esp) +fmull -48(%edx) +fldl -120(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(2) +fldl 32(%esp) +fmull -56(%edx) +faddp %st(0),%st(1) +fldl 0(%esp) +fmull -112(%edx) +faddp %st(0),%st(3) +fldl 40(%esp) +fmull -64(%edx) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(2),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fsubr %st(0),%st(2) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(3),%st(0) 
+fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fsubr %st(0),%st(3) +fldl -72(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(2) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -104(%edx) +faddp %st(0),%st(1) +fldl -112(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fsubr %st(0),%st(1) +fxch %st(2) +fstpl -120(%edx) +fldl 32(%esp) +fmull -48(%edx) +fldl 40(%esp) +fmull -56(%edx) +faddp %st(0),%st(1) +fldl 48(%esp) +fmull -64(%edx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -96(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -104(%edx) +faddp %st(0),%st(1) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(2),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fsubr %st(0),%st(2) +fxch %st(3) +fstpl -112(%edx) +fldl 40(%esp) +fmull -48(%edx) +fldl 48(%esp) +fmull -56(%edx) +faddp %st(0),%st(1) +fldl -64(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -88(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -96(%edx) +faddp %st(0),%st(1) +fldl -104(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(3),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fsubr %st(0),%st(3) +fxch %st(1) +fstpl -104(%edx) +fldl 48(%esp) +fldl -48(%edx) +fmul %st(0),%st(1) +fmul %st(5),%st(0) +fxch %st(5) +fmull -56(%edx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -80(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -88(%edx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull -96(%edx) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(1),%st(0) +fsubl 
crypto_scalarmult_curve25519_athlon_alpha153 +fsubr %st(0),%st(1) +fxch %st(2) +fstpl 40(%edx) +fldl -56(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(4) +fxch %st(3) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -72(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -80(%edx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull -88(%edx) +faddp %st(0),%st(1) +fldl -96(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fsubr %st(0),%st(1) +fldl -56(%edx) +fadd %st(0),%st(0) +fmull -48(%edx) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -64(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -72(%edx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull -80(%edx) +faddp %st(0),%st(1) +fldl 24(%esp) +fmull -88(%edx) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fsubr %st(0),%st(1) +fldl 48(%edx) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fldl 56(%edx) +fxch %st(1) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(2) +faddp %st(0),%st(1) +fxch %st(4) +fstpl -96(%edx) +fxch %st(4) +fstpl -88(%edx) +fxch %st(1) +fstpl -80(%edx) +fstpl -72(%edx) +fxch %st(1) +fstpl -64(%edx) +fstpl -56(%edx) +fldl 32(%edx) +fmul %st(0),%st(0) +fldl -40(%edx) +fadd %st(0),%st(0) +fldl -32(%edx) +fadd %st(0),%st(0) +fldl -24(%edx) +fadd %st(0),%st(0) +fldl 16(%edx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 32(%edx) +fmul %st(4),%st(0) +fldl 24(%edx) +fmul %st(4),%st(0) +faddp %st(0),%st(1) +fxch %st(4) +fstl 0(%esp) +fxch %st(3) +fstl 8(%esp) +fxch %st(3) +fmull 24(%edx) +faddp %st(0),%st(1) +fldl 8(%edx) +fxch %st(5) +fmul %st(0),%st(3) +fxch %st(3) +faddp %st(0),%st(1) +fxch %st(2) +fadd %st(0),%st(0) +fldl 16(%edx) 
+fmul %st(2),%st(0) +faddp %st(0),%st(4) +fxch %st(1) +fstl 16(%esp) +fldl 0(%edx) +fxch %st(5) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(3) +fadd %st(0),%st(0) +fstpl 48(%esp) +fldl -16(%edx) +fadd %st(0),%st(0) +fstl 24(%esp) +fldl 8(%edx) +fmul %st(1),%st(0) +faddp %st(0),%st(4) +fmul %st(4),%st(0) +faddp %st(0),%st(2) +fxch %st(3) +fadd %st(0),%st(0) +fstpl 40(%esp) +fldl -8(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +fldl 8(%esp) +fldl 32(%edx) +fmul %st(0),%st(1) +fldl 16(%esp) +fmul %st(0),%st(1) +fldl 24(%edx) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(3) +fldl 24(%esp) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(2) +fldl -8(%edx) +fadd %st(0),%st(0) +fstl 32(%esp) +fmull 0(%edx) +faddp %st(0),%st(6) +fxch %st(3) +faddp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(5),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fsubr %st(0),%st(5) +fldl 16(%edx) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(3) +fldl 32(%esp) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(2) +fldl 8(%edx) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(3) +fxch %st(3) +fmull 40(%esp) +faddp %st(0),%st(1) +fxch %st(3) +fstpl -48(%edx) +fldl 0(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fxch %st(2) +fmull crypto_scalarmult_curve25519_athlon_scale +fxch %st(3) +fstpl 48(%edx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 24(%esp) +fmull 32(%edx) +fldl -40(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(2) +fldl 32(%esp) +fmull 24(%edx) +faddp %st(0),%st(1) +fldl 0(%esp) +fmull -32(%edx) +faddp %st(0),%st(3) +fldl 40(%esp) +fmull 16(%edx) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(2),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fsubr %st(0),%st(2) +faddp %st(0),%st(3) +fldl 
crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(3),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fsubr %st(0),%st(3) +fldl 8(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(2) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -24(%edx) +faddp %st(0),%st(1) +fldl -32(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fsubr %st(0),%st(1) +fxch %st(2) +fstpl 56(%ecx) +fldl 32(%esp) +fmull 32(%edx) +fldl 40(%esp) +fmull 24(%edx) +faddp %st(0),%st(1) +fldl 48(%esp) +fmull 16(%edx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -16(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -24(%edx) +faddp %st(0),%st(1) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(2),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fsubr %st(0),%st(2) +fxch %st(3) +fstpl 64(%ecx) +fldl 40(%esp) +fmull 32(%edx) +fldl 48(%esp) +fmull 24(%edx) +faddp %st(0),%st(1) +fldl 16(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull -8(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -16(%edx) +faddp %st(0),%st(1) +fldl -24(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(3),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fsubr %st(0),%st(3) +fxch %st(1) +fstpl 72(%ecx) +fldl 48(%esp) +fldl 32(%edx) +fmul %st(0),%st(1) +fmul %st(5),%st(0) +fxch %st(5) +fmull 24(%edx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 0(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull -8(%edx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull -16(%edx) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(1),%st(0) +fsubl 
crypto_scalarmult_curve25519_athlon_alpha153 +fsubr %st(0),%st(1) +fxch %st(2) +fstpl 80(%ecx) +fldl 24(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(4) +fxch %st(3) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 8(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull 0(%edx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull -8(%edx) +faddp %st(0),%st(1) +fldl -16(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fsubr %st(0),%st(1) +fldl 24(%edx) +fadd %st(0),%st(0) +fmull 32(%edx) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 16(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull 8(%edx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull 0(%edx) +faddp %st(0),%st(1) +fldl 24(%esp) +fmull -8(%edx) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fsubr %st(0),%st(1) +fldl -48(%edx) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fldl 48(%edx) +fxch %st(1) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(2) +faddp %st(0),%st(1) +fxch %st(4) +fstpl 88(%ecx) +fxch %st(4) +fstpl 96(%ecx) +fxch %st(1) +fstpl 104(%ecx) +fstpl 112(%ecx) +fxch %st(1) +fstpl 120(%ecx) +fstpl 128(%ecx) +fldl 32(%ecx) +fmull crypto_scalarmult_curve25519_athlon_121665 +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fldl 40(%ecx) +fmull crypto_scalarmult_curve25519_athlon_121665 +fadd %st(1),%st(0) +fxch %st(1) +fsubrp %st(0),%st(2) +fxch %st(1) +fstpl 0(%esp) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fsubr %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fxch %st(1) +fstpl 8(%esp) 
+fldl -32(%ecx) +fmull crypto_scalarmult_curve25519_athlon_121665 +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fldl -24(%ecx) +fmull crypto_scalarmult_curve25519_athlon_121665 +fadd %st(1),%st(0) +fxch %st(1) +fsubrp %st(0),%st(2) +fxch %st(1) +fstpl -48(%edx) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fldl -16(%ecx) +fmull crypto_scalarmult_curve25519_athlon_121665 +fadd %st(1),%st(0) +fxch %st(1) +fsubrp %st(0),%st(2) +fxch %st(1) +fstpl -40(%edx) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fldl -8(%ecx) +fmull crypto_scalarmult_curve25519_athlon_121665 +fadd %st(1),%st(0) +fxch %st(1) +fsubrp %st(0),%st(2) +fxch %st(1) +fstpl -32(%edx) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fldl 0(%ecx) +fmull crypto_scalarmult_curve25519_athlon_121665 +fadd %st(1),%st(0) +fxch %st(1) +fsubrp %st(0),%st(2) +fxch %st(1) +fstpl -24(%edx) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fldl 8(%ecx) +fmull crypto_scalarmult_curve25519_athlon_121665 +fadd %st(1),%st(0) +fxch %st(1) +fsubrp %st(0),%st(2) +fxch %st(1) +fstpl -16(%edx) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha153 +fldl 16(%ecx) +fmull crypto_scalarmult_curve25519_athlon_121665 +fadd %st(1),%st(0) +fxch %st(1) +fsubrp %st(0),%st(2) +fxch %st(1) +fstpl -8(%edx) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fldl 24(%ecx) +fmull crypto_scalarmult_curve25519_athlon_121665 +fadd %st(1),%st(0) +fxch %st(1) +fsubrp %st(0),%st(2) +fxch %st(1) +fstpl 0(%edx) +fldl 
crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fldl 0(%esp) +fadd %st(1),%st(0) +fxch %st(1) +fsubrp %st(0),%st(2) +fxch %st(1) +fstpl 8(%edx) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fldl 8(%esp) +fadd %st(1),%st(0) +fxch %st(1) +fsubrp %st(0),%st(2) +fxch %st(1) +fstpl 16(%edx) +fstpl 48(%ecx) +fldl -120(%ecx) +fmull -40(%ecx) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 64(%edx) +fmull -48(%ecx) +faddp %st(0),%st(1) +fldl 72(%edx) +fmull -56(%ecx) +faddp %st(0),%st(1) +fldl 64(%edx) +fmull -40(%ecx) +fldl 80(%edx) +fmull -64(%ecx) +faddp %st(0),%st(2) +fldl 72(%edx) +fmull -48(%ecx) +faddp %st(0),%st(1) +fldl 88(%edx) +fmull -72(%ecx) +faddp %st(0),%st(2) +fldl 80(%edx) +fmull -56(%ecx) +faddp %st(0),%st(1) +fldl 96(%edx) +fmull -80(%ecx) +faddp %st(0),%st(2) +fldl 88(%edx) +fmull -64(%ecx) +faddp %st(0),%st(1) +fldl 72(%edx) +fmull -40(%ecx) +fldl 104(%edx) +fmull -88(%ecx) +faddp %st(0),%st(3) +fldl 96(%edx) +fmull -72(%ecx) +faddp %st(0),%st(2) +fldl 80(%edx) +fmull -48(%ecx) +faddp %st(0),%st(1) +fldl 112(%edx) +fmull -96(%ecx) +faddp %st(0),%st(3) +fldl 104(%edx) +fmull -80(%ecx) +faddp %st(0),%st(2) +fldl 88(%edx) +fmull -56(%ecx) +faddp %st(0),%st(1) +fldl 120(%edx) +fmull -104(%ecx) +faddp %st(0),%st(3) +fldl 112(%edx) +fmull -88(%ecx) +faddp %st(0),%st(2) +fldl 96(%edx) +fmull -64(%ecx) +faddp %st(0),%st(1) +fldl -128(%ecx) +fmull -112(%ecx) +faddp %st(0),%st(3) +fldl 120(%edx) +fmull -96(%ecx) +faddp %st(0),%st(2) +fldl 104(%edx) +fmull -72(%ecx) +faddp %st(0),%st(1) +fldl 80(%edx) +fmull -40(%ecx) +fldl 112(%edx) +fmull -80(%ecx) +faddp %st(0),%st(2) +fldl -128(%ecx) +fmull -104(%ecx) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(4),%st(0) +fldl 88(%edx) +fmull -48(%ecx) +faddp %st(0),%st(2) +fldl 120(%edx) +fmull -88(%ecx) +faddp %st(0),%st(3) +fldl 
-120(%ecx) +fmull -112(%ecx) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fldl 96(%edx) +fmull -56(%ecx) +faddp %st(0),%st(2) +fldl 88(%edx) +fmull -40(%ecx) +fldl -128(%ecx) +fmull -96(%ecx) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fldl 104(%edx) +fmull -64(%ecx) +faddp %st(0),%st(3) +fldl 96(%edx) +fmull -48(%ecx) +faddp %st(0),%st(2) +fsubrp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(4),%st(0) +fldl 112(%edx) +fmull -72(%ecx) +faddp %st(0),%st(3) +fldl 104(%edx) +fmull -56(%ecx) +faddp %st(0),%st(2) +fldl -120(%ecx) +fmull -104(%ecx) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fldl 120(%edx) +fmull -80(%ecx) +faddp %st(0),%st(3) +fldl 96(%edx) +fmull -40(%ecx) +fldl 112(%edx) +fmull -64(%ecx) +faddp %st(0),%st(3) +fldl -128(%ecx) +fmull -88(%ecx) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fsubrp %st(0),%st(5) +fxch %st(5) +fstpl 0(%esp) +fldl 104(%edx) +fmull -48(%ecx) +faddp %st(0),%st(5) +fldl 120(%edx) +fmull -72(%ecx) +faddp %st(0),%st(1) +fldl -120(%ecx) +fmull -96(%ecx) +faddp %st(0),%st(2) +fxch %st(2) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 112(%edx) +fmull -56(%ecx) +faddp %st(0),%st(5) +fldl -128(%ecx) +fmull -80(%ecx) +faddp %st(0),%st(3) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 64(%edx) +fmull -112(%ecx) +faddp %st(0),%st(2) +fxch %st(3) +fstpl 8(%esp) +fldl 104(%edx) +fmull -40(%ecx) +fldl 120(%edx) +fmull -64(%ecx) +faddp %st(0),%st(5) +fldl -120(%ecx) +fmull -88(%ecx) +faddp %st(0),%st(3) +fldl 64(%edx) +fmull -104(%ecx) +faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(2),%st(0) +fldl 112(%edx) +fmull -48(%ecx) +faddp %st(0),%st(2) +fldl -128(%ecx) +fmull -72(%ecx) +faddp %st(0),%st(6) +fxch %st(3) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 72(%edx) +fmull -112(%ecx) +faddp %st(0),%st(5) +fxch %st(3) +fsubl 
crypto_scalarmult_curve25519_athlon_alpha26 +fldl 120(%edx) +fmull -56(%ecx) +faddp %st(0),%st(2) +fldl -120(%ecx) +fmull -80(%ecx) +faddp %st(0),%st(6) +fldl 64(%edx) +fmull -96(%ecx) +faddp %st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(2) +fldl 112(%edx) +fmull -40(%ecx) +fldl -128(%ecx) +fmull -64(%ecx) +faddp %st(0),%st(2) +fxch %st(5) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 72(%edx) +fmull -104(%ecx) +faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(5),%st(0) +fldl 120(%edx) +fmull -48(%ecx) +faddp %st(0),%st(7) +fldl -120(%ecx) +fmull -72(%ecx) +faddp %st(0),%st(3) +fldl 64(%edx) +fmull -88(%ecx) +faddp %st(0),%st(2) +fldl 80(%edx) +fmull -112(%ecx) +faddp %st(0),%st(5) +fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fxch %st(3) +fstpl 16(%esp) +fldl -128(%ecx) +fmull -56(%ecx) +faddp %st(0),%st(6) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 72(%edx) +fmull -96(%ecx) +faddp %st(0),%st(2) +fxch %st(2) +fadd %st(0),%st(3) +fsubrp %st(0),%st(4) +fldl 120(%edx) +fmull -40(%ecx) +fldl -120(%ecx) +fmull -64(%ecx) +faddp %st(0),%st(6) +fldl 64(%edx) +fmull -80(%ecx) +faddp %st(0),%st(3) +fldl 80(%edx) +fmull -104(%ecx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(4),%st(0) +fldl -128(%ecx) +fmull -48(%ecx) +faddp %st(0),%st(2) +fxch %st(6) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 72(%edx) +fmull -88(%ecx) +faddp %st(0),%st(4) +fldl 88(%edx) +fmull -112(%ecx) +faddp %st(0),%st(3) +fxch %st(6) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fxch %st(5) +fstpl 24(%esp) +fldl -120(%ecx) +fmull -56(%ecx) +faddp %st(0),%st(1) +fldl 64(%edx) +fmull -72(%ecx) +faddp %st(0),%st(6) +fldl 80(%edx) +fmull -96(%ecx) +faddp %st(0),%st(3) +fxch %st(4) +fadd %st(0),%st(1) +fsubrp %st(0),%st(3) +fldl -128(%ecx) +fmull -40(%ecx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 72(%edx) +fmull -80(%ecx) +faddp %st(0),%st(6) +fldl 
88(%edx) +fmull -104(%ecx) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(2),%st(0) +fldl -120(%ecx) +fmull -48(%ecx) +faddp %st(0),%st(6) +fldl 64(%edx) +fmull -64(%ecx) +faddp %st(0),%st(2) +fldl 80(%edx) +fmull -88(%ecx) +faddp %st(0),%st(7) +fldl 96(%edx) +fmull -112(%ecx) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fxch %st(4) +fstpl 32(%esp) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 72(%edx) +fmull -72(%ecx) +faddp %st(0),%st(5) +fldl 88(%edx) +fmull -96(%ecx) +faddp %st(0),%st(6) +fxch %st(3) +fadd %st(0),%st(2) +fsubrp %st(0),%st(1) +fldl 64(%edx) +fmull -56(%ecx) +faddp %st(0),%st(3) +fldl 80(%edx) +fmull -80(%ecx) +faddp %st(0),%st(4) +fldl 96(%edx) +fmull -104(%ecx) +faddp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(2),%st(0) +fldl 72(%edx) +fmull -64(%ecx) +faddp %st(0),%st(4) +fldl 88(%edx) +fmull -88(%ecx) +faddp %st(0),%st(5) +fldl 104(%edx) +fmull -112(%ecx) +faddp %st(0),%st(6) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fxch %st(1) +fstpl 40(%esp) +fldl 80(%edx) +fmull -72(%ecx) +faddp %st(0),%st(3) +fldl 96(%edx) +fmull -96(%ecx) +faddp %st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(1) +fstpl 48(%esp) +fldl 88(%edx) +fmull -80(%ecx) +faddp %st(0),%st(1) +fldl 104(%edx) +fmull -104(%ecx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(3),%st(0) +fldl 96(%edx) +fmull -88(%ecx) +faddp %st(0),%st(2) +fldl 112(%edx) +fmull -112(%ecx) +faddp %st(0),%st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha153 +fldl 104(%edx) +fmull -96(%ecx) +faddp %st(0),%st(2) +fadd %st(0),%st(2) +fsubrp %st(0),%st(3) +fxch %st(2) +fstpl 24(%edx) +fldl 112(%edx) +fmull -104(%ecx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fldl 120(%edx) +fmull -112(%ecx) +faddp %st(0),%st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fldl 0(%esp) +fldl 8(%esp) 
+fxch %st(2) +fadd %st(0),%st(4) +fsubrp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(4),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(0),%st(1) +fsubrp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +faddp %st(0),%st(2) +fxch %st(2) +fstpl 32(%edx) +fxch %st(2) +fstpl 48(%edx) +fstpl 56(%edx) +fstpl -112(%ecx) +fldl -48(%edx) +faddl 64(%edx) +fstpl -104(%ecx) +fldl -40(%edx) +faddl 72(%edx) +fstpl -96(%ecx) +fldl -32(%edx) +faddl 80(%edx) +fstpl -88(%ecx) +fldl -24(%edx) +faddl 88(%edx) +fstpl -80(%ecx) +fldl -16(%edx) +faddl 96(%edx) +fstpl -16(%edx) +fldl -8(%edx) +faddl 104(%edx) +fstpl -8(%edx) +fldl 0(%edx) +faddl 112(%edx) +fstpl 0(%edx) +fldl 8(%edx) +faddl 120(%edx) +fstpl 8(%edx) +fldl 16(%edx) +faddl -128(%ecx) +fstpl 16(%edx) +fldl 48(%ecx) +faddl -120(%ecx) +fstpl 80(%edx) +fldl 128(%ecx) +fmull -128(%edx) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 56(%ecx) +fmull 120(%esp) +faddp %st(0),%st(1) +fldl 64(%ecx) +fmull 112(%esp) +faddp %st(0),%st(1) +fldl 56(%ecx) +fmull -128(%edx) +fldl 72(%ecx) +fmull 104(%esp) +faddp %st(0),%st(2) +fldl 64(%ecx) +fmull 120(%esp) +faddp %st(0),%st(1) +fldl 80(%ecx) +fmull 96(%esp) +faddp %st(0),%st(2) +fldl 72(%ecx) +fmull 112(%esp) +faddp %st(0),%st(1) +fldl 88(%ecx) +fmull 88(%esp) +faddp %st(0),%st(2) +fldl 80(%ecx) +fmull 104(%esp) +faddp %st(0),%st(1) +fldl 64(%ecx) +fmull -128(%edx) +fldl 96(%ecx) +fmull 80(%esp) +faddp %st(0),%st(3) +fldl 88(%ecx) +fmull 96(%esp) +faddp %st(0),%st(2) +fldl 72(%ecx) +fmull 120(%esp) +faddp %st(0),%st(1) +fldl 104(%ecx) +fmull 72(%esp) +faddp %st(0),%st(3) +fldl 96(%ecx) +fmull 88(%esp) +faddp %st(0),%st(2) +fldl 80(%ecx) +fmull 112(%esp) +faddp %st(0),%st(1) +fldl 112(%ecx) +fmull 64(%esp) +faddp %st(0),%st(3) +fldl 104(%ecx) +fmull 80(%esp) +faddp %st(0),%st(2) +fldl 88(%ecx) +fmull 104(%esp) +faddp 
%st(0),%st(1) +fldl 120(%ecx) +fmull 56(%esp) +faddp %st(0),%st(3) +fldl 112(%ecx) +fmull 72(%esp) +faddp %st(0),%st(2) +fldl 96(%ecx) +fmull 96(%esp) +faddp %st(0),%st(1) +fldl 72(%ecx) +fmull -128(%edx) +fldl 104(%ecx) +fmull 88(%esp) +faddp %st(0),%st(2) +fldl 120(%ecx) +fmull 64(%esp) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(4),%st(0) +fldl 80(%ecx) +fmull 120(%esp) +faddp %st(0),%st(2) +fldl 112(%ecx) +fmull 80(%esp) +faddp %st(0),%st(3) +fldl 128(%ecx) +fmull 56(%esp) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fldl 88(%ecx) +fmull 112(%esp) +faddp %st(0),%st(2) +fldl 80(%ecx) +fmull -128(%edx) +fldl 120(%ecx) +fmull 72(%esp) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fldl 96(%ecx) +fmull 104(%esp) +faddp %st(0),%st(3) +fldl 88(%ecx) +fmull 120(%esp) +faddp %st(0),%st(2) +fsubrp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(4),%st(0) +fldl 104(%ecx) +fmull 96(%esp) +faddp %st(0),%st(3) +fldl 96(%ecx) +fmull 112(%esp) +faddp %st(0),%st(2) +fldl 128(%ecx) +fmull 64(%esp) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fldl 112(%ecx) +fmull 88(%esp) +faddp %st(0),%st(3) +fldl 88(%ecx) +fmull -128(%edx) +fldl 104(%ecx) +fmull 104(%esp) +faddp %st(0),%st(3) +fldl 120(%ecx) +fmull 80(%esp) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fsubrp %st(0),%st(5) +fxch %st(5) +fstpl 0(%esp) +fldl 96(%ecx) +fmull 120(%esp) +faddp %st(0),%st(5) +fldl 112(%ecx) +fmull 96(%esp) +faddp %st(0),%st(1) +fldl 128(%ecx) +fmull 72(%esp) +faddp %st(0),%st(2) +fxch %st(2) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 104(%ecx) +fmull 112(%esp) +faddp %st(0),%st(5) +fldl 120(%ecx) +fmull 88(%esp) +faddp %st(0),%st(3) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 56(%ecx) +fmull 56(%esp) +faddp %st(0),%st(2) +fxch %st(3) +fstpl 8(%esp) +fldl 96(%ecx) +fmull -128(%edx) +fldl 112(%ecx) +fmull 104(%esp) +faddp 
%st(0),%st(5) +fldl 128(%ecx) +fmull 80(%esp) +faddp %st(0),%st(3) +fldl 56(%ecx) +fmull 64(%esp) +faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(2),%st(0) +fldl 104(%ecx) +fmull 120(%esp) +faddp %st(0),%st(2) +fldl 120(%ecx) +fmull 96(%esp) +faddp %st(0),%st(6) +fxch %st(3) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 64(%ecx) +fmull 56(%esp) +faddp %st(0),%st(5) +fxch %st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fldl 112(%ecx) +fmull 112(%esp) +faddp %st(0),%st(2) +fldl 128(%ecx) +fmull 88(%esp) +faddp %st(0),%st(6) +fldl 56(%ecx) +fmull 72(%esp) +faddp %st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(2) +fldl 104(%ecx) +fmull -128(%edx) +fldl 120(%ecx) +fmull 104(%esp) +faddp %st(0),%st(2) +fxch %st(5) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 64(%ecx) +fmull 64(%esp) +faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(5),%st(0) +fldl 112(%ecx) +fmull 120(%esp) +faddp %st(0),%st(7) +fldl 128(%ecx) +fmull 96(%esp) +faddp %st(0),%st(3) +fldl 56(%ecx) +fmull 80(%esp) +faddp %st(0),%st(2) +fldl 72(%ecx) +fmull 56(%esp) +faddp %st(0),%st(5) +fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fxch %st(3) +fstpl -48(%edx) +fldl 120(%ecx) +fmull 112(%esp) +faddp %st(0),%st(6) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 64(%ecx) +fmull 72(%esp) +faddp %st(0),%st(2) +fxch %st(2) +fadd %st(0),%st(3) +fsubrp %st(0),%st(4) +fldl 112(%ecx) +fmull -128(%edx) +fldl 128(%ecx) +fmull 104(%esp) +faddp %st(0),%st(6) +fldl 56(%ecx) +fmull 88(%esp) +faddp %st(0),%st(3) +fldl 72(%ecx) +fmull 64(%esp) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(4),%st(0) +fldl 120(%ecx) +fmull 120(%esp) +faddp %st(0),%st(2) +fxch %st(6) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 64(%ecx) +fmull 80(%esp) +faddp %st(0),%st(4) +fldl 80(%ecx) +fmull 56(%esp) +faddp %st(0),%st(3) +fxch %st(6) +fsubl 
crypto_scalarmult_curve25519_athlon_alpha77 +fxch %st(5) +fstpl -40(%edx) +fldl 128(%ecx) +fmull 112(%esp) +faddp %st(0),%st(1) +fldl 56(%ecx) +fmull 96(%esp) +faddp %st(0),%st(6) +fldl 72(%ecx) +fmull 72(%esp) +faddp %st(0),%st(3) +fxch %st(4) +fadd %st(0),%st(1) +fsubrp %st(0),%st(3) +fldl 120(%ecx) +fmull -128(%edx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 64(%ecx) +fmull 88(%esp) +faddp %st(0),%st(6) +fldl 80(%ecx) +fmull 64(%esp) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(2),%st(0) +fldl 128(%ecx) +fmull 120(%esp) +faddp %st(0),%st(6) +fldl 56(%ecx) +fmull 104(%esp) +faddp %st(0),%st(2) +fldl 72(%ecx) +fmull 80(%esp) +faddp %st(0),%st(7) +fldl 88(%ecx) +fmull 56(%esp) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fxch %st(4) +fstpl -32(%edx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 64(%ecx) +fmull 96(%esp) +faddp %st(0),%st(5) +fldl 80(%ecx) +fmull 72(%esp) +faddp %st(0),%st(6) +fxch %st(3) +fadd %st(0),%st(2) +fsubrp %st(0),%st(1) +fldl 56(%ecx) +fmull 112(%esp) +faddp %st(0),%st(3) +fldl 72(%ecx) +fmull 88(%esp) +faddp %st(0),%st(4) +fldl 88(%ecx) +fmull 64(%esp) +faddp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(2),%st(0) +fldl 64(%ecx) +fmull 104(%esp) +faddp %st(0),%st(4) +fldl 80(%ecx) +fmull 80(%esp) +faddp %st(0),%st(5) +fldl 96(%ecx) +fmull 56(%esp) +faddp %st(0),%st(6) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fxch %st(1) +fstpl -24(%edx) +fldl 72(%ecx) +fmull 96(%esp) +faddp %st(0),%st(3) +fldl 88(%ecx) +fmull 72(%esp) +faddp %st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(1) +fstpl 96(%edx) +fldl 80(%ecx) +fmull 88(%esp) +faddp %st(0),%st(1) +fldl 96(%ecx) +fmull 64(%esp) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(3),%st(0) +fldl 88(%ecx) +fmull 80(%esp) +faddp %st(0),%st(2) +fldl 104(%ecx) +fmull 56(%esp) +faddp %st(0),%st(3) +fsubl 
crypto_scalarmult_curve25519_athlon_alpha153 +fldl 96(%ecx) +fmull 72(%esp) +faddp %st(0),%st(2) +fadd %st(0),%st(2) +fsubrp %st(0),%st(3) +fxch %st(2) +fstpl 104(%edx) +fldl 104(%ecx) +fmull 64(%esp) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fldl 112(%ecx) +fmull 56(%esp) +faddp %st(0),%st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fldl 0(%esp) +fldl 8(%esp) +fxch %st(2) +fadd %st(0),%st(4) +fsubrp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(4),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(0),%st(1) +fsubrp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +faddp %st(0),%st(2) +fxch %st(2) +fstpl 112(%edx) +fxch %st(2) +fstpl 120(%edx) +fstpl -128(%ecx) +fstpl -120(%ecx) +fldl 80(%edx) +fmull 40(%ecx) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -104(%ecx) +fmull 32(%ecx) +faddp %st(0),%st(1) +fldl -96(%ecx) +fmull 24(%ecx) +faddp %st(0),%st(1) +fldl -104(%ecx) +fmull 40(%ecx) +fldl -88(%ecx) +fmull 16(%ecx) +faddp %st(0),%st(2) +fldl -96(%ecx) +fmull 32(%ecx) +faddp %st(0),%st(1) +fldl -80(%ecx) +fmull 8(%ecx) +faddp %st(0),%st(2) +fldl -88(%ecx) +fmull 24(%ecx) +faddp %st(0),%st(1) +fldl -16(%edx) +fmull 0(%ecx) +faddp %st(0),%st(2) +fldl -80(%ecx) +fmull 16(%ecx) +faddp %st(0),%st(1) +fldl -96(%ecx) +fmull 40(%ecx) +fldl -8(%edx) +fmull -8(%ecx) +faddp %st(0),%st(3) +fldl -16(%edx) +fmull 8(%ecx) +faddp %st(0),%st(2) +fldl -88(%ecx) +fmull 32(%ecx) +faddp %st(0),%st(1) +fldl 0(%edx) +fmull -16(%ecx) +faddp %st(0),%st(3) +fldl -8(%edx) +fmull 0(%ecx) +faddp %st(0),%st(2) +fldl -80(%ecx) +fmull 24(%ecx) +faddp %st(0),%st(1) +fldl 8(%edx) +fmull -24(%ecx) +faddp %st(0),%st(3) +fldl 0(%edx) +fmull -8(%ecx) +faddp %st(0),%st(2) +fldl -16(%edx) +fmull 16(%ecx) +faddp %st(0),%st(1) +fldl 16(%edx) +fmull -32(%ecx) +faddp %st(0),%st(3) 
+fldl 8(%edx) +fmull -16(%ecx) +faddp %st(0),%st(2) +fldl -8(%edx) +fmull 8(%ecx) +faddp %st(0),%st(1) +fldl -88(%ecx) +fmull 40(%ecx) +fldl 0(%edx) +fmull 0(%ecx) +faddp %st(0),%st(2) +fldl 16(%edx) +fmull -24(%ecx) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(4),%st(0) +fldl -80(%ecx) +fmull 32(%ecx) +faddp %st(0),%st(2) +fldl 8(%edx) +fmull -8(%ecx) +faddp %st(0),%st(3) +fldl 80(%edx) +fmull -32(%ecx) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fldl -16(%edx) +fmull 24(%ecx) +faddp %st(0),%st(2) +fldl -80(%ecx) +fmull 40(%ecx) +fldl 16(%edx) +fmull -16(%ecx) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fldl -8(%edx) +fmull 16(%ecx) +faddp %st(0),%st(3) +fldl -16(%edx) +fmull 32(%ecx) +faddp %st(0),%st(2) +fsubrp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(4),%st(0) +fldl 0(%edx) +fmull 8(%ecx) +faddp %st(0),%st(3) +fldl -8(%edx) +fmull 24(%ecx) +faddp %st(0),%st(2) +fldl 80(%edx) +fmull -24(%ecx) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fldl 8(%edx) +fmull 0(%ecx) +faddp %st(0),%st(3) +fldl -16(%edx) +fmull 40(%ecx) +fldl 0(%edx) +fmull 16(%ecx) +faddp %st(0),%st(3) +fldl 16(%edx) +fmull -8(%ecx) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fsubrp %st(0),%st(5) +fxch %st(5) +fstpl 0(%esp) +fldl -8(%edx) +fmull 32(%ecx) +faddp %st(0),%st(5) +fldl 8(%edx) +fmull 8(%ecx) +faddp %st(0),%st(1) +fldl 80(%edx) +fmull -16(%ecx) +faddp %st(0),%st(2) +fxch %st(2) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%edx) +fmull 24(%ecx) +faddp %st(0),%st(5) +fldl 16(%edx) +fmull 0(%ecx) +faddp %st(0),%st(3) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -104(%ecx) +fmull -32(%ecx) +faddp %st(0),%st(2) +fxch %st(3) +fstpl 8(%esp) +fldl -8(%edx) +fmull 40(%ecx) +fldl 8(%edx) +fmull 16(%ecx) +faddp %st(0),%st(5) +fldl 80(%edx) +fmull -8(%ecx) +faddp %st(0),%st(3) +fldl -104(%ecx) +fmull -24(%ecx) 
+faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(2),%st(0) +fldl 0(%edx) +fmull 32(%ecx) +faddp %st(0),%st(2) +fldl 16(%edx) +fmull 8(%ecx) +faddp %st(0),%st(6) +fxch %st(3) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -96(%ecx) +fmull -32(%ecx) +faddp %st(0),%st(5) +fxch %st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fldl 8(%edx) +fmull 24(%ecx) +faddp %st(0),%st(2) +fldl 80(%edx) +fmull 0(%ecx) +faddp %st(0),%st(6) +fldl -104(%ecx) +fmull -16(%ecx) +faddp %st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(2) +fldl 0(%edx) +fmull 40(%ecx) +fldl 16(%edx) +fmull 16(%ecx) +faddp %st(0),%st(2) +fxch %st(5) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -96(%ecx) +fmull -24(%ecx) +faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(5),%st(0) +fldl 8(%edx) +fmull 32(%ecx) +faddp %st(0),%st(7) +fldl 80(%edx) +fmull 8(%ecx) +faddp %st(0),%st(3) +fldl -104(%ecx) +fmull -8(%ecx) +faddp %st(0),%st(2) +fldl -88(%ecx) +fmull -32(%ecx) +faddp %st(0),%st(5) +fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fxch %st(3) +fstpl 64(%edx) +fldl 16(%edx) +fmull 24(%ecx) +faddp %st(0),%st(6) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -96(%ecx) +fmull -16(%ecx) +faddp %st(0),%st(2) +fxch %st(2) +fadd %st(0),%st(3) +fsubrp %st(0),%st(4) +fldl 8(%edx) +fmull 40(%ecx) +fldl 80(%edx) +fmull 16(%ecx) +faddp %st(0),%st(6) +fldl -104(%ecx) +fmull 0(%ecx) +faddp %st(0),%st(3) +fldl -88(%ecx) +fmull -24(%ecx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(4),%st(0) +fldl 16(%edx) +fmull 32(%ecx) +faddp %st(0),%st(2) +fxch %st(6) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -96(%ecx) +fmull -8(%ecx) +faddp %st(0),%st(4) +fldl -80(%ecx) +fmull -32(%ecx) +faddp %st(0),%st(3) +fxch %st(6) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fxch %st(5) +fstpl 72(%edx) +fldl 80(%edx) +fmull 24(%ecx) +faddp %st(0),%st(1) +fldl -104(%ecx) 
+fmull 8(%ecx) +faddp %st(0),%st(6) +fldl -88(%ecx) +fmull -16(%ecx) +faddp %st(0),%st(3) +fxch %st(4) +fadd %st(0),%st(1) +fsubrp %st(0),%st(3) +fldl 16(%edx) +fmull 40(%ecx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -96(%ecx) +fmull 0(%ecx) +faddp %st(0),%st(6) +fldl -80(%ecx) +fmull -24(%ecx) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(2),%st(0) +fldl 80(%edx) +fmull 32(%ecx) +faddp %st(0),%st(6) +fldl -104(%ecx) +fmull 16(%ecx) +faddp %st(0),%st(2) +fldl -88(%ecx) +fmull -8(%ecx) +faddp %st(0),%st(7) +fldl -16(%edx) +fmull -32(%ecx) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fxch %st(4) +fstpl 80(%edx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl -96(%ecx) +fmull 8(%ecx) +faddp %st(0),%st(5) +fldl -80(%ecx) +fmull -16(%ecx) +faddp %st(0),%st(6) +fxch %st(3) +fadd %st(0),%st(2) +fsubrp %st(0),%st(1) +fldl -104(%ecx) +fmull 24(%ecx) +faddp %st(0),%st(3) +fldl -88(%ecx) +fmull 0(%ecx) +faddp %st(0),%st(4) +fldl -16(%edx) +fmull -24(%ecx) +faddp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(2),%st(0) +fldl -96(%ecx) +fmull 16(%ecx) +faddp %st(0),%st(4) +fldl -80(%ecx) +fmull -8(%ecx) +faddp %st(0),%st(5) +fldl -8(%edx) +fmull -32(%ecx) +faddp %st(0),%st(6) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fxch %st(1) +fstpl 88(%edx) +fldl -88(%ecx) +fmull 8(%ecx) +faddp %st(0),%st(3) +fldl -16(%edx) +fmull -16(%ecx) +faddp %st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(1) +fstpl -104(%ecx) +fldl -80(%ecx) +fmull 0(%ecx) +faddp %st(0),%st(1) +fldl -8(%edx) +fmull -24(%ecx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(3),%st(0) +fldl -16(%edx) +fmull -8(%ecx) +faddp %st(0),%st(2) +fldl 0(%edx) +fmull -32(%ecx) +faddp %st(0),%st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha153 +fldl -8(%edx) +fmull -16(%ecx) +faddp %st(0),%st(2) +fadd %st(0),%st(2) +fsubrp %st(0),%st(3) 
+fxch %st(2) +fstpl -96(%ecx) +fldl 0(%edx) +fmull -24(%ecx) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fldl 8(%edx) +fmull -32(%ecx) +faddp %st(0),%st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fldl 0(%esp) +fldl 8(%esp) +fxch %st(2) +fadd %st(0),%st(4) +fsubrp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(4),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(0),%st(1) +fsubrp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +faddp %st(0),%st(2) +fxch %st(2) +fstpl -88(%ecx) +fxch %st(2) +fstpl -80(%ecx) +fstpl -72(%ecx) +fstpl -64(%ecx) +fldl 136(%ecx) +fldl -120(%edx) +fldl 16(%esp) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 16(%esp) +fxch %st(1) +fstpl -16(%edx) +fstpl 0(%esp) +fldl -112(%edx) +fldl 24(%esp) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 24(%esp) +fxch %st(1) +fstpl -8(%edx) +fstpl 8(%esp) +fldl -104(%edx) +fldl 32(%esp) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 32(%esp) +fxch %st(1) +fstpl 0(%edx) +fstpl 16(%esp) +fldl 40(%edx) +fldl 40(%esp) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 40(%esp) +fxch %st(1) +fstpl 8(%edx) +fstpl 24(%esp) +fldl -96(%edx) +fldl 48(%esp) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 48(%esp) +fxch %st(1) +fstpl 16(%edx) +fstpl 32(%esp) +fldl -88(%edx) +fldl 24(%edx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 24(%edx) +fxch %st(1) +fstpl 24(%edx) +fstpl 40(%esp) +fldl -80(%edx) +fldl 32(%edx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 32(%edx) +fxch %st(1) +fstpl 32(%edx) +fstpl 48(%esp) +fldl -72(%edx) +fldl 48(%edx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 48(%edx) +fxch %st(1) +fstpl 40(%edx) 
+fstpl -120(%edx) +fldl -64(%edx) +fldl 56(%edx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 56(%edx) +fxch %st(1) +fstpl 48(%edx) +fstpl -112(%edx) +fldl -56(%edx) +fldl -112(%ecx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl -112(%ecx) +fxch %st(1) +fstpl 56(%edx) +fstpl -104(%edx) +fldl -48(%edx) +fldl 64(%edx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 64(%edx) +fxch %st(1) +fstpl 64(%edx) +fstpl -96(%edx) +fldl -40(%edx) +fldl 72(%edx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 72(%edx) +fxch %st(1) +fstpl 72(%edx) +fstpl -88(%edx) +fldl -32(%edx) +fldl 80(%edx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 80(%edx) +fxch %st(1) +fstpl 80(%edx) +fstpl -80(%edx) +fldl -24(%edx) +fldl 88(%edx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl 88(%edx) +fxch %st(1) +fstpl 88(%edx) +fstpl -72(%edx) +fldl 96(%edx) +fldl -104(%ecx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl -104(%ecx) +fxch %st(1) +fstpl 96(%edx) +fstpl -64(%edx) +fldl 104(%edx) +fldl -96(%ecx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl -96(%ecx) +fxch %st(1) +fstpl 104(%edx) +fstpl -56(%edx) +fldl 112(%edx) +fldl -88(%ecx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl -88(%ecx) +fxch %st(1) +fstpl 112(%edx) +fstpl -48(%edx) +fldl 120(%edx) +fldl -80(%ecx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl -80(%ecx) +fxch %st(1) +fstpl 120(%edx) +fstpl -40(%edx) +fldl -128(%ecx) +fldl -72(%ecx) +fsubr %st(1),%st(0) +fmul %st(2),%st(0) +fsubr %st(0),%st(1) +faddl -72(%ecx) +fxch %st(1) +fstpl -128(%ecx) +fstpl -32(%edx) +fldl -120(%ecx) +fldl -64(%ecx) +fsubr %st(1),%st(0) +fmulp %st(0),%st(2) +fsub %st(1),%st(0) +fxch %st(1) +faddl -64(%ecx) +fxch %st(1) +fstpl -120(%ecx) +fstpl -24(%edx) +movl 180(%ecx),%esi +movl 184(%ecx),%ebp +sub $1,%ebp +ja ._morebits +movl 
188(%ecx),%edi +sub $4,%edi +jb ._done +movl (%ebx,%edi),%esi +mov $32,%ebp +jmp ._morebytes +._done: +movl 4(%esp,%eax),%eax +fldl 0(%esp) +fstpl 0(%eax) +fldl 8(%esp) +fstpl 8(%eax) +fldl 16(%esp) +fstpl 16(%eax) +fldl 24(%esp) +fstpl 24(%eax) +fldl 32(%esp) +fstpl 32(%eax) +fldl 40(%esp) +fstpl 40(%eax) +fldl 48(%esp) +fstpl 48(%eax) +fldl -120(%edx) +fstpl 56(%eax) +fldl -112(%edx) +fstpl 64(%eax) +fldl -104(%edx) +fstpl 72(%eax) +fldl -96(%edx) +fstpl 80(%eax) +fldl -88(%edx) +fstpl 88(%eax) +fldl -80(%edx) +fstpl 96(%eax) +fldl -72(%edx) +fstpl 104(%eax) +fldl -64(%edx) +fstpl 112(%eax) +fldl -56(%edx) +fstpl 120(%eax) +fldl -48(%edx) +fstpl 128(%eax) +fldl -40(%edx) +fstpl 136(%eax) +fldl -32(%edx) +fstpl 144(%eax) +fldl -24(%edx) +fstpl 152(%eax) +movl 160(%ecx),%eax +movl 164(%ecx),%ebx +movl 168(%ecx),%esi +movl 172(%ecx),%edi +movl 176(%ecx),%ebp +add %eax,%esp +ret diff --git a/nacl/crypto_scalarmult/curve25519/athlon/mult.s b/nacl/crypto_scalarmult/curve25519/athlon/mult.s new file mode 100644 index 00000000..16f0e908 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/athlon/mult.s @@ -0,0 +1,410 @@ +.text +.p2align 5 +.globl _crypto_scalarmult_curve25519_athlon_mult +.globl crypto_scalarmult_curve25519_athlon_mult +_crypto_scalarmult_curve25519_athlon_mult: +crypto_scalarmult_curve25519_athlon_mult: +mov %esp,%eax +and $31,%eax +add $32,%eax +sub %eax,%esp +movl %ebp,0(%esp) +movl 4(%esp,%eax),%ecx +movl 8(%esp,%eax),%edx +movl 12(%esp,%eax),%ebp +fldl 72(%edx) +fmull 72(%ebp) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%edx) +fmull 64(%ebp) +faddp %st(0),%st(1) +fldl 8(%edx) +fmull 56(%ebp) +faddp %st(0),%st(1) +fldl 0(%edx) +fmull 72(%ebp) +fldl 16(%edx) +fmull 48(%ebp) +faddp %st(0),%st(2) +fldl 8(%edx) +fmull 64(%ebp) +faddp %st(0),%st(1) +fldl 24(%edx) +fmull 40(%ebp) +faddp %st(0),%st(2) +fldl 16(%edx) +fmull 56(%ebp) +faddp %st(0),%st(1) +fldl 32(%edx) +fmull 32(%ebp) +faddp %st(0),%st(2) +fldl 24(%edx) +fmull 48(%ebp) +faddp 
%st(0),%st(1) +fldl 8(%edx) +fmull 72(%ebp) +fldl 40(%edx) +fmull 24(%ebp) +faddp %st(0),%st(3) +fldl 32(%edx) +fmull 40(%ebp) +faddp %st(0),%st(2) +fldl 16(%edx) +fmull 64(%ebp) +faddp %st(0),%st(1) +fldl 48(%edx) +fmull 16(%ebp) +faddp %st(0),%st(3) +fldl 40(%edx) +fmull 32(%ebp) +faddp %st(0),%st(2) +fldl 24(%edx) +fmull 56(%ebp) +faddp %st(0),%st(1) +fldl 56(%edx) +fmull 8(%ebp) +faddp %st(0),%st(3) +fldl 48(%edx) +fmull 24(%ebp) +faddp %st(0),%st(2) +fldl 32(%edx) +fmull 48(%ebp) +faddp %st(0),%st(1) +fldl 64(%edx) +fmull 0(%ebp) +faddp %st(0),%st(3) +fldl 56(%edx) +fmull 16(%ebp) +faddp %st(0),%st(2) +fldl 40(%edx) +fmull 40(%ebp) +faddp %st(0),%st(1) +fldl 16(%edx) +fmull 72(%ebp) +fldl 48(%edx) +fmull 32(%ebp) +faddp %st(0),%st(2) +fldl 64(%edx) +fmull 8(%ebp) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(4),%st(0) +fldl 24(%edx) +fmull 64(%ebp) +faddp %st(0),%st(2) +fldl 56(%edx) +fmull 24(%ebp) +faddp %st(0),%st(3) +fldl 72(%edx) +fmull 0(%ebp) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fldl 32(%edx) +fmull 56(%ebp) +faddp %st(0),%st(2) +fldl 24(%edx) +fmull 72(%ebp) +fldl 64(%edx) +fmull 16(%ebp) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fldl 40(%edx) +fmull 48(%ebp) +faddp %st(0),%st(3) +fldl 32(%edx) +fmull 64(%ebp) +faddp %st(0),%st(2) +fsubrp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(4),%st(0) +fldl 48(%edx) +fmull 40(%ebp) +faddp %st(0),%st(3) +fldl 40(%edx) +fmull 56(%ebp) +faddp %st(0),%st(2) +fldl 72(%edx) +fmull 8(%ebp) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fldl 56(%edx) +fmull 32(%ebp) +faddp %st(0),%st(3) +fldl 32(%edx) +fmull 72(%ebp) +fldl 48(%edx) +fmull 48(%ebp) +faddp %st(0),%st(3) +fldl 64(%edx) +fmull 24(%ebp) +faddp %st(0),%st(4) +fxch %st(1) +fadd %st(0),%st(4) +fsubrp %st(0),%st(5) +fxch %st(5) +fstpl 64(%ecx) +fldl 40(%edx) +fmull 64(%ebp) +faddp %st(0),%st(5) +fldl 56(%edx) +fmull 
40(%ebp) +faddp %st(0),%st(1) +fldl 72(%edx) +fmull 16(%ebp) +faddp %st(0),%st(2) +fxch %st(2) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 48(%edx) +fmull 56(%ebp) +faddp %st(0),%st(5) +fldl 64(%edx) +fmull 32(%ebp) +faddp %st(0),%st(3) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%edx) +fmull 0(%ebp) +faddp %st(0),%st(2) +fxch %st(3) +fstpl 72(%ecx) +fldl 40(%edx) +fmull 72(%ebp) +fldl 56(%edx) +fmull 48(%ebp) +faddp %st(0),%st(5) +fldl 72(%edx) +fmull 24(%ebp) +faddp %st(0),%st(3) +fldl 0(%edx) +fmull 8(%ebp) +faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(2),%st(0) +fldl 48(%edx) +fmull 64(%ebp) +faddp %st(0),%st(2) +fldl 64(%edx) +fmull 40(%ebp) +faddp %st(0),%st(6) +fxch %st(3) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%edx) +fmull 0(%ebp) +faddp %st(0),%st(5) +fxch %st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fldl 56(%edx) +fmull 56(%ebp) +faddp %st(0),%st(2) +fldl 72(%edx) +fmull 32(%ebp) +faddp %st(0),%st(6) +fldl 0(%edx) +fmull 16(%ebp) +faddp %st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(2) +fldl 48(%edx) +fmull 72(%ebp) +fldl 64(%edx) +fmull 48(%ebp) +faddp %st(0),%st(2) +fxch %st(5) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%edx) +fmull 8(%ebp) +faddp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(5),%st(0) +fldl 56(%edx) +fmull 64(%ebp) +faddp %st(0),%st(7) +fldl 72(%edx) +fmull 40(%ebp) +faddp %st(0),%st(3) +fldl 0(%edx) +fmull 24(%ebp) +faddp %st(0),%st(2) +fldl 16(%edx) +fmull 0(%ebp) +faddp %st(0),%st(5) +fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fxch %st(3) +fstpl 0(%ecx) +fldl 64(%edx) +fmull 56(%ebp) +faddp %st(0),%st(6) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%edx) +fmull 16(%ebp) +faddp %st(0),%st(2) +fxch %st(2) +fadd %st(0),%st(3) +fsubrp %st(0),%st(4) +fldl 56(%edx) +fmull 72(%ebp) +fldl 72(%edx) +fmull 48(%ebp) +faddp %st(0),%st(6) +fldl 0(%edx) +fmull 
32(%ebp) +faddp %st(0),%st(3) +fldl 16(%edx) +fmull 8(%ebp) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(4),%st(0) +fldl 64(%edx) +fmull 64(%ebp) +faddp %st(0),%st(2) +fxch %st(6) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%edx) +fmull 24(%ebp) +faddp %st(0),%st(4) +fldl 24(%edx) +fmull 0(%ebp) +faddp %st(0),%st(3) +fxch %st(6) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fxch %st(5) +fstpl 8(%ecx) +fldl 72(%edx) +fmull 56(%ebp) +faddp %st(0),%st(1) +fldl 0(%edx) +fmull 40(%ebp) +faddp %st(0),%st(6) +fldl 16(%edx) +fmull 16(%ebp) +faddp %st(0),%st(3) +fxch %st(4) +fadd %st(0),%st(1) +fsubrp %st(0),%st(3) +fldl 64(%edx) +fmull 72(%ebp) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%edx) +fmull 32(%ebp) +faddp %st(0),%st(6) +fldl 24(%edx) +fmull 8(%ebp) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(2),%st(0) +fldl 72(%edx) +fmull 64(%ebp) +faddp %st(0),%st(6) +fldl 0(%edx) +fmull 48(%ebp) +faddp %st(0),%st(2) +fldl 16(%edx) +fmull 24(%ebp) +faddp %st(0),%st(7) +fldl 32(%edx) +fmull 0(%ebp) +faddp %st(0),%st(4) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fxch %st(4) +fstpl 16(%ecx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 8(%edx) +fmull 40(%ebp) +faddp %st(0),%st(5) +fldl 24(%edx) +fmull 16(%ebp) +faddp %st(0),%st(6) +fxch %st(3) +fadd %st(0),%st(2) +fsubrp %st(0),%st(1) +fldl 0(%edx) +fmull 56(%ebp) +faddp %st(0),%st(3) +fldl 16(%edx) +fmull 32(%ebp) +faddp %st(0),%st(4) +fldl 32(%edx) +fmull 8(%ebp) +faddp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(2),%st(0) +fldl 8(%edx) +fmull 48(%ebp) +faddp %st(0),%st(4) +fldl 24(%edx) +fmull 24(%ebp) +faddp %st(0),%st(5) +fldl 40(%edx) +fmull 0(%ebp) +faddp %st(0),%st(6) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fxch %st(1) +fstpl 24(%ecx) +fldl 16(%edx) +fmull 40(%ebp) +faddp %st(0),%st(3) +fldl 32(%edx) +fmull 16(%ebp) +faddp 
%st(0),%st(4) +fadd %st(0),%st(4) +fsubrp %st(0),%st(1) +fstpl 32(%ecx) +fldl 24(%edx) +fmull 32(%ebp) +faddp %st(0),%st(1) +fldl 40(%edx) +fmull 8(%ebp) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(3),%st(0) +fldl 32(%edx) +fmull 24(%ebp) +faddp %st(0),%st(2) +fldl 48(%edx) +fmull 0(%ebp) +faddp %st(0),%st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha153 +fldl 40(%edx) +fmull 16(%ebp) +faddp %st(0),%st(2) +fadd %st(0),%st(2) +fsubrp %st(0),%st(3) +fxch %st(2) +fstpl 40(%ecx) +fldl 48(%edx) +fmull 8(%ebp) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fldl 56(%edx) +fmull 0(%ebp) +faddp %st(0),%st(3) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fldl 64(%ecx) +fldl 72(%ecx) +fxch %st(2) +fadd %st(0),%st(4) +fsubrp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(4),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(0),%st(1) +fsubrp %st(0),%st(4) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +faddp %st(0),%st(2) +fxch %st(2) +fstpl 48(%ecx) +fxch %st(2) +fstpl 56(%ecx) +fstpl 64(%ecx) +fstpl 72(%ecx) +movl 0(%esp),%ebp +add %eax,%esp +ret diff --git a/nacl/crypto_scalarmult/curve25519/athlon/smult.c b/nacl/crypto_scalarmult/curve25519/athlon/smult.c new file mode 100644 index 00000000..157f1e6c --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/athlon/smult.c @@ -0,0 +1,91 @@ +#include "crypto_scalarmult.h" + +#define mult crypto_scalarmult_curve25519_athlon_mult +#define square crypto_scalarmult_curve25519_athlon_square + +void crypto_scalarmult_curve25519_athlon_recip(double out[10],const double z[10]) +{ + double z2[10]; + double z9[10]; + double z11[10]; + double z2_5_0[10]; + double z2_10_0[10]; + double z2_20_0[10]; + double z2_50_0[10]; + double z2_100_0[10]; + double t0[10]; + double t1[10]; + int i; + + /* 2 */ 
square(z2,z); + /* 4 */ square(t1,z2); + /* 8 */ square(t0,t1); + /* 9 */ mult(z9,t0,z); + /* 11 */ mult(z11,z9,z2); + /* 22 */ square(t0,z11); + /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9); + + /* 2^6 - 2^1 */ square(t0,z2_5_0); + /* 2^7 - 2^2 */ square(t1,t0); + /* 2^8 - 2^3 */ square(t0,t1); + /* 2^9 - 2^4 */ square(t1,t0); + /* 2^10 - 2^5 */ square(t0,t1); + /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0); + + /* 2^11 - 2^1 */ square(t0,z2_10_0); + /* 2^12 - 2^2 */ square(t1,t0); + /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); } + /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0); + + /* 2^21 - 2^1 */ square(t0,z2_20_0); + /* 2^22 - 2^2 */ square(t1,t0); + /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); } + /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0); + + /* 2^41 - 2^1 */ square(t1,t0); + /* 2^42 - 2^2 */ square(t0,t1); + /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); } + /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0); + + /* 2^51 - 2^1 */ square(t0,z2_50_0); + /* 2^52 - 2^2 */ square(t1,t0); + /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } + /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0); + + /* 2^101 - 2^1 */ square(t1,z2_100_0); + /* 2^102 - 2^2 */ square(t0,t1); + /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); } + /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0); + + /* 2^201 - 2^1 */ square(t0,t1); + /* 2^202 - 2^2 */ square(t1,t0); + /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } + /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0); + + /* 2^251 - 2^1 */ square(t1,t0); + /* 2^252 - 2^2 */ square(t0,t1); + /* 2^253 - 2^3 */ square(t1,t0); + /* 2^254 - 2^4 */ square(t0,t1); + /* 2^255 - 2^5 */ square(t1,t0); + /* 2^255 - 21 */ mult(out,t1,z11); +} + +int crypto_scalarmult(unsigned char *q, + const unsigned char *n, + const unsigned char *p) +{ + double work[30]; + unsigned char e[32]; + int i; + for (i = 0;i < 32;++i) e[i] = n[i]; + 
e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + crypto_scalarmult_curve25519_athlon_init(); + crypto_scalarmult_curve25519_athlon_todouble(work,p); + crypto_scalarmult_curve25519_athlon_mainloop(work,e); + crypto_scalarmult_curve25519_athlon_recip(work + 10,work + 10); + mult(work + 20,work,work + 10); + crypto_scalarmult_curve25519_athlon_fromdouble(q,work + 20); + return 0; +} diff --git a/nacl/crypto_scalarmult/curve25519/athlon/square.s b/nacl/crypto_scalarmult/curve25519/athlon/square.s new file mode 100644 index 00000000..754def78 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/athlon/square.s @@ -0,0 +1,298 @@ +.text +.p2align 5 +.globl _crypto_scalarmult_curve25519_athlon_square +.globl crypto_scalarmult_curve25519_athlon_square +_crypto_scalarmult_curve25519_athlon_square: +crypto_scalarmult_curve25519_athlon_square: +mov %esp,%eax +and $31,%eax +add $64,%eax +sub %eax,%esp +movl 8(%esp,%eax),%edx +movl 4(%esp,%eax),%ecx +fldl 72(%edx) +fmul %st(0),%st(0) +fldl 0(%edx) +fadd %st(0),%st(0) +fldl 8(%edx) +fadd %st(0),%st(0) +fldl 16(%edx) +fadd %st(0),%st(0) +fldl 56(%edx) +fxch %st(4) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 72(%edx) +fmul %st(4),%st(0) +fldl 64(%edx) +fmul %st(4),%st(0) +faddp %st(0),%st(1) +fxch %st(4) +fstl 0(%esp) +fxch %st(3) +fstl 8(%esp) +fxch %st(3) +fmull 64(%edx) +faddp %st(0),%st(1) +fldl 48(%edx) +fxch %st(5) +fmul %st(0),%st(3) +fxch %st(3) +faddp %st(0),%st(1) +fxch %st(2) +fadd %st(0),%st(0) +fldl 56(%edx) +fmul %st(2),%st(0) +faddp %st(0),%st(4) +fxch %st(1) +fstl 16(%esp) +fldl 40(%edx) +fxch %st(5) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(3) +fadd %st(0),%st(0) +fstpl 48(%esp) +fldl 24(%edx) +fadd %st(0),%st(0) +fstl 24(%esp) +fldl 48(%edx) +fmul %st(1),%st(0) +faddp %st(0),%st(4) +fmul %st(4),%st(0) +faddp %st(0),%st(2) +fxch %st(3) +fadd %st(0),%st(0) +fstpl 40(%esp) +fldl 32(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) 
+fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +fldl 8(%esp) +fldl 72(%edx) +fmul %st(0),%st(1) +fldl 16(%esp) +fmul %st(0),%st(1) +fldl 64(%edx) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(3) +fldl 24(%esp) +fmul %st(0),%st(1) +fxch %st(1) +faddp %st(0),%st(2) +fldl 32(%edx) +fadd %st(0),%st(0) +fstl 32(%esp) +fmull 40(%edx) +faddp %st(0),%st(6) +fxch %st(3) +faddp %st(0),%st(5) +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(5),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fsubr %st(0),%st(5) +fldl 56(%edx) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(3) +fldl 32(%esp) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(2) +fldl 48(%edx) +fmul %st(0),%st(4) +fxch %st(4) +faddp %st(0),%st(3) +fxch %st(3) +fmull 40(%esp) +faddp %st(0),%st(1) +fxch %st(3) +fstpl 64(%ecx) +fldl 40(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fxch %st(2) +fmull crypto_scalarmult_curve25519_athlon_scale +fxch %st(3) +fstpl 72(%ecx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 24(%esp) +fmull 72(%edx) +fldl 0(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(2) +fldl 32(%esp) +fmull 64(%edx) +faddp %st(0),%st(1) +fldl 0(%esp) +fmull 8(%edx) +faddp %st(0),%st(3) +fldl 40(%esp) +fmull 56(%edx) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(2),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fsubr %st(0),%st(2) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(3),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fsubr %st(0),%st(3) +fldl 48(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(2) +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 16(%edx) +faddp %st(0),%st(1) +fldl 8(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fsubr %st(0),%st(1) 
+fxch %st(2) +fstpl 0(%ecx) +fldl 32(%esp) +fmull 72(%edx) +fldl 40(%esp) +fmull 64(%edx) +faddp %st(0),%st(1) +fldl 48(%esp) +fmull 56(%edx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 24(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull 16(%edx) +faddp %st(0),%st(1) +faddp %st(0),%st(2) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(2),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fsubr %st(0),%st(2) +fxch %st(3) +fstpl 8(%ecx) +fldl 40(%esp) +fmull 72(%edx) +fldl 48(%esp) +fmull 64(%edx) +faddp %st(0),%st(1) +fldl 56(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 32(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull 24(%edx) +faddp %st(0),%st(1) +fldl 16(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(3) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(3),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fsubr %st(0),%st(3) +fxch %st(1) +fstpl 16(%ecx) +fldl 48(%esp) +fldl 72(%edx) +fmul %st(0),%st(1) +fmul %st(5),%st(0) +fxch %st(5) +fmull 64(%edx) +faddp %st(0),%st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 40(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull 32(%edx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull 24(%edx) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha153 +fsubr %st(0),%st(1) +fxch %st(2) +fstpl 24(%ecx) +fldl 64(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(4) +fxch %st(3) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 48(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull 40(%edx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull 32(%edx) +faddp %st(0),%st(1) +fldl 24(%edx) +fmul %st(0),%st(0) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fsubl 
crypto_scalarmult_curve25519_athlon_alpha179 +fsubr %st(0),%st(1) +fldl 64(%edx) +fadd %st(0),%st(0) +fmull 72(%edx) +fmull crypto_scalarmult_curve25519_athlon_scale +fldl 0(%esp) +fmull 56(%edx) +faddp %st(0),%st(1) +fldl 8(%esp) +fmull 48(%edx) +faddp %st(0),%st(1) +fldl 16(%esp) +fmull 40(%edx) +faddp %st(0),%st(1) +fldl 24(%esp) +fmull 32(%edx) +faddp %st(0),%st(1) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fsubr %st(0),%st(1) +fldl 64(%ecx) +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fldl 72(%ecx) +fxch %st(1) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(2) +faddp %st(0),%st(1) +fxch %st(4) +fstpl 32(%ecx) +fxch %st(4) +fstpl 40(%ecx) +fxch %st(1) +fstpl 48(%ecx) +fstpl 56(%ecx) +fxch %st(1) +fstpl 64(%ecx) +fstpl 72(%ecx) +add %eax,%esp +ret diff --git a/nacl/crypto_scalarmult/curve25519/athlon/todouble.s b/nacl/crypto_scalarmult/curve25519/athlon/todouble.s new file mode 100644 index 00000000..c37aa447 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/athlon/todouble.s @@ -0,0 +1,144 @@ +.text +.p2align 5 +.globl _crypto_scalarmult_curve25519_athlon_todouble +.globl crypto_scalarmult_curve25519_athlon_todouble +_crypto_scalarmult_curve25519_athlon_todouble: +crypto_scalarmult_curve25519_athlon_todouble: +mov %esp,%eax +and $31,%eax +add $96,%eax +sub %eax,%esp +movl 8(%esp,%eax),%ecx +movl 0(%ecx),%edx +movl $0x43300000,4(%esp) +movl %edx,0(%esp) +movl 4(%ecx),%edx +and $0xffffff,%edx +movl $0x45300000,12(%esp) +movl %edx,8(%esp) +movl 7(%ecx),%edx +and $0xffffff,%edx +movl $0x46b00000,20(%esp) +movl %edx,16(%esp) +movl 10(%ecx),%edx +and $0xffffff,%edx +movl $0x48300000,28(%esp) +movl %edx,24(%esp) +movl 13(%ecx),%edx +and $0xffffff,%edx +movl $0x49b00000,36(%esp) +movl %edx,32(%esp) +movl 16(%ecx),%edx +movl $0x4b300000,44(%esp) +movl %edx,40(%esp) +movl 20(%ecx),%edx +and 
$0xffffff,%edx +movl $0x4d300000,52(%esp) +movl %edx,48(%esp) +movl 23(%ecx),%edx +and $0xffffff,%edx +movl $0x4eb00000,60(%esp) +movl %edx,56(%esp) +movl 26(%ecx),%edx +and $0xffffff,%edx +movl $0x50300000,68(%esp) +movl %edx,64(%esp) +movl 28(%ecx),%ecx +shr $8,%ecx +and $0x7fffff,%ecx +movl $0x51b00000,76(%esp) +movl %ecx,72(%esp) +movl 4(%esp,%eax),%ecx +fldl 72(%esp) +fsubl crypto_scalarmult_curve25519_athlon_in9offset +fldl crypto_scalarmult_curve25519_athlon_alpha255 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha255 +fsubr %st(0),%st(1) +fldl 0(%esp) +fsubl crypto_scalarmult_curve25519_athlon_in0offset +fxch %st(1) +fmull crypto_scalarmult_curve25519_athlon_scale +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha26 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha26 +fsubr %st(0),%st(1) +fxch %st(1) +fstpl 0(%ecx) +fldl 8(%esp) +fsubl crypto_scalarmult_curve25519_athlon_in1offset +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha51 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha51 +fsubr %st(0),%st(1) +fxch %st(1) +fstpl 8(%ecx) +fldl 16(%esp) +fsubl crypto_scalarmult_curve25519_athlon_in2offset +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha77 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha77 +fsubr %st(0),%st(1) +fxch %st(1) +fstpl 16(%ecx) +fldl 24(%esp) +fsubl crypto_scalarmult_curve25519_athlon_in3offset +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha102 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha102 +fsubr %st(0),%st(1) +fxch %st(1) +fstpl 24(%ecx) +fldl 32(%esp) +fsubl crypto_scalarmult_curve25519_athlon_in4offset +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha128 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha128 +fsubr %st(0),%st(1) +fxch %st(1) +fstpl 32(%ecx) +fldl 40(%esp) +fsubl 
crypto_scalarmult_curve25519_athlon_in5offset +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha153 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha153 +fsubr %st(0),%st(1) +fxch %st(1) +fstpl 40(%ecx) +fldl 48(%esp) +fsubl crypto_scalarmult_curve25519_athlon_in6offset +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha179 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha179 +fsubr %st(0),%st(1) +fxch %st(1) +fstpl 48(%ecx) +fldl 56(%esp) +fsubl crypto_scalarmult_curve25519_athlon_in7offset +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha204 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha204 +fsubr %st(0),%st(1) +fxch %st(1) +fstpl 56(%ecx) +fldl 64(%esp) +fsubl crypto_scalarmult_curve25519_athlon_in8offset +faddp %st(0),%st(1) +fldl crypto_scalarmult_curve25519_athlon_alpha230 +fadd %st(1),%st(0) +fsubl crypto_scalarmult_curve25519_athlon_alpha230 +fsubr %st(0),%st(1) +fxch %st(1) +fstpl 64(%ecx) +faddp %st(0),%st(1) +fstpl 72(%ecx) +add %eax,%esp +ret diff --git a/nacl/crypto_scalarmult/curve25519/checksum b/nacl/crypto_scalarmult/curve25519/checksum new file mode 100644 index 00000000..ce2d395b --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/checksum @@ -0,0 +1 @@ +dacdae4a0f12353dfc66757f2fd1fff538fe6616115dace9afb8016a55be2a52 diff --git a/nacl/crypto_scalarmult/curve25519/donna_c64/api.h b/nacl/crypto_scalarmult/curve25519/donna_c64/api.h new file mode 100644 index 00000000..60339596 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/donna_c64/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 32 +#define CRYPTO_SCALARBYTES 32 diff --git a/nacl/crypto_scalarmult/curve25519/donna_c64/base.c b/nacl/crypto_scalarmult/curve25519/donna_c64/base.c new file mode 100644 index 00000000..f33419e8 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/donna_c64/base.c @@ -0,0 +1,8 @@ +#include "crypto_scalarmult.h" + +static const unsigned char 
basepoint[32] = {9}; + +int crypto_scalarmult_base(unsigned char *q,const unsigned char *n) +{ + return crypto_scalarmult(q, n, basepoint); +} diff --git a/nacl/crypto_scalarmult/curve25519/donna_c64/implementors b/nacl/crypto_scalarmult/curve25519/donna_c64/implementors new file mode 100644 index 00000000..0ce43280 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/donna_c64/implementors @@ -0,0 +1 @@ +Adam Langley (Google) diff --git a/nacl/crypto_scalarmult/curve25519/donna_c64/smult.c b/nacl/crypto_scalarmult/curve25519/donna_c64/smult.c new file mode 100644 index 00000000..6d26956b --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/donna_c64/smult.c @@ -0,0 +1,477 @@ +/* Copyright 2008, Google Inc. + * All rights reserved. + * + * Code released into the public domain. + * + * curve25519-donna: Curve25519 elliptic curve, public key function + * + * http://code.google.com/p/curve25519-donna/ + * + * Adam Langley + * + * Derived from public domain C code by Daniel J. Bernstein + * + * More information about curve25519 can be found here + * http://cr.yp.to/ecdh.html + * + * djb's sample implementation of curve25519 is written in a special assembly + * language called qhasm and uses the floating point registers. + * + * This is, almost, a clean room reimplementation from the curve25519 paper. It + * uses many of the tricks described therein. Only the crecip function is taken + * from the sample implementation. + */ + +#include <string.h> +#include <stdint.h> +#include "crypto_scalarmult.h" + +typedef uint8_t u8; +typedef uint64_t felem; +// This is a special gcc mode for 128-bit integers. It's implemented on 64-bit +// platforms only as far as I know. +typedef unsigned uint128_t __attribute__((mode(TI))); + +/* Sum two numbers: output += in */ +static void fsum(felem *output, const felem *in) { + unsigned i; + for (i = 0; i < 5; ++i) output[i] += in[i]; +} + +/* Find the difference of two numbers: output = in - output + * (note the order of the arguments!)
+ */ +static void fdifference_backwards(felem *ioutput, const felem *iin) { + static const int64_t twotothe51 = (1l << 51); + const int64_t *in = (const int64_t *) iin; + int64_t *out = (int64_t *) ioutput; + + out[0] = in[0] - out[0]; + out[1] = in[1] - out[1]; + out[2] = in[2] - out[2]; + out[3] = in[3] - out[3]; + out[4] = in[4] - out[4]; + + // An arithmetic shift right of 63 places turns a positive number to 0 and a + // negative number to all 1's. This gives us a bitmask that lets us avoid + // side-channel prone branches. + int64_t t; + +#define NEGCHAIN(a,b) \ + t = out[a] >> 63; \ + out[a] += twotothe51 & t; \ + out[b] -= 1 & t; + +#define NEGCHAIN19(a,b) \ + t = out[a] >> 63; \ + out[a] += twotothe51 & t; \ + out[b] -= 19 & t; + + NEGCHAIN(0, 1); + NEGCHAIN(1, 2); + NEGCHAIN(2, 3); + NEGCHAIN(3, 4); + NEGCHAIN19(4, 0); + NEGCHAIN(0, 1); + NEGCHAIN(1, 2); + NEGCHAIN(2, 3); + NEGCHAIN(3, 4); +} + +/* Multiply a number by a scalar: output = in * scalar */ +static void fscalar_product(felem *output, const felem *in, const felem scalar) { + uint128_t a; + + a = ((uint128_t) in[0]) * scalar; + output[0] = a & 0x7ffffffffffff; + + a = ((uint128_t) in[1]) * scalar + (a >> 51); + output[1] = a & 0x7ffffffffffff; + + a = ((uint128_t) in[2]) * scalar + (a >> 51); + output[2] = a & 0x7ffffffffffff; + + a = ((uint128_t) in[3]) * scalar + (a >> 51); + output[3] = a & 0x7ffffffffffff; + + a = ((uint128_t) in[4]) * scalar + (a >> 51); + output[4] = a & 0x7ffffffffffff; + + output[0] += (a >> 51) * 19; +} + +/* Multiply two numbers: output = in2 * in + * + * output must be distinct to both inputs. The inputs are reduced coefficient + * form, the output is not. 
+ */ +static void fmul(felem *output, const felem *in2, const felem *in) { + uint128_t t[9]; + + t[0] = ((uint128_t) in[0]) * in2[0]; + t[1] = ((uint128_t) in[0]) * in2[1] + + ((uint128_t) in[1]) * in2[0]; + t[2] = ((uint128_t) in[0]) * in2[2] + + ((uint128_t) in[2]) * in2[0] + + ((uint128_t) in[1]) * in2[1]; + t[3] = ((uint128_t) in[0]) * in2[3] + + ((uint128_t) in[3]) * in2[0] + + ((uint128_t) in[1]) * in2[2] + + ((uint128_t) in[2]) * in2[1]; + t[4] = ((uint128_t) in[0]) * in2[4] + + ((uint128_t) in[4]) * in2[0] + + ((uint128_t) in[3]) * in2[1] + + ((uint128_t) in[1]) * in2[3] + + ((uint128_t) in[2]) * in2[2]; + t[5] = ((uint128_t) in[4]) * in2[1] + + ((uint128_t) in[1]) * in2[4] + + ((uint128_t) in[2]) * in2[3] + + ((uint128_t) in[3]) * in2[2]; + t[6] = ((uint128_t) in[4]) * in2[2] + + ((uint128_t) in[2]) * in2[4] + + ((uint128_t) in[3]) * in2[3]; + t[7] = ((uint128_t) in[3]) * in2[4] + + ((uint128_t) in[4]) * in2[3]; + t[8] = ((uint128_t) in[4]) * in2[4]; + + t[0] += t[5] * 19; + t[1] += t[6] * 19; + t[2] += t[7] * 19; + t[3] += t[8] * 19; + + t[1] += t[0] >> 51; + t[0] &= 0x7ffffffffffff; + t[2] += t[1] >> 51; + t[1] &= 0x7ffffffffffff; + t[3] += t[2] >> 51; + t[2] &= 0x7ffffffffffff; + t[4] += t[3] >> 51; + t[3] &= 0x7ffffffffffff; + t[0] += 19 * (t[4] >> 51); + t[4] &= 0x7ffffffffffff; + t[1] += t[0] >> 51; + t[0] &= 0x7ffffffffffff; + t[2] += t[1] >> 51; + t[1] &= 0x7ffffffffffff; + + output[0] = t[0]; + output[1] = t[1]; + output[2] = t[2]; + output[3] = t[3]; + output[4] = t[4]; +} + +static void +fsquare(felem *output, const felem *in) { + uint128_t t[9]; + + t[0] = ((uint128_t) in[0]) * in[0]; + t[1] = ((uint128_t) in[0]) * in[1] * 2; + t[2] = ((uint128_t) in[0]) * in[2] * 2 + + ((uint128_t) in[1]) * in[1]; + t[3] = ((uint128_t) in[0]) * in[3] * 2 + + ((uint128_t) in[1]) * in[2] * 2; + t[4] = ((uint128_t) in[0]) * in[4] * 2 + + ((uint128_t) in[3]) * in[1] * 2 + + ((uint128_t) in[2]) * in[2]; + t[5] = ((uint128_t) in[4]) * in[1] * 2 + + ((uint128_t) 
in[2]) * in[3] * 2; + t[6] = ((uint128_t) in[4]) * in[2] * 2 + + ((uint128_t) in[3]) * in[3]; + t[7] = ((uint128_t) in[3]) * in[4] * 2; + t[8] = ((uint128_t) in[4]) * in[4]; + + t[0] += t[5] * 19; + t[1] += t[6] * 19; + t[2] += t[7] * 19; + t[3] += t[8] * 19; + + t[1] += t[0] >> 51; + t[0] &= 0x7ffffffffffff; + t[2] += t[1] >> 51; + t[1] &= 0x7ffffffffffff; + t[3] += t[2] >> 51; + t[2] &= 0x7ffffffffffff; + t[4] += t[3] >> 51; + t[3] &= 0x7ffffffffffff; + t[0] += 19 * (t[4] >> 51); + t[4] &= 0x7ffffffffffff; + t[1] += t[0] >> 51; + t[0] &= 0x7ffffffffffff; + + output[0] = t[0]; + output[1] = t[1]; + output[2] = t[2]; + output[3] = t[3]; + output[4] = t[4]; +} + +/* Take a little-endian, 32-byte number and expand it into polynomial form */ +static void +fexpand(felem *output, const u8 *in) { + output[0] = *((const uint64_t *)(in)) & 0x7ffffffffffff; + output[1] = (*((const uint64_t *)(in+6)) >> 3) & 0x7ffffffffffff; + output[2] = (*((const uint64_t *)(in+12)) >> 6) & 0x7ffffffffffff; + output[3] = (*((const uint64_t *)(in+19)) >> 1) & 0x7ffffffffffff; + output[4] = (*((const uint64_t *)(in+25)) >> 4) & 0x7ffffffffffff; +} + +/* Take a fully reduced polynomial form number and contract it into a + * little-endian, 32-byte array + */ +static void +fcontract(u8 *output, const felem *input) { + uint128_t t[5]; + + t[0] = input[0]; + t[1] = input[1]; + t[2] = input[2]; + t[3] = input[3]; + t[4] = input[4]; + + t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff; + t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff; + + t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff; + t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff; + + /* now t is between 0 and 2^255-1, properly carried. 
*/ + /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ + + t[0] += 19; + + t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff; + t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff; + + /* now between 19 and 2^255-1 in both cases, and offset by 19. */ + + t[0] += 0x8000000000000 - 19; + t[1] += 0x8000000000000 - 1; + t[2] += 0x8000000000000 - 1; + t[3] += 0x8000000000000 - 1; + t[4] += 0x8000000000000 - 1; + + /* now between 2^255 and 2^256-20, and offset by 2^255. */ + + t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff; + t[4] &= 0x7ffffffffffff; + + *((uint64_t *)(output)) = t[0] | (t[1] << 51); + *((uint64_t *)(output+8)) = (t[1] >> 13) | (t[2] << 38); + *((uint64_t *)(output+16)) = (t[2] >> 26) | (t[3] << 25); + *((uint64_t *)(output+24)) = (t[3] >> 39) | (t[4] << 12); +} + +/* Input: Q, Q', Q-Q' + * Output: 2Q, Q+Q' + * + * x2 z3: long form + * x3 z3: long form + * x z: short form, destroyed + * xprime zprime: short form, destroyed + * qmqp: short form, preserved + */ +static void +fmonty(felem *x2, felem *z2, /* output 2Q */ + felem *x3, felem *z3, /* output Q + Q' */ + felem *x, felem *z, /* input Q */ + felem *xprime, felem *zprime, /* input Q' */ + const felem *qmqp /* input Q - Q' */) { + felem origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], + zzprime[5], zzzprime[5]; + + memcpy(origx, x, 5 * sizeof(felem)); + fsum(x, z); + fdifference_backwards(z, origx); // does x - z + + memcpy(origxprime, xprime, sizeof(felem) * 5); + fsum(xprime, zprime); + fdifference_backwards(zprime, origxprime); + fmul(xxprime, xprime, z); + fmul(zzprime, x, zprime); + memcpy(origxprime, xxprime, sizeof(felem) * 5); + fsum(xxprime, zzprime); + fdifference_backwards(zzprime, 
origxprime); + fsquare(x3, xxprime); + fsquare(zzzprime, zzprime); + fmul(z3, zzzprime, qmqp); + + fsquare(xx, x); + fsquare(zz, z); + fmul(x2, xx, zz); + fdifference_backwards(zz, xx); // does zz = xx - zz + fscalar_product(zzz, zz, 121665); + fsum(zzz, xx); + fmul(z2, zz, zzz); +} + +// ----------------------------------------------------------------------------- +// Maybe swap the contents of two felem arrays (@a and @b), each @len elements +// long. Perform the swap iff @swap is non-zero. +// +// This function performs the swap without leaking any side-channel +// information. +// ----------------------------------------------------------------------------- +static void +swap_conditional(felem *a, felem *b, unsigned len, felem iswap) { + unsigned i; + const felem swap = -iswap; + + for (i = 0; i < len; ++i) { + const felem x = swap & (a[i] ^ b[i]); + a[i] ^= x; + b[i] ^= x; + } +} + +/* Calculates nQ where Q is the x-coordinate of a point on the curve + * + * resultx/resultz: the x coordinate of the resulting curve point (short form) + * n: a little endian, 32-byte number + * q: a point of the curve (short form) + */ +static void +cmult(felem *resultx, felem *resultz, const u8 *n, const felem *q) { + felem a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0}; + felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; + felem e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1}; + felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; + + unsigned i, j; + + memcpy(nqpqx, q, sizeof(felem) * 5); + + for (i = 0; i < 32; ++i) { + u8 byte = n[31 - i]; + for (j = 0; j < 8; ++j) { + const felem bit = byte >> 7; + + swap_conditional(nqx, nqpqx, 5, bit); + swap_conditional(nqz, nqpqz, 5, bit); + fmonty(nqx2, nqz2, + nqpqx2, nqpqz2, + nqx, nqz, + nqpqx, nqpqz, + q); + swap_conditional(nqx2, nqpqx2, 5, bit); + swap_conditional(nqz2, nqpqz2, 5, bit); + + t = nqx; + nqx = nqx2; + nqx2 = t; + t = nqz; + nqz = nqz2; + nqz2 = t; + t = nqpqx; + nqpqx = nqpqx2; + nqpqx2 = t; + t = nqpqz; + nqpqz 
= nqpqz2; + nqpqz2 = t; + + byte <<= 1; + } + } + + memcpy(resultx, nqx, sizeof(felem) * 5); + memcpy(resultz, nqz, sizeof(felem) * 5); +} + +// ----------------------------------------------------------------------------- +// Shamelessly copied from djb's code +// ----------------------------------------------------------------------------- +static void +crecip(felem *out, const felem *z) { + felem z2[5]; + felem z9[5]; + felem z11[5]; + felem z2_5_0[5]; + felem z2_10_0[5]; + felem z2_20_0[5]; + felem z2_50_0[5]; + felem z2_100_0[5]; + felem t0[5]; + felem t1[5]; + int i; + + /* 2 */ fsquare(z2,z); + /* 4 */ fsquare(t1,z2); + /* 8 */ fsquare(t0,t1); + /* 9 */ fmul(z9,t0,z); + /* 11 */ fmul(z11,z9,z2); + /* 22 */ fsquare(t0,z11); + /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9); + + /* 2^6 - 2^1 */ fsquare(t0,z2_5_0); + /* 2^7 - 2^2 */ fsquare(t1,t0); + /* 2^8 - 2^3 */ fsquare(t0,t1); + /* 2^9 - 2^4 */ fsquare(t1,t0); + /* 2^10 - 2^5 */ fsquare(t0,t1); + /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0); + + /* 2^11 - 2^1 */ fsquare(t0,z2_10_0); + /* 2^12 - 2^2 */ fsquare(t1,t0); + /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0); + + /* 2^21 - 2^1 */ fsquare(t0,z2_20_0); + /* 2^22 - 2^2 */ fsquare(t1,t0); + /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0); + + /* 2^41 - 2^1 */ fsquare(t1,t0); + /* 2^42 - 2^2 */ fsquare(t0,t1); + /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t1,t0); fsquare(t0,t1); } + /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0); + + /* 2^51 - 2^1 */ fsquare(t0,z2_50_0); + /* 2^52 - 2^2 */ fsquare(t1,t0); + /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0); + + /* 2^101 - 2^1 */ fsquare(t1,z2_100_0); + /* 2^102 - 2^2 */ fsquare(t0,t1); + /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fsquare(t1,t0); fsquare(t0,t1); } + /* 2^200 - 2^0 */ 
fmul(t1,t0,z2_100_0); + + /* 2^201 - 2^1 */ fsquare(t0,t1); + /* 2^202 - 2^2 */ fsquare(t1,t0); + /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0); + + /* 2^251 - 2^1 */ fsquare(t1,t0); + /* 2^252 - 2^2 */ fsquare(t0,t1); + /* 2^253 - 2^3 */ fsquare(t1,t0); + /* 2^254 - 2^4 */ fsquare(t0,t1); + /* 2^255 - 2^5 */ fsquare(t1,t0); + /* 2^255 - 21 */ fmul(out,t1,z11); +} + +int +crypto_scalarmult(u8 *mypublic, const u8 *secret, const u8 *basepoint) { + felem bp[5], x[5], z[5], zmone[5]; + unsigned char e[32]; + int i; + for (i = 0;i < 32;++i) e[i] = secret[i]; + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + fexpand(bp, basepoint); + cmult(x, z, e, bp); + crecip(zmone, z); + fmul(z, x, zmone); + fcontract(mypublic, z); + return 0; +} diff --git a/nacl/crypto_scalarmult/curve25519/ref/api.h b/nacl/crypto_scalarmult/curve25519/ref/api.h new file mode 100644 index 00000000..60339596 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/ref/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_BYTES 32 +#define CRYPTO_SCALARBYTES 32 diff --git a/nacl/crypto_scalarmult/curve25519/ref/base.c b/nacl/crypto_scalarmult/curve25519/ref/base.c new file mode 100644 index 00000000..ac2d7eb4 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/ref/base.c @@ -0,0 +1,16 @@ +/* +version 20081011 +Matthew Dempsky +Public domain. +Derived from public domain code by D. J. Bernstein. 
+*/ + +#include "crypto_scalarmult.h" + +const unsigned char base[32] = {9}; + +int crypto_scalarmult_base(unsigned char *q, + const unsigned char *n) +{ + return crypto_scalarmult(q,n,base); +} diff --git a/nacl/crypto_scalarmult/curve25519/ref/implementors b/nacl/crypto_scalarmult/curve25519/ref/implementors new file mode 100644 index 00000000..aa551790 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/ref/implementors @@ -0,0 +1 @@ +Matthew Dempsky (Mochi Media) diff --git a/nacl/crypto_scalarmult/curve25519/ref/smult.c b/nacl/crypto_scalarmult/curve25519/ref/smult.c new file mode 100644 index 00000000..6a479558 --- /dev/null +++ b/nacl/crypto_scalarmult/curve25519/ref/smult.c @@ -0,0 +1,265 @@ +/* +version 20081011 +Matthew Dempsky +Public domain. +Derived from public domain code by D. J. Bernstein. +*/ + +#include "crypto_scalarmult.h" + +static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) +{ + unsigned int j; + unsigned int u; + u = 0; + for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; } + u += a[31] + b[31]; out[31] = u; +} + +static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) +{ + unsigned int j; + unsigned int u; + u = 218; + for (j = 0;j < 31;++j) { + u += a[j] + 65280 - b[j]; + out[j] = u & 255; + u >>= 8; + } + u += a[31] - b[31]; + out[31] = u; +} + +static void squeeze(unsigned int a[32]) +{ + unsigned int j; + unsigned int u; + u = 0; + for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } + u += a[31]; a[31] = u & 127; + u = 19 * (u >> 7); + for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } + u += a[31]; a[31] = u; +} + +static const unsigned int minusp[32] = { + 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 +} ; + +static void freeze(unsigned int a[32]) +{ + unsigned int aorig[32]; + unsigned int j; + unsigned int negative; + + for (j = 0;j < 32;++j) aorig[j] = a[j]; + 
add(a,a,minusp); + negative = -((a[31] >> 7) & 1); + for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]); +} + +static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) +{ + unsigned int i; + unsigned int j; + unsigned int u; + + for (i = 0;i < 32;++i) { + u = 0; + for (j = 0;j <= i;++j) u += a[j] * b[i - j]; + for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j]; + out[i] = u; + } + squeeze(out); +} + +static void mult121665(unsigned int out[32],const unsigned int a[32]) +{ + unsigned int j; + unsigned int u; + + u = 0; + for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; } + u += 121665 * a[31]; out[31] = u & 127; + u = 19 * (u >> 7); + for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; } + u += out[j]; out[j] = u; +} + +static void square(unsigned int out[32],const unsigned int a[32]) +{ + unsigned int i; + unsigned int j; + unsigned int u; + + for (i = 0;i < 32;++i) { + u = 0; + for (j = 0;j < i - j;++j) u += a[j] * a[i - j]; + for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j]; + u *= 2; + if ((i & 1) == 0) { + u += a[i / 2] * a[i / 2]; + u += 38 * a[i / 2 + 16] * a[i / 2 + 16]; + } + out[i] = u; + } + squeeze(out); +} + +static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b) +{ + unsigned int j; + unsigned int t; + unsigned int bminus1; + + bminus1 = b - 1; + for (j = 0;j < 64;++j) { + t = bminus1 & (r[j] ^ s[j]); + p[j] = s[j] ^ t; + q[j] = r[j] ^ t; + } +} + +static void mainloop(unsigned int work[64],const unsigned char e[32]) +{ + unsigned int xzm1[64]; + unsigned int xzm[64]; + unsigned int xzmb[64]; + unsigned int xzm1b[64]; + unsigned int xznb[64]; + unsigned int xzn1b[64]; + unsigned int a0[64]; + unsigned int a1[64]; + unsigned int b0[64]; + unsigned int b1[64]; + unsigned int c1[64]; + unsigned int r[32]; + unsigned int s[32]; + unsigned int t[32]; + unsigned int u[32]; + unsigned int i; + 
unsigned int j; + unsigned int b; + int pos; + + for (j = 0;j < 32;++j) xzm1[j] = work[j]; + xzm1[32] = 1; + for (j = 33;j < 64;++j) xzm1[j] = 0; + + xzm[0] = 1; + for (j = 1;j < 64;++j) xzm[j] = 0; + + for (pos = 254;pos >= 0;--pos) { + b = e[pos / 8] >> (pos & 7); + b &= 1; + select(xzmb,xzm1b,xzm,xzm1,b); + add(a0,xzmb,xzmb + 32); + sub(a0 + 32,xzmb,xzmb + 32); + add(a1,xzm1b,xzm1b + 32); + sub(a1 + 32,xzm1b,xzm1b + 32); + square(b0,a0); + square(b0 + 32,a0 + 32); + mult(b1,a1,a0 + 32); + mult(b1 + 32,a1 + 32,a0); + add(c1,b1,b1 + 32); + sub(c1 + 32,b1,b1 + 32); + square(r,c1 + 32); + sub(s,b0,b0 + 32); + mult121665(t,s); + add(u,t,b0); + mult(xznb,b0,b0 + 32); + mult(xznb + 32,s,u); + square(xzn1b,c1); + mult(xzn1b + 32,r,work); + select(xzm,xzm1,xznb,xzn1b,b); + } + + for (j = 0;j < 64;++j) work[j] = xzm[j]; +} + +static void recip(unsigned int out[32],const unsigned int z[32]) +{ + unsigned int z2[32]; + unsigned int z9[32]; + unsigned int z11[32]; + unsigned int z2_5_0[32]; + unsigned int z2_10_0[32]; + unsigned int z2_20_0[32]; + unsigned int z2_50_0[32]; + unsigned int z2_100_0[32]; + unsigned int t0[32]; + unsigned int t1[32]; + int i; + + /* 2 */ square(z2,z); + /* 4 */ square(t1,z2); + /* 8 */ square(t0,t1); + /* 9 */ mult(z9,t0,z); + /* 11 */ mult(z11,z9,z2); + /* 22 */ square(t0,z11); + /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9); + + /* 2^6 - 2^1 */ square(t0,z2_5_0); + /* 2^7 - 2^2 */ square(t1,t0); + /* 2^8 - 2^3 */ square(t0,t1); + /* 2^9 - 2^4 */ square(t1,t0); + /* 2^10 - 2^5 */ square(t0,t1); + /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0); + + /* 2^11 - 2^1 */ square(t0,z2_10_0); + /* 2^12 - 2^2 */ square(t1,t0); + /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); } + /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0); + + /* 2^21 - 2^1 */ square(t0,z2_20_0); + /* 2^22 - 2^2 */ square(t1,t0); + /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); } + /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0); + + /* 2^41 - 2^1 */ 
square(t1,t0); + /* 2^42 - 2^2 */ square(t0,t1); + /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); } + /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0); + + /* 2^51 - 2^1 */ square(t0,z2_50_0); + /* 2^52 - 2^2 */ square(t1,t0); + /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } + /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0); + + /* 2^101 - 2^1 */ square(t1,z2_100_0); + /* 2^102 - 2^2 */ square(t0,t1); + /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); } + /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0); + + /* 2^201 - 2^1 */ square(t0,t1); + /* 2^202 - 2^2 */ square(t1,t0); + /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } + /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0); + + /* 2^251 - 2^1 */ square(t1,t0); + /* 2^252 - 2^2 */ square(t0,t1); + /* 2^253 - 2^3 */ square(t1,t0); + /* 2^254 - 2^4 */ square(t0,t1); + /* 2^255 - 2^5 */ square(t1,t0); + /* 2^255 - 21 */ mult(out,t1,z11); +} + +int crypto_scalarmult(unsigned char *q, + const unsigned char *n, + const unsigned char *p) +{ + unsigned int work[96]; + unsigned char e[32]; + unsigned int i; + for (i = 0;i < 32;++i) e[i] = n[i]; + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + for (i = 0;i < 32;++i) work[i] = p[i]; + mainloop(work,e); + recip(work + 32,work + 32); + mult(work + 64,work,work + 32); + freeze(work + 64); + for (i = 0;i < 32;++i) q[i] = work[64 + i]; + return 0; +} diff --git a/nacl/crypto_scalarmult/curve25519/used b/nacl/crypto_scalarmult/curve25519/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_scalarmult/measure.c b/nacl/crypto_scalarmult/measure.c new file mode 100644 index 00000000..0c7265d5 --- /dev/null +++ b/nacl/crypto_scalarmult/measure.c @@ -0,0 +1,61 @@ +#include +#include "randombytes.h" +#include "cpucycles.h" +#include "crypto_scalarmult.h" + +extern void printentry(long long,const char *,long long *,long long); +extern unsigned char *alignedcalloc(unsigned long 
long); +extern const char *primitiveimplementation; +extern const char *implementationversion; +extern const char *sizenames[]; +extern const long long sizes[]; +extern void allocate(void); +extern void measure(void); + +const char *primitiveimplementation = crypto_scalarmult_IMPLEMENTATION; +const char *implementationversion = crypto_scalarmult_VERSION; +const char *sizenames[] = { "outputbytes", "scalarbytes", 0 }; +const long long sizes[] = { crypto_scalarmult_BYTES, crypto_scalarmult_SCALARBYTES }; + +static unsigned char *m; +static unsigned char *n; +static unsigned char *p; +static unsigned char *q; + +void preallocate(void) +{ +} + +void allocate(void) +{ + m = alignedcalloc(crypto_scalarmult_SCALARBYTES); + n = alignedcalloc(crypto_scalarmult_SCALARBYTES); + p = alignedcalloc(crypto_scalarmult_BYTES); + q = alignedcalloc(crypto_scalarmult_BYTES); +} + +#define TIMINGS 63 +static long long cycles[TIMINGS + 1]; + +void measure(void) +{ + int i; + int loop; + + for (loop = 0;loop < LOOPS;++loop) { + randombytes(m,crypto_scalarmult_SCALARBYTES); + randombytes(n,crypto_scalarmult_SCALARBYTES); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_scalarmult_base(p,m); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(-1,"base_cycles",cycles,TIMINGS); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_scalarmult(q,n,p); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(-1,"cycles",cycles,TIMINGS); + } +} diff --git a/nacl/crypto_scalarmult/try.c b/nacl/crypto_scalarmult/try.c new file mode 100644 index 00000000..560ce493 --- /dev/null +++ b/nacl/crypto_scalarmult/try.c @@ -0,0 +1,126 @@ +/* + * crypto_scalarmult/try.c version 20090118 + * D. J. Bernstein + * Public domain. 
+ */ + +#include +#include "crypto_scalarmult.h" + +extern unsigned char *alignedcalloc(unsigned long long); + +const char *primitiveimplementation = crypto_scalarmult_IMPLEMENTATION; + +#define mlen crypto_scalarmult_SCALARBYTES +#define nlen crypto_scalarmult_SCALARBYTES +#define plen crypto_scalarmult_BYTES +#define qlen crypto_scalarmult_BYTES +#define rlen crypto_scalarmult_BYTES + +static unsigned char *m; +static unsigned char *n; +static unsigned char *p; +static unsigned char *q; +static unsigned char *r; + +static unsigned char *m2; +static unsigned char *n2; +static unsigned char *p2; +static unsigned char *q2; +static unsigned char *r2; + +void preallocate(void) +{ +} + +void allocate(void) +{ + m = alignedcalloc(mlen); + n = alignedcalloc(nlen); + p = alignedcalloc(plen); + q = alignedcalloc(qlen); + r = alignedcalloc(rlen); + m2 = alignedcalloc(mlen + crypto_scalarmult_BYTES); + n2 = alignedcalloc(nlen + crypto_scalarmult_BYTES); + p2 = alignedcalloc(plen + crypto_scalarmult_BYTES); + q2 = alignedcalloc(qlen + crypto_scalarmult_BYTES); + r2 = alignedcalloc(rlen + crypto_scalarmult_BYTES); +} + +void predoit(void) +{ +} + +void doit(void) +{ + crypto_scalarmult(q,n,p); + crypto_scalarmult_base(r,n); +} + +char checksum[crypto_scalarmult_BYTES * 2 + 1]; + +const char *checksum_compute(void) +{ + long long i; + long long j; + long long tests; + + for (i = 0;i < mlen;++i) m[i] = i; + for (i = 0;i < nlen;++i) n[i] = i + 1; + for (i = 0;i < plen;++i) p[i] = i + 2; + for (i = 0;i < qlen;++i) q[i] = i + 3; + for (i = 0;i < rlen;++i) r[i] = i + 4; + + for (i = -16;i < 0;++i) p[i] = random(); + for (i = -16;i < 0;++i) n[i] = random(); + for (i = plen;i < plen + 16;++i) p[i] = random(); + for (i = nlen;i < nlen + 16;++i) n[i] = random(); + for (i = -16;i < plen + 16;++i) p2[i] = p[i]; + for (i = -16;i < nlen + 16;++i) n2[i] = n[i]; + + if (crypto_scalarmult_base(p,n) != 0) return "crypto_scalarmult_base returns nonzero"; + + for (i = -16;i < nlen + 16;++i) if 
(n2[i] != n[i]) return "crypto_scalarmult_base overwrites input"; + for (i = -16;i < 0;++i) if (p2[i] != p[i]) return "crypto_scalarmult_base writes before output"; + for (i = plen;i < plen + 16;++i) if (p2[i] != p[i]) return "crypto_scalarmult_base writes after output"; + + for (tests = 0;tests < 100;++tests) { + for (i = -16;i < 0;++i) q[i] = random(); + for (i = -16;i < 0;++i) p[i] = random(); + for (i = -16;i < 0;++i) m[i] = random(); + for (i = qlen;i < qlen + 16;++i) q[i] = random(); + for (i = plen;i < plen + 16;++i) p[i] = random(); + for (i = mlen;i < mlen + 16;++i) m[i] = random(); + for (i = -16;i < qlen + 16;++i) q2[i] = q[i]; + for (i = -16;i < plen + 16;++i) p2[i] = p[i]; + for (i = -16;i < mlen + 16;++i) m2[i] = m[i]; + + if (crypto_scalarmult(q,m,p) != 0) return "crypto_scalarmult returns nonzero"; + + for (i = -16;i < mlen + 16;++i) if (m2[i] != m[i]) return "crypto_scalarmult overwrites n input"; + for (i = -16;i < plen + 16;++i) if (p2[i] != p[i]) return "crypto_scalarmult overwrites p input"; + for (i = -16;i < 0;++i) if (q2[i] != q[i]) return "crypto_scalarmult writes before output"; + for (i = qlen;i < qlen + 16;++i) if (q2[i] != q[i]) return "crypto_scalarmult writes after output"; + + if (crypto_scalarmult(m2,m2,p) != 0) return "crypto_scalarmult returns nonzero"; + for (i = 0;i < qlen;++i) if (q[i] != m2[i]) return "crypto_scalarmult does not handle n overlap"; + for (i = 0;i < qlen;++i) m2[i] = m[i]; + + if (crypto_scalarmult(p2,m2,p2) != 0) return "crypto_scalarmult returns nonzero"; + for (i = 0;i < qlen;++i) if (q[i] != p2[i]) return "crypto_scalarmult does not handle p overlap"; + + if (crypto_scalarmult(r,n,q) != 0) return "crypto_scalarmult returns nonzero"; + if (crypto_scalarmult(q,n,p) != 0) return "crypto_scalarmult returns nonzero"; + if (crypto_scalarmult(p,m,q) != 0) return "crypto_scalarmult returns nonzero"; + for (j = 0;j < plen;++j) if (p[j] != r[j]) return "crypto_scalarmult not associative"; + for (j = 0;j < mlen;++j) 
m[j] ^= q[j % qlen]; + for (j = 0;j < nlen;++j) n[j] ^= p[j % plen]; + } + + for (i = 0;i < crypto_scalarmult_BYTES;++i) { + checksum[2 * i] = "0123456789abcdef"[15 & (p[i] >> 4)]; + checksum[2 * i + 1] = "0123456789abcdef"[15 & p[i]]; + } + checksum[2 * i] = 0; + return 0; +} diff --git a/nacl/crypto_scalarmult/wrapper-base.cpp b/nacl/crypto_scalarmult/wrapper-base.cpp new file mode 100644 index 00000000..f71ce19a --- /dev/null +++ b/nacl/crypto_scalarmult/wrapper-base.cpp @@ -0,0 +1,11 @@ +#include +using std::string; +#include "crypto_scalarmult.h" + +string crypto_scalarmult_base(const string &n) +{ + unsigned char q[crypto_scalarmult_BYTES]; + if (n.size() != crypto_scalarmult_SCALARBYTES) throw "incorrect scalar length"; + crypto_scalarmult_base(q,(const unsigned char *) n.c_str()); + return string((char *) q,sizeof q); +} diff --git a/nacl/crypto_scalarmult/wrapper-mult.cpp b/nacl/crypto_scalarmult/wrapper-mult.cpp new file mode 100644 index 00000000..fc693cf0 --- /dev/null +++ b/nacl/crypto_scalarmult/wrapper-mult.cpp @@ -0,0 +1,12 @@ +#include +using std::string; +#include "crypto_scalarmult.h" + +string crypto_scalarmult(const string &n,const string &p) +{ + unsigned char q[crypto_scalarmult_BYTES]; + if (n.size() != crypto_scalarmult_SCALARBYTES) throw "incorrect scalar length"; + if (p.size() != crypto_scalarmult_BYTES) throw "incorrect element length"; + crypto_scalarmult(q,(const unsigned char *) n.c_str(),(const unsigned char *) p.c_str()); + return string((char *) q,sizeof q); +} diff --git a/nacl/crypto_secretbox/measure.c b/nacl/crypto_secretbox/measure.c new file mode 100644 index 00000000..6cb0692f --- /dev/null +++ b/nacl/crypto_secretbox/measure.c @@ -0,0 +1,75 @@ +#include +#include "randombytes.h" +#include "cpucycles.h" +#include "crypto_secretbox.h" + +extern void printentry(long long,const char *,long long *,long long); +extern unsigned char *alignedcalloc(unsigned long long); +extern const char *primitiveimplementation; +extern const 
char *implementationversion; +extern const char *sizenames[]; +extern const long long sizes[]; +extern void allocate(void); +extern void measure(void); + +const char *primitiveimplementation = crypto_secretbox_IMPLEMENTATION; +const char *implementationversion = crypto_secretbox_VERSION; +const char *sizenames[] = { "keybytes", "noncebytes", "zerobytes", "boxzerobytes", 0 }; +const long long sizes[] = { crypto_secretbox_KEYBYTES, crypto_secretbox_NONCEBYTES, crypto_secretbox_ZEROBYTES, crypto_secretbox_BOXZEROBYTES }; + +#define MAXTEST_BYTES 4096 + +static unsigned char *k; +static unsigned char *n; +static unsigned char *m; +static unsigned char *c; + +void preallocate(void) +{ +} + +void allocate(void) +{ + k = alignedcalloc(crypto_secretbox_KEYBYTES); + n = alignedcalloc(crypto_secretbox_NONCEBYTES); + m = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES); + c = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES); +} + +#define TIMINGS 15 +static long long cycles[TIMINGS + 1]; + +void measure(void) +{ + int i; + int loop; + int mlen; + + for (loop = 0;loop < LOOPS;++loop) { + for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / 8) { + randombytes(k,crypto_secretbox_KEYBYTES); + randombytes(n,crypto_secretbox_NONCEBYTES); + randombytes(m + crypto_secretbox_ZEROBYTES,mlen); + randombytes(c,mlen + crypto_secretbox_ZEROBYTES); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_secretbox(c,m,mlen + crypto_secretbox_ZEROBYTES,n,k); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"cycles",cycles,TIMINGS); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_secretbox_open(m,c,mlen + crypto_secretbox_ZEROBYTES,n,k); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"open_cycles",cycles,TIMINGS); + ++c[crypto_secretbox_ZEROBYTES]; + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_secretbox_open(m,c,mlen + 
crypto_secretbox_ZEROBYTES,n,k); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"forgery_open_cycles",cycles,TIMINGS); + } + } +} diff --git a/nacl/crypto_secretbox/try.c b/nacl/crypto_secretbox/try.c new file mode 100644 index 00000000..eda091e9 --- /dev/null +++ b/nacl/crypto_secretbox/try.c @@ -0,0 +1,129 @@ +/* + * crypto_secretbox/try.c version 20090118 + * D. J. Bernstein + * Public domain. + */ + +#include "crypto_secretbox.h" + +extern unsigned char *alignedcalloc(unsigned long long); + +const char *primitiveimplementation = crypto_secretbox_IMPLEMENTATION; + +#define MAXTEST_BYTES 10000 +#define CHECKSUM_BYTES 4096 +#define TUNE_BYTES 1536 + +static unsigned char *k; +static unsigned char *n; +static unsigned char *m; +static unsigned char *c; +static unsigned char *t; +static unsigned char *k2; +static unsigned char *n2; +static unsigned char *m2; +static unsigned char *c2; +static unsigned char *t2; + +#define klen crypto_secretbox_KEYBYTES +#define nlen crypto_secretbox_NONCEBYTES + +void preallocate(void) +{ +} + +void allocate(void) +{ + k = alignedcalloc(klen); + n = alignedcalloc(nlen); + m = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES); + c = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES); + t = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES); + k2 = alignedcalloc(klen); + n2 = alignedcalloc(nlen); + m2 = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES); + c2 = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES); + t2 = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES); +} + +void predoit(void) +{ +} + +void doit(void) +{ + crypto_secretbox(c,m,TUNE_BYTES + crypto_secretbox_ZEROBYTES,n,k); + crypto_secretbox_open(t,c,TUNE_BYTES + crypto_secretbox_ZEROBYTES,n,k); +} + +char checksum[klen * 2 + 1]; + +const char *checksum_compute(void) +{ + long long i; + long long j; + + for (j = 0;j < crypto_secretbox_ZEROBYTES;++j) m[j] = 0; + + for (i = 0;i < 
CHECKSUM_BYTES;++i) { + long long mlen = i + crypto_secretbox_ZEROBYTES; + long long tlen = i + crypto_secretbox_ZEROBYTES; + long long clen = i + crypto_secretbox_ZEROBYTES; + + for (j = -16;j < 0;++j) k[j] = random(); + for (j = -16;j < 0;++j) n[j] = random(); + for (j = -16;j < 0;++j) m[j] = random(); + for (j = klen;j < klen + 16;++j) k[j] = random(); + for (j = nlen;j < nlen + 16;++j) n[j] = random(); + for (j = mlen;j < mlen + 16;++j) m[j] = random(); + for (j = -16;j < klen + 16;++j) k2[j] = k[j]; + for (j = -16;j < nlen + 16;++j) n2[j] = n[j]; + for (j = -16;j < mlen + 16;++j) m2[j] = m[j]; + for (j = -16;j < clen + 16;++j) c2[j] = c[j] = random(); + + if (crypto_secretbox(c,m,mlen,n,k) != 0) return "crypto_secretbox returns nonzero"; + + for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_secretbox overwrites m"; + for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_secretbox overwrites n"; + for (j = -16;j < klen + 16;++j) if (k2[j] != k[j]) return "crypto_secretbox overwrites k"; + for (j = -16;j < 0;++j) if (c2[j] != c[j]) return "crypto_secretbox writes before output"; + for (j = clen;j < clen + 16;++j) if (c2[j] != c[j]) return "crypto_secretbox writes after output"; + for (j = 0;j < crypto_secretbox_BOXZEROBYTES;++j) + if (c[j] != 0) return "crypto_secretbox does not clear extra bytes"; + + for (j = -16;j < 0;++j) c[j] = random(); + for (j = clen;j < clen + 16;++j) c[j] = random(); + for (j = -16;j < clen + 16;++j) c2[j] = c[j]; + for (j = -16;j < tlen + 16;++j) t2[j] = t[j] = random(); + + if (crypto_secretbox_open(t,c,clen,n,k) != 0) return "crypto_secretbox_open returns nonzero"; + + for (j = -16;j < clen + 16;++j) if (c2[j] != c[j]) return "crypto_secretbox_open overwrites c"; + for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_secretbox_open overwrites n"; + for (j = -16;j < klen + 16;++j) if (k2[j] != k[j]) return "crypto_secretbox_open overwrites k"; + for (j = -16;j < 0;++j) if (t2[j] != t[j]) 
return "crypto_secretbox_open writes before output"; + for (j = tlen;j < tlen + 16;++j) if (t2[j] != t[j]) return "crypto_secretbox_open writes after output"; + for (j = 0;j < crypto_secretbox_ZEROBYTES;++j) + if (t[j] != 0) return "crypto_secretbox_open does not clear extra bytes"; + + for (j = 0;j < i;++j) if (t[j] != m[j]) return "plaintext does not match"; + + for (j = 0;j < i;++j) + k[j % klen] ^= c[j + crypto_secretbox_BOXZEROBYTES]; + crypto_secretbox(c,m,mlen,n,k); + for (j = 0;j < i;++j) + n[j % nlen] ^= c[j + crypto_secretbox_BOXZEROBYTES]; + crypto_secretbox(c,m,mlen,n,k); + if (i == 0) m[crypto_secretbox_ZEROBYTES + 0] = 0; + m[crypto_secretbox_ZEROBYTES + i] = m[crypto_secretbox_ZEROBYTES + 0]; + for (j = 0;j < i;++j) + m[j + crypto_secretbox_ZEROBYTES] ^= c[j + crypto_secretbox_BOXZEROBYTES]; + } + + for (i = 0;i < klen;++i) { + checksum[2 * i] = "0123456789abcdef"[15 & (k[i] >> 4)]; + checksum[2 * i + 1] = "0123456789abcdef"[15 & k[i]]; + } + checksum[2 * i] = 0; + return 0; +} diff --git a/nacl/crypto_secretbox/wrapper-box.cpp b/nacl/crypto_secretbox/wrapper-box.cpp new file mode 100644 index 00000000..fb8b1784 --- /dev/null +++ b/nacl/crypto_secretbox/wrapper-box.cpp @@ -0,0 +1,19 @@ +#include +using std::string; +#include "crypto_secretbox.h" + +string crypto_secretbox(const string &m,const string &n,const string &k) +{ + if (k.size() != crypto_secretbox_KEYBYTES) throw "incorrect key length"; + if (n.size() != crypto_secretbox_NONCEBYTES) throw "incorrect nonce length"; + size_t mlen = m.size() + crypto_secretbox_ZEROBYTES; + unsigned char mpad[mlen]; + for (int i = 0;i < crypto_secretbox_ZEROBYTES;++i) mpad[i] = 0; + for (int i = crypto_secretbox_ZEROBYTES;i < mlen;++i) mpad[i] = m[i - crypto_secretbox_ZEROBYTES]; + unsigned char cpad[mlen]; + crypto_secretbox(cpad,mpad,mlen,(const unsigned char *) n.c_str(),(const unsigned char *) k.c_str()); + return string( + (char *) cpad + crypto_secretbox_BOXZEROBYTES, + mlen - 
crypto_secretbox_BOXZEROBYTES + ); +} diff --git a/nacl/crypto_secretbox/wrapper-open.cpp b/nacl/crypto_secretbox/wrapper-open.cpp new file mode 100644 index 00000000..07989813 --- /dev/null +++ b/nacl/crypto_secretbox/wrapper-open.cpp @@ -0,0 +1,22 @@ +#include +using std::string; +#include "crypto_secretbox.h" + +string crypto_secretbox_open(const string &c,const string &n,const string &k) +{ + if (k.size() != crypto_secretbox_KEYBYTES) throw "incorrect key length"; + if (n.size() != crypto_secretbox_NONCEBYTES) throw "incorrect nonce length"; + size_t clen = c.size() + crypto_secretbox_BOXZEROBYTES; + unsigned char cpad[clen]; + for (int i = 0;i < crypto_secretbox_BOXZEROBYTES;++i) cpad[i] = 0; + for (int i = crypto_secretbox_BOXZEROBYTES;i < clen;++i) cpad[i] = c[i - crypto_secretbox_BOXZEROBYTES]; + unsigned char mpad[clen]; + if (crypto_secretbox_open(mpad,cpad,clen,(const unsigned char *) n.c_str(),(const unsigned char *) k.c_str()) != 0) + throw "ciphertext fails verification"; + if (clen < crypto_secretbox_ZEROBYTES) + throw "ciphertext too short"; // should have been caught by _open + return string( + (char *) mpad + crypto_secretbox_ZEROBYTES, + clen - crypto_secretbox_ZEROBYTES + ); +} diff --git a/nacl/crypto_secretbox/xsalsa20poly1305/checksum b/nacl/crypto_secretbox/xsalsa20poly1305/checksum new file mode 100644 index 00000000..af3c6897 --- /dev/null +++ b/nacl/crypto_secretbox/xsalsa20poly1305/checksum @@ -0,0 +1 @@ +df372f95dd87381b7c9ceb6f340ccaa03d19bed5d9e4ab004d99d847675a9658 diff --git a/nacl/crypto_secretbox/xsalsa20poly1305/ref/api.h b/nacl/crypto_secretbox/xsalsa20poly1305/ref/api.h new file mode 100644 index 00000000..f5aeb356 --- /dev/null +++ b/nacl/crypto_secretbox/xsalsa20poly1305/ref/api.h @@ -0,0 +1,4 @@ +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_NONCEBYTES 24 +#define CRYPTO_ZEROBYTES 32 +#define CRYPTO_BOXZEROBYTES 16 diff --git a/nacl/crypto_secretbox/xsalsa20poly1305/ref/box.c b/nacl/crypto_secretbox/xsalsa20poly1305/ref/box.c 
new file mode 100644 index 00000000..f1abb06f --- /dev/null +++ b/nacl/crypto_secretbox/xsalsa20poly1305/ref/box.c @@ -0,0 +1,35 @@ +#include "crypto_onetimeauth_poly1305.h" +#include "crypto_stream_xsalsa20.h" +#include "crypto_secretbox.h" + +int crypto_secretbox( + unsigned char *c, + const unsigned char *m,unsigned long long mlen, + const unsigned char *n, + const unsigned char *k +) +{ + int i; + if (mlen < 32) return -1; + crypto_stream_xsalsa20_xor(c,m,mlen,n,k); + crypto_onetimeauth_poly1305(c + 16,c + 32,mlen - 32,c); + for (i = 0;i < 16;++i) c[i] = 0; + return 0; +} + +int crypto_secretbox_open( + unsigned char *m, + const unsigned char *c,unsigned long long clen, + const unsigned char *n, + const unsigned char *k +) +{ + int i; + unsigned char subkey[32]; + if (clen < 32) return -1; + crypto_stream_xsalsa20(subkey,32,n,k); + if (crypto_onetimeauth_poly1305_verify(c + 16,c + 32,clen - 32,subkey) != 0) return -1; + crypto_stream_xsalsa20_xor(m,c,clen,n,k); + for (i = 0;i < 32;++i) m[i] = 0; + return 0; +} diff --git a/nacl/crypto_secretbox/xsalsa20poly1305/selected b/nacl/crypto_secretbox/xsalsa20poly1305/selected new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_secretbox/xsalsa20poly1305/used b/nacl/crypto_secretbox/xsalsa20poly1305/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_sign/edwards25519sha512batch/ref/api.h b/nacl/crypto_sign/edwards25519sha512batch/ref/api.h new file mode 100644 index 00000000..352240c0 --- /dev/null +++ b/nacl/crypto_sign/edwards25519sha512batch/ref/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_SECRETKEYBYTES 64 +#define CRYPTO_PUBLICKEYBYTES 32 +#define CRYPTO_BYTES 64 diff --git a/nacl/crypto_sign/edwards25519sha512batch/ref/fe25519.c b/nacl/crypto_sign/edwards25519sha512batch/ref/fe25519.c new file mode 100644 index 00000000..a9f806d2 --- /dev/null +++ b/nacl/crypto_sign/edwards25519sha512batch/ref/fe25519.c @@ -0,0 +1,345 @@ +#include "fe25519.h" + +#define WINDOWSIZE 4 /* Should 
be 1,2, or 4 */ +#define WINDOWMASK ((1<v[31] >> 7; + r->v[31] &= 127; + t *= 19; + r->v[0] += t; + for(i=0;i<31;i++) + { + t = r->v[i] >> 8; + r->v[i+1] += t; + r->v[i] &= 255; + } + } +} + +static void reduce_mul(fe25519 *r) +{ + crypto_uint32 t; + int i,rep; + + for(rep=0;rep<2;rep++) + { + t = r->v[31] >> 7; + r->v[31] &= 127; + t *= 19; + r->v[0] += t; + for(i=0;i<31;i++) + { + t = r->v[i] >> 8; + r->v[i+1] += t; + r->v[i] &= 255; + } + } +} + +/* reduction modulo 2^255-19 */ +static void freeze(fe25519 *r) +{ + int i; + unsigned int m = (r->v[31] == 127); + for(i=30;i>1;i--) + m *= (r->v[i] == 255); + m *= (r->v[0] >= 237); + + r->v[31] -= m*127; + for(i=30;i>0;i--) + r->v[i] -= m*255; + r->v[0] -= m*237; +} + +/*freeze input before calling isone*/ +static int isone(const fe25519 *x) +{ + int i; + int r = (x->v[0] == 1); + for(i=1;i<32;i++) + r *= (x->v[i] == 0); + return r; +} + +/*freeze input before calling iszero*/ +static int iszero(const fe25519 *x) +{ + int i; + int r = (x->v[0] == 0); + for(i=1;i<32;i++) + r *= (x->v[i] == 0); + return r; +} + + +static int issquare(const fe25519 *x) +{ + unsigned char e[32] = {0xf6,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x3f}; /* (p-1)/2 */ + fe25519 t; + + fe25519_pow(&t,x,e); + freeze(&t); + return isone(&t) || iszero(&t); +} + +void fe25519_unpack(fe25519 *r, const unsigned char x[32]) +{ + int i; + for(i=0;i<32;i++) r->v[i] = x[i]; + r->v[31] &= 127; +} + +/* Assumes input x being reduced mod 2^255 */ +void fe25519_pack(unsigned char r[32], const fe25519 *x) +{ + int i; + for(i=0;i<32;i++) + r[i] = x->v[i]; + + /* freeze byte array */ + unsigned int m = (r[31] == 127); /* XXX: some compilers might use branches; fix */ + for(i=30;i>1;i--) + m *= (r[i] == 255); + m *= (r[0] >= 237); + r[31] -= m*127; + for(i=30;i>0;i--) + r[i] -= m*255; + r[0] -= m*237; +} + +void fe25519_cmov(fe25519 *r, const fe25519 *x, 
unsigned char b) +{ + unsigned char nb = 1-b; + int i; + for(i=0;i<32;i++) r->v[i] = nb * r->v[i] + b * x->v[i]; +} + +unsigned char fe25519_getparity(const fe25519 *x) +{ + fe25519 t; + int i; + for(i=0;i<32;i++) t.v[i] = x->v[i]; + freeze(&t); + return t.v[0] & 1; +} + +void fe25519_setone(fe25519 *r) +{ + int i; + r->v[0] = 1; + for(i=1;i<32;i++) r->v[i]=0; +} + +void fe25519_setzero(fe25519 *r) +{ + int i; + for(i=0;i<32;i++) r->v[i]=0; +} + +void fe25519_neg(fe25519 *r, const fe25519 *x) +{ + fe25519 t; + int i; + for(i=0;i<32;i++) t.v[i]=x->v[i]; + fe25519_setzero(r); + fe25519_sub(r, r, &t); +} + +void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y) +{ + int i; + for(i=0;i<32;i++) r->v[i] = x->v[i] + y->v[i]; + reduce_add_sub(r); +} + +void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y) +{ + int i; + crypto_uint32 t[32]; + t[0] = x->v[0] + 0x1da; + t[31] = x->v[31] + 0xfe; + for(i=1;i<31;i++) t[i] = x->v[i] + 0x1fe; + for(i=0;i<32;i++) r->v[i] = t[i] - y->v[i]; + reduce_add_sub(r); +} + +void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y) +{ + int i,j; + crypto_uint32 t[63]; + for(i=0;i<63;i++)t[i] = 0; + + for(i=0;i<32;i++) + for(j=0;j<32;j++) + t[i+j] += x->v[i] * y->v[j]; + + for(i=32;i<63;i++) + r->v[i-32] = t[i-32] + 38*t[i]; + r->v[31] = t[31]; /* result now in r[0]...r[31] */ + + reduce_mul(r); +} + +void fe25519_square(fe25519 *r, const fe25519 *x) +{ + fe25519_mul(r, x, x); +} + +/*XXX: Make constant time! 
*/ +void fe25519_pow(fe25519 *r, const fe25519 *x, const unsigned char *e) +{ + /* + fe25519 g; + fe25519_setone(&g); + int i; + unsigned char j; + for(i=32;i>0;i--) + { + for(j=128;j>0;j>>=1) + { + fe25519_square(&g,&g); + if(e[i-1] & j) + fe25519_mul(&g,&g,x); + } + } + for(i=0;i<32;i++) r->v[i] = g.v[i]; + */ + fe25519 g; + fe25519_setone(&g); + int i,j,k; + fe25519 pre[(1 << WINDOWSIZE)]; + fe25519 t; + unsigned char w; + + // Precomputation + fe25519_setone(pre); + pre[1] = *x; + for(i=2;i<(1<0;i--) + { + for(j=8-WINDOWSIZE;j>=0;j-=WINDOWSIZE) + { + for(k=0;k>j) & WINDOWMASK; + t = pre[0]; + for(k=1;k<(1<v[i]; + fe25519_pow(&d,&d,e3); + for(i=0;i<32;i++) + r->v[i] = 2*x->v[i]; + fe25519_mul(r,r,&d); + } + freeze(r); + if((r->v[0] & 1) != (parity & 1)) + { + fe25519_sub(r,&p,r); + } + return 0; +} + +void fe25519_invert(fe25519 *r, const fe25519 *x) +{ + fe25519 z2; + fe25519 z9; + fe25519 z11; + fe25519 z2_5_0; + fe25519 z2_10_0; + fe25519 z2_20_0; + fe25519 z2_50_0; + fe25519 z2_100_0; + fe25519 t0; + fe25519 t1; + int i; + + /* 2 */ fe25519_square(&z2,x); + /* 4 */ fe25519_square(&t1,&z2); + /* 8 */ fe25519_square(&t0,&t1); + /* 9 */ fe25519_mul(&z9,&t0,x); + /* 11 */ fe25519_mul(&z11,&z9,&z2); + /* 22 */ fe25519_square(&t0,&z11); + /* 2^5 - 2^0 = 31 */ fe25519_mul(&z2_5_0,&t0,&z9); + + /* 2^6 - 2^1 */ fe25519_square(&t0,&z2_5_0); + /* 2^7 - 2^2 */ fe25519_square(&t1,&t0); + /* 2^8 - 2^3 */ fe25519_square(&t0,&t1); + /* 2^9 - 2^4 */ fe25519_square(&t1,&t0); + /* 2^10 - 2^5 */ fe25519_square(&t0,&t1); + /* 2^10 - 2^0 */ fe25519_mul(&z2_10_0,&t0,&z2_5_0); + + /* 2^11 - 2^1 */ fe25519_square(&t0,&z2_10_0); + /* 2^12 - 2^2 */ fe25519_square(&t1,&t0); + /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); } + /* 2^20 - 2^0 */ fe25519_mul(&z2_20_0,&t1,&z2_10_0); + + /* 2^21 - 2^1 */ fe25519_square(&t0,&z2_20_0); + /* 2^22 - 2^2 */ fe25519_square(&t1,&t0); + /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { 
fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); } + /* 2^40 - 2^0 */ fe25519_mul(&t0,&t1,&z2_20_0); + + /* 2^41 - 2^1 */ fe25519_square(&t1,&t0); + /* 2^42 - 2^2 */ fe25519_square(&t0,&t1); + /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fe25519_square(&t1,&t0); fe25519_square(&t0,&t1); } + /* 2^50 - 2^0 */ fe25519_mul(&z2_50_0,&t0,&z2_10_0); + + /* 2^51 - 2^1 */ fe25519_square(&t0,&z2_50_0); + /* 2^52 - 2^2 */ fe25519_square(&t1,&t0); + /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); } + /* 2^100 - 2^0 */ fe25519_mul(&z2_100_0,&t1,&z2_50_0); + + /* 2^101 - 2^1 */ fe25519_square(&t1,&z2_100_0); + /* 2^102 - 2^2 */ fe25519_square(&t0,&t1); + /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fe25519_square(&t1,&t0); fe25519_square(&t0,&t1); } + /* 2^200 - 2^0 */ fe25519_mul(&t1,&t0,&z2_100_0); + + /* 2^201 - 2^1 */ fe25519_square(&t0,&t1); + /* 2^202 - 2^2 */ fe25519_square(&t1,&t0); + /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); } + /* 2^250 - 2^0 */ fe25519_mul(&t0,&t1,&z2_50_0); + + /* 2^251 - 2^1 */ fe25519_square(&t1,&t0); + /* 2^252 - 2^2 */ fe25519_square(&t0,&t1); + /* 2^253 - 2^3 */ fe25519_square(&t1,&t0); + /* 2^254 - 2^4 */ fe25519_square(&t0,&t1); + /* 2^255 - 2^5 */ fe25519_square(&t1,&t0); + /* 2^255 - 21 */ fe25519_mul(r,&t1,&z11); +} diff --git a/nacl/crypto_sign/edwards25519sha512batch/ref/fe25519.h b/nacl/crypto_sign/edwards25519sha512batch/ref/fe25519.h new file mode 100644 index 00000000..e07ddba7 --- /dev/null +++ b/nacl/crypto_sign/edwards25519sha512batch/ref/fe25519.h @@ -0,0 +1,54 @@ +#ifndef FE25519_H +#define FE25519_H + +#define fe25519 crypto_sign_edwards25519sha512batch_fe25519 +#define fe25519_unpack crypto_sign_edwards25519sha512batch_fe25519_unpack +#define fe25519_pack crypto_sign_edwards25519sha512batch_fe25519_pack +#define fe25519_cmov crypto_sign_edwards25519sha512batch_fe25519_cmov +#define fe25519_setone 
crypto_sign_edwards25519sha512batch_fe25519_setone +#define fe25519_setzero crypto_sign_edwards25519sha512batch_fe25519_setzero +#define fe25519_neg crypto_sign_edwards25519sha512batch_fe25519_neg +#define fe25519_getparity crypto_sign_edwards25519sha512batch_fe25519_getparity +#define fe25519_add crypto_sign_edwards25519sha512batch_fe25519_add +#define fe25519_sub crypto_sign_edwards25519sha512batch_fe25519_sub +#define fe25519_mul crypto_sign_edwards25519sha512batch_fe25519_mul +#define fe25519_square crypto_sign_edwards25519sha512batch_fe25519_square +#define fe25519_pow crypto_sign_edwards25519sha512batch_fe25519_pow +#define fe25519_sqrt_vartime crypto_sign_edwards25519sha512batch_fe25519_sqrt_vartime +#define fe25519_invert crypto_sign_edwards25519sha512batch_fe25519_invert + +#include "crypto_uint32.h" + +typedef struct { + crypto_uint32 v[32]; +} fe25519; + +void fe25519_unpack(fe25519 *r, const unsigned char x[32]); + +void fe25519_pack(unsigned char r[32], const fe25519 *x); + +void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b); + +void fe25519_setone(fe25519 *r); + +void fe25519_setzero(fe25519 *r); + +void fe25519_neg(fe25519 *r, const fe25519 *x); + +unsigned char fe25519_getparity(const fe25519 *x); + +void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y); + +void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y); + +void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y); + +void fe25519_square(fe25519 *r, const fe25519 *x); + +void fe25519_pow(fe25519 *r, const fe25519 *x, const unsigned char *e); + +int fe25519_sqrt_vartime(fe25519 *r, const fe25519 *x, unsigned char parity); + +void fe25519_invert(fe25519 *r, const fe25519 *x); + +#endif diff --git a/nacl/crypto_sign/edwards25519sha512batch/ref/ge25519.c b/nacl/crypto_sign/edwards25519sha512batch/ref/ge25519.c new file mode 100644 index 00000000..a57b8f3c --- /dev/null +++ b/nacl/crypto_sign/edwards25519sha512batch/ref/ge25519.c @@ -0,0 +1,227 @@ 
+#include "fe25519.h" +#include "sc25519.h" +#include "ge25519.h" + +/* + * Arithmetic on the twisted Edwards curve -x^2 + y^2 = 1 + dx^2y^2 + * with d = -(121665/121666) = 37095705934669439343138083508754565189542113879843219016388785533085940283555 + * Base point: (15112221349535400772501151409588531511454012693041857206046113283949847762202,46316835694926478169428394003475163141307993866256225615783033603165251855960); + */ + +typedef struct +{ + fe25519 x; + fe25519 z; + fe25519 y; + fe25519 t; +} ge25519_p1p1; + +typedef struct +{ + fe25519 x; + fe25519 y; + fe25519 z; +} ge25519_p2; + +#define ge25519_p3 ge25519 + +/* Windowsize for fixed-window scalar multiplication */ +#define WINDOWSIZE 2 /* Should be 1,2, or 4 */ +#define WINDOWMASK ((1<x, &p->x, &p->t); + fe25519_mul(&r->y, &p->y, &p->z); + fe25519_mul(&r->z, &p->z, &p->t); +} + +static void p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p) +{ + p1p1_to_p2((ge25519_p2 *)r, p); + fe25519_mul(&r->t, &p->x, &p->y); +} + +/* Constant-time version of: if(b) r = p */ +static void cmov_p3(ge25519_p3 *r, const ge25519_p3 *p, unsigned char b) +{ + fe25519_cmov(&r->x, &p->x, b); + fe25519_cmov(&r->y, &p->y, b); + fe25519_cmov(&r->z, &p->z, b); + fe25519_cmov(&r->t, &p->t, b); +} + +/* See http://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd */ +static void dbl_p1p1(ge25519_p1p1 *r, const ge25519_p2 *p) +{ + fe25519 a,b,c,d; + fe25519_square(&a, &p->x); + fe25519_square(&b, &p->y); + fe25519_square(&c, &p->z); + fe25519_add(&c, &c, &c); + fe25519_neg(&d, &a); + + fe25519_add(&r->x, &p->x, &p->y); + fe25519_square(&r->x, &r->x); + fe25519_sub(&r->x, &r->x, &a); + fe25519_sub(&r->x, &r->x, &b); + fe25519_add(&r->z, &d, &b); + fe25519_sub(&r->t, &r->z, &c); + fe25519_sub(&r->y, &d, &b); +} + +static void add_p1p1(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_p3 *q) +{ + fe25519 a, b, c, d, t, fd; + fe25519_unpack(&fd, ecd); + + fe25519_sub(&a, &p->y, &p->x); // A = 
(Y1-X1)*(Y2-X2) + fe25519_sub(&t, &q->y, &q->x); + fe25519_mul(&a, &a, &t); + fe25519_add(&b, &p->x, &p->y); // B = (Y1+X1)*(Y2+X2) + fe25519_add(&t, &q->x, &q->y); + fe25519_mul(&b, &b, &t); + fe25519_mul(&c, &p->t, &q->t); //C = T1*k*T2 + fe25519_mul(&c, &c, &fd); + fe25519_add(&c, &c, &c); //XXX: Can save this addition by precomputing 2*ecd + fe25519_mul(&d, &p->z, &q->z); //D = Z1*2*Z2 + fe25519_add(&d, &d, &d); + fe25519_sub(&r->x, &b, &a); // E = B-A + fe25519_sub(&r->t, &d, &c); // F = D-C + fe25519_add(&r->z, &d, &c); // G = D+C + fe25519_add(&r->y, &b, &a); // H = B+A +} + +/* ******************************************************************** + * EXPORTED FUNCTIONS + ******************************************************************** */ + +/* return 0 on success, -1 otherwise */ +int ge25519_unpack_vartime(ge25519_p3 *r, const unsigned char p[32]) +{ + int ret; + fe25519 t, fd; + fe25519_setone(&r->z); + fe25519_unpack(&fd, ecd); + unsigned char par = p[31] >> 7; + fe25519_unpack(&r->y, p); + fe25519_square(&r->x, &r->y); + fe25519_mul(&t, &r->x, &fd); + fe25519_sub(&r->x, &r->x, &r->z); + fe25519_add(&t, &r->z, &t); + fe25519_invert(&t, &t); + fe25519_mul(&r->x, &r->x, &t); + ret = fe25519_sqrt_vartime(&r->x, &r->x, par); + fe25519_mul(&r->t, &r->x, &r->y); + return ret; +} + +void ge25519_pack(unsigned char r[32], const ge25519_p3 *p) +{ + fe25519 tx, ty, zi; + fe25519_invert(&zi, &p->z); + fe25519_mul(&tx, &p->x, &zi); + fe25519_mul(&ty, &p->y, &zi); + fe25519_pack(r, &ty); + r[31] ^= fe25519_getparity(&tx) << 7; +} + +void ge25519_add(ge25519_p3 *r, const ge25519_p3 *p, const ge25519_p3 *q) +{ + ge25519_p1p1 grp1p1; + add_p1p1(&grp1p1, p, q); + p1p1_to_p3(r, &grp1p1); +} + +void ge25519_double(ge25519_p3 *r, const ge25519_p3 *p) +{ + ge25519_p1p1 grp1p1; + dbl_p1p1(&grp1p1, (ge25519_p2 *)p); + p1p1_to_p3(r, &grp1p1); +} + +void ge25519_scalarmult(ge25519_p3 *r, const ge25519_p3 *p, const sc25519 *s) +{ + int i,j,k; + ge25519_p3 g; + 
fe25519_unpack(&g.x, ge25519_neutral_x); + fe25519_unpack(&g.y, ge25519_neutral_y); + fe25519_unpack(&g.z, ge25519_neutral_z); + fe25519_unpack(&g.t, ge25519_neutral_t); + + ge25519_p3 pre[(1 << WINDOWSIZE)]; + ge25519_p3 t; + ge25519_p1p1 tp1p1; + unsigned char w; + unsigned char sb[32]; + sc25519_to32bytes(sb, s); + + // Precomputation + pre[0] = g; + pre[1] = *p; + for(i=2;i<(1<0;i--) + { + for(j=8-WINDOWSIZE;j>=0;j-=WINDOWSIZE) + { + for(k=0;k>j) & WINDOWMASK; + t = pre[0]; + for(k=1;k<(1<x = g.x; + r->y = g.y; + r->z = g.z; + r->t = g.t; +} + +void ge25519_scalarmult_base(ge25519_p3 *r, const sc25519 *s) +{ + /* XXX: Better algorithm for known-base-point scalar multiplication */ + ge25519_p3 t; + fe25519_unpack(&t.x, ge25519_base_x); + fe25519_unpack(&t.y, ge25519_base_y); + fe25519_unpack(&t.z, ge25519_base_z); + fe25519_unpack(&t.t, ge25519_base_t); + ge25519_scalarmult(r, &t, s); +} diff --git a/nacl/crypto_sign/edwards25519sha512batch/ref/ge25519.h b/nacl/crypto_sign/edwards25519sha512batch/ref/ge25519.h new file mode 100644 index 00000000..49ad163a --- /dev/null +++ b/nacl/crypto_sign/edwards25519sha512batch/ref/ge25519.h @@ -0,0 +1,34 @@ +#ifndef GE25519_H +#define GE25519_H + +#include "fe25519.h" +#include "sc25519.h" + +#define ge25519 crypto_sign_edwards25519sha512batch_ge25519 +#define ge25519_unpack_vartime crypto_sign_edwards25519sha512batch_ge25519_unpack_vartime +#define ge25519_pack crypto_sign_edwards25519sha512batch_ge25519_pack +#define ge25519_add crypto_sign_edwards25519sha512batch_ge25519_add +#define ge25519_double crypto_sign_edwards25519sha512batch_ge25519_double +#define ge25519_scalarmult crypto_sign_edwards25519sha512batch_ge25519_scalarmult +#define ge25519_scalarmult_base crypto_sign_edwards25519sha512batch_ge25519_scalarmult_base + +typedef struct { + fe25519 x; + fe25519 y; + fe25519 z; + fe25519 t; +} ge25519; + +int ge25519_unpack_vartime(ge25519 *r, const unsigned char p[32]); + +void ge25519_pack(unsigned char r[32], const 
ge25519 *p); + +void ge25519_add(ge25519 *r, const ge25519 *p, const ge25519 *q); + +void ge25519_double(ge25519 *r, const ge25519 *p); + +void ge25519_scalarmult(ge25519 *r, const ge25519 *p, const sc25519 *s); + +void ge25519_scalarmult_base(ge25519 *r, const sc25519 *s); + +#endif diff --git a/nacl/crypto_sign/edwards25519sha512batch/ref/sc25519.c b/nacl/crypto_sign/edwards25519sha512batch/ref/sc25519.c new file mode 100644 index 00000000..5f27eb1b --- /dev/null +++ b/nacl/crypto_sign/edwards25519sha512batch/ref/sc25519.c @@ -0,0 +1,146 @@ +#include "sc25519.h" + +/*Arithmetic modulo the group order n = 2^252 + 27742317777372353535851937790883648493 = 7237005577332262213973186563042994240857116359379907606001950938285454250989 */ + +static const crypto_uint32 m[32] = {0xED, 0xD3, 0xF5, 0x5C, 0x1A, 0x63, 0x12, 0x58, 0xD6, 0x9C, 0xF7, 0xA2, 0xDE, 0xF9, 0xDE, 0x14, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10}; + +static const crypto_uint32 mu[33] = {0x1B, 0x13, 0x2C, 0x0A, 0xA3, 0xE5, 0x9C, 0xED, 0xA7, 0x29, 0x63, 0x08, 0x5D, 0x21, 0x06, 0x21, + 0xEB, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F}; + +/* Reduce coefficients of r before calling reduce_add_sub */ +static void reduce_add_sub(sc25519 *r) +{ + int i, b, pb=0, nb; + unsigned char t[32]; + + for(i=0;i<32;i++) + { + b = (r->v[i]v[i]-pb-m[i]+b*256; + pb = b; + } + nb = 1-b; + for(i=0;i<32;i++) + r->v[i] = r->v[i]*b + t[i]*nb; +} + +/* Reduce coefficients of x before calling barrett_reduce */ +static void barrett_reduce(sc25519 *r, const crypto_uint32 x[64]) +{ + /* See HAC, Alg. 
14.42 */ + int i,j; + crypto_uint32 q2[66] = {0}; + crypto_uint32 *q3 = q2 + 33; + crypto_uint32 r1[33]; + crypto_uint32 r2[33] = {0}; + crypto_uint32 carry; + int b, pb=0; + + for(i=0;i<33;i++) + for(j=0;j<33;j++) + if(i+j >= 31) q2[i+j] += mu[i]*x[j+31]; + carry = q2[31] >> 8; + q2[32] += carry; + carry = q2[32] >> 8; + q2[33] += carry; + + for(i=0;i<33;i++)r1[i] = x[i]; + for(i=0;i<32;i++) + for(j=0;j<33;j++) + if(i+j < 33) r2[i+j] += m[i]*q3[j]; + + for(i=0;i<32;i++) + { + carry = r2[i] >> 8; + r2[i+1] += carry; + r2[i] &= 0xff; + } + + for(i=0;i<32;i++) + { + b = (r1[i]v[i] = r1[i]-pb-r2[i]+b*256; + pb = b; + } + + /* XXX: Can it really happen that r<0?, See HAC, Alg 14.42, Step 3 + * If so: Handle it here! + */ + + reduce_add_sub(r); + reduce_add_sub(r); +} + +/* +static int iszero(const sc25519 *x) +{ + // Implement + return 0; +} +*/ + +void sc25519_from32bytes(sc25519 *r, const unsigned char x[32]) +{ + int i; + crypto_uint32 t[64] = {0}; + for(i=0;i<32;i++) t[i] = x[i]; + barrett_reduce(r, t); +} + +void sc25519_from64bytes(sc25519 *r, const unsigned char x[64]) +{ + int i; + crypto_uint32 t[64] = {0}; + for(i=0;i<64;i++) t[i] = x[i]; + barrett_reduce(r, t); +} + +/* XXX: What we actually want for crypto_group is probably just something like + * void sc25519_frombytes(sc25519 *r, const unsigned char *x, size_t xlen) + */ + +void sc25519_to32bytes(unsigned char r[32], const sc25519 *x) +{ + int i; + for(i=0;i<32;i++) r[i] = x->v[i]; +} + +void sc25519_add(sc25519 *r, const sc25519 *x, const sc25519 *y) +{ + int i, carry; + for(i=0;i<32;i++) r->v[i] = x->v[i] + y->v[i]; + for(i=0;i<31;i++) + { + carry = r->v[i] >> 8; + r->v[i+1] += carry; + r->v[i] &= 0xff; + } + reduce_add_sub(r); +} + +void sc25519_mul(sc25519 *r, const sc25519 *x, const sc25519 *y) +{ + int i,j,carry; + crypto_uint32 t[64]; + for(i=0;i<64;i++)t[i] = 0; + + for(i=0;i<32;i++) + for(j=0;j<32;j++) + t[i+j] += x->v[i] * y->v[j]; + + /* Reduce coefficients */ + for(i=0;i<63;i++) + { + carry = 
t[i] >> 8; + t[i+1] += carry; + t[i] &= 0xff; + } + + barrett_reduce(r, t); +} + +void sc25519_square(sc25519 *r, const sc25519 *x) +{ + sc25519_mul(r, x, x); +} diff --git a/nacl/crypto_sign/edwards25519sha512batch/ref/sc25519.h b/nacl/crypto_sign/edwards25519sha512batch/ref/sc25519.h new file mode 100644 index 00000000..48584a85 --- /dev/null +++ b/nacl/crypto_sign/edwards25519sha512batch/ref/sc25519.h @@ -0,0 +1,51 @@ +#ifndef SC25519_H +#define SC25519_H + +#define sc25519 crypto_sign_edwards25519sha512batch_sc25519 +#define sc25519_from32bytes crypto_sign_edwards25519sha512batch_sc25519_from32bytes +#define sc25519_from64bytes crypto_sign_edwards25519sha512batch_sc25519_from64bytes +#define sc25519_to32bytes crypto_sign_edwards25519sha512batch_sc25519_to32bytes +#define sc25519_pack crypto_sign_edwards25519sha512batch_sc25519_pack +#define sc25519_getparity crypto_sign_edwards25519sha512batch_sc25519_getparity +#define sc25519_setone crypto_sign_edwards25519sha512batch_sc25519_setone +#define sc25519_setzero crypto_sign_edwards25519sha512batch_sc25519_setzero +#define sc25519_neg crypto_sign_edwards25519sha512batch_sc25519_neg +#define sc25519_add crypto_sign_edwards25519sha512batch_sc25519_add +#define sc25519_sub crypto_sign_edwards25519sha512batch_sc25519_sub +#define sc25519_mul crypto_sign_edwards25519sha512batch_sc25519_mul +#define sc25519_square crypto_sign_edwards25519sha512batch_sc25519_square +#define sc25519_invert crypto_sign_edwards25519sha512batch_sc25519_invert + +#include "crypto_uint32.h" + +typedef struct { + crypto_uint32 v[32]; +} sc25519; + +void sc25519_from32bytes(sc25519 *r, const unsigned char x[32]); + +void sc25519_from64bytes(sc25519 *r, const unsigned char x[64]); + +void sc25519_to32bytes(unsigned char r[32], const sc25519 *x); + +void sc25519_pack(unsigned char r[32], const sc25519 *x); + +unsigned char sc25519_getparity(const sc25519 *x); + +void sc25519_setone(sc25519 *r); + +void sc25519_setzero(sc25519 *r); + +void 
sc25519_neg(sc25519 *r, const sc25519 *x); + +void sc25519_add(sc25519 *r, const sc25519 *x, const sc25519 *y); + +void sc25519_sub(sc25519 *r, const sc25519 *x, const sc25519 *y); + +void sc25519_mul(sc25519 *r, const sc25519 *x, const sc25519 *y); + +void sc25519_square(sc25519 *r, const sc25519 *x); + +void sc25519_invert(sc25519 *r, const sc25519 *x); + +#endif diff --git a/nacl/crypto_sign/edwards25519sha512batch/ref/sign.c b/nacl/crypto_sign/edwards25519sha512batch/ref/sign.c new file mode 100644 index 00000000..f40e548b --- /dev/null +++ b/nacl/crypto_sign/edwards25519sha512batch/ref/sign.c @@ -0,0 +1,103 @@ +#include "api.h" +#include "crypto_sign.h" +#include "crypto_hash_sha512.h" +#include "randombytes.h" +#include "crypto_verify_32.h" + +#include "ge25519.h" + +int crypto_sign_keypair( + unsigned char *pk, + unsigned char *sk + ) +{ + sc25519 scsk; + ge25519 gepk; + + randombytes(sk, 32); + crypto_hash_sha512(sk, sk, 32); + sk[0] &= 248; + sk[31] &= 127; + sk[31] |= 64; + + sc25519_from32bytes(&scsk,sk); + + ge25519_scalarmult_base(&gepk, &scsk); + ge25519_pack(pk, &gepk); + return 0; +} + +int crypto_sign( + unsigned char *sm,unsigned long long *smlen, + const unsigned char *m,unsigned long long mlen, + const unsigned char *sk + ) +{ + sc25519 sck, scs, scsk; + ge25519 ger; + unsigned char r[32]; + unsigned char s[32]; + unsigned long long i; + unsigned char hmg[crypto_hash_sha512_BYTES]; + unsigned char hmr[crypto_hash_sha512_BYTES]; + + *smlen = mlen+64; + for(i=0;i +#include "randombytes.h" +#include "cpucycles.h" +#include "crypto_sign.h" + +extern void printentry(long long,const char *,long long *,long long); +extern unsigned char *alignedcalloc(unsigned long long); +extern const char *primitiveimplementation; +extern const char *implementationversion; +extern const char *sizenames[]; +extern const long long sizes[]; +extern void allocate(void); +extern void measure(void); + +const char *primitiveimplementation = crypto_sign_IMPLEMENTATION; +const 
char *implementationversion = crypto_sign_VERSION; +const char *sizenames[] = { "outputbytes", "publickeybytes", "secretkeybytes", 0 }; +const long long sizes[] = { crypto_sign_BYTES, crypto_sign_PUBLICKEYBYTES, crypto_sign_SECRETKEYBYTES }; + +#define MAXTEST_BYTES 100000 + +static unsigned char *pk; +static unsigned char *sk; +static unsigned char *m; unsigned long long mlen; +static unsigned char *sm; unsigned long long smlen; +static unsigned char *t; unsigned long long tlen; + +void preallocate(void) +{ +#ifdef RAND_R_PRNG_NOT_SEEDED + RAND_status(); +#endif +} + +void allocate(void) +{ + pk = alignedcalloc(crypto_sign_PUBLICKEYBYTES); + sk = alignedcalloc(crypto_sign_SECRETKEYBYTES); + m = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES); + sm = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES); + t = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES); +} + +#define TIMINGS 31 +static long long cycles[TIMINGS + 1]; +static long long bytes[TIMINGS + 1]; + +void measure(void) +{ + int i; + int loop; + + for (loop = 0;loop < LOOPS;++loop) { + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_sign_keypair(pk,sk); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(-1,"keypair_cycles",cycles,TIMINGS); + + for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / 4) { + randombytes(m,mlen); + + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + bytes[i] = crypto_sign(sm,&smlen,m,mlen,sk); + if (bytes[i] == 0) bytes[i] = smlen; + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"cycles",cycles,TIMINGS); + printentry(mlen,"bytes",bytes,TIMINGS); + + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + bytes[i] = crypto_sign_open(t,&tlen,sm,smlen,pk); + if (bytes[i] == 0) bytes[i] = tlen; + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"open_cycles",cycles,TIMINGS); + printentry(mlen,"open_bytes",bytes,TIMINGS); + } + } +} 
diff --git a/nacl/crypto_sign/try.c b/nacl/crypto_sign/try.c new file mode 100644 index 00000000..fc553416 --- /dev/null +++ b/nacl/crypto_sign/try.c @@ -0,0 +1,86 @@ +/* + * crypto_sign/try.c version 20090118 + * D. J. Bernstein + * Public domain. + */ + +#include +#include "randombytes.h" +#include "crypto_sign.h" + +#define MAXTEST_BYTES 10000 +#define TUNE_BYTES 1536 + +extern unsigned char *alignedcalloc(unsigned long long); + +const char *primitiveimplementation = crypto_sign_IMPLEMENTATION; + +static unsigned char *pk; +static unsigned char *sk; +static unsigned char *m; unsigned long long mlen; +static unsigned char *sm; unsigned long long smlen; +static unsigned char *t; unsigned long long tlen; + +void preallocate(void) +{ +#ifdef RAND_R_PRNG_NOT_SEEDED + RAND_status(); +#endif +} + +void allocate(void) +{ + pk = alignedcalloc(crypto_sign_PUBLICKEYBYTES); + sk = alignedcalloc(crypto_sign_SECRETKEYBYTES); + m = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES); + sm = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES); + t = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES); +} + +void predoit(void) +{ + crypto_sign_keypair(pk,sk); + mlen = TUNE_BYTES; + smlen = 0; + randombytes(m,mlen); + crypto_sign(sm,&smlen,m,mlen,sk); +} + +void doit(void) +{ + crypto_sign_open(t,&tlen,sm,smlen,pk); +} + +char checksum[crypto_sign_BYTES * 2 + 1]; + +const char *checksum_compute(void) +{ + long long mlen; + long long i; + long long j; + + if (crypto_sign_keypair(pk,sk) != 0) return "crypto_sign_keypair returns nonzero"; + for (mlen = 0;mlen < MAXTEST_BYTES;mlen += 1 + (mlen / 16)) { + if (crypto_sign(sm,&smlen,m,mlen,sk) != 0) return "crypto_sign returns nonzero"; + if (crypto_sign_open(t,&tlen,sm,smlen,pk) != 0) return "crypto_sign_open returns nonzero"; + if (tlen != mlen) return "crypto_sign_open does not match length"; + for (i = 0;i < tlen;++i) + if (t[i] != m[i]) + return "crypto_sign_open does not match contents"; + + j = random() % smlen; + sm[j] ^= 1; + if 
(crypto_sign_open(t,&tlen,sm,smlen,pk) == 0) { + if (tlen != mlen) return "crypto_sign_open allows trivial forgery of length"; + for (i = 0;i < tlen;++i) + if (t[i] != m[i]) + return "crypto_sign_open allows trivial forgery of contents"; + } + sm[j] ^= 1; + + } + + /* do some long-term checksum */ + checksum[0] = 0; + return 0; +} diff --git a/nacl/crypto_sign/wrapper-keypair.cpp b/nacl/crypto_sign/wrapper-keypair.cpp new file mode 100644 index 00000000..3687465d --- /dev/null +++ b/nacl/crypto_sign/wrapper-keypair.cpp @@ -0,0 +1,12 @@ +#include +using std::string; +#include "crypto_sign.h" + +string crypto_sign_keypair(string *sk_string) +{ + unsigned char pk[crypto_sign_PUBLICKEYBYTES]; + unsigned char sk[crypto_sign_SECRETKEYBYTES]; + crypto_sign_keypair(pk,sk); + *sk_string = string((char *) sk,sizeof sk); + return string((char *) pk,sizeof pk); +} diff --git a/nacl/crypto_sign/wrapper-sign-open.cpp b/nacl/crypto_sign/wrapper-sign-open.cpp new file mode 100644 index 00000000..346e9400 --- /dev/null +++ b/nacl/crypto_sign/wrapper-sign-open.cpp @@ -0,0 +1,24 @@ +#include +using std::string; +#include "crypto_sign.h" + +string crypto_sign_open(const string &sm_string, const string &pk_string) +{ + if (pk_string.size() != crypto_sign_PUBLICKEYBYTES) throw "incorrect public-key length"; + size_t smlen = sm_string.size(); + unsigned char m[smlen]; + unsigned long long mlen; + for (int i = 0;i < smlen;++i) m[i] = sm_string[i]; + if (crypto_sign_open( + m, + &mlen, + m, + smlen, + (const unsigned char *) pk_string.c_str() + ) != 0) + throw "ciphertext fails verification"; + return string( + (char *) m, + mlen + ); +} diff --git a/nacl/crypto_sign/wrapper-sign.cpp b/nacl/crypto_sign/wrapper-sign.cpp new file mode 100644 index 00000000..f0624b76 --- /dev/null +++ b/nacl/crypto_sign/wrapper-sign.cpp @@ -0,0 +1,23 @@ +#include +using std::string; +#include "crypto_sign.h" + +string crypto_sign(const string &m_string, const string &sk_string) +{ + if (sk_string.size() != 
crypto_sign_SECRETKEYBYTES) throw "incorrect secret-key length"; + size_t mlen = m_string.size(); + unsigned char m[mlen+crypto_sign_BYTES]; + unsigned long long smlen; + for (int i = 0;i < mlen;++i) m[i] = m_string[i]; + crypto_sign( + m, + &smlen, + m, + mlen, + (const unsigned char *) sk_string.c_str() + ); + return string( + (char *) m, + smlen + ); +} diff --git a/nacl/crypto_stream/aes128ctr/checksum b/nacl/crypto_stream/aes128ctr/checksum new file mode 100644 index 00000000..92865436 --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/checksum @@ -0,0 +1 @@ +6e9966897837aae181e93261ae88fdf0 diff --git a/nacl/crypto_stream/aes128ctr/core2/afternm.s b/nacl/crypto_stream/aes128ctr/core2/afternm.s new file mode 100644 index 00000000..c1ba79ef --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/core2/afternm.s @@ -0,0 +1,12308 @@ +# Author: Emilia Käsper and Peter Schwabe +# Date: 2009-03-19 +# +2010.01.31: minor namespace modifications +# Public domain + +.data +.p2align 6 + +RCON: .int 0x00000000, 0x00000000, 0x00000000, 0xffffffff +ROTB: .int 0x0c000000, 0x00000000, 0x04000000, 0x08000000 +EXPB0: .int 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f +CTRINC1: .int 0x00000001, 0x00000000, 0x00000000, 0x00000000 +CTRINC2: .int 0x00000002, 0x00000000, 0x00000000, 0x00000000 +CTRINC3: .int 0x00000003, 0x00000000, 0x00000000, 0x00000000 +CTRINC4: .int 0x00000004, 0x00000000, 0x00000000, 0x00000000 +CTRINC5: .int 0x00000005, 0x00000000, 0x00000000, 0x00000000 +CTRINC6: .int 0x00000006, 0x00000000, 0x00000000, 0x00000000 +CTRINC7: .int 0x00000007, 0x00000000, 0x00000000, 0x00000000 +RCTRINC1: .int 0x00000000, 0x00000000, 0x00000000, 0x00000001 +RCTRINC2: .int 0x00000000, 0x00000000, 0x00000000, 0x00000002 +RCTRINC3: .int 0x00000000, 0x00000000, 0x00000000, 0x00000003 +RCTRINC4: .int 0x00000000, 0x00000000, 0x00000000, 0x00000004 +RCTRINC5: .int 0x00000000, 0x00000000, 0x00000000, 0x00000005 +RCTRINC6: .int 0x00000000, 0x00000000, 0x00000000, 0x00000006 +RCTRINC7: .int 
0x00000000, 0x00000000, 0x00000000, 0x00000007 + +SWAP32: .int 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f +M0SWAP: .quad 0x0105090d0004080c , 0x03070b0f02060a0e + +BS0: .quad 0x5555555555555555, 0x5555555555555555 +BS1: .quad 0x3333333333333333, 0x3333333333333333 +BS2: .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +ONE: .quad 0xffffffffffffffff, 0xffffffffffffffff +M0: .quad 0x02060a0e03070b0f, 0x0004080c0105090d +SRM0: .quad 0x0304090e00050a0f, 0x01060b0c0207080d +SR: .quad 0x0504070600030201, 0x0f0e0d0c0a09080b + +# qhasm: int64 outp + +# qhasm: int64 len + +# qhasm: int64 np + +# qhasm: int64 c + +# qhasm: input outp + +# qhasm: input len + +# qhasm: input np + +# qhasm: input c + +# qhasm: int64 lensav + +# qhasm: int6464 xmm0 + +# qhasm: int6464 xmm1 + +# qhasm: int6464 xmm2 + +# qhasm: int6464 xmm3 + +# qhasm: int6464 xmm4 + +# qhasm: int6464 xmm5 + +# qhasm: int6464 xmm6 + +# qhasm: int6464 xmm7 + +# qhasm: int6464 xmm8 + +# qhasm: int6464 xmm9 + +# qhasm: int6464 xmm10 + +# qhasm: int6464 xmm11 + +# qhasm: int6464 xmm12 + +# qhasm: int6464 xmm13 + +# qhasm: int6464 xmm14 + +# qhasm: int6464 xmm15 + +# qhasm: int6464 t + +# qhasm: stack1024 bl + +# qhasm: stack128 nonce_stack + +# qhasm: int64 blp + +# qhasm: int64 b + +# qhasm: int64 tmp + +# qhasm: enter crypto_stream_aes128ctr_core2_afternm +.text +.p2align 5 +.globl _crypto_stream_aes128ctr_core2_afternm +.globl crypto_stream_aes128ctr_core2_afternm +_crypto_stream_aes128ctr_core2_afternm: +crypto_stream_aes128ctr_core2_afternm: +mov %rsp,%r11 +and $31,%r11 +add $160,%r11 +sub %r11,%rsp + +# qhasm: xmm0 = *(int128 *) (np + 0) +# asm 1: movdqa 0(xmm0=int6464#1 +# asm 2: movdqa 0(xmm0=%xmm0 +movdqa 0(%rdx),%xmm0 + +# qhasm: nonce_stack = xmm0 +# asm 1: movdqa nonce_stack=stack128#1 +# asm 2: movdqa nonce_stack=0(%rsp) +movdqa %xmm0,0(%rsp) + +# qhasm: np = &nonce_stack +# asm 1: leaq np=int64#3 +# asm 2: leaq np=%rdx +leaq 0(%rsp),%rdx + +# qhasm: enc_block: +._enc_block: + +# qhasm: xmm0 = *(int128 *) 
(np + 0) +# asm 1: movdqa 0(xmm0=int6464#1 +# asm 2: movdqa 0(xmm0=%xmm0 +movdqa 0(%rdx),%xmm0 + +# qhasm: xmm1 = xmm0 +# asm 1: movdqa xmm1=int6464#2 +# asm 2: movdqa xmm1=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: shuffle bytes of xmm1 by SWAP32 +# asm 1: pshufb SWAP32,xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: xmm3 = xmm1 +# asm 1: movdqa xmm3=int6464#4 +# asm 2: movdqa xmm3=%xmm3 +movdqa %xmm1,%xmm3 + +# qhasm: xmm4 = xmm1 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: xmm5 = xmm1 +# asm 1: movdqa xmm5=int6464#6 +# asm 2: movdqa xmm5=%xmm5 +movdqa %xmm1,%xmm5 + +# qhasm: xmm6 = xmm1 +# asm 1: movdqa xmm6=int6464#7 +# asm 2: movdqa xmm6=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm1 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm1,%xmm7 + +# qhasm: int32323232 xmm1 += RCTRINC1 +# asm 1: paddd RCTRINC1,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: uint6464 xmm8 >>= 1 +# asm 1: psrlq $1,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm4,%xmm8 + +# qhasm: uint6464 xmm8 >>= 1 +# asm 1: psrlq $1,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm2,%xmm8 + +# qhasm: uint6464 xmm8 >>= 1 +# asm 1: psrlq $1,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm0,%xmm8 + +# qhasm: uint6464 xmm8 >>= 1 +# asm 1: psrlq $1,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: uint6464 xmm8 >>= 2 +# asm 1: psrlq $2,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm4,%xmm8 + +# qhasm: uint6464 xmm8 >>= 2 +# asm 1: psrlq $2,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm1,%xmm8 + +# qhasm: uint6464 xmm8 >>= 2 +# asm 1: psrlq $2,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm0,%xmm8 + +# qhasm: uint6464 xmm8 >>= 2 +# asm 1: psrlq $2,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm3,%xmm8 + +# qhasm: uint6464 xmm8 >>= 4 +# asm 1: psrlq $4,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa 
%xmm2,%xmm8 + +# qhasm: uint6464 xmm8 >>= 4 +# asm 1: psrlq $4,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm1,%xmm8 + +# qhasm: uint6464 xmm8 >>= 4 +# asm 1: psrlq $4,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm0,%xmm8 + +# qhasm: uint6464 xmm8 >>= 4 +# asm 1: psrlq $4,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# 
qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x93,xmm8=%xmm8 +pshufd $0x93,%xmm0,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm1 by 0x93 +# asm 1: pshufd $0x93,xmm9=int6464#10 +# asm 2: pshufd $0x93,xmm9=%xmm9 +pshufd $0x93,%xmm1,%xmm9 + +# qhasm: xmm10 = shuffle dwords of xmm4 by 0x93 +# asm 1: pshufd $0x93,xmm10=int6464#11 +# asm 2: 
pshufd $0x93,xmm10=%xmm10 +pshufd $0x93,%xmm4,%xmm10 + +# qhasm: xmm11 = shuffle dwords of xmm6 by 0x93 +# asm 1: pshufd $0x93,xmm11=int6464#12 +# asm 2: pshufd $0x93,xmm11=%xmm11 +pshufd $0x93,%xmm6,%xmm11 + +# qhasm: xmm12 = shuffle dwords of xmm3 by 0x93 +# asm 1: pshufd $0x93,xmm12=int6464#13 +# asm 2: pshufd $0x93,xmm12=%xmm12 +pshufd $0x93,%xmm3,%xmm12 + +# qhasm: xmm13 = shuffle dwords of xmm7 by 0x93 +# asm 1: pshufd $0x93,xmm13=int6464#14 +# asm 2: pshufd $0x93,xmm13=%xmm13 +pshufd $0x93,%xmm7,%xmm13 + +# qhasm: xmm14 = shuffle dwords of xmm2 by 0x93 +# asm 1: pshufd $0x93,xmm14=int6464#15 +# asm 2: pshufd $0x93,xmm14=%xmm14 +pshufd $0x93,%xmm2,%xmm14 + +# qhasm: xmm15 = shuffle dwords of xmm5 by 0x93 +# asm 1: pshufd $0x93,xmm15=int6464#16 +# asm 2: pshufd $0x93,xmm15=%xmm15 +pshufd $0x93,%xmm5,%xmm15 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x4E,xmm0=%xmm0 +pshufd $0x4E,%xmm0,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm1 by 0x4E +# asm 1: pshufd $0x4E,xmm1=int6464#2 +# asm 2: pshufd $0x4E,xmm1=%xmm1 +pshufd $0x4E,%xmm1,%xmm1 + +# qhasm: xmm4 = shuffle dwords of xmm4 by 0x4E +# asm 1: pshufd $0x4E,xmm4=int6464#5 +# asm 2: pshufd $0x4E,xmm4=%xmm4 +pshufd $0x4E,%xmm4,%xmm4 + +# qhasm: xmm6 = shuffle dwords of xmm6 by 0x4E +# asm 1: pshufd $0x4E,xmm6=int6464#7 +# asm 2: pshufd $0x4E,xmm6=%xmm6 +pshufd $0x4E,%xmm6,%xmm6 + +# qhasm: xmm3 = shuffle dwords of xmm3 by 0x4E +# asm 1: pshufd $0x4E,xmm3=int6464#4 +# asm 2: pshufd $0x4E,xmm3=%xmm3 +pshufd $0x4E,%xmm3,%xmm3 + +# qhasm: xmm7 = shuffle dwords of xmm7 by 0x4E +# asm 1: pshufd $0x4E,xmm7=int6464#8 +# asm 2: pshufd $0x4E,xmm7=%xmm7 +pshufd $0x4E,%xmm7,%xmm7 + +# qhasm: xmm2 = shuffle dwords of xmm2 by 0x4E +# asm 1: pshufd $0x4E,xmm2=int6464#3 +# asm 2: pshufd $0x4E,xmm2=%xmm2 +pshufd $0x4E,%xmm2,%xmm2 + +# qhasm: xmm5 = shuffle dwords of xmm5 by 0x4E +# asm 1: pshufd $0x4E,xmm5=int6464#6 +# asm 2: pshufd $0x4E,xmm5=%xmm5 +pshufd $0x4E,%xmm5,%xmm5 + +# qhasm: xmm8 ^= xmm0 
+# asm 1: pxor xmm3=int6464#1 +# asm 2: movdqa xmm3=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm2 = xmm9 +# asm 1: movdqa xmm2=int6464#2 +# asm 2: movdqa xmm2=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm1 = xmm13 +# asm 1: movdqa xmm1=int6464#3 +# asm 2: movdqa xmm1=%xmm2 +movdqa %xmm13,%xmm2 + +# qhasm: xmm5 = xmm10 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm10,%xmm3 + +# qhasm: xmm4 = xmm14 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm14,%xmm4 + +# qhasm: xmm3 ^= xmm12 +# asm 1: pxor xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: xmm0 = xmm2 +# asm 1: movdqa xmm0=int6464#7 +# asm 2: movdqa xmm0=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm3 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm0,%xmm7 + +# qhasm: xmm2 |= xmm1 +# asm 1: por xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm11,%xmm2 + +# qhasm: xmm4 ^= xmm8 +# asm 1: pxor xmm5=int6464#3 +# asm 2: movdqa xmm5=%xmm2 +movdqa %xmm15,%xmm2 + +# qhasm: xmm5 ^= xmm9 +# asm 1: pxor xmm4=int6464#4 +# asm 2: movdqa xmm4=%xmm3 +movdqa %xmm13,%xmm3 + +# qhasm: xmm1 = xmm5 +# asm 1: movdqa xmm1=int6464#5 +# asm 2: movdqa xmm1=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: xmm4 ^= xmm14 +# asm 1: pxor xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm10,%xmm2 + +# qhasm: xmm5 = xmm12 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm12,%xmm3 + +# qhasm: xmm6 = xmm9 +# asm 1: movdqa xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm9,%xmm5 + +# qhasm: xmm7 = xmm15 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm15,%xmm7 + +# qhasm: xmm4 &= xmm11 +# asm 1: pand xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: xmm4 ^= xmm2 +# asm 1: pxor xmm6=int6464#4 +# asm 2: movdqa xmm6=%xmm3 +movdqa %xmm6,%xmm3 + +# qhasm: xmm6 ^= xmm3 +# asm 1: pxor xmm7=int6464#6 +# asm 2: movdqa xmm7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: xmm7 &= xmm6 +# asm 1: pand 
xmm5=int6464#8 +# asm 2: movdqa xmm5=%xmm7 +movdqa %xmm4,%xmm7 + +# qhasm: xmm5 ^= xmm0 +# asm 1: pxor xmm2=int6464#1 +# asm 2: movdqa xmm2=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: xmm2 ^= xmm5 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm14,%xmm0 + +# qhasm: xmm0 = xmm13 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: xmm2 = xmm7 +# asm 1: movdqa xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm2 ^= xmm6 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm0 = xmm9 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm4 ^= xmm12 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x93,xmm0=%xmm0 +pshufd $0x93,%xmm8,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm9 by 0x93 +# asm 1: pshufd $0x93,xmm1=int6464#2 +# asm 2: pshufd $0x93,xmm1=%xmm1 +pshufd $0x93,%xmm9,%xmm1 + +# qhasm: xmm2 = shuffle dwords of xmm12 by 0x93 +# asm 1: pshufd $0x93,xmm2=int6464#3 +# asm 2: pshufd $0x93,xmm2=%xmm2 +pshufd $0x93,%xmm12,%xmm2 + +# qhasm: xmm3 = shuffle dwords of xmm14 by 0x93 +# asm 1: pshufd $0x93,xmm3=int6464#4 +# asm 2: pshufd $0x93,xmm3=%xmm3 +pshufd $0x93,%xmm14,%xmm3 + +# qhasm: xmm4 = shuffle dwords of xmm11 by 0x93 +# asm 1: pshufd $0x93,xmm4=int6464#5 +# asm 2: pshufd $0x93,xmm4=%xmm4 +pshufd $0x93,%xmm11,%xmm4 + +# qhasm: xmm5 = shuffle dwords of xmm15 by 0x93 +# asm 1: pshufd 
$0x93,xmm5=int6464#6 +# asm 2: pshufd $0x93,xmm5=%xmm5 +pshufd $0x93,%xmm15,%xmm5 + +# qhasm: xmm6 = shuffle dwords of xmm10 by 0x93 +# asm 1: pshufd $0x93,xmm6=int6464#7 +# asm 2: pshufd $0x93,xmm6=%xmm6 +pshufd $0x93,%xmm10,%xmm6 + +# qhasm: xmm7 = shuffle dwords of xmm13 by 0x93 +# asm 1: pshufd $0x93,xmm7=int6464#8 +# asm 2: pshufd $0x93,xmm7=%xmm7 +pshufd $0x93,%xmm13,%xmm7 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x4E,xmm8=%xmm8 +pshufd $0x4E,%xmm8,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm9 by 0x4E +# asm 1: pshufd $0x4E,xmm9=int6464#10 +# asm 2: pshufd $0x4E,xmm9=%xmm9 +pshufd $0x4E,%xmm9,%xmm9 + +# qhasm: xmm12 = shuffle dwords of xmm12 by 0x4E +# asm 1: pshufd $0x4E,xmm12=int6464#13 +# asm 2: pshufd $0x4E,xmm12=%xmm12 +pshufd $0x4E,%xmm12,%xmm12 + +# qhasm: xmm14 = shuffle dwords of xmm14 by 0x4E +# asm 1: pshufd $0x4E,xmm14=int6464#15 +# asm 2: pshufd $0x4E,xmm14=%xmm14 +pshufd $0x4E,%xmm14,%xmm14 + +# qhasm: xmm11 = shuffle dwords of xmm11 by 0x4E +# asm 1: pshufd $0x4E,xmm11=int6464#12 +# asm 2: pshufd $0x4E,xmm11=%xmm11 +pshufd $0x4E,%xmm11,%xmm11 + +# qhasm: xmm15 = shuffle dwords of xmm15 by 0x4E +# asm 1: pshufd $0x4E,xmm15=int6464#16 +# asm 2: pshufd $0x4E,xmm15=%xmm15 +pshufd $0x4E,%xmm15,%xmm15 + +# qhasm: xmm10 = shuffle dwords of xmm10 by 0x4E +# asm 1: pshufd $0x4E,xmm10=int6464#11 +# asm 2: pshufd $0x4E,xmm10=%xmm10 +pshufd $0x4E,%xmm10,%xmm10 + +# qhasm: xmm13 = shuffle dwords of xmm13 by 0x4E +# asm 1: pshufd $0x4E,xmm13=int6464#14 +# asm 2: pshufd $0x4E,xmm13=%xmm13 +pshufd $0x4E,%xmm13,%xmm13 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa 
%xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = 
xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x93,xmm8=%xmm8 +pshufd $0x93,%xmm0,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm1 by 0x93 +# asm 1: pshufd $0x93,xmm9=int6464#10 +# asm 2: pshufd $0x93,xmm9=%xmm9 +pshufd $0x93,%xmm1,%xmm9 + +# qhasm: xmm10 = shuffle dwords of xmm4 by 0x93 +# asm 1: pshufd $0x93,xmm10=int6464#11 +# asm 2: pshufd $0x93,xmm10=%xmm10 +pshufd $0x93,%xmm4,%xmm10 + +# qhasm: xmm11 = shuffle dwords of xmm6 by 0x93 +# asm 1: pshufd $0x93,xmm11=int6464#12 +# asm 2: pshufd $0x93,xmm11=%xmm11 +pshufd $0x93,%xmm6,%xmm11 + +# qhasm: xmm12 = shuffle dwords of xmm3 by 0x93 +# asm 1: pshufd $0x93,xmm12=int6464#13 +# asm 2: pshufd $0x93,xmm12=%xmm12 +pshufd $0x93,%xmm3,%xmm12 + +# qhasm: xmm13 = shuffle dwords of xmm7 by 0x93 +# asm 1: pshufd $0x93,xmm13=int6464#14 +# asm 2: pshufd $0x93,xmm13=%xmm13 +pshufd $0x93,%xmm7,%xmm13 + +# qhasm: xmm14 = shuffle dwords of xmm2 by 0x93 +# asm 1: pshufd $0x93,xmm14=int6464#15 +# asm 2: pshufd $0x93,xmm14=%xmm14 +pshufd $0x93,%xmm2,%xmm14 + 
+# qhasm: xmm15 = shuffle dwords of xmm5 by 0x93 +# asm 1: pshufd $0x93,xmm15=int6464#16 +# asm 2: pshufd $0x93,xmm15=%xmm15 +pshufd $0x93,%xmm5,%xmm15 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x4E,xmm0=%xmm0 +pshufd $0x4E,%xmm0,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm1 by 0x4E +# asm 1: pshufd $0x4E,xmm1=int6464#2 +# asm 2: pshufd $0x4E,xmm1=%xmm1 +pshufd $0x4E,%xmm1,%xmm1 + +# qhasm: xmm4 = shuffle dwords of xmm4 by 0x4E +# asm 1: pshufd $0x4E,xmm4=int6464#5 +# asm 2: pshufd $0x4E,xmm4=%xmm4 +pshufd $0x4E,%xmm4,%xmm4 + +# qhasm: xmm6 = shuffle dwords of xmm6 by 0x4E +# asm 1: pshufd $0x4E,xmm6=int6464#7 +# asm 2: pshufd $0x4E,xmm6=%xmm6 +pshufd $0x4E,%xmm6,%xmm6 + +# qhasm: xmm3 = shuffle dwords of xmm3 by 0x4E +# asm 1: pshufd $0x4E,xmm3=int6464#4 +# asm 2: pshufd $0x4E,xmm3=%xmm3 +pshufd $0x4E,%xmm3,%xmm3 + +# qhasm: xmm7 = shuffle dwords of xmm7 by 0x4E +# asm 1: pshufd $0x4E,xmm7=int6464#8 +# asm 2: pshufd $0x4E,xmm7=%xmm7 +pshufd $0x4E,%xmm7,%xmm7 + +# qhasm: xmm2 = shuffle dwords of xmm2 by 0x4E +# asm 1: pshufd $0x4E,xmm2=int6464#3 +# asm 2: pshufd $0x4E,xmm2=%xmm2 +pshufd $0x4E,%xmm2,%xmm2 + +# qhasm: xmm5 = shuffle dwords of xmm5 by 0x4E +# asm 1: pshufd $0x4E,xmm5=int6464#6 +# asm 2: pshufd $0x4E,xmm5=%xmm5 +pshufd $0x4E,%xmm5,%xmm5 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm3=int6464#1 +# asm 2: movdqa xmm3=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm2 = xmm9 +# asm 1: movdqa xmm2=int6464#2 +# asm 2: movdqa xmm2=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm1 = xmm13 +# asm 1: movdqa xmm1=int6464#3 +# asm 2: movdqa xmm1=%xmm2 +movdqa %xmm13,%xmm2 + +# qhasm: xmm5 = xmm10 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm10,%xmm3 + +# qhasm: xmm4 = xmm14 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm14,%xmm4 + +# qhasm: xmm3 ^= xmm12 +# asm 1: pxor xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: xmm0 = xmm2 +# asm 1: movdqa xmm0=int6464#7 +# asm 2: 
movdqa xmm0=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm3 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm0,%xmm7 + +# qhasm: xmm2 |= xmm1 +# asm 1: por xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm11,%xmm2 + +# qhasm: xmm4 ^= xmm8 +# asm 1: pxor xmm5=int6464#3 +# asm 2: movdqa xmm5=%xmm2 +movdqa %xmm15,%xmm2 + +# qhasm: xmm5 ^= xmm9 +# asm 1: pxor xmm4=int6464#4 +# asm 2: movdqa xmm4=%xmm3 +movdqa %xmm13,%xmm3 + +# qhasm: xmm1 = xmm5 +# asm 1: movdqa xmm1=int6464#5 +# asm 2: movdqa xmm1=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: xmm4 ^= xmm14 +# asm 1: pxor xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm10,%xmm2 + +# qhasm: xmm5 = xmm12 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm12,%xmm3 + +# qhasm: xmm6 = xmm9 +# asm 1: movdqa xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm9,%xmm5 + +# qhasm: xmm7 = xmm15 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm15,%xmm7 + +# qhasm: xmm4 &= xmm11 +# asm 1: pand xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: xmm4 ^= xmm2 +# asm 1: pxor xmm6=int6464#4 +# asm 2: movdqa xmm6=%xmm3 +movdqa %xmm6,%xmm3 + +# qhasm: xmm6 ^= xmm3 +# asm 1: pxor xmm7=int6464#6 +# asm 2: movdqa xmm7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: xmm7 &= xmm6 +# asm 1: pand xmm5=int6464#8 +# asm 2: movdqa xmm5=%xmm7 +movdqa %xmm4,%xmm7 + +# qhasm: xmm5 ^= xmm0 +# asm 1: pxor xmm2=int6464#1 +# asm 2: movdqa xmm2=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: xmm2 ^= xmm5 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm14,%xmm0 + +# qhasm: xmm0 = xmm13 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: xmm2 = xmm7 +# asm 1: movdqa xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm2 ^= xmm6 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 
+movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm0 = xmm9 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm4 ^= xmm12 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x93,xmm0=%xmm0 +pshufd $0x93,%xmm8,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm9 by 0x93 +# asm 1: pshufd $0x93,xmm1=int6464#2 +# asm 2: pshufd $0x93,xmm1=%xmm1 +pshufd $0x93,%xmm9,%xmm1 + +# qhasm: xmm2 = shuffle dwords of xmm12 by 0x93 +# asm 1: pshufd $0x93,xmm2=int6464#3 +# asm 2: pshufd $0x93,xmm2=%xmm2 +pshufd $0x93,%xmm12,%xmm2 + +# qhasm: xmm3 = shuffle dwords of xmm14 by 0x93 +# asm 1: pshufd $0x93,xmm3=int6464#4 +# asm 2: pshufd $0x93,xmm3=%xmm3 +pshufd $0x93,%xmm14,%xmm3 + +# qhasm: xmm4 = shuffle dwords of xmm11 by 0x93 +# asm 1: pshufd $0x93,xmm4=int6464#5 +# asm 2: pshufd $0x93,xmm4=%xmm4 +pshufd $0x93,%xmm11,%xmm4 + +# qhasm: xmm5 = shuffle dwords of xmm15 by 0x93 +# asm 1: pshufd $0x93,xmm5=int6464#6 +# asm 2: pshufd $0x93,xmm5=%xmm5 +pshufd $0x93,%xmm15,%xmm5 + +# qhasm: xmm6 = shuffle dwords of xmm10 by 0x93 +# asm 1: pshufd $0x93,xmm6=int6464#7 +# asm 2: pshufd $0x93,xmm6=%xmm6 +pshufd $0x93,%xmm10,%xmm6 + +# qhasm: xmm7 = shuffle dwords of xmm13 by 0x93 +# asm 1: pshufd $0x93,xmm7=int6464#8 +# asm 2: pshufd $0x93,xmm7=%xmm7 +pshufd $0x93,%xmm13,%xmm7 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x4E,xmm8=%xmm8 +pshufd $0x4E,%xmm8,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm9 by 0x4E +# asm 1: pshufd $0x4E,xmm9=int6464#10 +# asm 2: pshufd $0x4E,xmm9=%xmm9 +pshufd $0x4E,%xmm9,%xmm9 + +# qhasm: xmm12 = shuffle dwords 
of xmm12 by 0x4E +# asm 1: pshufd $0x4E,xmm12=int6464#13 +# asm 2: pshufd $0x4E,xmm12=%xmm12 +pshufd $0x4E,%xmm12,%xmm12 + +# qhasm: xmm14 = shuffle dwords of xmm14 by 0x4E +# asm 1: pshufd $0x4E,xmm14=int6464#15 +# asm 2: pshufd $0x4E,xmm14=%xmm14 +pshufd $0x4E,%xmm14,%xmm14 + +# qhasm: xmm11 = shuffle dwords of xmm11 by 0x4E +# asm 1: pshufd $0x4E,xmm11=int6464#12 +# asm 2: pshufd $0x4E,xmm11=%xmm11 +pshufd $0x4E,%xmm11,%xmm11 + +# qhasm: xmm15 = shuffle dwords of xmm15 by 0x4E +# asm 1: pshufd $0x4E,xmm15=int6464#16 +# asm 2: pshufd $0x4E,xmm15=%xmm15 +pshufd $0x4E,%xmm15,%xmm15 + +# qhasm: xmm10 = shuffle dwords of xmm10 by 0x4E +# asm 1: pshufd $0x4E,xmm10=int6464#11 +# asm 2: pshufd $0x4E,xmm10=%xmm10 +pshufd $0x4E,%xmm10,%xmm10 + +# qhasm: xmm13 = shuffle dwords of xmm13 by 0x4E +# asm 1: pshufd $0x4E,xmm13=int6464#14 +# asm 2: pshufd $0x4E,xmm13=%xmm13 +pshufd $0x4E,%xmm13,%xmm13 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: 
xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor 
xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x93,xmm8=%xmm8 +pshufd $0x93,%xmm0,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm1 by 0x93 +# asm 1: pshufd $0x93,xmm9=int6464#10 +# asm 2: pshufd $0x93,xmm9=%xmm9 +pshufd $0x93,%xmm1,%xmm9 + +# qhasm: xmm10 = shuffle dwords of xmm4 by 0x93 +# asm 1: pshufd $0x93,xmm10=int6464#11 +# asm 2: pshufd $0x93,xmm10=%xmm10 +pshufd $0x93,%xmm4,%xmm10 + +# qhasm: xmm11 = shuffle dwords of xmm6 by 0x93 +# asm 1: pshufd $0x93,xmm11=int6464#12 +# asm 2: pshufd $0x93,xmm11=%xmm11 +pshufd $0x93,%xmm6,%xmm11 + +# qhasm: xmm12 = shuffle dwords of xmm3 by 0x93 +# asm 1: pshufd $0x93,xmm12=int6464#13 +# asm 2: pshufd $0x93,xmm12=%xmm12 +pshufd $0x93,%xmm3,%xmm12 + +# qhasm: xmm13 = shuffle dwords of xmm7 by 0x93 +# asm 1: pshufd $0x93,xmm13=int6464#14 +# asm 2: pshufd $0x93,xmm13=%xmm13 +pshufd $0x93,%xmm7,%xmm13 + +# qhasm: xmm14 = shuffle dwords of xmm2 by 0x93 +# asm 1: pshufd $0x93,xmm14=int6464#15 +# asm 2: pshufd $0x93,xmm14=%xmm14 +pshufd $0x93,%xmm2,%xmm14 + +# qhasm: xmm15 = shuffle dwords of xmm5 by 0x93 +# asm 1: pshufd $0x93,xmm15=int6464#16 +# asm 2: pshufd $0x93,xmm15=%xmm15 +pshufd $0x93,%xmm5,%xmm15 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x4E,xmm0=%xmm0 +pshufd $0x4E,%xmm0,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm1 by 0x4E +# asm 1: pshufd $0x4E,xmm1=int6464#2 +# asm 2: pshufd $0x4E,xmm1=%xmm1 +pshufd $0x4E,%xmm1,%xmm1 + +# qhasm: xmm4 = shuffle dwords of xmm4 by 0x4E +# asm 1: pshufd $0x4E,xmm4=int6464#5 +# asm 2: pshufd $0x4E,xmm4=%xmm4 +pshufd $0x4E,%xmm4,%xmm4 + +# qhasm: xmm6 = shuffle dwords of xmm6 by 0x4E +# asm 1: pshufd $0x4E,xmm6=int6464#7 +# asm 2: pshufd 
$0x4E,xmm6=%xmm6 +pshufd $0x4E,%xmm6,%xmm6 + +# qhasm: xmm3 = shuffle dwords of xmm3 by 0x4E +# asm 1: pshufd $0x4E,xmm3=int6464#4 +# asm 2: pshufd $0x4E,xmm3=%xmm3 +pshufd $0x4E,%xmm3,%xmm3 + +# qhasm: xmm7 = shuffle dwords of xmm7 by 0x4E +# asm 1: pshufd $0x4E,xmm7=int6464#8 +# asm 2: pshufd $0x4E,xmm7=%xmm7 +pshufd $0x4E,%xmm7,%xmm7 + +# qhasm: xmm2 = shuffle dwords of xmm2 by 0x4E +# asm 1: pshufd $0x4E,xmm2=int6464#3 +# asm 2: pshufd $0x4E,xmm2=%xmm2 +pshufd $0x4E,%xmm2,%xmm2 + +# qhasm: xmm5 = shuffle dwords of xmm5 by 0x4E +# asm 1: pshufd $0x4E,xmm5=int6464#6 +# asm 2: pshufd $0x4E,xmm5=%xmm5 +pshufd $0x4E,%xmm5,%xmm5 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm3=int6464#1 +# asm 2: movdqa xmm3=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm2 = xmm9 +# asm 1: movdqa xmm2=int6464#2 +# asm 2: movdqa xmm2=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm1 = xmm13 +# asm 1: movdqa xmm1=int6464#3 +# asm 2: movdqa xmm1=%xmm2 +movdqa %xmm13,%xmm2 + +# qhasm: xmm5 = xmm10 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm10,%xmm3 + +# qhasm: xmm4 = xmm14 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm14,%xmm4 + +# qhasm: xmm3 ^= xmm12 +# asm 1: pxor xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: xmm0 = xmm2 +# asm 1: movdqa xmm0=int6464#7 +# asm 2: movdqa xmm0=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm3 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm0,%xmm7 + +# qhasm: xmm2 |= xmm1 +# asm 1: por xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm11,%xmm2 + +# qhasm: xmm4 ^= xmm8 +# asm 1: pxor xmm5=int6464#3 +# asm 2: movdqa xmm5=%xmm2 +movdqa %xmm15,%xmm2 + +# qhasm: xmm5 ^= xmm9 +# asm 1: pxor xmm4=int6464#4 +# asm 2: movdqa xmm4=%xmm3 +movdqa %xmm13,%xmm3 + +# qhasm: xmm1 = xmm5 +# asm 1: movdqa xmm1=int6464#5 +# asm 2: movdqa xmm1=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: xmm4 ^= xmm14 +# asm 1: pxor xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm10,%xmm2 + +# 
qhasm: xmm5 = xmm12 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm12,%xmm3 + +# qhasm: xmm6 = xmm9 +# asm 1: movdqa xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm9,%xmm5 + +# qhasm: xmm7 = xmm15 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm15,%xmm7 + +# qhasm: xmm4 &= xmm11 +# asm 1: pand xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: xmm4 ^= xmm2 +# asm 1: pxor xmm6=int6464#4 +# asm 2: movdqa xmm6=%xmm3 +movdqa %xmm6,%xmm3 + +# qhasm: xmm6 ^= xmm3 +# asm 1: pxor xmm7=int6464#6 +# asm 2: movdqa xmm7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: xmm7 &= xmm6 +# asm 1: pand xmm5=int6464#8 +# asm 2: movdqa xmm5=%xmm7 +movdqa %xmm4,%xmm7 + +# qhasm: xmm5 ^= xmm0 +# asm 1: pxor xmm2=int6464#1 +# asm 2: movdqa xmm2=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: xmm2 ^= xmm5 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm14,%xmm0 + +# qhasm: xmm0 = xmm13 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: xmm2 = xmm7 +# asm 1: movdqa xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm2 ^= xmm6 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm0 = xmm9 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm4 ^= xmm12 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x93,xmm0=%xmm0 +pshufd $0x93,%xmm8,%xmm0 + +# qhasm: xmm1 = 
shuffle dwords of xmm9 by 0x93 +# asm 1: pshufd $0x93,xmm1=int6464#2 +# asm 2: pshufd $0x93,xmm1=%xmm1 +pshufd $0x93,%xmm9,%xmm1 + +# qhasm: xmm2 = shuffle dwords of xmm12 by 0x93 +# asm 1: pshufd $0x93,xmm2=int6464#3 +# asm 2: pshufd $0x93,xmm2=%xmm2 +pshufd $0x93,%xmm12,%xmm2 + +# qhasm: xmm3 = shuffle dwords of xmm14 by 0x93 +# asm 1: pshufd $0x93,xmm3=int6464#4 +# asm 2: pshufd $0x93,xmm3=%xmm3 +pshufd $0x93,%xmm14,%xmm3 + +# qhasm: xmm4 = shuffle dwords of xmm11 by 0x93 +# asm 1: pshufd $0x93,xmm4=int6464#5 +# asm 2: pshufd $0x93,xmm4=%xmm4 +pshufd $0x93,%xmm11,%xmm4 + +# qhasm: xmm5 = shuffle dwords of xmm15 by 0x93 +# asm 1: pshufd $0x93,xmm5=int6464#6 +# asm 2: pshufd $0x93,xmm5=%xmm5 +pshufd $0x93,%xmm15,%xmm5 + +# qhasm: xmm6 = shuffle dwords of xmm10 by 0x93 +# asm 1: pshufd $0x93,xmm6=int6464#7 +# asm 2: pshufd $0x93,xmm6=%xmm6 +pshufd $0x93,%xmm10,%xmm6 + +# qhasm: xmm7 = shuffle dwords of xmm13 by 0x93 +# asm 1: pshufd $0x93,xmm7=int6464#8 +# asm 2: pshufd $0x93,xmm7=%xmm7 +pshufd $0x93,%xmm13,%xmm7 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x4E,xmm8=%xmm8 +pshufd $0x4E,%xmm8,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm9 by 0x4E +# asm 1: pshufd $0x4E,xmm9=int6464#10 +# asm 2: pshufd $0x4E,xmm9=%xmm9 +pshufd $0x4E,%xmm9,%xmm9 + +# qhasm: xmm12 = shuffle dwords of xmm12 by 0x4E +# asm 1: pshufd $0x4E,xmm12=int6464#13 +# asm 2: pshufd $0x4E,xmm12=%xmm12 +pshufd $0x4E,%xmm12,%xmm12 + +# qhasm: xmm14 = shuffle dwords of xmm14 by 0x4E +# asm 1: pshufd $0x4E,xmm14=int6464#15 +# asm 2: pshufd $0x4E,xmm14=%xmm14 +pshufd $0x4E,%xmm14,%xmm14 + +# qhasm: xmm11 = shuffle dwords of xmm11 by 0x4E +# asm 1: pshufd $0x4E,xmm11=int6464#12 +# asm 2: pshufd $0x4E,xmm11=%xmm11 +pshufd $0x4E,%xmm11,%xmm11 + +# qhasm: xmm15 = shuffle dwords of xmm15 by 0x4E +# asm 1: pshufd $0x4E,xmm15=int6464#16 +# asm 2: pshufd $0x4E,xmm15=%xmm15 +pshufd $0x4E,%xmm15,%xmm15 + +# qhasm: xmm10 = shuffle dwords of xmm10 by 0x4E +# asm 1: pshufd 
$0x4E,xmm10=int6464#11 +# asm 2: pshufd $0x4E,xmm10=%xmm10 +pshufd $0x4E,%xmm10,%xmm10 + +# qhasm: xmm13 = shuffle dwords of xmm13 by 0x4E +# asm 1: pshufd $0x4E,xmm13=int6464#14 +# asm 2: pshufd $0x4E,xmm13=%xmm13 +pshufd $0x4E,%xmm13,%xmm13 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: 
xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x93,xmm8=%xmm8 +pshufd $0x93,%xmm0,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm1 by 0x93 +# asm 1: pshufd $0x93,xmm9=int6464#10 +# asm 2: pshufd $0x93,xmm9=%xmm9 +pshufd $0x93,%xmm1,%xmm9 + +# qhasm: xmm10 = shuffle dwords of xmm4 by 0x93 +# asm 1: pshufd $0x93,xmm10=int6464#11 +# asm 2: pshufd 
$0x93,xmm10=%xmm10 +pshufd $0x93,%xmm4,%xmm10 + +# qhasm: xmm11 = shuffle dwords of xmm6 by 0x93 +# asm 1: pshufd $0x93,xmm11=int6464#12 +# asm 2: pshufd $0x93,xmm11=%xmm11 +pshufd $0x93,%xmm6,%xmm11 + +# qhasm: xmm12 = shuffle dwords of xmm3 by 0x93 +# asm 1: pshufd $0x93,xmm12=int6464#13 +# asm 2: pshufd $0x93,xmm12=%xmm12 +pshufd $0x93,%xmm3,%xmm12 + +# qhasm: xmm13 = shuffle dwords of xmm7 by 0x93 +# asm 1: pshufd $0x93,xmm13=int6464#14 +# asm 2: pshufd $0x93,xmm13=%xmm13 +pshufd $0x93,%xmm7,%xmm13 + +# qhasm: xmm14 = shuffle dwords of xmm2 by 0x93 +# asm 1: pshufd $0x93,xmm14=int6464#15 +# asm 2: pshufd $0x93,xmm14=%xmm14 +pshufd $0x93,%xmm2,%xmm14 + +# qhasm: xmm15 = shuffle dwords of xmm5 by 0x93 +# asm 1: pshufd $0x93,xmm15=int6464#16 +# asm 2: pshufd $0x93,xmm15=%xmm15 +pshufd $0x93,%xmm5,%xmm15 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x4E,xmm0=%xmm0 +pshufd $0x4E,%xmm0,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm1 by 0x4E +# asm 1: pshufd $0x4E,xmm1=int6464#2 +# asm 2: pshufd $0x4E,xmm1=%xmm1 +pshufd $0x4E,%xmm1,%xmm1 + +# qhasm: xmm4 = shuffle dwords of xmm4 by 0x4E +# asm 1: pshufd $0x4E,xmm4=int6464#5 +# asm 2: pshufd $0x4E,xmm4=%xmm4 +pshufd $0x4E,%xmm4,%xmm4 + +# qhasm: xmm6 = shuffle dwords of xmm6 by 0x4E +# asm 1: pshufd $0x4E,xmm6=int6464#7 +# asm 2: pshufd $0x4E,xmm6=%xmm6 +pshufd $0x4E,%xmm6,%xmm6 + +# qhasm: xmm3 = shuffle dwords of xmm3 by 0x4E +# asm 1: pshufd $0x4E,xmm3=int6464#4 +# asm 2: pshufd $0x4E,xmm3=%xmm3 +pshufd $0x4E,%xmm3,%xmm3 + +# qhasm: xmm7 = shuffle dwords of xmm7 by 0x4E +# asm 1: pshufd $0x4E,xmm7=int6464#8 +# asm 2: pshufd $0x4E,xmm7=%xmm7 +pshufd $0x4E,%xmm7,%xmm7 + +# qhasm: xmm2 = shuffle dwords of xmm2 by 0x4E +# asm 1: pshufd $0x4E,xmm2=int6464#3 +# asm 2: pshufd $0x4E,xmm2=%xmm2 +pshufd $0x4E,%xmm2,%xmm2 + +# qhasm: xmm5 = shuffle dwords of xmm5 by 0x4E +# asm 1: pshufd $0x4E,xmm5=int6464#6 +# asm 2: pshufd $0x4E,xmm5=%xmm5 +pshufd $0x4E,%xmm5,%xmm5 + +# qhasm: xmm8 ^= xmm0 +# asm 
1: pxor xmm3=int6464#1 +# asm 2: movdqa xmm3=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm2 = xmm9 +# asm 1: movdqa xmm2=int6464#2 +# asm 2: movdqa xmm2=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm1 = xmm13 +# asm 1: movdqa xmm1=int6464#3 +# asm 2: movdqa xmm1=%xmm2 +movdqa %xmm13,%xmm2 + +# qhasm: xmm5 = xmm10 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm10,%xmm3 + +# qhasm: xmm4 = xmm14 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm14,%xmm4 + +# qhasm: xmm3 ^= xmm12 +# asm 1: pxor xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: xmm0 = xmm2 +# asm 1: movdqa xmm0=int6464#7 +# asm 2: movdqa xmm0=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm3 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm0,%xmm7 + +# qhasm: xmm2 |= xmm1 +# asm 1: por xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm11,%xmm2 + +# qhasm: xmm4 ^= xmm8 +# asm 1: pxor xmm5=int6464#3 +# asm 2: movdqa xmm5=%xmm2 +movdqa %xmm15,%xmm2 + +# qhasm: xmm5 ^= xmm9 +# asm 1: pxor xmm4=int6464#4 +# asm 2: movdqa xmm4=%xmm3 +movdqa %xmm13,%xmm3 + +# qhasm: xmm1 = xmm5 +# asm 1: movdqa xmm1=int6464#5 +# asm 2: movdqa xmm1=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: xmm4 ^= xmm14 +# asm 1: pxor xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm10,%xmm2 + +# qhasm: xmm5 = xmm12 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm12,%xmm3 + +# qhasm: xmm6 = xmm9 +# asm 1: movdqa xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm9,%xmm5 + +# qhasm: xmm7 = xmm15 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm15,%xmm7 + +# qhasm: xmm4 &= xmm11 +# asm 1: pand xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: xmm4 ^= xmm2 +# asm 1: pxor xmm6=int6464#4 +# asm 2: movdqa xmm6=%xmm3 +movdqa %xmm6,%xmm3 + +# qhasm: xmm6 ^= xmm3 +# asm 1: pxor xmm7=int6464#6 +# asm 2: movdqa xmm7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: xmm7 &= xmm6 +# asm 1: pand 
xmm5=int6464#8 +# asm 2: movdqa xmm5=%xmm7 +movdqa %xmm4,%xmm7 + +# qhasm: xmm5 ^= xmm0 +# asm 1: pxor xmm2=int6464#1 +# asm 2: movdqa xmm2=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: xmm2 ^= xmm5 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm14,%xmm0 + +# qhasm: xmm0 = xmm13 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: xmm2 = xmm7 +# asm 1: movdqa xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm2 ^= xmm6 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm0 = xmm9 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm4 ^= xmm12 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x93,xmm0=%xmm0 +pshufd $0x93,%xmm8,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm9 by 0x93 +# asm 1: pshufd $0x93,xmm1=int6464#2 +# asm 2: pshufd $0x93,xmm1=%xmm1 +pshufd $0x93,%xmm9,%xmm1 + +# qhasm: xmm2 = shuffle dwords of xmm12 by 0x93 +# asm 1: pshufd $0x93,xmm2=int6464#3 +# asm 2: pshufd $0x93,xmm2=%xmm2 +pshufd $0x93,%xmm12,%xmm2 + +# qhasm: xmm3 = shuffle dwords of xmm14 by 0x93 +# asm 1: pshufd $0x93,xmm3=int6464#4 +# asm 2: pshufd $0x93,xmm3=%xmm3 +pshufd $0x93,%xmm14,%xmm3 + +# qhasm: xmm4 = shuffle dwords of xmm11 by 0x93 +# asm 1: pshufd $0x93,xmm4=int6464#5 +# asm 2: pshufd $0x93,xmm4=%xmm4 +pshufd $0x93,%xmm11,%xmm4 + +# qhasm: xmm5 = shuffle dwords of xmm15 by 0x93 +# asm 1: pshufd 
$0x93,xmm5=int6464#6 +# asm 2: pshufd $0x93,xmm5=%xmm5 +pshufd $0x93,%xmm15,%xmm5 + +# qhasm: xmm6 = shuffle dwords of xmm10 by 0x93 +# asm 1: pshufd $0x93,xmm6=int6464#7 +# asm 2: pshufd $0x93,xmm6=%xmm6 +pshufd $0x93,%xmm10,%xmm6 + +# qhasm: xmm7 = shuffle dwords of xmm13 by 0x93 +# asm 1: pshufd $0x93,xmm7=int6464#8 +# asm 2: pshufd $0x93,xmm7=%xmm7 +pshufd $0x93,%xmm13,%xmm7 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x4E,xmm8=%xmm8 +pshufd $0x4E,%xmm8,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm9 by 0x4E +# asm 1: pshufd $0x4E,xmm9=int6464#10 +# asm 2: pshufd $0x4E,xmm9=%xmm9 +pshufd $0x4E,%xmm9,%xmm9 + +# qhasm: xmm12 = shuffle dwords of xmm12 by 0x4E +# asm 1: pshufd $0x4E,xmm12=int6464#13 +# asm 2: pshufd $0x4E,xmm12=%xmm12 +pshufd $0x4E,%xmm12,%xmm12 + +# qhasm: xmm14 = shuffle dwords of xmm14 by 0x4E +# asm 1: pshufd $0x4E,xmm14=int6464#15 +# asm 2: pshufd $0x4E,xmm14=%xmm14 +pshufd $0x4E,%xmm14,%xmm14 + +# qhasm: xmm11 = shuffle dwords of xmm11 by 0x4E +# asm 1: pshufd $0x4E,xmm11=int6464#12 +# asm 2: pshufd $0x4E,xmm11=%xmm11 +pshufd $0x4E,%xmm11,%xmm11 + +# qhasm: xmm15 = shuffle dwords of xmm15 by 0x4E +# asm 1: pshufd $0x4E,xmm15=int6464#16 +# asm 2: pshufd $0x4E,xmm15=%xmm15 +pshufd $0x4E,%xmm15,%xmm15 + +# qhasm: xmm10 = shuffle dwords of xmm10 by 0x4E +# asm 1: pshufd $0x4E,xmm10=int6464#11 +# asm 2: pshufd $0x4E,xmm10=%xmm10 +pshufd $0x4E,%xmm10,%xmm10 + +# qhasm: xmm13 = shuffle dwords of xmm13 by 0x4E +# asm 1: pshufd $0x4E,xmm13=int6464#14 +# asm 2: pshufd $0x4E,xmm13=%xmm13 +pshufd $0x4E,%xmm13,%xmm13 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa 
%xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = 
xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x93,xmm8=%xmm8 +pshufd $0x93,%xmm0,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm1 by 0x93 +# asm 1: pshufd $0x93,xmm9=int6464#10 +# asm 2: pshufd $0x93,xmm9=%xmm9 +pshufd $0x93,%xmm1,%xmm9 + +# qhasm: xmm10 = shuffle dwords of xmm4 by 0x93 +# asm 1: pshufd $0x93,xmm10=int6464#11 +# asm 2: pshufd $0x93,xmm10=%xmm10 +pshufd $0x93,%xmm4,%xmm10 + +# qhasm: xmm11 = shuffle dwords of xmm6 by 0x93 +# asm 1: pshufd $0x93,xmm11=int6464#12 +# asm 2: pshufd $0x93,xmm11=%xmm11 +pshufd $0x93,%xmm6,%xmm11 + +# qhasm: xmm12 = shuffle dwords of xmm3 by 0x93 +# asm 1: pshufd $0x93,xmm12=int6464#13 +# asm 2: pshufd $0x93,xmm12=%xmm12 +pshufd $0x93,%xmm3,%xmm12 + +# qhasm: xmm13 = shuffle dwords of xmm7 by 0x93 +# asm 1: pshufd $0x93,xmm13=int6464#14 +# asm 2: pshufd $0x93,xmm13=%xmm13 +pshufd $0x93,%xmm7,%xmm13 + +# qhasm: xmm14 = shuffle dwords of xmm2 by 0x93 +# asm 1: pshufd $0x93,xmm14=int6464#15 +# asm 2: pshufd $0x93,xmm14=%xmm14 +pshufd $0x93,%xmm2,%xmm14 + 
+# qhasm: xmm15 = shuffle dwords of xmm5 by 0x93 +# asm 1: pshufd $0x93,xmm15=int6464#16 +# asm 2: pshufd $0x93,xmm15=%xmm15 +pshufd $0x93,%xmm5,%xmm15 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x4E,xmm0=%xmm0 +pshufd $0x4E,%xmm0,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm1 by 0x4E +# asm 1: pshufd $0x4E,xmm1=int6464#2 +# asm 2: pshufd $0x4E,xmm1=%xmm1 +pshufd $0x4E,%xmm1,%xmm1 + +# qhasm: xmm4 = shuffle dwords of xmm4 by 0x4E +# asm 1: pshufd $0x4E,xmm4=int6464#5 +# asm 2: pshufd $0x4E,xmm4=%xmm4 +pshufd $0x4E,%xmm4,%xmm4 + +# qhasm: xmm6 = shuffle dwords of xmm6 by 0x4E +# asm 1: pshufd $0x4E,xmm6=int6464#7 +# asm 2: pshufd $0x4E,xmm6=%xmm6 +pshufd $0x4E,%xmm6,%xmm6 + +# qhasm: xmm3 = shuffle dwords of xmm3 by 0x4E +# asm 1: pshufd $0x4E,xmm3=int6464#4 +# asm 2: pshufd $0x4E,xmm3=%xmm3 +pshufd $0x4E,%xmm3,%xmm3 + +# qhasm: xmm7 = shuffle dwords of xmm7 by 0x4E +# asm 1: pshufd $0x4E,xmm7=int6464#8 +# asm 2: pshufd $0x4E,xmm7=%xmm7 +pshufd $0x4E,%xmm7,%xmm7 + +# qhasm: xmm2 = shuffle dwords of xmm2 by 0x4E +# asm 1: pshufd $0x4E,xmm2=int6464#3 +# asm 2: pshufd $0x4E,xmm2=%xmm2 +pshufd $0x4E,%xmm2,%xmm2 + +# qhasm: xmm5 = shuffle dwords of xmm5 by 0x4E +# asm 1: pshufd $0x4E,xmm5=int6464#6 +# asm 2: pshufd $0x4E,xmm5=%xmm5 +pshufd $0x4E,%xmm5,%xmm5 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm3=int6464#1 +# asm 2: movdqa xmm3=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm2 = xmm9 +# asm 1: movdqa xmm2=int6464#2 +# asm 2: movdqa xmm2=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm1 = xmm13 +# asm 1: movdqa xmm1=int6464#3 +# asm 2: movdqa xmm1=%xmm2 +movdqa %xmm13,%xmm2 + +# qhasm: xmm5 = xmm10 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm10,%xmm3 + +# qhasm: xmm4 = xmm14 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm14,%xmm4 + +# qhasm: xmm3 ^= xmm12 +# asm 1: pxor xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: xmm0 = xmm2 +# asm 1: movdqa xmm0=int6464#7 +# asm 2: 
movdqa xmm0=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm3 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm0,%xmm7 + +# qhasm: xmm2 |= xmm1 +# asm 1: por xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm11,%xmm2 + +# qhasm: xmm4 ^= xmm8 +# asm 1: pxor xmm5=int6464#3 +# asm 2: movdqa xmm5=%xmm2 +movdqa %xmm15,%xmm2 + +# qhasm: xmm5 ^= xmm9 +# asm 1: pxor xmm4=int6464#4 +# asm 2: movdqa xmm4=%xmm3 +movdqa %xmm13,%xmm3 + +# qhasm: xmm1 = xmm5 +# asm 1: movdqa xmm1=int6464#5 +# asm 2: movdqa xmm1=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: xmm4 ^= xmm14 +# asm 1: pxor xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm10,%xmm2 + +# qhasm: xmm5 = xmm12 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm12,%xmm3 + +# qhasm: xmm6 = xmm9 +# asm 1: movdqa xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm9,%xmm5 + +# qhasm: xmm7 = xmm15 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm15,%xmm7 + +# qhasm: xmm4 &= xmm11 +# asm 1: pand xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: xmm4 ^= xmm2 +# asm 1: pxor xmm6=int6464#4 +# asm 2: movdqa xmm6=%xmm3 +movdqa %xmm6,%xmm3 + +# qhasm: xmm6 ^= xmm3 +# asm 1: pxor xmm7=int6464#6 +# asm 2: movdqa xmm7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: xmm7 &= xmm6 +# asm 1: pand xmm5=int6464#8 +# asm 2: movdqa xmm5=%xmm7 +movdqa %xmm4,%xmm7 + +# qhasm: xmm5 ^= xmm0 +# asm 1: pxor xmm2=int6464#1 +# asm 2: movdqa xmm2=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: xmm2 ^= xmm5 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm14,%xmm0 + +# qhasm: xmm0 = xmm13 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: xmm2 = xmm7 +# asm 1: movdqa xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm2 ^= xmm6 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 
+movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm0 = xmm9 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm4 ^= xmm12 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm10,%xmm0 + +# qhasm: uint6464 xmm0 >>= 1 +# asm 1: psrlq $1,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm11,%xmm0 + +# qhasm: uint6464 xmm0 >>= 1 +# asm 1: psrlq $1,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm12,%xmm0 + +# qhasm: uint6464 xmm0 >>= 1 +# asm 1: psrlq $1,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm8,%xmm0 + +# qhasm: uint6464 xmm0 >>= 1 +# asm 1: psrlq $1,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: uint6464 xmm0 >>= 2 +# asm 1: psrlq $2,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm11,%xmm0 + +# qhasm: uint6464 xmm0 >>= 2 +# asm 1: psrlq $2,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm9,%xmm0 + +# qhasm: uint6464 xmm0 >>= 2 +# asm 1: psrlq $2,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm8,%xmm0 + +# qhasm: uint6464 xmm0 >>= 2 +# asm 1: psrlq $2,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm14,%xmm0 + +# qhasm: uint6464 xmm0 >>= 4 +# asm 1: psrlq $4,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm12,%xmm0 + +# qhasm: uint6464 xmm0 >>= 4 +# asm 1: psrlq $4,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm9,%xmm0 + +# qhasm: uint6464 xmm0 >>= 4 +# asm 1: psrlq $4,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm8,%xmm0 + +# qhasm: uint6464 xmm0 >>= 4 +# asm 1: psrlq $4,tmp=int64#5d +# asm 2: movl 12(tmp=%r8d +movl 
12(%rdx),%r8d + +# qhasm: (uint32) bswap tmp +# asm 1: bswap lensav=int64#4 +# asm 2: mov lensav=%rcx +mov %rsi,%rcx + +# qhasm: (uint32) len >>= 4 +# asm 1: shr $4,tmp=int64#5d +# asm 2: movl 12(tmp=%r8d +movl 12(%rdx),%r8d + +# qhasm: (uint32) bswap tmp +# asm 1: bswap blp=int64#2 +# asm 2: leaq blp=%rsi +leaq 32(%rsp),%rsi + +# qhasm: *(int128 *)(blp + 0) = xmm8 +# asm 1: movdqa b=int64#3 +# asm 2: movzbq 0(b=%rdx +movzbq 0(%rsi),%rdx + +# qhasm: *(uint8 *)(outp + 0) = b +# asm 1: movb tmp=int64#4d +# asm 2: movl 12(tmp=%ecx +movl 12(%rdx),%ecx + +# qhasm: (uint32) bswap tmp +# asm 1: bswap c=int64#1 +# asm 2: mov c=%rdi +mov %rdi,%rdi + +# qhasm: k = arg2 +# asm 1: mov k=int64#2 +# asm 2: mov k=%rsi +mov %rsi,%rsi + +# qhasm: xmm0 = *(int128 *) (k + 0) +# asm 1: movdqa 0(xmm0=int6464#1 +# asm 2: movdqa 0(xmm0=%xmm0 +movdqa 0(%rsi),%xmm0 + +# qhasm: shuffle bytes of xmm0 by M0 +# asm 1: pshufb M0,xmm1=int6464#2 +# asm 2: movdqa xmm1=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: xmm2 = xmm0 +# asm 1: movdqa xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: xmm3 = xmm0 +# asm 1: movdqa xmm3=int6464#4 +# asm 2: movdqa xmm3=%xmm3 +movdqa %xmm0,%xmm3 + +# qhasm: xmm4 = xmm0 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm0,%xmm4 + +# qhasm: xmm5 = xmm0 +# asm 1: movdqa xmm5=int6464#6 +# asm 2: movdqa xmm5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: xmm6 = xmm0 +# asm 1: movdqa xmm6=int6464#7 +# asm 2: movdqa xmm6=%xmm6 +movdqa %xmm0,%xmm6 + +# qhasm: xmm7 = xmm0 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm0,%xmm7 + +# qhasm: t = xmm6 +# asm 1: movdqa t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: uint6464 t >>= 1 +# asm 1: psrlq $1,t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa %xmm4,%xmm8 + +# qhasm: uint6464 t >>= 1 +# asm 1: psrlq $1,t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa %xmm2,%xmm8 + +# qhasm: uint6464 t >>= 1 +# asm 1: psrlq $1,t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa 
%xmm0,%xmm8 + +# qhasm: uint6464 t >>= 1 +# asm 1: psrlq $1,t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: uint6464 t >>= 2 +# asm 1: psrlq $2,t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa %xmm4,%xmm8 + +# qhasm: uint6464 t >>= 2 +# asm 1: psrlq $2,t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa %xmm1,%xmm8 + +# qhasm: uint6464 t >>= 2 +# asm 1: psrlq $2,t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa %xmm0,%xmm8 + +# qhasm: uint6464 t >>= 2 +# asm 1: psrlq $2,t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa %xmm3,%xmm8 + +# qhasm: uint6464 t >>= 4 +# asm 1: psrlq $4,t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa %xmm2,%xmm8 + +# qhasm: uint6464 t >>= 4 +# asm 1: psrlq $4,t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa %xmm1,%xmm8 + +# qhasm: uint6464 t >>= 4 +# asm 1: psrlq $4,t=int6464#9 +# asm 2: movdqa t=%xmm8 +movdqa %xmm0,%xmm8 + +# qhasm: uint6464 t >>= 4 +# asm 1: psrlq $4,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor 
xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa 
xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: movdqa 0(xmm8=%xmm8 +movdqa 0(%rdi),%xmm8 + +# qhasm: xmm9 = *(int128 *)(c + 16) +# asm 1: movdqa 16(xmm9=int6464#10 +# asm 2: movdqa 16(xmm9=%xmm9 +movdqa 16(%rdi),%xmm9 + +# qhasm: xmm10 = *(int128 *)(c + 32) +# asm 1: movdqa 32(xmm10=int6464#11 +# asm 2: movdqa 32(xmm10=%xmm10 +movdqa 32(%rdi),%xmm10 + +# qhasm: xmm11 = *(int128 *)(c + 48) +# asm 1: movdqa 48(xmm11=int6464#12 +# asm 2: movdqa 48(xmm11=%xmm11 +movdqa 48(%rdi),%xmm11 + +# qhasm: xmm12 = *(int128 *)(c + 64) +# asm 1: movdqa 64(xmm12=int6464#13 +# asm 2: movdqa 64(xmm12=%xmm12 +movdqa 64(%rdi),%xmm12 + +# qhasm: xmm13 = *(int128 *)(c + 80) +# asm 1: movdqa 80(xmm13=int6464#14 +# asm 2: movdqa 80(xmm13=%xmm13 +movdqa 80(%rdi),%xmm13 + +# qhasm: xmm14 = *(int128 *)(c + 96) +# asm 1: movdqa 96(xmm14=int6464#15 +# asm 2: movdqa 96(xmm14=%xmm14 +movdqa 96(%rdi),%xmm14 + +# qhasm: xmm15 = *(int128 *)(c + 112) +# asm 1: movdqa 112(xmm15=int6464#16 +# asm 2: movdqa 112(xmm15=%xmm15 +movdqa 112(%rdi),%xmm15 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor >= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: 
xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm7 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm12 = xmm2 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: xmm11 ^= xmm3 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm6,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm7,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm2 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm4,%xmm10 + +# qhasm: xmm13 = xmm3 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm3,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm5 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm5,%xmm15 + +# qhasm: xmm12 &= xmm6 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand 
xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm2,%xmm8 + +# qhasm: xmm8 = xmm7 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm7,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm3 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: movdqa 128(xmm8=%xmm8 +movdqa 128(%rdi),%xmm8 + +# qhasm: xmm9 = *(int128 *)(c + 144) +# asm 1: movdqa 144(xmm9=int6464#10 +# asm 2: movdqa 144(xmm9=%xmm9 +movdqa 144(%rdi),%xmm9 + +# qhasm: xmm10 = *(int128 *)(c + 160) +# asm 1: movdqa 160(xmm10=int6464#11 +# asm 2: movdqa 160(xmm10=%xmm10 +movdqa 160(%rdi),%xmm10 + +# qhasm: xmm11 = *(int128 *)(c + 176) +# asm 1: movdqa 176(xmm11=int6464#12 +# asm 2: movdqa 176(xmm11=%xmm11 +movdqa 176(%rdi),%xmm11 + +# qhasm: xmm12 = *(int128 *)(c + 192) +# asm 1: movdqa 192(xmm12=int6464#13 +# asm 2: movdqa 192(xmm12=%xmm12 +movdqa 192(%rdi),%xmm12 + +# qhasm: xmm13 = *(int128 *)(c + 208) +# asm 1: movdqa 
208(xmm13=int6464#14 +# asm 2: movdqa 208(xmm13=%xmm13 +movdqa 208(%rdi),%xmm13 + +# qhasm: xmm14 = *(int128 *)(c + 224) +# asm 1: movdqa 224(xmm14=int6464#15 +# asm 2: movdqa 224(xmm14=%xmm14 +movdqa 224(%rdi),%xmm14 + +# qhasm: xmm15 = *(int128 *)(c + 240) +# asm 1: movdqa 240(xmm15=int6464#16 +# asm 2: movdqa 240(xmm15=%xmm15 +movdqa 240(%rdi),%xmm15 + +# qhasm: xmm8 ^= ONE +# asm 1: pxor ONE,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm3 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm3,%xmm11 + +# qhasm: xmm12 = xmm4 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm4,%xmm12 + +# qhasm: xmm11 ^= xmm6 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 
+movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm13 = xmm6 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm6,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm2 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm4,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: 
xmm12 ^= xmm6 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: movdqa 256(xmm8=%xmm8 +movdqa 256(%rdi),%xmm8 + +# qhasm: xmm9 = *(int128 *)(c + 272) +# asm 1: movdqa 272(xmm9=int6464#10 +# asm 2: movdqa 272(xmm9=%xmm9 +movdqa 272(%rdi),%xmm9 + +# qhasm: xmm10 = *(int128 *)(c + 288) +# asm 1: movdqa 288(xmm10=int6464#11 +# asm 2: movdqa 288(xmm10=%xmm10 +movdqa 288(%rdi),%xmm10 + +# qhasm: xmm11 = *(int128 *)(c + 304) +# asm 1: movdqa 304(xmm11=int6464#12 +# asm 2: movdqa 304(xmm11=%xmm11 +movdqa 304(%rdi),%xmm11 + +# qhasm: xmm12 = *(int128 *)(c + 320) +# asm 1: movdqa 320(xmm12=int6464#13 +# asm 2: movdqa 320(xmm12=%xmm12 +movdqa 320(%rdi),%xmm12 + +# qhasm: xmm13 = *(int128 *)(c + 336) +# asm 1: movdqa 336(xmm13=int6464#14 +# asm 2: movdqa 336(xmm13=%xmm13 +movdqa 336(%rdi),%xmm13 + +# qhasm: xmm14 = *(int128 *)(c + 352) +# asm 1: movdqa 352(xmm14=int6464#15 +# asm 2: movdqa 352(xmm14=%xmm14 +movdqa 352(%rdi),%xmm14 + +# qhasm: xmm15 = *(int128 *)(c + 368) +# asm 1: movdqa 368(xmm15=int6464#16 +# asm 2: movdqa 368(xmm15=%xmm15 +movdqa 368(%rdi),%xmm15 + +# qhasm: xmm8 ^= ONE +# asm 1: pxor ONE,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 
1: psrld $8,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm7 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 = xmm6 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm6,%xmm11 + +# qhasm: xmm12 = xmm3 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm3,%xmm12 + +# qhasm: xmm11 ^= xmm2 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm4,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm7,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm3 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm6,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm5 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm5,%xmm15 + +# qhasm: xmm12 &= xmm4 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 
2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm3,%xmm8 + +# qhasm: xmm8 = xmm7 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm7,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm2 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: movdqa 384(xmm8=%xmm8 +movdqa 384(%rdi),%xmm8 + +# qhasm: xmm9 = *(int128 *)(c + 400) +# asm 1: movdqa 400(xmm9=int6464#10 +# asm 2: movdqa 400(xmm9=%xmm9 +movdqa 400(%rdi),%xmm9 + +# qhasm: xmm10 = *(int128 *)(c + 416) +# asm 1: movdqa 416(xmm10=int6464#11 +# asm 2: movdqa 416(xmm10=%xmm10 +movdqa 416(%rdi),%xmm10 + +# qhasm: xmm11 = *(int128 *)(c + 432) +# asm 1: movdqa 432(xmm11=int6464#12 +# asm 2: movdqa 432(xmm11=%xmm11 +movdqa 432(%rdi),%xmm11 + +# qhasm: xmm12 = *(int128 *)(c + 448) +# asm 1: movdqa 448(xmm12=int6464#13 +# asm 2: movdqa 448(xmm12=%xmm12 +movdqa 
448(%rdi),%xmm12 + +# qhasm: xmm13 = *(int128 *)(c + 464) +# asm 1: movdqa 464(xmm13=int6464#14 +# asm 2: movdqa 464(xmm13=%xmm13 +movdqa 464(%rdi),%xmm13 + +# qhasm: xmm14 = *(int128 *)(c + 480) +# asm 1: movdqa 480(xmm14=int6464#15 +# asm 2: movdqa 480(xmm14=%xmm14 +movdqa 480(%rdi),%xmm14 + +# qhasm: xmm15 = *(int128 *)(c + 496) +# asm 1: movdqa 496(xmm15=int6464#16 +# asm 2: movdqa 496(xmm15=%xmm15 +movdqa 496(%rdi),%xmm15 + +# qhasm: xmm8 ^= ONE +# asm 1: pxor ONE,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: 
xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa 
xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: movdqa 512(xmm8=%xmm8 +movdqa 512(%rdi),%xmm8 + +# qhasm: xmm9 = *(int128 *)(c + 528) +# asm 1: movdqa 528(xmm9=int6464#10 +# asm 2: movdqa 528(xmm9=%xmm9 +movdqa 528(%rdi),%xmm9 + +# qhasm: xmm10 = *(int128 *)(c + 544) +# asm 1: movdqa 544(xmm10=int6464#11 +# asm 2: movdqa 544(xmm10=%xmm10 +movdqa 544(%rdi),%xmm10 + +# qhasm: xmm11 = *(int128 *)(c + 560) +# asm 1: movdqa 560(xmm11=int6464#12 +# asm 2: movdqa 560(xmm11=%xmm11 +movdqa 560(%rdi),%xmm11 + +# qhasm: xmm12 = *(int128 *)(c + 576) +# asm 1: movdqa 576(xmm12=int6464#13 +# asm 2: movdqa 576(xmm12=%xmm12 +movdqa 576(%rdi),%xmm12 + +# qhasm: xmm13 = *(int128 *)(c + 592) +# asm 1: movdqa 592(xmm13=int6464#14 +# asm 2: movdqa 592(xmm13=%xmm13 +movdqa 592(%rdi),%xmm13 + +# qhasm: xmm14 = *(int128 *)(c + 608) +# asm 1: movdqa 608(xmm14=int6464#15 +# asm 2: movdqa 608(xmm14=%xmm14 +movdqa 608(%rdi),%xmm14 + +# qhasm: xmm15 = *(int128 *)(c + 624) +# asm 1: movdqa 624(xmm15=int6464#16 +# asm 2: movdqa 624(xmm15=%xmm15 +movdqa 624(%rdi),%xmm15 + +# qhasm: xmm8 ^= ONE +# asm 1: pxor ONE,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# 
asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm7 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm12 = xmm2 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: xmm11 ^= xmm3 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm6,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm7,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm2 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm4,%xmm10 + +# qhasm: xmm13 = xmm3 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm3,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm5 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm5,%xmm15 + +# qhasm: xmm12 &= xmm6 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa 
%xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm2,%xmm8 + +# qhasm: xmm8 = xmm7 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm7,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm3 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: movdqa 640(xmm8=%xmm8 +movdqa 640(%rdi),%xmm8 + +# qhasm: xmm9 = *(int128 *)(c + 656) +# asm 1: movdqa 656(xmm9=int6464#10 +# asm 2: movdqa 656(xmm9=%xmm9 +movdqa 656(%rdi),%xmm9 + +# qhasm: xmm10 = *(int128 *)(c + 672) +# asm 1: movdqa 672(xmm10=int6464#11 +# asm 2: movdqa 672(xmm10=%xmm10 +movdqa 672(%rdi),%xmm10 + +# qhasm: xmm11 = *(int128 *)(c + 688) +# asm 1: movdqa 688(xmm11=int6464#12 +# asm 2: movdqa 688(xmm11=%xmm11 +movdqa 688(%rdi),%xmm11 + +# qhasm: xmm12 = *(int128 *)(c + 704) +# 
asm 1: movdqa 704(xmm12=int6464#13 +# asm 2: movdqa 704(xmm12=%xmm12 +movdqa 704(%rdi),%xmm12 + +# qhasm: xmm13 = *(int128 *)(c + 720) +# asm 1: movdqa 720(xmm13=int6464#14 +# asm 2: movdqa 720(xmm13=%xmm13 +movdqa 720(%rdi),%xmm13 + +# qhasm: xmm14 = *(int128 *)(c + 736) +# asm 1: movdqa 736(xmm14=int6464#15 +# asm 2: movdqa 736(xmm14=%xmm14 +movdqa 736(%rdi),%xmm14 + +# qhasm: xmm15 = *(int128 *)(c + 752) +# asm 1: movdqa 752(xmm15=int6464#16 +# asm 2: movdqa 752(xmm15=%xmm15 +movdqa 752(%rdi),%xmm15 + +# qhasm: xmm8 ^= ONE +# asm 1: pxor ONE,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm3 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm3,%xmm11 + +# qhasm: xmm12 = xmm4 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm4,%xmm12 + +# qhasm: xmm11 ^= xmm6 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por 
xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm13 = xmm6 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm6,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm2 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm4,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: 
movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: movdqa 768(xmm8=%xmm8 +movdqa 768(%rdi),%xmm8 + +# qhasm: xmm9 = *(int128 *)(c + 784) +# asm 1: movdqa 784(xmm9=int6464#10 +# asm 2: movdqa 784(xmm9=%xmm9 +movdqa 784(%rdi),%xmm9 + +# qhasm: xmm10 = *(int128 *)(c + 800) +# asm 1: movdqa 800(xmm10=int6464#11 +# asm 2: movdqa 800(xmm10=%xmm10 +movdqa 800(%rdi),%xmm10 + +# qhasm: xmm11 = *(int128 *)(c + 816) +# asm 1: movdqa 816(xmm11=int6464#12 +# asm 2: movdqa 816(xmm11=%xmm11 +movdqa 816(%rdi),%xmm11 + +# qhasm: xmm12 = *(int128 *)(c + 832) +# asm 1: movdqa 832(xmm12=int6464#13 +# asm 2: movdqa 832(xmm12=%xmm12 +movdqa 832(%rdi),%xmm12 + +# qhasm: xmm13 = *(int128 *)(c + 848) +# asm 1: movdqa 848(xmm13=int6464#14 +# asm 2: movdqa 848(xmm13=%xmm13 +movdqa 848(%rdi),%xmm13 + +# qhasm: xmm14 = *(int128 *)(c + 864) +# asm 1: movdqa 864(xmm14=int6464#15 +# asm 2: movdqa 864(xmm14=%xmm14 +movdqa 864(%rdi),%xmm14 + +# qhasm: xmm15 = *(int128 *)(c + 880) +# asm 1: movdqa 880(xmm15=int6464#16 +# asm 2: movdqa 880(xmm15=%xmm15 +movdqa 880(%rdi),%xmm15 + +# qhasm: xmm8 ^= ONE +# asm 1: pxor ONE,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld 
$8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm7 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 = xmm6 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm6,%xmm11 + +# qhasm: xmm12 = xmm3 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm3,%xmm12 + +# qhasm: xmm11 ^= xmm2 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm4,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm7,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm3 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm6,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm5 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm5,%xmm15 + +# qhasm: xmm12 &= xmm4 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 
^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm3,%xmm8 + +# qhasm: xmm8 = xmm7 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm7,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm2 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: movdqa 896(xmm8=%xmm8 +movdqa 896(%rdi),%xmm8 + +# qhasm: xmm9 = *(int128 *)(c + 912) +# asm 1: movdqa 912(xmm9=int6464#10 +# asm 2: movdqa 912(xmm9=%xmm9 +movdqa 912(%rdi),%xmm9 + +# qhasm: xmm10 = *(int128 *)(c + 928) +# asm 1: movdqa 928(xmm10=int6464#11 +# asm 2: movdqa 928(xmm10=%xmm10 +movdqa 928(%rdi),%xmm10 + +# qhasm: xmm11 = *(int128 *)(c + 944) +# asm 1: movdqa 944(xmm11=int6464#12 +# asm 2: movdqa 
944(xmm11=%xmm11 +movdqa 944(%rdi),%xmm11 + +# qhasm: xmm12 = *(int128 *)(c + 960) +# asm 1: movdqa 960(xmm12=int6464#13 +# asm 2: movdqa 960(xmm12=%xmm12 +movdqa 960(%rdi),%xmm12 + +# qhasm: xmm13 = *(int128 *)(c + 976) +# asm 1: movdqa 976(xmm13=int6464#14 +# asm 2: movdqa 976(xmm13=%xmm13 +movdqa 976(%rdi),%xmm13 + +# qhasm: xmm14 = *(int128 *)(c + 992) +# asm 1: movdqa 992(xmm14=int6464#15 +# asm 2: movdqa 992(xmm14=%xmm14 +movdqa 992(%rdi),%xmm14 + +# qhasm: xmm15 = *(int128 *)(c + 1008) +# asm 1: movdqa 1008(xmm15=int6464#16 +# asm 2: movdqa 1008(xmm15=%xmm15 +movdqa 1008(%rdi),%xmm15 + +# qhasm: xmm8 ^= ONE +# asm 1: pxor ONE,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# 
asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 
+movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: movdqa 1024(xmm8=%xmm8 +movdqa 1024(%rdi),%xmm8 + +# qhasm: xmm9 = *(int128 *)(c + 1040) +# asm 1: movdqa 1040(xmm9=int6464#10 +# asm 2: movdqa 1040(xmm9=%xmm9 +movdqa 1040(%rdi),%xmm9 + +# qhasm: xmm10 = *(int128 *)(c + 1056) +# asm 1: movdqa 1056(xmm10=int6464#11 +# asm 2: movdqa 1056(xmm10=%xmm10 +movdqa 1056(%rdi),%xmm10 + +# qhasm: xmm11 = *(int128 *)(c + 1072) +# asm 1: movdqa 1072(xmm11=int6464#12 +# asm 2: movdqa 1072(xmm11=%xmm11 +movdqa 1072(%rdi),%xmm11 + +# qhasm: xmm12 = *(int128 *)(c + 1088) +# asm 1: movdqa 1088(xmm12=int6464#13 +# asm 2: movdqa 1088(xmm12=%xmm12 +movdqa 1088(%rdi),%xmm12 + +# qhasm: xmm13 = *(int128 *)(c + 1104) +# asm 1: movdqa 1104(xmm13=int6464#14 +# asm 2: movdqa 1104(xmm13=%xmm13 +movdqa 1104(%rdi),%xmm13 + +# qhasm: xmm14 = *(int128 *)(c + 1120) +# asm 1: movdqa 1120(xmm14=int6464#15 +# asm 2: movdqa 1120(xmm14=%xmm14 +movdqa 1120(%rdi),%xmm14 + +# qhasm: xmm15 = *(int128 *)(c + 1136) +# asm 1: movdqa 1136(xmm15=int6464#16 +# asm 2: movdqa 1136(xmm15=%xmm15 +movdqa 1136(%rdi),%xmm15 + +# qhasm: xmm8 ^= ONE +# asm 1: pxor ONE,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld 
$8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm7 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm12 = xmm2 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: xmm11 ^= xmm3 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm6,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm7,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm2 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm4,%xmm10 + +# qhasm: xmm13 = xmm3 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm3,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm5 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm5,%xmm15 + +# 
qhasm: xmm12 &= xmm6 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm2,%xmm8 + +# qhasm: xmm8 = xmm7 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm7,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm3 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: movdqa 1152(xmm8=%xmm8 +movdqa 1152(%rdi),%xmm8 + +# qhasm: xmm9 = *(int128 *)(c + 1168) +# asm 1: movdqa 1168(xmm9=int6464#10 +# asm 2: movdqa 1168(xmm9=%xmm9 +movdqa 1168(%rdi),%xmm9 + +# qhasm: xmm10 = *(int128 *)(c + 1184) +# asm 1: movdqa 1184(xmm10=int6464#11 +# asm 2: movdqa 1184(xmm10=%xmm10 
+movdqa 1184(%rdi),%xmm10 + +# qhasm: xmm11 = *(int128 *)(c + 1200) +# asm 1: movdqa 1200(xmm11=int6464#12 +# asm 2: movdqa 1200(xmm11=%xmm11 +movdqa 1200(%rdi),%xmm11 + +# qhasm: xmm12 = *(int128 *)(c + 1216) +# asm 1: movdqa 1216(xmm12=int6464#13 +# asm 2: movdqa 1216(xmm12=%xmm12 +movdqa 1216(%rdi),%xmm12 + +# qhasm: xmm13 = *(int128 *)(c + 1232) +# asm 1: movdqa 1232(xmm13=int6464#14 +# asm 2: movdqa 1232(xmm13=%xmm13 +movdqa 1232(%rdi),%xmm13 + +# qhasm: xmm14 = *(int128 *)(c + 1248) +# asm 1: movdqa 1248(xmm14=int6464#15 +# asm 2: movdqa 1248(xmm14=%xmm14 +movdqa 1248(%rdi),%xmm14 + +# qhasm: xmm15 = *(int128 *)(c + 1264) +# asm 1: movdqa 1264(xmm15=int6464#16 +# asm 2: movdqa 1264(xmm15=%xmm15 +movdqa 1264(%rdi),%xmm15 + +# qhasm: xmm8 ^= ONE +# asm 1: pxor ONE,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,>= 8 +# asm 1: psrld $8,xmm0=int6464#1 +# asm 2: movdqa 0(xmm0=%xmm0 +movdqa 0(%rcx),%xmm0 + +# qhasm: nonce_stack = xmm0 +# asm 1: movdqa nonce_stack=stack128#1 +# asm 2: movdqa nonce_stack=0(%rsp) +movdqa %xmm0,0(%rsp) + +# qhasm: np = &nonce_stack +# asm 1: leaq np=int64#4 +# asm 2: leaq np=%rcx +leaq 0(%rsp),%rcx + +# qhasm: enc_block: +._enc_block: + +# qhasm: xmm0 = *(int128 *) (np + 0) +# asm 1: movdqa 0(xmm0=int6464#1 +# asm 2: movdqa 0(xmm0=%xmm0 +movdqa 0(%rcx),%xmm0 + +# qhasm: xmm1 = xmm0 +# asm 1: movdqa xmm1=int6464#2 +# asm 2: movdqa xmm1=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: shuffle bytes of xmm1 by SWAP32 +# asm 1: pshufb SWAP32,xmm2=int6464#3 
+# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: xmm3 = xmm1 +# asm 1: movdqa xmm3=int6464#4 +# asm 2: movdqa xmm3=%xmm3 +movdqa %xmm1,%xmm3 + +# qhasm: xmm4 = xmm1 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: xmm5 = xmm1 +# asm 1: movdqa xmm5=int6464#6 +# asm 2: movdqa xmm5=%xmm5 +movdqa %xmm1,%xmm5 + +# qhasm: xmm6 = xmm1 +# asm 1: movdqa xmm6=int6464#7 +# asm 2: movdqa xmm6=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm1 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm1,%xmm7 + +# qhasm: int32323232 xmm1 += RCTRINC1 +# asm 1: paddd RCTRINC1,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: uint6464 xmm8 >>= 1 +# asm 1: psrlq $1,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm4,%xmm8 + +# qhasm: uint6464 xmm8 >>= 1 +# asm 1: psrlq $1,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm2,%xmm8 + +# qhasm: uint6464 xmm8 >>= 1 +# asm 1: psrlq $1,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm0,%xmm8 + +# qhasm: uint6464 xmm8 >>= 1 +# asm 1: psrlq $1,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm5,%xmm8 + +# qhasm: uint6464 xmm8 >>= 2 +# asm 1: psrlq $2,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm4,%xmm8 + +# qhasm: uint6464 xmm8 >>= 2 +# asm 1: psrlq $2,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm1,%xmm8 + +# qhasm: uint6464 xmm8 >>= 2 +# asm 1: psrlq $2,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm0,%xmm8 + +# qhasm: uint6464 xmm8 >>= 2 +# asm 1: psrlq $2,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm3,%xmm8 + +# qhasm: uint6464 xmm8 >>= 4 +# asm 1: psrlq $4,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm2,%xmm8 + +# qhasm: uint6464 xmm8 >>= 4 +# asm 1: psrlq $4,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm1,%xmm8 + +# qhasm: uint6464 xmm8 >>= 4 +# asm 1: psrlq $4,xmm8=int6464#9 +# asm 2: movdqa xmm8=%xmm8 +movdqa %xmm0,%xmm8 + +# qhasm: uint6464 xmm8 >>= 4 +# asm 1: psrlq 
$4,xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: 
movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x93,xmm8=%xmm8 +pshufd $0x93,%xmm0,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm1 by 0x93 +# asm 1: pshufd $0x93,xmm9=int6464#10 +# asm 2: pshufd $0x93,xmm9=%xmm9 +pshufd $0x93,%xmm1,%xmm9 + +# qhasm: xmm10 = shuffle dwords of xmm4 by 0x93 +# asm 1: pshufd $0x93,xmm10=int6464#11 +# asm 2: pshufd $0x93,xmm10=%xmm10 +pshufd $0x93,%xmm4,%xmm10 + +# qhasm: xmm11 = shuffle dwords of xmm6 by 0x93 +# asm 1: pshufd $0x93,xmm11=int6464#12 +# asm 2: pshufd $0x93,xmm11=%xmm11 +pshufd $0x93,%xmm6,%xmm11 + +# qhasm: xmm12 = shuffle dwords of xmm3 by 0x93 +# asm 1: pshufd 
$0x93,xmm12=int6464#13 +# asm 2: pshufd $0x93,xmm12=%xmm12 +pshufd $0x93,%xmm3,%xmm12 + +# qhasm: xmm13 = shuffle dwords of xmm7 by 0x93 +# asm 1: pshufd $0x93,xmm13=int6464#14 +# asm 2: pshufd $0x93,xmm13=%xmm13 +pshufd $0x93,%xmm7,%xmm13 + +# qhasm: xmm14 = shuffle dwords of xmm2 by 0x93 +# asm 1: pshufd $0x93,xmm14=int6464#15 +# asm 2: pshufd $0x93,xmm14=%xmm14 +pshufd $0x93,%xmm2,%xmm14 + +# qhasm: xmm15 = shuffle dwords of xmm5 by 0x93 +# asm 1: pshufd $0x93,xmm15=int6464#16 +# asm 2: pshufd $0x93,xmm15=%xmm15 +pshufd $0x93,%xmm5,%xmm15 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x4E,xmm0=%xmm0 +pshufd $0x4E,%xmm0,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm1 by 0x4E +# asm 1: pshufd $0x4E,xmm1=int6464#2 +# asm 2: pshufd $0x4E,xmm1=%xmm1 +pshufd $0x4E,%xmm1,%xmm1 + +# qhasm: xmm4 = shuffle dwords of xmm4 by 0x4E +# asm 1: pshufd $0x4E,xmm4=int6464#5 +# asm 2: pshufd $0x4E,xmm4=%xmm4 +pshufd $0x4E,%xmm4,%xmm4 + +# qhasm: xmm6 = shuffle dwords of xmm6 by 0x4E +# asm 1: pshufd $0x4E,xmm6=int6464#7 +# asm 2: pshufd $0x4E,xmm6=%xmm6 +pshufd $0x4E,%xmm6,%xmm6 + +# qhasm: xmm3 = shuffle dwords of xmm3 by 0x4E +# asm 1: pshufd $0x4E,xmm3=int6464#4 +# asm 2: pshufd $0x4E,xmm3=%xmm3 +pshufd $0x4E,%xmm3,%xmm3 + +# qhasm: xmm7 = shuffle dwords of xmm7 by 0x4E +# asm 1: pshufd $0x4E,xmm7=int6464#8 +# asm 2: pshufd $0x4E,xmm7=%xmm7 +pshufd $0x4E,%xmm7,%xmm7 + +# qhasm: xmm2 = shuffle dwords of xmm2 by 0x4E +# asm 1: pshufd $0x4E,xmm2=int6464#3 +# asm 2: pshufd $0x4E,xmm2=%xmm2 +pshufd $0x4E,%xmm2,%xmm2 + +# qhasm: xmm5 = shuffle dwords of xmm5 by 0x4E +# asm 1: pshufd $0x4E,xmm5=int6464#6 +# asm 2: pshufd $0x4E,xmm5=%xmm5 +pshufd $0x4E,%xmm5,%xmm5 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm3=int6464#1 +# asm 2: movdqa xmm3=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm2 = xmm9 +# asm 1: movdqa xmm2=int6464#2 +# asm 2: movdqa xmm2=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm1 = xmm13 +# asm 1: movdqa xmm1=int6464#3 +# asm 2: movdqa xmm1=%xmm2 +movdqa 
%xmm13,%xmm2 + +# qhasm: xmm5 = xmm10 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm10,%xmm3 + +# qhasm: xmm4 = xmm14 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm14,%xmm4 + +# qhasm: xmm3 ^= xmm12 +# asm 1: pxor xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: xmm0 = xmm2 +# asm 1: movdqa xmm0=int6464#7 +# asm 2: movdqa xmm0=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm3 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm0,%xmm7 + +# qhasm: xmm2 |= xmm1 +# asm 1: por xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm11,%xmm2 + +# qhasm: xmm4 ^= xmm8 +# asm 1: pxor xmm5=int6464#3 +# asm 2: movdqa xmm5=%xmm2 +movdqa %xmm15,%xmm2 + +# qhasm: xmm5 ^= xmm9 +# asm 1: pxor xmm4=int6464#4 +# asm 2: movdqa xmm4=%xmm3 +movdqa %xmm13,%xmm3 + +# qhasm: xmm1 = xmm5 +# asm 1: movdqa xmm1=int6464#5 +# asm 2: movdqa xmm1=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: xmm4 ^= xmm14 +# asm 1: pxor xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm10,%xmm2 + +# qhasm: xmm5 = xmm12 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm12,%xmm3 + +# qhasm: xmm6 = xmm9 +# asm 1: movdqa xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm9,%xmm5 + +# qhasm: xmm7 = xmm15 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm15,%xmm7 + +# qhasm: xmm4 &= xmm11 +# asm 1: pand xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: xmm4 ^= xmm2 +# asm 1: pxor xmm6=int6464#4 +# asm 2: movdqa xmm6=%xmm3 +movdqa %xmm6,%xmm3 + +# qhasm: xmm6 ^= xmm3 +# asm 1: pxor xmm7=int6464#6 +# asm 2: movdqa xmm7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: xmm7 &= xmm6 +# asm 1: pand xmm5=int6464#8 +# asm 2: movdqa xmm5=%xmm7 +movdqa %xmm4,%xmm7 + +# qhasm: xmm5 ^= xmm0 +# asm 1: pxor xmm2=int6464#1 +# asm 2: movdqa xmm2=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: xmm2 ^= xmm5 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm14,%xmm0 + +# 
qhasm: xmm0 = xmm13 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: xmm2 = xmm7 +# asm 1: movdqa xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm2 ^= xmm6 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm0 = xmm9 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm4 ^= xmm12 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x93,xmm0=%xmm0 +pshufd $0x93,%xmm8,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm9 by 0x93 +# asm 1: pshufd $0x93,xmm1=int6464#2 +# asm 2: pshufd $0x93,xmm1=%xmm1 +pshufd $0x93,%xmm9,%xmm1 + +# qhasm: xmm2 = shuffle dwords of xmm12 by 0x93 +# asm 1: pshufd $0x93,xmm2=int6464#3 +# asm 2: pshufd $0x93,xmm2=%xmm2 +pshufd $0x93,%xmm12,%xmm2 + +# qhasm: xmm3 = shuffle dwords of xmm14 by 0x93 +# asm 1: pshufd $0x93,xmm3=int6464#4 +# asm 2: pshufd $0x93,xmm3=%xmm3 +pshufd $0x93,%xmm14,%xmm3 + +# qhasm: xmm4 = shuffle dwords of xmm11 by 0x93 +# asm 1: pshufd $0x93,xmm4=int6464#5 +# asm 2: pshufd $0x93,xmm4=%xmm4 +pshufd $0x93,%xmm11,%xmm4 + +# qhasm: xmm5 = shuffle dwords of xmm15 by 0x93 +# asm 1: pshufd $0x93,xmm5=int6464#6 +# asm 2: pshufd $0x93,xmm5=%xmm5 +pshufd $0x93,%xmm15,%xmm5 + +# qhasm: xmm6 = shuffle dwords of xmm10 by 0x93 +# asm 1: pshufd $0x93,xmm6=int6464#7 +# asm 2: pshufd $0x93,xmm6=%xmm6 +pshufd $0x93,%xmm10,%xmm6 + +# qhasm: xmm7 = shuffle dwords of xmm13 by 0x93 +# asm 
1: pshufd $0x93,xmm7=int6464#8 +# asm 2: pshufd $0x93,xmm7=%xmm7 +pshufd $0x93,%xmm13,%xmm7 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x4E,xmm8=%xmm8 +pshufd $0x4E,%xmm8,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm9 by 0x4E +# asm 1: pshufd $0x4E,xmm9=int6464#10 +# asm 2: pshufd $0x4E,xmm9=%xmm9 +pshufd $0x4E,%xmm9,%xmm9 + +# qhasm: xmm12 = shuffle dwords of xmm12 by 0x4E +# asm 1: pshufd $0x4E,xmm12=int6464#13 +# asm 2: pshufd $0x4E,xmm12=%xmm12 +pshufd $0x4E,%xmm12,%xmm12 + +# qhasm: xmm14 = shuffle dwords of xmm14 by 0x4E +# asm 1: pshufd $0x4E,xmm14=int6464#15 +# asm 2: pshufd $0x4E,xmm14=%xmm14 +pshufd $0x4E,%xmm14,%xmm14 + +# qhasm: xmm11 = shuffle dwords of xmm11 by 0x4E +# asm 1: pshufd $0x4E,xmm11=int6464#12 +# asm 2: pshufd $0x4E,xmm11=%xmm11 +pshufd $0x4E,%xmm11,%xmm11 + +# qhasm: xmm15 = shuffle dwords of xmm15 by 0x4E +# asm 1: pshufd $0x4E,xmm15=int6464#16 +# asm 2: pshufd $0x4E,xmm15=%xmm15 +pshufd $0x4E,%xmm15,%xmm15 + +# qhasm: xmm10 = shuffle dwords of xmm10 by 0x4E +# asm 1: pshufd $0x4E,xmm10=int6464#11 +# asm 2: pshufd $0x4E,xmm10=%xmm10 +pshufd $0x4E,%xmm10,%xmm10 + +# qhasm: xmm13 = shuffle dwords of xmm13 by 0x4E +# asm 1: pshufd $0x4E,xmm13=int6464#14 +# asm 2: pshufd $0x4E,xmm13=%xmm13 +pshufd $0x4E,%xmm13,%xmm13 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 
+# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 
+movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x93,xmm8=%xmm8 +pshufd $0x93,%xmm0,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm1 by 0x93 +# asm 1: pshufd $0x93,xmm9=int6464#10 +# asm 2: pshufd $0x93,xmm9=%xmm9 +pshufd $0x93,%xmm1,%xmm9 + +# qhasm: xmm10 = shuffle dwords of xmm4 by 0x93 +# asm 1: pshufd $0x93,xmm10=int6464#11 +# asm 2: pshufd $0x93,xmm10=%xmm10 +pshufd $0x93,%xmm4,%xmm10 + +# qhasm: xmm11 = shuffle dwords of xmm6 by 0x93 +# asm 1: pshufd $0x93,xmm11=int6464#12 +# asm 2: pshufd $0x93,xmm11=%xmm11 +pshufd $0x93,%xmm6,%xmm11 + +# qhasm: xmm12 = shuffle dwords of xmm3 by 0x93 +# asm 1: pshufd $0x93,xmm12=int6464#13 +# asm 2: pshufd $0x93,xmm12=%xmm12 +pshufd $0x93,%xmm3,%xmm12 + +# qhasm: xmm13 = shuffle dwords of xmm7 by 0x93 +# asm 1: pshufd $0x93,xmm13=int6464#14 +# asm 2: pshufd $0x93,xmm13=%xmm13 +pshufd $0x93,%xmm7,%xmm13 + +# qhasm: xmm14 = shuffle dwords of xmm2 by 0x93 +# asm 1: pshufd $0x93,xmm14=int6464#15 +# asm 2: pshufd $0x93,xmm14=%xmm14 +pshufd $0x93,%xmm2,%xmm14 + +# qhasm: xmm15 = shuffle dwords of xmm5 by 0x93 +# asm 1: pshufd $0x93,xmm15=int6464#16 +# asm 2: pshufd $0x93,xmm15=%xmm15 +pshufd $0x93,%xmm5,%xmm15 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x4E,xmm0=%xmm0 +pshufd $0x4E,%xmm0,%xmm0 + +# qhasm: xmm1 = 
shuffle dwords of xmm1 by 0x4E +# asm 1: pshufd $0x4E,xmm1=int6464#2 +# asm 2: pshufd $0x4E,xmm1=%xmm1 +pshufd $0x4E,%xmm1,%xmm1 + +# qhasm: xmm4 = shuffle dwords of xmm4 by 0x4E +# asm 1: pshufd $0x4E,xmm4=int6464#5 +# asm 2: pshufd $0x4E,xmm4=%xmm4 +pshufd $0x4E,%xmm4,%xmm4 + +# qhasm: xmm6 = shuffle dwords of xmm6 by 0x4E +# asm 1: pshufd $0x4E,xmm6=int6464#7 +# asm 2: pshufd $0x4E,xmm6=%xmm6 +pshufd $0x4E,%xmm6,%xmm6 + +# qhasm: xmm3 = shuffle dwords of xmm3 by 0x4E +# asm 1: pshufd $0x4E,xmm3=int6464#4 +# asm 2: pshufd $0x4E,xmm3=%xmm3 +pshufd $0x4E,%xmm3,%xmm3 + +# qhasm: xmm7 = shuffle dwords of xmm7 by 0x4E +# asm 1: pshufd $0x4E,xmm7=int6464#8 +# asm 2: pshufd $0x4E,xmm7=%xmm7 +pshufd $0x4E,%xmm7,%xmm7 + +# qhasm: xmm2 = shuffle dwords of xmm2 by 0x4E +# asm 1: pshufd $0x4E,xmm2=int6464#3 +# asm 2: pshufd $0x4E,xmm2=%xmm2 +pshufd $0x4E,%xmm2,%xmm2 + +# qhasm: xmm5 = shuffle dwords of xmm5 by 0x4E +# asm 1: pshufd $0x4E,xmm5=int6464#6 +# asm 2: pshufd $0x4E,xmm5=%xmm5 +pshufd $0x4E,%xmm5,%xmm5 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm3=int6464#1 +# asm 2: movdqa xmm3=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm2 = xmm9 +# asm 1: movdqa xmm2=int6464#2 +# asm 2: movdqa xmm2=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm1 = xmm13 +# asm 1: movdqa xmm1=int6464#3 +# asm 2: movdqa xmm1=%xmm2 +movdqa %xmm13,%xmm2 + +# qhasm: xmm5 = xmm10 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm10,%xmm3 + +# qhasm: xmm4 = xmm14 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm14,%xmm4 + +# qhasm: xmm3 ^= xmm12 +# asm 1: pxor xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: xmm0 = xmm2 +# asm 1: movdqa xmm0=int6464#7 +# asm 2: movdqa xmm0=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm3 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm0,%xmm7 + +# qhasm: xmm2 |= xmm1 +# asm 1: por xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm11,%xmm2 + +# qhasm: xmm4 ^= xmm8 +# asm 1: pxor 
xmm5=int6464#3 +# asm 2: movdqa xmm5=%xmm2 +movdqa %xmm15,%xmm2 + +# qhasm: xmm5 ^= xmm9 +# asm 1: pxor xmm4=int6464#4 +# asm 2: movdqa xmm4=%xmm3 +movdqa %xmm13,%xmm3 + +# qhasm: xmm1 = xmm5 +# asm 1: movdqa xmm1=int6464#5 +# asm 2: movdqa xmm1=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: xmm4 ^= xmm14 +# asm 1: pxor xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm10,%xmm2 + +# qhasm: xmm5 = xmm12 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm12,%xmm3 + +# qhasm: xmm6 = xmm9 +# asm 1: movdqa xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm9,%xmm5 + +# qhasm: xmm7 = xmm15 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm15,%xmm7 + +# qhasm: xmm4 &= xmm11 +# asm 1: pand xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: xmm4 ^= xmm2 +# asm 1: pxor xmm6=int6464#4 +# asm 2: movdqa xmm6=%xmm3 +movdqa %xmm6,%xmm3 + +# qhasm: xmm6 ^= xmm3 +# asm 1: pxor xmm7=int6464#6 +# asm 2: movdqa xmm7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: xmm7 &= xmm6 +# asm 1: pand xmm5=int6464#8 +# asm 2: movdqa xmm5=%xmm7 +movdqa %xmm4,%xmm7 + +# qhasm: xmm5 ^= xmm0 +# asm 1: pxor xmm2=int6464#1 +# asm 2: movdqa xmm2=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: xmm2 ^= xmm5 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm14,%xmm0 + +# qhasm: xmm0 = xmm13 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: xmm2 = xmm7 +# asm 1: movdqa xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm2 ^= xmm6 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm0 = xmm9 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm4 ^= xmm12 +# asm 1: pxor xmm3=int6464#3 +# asm 
2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x93,xmm0=%xmm0 +pshufd $0x93,%xmm8,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm9 by 0x93 +# asm 1: pshufd $0x93,xmm1=int6464#2 +# asm 2: pshufd $0x93,xmm1=%xmm1 +pshufd $0x93,%xmm9,%xmm1 + +# qhasm: xmm2 = shuffle dwords of xmm12 by 0x93 +# asm 1: pshufd $0x93,xmm2=int6464#3 +# asm 2: pshufd $0x93,xmm2=%xmm2 +pshufd $0x93,%xmm12,%xmm2 + +# qhasm: xmm3 = shuffle dwords of xmm14 by 0x93 +# asm 1: pshufd $0x93,xmm3=int6464#4 +# asm 2: pshufd $0x93,xmm3=%xmm3 +pshufd $0x93,%xmm14,%xmm3 + +# qhasm: xmm4 = shuffle dwords of xmm11 by 0x93 +# asm 1: pshufd $0x93,xmm4=int6464#5 +# asm 2: pshufd $0x93,xmm4=%xmm4 +pshufd $0x93,%xmm11,%xmm4 + +# qhasm: xmm5 = shuffle dwords of xmm15 by 0x93 +# asm 1: pshufd $0x93,xmm5=int6464#6 +# asm 2: pshufd $0x93,xmm5=%xmm5 +pshufd $0x93,%xmm15,%xmm5 + +# qhasm: xmm6 = shuffle dwords of xmm10 by 0x93 +# asm 1: pshufd $0x93,xmm6=int6464#7 +# asm 2: pshufd $0x93,xmm6=%xmm6 +pshufd $0x93,%xmm10,%xmm6 + +# qhasm: xmm7 = shuffle dwords of xmm13 by 0x93 +# asm 1: pshufd $0x93,xmm7=int6464#8 +# asm 2: pshufd $0x93,xmm7=%xmm7 +pshufd $0x93,%xmm13,%xmm7 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x4E,xmm8=%xmm8 +pshufd $0x4E,%xmm8,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm9 by 0x4E +# asm 1: pshufd $0x4E,xmm9=int6464#10 +# asm 2: pshufd $0x4E,xmm9=%xmm9 +pshufd $0x4E,%xmm9,%xmm9 + +# qhasm: xmm12 = shuffle dwords of xmm12 by 0x4E +# asm 1: pshufd $0x4E,xmm12=int6464#13 +# asm 2: pshufd $0x4E,xmm12=%xmm12 +pshufd $0x4E,%xmm12,%xmm12 + +# qhasm: xmm14 = shuffle dwords of xmm14 by 0x4E +# asm 1: pshufd $0x4E,xmm14=int6464#15 +# asm 2: pshufd $0x4E,xmm14=%xmm14 +pshufd $0x4E,%xmm14,%xmm14 + +# qhasm: 
xmm11 = shuffle dwords of xmm11 by 0x4E +# asm 1: pshufd $0x4E,xmm11=int6464#12 +# asm 2: pshufd $0x4E,xmm11=%xmm11 +pshufd $0x4E,%xmm11,%xmm11 + +# qhasm: xmm15 = shuffle dwords of xmm15 by 0x4E +# asm 1: pshufd $0x4E,xmm15=int6464#16 +# asm 2: pshufd $0x4E,xmm15=%xmm15 +pshufd $0x4E,%xmm15,%xmm15 + +# qhasm: xmm10 = shuffle dwords of xmm10 by 0x4E +# asm 1: pshufd $0x4E,xmm10=int6464#11 +# asm 2: pshufd $0x4E,xmm10=%xmm10 +pshufd $0x4E,%xmm10,%xmm10 + +# qhasm: xmm13 = shuffle dwords of xmm13 by 0x4E +# asm 1: pshufd $0x4E,xmm13=int6464#14 +# asm 2: pshufd $0x4E,xmm13=%xmm13 +pshufd $0x4E,%xmm13,%xmm13 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa 
xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa 
%xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x93,xmm8=%xmm8 +pshufd $0x93,%xmm0,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm1 by 0x93 +# asm 1: pshufd $0x93,xmm9=int6464#10 +# asm 2: pshufd $0x93,xmm9=%xmm9 +pshufd $0x93,%xmm1,%xmm9 + +# qhasm: xmm10 = shuffle dwords of xmm4 by 0x93 +# asm 1: pshufd $0x93,xmm10=int6464#11 +# asm 2: pshufd $0x93,xmm10=%xmm10 +pshufd $0x93,%xmm4,%xmm10 + +# qhasm: xmm11 = shuffle dwords of xmm6 by 0x93 +# asm 1: pshufd $0x93,xmm11=int6464#12 +# asm 2: pshufd $0x93,xmm11=%xmm11 +pshufd $0x93,%xmm6,%xmm11 + +# qhasm: xmm12 = shuffle dwords of xmm3 by 0x93 +# asm 1: pshufd $0x93,xmm12=int6464#13 +# asm 2: pshufd $0x93,xmm12=%xmm12 +pshufd $0x93,%xmm3,%xmm12 + +# qhasm: xmm13 = shuffle dwords of xmm7 by 0x93 +# asm 1: pshufd $0x93,xmm13=int6464#14 +# asm 2: pshufd $0x93,xmm13=%xmm13 +pshufd $0x93,%xmm7,%xmm13 + +# qhasm: xmm14 = shuffle dwords of xmm2 by 0x93 +# asm 1: pshufd $0x93,xmm14=int6464#15 +# asm 2: pshufd $0x93,xmm14=%xmm14 +pshufd $0x93,%xmm2,%xmm14 + +# qhasm: xmm15 = shuffle dwords of xmm5 by 0x93 +# asm 1: pshufd $0x93,xmm15=int6464#16 +# asm 2: pshufd $0x93,xmm15=%xmm15 +pshufd $0x93,%xmm5,%xmm15 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x4E,xmm0=%xmm0 +pshufd $0x4E,%xmm0,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm1 by 0x4E +# asm 1: pshufd $0x4E,xmm1=int6464#2 +# asm 2: pshufd $0x4E,xmm1=%xmm1 +pshufd $0x4E,%xmm1,%xmm1 + +# qhasm: xmm4 = shuffle dwords of xmm4 by 0x4E +# asm 1: pshufd $0x4E,xmm4=int6464#5 +# asm 2: pshufd $0x4E,xmm4=%xmm4 +pshufd $0x4E,%xmm4,%xmm4 + +# qhasm: xmm6 = shuffle dwords of xmm6 by 0x4E +# asm 1: pshufd $0x4E,xmm6=int6464#7 +# asm 2: pshufd $0x4E,xmm6=%xmm6 +pshufd $0x4E,%xmm6,%xmm6 + +# qhasm: xmm3 = shuffle dwords of xmm3 by 0x4E +# asm 1: pshufd $0x4E,xmm3=int6464#4 +# asm 2: pshufd $0x4E,xmm3=%xmm3 +pshufd $0x4E,%xmm3,%xmm3 + +# qhasm: xmm7 = shuffle dwords of xmm7 by 0x4E +# asm 1: pshufd $0x4E,xmm7=int6464#8 
+# asm 2: pshufd $0x4E,xmm7=%xmm7 +pshufd $0x4E,%xmm7,%xmm7 + +# qhasm: xmm2 = shuffle dwords of xmm2 by 0x4E +# asm 1: pshufd $0x4E,xmm2=int6464#3 +# asm 2: pshufd $0x4E,xmm2=%xmm2 +pshufd $0x4E,%xmm2,%xmm2 + +# qhasm: xmm5 = shuffle dwords of xmm5 by 0x4E +# asm 1: pshufd $0x4E,xmm5=int6464#6 +# asm 2: pshufd $0x4E,xmm5=%xmm5 +pshufd $0x4E,%xmm5,%xmm5 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm3=int6464#1 +# asm 2: movdqa xmm3=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm2 = xmm9 +# asm 1: movdqa xmm2=int6464#2 +# asm 2: movdqa xmm2=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm1 = xmm13 +# asm 1: movdqa xmm1=int6464#3 +# asm 2: movdqa xmm1=%xmm2 +movdqa %xmm13,%xmm2 + +# qhasm: xmm5 = xmm10 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm10,%xmm3 + +# qhasm: xmm4 = xmm14 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm14,%xmm4 + +# qhasm: xmm3 ^= xmm12 +# asm 1: pxor xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: xmm0 = xmm2 +# asm 1: movdqa xmm0=int6464#7 +# asm 2: movdqa xmm0=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm3 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm0,%xmm7 + +# qhasm: xmm2 |= xmm1 +# asm 1: por xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm11,%xmm2 + +# qhasm: xmm4 ^= xmm8 +# asm 1: pxor xmm5=int6464#3 +# asm 2: movdqa xmm5=%xmm2 +movdqa %xmm15,%xmm2 + +# qhasm: xmm5 ^= xmm9 +# asm 1: pxor xmm4=int6464#4 +# asm 2: movdqa xmm4=%xmm3 +movdqa %xmm13,%xmm3 + +# qhasm: xmm1 = xmm5 +# asm 1: movdqa xmm1=int6464#5 +# asm 2: movdqa xmm1=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: xmm4 ^= xmm14 +# asm 1: pxor xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm10,%xmm2 + +# qhasm: xmm5 = xmm12 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm12,%xmm3 + +# qhasm: xmm6 = xmm9 +# asm 1: movdqa xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm9,%xmm5 + +# qhasm: xmm7 = xmm15 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa 
xmm7=%xmm7 +movdqa %xmm15,%xmm7 + +# qhasm: xmm4 &= xmm11 +# asm 1: pand xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: xmm4 ^= xmm2 +# asm 1: pxor xmm6=int6464#4 +# asm 2: movdqa xmm6=%xmm3 +movdqa %xmm6,%xmm3 + +# qhasm: xmm6 ^= xmm3 +# asm 1: pxor xmm7=int6464#6 +# asm 2: movdqa xmm7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: xmm7 &= xmm6 +# asm 1: pand xmm5=int6464#8 +# asm 2: movdqa xmm5=%xmm7 +movdqa %xmm4,%xmm7 + +# qhasm: xmm5 ^= xmm0 +# asm 1: pxor xmm2=int6464#1 +# asm 2: movdqa xmm2=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: xmm2 ^= xmm5 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm14,%xmm0 + +# qhasm: xmm0 = xmm13 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: xmm2 = xmm7 +# asm 1: movdqa xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm2 ^= xmm6 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm0 = xmm9 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm4 ^= xmm12 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x93,xmm0=%xmm0 +pshufd $0x93,%xmm8,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm9 by 0x93 +# asm 1: pshufd $0x93,xmm1=int6464#2 +# asm 2: pshufd $0x93,xmm1=%xmm1 +pshufd $0x93,%xmm9,%xmm1 + +# qhasm: xmm2 = shuffle dwords of xmm12 by 0x93 +# asm 1: pshufd $0x93,xmm2=int6464#3 +# asm 2: pshufd $0x93,xmm2=%xmm2 +pshufd $0x93,%xmm12,%xmm2 
+ +# qhasm: xmm3 = shuffle dwords of xmm14 by 0x93 +# asm 1: pshufd $0x93,xmm3=int6464#4 +# asm 2: pshufd $0x93,xmm3=%xmm3 +pshufd $0x93,%xmm14,%xmm3 + +# qhasm: xmm4 = shuffle dwords of xmm11 by 0x93 +# asm 1: pshufd $0x93,xmm4=int6464#5 +# asm 2: pshufd $0x93,xmm4=%xmm4 +pshufd $0x93,%xmm11,%xmm4 + +# qhasm: xmm5 = shuffle dwords of xmm15 by 0x93 +# asm 1: pshufd $0x93,xmm5=int6464#6 +# asm 2: pshufd $0x93,xmm5=%xmm5 +pshufd $0x93,%xmm15,%xmm5 + +# qhasm: xmm6 = shuffle dwords of xmm10 by 0x93 +# asm 1: pshufd $0x93,xmm6=int6464#7 +# asm 2: pshufd $0x93,xmm6=%xmm6 +pshufd $0x93,%xmm10,%xmm6 + +# qhasm: xmm7 = shuffle dwords of xmm13 by 0x93 +# asm 1: pshufd $0x93,xmm7=int6464#8 +# asm 2: pshufd $0x93,xmm7=%xmm7 +pshufd $0x93,%xmm13,%xmm7 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x4E,xmm8=%xmm8 +pshufd $0x4E,%xmm8,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm9 by 0x4E +# asm 1: pshufd $0x4E,xmm9=int6464#10 +# asm 2: pshufd $0x4E,xmm9=%xmm9 +pshufd $0x4E,%xmm9,%xmm9 + +# qhasm: xmm12 = shuffle dwords of xmm12 by 0x4E +# asm 1: pshufd $0x4E,xmm12=int6464#13 +# asm 2: pshufd $0x4E,xmm12=%xmm12 +pshufd $0x4E,%xmm12,%xmm12 + +# qhasm: xmm14 = shuffle dwords of xmm14 by 0x4E +# asm 1: pshufd $0x4E,xmm14=int6464#15 +# asm 2: pshufd $0x4E,xmm14=%xmm14 +pshufd $0x4E,%xmm14,%xmm14 + +# qhasm: xmm11 = shuffle dwords of xmm11 by 0x4E +# asm 1: pshufd $0x4E,xmm11=int6464#12 +# asm 2: pshufd $0x4E,xmm11=%xmm11 +pshufd $0x4E,%xmm11,%xmm11 + +# qhasm: xmm15 = shuffle dwords of xmm15 by 0x4E +# asm 1: pshufd $0x4E,xmm15=int6464#16 +# asm 2: pshufd $0x4E,xmm15=%xmm15 +pshufd $0x4E,%xmm15,%xmm15 + +# qhasm: xmm10 = shuffle dwords of xmm10 by 0x4E +# asm 1: pshufd $0x4E,xmm10=int6464#11 +# asm 2: pshufd $0x4E,xmm10=%xmm10 +pshufd $0x4E,%xmm10,%xmm10 + +# qhasm: xmm13 = shuffle dwords of xmm13 by 0x4E +# asm 1: pshufd $0x4E,xmm13=int6464#14 +# asm 2: pshufd $0x4E,xmm13=%xmm13 +pshufd $0x4E,%xmm13,%xmm13 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor 
xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 +# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa 
xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x93,xmm8=%xmm8 +pshufd $0x93,%xmm0,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm1 by 0x93 +# asm 1: pshufd $0x93,xmm9=int6464#10 +# asm 2: pshufd $0x93,xmm9=%xmm9 +pshufd $0x93,%xmm1,%xmm9 + +# qhasm: xmm10 = shuffle dwords of xmm4 by 0x93 +# asm 1: pshufd $0x93,xmm10=int6464#11 +# asm 2: pshufd $0x93,xmm10=%xmm10 +pshufd $0x93,%xmm4,%xmm10 + +# qhasm: xmm11 = shuffle dwords of xmm6 by 0x93 +# asm 1: pshufd $0x93,xmm11=int6464#12 +# asm 2: pshufd $0x93,xmm11=%xmm11 +pshufd $0x93,%xmm6,%xmm11 + +# qhasm: xmm12 = shuffle dwords of xmm3 by 0x93 +# asm 1: pshufd 
$0x93,xmm12=int6464#13 +# asm 2: pshufd $0x93,xmm12=%xmm12 +pshufd $0x93,%xmm3,%xmm12 + +# qhasm: xmm13 = shuffle dwords of xmm7 by 0x93 +# asm 1: pshufd $0x93,xmm13=int6464#14 +# asm 2: pshufd $0x93,xmm13=%xmm13 +pshufd $0x93,%xmm7,%xmm13 + +# qhasm: xmm14 = shuffle dwords of xmm2 by 0x93 +# asm 1: pshufd $0x93,xmm14=int6464#15 +# asm 2: pshufd $0x93,xmm14=%xmm14 +pshufd $0x93,%xmm2,%xmm14 + +# qhasm: xmm15 = shuffle dwords of xmm5 by 0x93 +# asm 1: pshufd $0x93,xmm15=int6464#16 +# asm 2: pshufd $0x93,xmm15=%xmm15 +pshufd $0x93,%xmm5,%xmm15 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x4E,xmm0=%xmm0 +pshufd $0x4E,%xmm0,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm1 by 0x4E +# asm 1: pshufd $0x4E,xmm1=int6464#2 +# asm 2: pshufd $0x4E,xmm1=%xmm1 +pshufd $0x4E,%xmm1,%xmm1 + +# qhasm: xmm4 = shuffle dwords of xmm4 by 0x4E +# asm 1: pshufd $0x4E,xmm4=int6464#5 +# asm 2: pshufd $0x4E,xmm4=%xmm4 +pshufd $0x4E,%xmm4,%xmm4 + +# qhasm: xmm6 = shuffle dwords of xmm6 by 0x4E +# asm 1: pshufd $0x4E,xmm6=int6464#7 +# asm 2: pshufd $0x4E,xmm6=%xmm6 +pshufd $0x4E,%xmm6,%xmm6 + +# qhasm: xmm3 = shuffle dwords of xmm3 by 0x4E +# asm 1: pshufd $0x4E,xmm3=int6464#4 +# asm 2: pshufd $0x4E,xmm3=%xmm3 +pshufd $0x4E,%xmm3,%xmm3 + +# qhasm: xmm7 = shuffle dwords of xmm7 by 0x4E +# asm 1: pshufd $0x4E,xmm7=int6464#8 +# asm 2: pshufd $0x4E,xmm7=%xmm7 +pshufd $0x4E,%xmm7,%xmm7 + +# qhasm: xmm2 = shuffle dwords of xmm2 by 0x4E +# asm 1: pshufd $0x4E,xmm2=int6464#3 +# asm 2: pshufd $0x4E,xmm2=%xmm2 +pshufd $0x4E,%xmm2,%xmm2 + +# qhasm: xmm5 = shuffle dwords of xmm5 by 0x4E +# asm 1: pshufd $0x4E,xmm5=int6464#6 +# asm 2: pshufd $0x4E,xmm5=%xmm5 +pshufd $0x4E,%xmm5,%xmm5 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm3=int6464#1 +# asm 2: movdqa xmm3=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm2 = xmm9 +# asm 1: movdqa xmm2=int6464#2 +# asm 2: movdqa xmm2=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm1 = xmm13 +# asm 1: movdqa xmm1=int6464#3 +# asm 2: movdqa xmm1=%xmm2 +movdqa 
%xmm13,%xmm2 + +# qhasm: xmm5 = xmm10 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm10,%xmm3 + +# qhasm: xmm4 = xmm14 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm14,%xmm4 + +# qhasm: xmm3 ^= xmm12 +# asm 1: pxor xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: xmm0 = xmm2 +# asm 1: movdqa xmm0=int6464#7 +# asm 2: movdqa xmm0=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm3 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm0,%xmm7 + +# qhasm: xmm2 |= xmm1 +# asm 1: por xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm11,%xmm2 + +# qhasm: xmm4 ^= xmm8 +# asm 1: pxor xmm5=int6464#3 +# asm 2: movdqa xmm5=%xmm2 +movdqa %xmm15,%xmm2 + +# qhasm: xmm5 ^= xmm9 +# asm 1: pxor xmm4=int6464#4 +# asm 2: movdqa xmm4=%xmm3 +movdqa %xmm13,%xmm3 + +# qhasm: xmm1 = xmm5 +# asm 1: movdqa xmm1=int6464#5 +# asm 2: movdqa xmm1=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: xmm4 ^= xmm14 +# asm 1: pxor xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm10,%xmm2 + +# qhasm: xmm5 = xmm12 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm12,%xmm3 + +# qhasm: xmm6 = xmm9 +# asm 1: movdqa xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm9,%xmm5 + +# qhasm: xmm7 = xmm15 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm15,%xmm7 + +# qhasm: xmm4 &= xmm11 +# asm 1: pand xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: xmm4 ^= xmm2 +# asm 1: pxor xmm6=int6464#4 +# asm 2: movdqa xmm6=%xmm3 +movdqa %xmm6,%xmm3 + +# qhasm: xmm6 ^= xmm3 +# asm 1: pxor xmm7=int6464#6 +# asm 2: movdqa xmm7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: xmm7 &= xmm6 +# asm 1: pand xmm5=int6464#8 +# asm 2: movdqa xmm5=%xmm7 +movdqa %xmm4,%xmm7 + +# qhasm: xmm5 ^= xmm0 +# asm 1: pxor xmm2=int6464#1 +# asm 2: movdqa xmm2=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: xmm2 ^= xmm5 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm14,%xmm0 + +# 
qhasm: xmm0 = xmm13 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: xmm2 = xmm7 +# asm 1: movdqa xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm2 ^= xmm6 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm0 = xmm9 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm4 ^= xmm12 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x93,xmm0=%xmm0 +pshufd $0x93,%xmm8,%xmm0 + +# qhasm: xmm1 = shuffle dwords of xmm9 by 0x93 +# asm 1: pshufd $0x93,xmm1=int6464#2 +# asm 2: pshufd $0x93,xmm1=%xmm1 +pshufd $0x93,%xmm9,%xmm1 + +# qhasm: xmm2 = shuffle dwords of xmm12 by 0x93 +# asm 1: pshufd $0x93,xmm2=int6464#3 +# asm 2: pshufd $0x93,xmm2=%xmm2 +pshufd $0x93,%xmm12,%xmm2 + +# qhasm: xmm3 = shuffle dwords of xmm14 by 0x93 +# asm 1: pshufd $0x93,xmm3=int6464#4 +# asm 2: pshufd $0x93,xmm3=%xmm3 +pshufd $0x93,%xmm14,%xmm3 + +# qhasm: xmm4 = shuffle dwords of xmm11 by 0x93 +# asm 1: pshufd $0x93,xmm4=int6464#5 +# asm 2: pshufd $0x93,xmm4=%xmm4 +pshufd $0x93,%xmm11,%xmm4 + +# qhasm: xmm5 = shuffle dwords of xmm15 by 0x93 +# asm 1: pshufd $0x93,xmm5=int6464#6 +# asm 2: pshufd $0x93,xmm5=%xmm5 +pshufd $0x93,%xmm15,%xmm5 + +# qhasm: xmm6 = shuffle dwords of xmm10 by 0x93 +# asm 1: pshufd $0x93,xmm6=int6464#7 +# asm 2: pshufd $0x93,xmm6=%xmm6 +pshufd $0x93,%xmm10,%xmm6 + +# qhasm: xmm7 = shuffle dwords of xmm13 by 0x93 +# asm 
1: pshufd $0x93,xmm7=int6464#8 +# asm 2: pshufd $0x93,xmm7=%xmm7 +pshufd $0x93,%xmm13,%xmm7 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x4E,xmm8=%xmm8 +pshufd $0x4E,%xmm8,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm9 by 0x4E +# asm 1: pshufd $0x4E,xmm9=int6464#10 +# asm 2: pshufd $0x4E,xmm9=%xmm9 +pshufd $0x4E,%xmm9,%xmm9 + +# qhasm: xmm12 = shuffle dwords of xmm12 by 0x4E +# asm 1: pshufd $0x4E,xmm12=int6464#13 +# asm 2: pshufd $0x4E,xmm12=%xmm12 +pshufd $0x4E,%xmm12,%xmm12 + +# qhasm: xmm14 = shuffle dwords of xmm14 by 0x4E +# asm 1: pshufd $0x4E,xmm14=int6464#15 +# asm 2: pshufd $0x4E,xmm14=%xmm14 +pshufd $0x4E,%xmm14,%xmm14 + +# qhasm: xmm11 = shuffle dwords of xmm11 by 0x4E +# asm 1: pshufd $0x4E,xmm11=int6464#12 +# asm 2: pshufd $0x4E,xmm11=%xmm11 +pshufd $0x4E,%xmm11,%xmm11 + +# qhasm: xmm15 = shuffle dwords of xmm15 by 0x4E +# asm 1: pshufd $0x4E,xmm15=int6464#16 +# asm 2: pshufd $0x4E,xmm15=%xmm15 +pshufd $0x4E,%xmm15,%xmm15 + +# qhasm: xmm10 = shuffle dwords of xmm10 by 0x4E +# asm 1: pshufd $0x4E,xmm10=int6464#11 +# asm 2: pshufd $0x4E,xmm10=%xmm10 +pshufd $0x4E,%xmm10,%xmm10 + +# qhasm: xmm13 = shuffle dwords of xmm13 by 0x4E +# asm 1: pshufd $0x4E,xmm13=int6464#14 +# asm 2: pshufd $0x4E,xmm13=%xmm13 +pshufd $0x4E,%xmm13,%xmm13 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm11=int6464#9 +# asm 2: movdqa xmm11=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm10 = xmm1 +# asm 1: movdqa xmm10=int6464#10 +# asm 2: movdqa xmm10=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm9 = xmm5 +# asm 1: movdqa xmm9=int6464#11 +# asm 2: movdqa xmm9=%xmm10 +movdqa %xmm5,%xmm10 + +# qhasm: xmm13 = xmm2 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm2,%xmm11 + +# qhasm: xmm12 = xmm6 +# asm 1: movdqa xmm12=int6464#13 +# asm 2: movdqa xmm12=%xmm12 +movdqa %xmm6,%xmm12 + +# qhasm: xmm11 ^= xmm4 +# asm 1: pxor xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm8,%xmm13 + +# qhasm: xmm8 = xmm10 +# asm 1: movdqa xmm8=int6464#15 
+# asm 2: movdqa xmm8=%xmm14 +movdqa %xmm9,%xmm14 + +# qhasm: xmm15 = xmm11 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm8,%xmm15 + +# qhasm: xmm10 |= xmm9 +# asm 1: por xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm3,%xmm10 + +# qhasm: xmm12 ^= xmm0 +# asm 1: pxor xmm13=int6464#11 +# asm 2: movdqa xmm13=%xmm10 +movdqa %xmm7,%xmm10 + +# qhasm: xmm13 ^= xmm1 +# asm 1: pxor xmm12=int6464#12 +# asm 2: movdqa xmm12=%xmm11 +movdqa %xmm5,%xmm11 + +# qhasm: xmm9 = xmm13 +# asm 1: movdqa xmm9=int6464#13 +# asm 2: movdqa xmm9=%xmm12 +movdqa %xmm10,%xmm12 + +# qhasm: xmm12 ^= xmm6 +# asm 1: pxor xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm2,%xmm10 + +# qhasm: xmm13 = xmm4 +# asm 1: movdqa xmm13=int6464#12 +# asm 2: movdqa xmm13=%xmm11 +movdqa %xmm4,%xmm11 + +# qhasm: xmm14 = xmm1 +# asm 1: movdqa xmm14=int6464#14 +# asm 2: movdqa xmm14=%xmm13 +movdqa %xmm1,%xmm13 + +# qhasm: xmm15 = xmm7 +# asm 1: movdqa xmm15=int6464#16 +# asm 2: movdqa xmm15=%xmm15 +movdqa %xmm7,%xmm15 + +# qhasm: xmm12 &= xmm3 +# asm 1: pand xmm12=int6464#11 +# asm 2: movdqa xmm12=%xmm10 +movdqa %xmm8,%xmm10 + +# qhasm: xmm12 ^= xmm10 +# asm 1: pxor xmm14=int6464#12 +# asm 2: movdqa xmm14=%xmm11 +movdqa %xmm14,%xmm11 + +# qhasm: xmm14 ^= xmm11 +# asm 1: pxor xmm15=int6464#14 +# asm 2: movdqa xmm15=%xmm13 +movdqa %xmm10,%xmm13 + +# qhasm: xmm15 &= xmm14 +# asm 1: pand xmm13=int6464#16 +# asm 2: movdqa xmm13=%xmm15 +movdqa %xmm12,%xmm15 + +# qhasm: xmm13 ^= xmm8 +# asm 1: pxor xmm10=int6464#9 +# asm 2: movdqa xmm10=%xmm8 +movdqa %xmm11,%xmm8 + +# qhasm: xmm10 ^= xmm13 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm6,%xmm8 + +# qhasm: xmm8 = xmm5 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm5,%xmm9 + +# qhasm: xmm10 = xmm15 +# asm 1: movdqa xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm10 ^= xmm14 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 
+movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm12=int6464#9 +# asm 2: movdqa xmm12=%xmm8 +movdqa %xmm7,%xmm8 + +# qhasm: xmm8 = xmm1 +# asm 1: movdqa xmm8=int6464#10 +# asm 2: movdqa xmm8=%xmm9 +movdqa %xmm1,%xmm9 + +# qhasm: xmm12 ^= xmm4 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm10=int6464#11 +# asm 2: movdqa xmm10=%xmm10 +movdqa %xmm15,%xmm10 + +# qhasm: xmm10 ^= xmm9 +# asm 1: pxor xmm11=int6464#11 +# asm 2: movdqa xmm11=%xmm10 +movdqa %xmm13,%xmm10 + +# qhasm: xmm11 ^= xmm14 +# asm 1: pxor xmm8=int6464#9 +# asm 2: pshufd $0x93,xmm8=%xmm8 +pshufd $0x93,%xmm0,%xmm8 + +# qhasm: xmm9 = shuffle dwords of xmm1 by 0x93 +# asm 1: pshufd $0x93,xmm9=int6464#10 +# asm 2: pshufd $0x93,xmm9=%xmm9 +pshufd $0x93,%xmm1,%xmm9 + +# qhasm: xmm10 = shuffle dwords of xmm4 by 0x93 +# asm 1: pshufd $0x93,xmm10=int6464#11 +# asm 2: pshufd $0x93,xmm10=%xmm10 +pshufd $0x93,%xmm4,%xmm10 + +# qhasm: xmm11 = shuffle dwords of xmm6 by 0x93 +# asm 1: pshufd $0x93,xmm11=int6464#12 +# asm 2: pshufd $0x93,xmm11=%xmm11 +pshufd $0x93,%xmm6,%xmm11 + +# qhasm: xmm12 = shuffle dwords of xmm3 by 0x93 +# asm 1: pshufd $0x93,xmm12=int6464#13 +# asm 2: pshufd $0x93,xmm12=%xmm12 +pshufd $0x93,%xmm3,%xmm12 + +# qhasm: xmm13 = shuffle dwords of xmm7 by 0x93 +# asm 1: pshufd $0x93,xmm13=int6464#14 +# asm 2: pshufd $0x93,xmm13=%xmm13 +pshufd $0x93,%xmm7,%xmm13 + +# qhasm: xmm14 = shuffle dwords of xmm2 by 0x93 +# asm 1: pshufd $0x93,xmm14=int6464#15 +# asm 2: pshufd $0x93,xmm14=%xmm14 +pshufd $0x93,%xmm2,%xmm14 + +# qhasm: xmm15 = shuffle dwords of xmm5 by 0x93 +# asm 1: pshufd $0x93,xmm15=int6464#16 +# asm 2: pshufd $0x93,xmm15=%xmm15 +pshufd $0x93,%xmm5,%xmm15 + +# qhasm: xmm0 ^= xmm8 +# asm 1: pxor xmm0=int6464#1 +# asm 2: pshufd $0x4E,xmm0=%xmm0 +pshufd $0x4E,%xmm0,%xmm0 + +# qhasm: xmm1 = 
shuffle dwords of xmm1 by 0x4E +# asm 1: pshufd $0x4E,xmm1=int6464#2 +# asm 2: pshufd $0x4E,xmm1=%xmm1 +pshufd $0x4E,%xmm1,%xmm1 + +# qhasm: xmm4 = shuffle dwords of xmm4 by 0x4E +# asm 1: pshufd $0x4E,xmm4=int6464#5 +# asm 2: pshufd $0x4E,xmm4=%xmm4 +pshufd $0x4E,%xmm4,%xmm4 + +# qhasm: xmm6 = shuffle dwords of xmm6 by 0x4E +# asm 1: pshufd $0x4E,xmm6=int6464#7 +# asm 2: pshufd $0x4E,xmm6=%xmm6 +pshufd $0x4E,%xmm6,%xmm6 + +# qhasm: xmm3 = shuffle dwords of xmm3 by 0x4E +# asm 1: pshufd $0x4E,xmm3=int6464#4 +# asm 2: pshufd $0x4E,xmm3=%xmm3 +pshufd $0x4E,%xmm3,%xmm3 + +# qhasm: xmm7 = shuffle dwords of xmm7 by 0x4E +# asm 1: pshufd $0x4E,xmm7=int6464#8 +# asm 2: pshufd $0x4E,xmm7=%xmm7 +pshufd $0x4E,%xmm7,%xmm7 + +# qhasm: xmm2 = shuffle dwords of xmm2 by 0x4E +# asm 1: pshufd $0x4E,xmm2=int6464#3 +# asm 2: pshufd $0x4E,xmm2=%xmm2 +pshufd $0x4E,%xmm2,%xmm2 + +# qhasm: xmm5 = shuffle dwords of xmm5 by 0x4E +# asm 1: pshufd $0x4E,xmm5=int6464#6 +# asm 2: pshufd $0x4E,xmm5=%xmm5 +pshufd $0x4E,%xmm5,%xmm5 + +# qhasm: xmm8 ^= xmm0 +# asm 1: pxor xmm3=int6464#1 +# asm 2: movdqa xmm3=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm2 = xmm9 +# asm 1: movdqa xmm2=int6464#2 +# asm 2: movdqa xmm2=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm1 = xmm13 +# asm 1: movdqa xmm1=int6464#3 +# asm 2: movdqa xmm1=%xmm2 +movdqa %xmm13,%xmm2 + +# qhasm: xmm5 = xmm10 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm10,%xmm3 + +# qhasm: xmm4 = xmm14 +# asm 1: movdqa xmm4=int6464#5 +# asm 2: movdqa xmm4=%xmm4 +movdqa %xmm14,%xmm4 + +# qhasm: xmm3 ^= xmm12 +# asm 1: pxor xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: xmm0 = xmm2 +# asm 1: movdqa xmm0=int6464#7 +# asm 2: movdqa xmm0=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: xmm7 = xmm3 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm0,%xmm7 + +# qhasm: xmm2 |= xmm1 +# asm 1: por xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm11,%xmm2 + +# qhasm: xmm4 ^= xmm8 +# asm 1: pxor 
xmm5=int6464#3 +# asm 2: movdqa xmm5=%xmm2 +movdqa %xmm15,%xmm2 + +# qhasm: xmm5 ^= xmm9 +# asm 1: pxor xmm4=int6464#4 +# asm 2: movdqa xmm4=%xmm3 +movdqa %xmm13,%xmm3 + +# qhasm: xmm1 = xmm5 +# asm 1: movdqa xmm1=int6464#5 +# asm 2: movdqa xmm1=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: xmm4 ^= xmm14 +# asm 1: pxor xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm10,%xmm2 + +# qhasm: xmm5 = xmm12 +# asm 1: movdqa xmm5=int6464#4 +# asm 2: movdqa xmm5=%xmm3 +movdqa %xmm12,%xmm3 + +# qhasm: xmm6 = xmm9 +# asm 1: movdqa xmm6=int6464#6 +# asm 2: movdqa xmm6=%xmm5 +movdqa %xmm9,%xmm5 + +# qhasm: xmm7 = xmm15 +# asm 1: movdqa xmm7=int6464#8 +# asm 2: movdqa xmm7=%xmm7 +movdqa %xmm15,%xmm7 + +# qhasm: xmm4 &= xmm11 +# asm 1: pand xmm4=int6464#3 +# asm 2: movdqa xmm4=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: xmm4 ^= xmm2 +# asm 1: pxor xmm6=int6464#4 +# asm 2: movdqa xmm6=%xmm3 +movdqa %xmm6,%xmm3 + +# qhasm: xmm6 ^= xmm3 +# asm 1: pxor xmm7=int6464#6 +# asm 2: movdqa xmm7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: xmm7 &= xmm6 +# asm 1: pand xmm5=int6464#8 +# asm 2: movdqa xmm5=%xmm7 +movdqa %xmm4,%xmm7 + +# qhasm: xmm5 ^= xmm0 +# asm 1: pxor xmm2=int6464#1 +# asm 2: movdqa xmm2=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: xmm2 ^= xmm5 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm14,%xmm0 + +# qhasm: xmm0 = xmm13 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: xmm2 = xmm7 +# asm 1: movdqa xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm2 ^= xmm6 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm4=int6464#1 +# asm 2: movdqa xmm4=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: xmm0 = xmm9 +# asm 1: movdqa xmm0=int6464#2 +# asm 2: movdqa xmm0=%xmm1 +movdqa %xmm9,%xmm1 + +# qhasm: xmm4 ^= xmm12 +# asm 1: pxor xmm3=int6464#3 +# asm 
2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm2=int6464#3 +# asm 2: movdqa xmm2=%xmm2 +movdqa %xmm7,%xmm2 + +# qhasm: xmm2 ^= xmm1 +# asm 1: pxor xmm3=int6464#3 +# asm 2: movdqa xmm3=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: xmm3 ^= xmm6 +# asm 1: pxor xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm10,%xmm0 + +# qhasm: uint6464 xmm0 >>= 1 +# asm 1: psrlq $1,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm11,%xmm0 + +# qhasm: uint6464 xmm0 >>= 1 +# asm 1: psrlq $1,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm12,%xmm0 + +# qhasm: uint6464 xmm0 >>= 1 +# asm 1: psrlq $1,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm8,%xmm0 + +# qhasm: uint6464 xmm0 >>= 1 +# asm 1: psrlq $1,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: uint6464 xmm0 >>= 2 +# asm 1: psrlq $2,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm11,%xmm0 + +# qhasm: uint6464 xmm0 >>= 2 +# asm 1: psrlq $2,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm9,%xmm0 + +# qhasm: uint6464 xmm0 >>= 2 +# asm 1: psrlq $2,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm8,%xmm0 + +# qhasm: uint6464 xmm0 >>= 2 +# asm 1: psrlq $2,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm14,%xmm0 + +# qhasm: uint6464 xmm0 >>= 4 +# asm 1: psrlq $4,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm12,%xmm0 + +# qhasm: uint6464 xmm0 >>= 4 +# asm 1: psrlq $4,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm9,%xmm0 + +# qhasm: uint6464 xmm0 >>= 4 +# asm 1: psrlq $4,xmm0=int6464#1 +# asm 2: movdqa xmm0=%xmm0 +movdqa %xmm8,%xmm0 + +# qhasm: uint6464 xmm0 >>= 4 +# asm 1: psrlq $4,tmp=int64#6d +# asm 2: movl 12(tmp=%r9d +movl 12(%rcx),%r9d + +# qhasm: (uint32) bswap tmp +# asm 1: bswap lensav=int64#5 +# asm 2: mov lensav=%r8 +mov %rdx,%r8 + +# qhasm: (uint32) len >>= 4 +# asm 1: shr $4,tmp=int64#6d +# asm 2: movl 12(tmp=%r9d +movl 12(%rcx),%r9d + +# qhasm: (uint32) bswap tmp +# asm 1: bswap blp=int64#3 +# asm 2: 
leaq blp=%rdx +leaq 32(%rsp),%rdx + +# qhasm: *(int128 *)(blp + 0) = xmm8 +# asm 1: movdqa b=int64#4 +# asm 2: movzbq 0(b=%rcx +movzbq 0(%rdx),%rcx + +# qhasm: (uint8) b ^= *(uint8 *)(inp + 0) +# asm 1: xorb 0(tmp=int64#3d +# asm 2: movl 12(tmp=%edx +movl 12(%rcx),%edx + +# qhasm: (uint32) bswap tmp +# asm 1: bswap >= 4; + + tmp = load32_bigendian(np + 12); + tmp += len; + store32_bigendian(np + 12, tmp); + + blp = bl; + *(int128 *)(blp + 0) = xmm8; + *(int128 *)(blp + 16) = xmm9; + *(int128 *)(blp + 32) = xmm12; + *(int128 *)(blp + 48) = xmm14; + *(int128 *)(blp + 64) = xmm11; + *(int128 *)(blp + 80) = xmm15; + *(int128 *)(blp + 96) = xmm10; + *(int128 *)(blp + 112) = xmm13; + + bytes: + + if(lensav == 0) goto end; + + b = blp[0]; + *(unsigned char *)(outp + 0) = b; + + blp += 1; + outp +=1; + lensav -= 1; + + goto bytes; + + full: + + tmp = load32_bigendian(np + 12); + tmp += 8; + store32_bigendian(np + 12, tmp); + + *(int128 *) (outp + 0) = xmm8; + *(int128 *) (outp + 16) = xmm9; + *(int128 *) (outp + 32) = xmm12; + *(int128 *) (outp + 48) = xmm14; + *(int128 *) (outp + 64) = xmm11; + *(int128 *) (outp + 80) = xmm15; + *(int128 *) (outp + 96) = xmm10; + *(int128 *) (outp + 112) = xmm13; + + end: + return 0; + +} diff --git a/nacl/crypto_stream/aes128ctr/portable/api.h b/nacl/crypto_stream/aes128ctr/portable/api.h new file mode 100644 index 00000000..62fc8d88 --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/portable/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NONCEBYTES 16 +#define CRYPTO_BEFORENMBYTES 1408 diff --git a/nacl/crypto_stream/aes128ctr/portable/beforenm.c b/nacl/crypto_stream/aes128ctr/portable/beforenm.c new file mode 100644 index 00000000..8fa2673d --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/portable/beforenm.c @@ -0,0 +1,59 @@ +/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper + * Date: 2009-03-19 + * Public domain */ + +#include "consts.h" +#include "int128.h" +#include "common.h" 
+#include "crypto_stream.h" + +int crypto_stream_beforenm(unsigned char *c, const unsigned char *k) +{ + + /* + int64 x0; + int64 x1; + int64 x2; + int64 x3; + int64 e; + int64 q0; + int64 q1; + int64 q2; + int64 q3; + */ + + int128 xmm0; + int128 xmm1; + int128 xmm2; + int128 xmm3; + int128 xmm4; + int128 xmm5; + int128 xmm6; + int128 xmm7; + int128 xmm8; + int128 xmm9; + int128 xmm10; + int128 xmm11; + int128 xmm12; + int128 xmm13; + int128 xmm14; + int128 xmm15; + int128 t; + + bitslicekey0(k, c) + + keyexpbs1(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + keyexpbs(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm1);, 2,c) + keyexpbs(xmm0, xmm1, xmm3, xmm2, xmm6, xmm5, xmm4, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm6);, 3,c) + keyexpbs(xmm0, xmm1, xmm6, xmm4, xmm2, xmm7, xmm3, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 4,c) + + keyexpbs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 5,c) + keyexpbs(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm5);, 6,c) + keyexpbs(xmm0, xmm1, xmm3, xmm2, xmm6, xmm5, xmm4, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 7,c) + keyexpbs(xmm0, xmm1, xmm6, xmm4, xmm2, xmm7, xmm3, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm7);, 8,c) + + keyexpbs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm0); xor_rcon(&xmm1); xor_rcon(&xmm6); xor_rcon(&xmm3);, 9,c) + keyexpbs10(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + + return 0; +} diff --git a/nacl/crypto_stream/aes128ctr/portable/common.c 
b/nacl/crypto_stream/aes128ctr/portable/common.c new file mode 100644 index 00000000..14a28cc6 --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/portable/common.c @@ -0,0 +1,64 @@ +#include "common.h" + +uint32 load32_bigendian(const unsigned char *x) +{ + return + (uint32) (x[3]) \ + | (((uint32) (x[2])) << 8) \ + | (((uint32) (x[1])) << 16) \ + | (((uint32) (x[0])) << 24) + ; +} + +void store32_bigendian(unsigned char *x,uint32 u) +{ + x[3] = u; u >>= 8; + x[2] = u; u >>= 8; + x[1] = u; u >>= 8; + x[0] = u; +} + +uint32 load32_littleendian(const unsigned char *x) +{ + return + (uint32) (x[0]) \ + | (((uint32) (x[1])) << 8) \ + | (((uint32) (x[2])) << 16) \ + | (((uint32) (x[3])) << 24) + ; +} + +void store32_littleendian(unsigned char *x,uint32 u) +{ + x[0] = u; u >>= 8; + x[1] = u; u >>= 8; + x[2] = u; u >>= 8; + x[3] = u; +} + + +uint64 load64_littleendian(const unsigned char *x) +{ + return + (uint64) (x[0]) \ + | (((uint64) (x[1])) << 8) \ + | (((uint64) (x[2])) << 16) \ + | (((uint64) (x[3])) << 24) + | (((uint64) (x[4])) << 32) + | (((uint64) (x[5])) << 40) + | (((uint64) (x[6])) << 48) + | (((uint64) (x[7])) << 56) + ; +} + +void store64_littleendian(unsigned char *x,uint64 u) +{ + x[0] = u; u >>= 8; + x[1] = u; u >>= 8; + x[2] = u; u >>= 8; + x[3] = u; u >>= 8; + x[4] = u; u >>= 8; + x[5] = u; u >>= 8; + x[6] = u; u >>= 8; + x[7] = u; +} diff --git a/nacl/crypto_stream/aes128ctr/portable/common.h b/nacl/crypto_stream/aes128ctr/portable/common.h new file mode 100644 index 00000000..0f723332 --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/portable/common.h @@ -0,0 +1,788 @@ +/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper + Date: 2009-03-19 + Public domain */ +#ifndef COMMON_H +#define COMMON_H + +#include "types.h" + +#define load32_bigendian crypto_stream_aes128ctr_portable_load32_bigendian +uint32 load32_bigendian(const unsigned char *x); + +#define store32_bigendian crypto_stream_aes128ctr_portable_store32_bigendian 
+void store32_bigendian(unsigned char *x,uint32 u); + +#define load32_littleendian crypto_stream_aes128ctr_portable_load32_littleendian +uint32 load32_littleendian(const unsigned char *x); + +#define store32_littleendian crypto_stream_aes128ctr_portable_store32_littleendian +void store32_littleendian(unsigned char *x,uint32 u); + +#define load64_littleendian crypto_stream_aes128ctr_portable_load64_littleendian +uint64 load64_littleendian(const unsigned char *x); + +#define store64_littleendian crypto_stream_aes128ctr_portable_store64_littleendian +void store64_littleendian(unsigned char *x,uint64 u); + +/* Macros required only for key expansion */ + +#define keyexpbs1(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \ + rotbyte(&b0);\ + rotbyte(&b1);\ + rotbyte(&b2);\ + rotbyte(&b3);\ + rotbyte(&b4);\ + rotbyte(&b5);\ + rotbyte(&b6);\ + rotbyte(&b7);\ + ;\ + sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\ + ;\ + xor_rcon(&b0);\ + shufb(&b0, EXPB0);\ + shufb(&b1, EXPB0);\ + shufb(&b4, EXPB0);\ + shufb(&b6, EXPB0);\ + shufb(&b3, EXPB0);\ + shufb(&b7, EXPB0);\ + shufb(&b2, EXPB0);\ + shufb(&b5, EXPB0);\ + shufb(&b0, EXPB0);\ + ;\ + t0 = *(int128 *)(bskey + 0);\ + t1 = *(int128 *)(bskey + 16);\ + t2 = *(int128 *)(bskey + 32);\ + t3 = *(int128 *)(bskey + 48);\ + t4 = *(int128 *)(bskey + 64);\ + t5 = *(int128 *)(bskey + 80);\ + t6 = *(int128 *)(bskey + 96);\ + t7 = *(int128 *)(bskey + 112);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, 
&t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + *(int128 *)(bskey + 128) = b0;\ + *(int128 *)(bskey + 144) = b1;\ + *(int128 *)(bskey + 160) = b4;\ + *(int128 *)(bskey + 176) = b6;\ + *(int128 *)(bskey + 192) = b3;\ + *(int128 *)(bskey + 208) = b7;\ + *(int128 *)(bskey + 224) = b2;\ + *(int128 *)(bskey + 240) = b5;\ + +#define keyexpbs10(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) ;\ + toggle(&b0);\ + toggle(&b1);\ + toggle(&b5);\ + toggle(&b6);\ + rotbyte(&b0);\ + rotbyte(&b1);\ + rotbyte(&b2);\ + rotbyte(&b3);\ + rotbyte(&b4);\ + rotbyte(&b5);\ + rotbyte(&b6);\ + rotbyte(&b7);\ + ;\ + sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\ + ;\ + xor_rcon(&b1);\ + xor_rcon(&b4);\ + xor_rcon(&b3);\ + xor_rcon(&b7);\ + shufb(&b0, EXPB0);\ + shufb(&b1, EXPB0);\ + shufb(&b4, EXPB0);\ + shufb(&b6, EXPB0);\ + shufb(&b3, EXPB0);\ + shufb(&b7, EXPB0);\ + shufb(&b2, EXPB0);\ + shufb(&b5, EXPB0);\ + ;\ + t0 = *(int128 *)(bskey + 9 * 128 + 0);\ + t1 = *(int128 *)(bskey + 9 * 128 + 16);\ + t2 = *(int128 *)(bskey + 9 * 128 + 32);\ + t3 = *(int128 *)(bskey + 9 * 128 + 48);\ + t4 = 
*(int128 *)(bskey + 9 * 128 + 64);\ + t5 = *(int128 *)(bskey + 9 * 128 + 80);\ + t6 = *(int128 *)(bskey + 9 * 128 + 96);\ + t7 = *(int128 *)(bskey + 9 * 128 + 112);\ + ;\ + toggle(&t0);\ + toggle(&t1);\ + toggle(&t5);\ + toggle(&t6);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + shufb(&b0, M0);\ + shufb(&b1, M0);\ + shufb(&b2, M0);\ + shufb(&b3, M0);\ + shufb(&b4, M0);\ + shufb(&b5, M0);\ + shufb(&b6, M0);\ + shufb(&b7, M0);\ + ;\ + *(int128 *)(bskey + 1280) = b0;\ + *(int128 *)(bskey + 1296) = b1;\ + *(int128 *)(bskey + 1312) = b4;\ + *(int128 *)(bskey + 1328) = b6;\ + 
*(int128 *)(bskey + 1344) = b3;\ + *(int128 *)(bskey + 1360) = b7;\ + *(int128 *)(bskey + 1376) = b2;\ + *(int128 *)(bskey + 1392) = b5;\ + + +#define keyexpbs(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, rcon, i, bskey) \ + toggle(&b0);\ + toggle(&b1);\ + toggle(&b5);\ + toggle(&b6);\ + rotbyte(&b0);\ + rotbyte(&b1);\ + rotbyte(&b2);\ + rotbyte(&b3);\ + rotbyte(&b4);\ + rotbyte(&b5);\ + rotbyte(&b6);\ + rotbyte(&b7);\ + ;\ + sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\ + ;\ + rcon;\ + shufb(&b0, EXPB0);\ + shufb(&b1, EXPB0);\ + shufb(&b4, EXPB0);\ + shufb(&b6, EXPB0);\ + shufb(&b3, EXPB0);\ + shufb(&b7, EXPB0);\ + shufb(&b2, EXPB0);\ + shufb(&b5, EXPB0);\ + ;\ + t0 = *(int128 *)(bskey + (i-1) * 128 + 0);\ + t1 = *(int128 *)(bskey + (i-1) * 128 + 16);\ + t2 = *(int128 *)(bskey + (i-1) * 128 + 32);\ + t3 = *(int128 *)(bskey + (i-1) * 128 + 48);\ + t4 = *(int128 *)(bskey + (i-1) * 128 + 64);\ + t5 = *(int128 *)(bskey + (i-1) * 128 + 80);\ + t6 = *(int128 *)(bskey + (i-1) * 128 + 96);\ + t7 = *(int128 *)(bskey + (i-1) * 128 + 112);\ + ;\ + toggle(&t0);\ + toggle(&t1);\ + toggle(&t5);\ + toggle(&t6);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + 
rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + rshift32_littleendian(&t0, 8);\ + rshift32_littleendian(&t1, 8);\ + rshift32_littleendian(&t2, 8);\ + rshift32_littleendian(&t3, 8);\ + rshift32_littleendian(&t4, 8);\ + rshift32_littleendian(&t5, 8);\ + rshift32_littleendian(&t6, 8);\ + rshift32_littleendian(&t7, 8);\ + ;\ + xor2(&b0, &t0);\ + xor2(&b1, &t1);\ + xor2(&b4, &t2);\ + xor2(&b6, &t3);\ + xor2(&b3, &t4);\ + xor2(&b7, &t5);\ + xor2(&b2, &t6);\ + xor2(&b5, &t7);\ + ;\ + *(int128 *)(bskey + i*128 + 0) = b0;\ + *(int128 *)(bskey + i*128 + 16) = b1;\ + *(int128 *)(bskey + i*128 + 32) = b4;\ + *(int128 *)(bskey + i*128 + 48) = b6;\ + *(int128 *)(bskey + i*128 + 64) = b3;\ + *(int128 *)(bskey + i*128 + 80) = b7;\ + *(int128 *)(bskey + i*128 + 96) = b2;\ + *(int128 *)(bskey + i*128 + 112) = b5;\ + +/* Macros used in multiple contexts */ + +#define bitslicekey0(key, bskey) \ + xmm0 = *(int128 *) (key + 0);\ + shufb(&xmm0, M0);\ + copy2(&xmm1, &xmm0);\ + copy2(&xmm2, &xmm0);\ + copy2(&xmm3, &xmm0);\ + copy2(&xmm4, &xmm0);\ + copy2(&xmm5, &xmm0);\ + copy2(&xmm6, &xmm0);\ + copy2(&xmm7, &xmm0);\ + ;\ + bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\ + ;\ + *(int128 *) (bskey + 0) = xmm0;\ + *(int128 *) (bskey + 16) = xmm1;\ + *(int128 *) (bskey + 32) = xmm2;\ + *(int128 *) (bskey + 48) = xmm3;\ + *(int128 *) (bskey + 64) = xmm4;\ + *(int128 *) (bskey + 80) = xmm5;\ + *(int128 *) (bskey + 96) = xmm6;\ + *(int128 *) (bskey + 112) = xmm7;\ + + +#define bitslicekey10(key, bskey) \ + xmm0 = *(int128 *) (key + 0);\ + copy2(xmm1, xmm0);\ + copy2(xmm2, xmm0);\ + copy2(xmm3, xmm0);\ + copy2(xmm4, xmm0);\ + copy2(xmm5, xmm0);\ + copy2(xmm6, xmm0);\ + copy2(xmm7, xmm0);\ + ;\ + bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\ + ;\ + toggle(&xmm6);\ + toggle(&xmm5);\ + toggle(&xmm1);\ + toggle(&xmm0);\ + ;\ 
+ *(int128 *) (bskey + 0 + 1280) = xmm0;\ + *(int128 *) (bskey + 16 + 1280) = xmm1;\ + *(int128 *) (bskey + 32 + 1280) = xmm2;\ + *(int128 *) (bskey + 48 + 1280) = xmm3;\ + *(int128 *) (bskey + 64 + 1280) = xmm4;\ + *(int128 *) (bskey + 80 + 1280) = xmm5;\ + *(int128 *) (bskey + 96 + 1280) = xmm6;\ + *(int128 *) (bskey + 112 + 1280) = xmm7;\ + + +#define bitslicekey(i,key,bskey) \ + xmm0 = *(int128 *) (key + 0);\ + shufb(&xmm0, M0);\ + copy2(&xmm1, &xmm0);\ + copy2(&xmm2, &xmm0);\ + copy2(&xmm3, &xmm0);\ + copy2(&xmm4, &xmm0);\ + copy2(&xmm5, &xmm0);\ + copy2(&xmm6, &xmm0);\ + copy2(&xmm7, &xmm0);\ + ;\ + bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\ + ;\ + toggle(&xmm6);\ + toggle(&xmm5);\ + toggle(&xmm1);\ + toggle(&xmm0);\ + ;\ + *(int128 *) (bskey + 0 + 128*i) = xmm0;\ + *(int128 *) (bskey + 16 + 128*i) = xmm1;\ + *(int128 *) (bskey + 32 + 128*i) = xmm2;\ + *(int128 *) (bskey + 48 + 128*i) = xmm3;\ + *(int128 *) (bskey + 64 + 128*i) = xmm4;\ + *(int128 *) (bskey + 80 + 128*i) = xmm5;\ + *(int128 *) (bskey + 96 + 128*i) = xmm6;\ + *(int128 *) (bskey + 112 + 128*i) = xmm7;\ + + +#define bitslice(x0, x1, x2, x3, x4, x5, x6, x7, t) \ + swapmove(x0, x1, 1, BS0, t);\ + swapmove(x2, x3, 1, BS0, t);\ + swapmove(x4, x5, 1, BS0, t);\ + swapmove(x6, x7, 1, BS0, t);\ + ;\ + swapmove(x0, x2, 2, BS1, t);\ + swapmove(x1, x3, 2, BS1, t);\ + swapmove(x4, x6, 2, BS1, t);\ + swapmove(x5, x7, 2, BS1, t);\ + ;\ + swapmove(x0, x4, 4, BS2, t);\ + swapmove(x1, x5, 4, BS2, t);\ + swapmove(x2, x6, 4, BS2, t);\ + swapmove(x3, x7, 4, BS2, t);\ + + +#define swapmove(a, b, n, m, t) \ + copy2(&t, &b);\ + rshift64_littleendian(&t, n);\ + xor2(&t, &a);\ + and2(&t, &m);\ + xor2(&a, &t);\ + lshift64_littleendian(&t, n);\ + xor2(&b, &t); + +#define rotbyte(x) \ + shufb(x, ROTB) /* TODO: Make faster */ + + +/* Macros used for encryption (and decryption) */ + +#define shiftrows(x0, x1, x2, x3, x4, x5, x6, x7, i, M, bskey) \ + xor2(&x0, (int128 *)(bskey + 128*(i-1) + 0));\ + 
shufb(&x0, M);\ + xor2(&x1, (int128 *)(bskey + 128*(i-1) + 16));\ + shufb(&x1, M);\ + xor2(&x2, (int128 *)(bskey + 128*(i-1) + 32));\ + shufb(&x2, M);\ + xor2(&x3, (int128 *)(bskey + 128*(i-1) + 48));\ + shufb(&x3, M);\ + xor2(&x4, (int128 *)(bskey + 128*(i-1) + 64));\ + shufb(&x4, M);\ + xor2(&x5, (int128 *)(bskey + 128*(i-1) + 80));\ + shufb(&x5, M);\ + xor2(&x6, (int128 *)(bskey + 128*(i-1) + 96));\ + shufb(&x6, M);\ + xor2(&x7, (int128 *)(bskey + 128*(i-1) + 112));\ + shufb(&x7, M);\ + + +#define mixcolumns(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, t7) \ + shufd(&t0, &x0, 0x93);\ + shufd(&t1, &x1, 0x93);\ + shufd(&t2, &x2, 0x93);\ + shufd(&t3, &x3, 0x93);\ + shufd(&t4, &x4, 0x93);\ + shufd(&t5, &x5, 0x93);\ + shufd(&t6, &x6, 0x93);\ + shufd(&t7, &x7, 0x93);\ + ;\ + xor2(&x0, &t0);\ + xor2(&x1, &t1);\ + xor2(&x2, &t2);\ + xor2(&x3, &t3);\ + xor2(&x4, &t4);\ + xor2(&x5, &t5);\ + xor2(&x6, &t6);\ + xor2(&x7, &t7);\ + ;\ + xor2(&t0, &x7);\ + xor2(&t1, &x0);\ + xor2(&t2, &x1);\ + xor2(&t1, &x7);\ + xor2(&t3, &x2);\ + xor2(&t4, &x3);\ + xor2(&t5, &x4);\ + xor2(&t3, &x7);\ + xor2(&t6, &x5);\ + xor2(&t7, &x6);\ + xor2(&t4, &x7);\ + ;\ + shufd(&x0, &x0, 0x4e);\ + shufd(&x1, &x1, 0x4e);\ + shufd(&x2, &x2, 0x4e);\ + shufd(&x3, &x3, 0x4e);\ + shufd(&x4, &x4, 0x4e);\ + shufd(&x5, &x5, 0x4e);\ + shufd(&x6, &x6, 0x4e);\ + shufd(&x7, &x7, 0x4e);\ + ;\ + xor2(&t0, &x0);\ + xor2(&t1, &x1);\ + xor2(&t2, &x2);\ + xor2(&t3, &x3);\ + xor2(&t4, &x4);\ + xor2(&t5, &x5);\ + xor2(&t6, &x6);\ + xor2(&t7, &x7);\ + + +#define aesround(i, b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \ + shiftrows(b0, b1, b2, b3, b4, b5, b6, b7, i, SR, bskey);\ + sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\ + mixcolumns(b0, b1, b4, b6, b3, b7, b2, b5, t0, t1, t2, t3, t4, t5, t6, t7);\ + + +#define lastround(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \ + shiftrows(b0, b1, b2, b3, b4, b5, b6, b7, 10, SRM0, 
bskey);\ + sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\ + xor2(&b0,(int128 *)(bskey + 128*10));\ + xor2(&b1,(int128 *)(bskey + 128*10+16));\ + xor2(&b4,(int128 *)(bskey + 128*10+32));\ + xor2(&b6,(int128 *)(bskey + 128*10+48));\ + xor2(&b3,(int128 *)(bskey + 128*10+64));\ + xor2(&b7,(int128 *)(bskey + 128*10+80));\ + xor2(&b2,(int128 *)(bskey + 128*10+96));\ + xor2(&b5,(int128 *)(bskey + 128*10+112));\ + + +#define sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, s0, s1, s2, s3) \ + InBasisChange(b0, b1, b2, b3, b4, b5, b6, b7); \ + Inv_GF256(b6, b5, b0, b3, b7, b1, b4, b2, t0, t1, t2, t3, s0, s1, s2, s3); \ + OutBasisChange(b7, b1, b4, b2, b6, b5, b0, b3); \ + + +#define InBasisChange(b0, b1, b2, b3, b4, b5, b6, b7) \ + xor2(&b5, &b6);\ + xor2(&b2, &b1);\ + xor2(&b5, &b0);\ + xor2(&b6, &b2);\ + xor2(&b3, &b0);\ + ;\ + xor2(&b6, &b3);\ + xor2(&b3, &b7);\ + xor2(&b3, &b4);\ + xor2(&b7, &b5);\ + xor2(&b3, &b1);\ + ;\ + xor2(&b4, &b5);\ + xor2(&b2, &b7);\ + xor2(&b1, &b5);\ + +#define OutBasisChange(b0, b1, b2, b3, b4, b5, b6, b7) \ + xor2(&b0, &b6);\ + xor2(&b1, &b4);\ + xor2(&b2, &b0);\ + xor2(&b4, &b6);\ + xor2(&b6, &b1);\ + ;\ + xor2(&b1, &b5);\ + xor2(&b5, &b3);\ + xor2(&b2, &b5);\ + xor2(&b3, &b7);\ + xor2(&b7, &b5);\ + ;\ + xor2(&b4, &b7);\ + +#define Mul_GF4(x0, x1, y0, y1, t0) \ + copy2(&t0, &y0);\ + xor2(&t0, &y1);\ + and2(&t0, &x0);\ + xor2(&x0, &x1);\ + and2(&x0, &y1);\ + and2(&x1, &y0);\ + xor2(&x0, &x1);\ + xor2(&x1, &t0);\ + +#define Mul_GF4_N(x0, x1, y0, y1, t0) \ + copy2(&t0, &y0);\ + xor2(&t0, &y1);\ + and2(&t0, &x0);\ + xor2(&x0, &x1);\ + and2(&x0, &y1);\ + and2(&x1, &y0);\ + xor2(&x1, &x0);\ + xor2(&x0, &t0);\ + +#define Mul_GF4_2(x0, x1, x2, x3, y0, y1, t0, t1) \ + copy2(&t0, = y0);\ + xor2(&t0, &y1);\ + copy2(&t1, &t0);\ + and2(&t0, &x0);\ + and2(&t1, &x2);\ + xor2(&x0, &x1);\ + xor2(&x2, &x3);\ + and2(&x0, &y1);\ + and2(&x2, &y1);\ + and2(&x1, &y0);\ + and2(&x3, &y0);\ + xor2(&x0, &x1);\ + xor2(&x2, &x3);\ + 
xor2(&x1, &t0);\ + xor2(&x3, &t1);\ + +#define Mul_GF16(x0, x1, x2, x3, y0, y1, y2, y3, t0, t1, t2, t3) \ + copy2(&t0, &x0);\ + copy2(&t1, &x1);\ + Mul_GF4(x0, x1, y0, y1, t2);\ + xor2(&t0, &x2);\ + xor2(&t1, &x3);\ + xor2(&y0, &y2);\ + xor2(&y1, &y3);\ + Mul_GF4_N(t0, t1, y0, y1, t2);\ + Mul_GF4(x2, x3, y2, y3, t3);\ + ;\ + xor2(&x0, &t0);\ + xor2(&x2, &t0);\ + xor2(&x1, &t1);\ + xor2(&x3, &t1);\ + +#define Mul_GF16_2(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, t0, t1, t2, t3) \ + copy2(&t0, &x0);\ + copy2(&t1, &x1);\ + Mul_GF4(x0, x1, y0, y1, t2);\ + xor2(&t0, &x2);\ + xor2(&t1, &x3);\ + xor2(&y0, &y2);\ + xor2(&y1, &y3);\ + Mul_GF4_N(t0, t1, y0, y1, t3);\ + Mul_GF4(x2, x3, y2, y3, t2);\ + ;\ + xor2(&x0, &t0);\ + xor2(&x2, &t0);\ + xor2(&x1, &t1);\ + xor2(&x3, &t1);\ + ;\ + copy2(&t0, &x4);\ + copy2(&t1, &x5);\ + xor2(&t0, &x6);\ + xor2(&t1, &x7);\ + Mul_GF4_N(t0, t1, y0, y1, t3);\ + Mul_GF4(x6, x7, y2, y3, t2);\ + xor2(&y0, &y2);\ + xor2(&y1, &y3);\ + Mul_GF4(x4, x5, y0, y1, t3);\ + ;\ + xor2(&x4, &t0);\ + xor2(&x6, &t0);\ + xor2(&x5, &t1);\ + xor2(&x7, &t1);\ + +#define Inv_GF16(x0, x1, x2, x3, t0, t1, t2, t3) \ + copy2(&t0, &x1);\ + copy2(&t1, &x0);\ + and2(&t0, &x3);\ + or2(&t1, &x2);\ + copy2(&t2, &x1);\ + copy2(&t3, &x0);\ + or2(&t2, &x2);\ + or2(&t3, &x3);\ + xor2(&t2, &t3);\ + ;\ + xor2(&t0, &t2);\ + xor2(&t1, &t2);\ + ;\ + Mul_GF4_2(x0, x1, x2, x3, t1, t0, t2, t3);\ + + +#define Inv_GF256(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, s0, s1, s2, s3) \ + copy2(&t3, &x4);\ + copy2(&t2, &x5);\ + copy2(&t1, &x1);\ + copy2(&s1, &x7);\ + copy2(&s0, &x0);\ + ;\ + xor2(&t3, &x6);\ + xor2(&t2, &x7);\ + xor2(&t1, &x3);\ + xor2(&s1, &x6);\ + xor2(&s0, &x2);\ + ;\ + copy2(&s2, &t3);\ + copy2(&t0, &t2);\ + copy2(&s3, &t3);\ + ;\ + or2(&t2, &t1);\ + or2(&t3, &s0);\ + xor2(&s3, &t0);\ + and2(&s2, &s0);\ + and2(&t0, &t1);\ + xor2(&s0, &t1);\ + and2(&s3, &s0);\ + copy2(&s0, &x3);\ + xor2(&s0, &x2);\ + and2(&s1, &s0);\ + xor2(&t3, &s1);\ + xor2(&t2, &s1);\ + 
copy2(&s1, &x4);\ + xor2(&s1, &x5);\ + copy2(&s0, &x1);\ + copy2(&t1, &s1);\ + xor2(&s0, &x0);\ + or2(&t1, &s0);\ + and2(&s1, &s0);\ + xor2(&t0, &s1);\ + xor2(&t3, &s3);\ + xor2(&t2, &s2);\ + xor2(&t1, &s3);\ + xor2(&t0, &s2);\ + xor2(&t1, &s2);\ + copy2(&s0, &x7);\ + copy2(&s1, &x6);\ + copy2(&s2, &x5);\ + copy2(&s3, &x4);\ + and2(&s0, &x3);\ + and2(&s1, &x2);\ + and2(&s2, &x1);\ + or2(&s3, &x0);\ + xor2(&t3, &s0);\ + xor2(&t2, &s1);\ + xor2(&t1, &s2);\ + xor2(&t0, &s3);\ + ;\ + copy2(&s0, &t3);\ + xor2(&s0, &t2);\ + and2(&t3, &t1);\ + copy2(&s2, &t0);\ + xor2(&s2, &t3);\ + copy2(&s3, &s0);\ + and2(&s3, &s2);\ + xor2(&s3, &t2);\ + copy2(&s1, &t1);\ + xor2(&s1, &t0);\ + xor2(&t3, &t2);\ + and2(&s1, &t3);\ + xor2(&s1, &t0);\ + xor2(&t1, &s1);\ + copy2(&t2, &s2);\ + xor2(&t2, &s1);\ + and2(&t2, &t0);\ + xor2(&t1, &t2);\ + xor2(&s2, &t2);\ + and2(&s2, &s3);\ + xor2(&s2, &s0);\ + ;\ + Mul_GF16_2(x0, x1, x2, x3, x4, x5, x6, x7, s3, s2, s1, t1, s0, t0, t2, t3);\ + +#endif diff --git a/nacl/crypto_stream/aes128ctr/portable/consts.c b/nacl/crypto_stream/aes128ctr/portable/consts.c new file mode 100644 index 00000000..ed2835db --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/portable/consts.c @@ -0,0 +1,14 @@ +#include "consts.h" + +const unsigned char ROTB[16] = {0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08}; +const unsigned char M0[16] = {0x0f, 0x0b, 0x07, 0x03, 0x0e, 0x0a, 0x06, 0x02, 0x0d, 0x09, 0x05, 0x01, 0x0c, 0x08, 0x04, 0x00}; +const unsigned char EXPB0[16] = {0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07, 0x0b, 0x0b, 0x0b, 0x0b, 0x0f, 0x0f, 0x0f, 0x0f}; + +const unsigned char SWAP32[16] = {0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c}; +const unsigned char M0SWAP[16] = {0x0c, 0x08, 0x04, 0x00, 0x0d, 0x09, 0x05, 0x01, 0x0e, 0x0a, 0x06, 0x02, 0x0f, 0x0b, 0x07, 0x03}; +const unsigned char SR[16] = {0x01, 0x02, 0x03, 0x00, 0x06, 0x07, 0x04, 0x05, 0x0b, 0x08, 0x09, 
0x0a, 0x0c, 0x0d, 0x0e, 0x0f}; +const unsigned char SRM0[16] = {0x0f, 0x0a, 0x05, 0x00, 0x0e, 0x09, 0x04, 0x03, 0x0d, 0x08, 0x07, 0x02, 0x0c, 0x0b, 0x06, 0x01}; + +const int128 BS0 = {0x5555555555555555ULL, 0x5555555555555555ULL}; +const int128 BS1 = {0x3333333333333333ULL, 0x3333333333333333ULL}; +const int128 BS2 = {0x0f0f0f0f0f0f0f0fULL, 0x0f0f0f0f0f0f0f0fULL}; diff --git a/nacl/crypto_stream/aes128ctr/portable/consts.h b/nacl/crypto_stream/aes128ctr/portable/consts.h new file mode 100644 index 00000000..4c50360b --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/portable/consts.h @@ -0,0 +1,28 @@ +#ifndef CONSTS_H +#define CONSTS_H + +#include "int128.h" + +#define ROTB crypto_stream_aes128ctr_portable_ROTB +#define M0 crypto_stream_aes128ctr_portable_M0 +#define EXPB0 crypto_stream_aes128ctr_portable_EXPB0 +#define SWAP32 crypto_stream_aes128ctr_portable_SWAP32 +#define M0SWAP crypto_stream_aes128ctr_portable_M0SWAP +#define SR crypto_stream_aes128ctr_portable_SR +#define SRM0 crypto_stream_aes128ctr_portable_SRM0 +#define BS0 crypto_stream_aes128ctr_portable_BS0 +#define BS1 crypto_stream_aes128ctr_portable_BS1 +#define BS2 crypto_stream_aes128ctr_portable_BS2 + +extern const unsigned char ROTB[16]; +extern const unsigned char M0[16]; +extern const unsigned char EXPB0[16]; +extern const unsigned char SWAP32[16]; +extern const unsigned char M0SWAP[16]; +extern const unsigned char SR[16]; +extern const unsigned char SRM0[16]; +extern const int128 BS0; +extern const int128 BS1; +extern const int128 BS2; + +#endif diff --git a/nacl/crypto_stream/aes128ctr/portable/int128.c b/nacl/crypto_stream/aes128ctr/portable/int128.c new file mode 100644 index 00000000..25894d42 --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/portable/int128.c @@ -0,0 +1,128 @@ +#include "int128.h" +#include "common.h" + +void xor2(int128 *r, const int128 *x) +{ + r->a ^= x->a; + r->b ^= x->b; +} + +void and2(int128 *r, const int128 *x) +{ + r->a &= x->a; + r->b &= x->b; +} + +void or2(int128 
*r, const int128 *x) +{ + r->a |= x->a; + r->b |= x->b; +} + +void copy2(int128 *r, const int128 *x) +{ + r->a = x->a; + r->b = x->b; +} + +void shufb(int128 *r, const unsigned char *l) +{ + int128 t; + copy2(&t,r); + unsigned char *cr = (unsigned char *)r; + unsigned char *ct = (unsigned char *)&t; + cr[0] = ct[l[0]]; + cr[1] = ct[l[1]]; + cr[2] = ct[l[2]]; + cr[3] = ct[l[3]]; + cr[4] = ct[l[4]]; + cr[5] = ct[l[5]]; + cr[6] = ct[l[6]]; + cr[7] = ct[l[7]]; + cr[8] = ct[l[8]]; + cr[9] = ct[l[9]]; + cr[10] = ct[l[10]]; + cr[11] = ct[l[11]]; + cr[12] = ct[l[12]]; + cr[13] = ct[l[13]]; + cr[14] = ct[l[14]]; + cr[15] = ct[l[15]]; +} + +void shufd(int128 *r, const int128 *x, const unsigned int c) +{ + int128 t; + uint32 *tp = (uint32 *)&t; + uint32 *xp = (uint32 *)x; + tp[0] = xp[c&3]; + tp[1] = xp[(c>>2)&3]; + tp[2] = xp[(c>>4)&3]; + tp[3] = xp[(c>>6)&3]; + copy2(r,&t); +} + +void rshift32_littleendian(int128 *r, const unsigned int n) +{ + unsigned char *rp = (unsigned char *)r; + uint32 t; + t = load32_littleendian(rp); + t >>= n; + store32_littleendian(rp, t); + t = load32_littleendian(rp+4); + t >>= n; + store32_littleendian(rp+4, t); + t = load32_littleendian(rp+8); + t >>= n; + store32_littleendian(rp+8, t); + t = load32_littleendian(rp+12); + t >>= n; + store32_littleendian(rp+12, t); +} + +void rshift64_littleendian(int128 *r, const unsigned int n) +{ + unsigned char *rp = (unsigned char *)r; + uint64 t; + t = load64_littleendian(rp); + t >>= n; + store64_littleendian(rp, t); + t = load64_littleendian(rp+8); + t >>= n; + store64_littleendian(rp+8, t); +} + +void lshift64_littleendian(int128 *r, const unsigned int n) +{ + unsigned char *rp = (unsigned char *)r; + uint64 t; + t = load64_littleendian(rp); + t <<= n; + store64_littleendian(rp, t); + t = load64_littleendian(rp+8); + t <<= n; + store64_littleendian(rp+8, t); +} + +void toggle(int128 *r) +{ + r->a ^= 0xffffffffffffffffULL; + r->b ^= 0xffffffffffffffffULL; +} + +void xor_rcon(int128 *r) +{ + unsigned 
char *rp = (unsigned char *)r; + uint32 t; + t = load32_littleendian(rp+12); + t ^= 0xffffffff; + store32_littleendian(rp+12, t); +} + +void add_uint32_big(int128 *r, uint32 x) +{ + unsigned char *rp = (unsigned char *)r; + uint32 t; + t = load32_littleendian(rp+12); + t += x; + store32_littleendian(rp+12, t); +} diff --git a/nacl/crypto_stream/aes128ctr/portable/int128.h b/nacl/crypto_stream/aes128ctr/portable/int128.h new file mode 100644 index 00000000..7099e5b1 --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/portable/int128.h @@ -0,0 +1,47 @@ +#ifndef INT128_H +#define INT128_H + +#include "common.h" + +typedef struct{ + unsigned long long a; + unsigned long long b; +} int128; + +#define xor2 crypto_stream_aes128ctr_portable_xor2 +void xor2(int128 *r, const int128 *x); + +#define and2 crypto_stream_aes128ctr_portable_and2 +void and2(int128 *r, const int128 *x); + +#define or2 crypto_stream_aes128ctr_portable_or2 +void or2(int128 *r, const int128 *x); + +#define copy2 crypto_stream_aes128ctr_portable_copy2 +void copy2(int128 *r, const int128 *x); + +#define shufb crypto_stream_aes128ctr_portable_shufb +void shufb(int128 *r, const unsigned char *l); + +#define shufd crypto_stream_aes128ctr_portable_shufd +void shufd(int128 *r, const int128 *x, const unsigned int c); + +#define rshift32_littleendian crypto_stream_aes128ctr_portable_rshift32_littleendian +void rshift32_littleendian(int128 *r, const unsigned int n); + +#define rshift64_littleendian crypto_stream_aes128ctr_portable_rshift64_littleendian +void rshift64_littleendian(int128 *r, const unsigned int n); + +#define lshift64_littleendian crypto_stream_aes128ctr_portable_lshift64_littleendian +void lshift64_littleendian(int128 *r, const unsigned int n); + +#define toggle crypto_stream_aes128ctr_portable_toggle +void toggle(int128 *r); + +#define xor_rcon crypto_stream_aes128ctr_portable_xor_rcon +void xor_rcon(int128 *r); + +#define add_uint32_big crypto_stream_aes128ctr_portable_add_uint32_big +void 
add_uint32_big(int128 *r, uint32 x); + +#endif diff --git a/nacl/crypto_stream/aes128ctr/portable/stream.c b/nacl/crypto_stream/aes128ctr/portable/stream.c new file mode 100644 index 00000000..963fa8c1 --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/portable/stream.c @@ -0,0 +1,28 @@ +#include "crypto_stream.h" + +int crypto_stream( + unsigned char *out, + unsigned long long outlen, + const unsigned char *n, + const unsigned char *k + ) +{ + unsigned char d[crypto_stream_BEFORENMBYTES]; + crypto_stream_beforenm(d, k); + crypto_stream_afternm(out, outlen, n, d); + return 0; +} + +int crypto_stream_xor( + unsigned char *out, + const unsigned char *in, + unsigned long long inlen, + const unsigned char *n, + const unsigned char *k + ) +{ + unsigned char d[crypto_stream_BEFORENMBYTES]; + crypto_stream_beforenm(d, k); + crypto_stream_xor_afternm(out, in, inlen, n, d); + return 0; +} diff --git a/nacl/crypto_stream/aes128ctr/portable/types.h b/nacl/crypto_stream/aes128ctr/portable/types.h new file mode 100644 index 00000000..6aa502fc --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/portable/types.h @@ -0,0 +1,10 @@ +#ifndef TYPES_H +#define TYPES_H + +#include "crypto_uint32.h" +typedef crypto_uint32 uint32; + +#include "crypto_uint64.h" +typedef crypto_uint64 uint64; + +#endif diff --git a/nacl/crypto_stream/aes128ctr/portable/xor_afternm.c b/nacl/crypto_stream/aes128ctr/portable/xor_afternm.c new file mode 100644 index 00000000..f2ff8ff6 --- /dev/null +++ b/nacl/crypto_stream/aes128ctr/portable/xor_afternm.c @@ -0,0 +1,180 @@ +/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper + * Date: 2009-03-19 + * Public domain */ + +#include +#include "int128.h" +#include "common.h" +#include "consts.h" +#include "crypto_stream.h" + +int crypto_stream_xor_afternm(unsigned char *outp, const unsigned char *inp, unsigned long long len, const unsigned char *noncep, const unsigned char *c) +{ + + int128 xmm0; + int128 xmm1; + int128 xmm2; + int128 xmm3; + 
int128 xmm4; + int128 xmm5; + int128 xmm6; + int128 xmm7; + + int128 xmm8; + int128 xmm9; + int128 xmm10; + int128 xmm11; + int128 xmm12; + int128 xmm13; + int128 xmm14; + int128 xmm15; + + int128 nonce_stack; + unsigned long long lensav; + unsigned char bl[128]; + unsigned char *blp; + unsigned char b; + + uint32 tmp; + + /* Copy nonce on the stack */ + copy2(&nonce_stack, (int128 *) (noncep + 0)); + unsigned char *np = (unsigned char *)&nonce_stack; + + enc_block: + + xmm0 = *(int128 *) (np + 0); + copy2(&xmm1, &xmm0); + shufb(&xmm1, SWAP32); + copy2(&xmm2, &xmm1); + copy2(&xmm3, &xmm1); + copy2(&xmm4, &xmm1); + copy2(&xmm5, &xmm1); + copy2(&xmm6, &xmm1); + copy2(&xmm7, &xmm1); + + add_uint32_big(&xmm1, 1); + add_uint32_big(&xmm2, 2); + add_uint32_big(&xmm3, 3); + add_uint32_big(&xmm4, 4); + add_uint32_big(&xmm5, 5); + add_uint32_big(&xmm6, 6); + add_uint32_big(&xmm7, 7); + + shufb(&xmm0, M0); + shufb(&xmm1, M0SWAP); + shufb(&xmm2, M0SWAP); + shufb(&xmm3, M0SWAP); + shufb(&xmm4, M0SWAP); + shufb(&xmm5, M0SWAP); + shufb(&xmm6, M0SWAP); + shufb(&xmm7, M0SWAP); + + bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8) + + aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + aesround( 8, xmm8, 
xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c) + lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c) + + bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0) + + if(len < 128) goto partial; + if(len == 128) goto full; + + tmp = load32_bigendian(np + 12); + tmp += 8; + store32_bigendian(np + 12, tmp); + + xor2(&xmm8, (int128 *)(inp + 0)); + xor2(&xmm9, (int128 *)(inp + 16)); + xor2(&xmm12, (int128 *)(inp + 32)); + xor2(&xmm14, (int128 *)(inp + 48)); + xor2(&xmm11, (int128 *)(inp + 64)); + xor2(&xmm15, (int128 *)(inp + 80)); + xor2(&xmm10, (int128 *)(inp + 96)); + xor2(&xmm13, (int128 *)(inp + 112)); + + *(int128 *) (outp + 0) = xmm8; + *(int128 *) (outp + 16) = xmm9; + *(int128 *) (outp + 32) = xmm12; + *(int128 *) (outp + 48) = xmm14; + *(int128 *) (outp + 64) = xmm11; + *(int128 *) (outp + 80) = xmm15; + *(int128 *) (outp + 96) = xmm10; + *(int128 *) (outp + 112) = xmm13; + + len -= 128; + inp += 128; + outp += 128; + + goto enc_block; + + partial: + + lensav = len; + len >>= 4; + + tmp = load32_bigendian(np + 12); + tmp += len; + store32_bigendian(np + 12, tmp); + + blp = bl; + *(int128 *)(blp + 0) = xmm8; + *(int128 *)(blp + 16) = xmm9; + *(int128 *)(blp + 32) = xmm12; + *(int128 *)(blp + 48) = xmm14; + *(int128 *)(blp + 64) = xmm11; + *(int128 *)(blp + 80) = xmm15; + *(int128 *)(blp + 96) = xmm10; + *(int128 *)(blp + 112) = xmm13; + + bytes: + + if(lensav == 0) goto end; + + b = blp[0]; + b ^= *(unsigned char *)(inp + 0); + *(unsigned char *)(outp + 0) = b; + + blp += 1; + inp +=1; + outp +=1; + lensav -= 1; + + goto bytes; + + full: + + tmp = load32_bigendian(np + 12); + tmp += 8; + store32_bigendian(np + 12, tmp); + + xor2(&xmm8, (int128 *)(inp + 0)); + xor2(&xmm9, (int128 *)(inp + 16)); + xor2(&xmm12, (int128 
*)(inp + 32)); + xor2(&xmm14, (int128 *)(inp + 48)); + xor2(&xmm11, (int128 *)(inp + 64)); + xor2(&xmm15, (int128 *)(inp + 80)); + xor2(&xmm10, (int128 *)(inp + 96)); + xor2(&xmm13, (int128 *)(inp + 112)); + + *(int128 *) (outp + 0) = xmm8; + *(int128 *) (outp + 16) = xmm9; + *(int128 *) (outp + 32) = xmm12; + *(int128 *) (outp + 48) = xmm14; + *(int128 *) (outp + 64) = xmm11; + *(int128 *) (outp + 80) = xmm15; + *(int128 *) (outp + 96) = xmm10; + *(int128 *) (outp + 112) = xmm13; + + end: + return 0; + +} diff --git a/nacl/crypto_stream/aes128ctr/used b/nacl/crypto_stream/aes128ctr/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_stream/measure.c b/nacl/crypto_stream/measure.c new file mode 100644 index 00000000..ff3ab610 --- /dev/null +++ b/nacl/crypto_stream/measure.c @@ -0,0 +1,73 @@ +#include +#include "randombytes.h" +#include "cpucycles.h" +#include "crypto_stream.h" + +extern void printentry(long long,const char *,long long *,long long); +extern unsigned char *alignedcalloc(unsigned long long); +extern const char *primitiveimplementation; +extern const char *implementationversion; +extern const char *sizenames[]; +extern const long long sizes[]; +extern void allocate(void); +extern void measure(void); + +const char *primitiveimplementation = crypto_stream_IMPLEMENTATION; +const char *implementationversion = crypto_stream_VERSION; +const char *sizenames[] = { "keybytes", "noncebytes", 0 }; +const long long sizes[] = { crypto_stream_KEYBYTES, crypto_stream_NONCEBYTES }; + +#define MAXTEST_BYTES 4096 +#ifdef SUPERCOP +#define MGAP 8192 +#else +#define MGAP 8 +#endif + +static unsigned char *k; +static unsigned char *n; +static unsigned char *m; +static unsigned char *c; + +void preallocate(void) +{ +} + +void allocate(void) +{ + k = alignedcalloc(crypto_stream_KEYBYTES); + n = alignedcalloc(crypto_stream_NONCEBYTES); + m = alignedcalloc(MAXTEST_BYTES); + c = alignedcalloc(MAXTEST_BYTES); +} + +#define TIMINGS 15 +static long long 
cycles[TIMINGS + 1]; + +void measure(void) +{ + int i; + int loop; + int mlen; + + for (loop = 0;loop < LOOPS;++loop) { + for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / MGAP) { + randombytes(k,crypto_stream_KEYBYTES); + randombytes(n,crypto_stream_NONCEBYTES); + randombytes(m,mlen); + randombytes(c,mlen); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_stream(c,mlen,n,k); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"cycles",cycles,TIMINGS); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + crypto_stream_xor(c,m,mlen,n,k); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i]; + printentry(mlen,"xor_cycles",cycles,TIMINGS); + } + } +} diff --git a/nacl/crypto_stream/salsa20/amd64_xmm6/api.h b/nacl/crypto_stream/salsa20/amd64_xmm6/api.h new file mode 100644 index 00000000..c2b18461 --- /dev/null +++ b/nacl/crypto_stream/salsa20/amd64_xmm6/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_NONCEBYTES 8 diff --git a/nacl/crypto_stream/salsa20/amd64_xmm6/implementors b/nacl/crypto_stream/salsa20/amd64_xmm6/implementors new file mode 100644 index 00000000..f6fb3c73 --- /dev/null +++ b/nacl/crypto_stream/salsa20/amd64_xmm6/implementors @@ -0,0 +1 @@ +Daniel J. 
Bernstein diff --git a/nacl/crypto_stream/salsa20/amd64_xmm6/stream.s b/nacl/crypto_stream/salsa20/amd64_xmm6/stream.s new file mode 100644 index 00000000..82a897f7 --- /dev/null +++ b/nacl/crypto_stream/salsa20/amd64_xmm6/stream.s @@ -0,0 +1,4823 @@ + +# qhasm: int64 r11_caller + +# qhasm: int64 r12_caller + +# qhasm: int64 r13_caller + +# qhasm: int64 r14_caller + +# qhasm: int64 r15_caller + +# qhasm: int64 rbx_caller + +# qhasm: int64 rbp_caller + +# qhasm: caller r11_caller + +# qhasm: caller r12_caller + +# qhasm: caller r13_caller + +# qhasm: caller r14_caller + +# qhasm: caller r15_caller + +# qhasm: caller rbx_caller + +# qhasm: caller rbp_caller + +# qhasm: stack64 r11_stack + +# qhasm: stack64 r12_stack + +# qhasm: stack64 r13_stack + +# qhasm: stack64 r14_stack + +# qhasm: stack64 r15_stack + +# qhasm: stack64 rbx_stack + +# qhasm: stack64 rbp_stack + +# qhasm: int64 a + +# qhasm: int64 arg1 + +# qhasm: int64 arg2 + +# qhasm: int64 arg3 + +# qhasm: int64 arg4 + +# qhasm: int64 arg5 + +# qhasm: input arg1 + +# qhasm: input arg2 + +# qhasm: input arg3 + +# qhasm: input arg4 + +# qhasm: input arg5 + +# qhasm: int64 k + +# qhasm: int64 kbits + +# qhasm: int64 iv + +# qhasm: int64 i + +# qhasm: stack128 x0 + +# qhasm: stack128 x1 + +# qhasm: stack128 x2 + +# qhasm: stack128 x3 + +# qhasm: int64 m + +# qhasm: int64 out + +# qhasm: int64 bytes + +# qhasm: stack32 eax_stack + +# qhasm: stack32 ebx_stack + +# qhasm: stack32 esi_stack + +# qhasm: stack32 edi_stack + +# qhasm: stack32 ebp_stack + +# qhasm: int6464 diag0 + +# qhasm: int6464 diag1 + +# qhasm: int6464 diag2 + +# qhasm: int6464 diag3 + +# qhasm: int6464 a0 + +# qhasm: int6464 a1 + +# qhasm: int6464 a2 + +# qhasm: int6464 a3 + +# qhasm: int6464 a4 + +# qhasm: int6464 a5 + +# qhasm: int6464 a6 + +# qhasm: int6464 a7 + +# qhasm: int6464 b0 + +# qhasm: int6464 b1 + +# qhasm: int6464 b2 + +# qhasm: int6464 b3 + +# qhasm: int6464 b4 + +# qhasm: int6464 b5 + +# qhasm: int6464 b6 + +# qhasm: int6464 b7 + +# 
qhasm: int6464 z0 + +# qhasm: int6464 z1 + +# qhasm: int6464 z2 + +# qhasm: int6464 z3 + +# qhasm: int6464 z4 + +# qhasm: int6464 z5 + +# qhasm: int6464 z6 + +# qhasm: int6464 z7 + +# qhasm: int6464 z8 + +# qhasm: int6464 z9 + +# qhasm: int6464 z10 + +# qhasm: int6464 z11 + +# qhasm: int6464 z12 + +# qhasm: int6464 z13 + +# qhasm: int6464 z14 + +# qhasm: int6464 z15 + +# qhasm: stack128 z0_stack + +# qhasm: stack128 z1_stack + +# qhasm: stack128 z2_stack + +# qhasm: stack128 z3_stack + +# qhasm: stack128 z4_stack + +# qhasm: stack128 z5_stack + +# qhasm: stack128 z6_stack + +# qhasm: stack128 z7_stack + +# qhasm: stack128 z8_stack + +# qhasm: stack128 z9_stack + +# qhasm: stack128 z10_stack + +# qhasm: stack128 z11_stack + +# qhasm: stack128 z12_stack + +# qhasm: stack128 z13_stack + +# qhasm: stack128 z14_stack + +# qhasm: stack128 z15_stack + +# qhasm: int6464 y0 + +# qhasm: int6464 y1 + +# qhasm: int6464 y2 + +# qhasm: int6464 y3 + +# qhasm: int6464 y4 + +# qhasm: int6464 y5 + +# qhasm: int6464 y6 + +# qhasm: int6464 y7 + +# qhasm: int6464 y8 + +# qhasm: int6464 y9 + +# qhasm: int6464 y10 + +# qhasm: int6464 y11 + +# qhasm: int6464 y12 + +# qhasm: int6464 y13 + +# qhasm: int6464 y14 + +# qhasm: int6464 y15 + +# qhasm: int6464 r0 + +# qhasm: int6464 r1 + +# qhasm: int6464 r2 + +# qhasm: int6464 r3 + +# qhasm: int6464 r4 + +# qhasm: int6464 r5 + +# qhasm: int6464 r6 + +# qhasm: int6464 r7 + +# qhasm: int6464 r8 + +# qhasm: int6464 r9 + +# qhasm: int6464 r10 + +# qhasm: int6464 r11 + +# qhasm: int6464 r12 + +# qhasm: int6464 r13 + +# qhasm: int6464 r14 + +# qhasm: int6464 r15 + +# qhasm: stack128 orig0 + +# qhasm: stack128 orig1 + +# qhasm: stack128 orig2 + +# qhasm: stack128 orig3 + +# qhasm: stack128 orig4 + +# qhasm: stack128 orig5 + +# qhasm: stack128 orig6 + +# qhasm: stack128 orig7 + +# qhasm: stack128 orig8 + +# qhasm: stack128 orig9 + +# qhasm: stack128 orig10 + +# qhasm: stack128 orig11 + +# qhasm: stack128 orig12 + +# qhasm: stack128 orig13 + +# qhasm: 
stack128 orig14 + +# qhasm: stack128 orig15 + +# qhasm: int64 in0 + +# qhasm: int64 in1 + +# qhasm: int64 in2 + +# qhasm: int64 in3 + +# qhasm: int64 in4 + +# qhasm: int64 in5 + +# qhasm: int64 in6 + +# qhasm: int64 in7 + +# qhasm: int64 in8 + +# qhasm: int64 in9 + +# qhasm: int64 in10 + +# qhasm: int64 in11 + +# qhasm: int64 in12 + +# qhasm: int64 in13 + +# qhasm: int64 in14 + +# qhasm: int64 in15 + +# qhasm: stack512 tmp + +# qhasm: int64 ctarget + +# qhasm: stack64 bytes_backup + +# qhasm: enter crypto_stream_salsa20_amd64_xmm6 +.text +.p2align 5 +.globl _crypto_stream_salsa20_amd64_xmm6 +.globl crypto_stream_salsa20_amd64_xmm6 +_crypto_stream_salsa20_amd64_xmm6: +crypto_stream_salsa20_amd64_xmm6: +mov %rsp,%r11 +and $31,%r11 +add $480,%r11 +sub %r11,%rsp + +# qhasm: r11_stack = r11_caller +# asm 1: movq r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + +# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: bytes = arg2 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rsi,%r9 + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = out +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rdi,%rsi + +# qhasm: iv = arg3 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rdx,%rdx + +# 
qhasm: k = arg4 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %rcx,%r10 + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done + +# qhasm: a = 0 +# asm 1: mov $0,>a=int64#7 +# asm 2: mov $0,>a=%rax +mov $0,%rax + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = a; --i } +rep stosb + +# qhasm: out -= bytes +# asm 1: sub r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + +# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = arg2 +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rsi,%rsi + +# qhasm: bytes = arg3 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rdx,%r9 + +# qhasm: iv = arg4 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rcx,%rdx + +# qhasm: k = arg5 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %r8,%r10 + +# qhasm: unsigned>? 
bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: start: +._start: + +# qhasm: in12 = *(uint32 *) (k + 20) +# asm 1: movl 20(in12=int64#4d +# asm 2: movl 20(in12=%ecx +movl 20(%r10),%ecx + +# qhasm: in1 = *(uint32 *) (k + 0) +# asm 1: movl 0(in1=int64#5d +# asm 2: movl 0(in1=%r8d +movl 0(%r10),%r8d + +# qhasm: in6 = *(uint32 *) (iv + 0) +# asm 1: movl 0(in6=int64#7d +# asm 2: movl 0(in6=%eax +movl 0(%rdx),%eax + +# qhasm: in11 = *(uint32 *) (k + 16) +# asm 1: movl 16(in11=int64#9d +# asm 2: movl 16(in11=%r11d +movl 16(%r10),%r11d + +# qhasm: ((uint32 *)&x1)[0] = in12 +# asm 1: movl x1=stack128#1 +# asm 2: movl x1=0(%rsp) +movl %ecx,0(%rsp) + +# qhasm: ((uint32 *)&x1)[1] = in1 +# asm 1: movl in8=int64#4 +# asm 2: mov $0,>in8=%rcx +mov $0,%rcx + +# qhasm: in13 = *(uint32 *) (k + 24) +# asm 1: movl 24(in13=int64#5d +# asm 2: movl 24(in13=%r8d +movl 24(%r10),%r8d + +# qhasm: in2 = *(uint32 *) (k + 4) +# asm 1: movl 4(in2=int64#7d +# asm 2: movl 4(in2=%eax +movl 4(%r10),%eax + +# qhasm: in7 = *(uint32 *) (iv + 4) +# asm 1: movl 4(in7=int64#3d +# asm 2: movl 4(in7=%edx +movl 4(%rdx),%edx + +# qhasm: ((uint32 *)&x2)[0] = in8 +# asm 1: movl x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %ecx,16(%rsp) + +# qhasm: ((uint32 *)&x2)[1] = in13 +# asm 1: movl in4=int64#3d +# asm 2: movl 12(in4=%edx +movl 12(%r10),%edx + +# qhasm: in9 = 0 +# asm 1: mov $0,>in9=int64#4 +# asm 2: mov $0,>in9=%rcx +mov $0,%rcx + +# qhasm: in14 = *(uint32 *) (k + 28) +# asm 1: movl 28(in14=int64#5d +# asm 2: movl 28(in14=%r8d +movl 28(%r10),%r8d + +# qhasm: in3 = *(uint32 *) (k + 8) +# asm 1: movl 8(in3=int64#7d +# asm 2: movl 8(in3=%eax +movl 8(%r10),%eax + +# qhasm: ((uint32 *)&x3)[0] = in4 +# asm 1: movl x3=stack128#3 +# asm 2: movl x3=32(%rsp) +movl %edx,32(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl in0=int64#3 +# asm 2: mov $1634760805,>in0=%rdx +mov $1634760805,%rdx + +# qhasm: in5 = 857760878 +# asm 1: mov $857760878,>in5=int64#4 +# 
asm 2: mov $857760878,>in5=%rcx +mov $857760878,%rcx + +# qhasm: in10 = 2036477234 +# asm 1: mov $2036477234,>in10=int64#5 +# asm 2: mov $2036477234,>in10=%r8 +mov $2036477234,%r8 + +# qhasm: in15 = 1797285236 +# asm 1: mov $1797285236,>in15=int64#7 +# asm 2: mov $1797285236,>in15=%rax +mov $1797285236,%rax + +# qhasm: ((uint32 *)&x0)[0] = in0 +# asm 1: movl x0=stack128#4 +# asm 2: movl x0=48(%rsp) +movl %edx,48(%rsp) + +# qhasm: ((uint32 *)&x0)[1] = in5 +# asm 1: movl z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 48(%rsp),%xmm0 + +# qhasm: z5 = z0[1,1,1,1] +# asm 1: pshufd $0x55,z5=int6464#2 +# asm 2: pshufd $0x55,z5=%xmm1 +pshufd $0x55,%xmm0,%xmm1 + +# qhasm: z10 = z0[2,2,2,2] +# asm 1: pshufd $0xaa,z10=int6464#3 +# asm 2: pshufd $0xaa,z10=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z15 = z0[3,3,3,3] +# asm 1: pshufd $0xff,z15=int6464#4 +# asm 2: pshufd $0xff,z15=%xmm3 +pshufd $0xff,%xmm0,%xmm3 + +# qhasm: z0 = z0[0,0,0,0] +# asm 1: pshufd $0x00,z0=int6464#1 +# asm 2: pshufd $0x00,z0=%xmm0 +pshufd $0x00,%xmm0,%xmm0 + +# qhasm: orig5 = z5 +# asm 1: movdqa orig5=stack128#5 +# asm 2: movdqa orig5=64(%rsp) +movdqa %xmm1,64(%rsp) + +# qhasm: orig10 = z10 +# asm 1: movdqa orig10=stack128#6 +# asm 2: movdqa orig10=80(%rsp) +movdqa %xmm2,80(%rsp) + +# qhasm: orig15 = z15 +# asm 1: movdqa orig15=stack128#7 +# asm 2: movdqa orig15=96(%rsp) +movdqa %xmm3,96(%rsp) + +# qhasm: orig0 = z0 +# asm 1: movdqa orig0=stack128#8 +# asm 2: movdqa orig0=112(%rsp) +movdqa %xmm0,112(%rsp) + +# qhasm: z1 = x1 +# asm 1: movdqa z1=int6464#1 +# asm 2: movdqa z1=%xmm0 +movdqa 0(%rsp),%xmm0 + +# qhasm: z6 = z1[2,2,2,2] +# asm 1: pshufd $0xaa,z6=int6464#2 +# asm 2: pshufd $0xaa,z6=%xmm1 +pshufd $0xaa,%xmm0,%xmm1 + +# qhasm: z11 = z1[3,3,3,3] +# asm 1: pshufd $0xff,z11=int6464#3 +# asm 2: pshufd $0xff,z11=%xmm2 +pshufd $0xff,%xmm0,%xmm2 + +# qhasm: z12 = z1[0,0,0,0] +# asm 1: pshufd $0x00,z12=int6464#4 +# asm 2: pshufd $0x00,z12=%xmm3 +pshufd $0x00,%xmm0,%xmm3 + +# qhasm: z1 = z1[1,1,1,1] +# 
asm 1: pshufd $0x55,z1=int6464#1 +# asm 2: pshufd $0x55,z1=%xmm0 +pshufd $0x55,%xmm0,%xmm0 + +# qhasm: orig6 = z6 +# asm 1: movdqa orig6=stack128#9 +# asm 2: movdqa orig6=128(%rsp) +movdqa %xmm1,128(%rsp) + +# qhasm: orig11 = z11 +# asm 1: movdqa orig11=stack128#10 +# asm 2: movdqa orig11=144(%rsp) +movdqa %xmm2,144(%rsp) + +# qhasm: orig12 = z12 +# asm 1: movdqa orig12=stack128#11 +# asm 2: movdqa orig12=160(%rsp) +movdqa %xmm3,160(%rsp) + +# qhasm: orig1 = z1 +# asm 1: movdqa orig1=stack128#12 +# asm 2: movdqa orig1=176(%rsp) +movdqa %xmm0,176(%rsp) + +# qhasm: z2 = x2 +# asm 1: movdqa z2=int6464#1 +# asm 2: movdqa z2=%xmm0 +movdqa 16(%rsp),%xmm0 + +# qhasm: z7 = z2[3,3,3,3] +# asm 1: pshufd $0xff,z7=int6464#2 +# asm 2: pshufd $0xff,z7=%xmm1 +pshufd $0xff,%xmm0,%xmm1 + +# qhasm: z13 = z2[1,1,1,1] +# asm 1: pshufd $0x55,z13=int6464#3 +# asm 2: pshufd $0x55,z13=%xmm2 +pshufd $0x55,%xmm0,%xmm2 + +# qhasm: z2 = z2[2,2,2,2] +# asm 1: pshufd $0xaa,z2=int6464#1 +# asm 2: pshufd $0xaa,z2=%xmm0 +pshufd $0xaa,%xmm0,%xmm0 + +# qhasm: orig7 = z7 +# asm 1: movdqa orig7=stack128#13 +# asm 2: movdqa orig7=192(%rsp) +movdqa %xmm1,192(%rsp) + +# qhasm: orig13 = z13 +# asm 1: movdqa orig13=stack128#14 +# asm 2: movdqa orig13=208(%rsp) +movdqa %xmm2,208(%rsp) + +# qhasm: orig2 = z2 +# asm 1: movdqa orig2=stack128#15 +# asm 2: movdqa orig2=224(%rsp) +movdqa %xmm0,224(%rsp) + +# qhasm: z3 = x3 +# asm 1: movdqa z3=int6464#1 +# asm 2: movdqa z3=%xmm0 +movdqa 32(%rsp),%xmm0 + +# qhasm: z4 = z3[0,0,0,0] +# asm 1: pshufd $0x00,z4=int6464#2 +# asm 2: pshufd $0x00,z4=%xmm1 +pshufd $0x00,%xmm0,%xmm1 + +# qhasm: z14 = z3[2,2,2,2] +# asm 1: pshufd $0xaa,z14=int6464#3 +# asm 2: pshufd $0xaa,z14=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z3 = z3[3,3,3,3] +# asm 1: pshufd $0xff,z3=int6464#1 +# asm 2: pshufd $0xff,z3=%xmm0 +pshufd $0xff,%xmm0,%xmm0 + +# qhasm: orig4 = z4 +# asm 1: movdqa orig4=stack128#16 +# asm 2: movdqa orig4=240(%rsp) +movdqa %xmm1,240(%rsp) + +# qhasm: orig14 = z14 +# asm 1: 
movdqa orig14=stack128#17 +# asm 2: movdqa orig14=256(%rsp) +movdqa %xmm2,256(%rsp) + +# qhasm: orig3 = z3 +# asm 1: movdqa orig3=stack128#18 +# asm 2: movdqa orig3=272(%rsp) +movdqa %xmm0,272(%rsp) + +# qhasm: bytesatleast256: +._bytesatleast256: + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int64#3d +# asm 2: movl in8=%edx +movl 16(%rsp),%edx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int64#4d +# asm 2: movl 4+in9=%ecx +movl 4+32(%rsp),%ecx + +# qhasm: ((uint32 *) &orig8)[0] = in8 +# asm 1: movl orig8=stack128#19 +# asm 2: movl orig8=288(%rsp) +movl %edx,288(%rsp) + +# qhasm: ((uint32 *) &orig9)[0] = in9 +# asm 1: movl orig9=stack128#20 +# asm 2: movl orig9=304(%rsp) +movl %ecx,304(%rsp) + +# qhasm: in8 += 1 +# asm 1: add $1,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %edx,16(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl bytes_backup=stack64#8 +# asm 2: movq bytes_backup=408(%rsp) +movq %r9,408(%rsp) + +# qhasm: i = 20 +# asm 1: mov $20,>i=int64#3 +# asm 2: mov $20,>i=%rdx +mov $20,%rdx + +# qhasm: z5 = orig5 +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 64(%rsp),%xmm0 + +# qhasm: z10 = orig10 +# asm 1: movdqa z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 80(%rsp),%xmm1 + +# qhasm: z15 = orig15 +# asm 1: movdqa z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 96(%rsp),%xmm2 + +# qhasm: z14 = orig14 +# asm 1: movdqa z14=int6464#4 +# asm 2: movdqa z14=%xmm3 +movdqa 256(%rsp),%xmm3 + +# qhasm: z3 = orig3 +# asm 1: movdqa z3=int6464#5 +# asm 2: movdqa z3=%xmm4 +movdqa 272(%rsp),%xmm4 + +# qhasm: z6 = orig6 +# asm 
1: movdqa z6=int6464#6 +# asm 2: movdqa z6=%xmm5 +movdqa 128(%rsp),%xmm5 + +# qhasm: z11 = orig11 +# asm 1: movdqa z11=int6464#7 +# asm 2: movdqa z11=%xmm6 +movdqa 144(%rsp),%xmm6 + +# qhasm: z1 = orig1 +# asm 1: movdqa z1=int6464#8 +# asm 2: movdqa z1=%xmm7 +movdqa 176(%rsp),%xmm7 + +# qhasm: z7 = orig7 +# asm 1: movdqa z7=int6464#9 +# asm 2: movdqa z7=%xmm8 +movdqa 192(%rsp),%xmm8 + +# qhasm: z13 = orig13 +# asm 1: movdqa z13=int6464#10 +# asm 2: movdqa z13=%xmm9 +movdqa 208(%rsp),%xmm9 + +# qhasm: z2 = orig2 +# asm 1: movdqa z2=int6464#11 +# asm 2: movdqa z2=%xmm10 +movdqa 224(%rsp),%xmm10 + +# qhasm: z9 = orig9 +# asm 1: movdqa z9=int6464#12 +# asm 2: movdqa z9=%xmm11 +movdqa 304(%rsp),%xmm11 + +# qhasm: z0 = orig0 +# asm 1: movdqa z0=int6464#13 +# asm 2: movdqa z0=%xmm12 +movdqa 112(%rsp),%xmm12 + +# qhasm: z12 = orig12 +# asm 1: movdqa z12=int6464#14 +# asm 2: movdqa z12=%xmm13 +movdqa 160(%rsp),%xmm13 + +# qhasm: z4 = orig4 +# asm 1: movdqa z4=int6464#15 +# asm 2: movdqa z4=%xmm14 +movdqa 240(%rsp),%xmm14 + +# qhasm: z8 = orig8 +# asm 1: movdqa z8=int6464#16 +# asm 2: movdqa z8=%xmm15 +movdqa 288(%rsp),%xmm15 + +# qhasm: mainloop1: +._mainloop1: + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y4 = z12 +# asm 1: movdqa y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: uint32323232 y4 += z0 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y9=int6464#2 +# asm 2: movdqa y9=%xmm1 +movdqa %xmm7,%xmm1 + +# qhasm: uint32323232 y9 += z5 +# asm 1: paddd r9=int6464#3 +# asm 2: movdqa r9=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y9 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y8=int6464#2 +# asm 2: movdqa y8=%xmm1 
+movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y8 += z4 +# asm 1: paddd r8=int6464#3 +# asm 2: movdqa r8=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y8 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y13=int6464#2 +# asm 2: movdqa y13=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y13 += z9 +# asm 1: paddd r13=int6464#3 +# asm 2: movdqa r13=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y13 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y12=int6464#2 +# asm 2: movdqa y12=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y12 += z8 +# asm 1: paddd r12=int6464#3 +# asm 2: movdqa r12=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y12 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm11,%xmm1 + +# qhasm: uint32323232 y1 += z13 +# asm 1: paddd r1=int6464#3 +# asm 2: movdqa r1=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y1 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm15,%xmm1 + +# qhasm: uint32323232 y0 += z12 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 320(%rsp),%xmm1 + +# qhasm: z0_stack = z0 +# asm 1: movdqa z0_stack=stack128#21 +# asm 2: movdqa z0_stack=320(%rsp) +movdqa %xmm12,320(%rsp) + +# qhasm: y5 = z13 +# asm 1: movdqa y5=int6464#3 +# asm 2: movdqa y5=%xmm2 +movdqa %xmm9,%xmm2 + +# qhasm: uint32323232 y5 += z1 +# asm 1: paddd r5=int6464#13 +# asm 2: movdqa r5=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y5 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y14=int6464#3 +# asm 2: movdqa y14=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: uint32323232 y14 += z10 +# asm 1: paddd r14=int6464#13 +# asm 2: movdqa r14=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y14 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,z15=int6464#3 +# asm 2: movdqa z15=%xmm2 
+movdqa 336(%rsp),%xmm2 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#22 +# asm 2: movdqa z5_stack=336(%rsp) +movdqa %xmm0,336(%rsp) + +# qhasm: y3 = z11 +# asm 1: movdqa y3=int6464#1 +# asm 2: movdqa y3=%xmm0 +movdqa %xmm6,%xmm0 + +# qhasm: uint32323232 y3 += z15 +# asm 1: paddd r3=int6464#13 +# asm 2: movdqa r3=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y3 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#1 +# asm 2: movdqa y2=%xmm0 +movdqa %xmm1,%xmm0 + +# qhasm: uint32323232 y2 += z14 +# asm 1: paddd r2=int6464#13 +# asm 2: movdqa r2=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#1 +# asm 2: movdqa y7=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 y7 += z3 +# asm 1: paddd r7=int6464#13 +# asm 2: movdqa r7=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y6=int6464#1 +# asm 2: movdqa y6=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 y6 += z2 +# asm 1: paddd r6=int6464#13 +# asm 2: movdqa r6=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y11=int6464#1 +# asm 2: movdqa y11=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 y11 += z7 +# asm 1: paddd r11=int6464#13 +# asm 2: movdqa r11=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y11 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y10=int6464#1 +# asm 2: movdqa y10=%xmm0 +movdqa %xmm10,%xmm0 + +# qhasm: uint32323232 y10 += z6 +# asm 1: paddd r10=int6464#13 +# asm 2: movdqa r10=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y10 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 320(%rsp),%xmm0 + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: y1 = z3 +# asm 1: movdqa y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm4,%xmm1 + +# 
qhasm: uint32323232 y1 += z0 +# asm 1: paddd r1=int6464#13 +# asm 2: movdqa r1=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y1 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y15=int6464#2 +# asm 2: movdqa y15=%xmm1 +movdqa %xmm8,%xmm1 + +# qhasm: uint32323232 y15 += z11 +# asm 1: paddd r15=int6464#13 +# asm 2: movdqa r15=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y15 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z5=int6464#13 +# asm 2: movdqa z5=%xmm12 +movdqa 336(%rsp),%xmm12 + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y6 = z4 +# asm 1: movdqa y6=int6464#2 +# asm 2: movdqa y6=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y6 += z5 +# asm 1: paddd r6=int6464#3 +# asm 2: movdqa r6=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y6 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#2 +# asm 2: movdqa y2=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y2 += z1 +# asm 1: paddd r2=int6464#3 +# asm 2: movdqa r2=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#2 +# asm 2: movdqa y7=%xmm1 +movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y7 += z6 +# asm 1: paddd r7=int6464#3 +# asm 2: movdqa r7=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y3=int6464#2 +# asm 2: movdqa y3=%xmm1 +movdqa %xmm7,%xmm1 + +# qhasm: uint32323232 y3 += z2 +# asm 1: paddd r3=int6464#3 +# asm 2: movdqa r3=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y3 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm5,%xmm1 + +# qhasm: uint32323232 y4 += z7 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm10,%xmm1 + +# qhasm: 
uint32323232 y0 += z3 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 320(%rsp),%xmm1 + +# qhasm: z0_stack = z0 +# asm 1: movdqa z0_stack=stack128#21 +# asm 2: movdqa z0_stack=320(%rsp) +movdqa %xmm0,320(%rsp) + +# qhasm: y5 = z7 +# asm 1: movdqa y5=int6464#1 +# asm 2: movdqa y5=%xmm0 +movdqa %xmm8,%xmm0 + +# qhasm: uint32323232 y5 += z4 +# asm 1: paddd r5=int6464#3 +# asm 2: movdqa r5=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 y5 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y11=int6464#1 +# asm 2: movdqa y11=%xmm0 +movdqa %xmm11,%xmm0 + +# qhasm: uint32323232 y11 += z10 +# asm 1: paddd r11=int6464#3 +# asm 2: movdqa r11=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 y11 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 336(%rsp),%xmm2 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#22 +# asm 2: movdqa z5_stack=336(%rsp) +movdqa %xmm12,336(%rsp) + +# qhasm: y12 = z14 +# asm 1: movdqa y12=int6464#1 +# asm 2: movdqa y12=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 y12 += z15 +# asm 1: paddd r12=int6464#13 +# asm 2: movdqa r12=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y12 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y8=int6464#1 +# asm 2: movdqa y8=%xmm0 +movdqa %xmm1,%xmm0 + +# qhasm: uint32323232 y8 += z11 +# asm 1: paddd r8=int6464#13 +# asm 2: movdqa r8=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y8 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y13=int6464#1 +# asm 2: movdqa y13=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 y13 += z12 +# asm 1: paddd r13=int6464#13 +# asm 2: movdqa r13=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y13 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y9=int6464#1 +# asm 2: movdqa y9=%xmm0 +movdqa %xmm6,%xmm0 + +# qhasm: uint32323232 y9 += z8 +# 
asm 1: paddd r9=int6464#13 +# asm 2: movdqa r9=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y9 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y14=int6464#1 +# asm 2: movdqa y14=%xmm0 +movdqa %xmm13,%xmm0 + +# qhasm: uint32323232 y14 += z13 +# asm 1: paddd r14=int6464#13 +# asm 2: movdqa r14=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y14 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y10=int6464#1 +# asm 2: movdqa y10=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: uint32323232 y10 += z9 +# asm 1: paddd r10=int6464#13 +# asm 2: movdqa r10=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y10 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y15=int6464#1 +# asm 2: movdqa y15=%xmm0 +movdqa %xmm9,%xmm0 + +# qhasm: uint32323232 y15 += z14 +# asm 1: paddd r15=int6464#13 +# asm 2: movdqa r15=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y15 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z0=int6464#13 +# asm 2: movdqa z0=%xmm12 +movdqa 320(%rsp),%xmm12 + +# qhasm: z5 = z5_stack +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 336(%rsp),%xmm0 + +# qhasm: unsigned>? 
i -= 2 +# asm 1: sub $2, +ja ._mainloop1 + +# qhasm: uint32323232 z0 += orig0 +# asm 1: paddd in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: (uint32) in0 ^= *(uint32 *) (m + 192) +# asm 1: xorl 192(in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# 
qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: (uint32) in4 ^= *(uint32 *) (m + 208) +# asm 1: xorl 208(in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 
2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: (uint32) in8 ^= *(uint32 *) (m + 224) +# asm 1: xorl 224(in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: 
movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: (uint32) in12 ^= *(uint32 *) (m + 240) +# asm 1: xorl 240(bytes=int64#6 +# asm 2: movq bytes=%r9 +movq 408(%rsp),%r9 + +# qhasm: bytes -= 256 +# asm 1: sub $256,? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesbetween1and255: +._bytesbetween1and255: + +# qhasm: unsignedctarget=int64#3 +# asm 2: mov ctarget=%rdx +mov %rdi,%rdx + +# qhasm: out = &tmp +# asm 1: leaq out=int64#1 +# asm 2: leaq out=%rdi +leaq 416(%rsp),%rdi + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb + +# qhasm: out = &tmp +# asm 1: leaq out=int64#1 +# asm 2: leaq out=%rdi +leaq 416(%rsp),%rdi + +# qhasm: m = &tmp +# asm 1: leaq m=int64#2 +# asm 2: leaq m=%rsi +leaq 416(%rsp),%rsi +# comment:fp stack unchanged by fallthrough + +# qhasm: nocopy: +._nocopy: + +# qhasm: bytes_backup = bytes +# asm 1: movq bytes_backup=stack64#8 +# asm 2: movq bytes_backup=408(%rsp) +movq %r9,408(%rsp) + +# qhasm: diag0 = x0 +# asm 1: movdqa diag0=int6464#1 +# asm 2: movdqa diag0=%xmm0 +movdqa 48(%rsp),%xmm0 + +# qhasm: diag1 = x1 +# asm 1: movdqa diag1=int6464#2 +# asm 2: movdqa diag1=%xmm1 +movdqa 0(%rsp),%xmm1 + +# qhasm: diag2 = x2 +# asm 1: movdqa diag2=int6464#3 +# asm 2: movdqa diag2=%xmm2 +movdqa 16(%rsp),%xmm2 + +# qhasm: diag3 = x3 +# asm 1: movdqa diag3=int6464#4 +# asm 2: movdqa diag3=%xmm3 +movdqa 32(%rsp),%xmm3 + +# qhasm: a0 = diag1 +# asm 1: movdqa a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: i = 20 +# asm 1: mov $20,>i=int64#4 +# asm 2: mov $20,>i=%rcx +mov $20,%rcx + +# qhasm: mainloop2: +._mainloop2: + +# qhasm: uint32323232 a0 += diag0 +# asm 1: paddd a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld 
$25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 
2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,? 
i -= 4 +# asm 1: sub $4,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,b0=int6464#8,>b0=int6464#8 +# asm 2: pxor >b0=%xmm7,>b0=%xmm7 +pxor %xmm7,%xmm7 + +# qhasm: uint32323232 b7 >>= 14 +# asm 1: psrld $14, +ja ._mainloop2 + +# qhasm: uint32323232 diag0 += x0 +# asm 1: paddd in0=int64#4 +# asm 2: movd in0=%rcx +movd %xmm0,%rcx + +# qhasm: in12 = diag1 +# asm 1: movd in12=int64#5 +# asm 2: movd in12=%r8 +movd %xmm1,%r8 + +# qhasm: in8 = diag2 +# asm 1: movd in8=int64#6 +# asm 2: movd in8=%r9 +movd %xmm2,%r9 + +# qhasm: in4 = diag3 +# asm 1: movd in4=int64#7 +# asm 2: movd in4=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in1 = diag1 +# asm 1: movd in1=int64#5 +# asm 2: movd in1=%r8 +movd %xmm1,%r8 + +# qhasm: in13 = diag2 +# asm 1: movd in13=int64#6 +# asm 2: movd in13=%r9 +movd %xmm2,%r9 + +# qhasm: in9 = diag3 +# asm 1: movd in9=int64#7 +# asm 2: movd in9=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in10=int64#4 +# asm 2: movd in10=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = diag1 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm1,%r8 + +# qhasm: in2 = diag2 +# asm 1: movd in2=int64#6 +# asm 2: movd in2=%r9 +movd %xmm2,%r9 + +# qhasm: in14 = diag3 +# asm 1: movd in14=int64#7 +# asm 2: movd in14=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in15=int64#4 +# asm 2: movd in15=%rcx +movd %xmm0,%rcx + +# qhasm: in11 = diag1 +# asm 1: movd in11=int64#5 +# asm 2: movd in11=%r8 +movd %xmm1,%r8 + +# qhasm: in7 = diag2 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm2,%r9 + +# qhasm: in3 = diag3 +# asm 1: movd in3=int64#7 +# asm 2: movd in3=%rax +movd %xmm3,%rax + +# qhasm: (uint32) in15 ^= *(uint32 *) (m + 60) +# asm 1: xorl 60(bytes=int64#6 +# asm 2: movq 
bytes=%r9 +movq 408(%rsp),%r9 + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int64#4d +# asm 2: movl in8=%ecx +movl 16(%rsp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int64#5d +# asm 2: movl 4+in9=%r8d +movl 4+32(%rsp),%r8d + +# qhasm: in8 += 1 +# asm 1: add $1,in9=int64#5 +# asm 2: mov in9=%r8 +mov %rcx,%r8 + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %ecx,16(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl ? unsigned +ja ._bytesatleast65 +# comment:fp stack unchanged by jump + +# qhasm: goto bytesatleast64 if !unsigned< +jae ._bytesatleast64 + +# qhasm: m = out +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rdi,%rsi + +# qhasm: out = ctarget +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdx,%rdi + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesatleast64: +._bytesatleast64: +# comment:fp stack unchanged by fallthrough + +# qhasm: done: +._done: + +# qhasm: r11_caller = r11_stack +# asm 1: movq r11_caller=int64#9 +# asm 2: movq r11_caller=%r11 +movq 352(%rsp),%r11 + +# qhasm: r12_caller = r12_stack +# asm 1: movq r12_caller=int64#10 +# asm 2: movq r12_caller=%r12 +movq 360(%rsp),%r12 + +# qhasm: r13_caller = r13_stack +# asm 1: movq r13_caller=int64#11 +# asm 2: movq r13_caller=%r13 +movq 368(%rsp),%r13 + +# qhasm: r14_caller = r14_stack +# asm 1: movq r14_caller=int64#12 +# asm 2: movq r14_caller=%r14 +movq 376(%rsp),%r14 + +# qhasm: r15_caller = r15_stack +# asm 1: movq r15_caller=int64#13 +# asm 2: movq r15_caller=%r15 +movq 384(%rsp),%r15 + +# qhasm: rbx_caller = rbx_stack +# asm 1: movq rbx_caller=int64#14 +# asm 2: movq rbx_caller=%rbx +movq 392(%rsp),%rbx + +# qhasm: rbp_caller = rbp_stack +# asm 1: movq rbp_caller=int64#15 +# asm 2: movq rbp_caller=%rbp +movq 400(%rsp),%rbp + +# qhasm: leave +add %r11,%rsp 
+xor %rax,%rax +xor %rdx,%rdx +ret + +# qhasm: bytesatleast65: +._bytesatleast65: + +# qhasm: bytes -= 64 +# asm 1: sub $64,= 64) { + crypto_core_salsa20(c,in,k,sigma); + + u = 1; + for (i = 8;i < 16;++i) { + u += (unsigned int) in[i]; + in[i] = u; + u >>= 8; + } + + clen -= 64; + c += 64; + } + + if (clen) { + crypto_core_salsa20(block,in,k,sigma); + for (i = 0;i < clen;++i) c[i] = block[i]; + } + return 0; +} diff --git a/nacl/crypto_stream/salsa20/ref/xor.c b/nacl/crypto_stream/salsa20/ref/xor.c new file mode 100644 index 00000000..11c7e9f0 --- /dev/null +++ b/nacl/crypto_stream/salsa20/ref/xor.c @@ -0,0 +1,52 @@ +/* +version 20080913 +D. J. Bernstein +Public domain. +*/ + +#include "crypto_core_salsa20.h" +#include "crypto_stream.h" + +typedef unsigned int uint32; + +static const unsigned char sigma[16] = "expand 32-byte k"; + +int crypto_stream_xor( + unsigned char *c, + const unsigned char *m,unsigned long long mlen, + const unsigned char *n, + const unsigned char *k +) +{ + unsigned char in[16]; + unsigned char block[64]; + int i; + unsigned int u; + + if (!mlen) return 0; + + for (i = 0;i < 8;++i) in[i] = n[i]; + for (i = 8;i < 16;++i) in[i] = 0; + + while (mlen >= 64) { + crypto_core_salsa20(block,in,k,sigma); + for (i = 0;i < 64;++i) c[i] = m[i] ^ block[i]; + + u = 1; + for (i = 8;i < 16;++i) { + u += (unsigned int) in[i]; + in[i] = u; + u >>= 8; + } + + mlen -= 64; + c += 64; + m += 64; + } + + if (mlen) { + crypto_core_salsa20(block,in,k,sigma); + for (i = 0;i < mlen;++i) c[i] = m[i] ^ block[i]; + } + return 0; +} diff --git a/nacl/crypto_stream/salsa20/used b/nacl/crypto_stream/salsa20/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_stream/salsa20/x86_xmm5/api.h b/nacl/crypto_stream/salsa20/x86_xmm5/api.h new file mode 100644 index 00000000..c2b18461 --- /dev/null +++ b/nacl/crypto_stream/salsa20/x86_xmm5/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_NONCEBYTES 8 diff --git 
a/nacl/crypto_stream/salsa20/x86_xmm5/implementors b/nacl/crypto_stream/salsa20/x86_xmm5/implementors new file mode 100644 index 00000000..f6fb3c73 --- /dev/null +++ b/nacl/crypto_stream/salsa20/x86_xmm5/implementors @@ -0,0 +1 @@ +Daniel J. Bernstein diff --git a/nacl/crypto_stream/salsa20/x86_xmm5/stream.s b/nacl/crypto_stream/salsa20/x86_xmm5/stream.s new file mode 100644 index 00000000..9e32ea43 --- /dev/null +++ b/nacl/crypto_stream/salsa20/x86_xmm5/stream.s @@ -0,0 +1,5078 @@ + +# qhasm: int32 a + +# qhasm: stack32 arg1 + +# qhasm: stack32 arg2 + +# qhasm: stack32 arg3 + +# qhasm: stack32 arg4 + +# qhasm: stack32 arg5 + +# qhasm: stack32 arg6 + +# qhasm: input arg1 + +# qhasm: input arg2 + +# qhasm: input arg3 + +# qhasm: input arg4 + +# qhasm: input arg5 + +# qhasm: input arg6 + +# qhasm: int32 eax + +# qhasm: int32 ebx + +# qhasm: int32 esi + +# qhasm: int32 edi + +# qhasm: int32 ebp + +# qhasm: caller eax + +# qhasm: caller ebx + +# qhasm: caller esi + +# qhasm: caller edi + +# qhasm: caller ebp + +# qhasm: int32 k + +# qhasm: int32 kbits + +# qhasm: int32 iv + +# qhasm: int32 i + +# qhasm: stack128 x0 + +# qhasm: stack128 x1 + +# qhasm: stack128 x2 + +# qhasm: stack128 x3 + +# qhasm: int32 m + +# qhasm: stack32 out_stack + +# qhasm: int32 out + +# qhasm: stack32 bytes_stack + +# qhasm: int32 bytes + +# qhasm: stack32 eax_stack + +# qhasm: stack32 ebx_stack + +# qhasm: stack32 esi_stack + +# qhasm: stack32 edi_stack + +# qhasm: stack32 ebp_stack + +# qhasm: int6464 diag0 + +# qhasm: int6464 diag1 + +# qhasm: int6464 diag2 + +# qhasm: int6464 diag3 + +# qhasm: int6464 a0 + +# qhasm: int6464 a1 + +# qhasm: int6464 a2 + +# qhasm: int6464 a3 + +# qhasm: int6464 a4 + +# qhasm: int6464 a5 + +# qhasm: int6464 a6 + +# qhasm: int6464 a7 + +# qhasm: int6464 b0 + +# qhasm: int6464 b1 + +# qhasm: int6464 b2 + +# qhasm: int6464 b3 + +# qhasm: int6464 b4 + +# qhasm: int6464 b5 + +# qhasm: int6464 b6 + +# qhasm: int6464 b7 + +# qhasm: int6464 z0 + +# qhasm: int6464 z1 + 
+# qhasm: int6464 z2 + +# qhasm: int6464 z3 + +# qhasm: int6464 z4 + +# qhasm: int6464 z5 + +# qhasm: int6464 z6 + +# qhasm: int6464 z7 + +# qhasm: int6464 z8 + +# qhasm: int6464 z9 + +# qhasm: int6464 z10 + +# qhasm: int6464 z11 + +# qhasm: int6464 z12 + +# qhasm: int6464 z13 + +# qhasm: int6464 z14 + +# qhasm: int6464 z15 + +# qhasm: stack128 z0_stack + +# qhasm: stack128 z1_stack + +# qhasm: stack128 z2_stack + +# qhasm: stack128 z3_stack + +# qhasm: stack128 z4_stack + +# qhasm: stack128 z5_stack + +# qhasm: stack128 z6_stack + +# qhasm: stack128 z7_stack + +# qhasm: stack128 z8_stack + +# qhasm: stack128 z9_stack + +# qhasm: stack128 z10_stack + +# qhasm: stack128 z11_stack + +# qhasm: stack128 z12_stack + +# qhasm: stack128 z13_stack + +# qhasm: stack128 z14_stack + +# qhasm: stack128 z15_stack + +# qhasm: stack128 orig0 + +# qhasm: stack128 orig1 + +# qhasm: stack128 orig2 + +# qhasm: stack128 orig3 + +# qhasm: stack128 orig4 + +# qhasm: stack128 orig5 + +# qhasm: stack128 orig6 + +# qhasm: stack128 orig7 + +# qhasm: stack128 orig8 + +# qhasm: stack128 orig9 + +# qhasm: stack128 orig10 + +# qhasm: stack128 orig11 + +# qhasm: stack128 orig12 + +# qhasm: stack128 orig13 + +# qhasm: stack128 orig14 + +# qhasm: stack128 orig15 + +# qhasm: int6464 p + +# qhasm: int6464 q + +# qhasm: int6464 r + +# qhasm: int6464 s + +# qhasm: int6464 t + +# qhasm: int6464 u + +# qhasm: int6464 v + +# qhasm: int6464 w + +# qhasm: int6464 mp + +# qhasm: int6464 mq + +# qhasm: int6464 mr + +# qhasm: int6464 ms + +# qhasm: int6464 mt + +# qhasm: int6464 mu + +# qhasm: int6464 mv + +# qhasm: int6464 mw + +# qhasm: int32 in0 + +# qhasm: int32 in1 + +# qhasm: int32 in2 + +# qhasm: int32 in3 + +# qhasm: int32 in4 + +# qhasm: int32 in5 + +# qhasm: int32 in6 + +# qhasm: int32 in7 + +# qhasm: int32 in8 + +# qhasm: int32 in9 + +# qhasm: int32 in10 + +# qhasm: int32 in11 + +# qhasm: int32 in12 + +# qhasm: int32 in13 + +# qhasm: int32 in14 + +# qhasm: int32 in15 + +# qhasm: stack512 tmp + +# 
qhasm: stack32 ctarget + +# qhasm: enter crypto_stream_salsa20_x86_xmm5 +.text +.p2align 5 +.globl _crypto_stream_salsa20_x86_xmm5 +.globl crypto_stream_salsa20_x86_xmm5 +_crypto_stream_salsa20_x86_xmm5: +crypto_stream_salsa20_x86_xmm5: +mov %esp,%eax +and $31,%eax +add $704,%eax +sub %eax,%esp + +# qhasm: eax_stack = eax +# asm 1: movl eax_stack=stack32#1 +# asm 2: movl eax_stack=0(%esp) +movl %eax,0(%esp) + +# qhasm: ebx_stack = ebx +# asm 1: movl ebx_stack=stack32#2 +# asm 2: movl ebx_stack=4(%esp) +movl %ebx,4(%esp) + +# qhasm: esi_stack = esi +# asm 1: movl esi_stack=stack32#3 +# asm 2: movl esi_stack=8(%esp) +movl %esi,8(%esp) + +# qhasm: edi_stack = edi +# asm 1: movl edi_stack=stack32#4 +# asm 2: movl edi_stack=12(%esp) +movl %edi,12(%esp) + +# qhasm: ebp_stack = ebp +# asm 1: movl ebp_stack=stack32#5 +# asm 2: movl ebp_stack=16(%esp) +movl %ebp,16(%esp) + +# qhasm: bytes = arg2 +# asm 1: movl bytes=int32#3 +# asm 2: movl bytes=%edx +movl 8(%esp,%eax),%edx + +# qhasm: out = arg1 +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 4(%esp,%eax),%edi + +# qhasm: m = out +# asm 1: mov m=int32#5 +# asm 2: mov m=%esi +mov %edi,%esi + +# qhasm: iv = arg4 +# asm 1: movl iv=int32#4 +# asm 2: movl iv=%ebx +movl 16(%esp,%eax),%ebx + +# qhasm: k = arg5 +# asm 1: movl k=int32#7 +# asm 2: movl k=%ebp +movl 20(%esp,%eax),%ebp + +# qhasm: unsigned>? 
bytes - 0 +# asm 1: cmp $0, +jbe ._done + +# qhasm: a = 0 +# asm 1: mov $0,>a=int32#1 +# asm 2: mov $0,>a=%eax +mov $0,%eax + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %edx,%ecx + +# qhasm: while (i) { *out++ = a; --i } +rep stosb + +# qhasm: out -= bytes +# asm 1: subl eax_stack=stack32#1 +# asm 2: movl eax_stack=0(%esp) +movl %eax,0(%esp) + +# qhasm: ebx_stack = ebx +# asm 1: movl ebx_stack=stack32#2 +# asm 2: movl ebx_stack=4(%esp) +movl %ebx,4(%esp) + +# qhasm: esi_stack = esi +# asm 1: movl esi_stack=stack32#3 +# asm 2: movl esi_stack=8(%esp) +movl %esi,8(%esp) + +# qhasm: edi_stack = edi +# asm 1: movl edi_stack=stack32#4 +# asm 2: movl edi_stack=12(%esp) +movl %edi,12(%esp) + +# qhasm: ebp_stack = ebp +# asm 1: movl ebp_stack=stack32#5 +# asm 2: movl ebp_stack=16(%esp) +movl %ebp,16(%esp) + +# qhasm: out = arg1 +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 4(%esp,%eax),%edi + +# qhasm: m = arg2 +# asm 1: movl m=int32#5 +# asm 2: movl m=%esi +movl 8(%esp,%eax),%esi + +# qhasm: bytes = arg3 +# asm 1: movl bytes=int32#3 +# asm 2: movl bytes=%edx +movl 12(%esp,%eax),%edx + +# qhasm: iv = arg5 +# asm 1: movl iv=int32#4 +# asm 2: movl iv=%ebx +movl 20(%esp,%eax),%ebx + +# qhasm: k = arg6 +# asm 1: movl k=int32#7 +# asm 2: movl k=%ebp +movl 24(%esp,%eax),%ebp + +# qhasm: unsigned>? 
bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: start: +._start: + +# qhasm: out_stack = out +# asm 1: movl out_stack=stack32#6 +# asm 2: movl out_stack=20(%esp) +movl %edi,20(%esp) + +# qhasm: bytes_stack = bytes +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %edx,24(%esp) + +# qhasm: in4 = *(uint32 *) (k + 12) +# asm 1: movl 12(in4=int32#1 +# asm 2: movl 12(in4=%eax +movl 12(%ebp),%eax + +# qhasm: in12 = *(uint32 *) (k + 20) +# asm 1: movl 20(in12=int32#2 +# asm 2: movl 20(in12=%ecx +movl 20(%ebp),%ecx + +# qhasm: ((uint32 *)&x3)[0] = in4 +# asm 1: movl x3=stack128#1 +# asm 2: movl x3=32(%esp) +movl %eax,32(%esp) + +# qhasm: ((uint32 *)&x1)[0] = in12 +# asm 1: movl x1=stack128#2 +# asm 2: movl x1=48(%esp) +movl %ecx,48(%esp) + +# qhasm: in0 = 1634760805 +# asm 1: mov $1634760805,>in0=int32#1 +# asm 2: mov $1634760805,>in0=%eax +mov $1634760805,%eax + +# qhasm: in8 = 0 +# asm 1: mov $0,>in8=int32#2 +# asm 2: mov $0,>in8=%ecx +mov $0,%ecx + +# qhasm: ((uint32 *)&x0)[0] = in0 +# asm 1: movl x0=stack128#3 +# asm 2: movl x0=64(%esp) +movl %eax,64(%esp) + +# qhasm: ((uint32 *)&x2)[0] = in8 +# asm 1: movl x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: in6 = *(uint32 *) (iv + 0) +# asm 1: movl 0(in6=int32#1 +# asm 2: movl 0(in6=%eax +movl 0(%ebx),%eax + +# qhasm: in7 = *(uint32 *) (iv + 4) +# asm 1: movl 4(in7=int32#2 +# asm 2: movl 4(in7=%ecx +movl 4(%ebx),%ecx + +# qhasm: ((uint32 *)&x1)[2] = in6 +# asm 1: movl in9=int32#1 +# asm 2: mov $0,>in9=%eax +mov $0,%eax + +# qhasm: in10 = 2036477234 +# asm 1: mov $2036477234,>in10=int32#2 +# asm 2: mov $2036477234,>in10=%ecx +mov $2036477234,%ecx + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl in1=int32#1 +# asm 2: movl 0(in1=%eax +movl 0(%ebp),%eax + +# qhasm: in2 = *(uint32 *) (k + 4) +# asm 1: movl 4(in2=int32#2 +# asm 2: movl 4(in2=%ecx +movl 4(%ebp),%ecx + +# qhasm: in3 = *(uint32 *) (k + 8) +# asm 1: movl 
8(in3=int32#3 +# asm 2: movl 8(in3=%edx +movl 8(%ebp),%edx + +# qhasm: in5 = 857760878 +# asm 1: mov $857760878,>in5=int32#4 +# asm 2: mov $857760878,>in5=%ebx +mov $857760878,%ebx + +# qhasm: ((uint32 *)&x1)[1] = in1 +# asm 1: movl in11=int32#1 +# asm 2: movl 16(in11=%eax +movl 16(%ebp),%eax + +# qhasm: in13 = *(uint32 *) (k + 24) +# asm 1: movl 24(in13=int32#2 +# asm 2: movl 24(in13=%ecx +movl 24(%ebp),%ecx + +# qhasm: in14 = *(uint32 *) (k + 28) +# asm 1: movl 28(in14=int32#3 +# asm 2: movl 28(in14=%edx +movl 28(%ebp),%edx + +# qhasm: in15 = 1797285236 +# asm 1: mov $1797285236,>in15=int32#4 +# asm 2: mov $1797285236,>in15=%ebx +mov $1797285236,%ebx + +# qhasm: ((uint32 *)&x1)[3] = in11 +# asm 1: movl bytes=int32#1 +# asm 2: movl bytes=%eax +movl 24(%esp),%eax + +# qhasm: unsignedz0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 64(%esp),%xmm0 + +# qhasm: z5 = z0[1,1,1,1] +# asm 1: pshufd $0x55,z5=int6464#2 +# asm 2: pshufd $0x55,z5=%xmm1 +pshufd $0x55,%xmm0,%xmm1 + +# qhasm: z10 = z0[2,2,2,2] +# asm 1: pshufd $0xaa,z10=int6464#3 +# asm 2: pshufd $0xaa,z10=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z15 = z0[3,3,3,3] +# asm 1: pshufd $0xff,z15=int6464#4 +# asm 2: pshufd $0xff,z15=%xmm3 +pshufd $0xff,%xmm0,%xmm3 + +# qhasm: z0 = z0[0,0,0,0] +# asm 1: pshufd $0x00,z0=int6464#1 +# asm 2: pshufd $0x00,z0=%xmm0 +pshufd $0x00,%xmm0,%xmm0 + +# qhasm: orig5 = z5 +# asm 1: movdqa orig5=stack128#5 +# asm 2: movdqa orig5=96(%esp) +movdqa %xmm1,96(%esp) + +# qhasm: orig10 = z10 +# asm 1: movdqa orig10=stack128#6 +# asm 2: movdqa orig10=112(%esp) +movdqa %xmm2,112(%esp) + +# qhasm: orig15 = z15 +# asm 1: movdqa orig15=stack128#7 +# asm 2: movdqa orig15=128(%esp) +movdqa %xmm3,128(%esp) + +# qhasm: orig0 = z0 +# asm 1: movdqa orig0=stack128#8 +# asm 2: movdqa orig0=144(%esp) +movdqa %xmm0,144(%esp) + +# qhasm: z1 = x1 +# asm 1: movdqa z1=int6464#1 +# asm 2: movdqa z1=%xmm0 +movdqa 48(%esp),%xmm0 + +# qhasm: z6 = z1[2,2,2,2] +# asm 1: pshufd $0xaa,z6=int6464#2 +# asm 2: pshufd 
$0xaa,z6=%xmm1 +pshufd $0xaa,%xmm0,%xmm1 + +# qhasm: z11 = z1[3,3,3,3] +# asm 1: pshufd $0xff,z11=int6464#3 +# asm 2: pshufd $0xff,z11=%xmm2 +pshufd $0xff,%xmm0,%xmm2 + +# qhasm: z12 = z1[0,0,0,0] +# asm 1: pshufd $0x00,z12=int6464#4 +# asm 2: pshufd $0x00,z12=%xmm3 +pshufd $0x00,%xmm0,%xmm3 + +# qhasm: z1 = z1[1,1,1,1] +# asm 1: pshufd $0x55,z1=int6464#1 +# asm 2: pshufd $0x55,z1=%xmm0 +pshufd $0x55,%xmm0,%xmm0 + +# qhasm: orig6 = z6 +# asm 1: movdqa orig6=stack128#9 +# asm 2: movdqa orig6=160(%esp) +movdqa %xmm1,160(%esp) + +# qhasm: orig11 = z11 +# asm 1: movdqa orig11=stack128#10 +# asm 2: movdqa orig11=176(%esp) +movdqa %xmm2,176(%esp) + +# qhasm: orig12 = z12 +# asm 1: movdqa orig12=stack128#11 +# asm 2: movdqa orig12=192(%esp) +movdqa %xmm3,192(%esp) + +# qhasm: orig1 = z1 +# asm 1: movdqa orig1=stack128#12 +# asm 2: movdqa orig1=208(%esp) +movdqa %xmm0,208(%esp) + +# qhasm: z2 = x2 +# asm 1: movdqa z2=int6464#1 +# asm 2: movdqa z2=%xmm0 +movdqa 80(%esp),%xmm0 + +# qhasm: z7 = z2[3,3,3,3] +# asm 1: pshufd $0xff,z7=int6464#2 +# asm 2: pshufd $0xff,z7=%xmm1 +pshufd $0xff,%xmm0,%xmm1 + +# qhasm: z13 = z2[1,1,1,1] +# asm 1: pshufd $0x55,z13=int6464#3 +# asm 2: pshufd $0x55,z13=%xmm2 +pshufd $0x55,%xmm0,%xmm2 + +# qhasm: z2 = z2[2,2,2,2] +# asm 1: pshufd $0xaa,z2=int6464#1 +# asm 2: pshufd $0xaa,z2=%xmm0 +pshufd $0xaa,%xmm0,%xmm0 + +# qhasm: orig7 = z7 +# asm 1: movdqa orig7=stack128#13 +# asm 2: movdqa orig7=224(%esp) +movdqa %xmm1,224(%esp) + +# qhasm: orig13 = z13 +# asm 1: movdqa orig13=stack128#14 +# asm 2: movdqa orig13=240(%esp) +movdqa %xmm2,240(%esp) + +# qhasm: orig2 = z2 +# asm 1: movdqa orig2=stack128#15 +# asm 2: movdqa orig2=256(%esp) +movdqa %xmm0,256(%esp) + +# qhasm: z3 = x3 +# asm 1: movdqa z3=int6464#1 +# asm 2: movdqa z3=%xmm0 +movdqa 32(%esp),%xmm0 + +# qhasm: z4 = z3[0,0,0,0] +# asm 1: pshufd $0x00,z4=int6464#2 +# asm 2: pshufd $0x00,z4=%xmm1 +pshufd $0x00,%xmm0,%xmm1 + +# qhasm: z14 = z3[2,2,2,2] +# asm 1: pshufd $0xaa,z14=int6464#3 +# asm 
2: pshufd $0xaa,z14=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z3 = z3[3,3,3,3] +# asm 1: pshufd $0xff,z3=int6464#1 +# asm 2: pshufd $0xff,z3=%xmm0 +pshufd $0xff,%xmm0,%xmm0 + +# qhasm: orig4 = z4 +# asm 1: movdqa orig4=stack128#16 +# asm 2: movdqa orig4=272(%esp) +movdqa %xmm1,272(%esp) + +# qhasm: orig14 = z14 +# asm 1: movdqa orig14=stack128#17 +# asm 2: movdqa orig14=288(%esp) +movdqa %xmm2,288(%esp) + +# qhasm: orig3 = z3 +# asm 1: movdqa orig3=stack128#18 +# asm 2: movdqa orig3=304(%esp) +movdqa %xmm0,304(%esp) + +# qhasm: bytesatleast256: +._bytesatleast256: + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int32#2 +# asm 2: movl in8=%ecx +movl 80(%esp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int32#3 +# asm 2: movl 4+in9=%edx +movl 4+32(%esp),%edx + +# qhasm: ((uint32 *) &orig8)[0] = in8 +# asm 1: movl orig8=stack128#19 +# asm 2: movl orig8=320(%esp) +movl %ecx,320(%esp) + +# qhasm: ((uint32 *) &orig9)[0] = in9 +# asm 1: movl orig9=stack128#20 +# asm 2: movl orig9=336(%esp) +movl %edx,336(%esp) + +# qhasm: carry? 
in8 += 1 +# asm 1: add $1,x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %eax,24(%esp) + +# qhasm: i = 20 +# asm 1: mov $20,>i=int32#1 +# asm 2: mov $20,>i=%eax +mov $20,%eax + +# qhasm: z5 = orig5 +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 96(%esp),%xmm0 + +# qhasm: z10 = orig10 +# asm 1: movdqa z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 112(%esp),%xmm1 + +# qhasm: z15 = orig15 +# asm 1: movdqa z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 128(%esp),%xmm2 + +# qhasm: z14 = orig14 +# asm 1: movdqa z14=int6464#4 +# asm 2: movdqa z14=%xmm3 +movdqa 288(%esp),%xmm3 + +# qhasm: z3 = orig3 +# asm 1: movdqa z3=int6464#5 +# asm 2: movdqa z3=%xmm4 +movdqa 304(%esp),%xmm4 + +# qhasm: z6 = orig6 +# asm 1: movdqa z6=int6464#6 +# asm 2: movdqa z6=%xmm5 +movdqa 160(%esp),%xmm5 + +# qhasm: z11 = orig11 +# asm 1: movdqa z11=int6464#7 +# asm 2: movdqa z11=%xmm6 +movdqa 176(%esp),%xmm6 + +# qhasm: z1 = orig1 +# asm 1: movdqa z1=int6464#8 +# asm 2: movdqa z1=%xmm7 +movdqa 208(%esp),%xmm7 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#21 +# asm 2: movdqa z5_stack=352(%esp) +movdqa %xmm0,352(%esp) + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#22 +# asm 2: movdqa z10_stack=368(%esp) +movdqa %xmm1,368(%esp) + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#23 +# asm 2: movdqa z15_stack=384(%esp) +movdqa %xmm2,384(%esp) + +# qhasm: z14_stack = z14 +# asm 1: movdqa z14_stack=stack128#24 +# asm 2: movdqa z14_stack=400(%esp) +movdqa %xmm3,400(%esp) + +# qhasm: z3_stack = z3 +# asm 1: movdqa z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm4,416(%esp) + +# qhasm: z6_stack = z6 +# asm 1: movdqa z6_stack=stack128#26 +# asm 2: movdqa z6_stack=432(%esp) +movdqa %xmm5,432(%esp) + +# qhasm: z11_stack = z11 +# asm 1: movdqa z11_stack=stack128#27 +# asm 2: movdqa 
z11_stack=448(%esp) +movdqa %xmm6,448(%esp) + +# qhasm: z1_stack = z1 +# asm 1: movdqa z1_stack=stack128#28 +# asm 2: movdqa z1_stack=464(%esp) +movdqa %xmm7,464(%esp) + +# qhasm: z7 = orig7 +# asm 1: movdqa z7=int6464#5 +# asm 2: movdqa z7=%xmm4 +movdqa 224(%esp),%xmm4 + +# qhasm: z13 = orig13 +# asm 1: movdqa z13=int6464#6 +# asm 2: movdqa z13=%xmm5 +movdqa 240(%esp),%xmm5 + +# qhasm: z2 = orig2 +# asm 1: movdqa z2=int6464#7 +# asm 2: movdqa z2=%xmm6 +movdqa 256(%esp),%xmm6 + +# qhasm: z9 = orig9 +# asm 1: movdqa z9=int6464#8 +# asm 2: movdqa z9=%xmm7 +movdqa 336(%esp),%xmm7 + +# qhasm: p = orig0 +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 144(%esp),%xmm0 + +# qhasm: t = orig12 +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 192(%esp),%xmm2 + +# qhasm: q = orig4 +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 272(%esp),%xmm3 + +# qhasm: r = orig8 +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 320(%esp),%xmm1 + +# qhasm: z7_stack = z7 +# asm 1: movdqa z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm4,480(%esp) + +# qhasm: z13_stack = z13 +# asm 1: movdqa z13_stack=stack128#30 +# asm 2: movdqa z13_stack=496(%esp) +movdqa %xmm5,496(%esp) + +# qhasm: z2_stack = z2 +# asm 1: movdqa z2_stack=stack128#31 +# asm 2: movdqa z2_stack=512(%esp) +movdqa %xmm6,512(%esp) + +# qhasm: z9_stack = z9 +# asm 1: movdqa z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm7,528(%esp) + +# qhasm: z0_stack = p +# asm 1: movdqa z0_stack=stack128#33 +# asm 2: movdqa z0_stack=544(%esp) +movdqa %xmm0,544(%esp) + +# qhasm: z12_stack = t +# asm 1: movdqa z12_stack=stack128#34 +# asm 2: movdqa z12_stack=560(%esp) +movdqa %xmm2,560(%esp) + +# qhasm: z4_stack = q +# asm 1: movdqa z4_stack=stack128#35 +# asm 2: movdqa z4_stack=576(%esp) +movdqa %xmm3,576(%esp) + +# qhasm: z8_stack = r +# asm 1: movdqa z8_stack=stack128#36 +# asm 2: movdqa z8_stack=592(%esp) +movdqa %xmm1,592(%esp) + +# qhasm: 
mainloop1: +._mainloop1: + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z4_stack=stack128#33 +# asm 2: movdqa z4_stack=544(%esp) +movdqa %xmm3,544(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z8_stack=stack128#34 +# asm 2: movdqa z8_stack=560(%esp) +movdqa %xmm1,560(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 464(%esp),%xmm2 + +# qhasm: mp = z5_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 352(%esp),%xmm4 + +# qhasm: mq = z9_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 528(%esp),%xmm3 + +# qhasm: mr = z13_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 496(%esp),%xmm5 + +# qhasm: z12_stack = s +# asm 1: movdqa z12_stack=stack128#30 +# asm 2: movdqa z12_stack=496(%esp) +movdqa %xmm6,496(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z0_stack=stack128#21 +# asm 2: movdqa z0_stack=352(%esp) +movdqa %xmm0,352(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 
2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm3,528(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z13_stack=stack128#35 +# asm 2: movdqa z13_stack=576(%esp) +movdqa %xmm5,576(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 432(%esp),%xmm2 + +# qhasm: p = z10_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 368(%esp),%xmm0 + +# qhasm: q = z14_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 400(%esp),%xmm3 + +# qhasm: r = z2_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 512(%esp),%xmm1 + +# qhasm: z1_stack = ms +# asm 1: movdqa z1_stack=stack128#22 +# asm 2: movdqa z1_stack=368(%esp) +movdqa %xmm6,368(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z5_stack=stack128#24 +# asm 2: movdqa z5_stack=400(%esp) +movdqa %xmm4,400(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z14_stack=stack128#36 +# asm 2: movdqa z14_stack=592(%esp) +movdqa %xmm3,592(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# 
asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z2_stack=stack128#26 +# asm 2: movdqa z2_stack=432(%esp) +movdqa %xmm1,432(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 448(%esp),%xmm2 + +# qhasm: mp = z15_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 384(%esp),%xmm4 + +# qhasm: mq = z3_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 416(%esp),%xmm3 + +# qhasm: mr = z7_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 480(%esp),%xmm5 + +# qhasm: z6_stack = s +# asm 1: movdqa z6_stack=stack128#23 +# asm 2: movdqa z6_stack=384(%esp) +movdqa %xmm6,384(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z10_stack=stack128#27 +# asm 2: movdqa z10_stack=448(%esp) +movdqa %xmm0,448(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm3,416(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm5,480(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa 
mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 416(%esp),%xmm2 + +# qhasm: p = z0_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 352(%esp),%xmm0 + +# qhasm: q = z1_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 368(%esp),%xmm3 + +# qhasm: r = z2_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 432(%esp),%xmm1 + +# qhasm: z11_stack = ms +# asm 1: movdqa z11_stack=stack128#21 +# asm 2: movdqa z11_stack=352(%esp) +movdqa %xmm6,352(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z15_stack=stack128#22 +# asm 2: movdqa z15_stack=368(%esp) +movdqa %xmm4,368(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z1_stack=stack128#28 +# asm 2: movdqa z1_stack=464(%esp) +movdqa %xmm3,464(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z2_stack=stack128#31 +# asm 2: movdqa z2_stack=512(%esp) +movdqa %xmm1,512(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 544(%esp),%xmm2 + +# qhasm: mp = z5_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 400(%esp),%xmm4 + +# qhasm: mq = z6_stack +# asm 1: movdqa mq=int6464#4 +# asm 
2: movdqa mq=%xmm3 +movdqa 384(%esp),%xmm3 + +# qhasm: mr = z7_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 480(%esp),%xmm5 + +# qhasm: z3_stack = s +# asm 1: movdqa z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm6,416(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z0_stack=stack128#33 +# asm 2: movdqa z0_stack=544(%esp) +movdqa %xmm0,544(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z6_stack=stack128#26 +# asm 2: movdqa z6_stack=432(%esp) +movdqa %xmm3,432(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm5,480(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 528(%esp),%xmm2 + +# qhasm: p = z10_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 448(%esp),%xmm0 + +# qhasm: q = z11_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 352(%esp),%xmm3 + +# qhasm: r = z8_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 560(%esp),%xmm1 + +# qhasm: z4_stack = ms +# asm 1: movdqa z4_stack=stack128#34 +# asm 2: movdqa z4_stack=560(%esp) +movdqa %xmm6,560(%esp) + +# qhasm: uint32323232 
mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z5_stack=stack128#21 +# asm 2: movdqa z5_stack=352(%esp) +movdqa %xmm4,352(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z11_stack=stack128#27 +# asm 2: movdqa z11_stack=448(%esp) +movdqa %xmm3,448(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z8_stack=stack128#37 +# asm 2: movdqa z8_stack=608(%esp) +movdqa %xmm1,608(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 592(%esp),%xmm2 + +# qhasm: mp = z15_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 368(%esp),%xmm4 + +# qhasm: mq = z12_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 496(%esp),%xmm3 + +# qhasm: mr = z13_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 576(%esp),%xmm5 + +# qhasm: z9_stack = s +# asm 1: movdqa z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm6,528(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z10_stack=stack128#22 +# asm 2: movdqa z10_stack=368(%esp) +movdqa %xmm0,368(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp 
+ +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z12_stack=stack128#35 +# asm 2: movdqa z12_stack=576(%esp) +movdqa %xmm3,576(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z13_stack=stack128#30 +# asm 2: movdqa z13_stack=496(%esp) +movdqa %xmm5,496(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 576(%esp),%xmm2 + +# qhasm: p = z0_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 544(%esp),%xmm0 + +# qhasm: q = z4_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 560(%esp),%xmm3 + +# qhasm: r = z8_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 608(%esp),%xmm1 + +# qhasm: z14_stack = ms +# asm 1: movdqa z14_stack=stack128#24 +# asm 2: movdqa z14_stack=400(%esp) +movdqa %xmm6,400(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z15_stack=stack128#23 +# asm 2: movdqa z15_stack=384(%esp) +movdqa %xmm4,384(%esp) + +# qhasm: unsigned>? 
i -= 2 +# asm 1: sub $2, +ja ._mainloop1 + +# qhasm: out = out_stack +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 20(%esp),%edi + +# qhasm: z0 = z0_stack +# asm 1: movdqa z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 544(%esp),%xmm0 + +# qhasm: z1 = z1_stack +# asm 1: movdqa z1=int6464#2 +# asm 2: movdqa z1=%xmm1 +movdqa 464(%esp),%xmm1 + +# qhasm: z2 = z2_stack +# asm 1: movdqa z2=int6464#3 +# asm 2: movdqa z2=%xmm2 +movdqa 512(%esp),%xmm2 + +# qhasm: z3 = z3_stack +# asm 1: movdqa z3=int6464#4 +# asm 2: movdqa z3=%xmm3 +movdqa 416(%esp),%xmm3 + +# qhasm: uint32323232 z0 += orig0 +# asm 1: paddd in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# 
asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: in0 ^= *(uint32 *) (m + 192) +# asm 1: xorl 192(z4=int6464#1 +# asm 2: movdqa z4=%xmm0 +movdqa 560(%esp),%xmm0 + +# qhasm: z5 = z5_stack +# asm 1: movdqa z5=int6464#2 +# asm 2: movdqa z5=%xmm1 +movdqa 352(%esp),%xmm1 + +# qhasm: z6 = z6_stack +# asm 1: movdqa z6=int6464#3 +# asm 2: movdqa z6=%xmm2 +movdqa 432(%esp),%xmm2 + +# qhasm: z7 = z7_stack +# asm 1: movdqa z7=int6464#4 +# asm 2: movdqa z7=%xmm3 +movdqa 480(%esp),%xmm3 + +# qhasm: uint32323232 z4 += orig4 +# asm 1: paddd in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: in4 ^= *(uint32 *) (m + 208) +# asm 
1: xorl 208(z8=int6464#1 +# asm 2: movdqa z8=%xmm0 +movdqa 608(%esp),%xmm0 + +# qhasm: z9 = z9_stack +# asm 1: movdqa z9=int6464#2 +# asm 2: movdqa z9=%xmm1 +movdqa 528(%esp),%xmm1 + +# qhasm: z10 = z10_stack +# asm 1: movdqa z10=int6464#3 +# asm 2: movdqa z10=%xmm2 +movdqa 368(%esp),%xmm2 + +# qhasm: z11 = z11_stack +# asm 1: movdqa z11=int6464#4 +# asm 2: movdqa z11=%xmm3 +movdqa 448(%esp),%xmm3 + +# qhasm: uint32323232 z8 += orig8 +# asm 1: paddd in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: in8 ^= *(uint32 *) (m + 224) +# asm 1: xorl 224(z12=int6464#1 +# asm 2: movdqa 
z12=%xmm0 +movdqa 576(%esp),%xmm0 + +# qhasm: z13 = z13_stack +# asm 1: movdqa z13=int6464#2 +# asm 2: movdqa z13=%xmm1 +movdqa 496(%esp),%xmm1 + +# qhasm: z14 = z14_stack +# asm 1: movdqa z14=int6464#3 +# asm 2: movdqa z14=%xmm2 +movdqa 400(%esp),%xmm2 + +# qhasm: z15 = z15_stack +# asm 1: movdqa z15=int6464#4 +# asm 2: movdqa z15=%xmm3 +movdqa 384(%esp),%xmm3 + +# qhasm: uint32323232 z12 += orig12 +# asm 1: paddd in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: in12 ^= *(uint32 *) (m + 240) +# asm 1: xorl 240(bytes=int32#1 +# asm 2: movl bytes=%eax 
+movl 24(%esp),%eax + +# qhasm: bytes -= 256 +# asm 1: sub $256,out_stack=stack32#6 +# asm 2: movl out_stack=20(%esp) +movl %edi,20(%esp) + +# qhasm: unsigned? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesbetween1and255: +._bytesbetween1and255: + +# qhasm: unsignedctarget=stack32#6 +# asm 2: movl ctarget=20(%esp) +movl %edi,20(%esp) + +# qhasm: out = &tmp +# asm 1: leal out=int32#6 +# asm 2: leal out=%edi +leal 640(%esp),%edi + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %eax,%ecx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb + +# qhasm: out = &tmp +# asm 1: leal out=int32#6 +# asm 2: leal out=%edi +leal 640(%esp),%edi + +# qhasm: m = &tmp +# asm 1: leal m=int32#5 +# asm 2: leal m=%esi +leal 640(%esp),%esi +# comment:fp stack unchanged by fallthrough + +# qhasm: nocopy: +._nocopy: + +# qhasm: bytes_stack = bytes +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %eax,24(%esp) + +# qhasm: diag0 = x0 +# asm 1: movdqa diag0=int6464#1 +# asm 2: movdqa diag0=%xmm0 +movdqa 64(%esp),%xmm0 + +# qhasm: diag1 = x1 +# asm 1: movdqa diag1=int6464#2 +# asm 2: movdqa diag1=%xmm1 +movdqa 48(%esp),%xmm1 + +# qhasm: diag2 = x2 +# asm 1: movdqa diag2=int6464#3 +# asm 2: movdqa diag2=%xmm2 +movdqa 80(%esp),%xmm2 + +# qhasm: diag3 = x3 +# asm 1: movdqa diag3=int6464#4 +# asm 2: movdqa diag3=%xmm3 +movdqa 32(%esp),%xmm3 + +# qhasm: a0 = diag1 +# asm 1: movdqa a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: i = 20 +# asm 1: mov $20,>i=int32#1 +# asm 2: mov $20,>i=%eax +mov $20,%eax + +# qhasm: mainloop2: +._mainloop2: + +# qhasm: uint32323232 a0 += diag0 +# asm 1: paddd a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa 
%xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: 
uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,? 
i -= 4 +# asm 1: sub $4,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,b0=int6464#8,>b0=int6464#8 +# asm 2: pxor >b0=%xmm7,>b0=%xmm7 +pxor %xmm7,%xmm7 + +# qhasm: uint32323232 b7 >>= 14 +# asm 1: psrld $14, +ja ._mainloop2 + +# qhasm: uint32323232 diag0 += x0 +# asm 1: paddd in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in12 = diag1 +# asm 1: movd in12=int32#2 +# asm 2: movd in12=%ecx +movd %xmm1,%ecx + +# qhasm: in8 = diag2 +# asm 1: movd in8=int32#3 +# asm 2: movd in8=%edx +movd %xmm2,%edx + +# qhasm: in4 = diag3 +# asm 1: movd in4=int32#4 +# asm 2: movd in4=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in5=int32#1 +# asm 2: movd in5=%eax +movd %xmm0,%eax + +# qhasm: in1 = diag1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in13 = diag2 +# asm 1: movd in13=int32#3 +# asm 2: movd in13=%edx +movd %xmm2,%edx + +# qhasm: in9 = diag3 +# asm 1: movd in9=int32#4 +# asm 2: movd in9=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in10=int32#1 +# asm 2: movd in10=%eax +movd %xmm0,%eax + +# qhasm: in6 = diag1 +# asm 1: movd in6=int32#2 +# asm 2: movd in6=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = diag2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in14 = diag3 +# asm 1: movd in14=int32#4 +# asm 2: movd in14=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in15=int32#1 +# asm 2: movd in15=%eax +movd %xmm0,%eax + +# qhasm: in11 = diag1 +# asm 1: movd in11=int32#2 +# asm 2: movd in11=%ecx +movd %xmm1,%ecx + +# qhasm: in7 = diag2 +# asm 1: movd in7=int32#3 +# asm 2: movd in7=%edx +movd %xmm2,%edx + +# qhasm: in3 = diag3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: in15 ^= *(uint32 *) (m + 60) +# asm 1: xorl 60(bytes=int32#1 +# asm 
2: movl bytes=%eax +movl 24(%esp),%eax + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int32#2 +# asm 2: movl in8=%ecx +movl 80(%esp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int32#3 +# asm 2: movl 4+in9=%edx +movl 4+32(%esp),%edx + +# qhasm: carry? in8 += 1 +# asm 1: add $1,x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl ? unsigned +ja ._bytesatleast65 +# comment:fp stack unchanged by jump + +# qhasm: goto bytesatleast64 if !unsigned< +jae ._bytesatleast64 + +# qhasm: m = out +# asm 1: mov m=int32#5 +# asm 2: mov m=%esi +mov %edi,%esi + +# qhasm: out = ctarget +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 20(%esp),%edi + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %eax,%ecx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesatleast64: +._bytesatleast64: +# comment:fp stack unchanged by fallthrough + +# qhasm: done: +._done: + +# qhasm: eax = eax_stack +# asm 1: movl eax=int32#1 +# asm 2: movl eax=%eax +movl 0(%esp),%eax + +# qhasm: ebx = ebx_stack +# asm 1: movl ebx=int32#4 +# asm 2: movl ebx=%ebx +movl 4(%esp),%ebx + +# qhasm: esi = esi_stack +# asm 1: movl esi=int32#5 +# asm 2: movl esi=%esi +movl 8(%esp),%esi + +# qhasm: edi = edi_stack +# asm 1: movl edi=int32#6 +# asm 2: movl edi=%edi +movl 12(%esp),%edi + +# qhasm: ebp = ebp_stack +# asm 1: movl ebp=int32#7 +# asm 2: movl ebp=%ebp +movl 16(%esp),%ebp + +# qhasm: leave +add %eax,%esp +xor %eax,%eax +ret + +# qhasm: bytesatleast65: +._bytesatleast65: + +# qhasm: bytes -= 64 +# asm 1: sub $64,r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + 
+# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: bytes = arg2 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rsi,%r9 + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = out +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rdi,%rsi + +# qhasm: iv = arg3 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rdx,%rdx + +# qhasm: k = arg4 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %rcx,%r10 + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done + +# qhasm: a = 0 +# asm 1: mov $0,>a=int64#7 +# asm 2: mov $0,>a=%rax +mov $0,%rax + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = a; --i } +rep stosb + +# qhasm: out -= bytes +# asm 1: sub r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + +# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: 
movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = arg2 +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rsi,%rsi + +# qhasm: bytes = arg3 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rdx,%r9 + +# qhasm: iv = arg4 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rcx,%rdx + +# qhasm: k = arg5 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %r8,%r10 + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: start: +._start: + +# qhasm: in12 = *(uint32 *) (k + 20) +# asm 1: movl 20(in12=int64#4d +# asm 2: movl 20(in12=%ecx +movl 20(%r10),%ecx + +# qhasm: in1 = *(uint32 *) (k + 0) +# asm 1: movl 0(in1=int64#5d +# asm 2: movl 0(in1=%r8d +movl 0(%r10),%r8d + +# qhasm: in6 = *(uint32 *) (iv + 0) +# asm 1: movl 0(in6=int64#7d +# asm 2: movl 0(in6=%eax +movl 0(%rdx),%eax + +# qhasm: in11 = *(uint32 *) (k + 16) +# asm 1: movl 16(in11=int64#9d +# asm 2: movl 16(in11=%r11d +movl 16(%r10),%r11d + +# qhasm: ((uint32 *)&x1)[0] = in12 +# asm 1: movl x1=stack128#1 +# asm 2: movl x1=0(%rsp) +movl %ecx,0(%rsp) + +# qhasm: ((uint32 *)&x1)[1] = in1 +# asm 1: movl in8=int64#4 +# asm 2: mov $0,>in8=%rcx +mov $0,%rcx + +# qhasm: in13 = *(uint32 *) (k + 24) +# asm 1: movl 24(in13=int64#5d +# asm 2: movl 24(in13=%r8d +movl 24(%r10),%r8d + +# qhasm: in2 = *(uint32 *) (k + 4) +# asm 1: movl 4(in2=int64#7d +# asm 2: movl 4(in2=%eax +movl 4(%r10),%eax + +# qhasm: in7 = *(uint32 *) (iv + 4) +# asm 1: movl 4(in7=int64#3d +# asm 2: movl 4(in7=%edx +movl 4(%rdx),%edx + +# qhasm: ((uint32 *)&x2)[0] = in8 +# asm 1: movl x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %ecx,16(%rsp) + +# qhasm: ((uint32 *)&x2)[1] = in13 +# asm 1: movl in4=int64#3d +# asm 2: movl 12(in4=%edx +movl 12(%r10),%edx + +# qhasm: in9 = 0 +# asm 1: mov $0,>in9=int64#4 +# asm 2: mov $0,>in9=%rcx +mov $0,%rcx + +# 
qhasm: in14 = *(uint32 *) (k + 28) +# asm 1: movl 28(in14=int64#5d +# asm 2: movl 28(in14=%r8d +movl 28(%r10),%r8d + +# qhasm: in3 = *(uint32 *) (k + 8) +# asm 1: movl 8(in3=int64#7d +# asm 2: movl 8(in3=%eax +movl 8(%r10),%eax + +# qhasm: ((uint32 *)&x3)[0] = in4 +# asm 1: movl x3=stack128#3 +# asm 2: movl x3=32(%rsp) +movl %edx,32(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl in0=int64#3 +# asm 2: mov $1634760805,>in0=%rdx +mov $1634760805,%rdx + +# qhasm: in5 = 857760878 +# asm 1: mov $857760878,>in5=int64#4 +# asm 2: mov $857760878,>in5=%rcx +mov $857760878,%rcx + +# qhasm: in10 = 2036477234 +# asm 1: mov $2036477234,>in10=int64#5 +# asm 2: mov $2036477234,>in10=%r8 +mov $2036477234,%r8 + +# qhasm: in15 = 1797285236 +# asm 1: mov $1797285236,>in15=int64#7 +# asm 2: mov $1797285236,>in15=%rax +mov $1797285236,%rax + +# qhasm: ((uint32 *)&x0)[0] = in0 +# asm 1: movl x0=stack128#4 +# asm 2: movl x0=48(%rsp) +movl %edx,48(%rsp) + +# qhasm: ((uint32 *)&x0)[1] = in5 +# asm 1: movl z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 48(%rsp),%xmm0 + +# qhasm: z5 = z0[1,1,1,1] +# asm 1: pshufd $0x55,z5=int6464#2 +# asm 2: pshufd $0x55,z5=%xmm1 +pshufd $0x55,%xmm0,%xmm1 + +# qhasm: z10 = z0[2,2,2,2] +# asm 1: pshufd $0xaa,z10=int6464#3 +# asm 2: pshufd $0xaa,z10=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z15 = z0[3,3,3,3] +# asm 1: pshufd $0xff,z15=int6464#4 +# asm 2: pshufd $0xff,z15=%xmm3 +pshufd $0xff,%xmm0,%xmm3 + +# qhasm: z0 = z0[0,0,0,0] +# asm 1: pshufd $0x00,z0=int6464#1 +# asm 2: pshufd $0x00,z0=%xmm0 +pshufd $0x00,%xmm0,%xmm0 + +# qhasm: orig5 = z5 +# asm 1: movdqa orig5=stack128#5 +# asm 2: movdqa orig5=64(%rsp) +movdqa %xmm1,64(%rsp) + +# qhasm: orig10 = z10 +# asm 1: movdqa orig10=stack128#6 +# asm 2: movdqa orig10=80(%rsp) +movdqa %xmm2,80(%rsp) + +# qhasm: orig15 = z15 +# asm 1: movdqa orig15=stack128#7 +# asm 2: movdqa orig15=96(%rsp) +movdqa %xmm3,96(%rsp) + +# qhasm: orig0 = z0 +# asm 1: movdqa orig0=stack128#8 +# asm 2: movdqa orig0=112(%rsp) 
+movdqa %xmm0,112(%rsp) + +# qhasm: z1 = x1 +# asm 1: movdqa z1=int6464#1 +# asm 2: movdqa z1=%xmm0 +movdqa 0(%rsp),%xmm0 + +# qhasm: z6 = z1[2,2,2,2] +# asm 1: pshufd $0xaa,z6=int6464#2 +# asm 2: pshufd $0xaa,z6=%xmm1 +pshufd $0xaa,%xmm0,%xmm1 + +# qhasm: z11 = z1[3,3,3,3] +# asm 1: pshufd $0xff,z11=int6464#3 +# asm 2: pshufd $0xff,z11=%xmm2 +pshufd $0xff,%xmm0,%xmm2 + +# qhasm: z12 = z1[0,0,0,0] +# asm 1: pshufd $0x00,z12=int6464#4 +# asm 2: pshufd $0x00,z12=%xmm3 +pshufd $0x00,%xmm0,%xmm3 + +# qhasm: z1 = z1[1,1,1,1] +# asm 1: pshufd $0x55,z1=int6464#1 +# asm 2: pshufd $0x55,z1=%xmm0 +pshufd $0x55,%xmm0,%xmm0 + +# qhasm: orig6 = z6 +# asm 1: movdqa orig6=stack128#9 +# asm 2: movdqa orig6=128(%rsp) +movdqa %xmm1,128(%rsp) + +# qhasm: orig11 = z11 +# asm 1: movdqa orig11=stack128#10 +# asm 2: movdqa orig11=144(%rsp) +movdqa %xmm2,144(%rsp) + +# qhasm: orig12 = z12 +# asm 1: movdqa orig12=stack128#11 +# asm 2: movdqa orig12=160(%rsp) +movdqa %xmm3,160(%rsp) + +# qhasm: orig1 = z1 +# asm 1: movdqa orig1=stack128#12 +# asm 2: movdqa orig1=176(%rsp) +movdqa %xmm0,176(%rsp) + +# qhasm: z2 = x2 +# asm 1: movdqa z2=int6464#1 +# asm 2: movdqa z2=%xmm0 +movdqa 16(%rsp),%xmm0 + +# qhasm: z7 = z2[3,3,3,3] +# asm 1: pshufd $0xff,z7=int6464#2 +# asm 2: pshufd $0xff,z7=%xmm1 +pshufd $0xff,%xmm0,%xmm1 + +# qhasm: z13 = z2[1,1,1,1] +# asm 1: pshufd $0x55,z13=int6464#3 +# asm 2: pshufd $0x55,z13=%xmm2 +pshufd $0x55,%xmm0,%xmm2 + +# qhasm: z2 = z2[2,2,2,2] +# asm 1: pshufd $0xaa,z2=int6464#1 +# asm 2: pshufd $0xaa,z2=%xmm0 +pshufd $0xaa,%xmm0,%xmm0 + +# qhasm: orig7 = z7 +# asm 1: movdqa orig7=stack128#13 +# asm 2: movdqa orig7=192(%rsp) +movdqa %xmm1,192(%rsp) + +# qhasm: orig13 = z13 +# asm 1: movdqa orig13=stack128#14 +# asm 2: movdqa orig13=208(%rsp) +movdqa %xmm2,208(%rsp) + +# qhasm: orig2 = z2 +# asm 1: movdqa orig2=stack128#15 +# asm 2: movdqa orig2=224(%rsp) +movdqa %xmm0,224(%rsp) + +# qhasm: z3 = x3 +# asm 1: movdqa z3=int6464#1 +# asm 2: movdqa z3=%xmm0 +movdqa 
32(%rsp),%xmm0 + +# qhasm: z4 = z3[0,0,0,0] +# asm 1: pshufd $0x00,z4=int6464#2 +# asm 2: pshufd $0x00,z4=%xmm1 +pshufd $0x00,%xmm0,%xmm1 + +# qhasm: z14 = z3[2,2,2,2] +# asm 1: pshufd $0xaa,z14=int6464#3 +# asm 2: pshufd $0xaa,z14=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z3 = z3[3,3,3,3] +# asm 1: pshufd $0xff,z3=int6464#1 +# asm 2: pshufd $0xff,z3=%xmm0 +pshufd $0xff,%xmm0,%xmm0 + +# qhasm: orig4 = z4 +# asm 1: movdqa orig4=stack128#16 +# asm 2: movdqa orig4=240(%rsp) +movdqa %xmm1,240(%rsp) + +# qhasm: orig14 = z14 +# asm 1: movdqa orig14=stack128#17 +# asm 2: movdqa orig14=256(%rsp) +movdqa %xmm2,256(%rsp) + +# qhasm: orig3 = z3 +# asm 1: movdqa orig3=stack128#18 +# asm 2: movdqa orig3=272(%rsp) +movdqa %xmm0,272(%rsp) + +# qhasm: bytesatleast256: +._bytesatleast256: + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int64#3d +# asm 2: movl in8=%edx +movl 16(%rsp),%edx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int64#4d +# asm 2: movl 4+in9=%ecx +movl 4+32(%rsp),%ecx + +# qhasm: ((uint32 *) &orig8)[0] = in8 +# asm 1: movl orig8=stack128#19 +# asm 2: movl orig8=288(%rsp) +movl %edx,288(%rsp) + +# qhasm: ((uint32 *) &orig9)[0] = in9 +# asm 1: movl orig9=stack128#20 +# asm 2: movl orig9=304(%rsp) +movl %ecx,304(%rsp) + +# qhasm: in8 += 1 +# asm 1: add $1,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %edx,16(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl bytes_backup=stack64#8 +# asm 2: movq bytes_backup=408(%rsp) +movq %r9,408(%rsp) + +# qhasm: i = 12 +# asm 1: mov $12,>i=int64#3 +# asm 2: mov $12,>i=%rdx +mov $12,%rdx + +# qhasm: z5 = 
orig5 +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 64(%rsp),%xmm0 + +# qhasm: z10 = orig10 +# asm 1: movdqa z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 80(%rsp),%xmm1 + +# qhasm: z15 = orig15 +# asm 1: movdqa z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 96(%rsp),%xmm2 + +# qhasm: z14 = orig14 +# asm 1: movdqa z14=int6464#4 +# asm 2: movdqa z14=%xmm3 +movdqa 256(%rsp),%xmm3 + +# qhasm: z3 = orig3 +# asm 1: movdqa z3=int6464#5 +# asm 2: movdqa z3=%xmm4 +movdqa 272(%rsp),%xmm4 + +# qhasm: z6 = orig6 +# asm 1: movdqa z6=int6464#6 +# asm 2: movdqa z6=%xmm5 +movdqa 128(%rsp),%xmm5 + +# qhasm: z11 = orig11 +# asm 1: movdqa z11=int6464#7 +# asm 2: movdqa z11=%xmm6 +movdqa 144(%rsp),%xmm6 + +# qhasm: z1 = orig1 +# asm 1: movdqa z1=int6464#8 +# asm 2: movdqa z1=%xmm7 +movdqa 176(%rsp),%xmm7 + +# qhasm: z7 = orig7 +# asm 1: movdqa z7=int6464#9 +# asm 2: movdqa z7=%xmm8 +movdqa 192(%rsp),%xmm8 + +# qhasm: z13 = orig13 +# asm 1: movdqa z13=int6464#10 +# asm 2: movdqa z13=%xmm9 +movdqa 208(%rsp),%xmm9 + +# qhasm: z2 = orig2 +# asm 1: movdqa z2=int6464#11 +# asm 2: movdqa z2=%xmm10 +movdqa 224(%rsp),%xmm10 + +# qhasm: z9 = orig9 +# asm 1: movdqa z9=int6464#12 +# asm 2: movdqa z9=%xmm11 +movdqa 304(%rsp),%xmm11 + +# qhasm: z0 = orig0 +# asm 1: movdqa z0=int6464#13 +# asm 2: movdqa z0=%xmm12 +movdqa 112(%rsp),%xmm12 + +# qhasm: z12 = orig12 +# asm 1: movdqa z12=int6464#14 +# asm 2: movdqa z12=%xmm13 +movdqa 160(%rsp),%xmm13 + +# qhasm: z4 = orig4 +# asm 1: movdqa z4=int6464#15 +# asm 2: movdqa z4=%xmm14 +movdqa 240(%rsp),%xmm14 + +# qhasm: z8 = orig8 +# asm 1: movdqa z8=int6464#16 +# asm 2: movdqa z8=%xmm15 +movdqa 288(%rsp),%xmm15 + +# qhasm: mainloop1: +._mainloop1: + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y4 = z12 +# asm 1: 
movdqa y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: uint32323232 y4 += z0 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y9=int6464#2 +# asm 2: movdqa y9=%xmm1 +movdqa %xmm7,%xmm1 + +# qhasm: uint32323232 y9 += z5 +# asm 1: paddd r9=int6464#3 +# asm 2: movdqa r9=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y9 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y8=int6464#2 +# asm 2: movdqa y8=%xmm1 +movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y8 += z4 +# asm 1: paddd r8=int6464#3 +# asm 2: movdqa r8=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y8 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y13=int6464#2 +# asm 2: movdqa y13=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y13 += z9 +# asm 1: paddd r13=int6464#3 +# asm 2: movdqa r13=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y13 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y12=int6464#2 +# asm 2: movdqa y12=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y12 += z8 +# asm 1: paddd r12=int6464#3 +# asm 2: movdqa r12=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y12 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm11,%xmm1 + +# qhasm: uint32323232 y1 += z13 +# asm 1: paddd r1=int6464#3 +# asm 2: movdqa r1=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y1 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm15,%xmm1 + +# qhasm: uint32323232 y0 += z12 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 320(%rsp),%xmm1 + +# qhasm: z0_stack = z0 +# asm 1: movdqa z0_stack=stack128#21 +# asm 2: movdqa z0_stack=320(%rsp) +movdqa %xmm12,320(%rsp) + +# qhasm: y5 = z13 +# asm 1: movdqa 
y5=int6464#3 +# asm 2: movdqa y5=%xmm2 +movdqa %xmm9,%xmm2 + +# qhasm: uint32323232 y5 += z1 +# asm 1: paddd r5=int6464#13 +# asm 2: movdqa r5=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y5 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y14=int6464#3 +# asm 2: movdqa y14=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: uint32323232 y14 += z10 +# asm 1: paddd r14=int6464#13 +# asm 2: movdqa r14=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y14 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 336(%rsp),%xmm2 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#22 +# asm 2: movdqa z5_stack=336(%rsp) +movdqa %xmm0,336(%rsp) + +# qhasm: y3 = z11 +# asm 1: movdqa y3=int6464#1 +# asm 2: movdqa y3=%xmm0 +movdqa %xmm6,%xmm0 + +# qhasm: uint32323232 y3 += z15 +# asm 1: paddd r3=int6464#13 +# asm 2: movdqa r3=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y3 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#1 +# asm 2: movdqa y2=%xmm0 +movdqa %xmm1,%xmm0 + +# qhasm: uint32323232 y2 += z14 +# asm 1: paddd r2=int6464#13 +# asm 2: movdqa r2=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#1 +# asm 2: movdqa y7=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 y7 += z3 +# asm 1: paddd r7=int6464#13 +# asm 2: movdqa r7=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y6=int6464#1 +# asm 2: movdqa y6=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 y6 += z2 +# asm 1: paddd r6=int6464#13 +# asm 2: movdqa r6=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y11=int6464#1 +# asm 2: movdqa y11=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 y11 += z7 +# asm 1: paddd r11=int6464#13 +# asm 2: movdqa r11=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y11 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld 
$19,y10=int6464#1 +# asm 2: movdqa y10=%xmm0 +movdqa %xmm10,%xmm0 + +# qhasm: uint32323232 y10 += z6 +# asm 1: paddd r10=int6464#13 +# asm 2: movdqa r10=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y10 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 320(%rsp),%xmm0 + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: y1 = z3 +# asm 1: movdqa y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm4,%xmm1 + +# qhasm: uint32323232 y1 += z0 +# asm 1: paddd r1=int6464#13 +# asm 2: movdqa r1=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y1 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y15=int6464#2 +# asm 2: movdqa y15=%xmm1 +movdqa %xmm8,%xmm1 + +# qhasm: uint32323232 y15 += z11 +# asm 1: paddd r15=int6464#13 +# asm 2: movdqa r15=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y15 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z5=int6464#13 +# asm 2: movdqa z5=%xmm12 +movdqa 336(%rsp),%xmm12 + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y6 = z4 +# asm 1: movdqa y6=int6464#2 +# asm 2: movdqa y6=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y6 += z5 +# asm 1: paddd r6=int6464#3 +# asm 2: movdqa r6=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y6 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#2 +# asm 2: movdqa y2=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y2 += z1 +# asm 1: paddd r2=int6464#3 +# asm 2: movdqa r2=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#2 +# asm 2: movdqa y7=%xmm1 +movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y7 += z6 +# asm 1: paddd r7=int6464#3 +# asm 2: movdqa r7=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y3=int6464#2 +# asm 2: 
movdqa y3=%xmm1 +movdqa %xmm7,%xmm1 + +# qhasm: uint32323232 y3 += z2 +# asm 1: paddd r3=int6464#3 +# asm 2: movdqa r3=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y3 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm5,%xmm1 + +# qhasm: uint32323232 y4 += z7 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm10,%xmm1 + +# qhasm: uint32323232 y0 += z3 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 320(%rsp),%xmm1 + +# qhasm: z0_stack = z0 +# asm 1: movdqa z0_stack=stack128#21 +# asm 2: movdqa z0_stack=320(%rsp) +movdqa %xmm0,320(%rsp) + +# qhasm: y5 = z7 +# asm 1: movdqa y5=int6464#1 +# asm 2: movdqa y5=%xmm0 +movdqa %xmm8,%xmm0 + +# qhasm: uint32323232 y5 += z4 +# asm 1: paddd r5=int6464#3 +# asm 2: movdqa r5=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 y5 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y11=int6464#1 +# asm 2: movdqa y11=%xmm0 +movdqa %xmm11,%xmm0 + +# qhasm: uint32323232 y11 += z10 +# asm 1: paddd r11=int6464#3 +# asm 2: movdqa r11=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 y11 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 336(%rsp),%xmm2 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#22 +# asm 2: movdqa z5_stack=336(%rsp) +movdqa %xmm12,336(%rsp) + +# qhasm: y12 = z14 +# asm 1: movdqa y12=int6464#1 +# asm 2: movdqa y12=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 y12 += z15 +# asm 1: paddd r12=int6464#13 +# asm 2: movdqa r12=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y12 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y8=int6464#1 +# asm 2: movdqa y8=%xmm0 +movdqa %xmm1,%xmm0 
+ +# qhasm: uint32323232 y8 += z11 +# asm 1: paddd r8=int6464#13 +# asm 2: movdqa r8=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y8 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y13=int6464#1 +# asm 2: movdqa y13=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 y13 += z12 +# asm 1: paddd r13=int6464#13 +# asm 2: movdqa r13=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y13 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y9=int6464#1 +# asm 2: movdqa y9=%xmm0 +movdqa %xmm6,%xmm0 + +# qhasm: uint32323232 y9 += z8 +# asm 1: paddd r9=int6464#13 +# asm 2: movdqa r9=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y9 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y14=int6464#1 +# asm 2: movdqa y14=%xmm0 +movdqa %xmm13,%xmm0 + +# qhasm: uint32323232 y14 += z13 +# asm 1: paddd r14=int6464#13 +# asm 2: movdqa r14=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y14 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y10=int6464#1 +# asm 2: movdqa y10=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: uint32323232 y10 += z9 +# asm 1: paddd r10=int6464#13 +# asm 2: movdqa r10=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y10 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y15=int6464#1 +# asm 2: movdqa y15=%xmm0 +movdqa %xmm9,%xmm0 + +# qhasm: uint32323232 y15 += z14 +# asm 1: paddd r15=int6464#13 +# asm 2: movdqa r15=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y15 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z0=int6464#13 +# asm 2: movdqa z0=%xmm12 +movdqa 320(%rsp),%xmm12 + +# qhasm: z5 = z5_stack +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 336(%rsp),%xmm0 + +# qhasm: unsigned>? 
i -= 2 +# asm 1: sub $2, +ja ._mainloop1 + +# qhasm: uint32323232 z0 += orig0 +# asm 1: paddd in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: (uint32) in0 ^= *(uint32 *) (m + 192) +# asm 1: xorl 192(in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# 
qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: (uint32) in4 ^= *(uint32 *) (m + 208) +# asm 1: xorl 208(in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 
2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: (uint32) in8 ^= *(uint32 *) (m + 224) +# asm 1: xorl 224(in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: 
movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: (uint32) in12 ^= *(uint32 *) (m + 240) +# asm 1: xorl 240(bytes=int64#6 +# asm 2: movq bytes=%r9 +movq 408(%rsp),%r9 + +# qhasm: bytes -= 256 +# asm 1: sub $256,? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesbetween1and255: +._bytesbetween1and255: + +# qhasm: unsignedctarget=int64#3 +# asm 2: mov ctarget=%rdx +mov %rdi,%rdx + +# qhasm: out = &tmp +# asm 1: leaq out=int64#1 +# asm 2: leaq out=%rdi +leaq 416(%rsp),%rdi + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb + +# qhasm: out = &tmp +# asm 1: leaq out=int64#1 +# asm 2: leaq out=%rdi +leaq 416(%rsp),%rdi + +# qhasm: m = &tmp +# asm 1: leaq m=int64#2 +# asm 2: leaq m=%rsi +leaq 416(%rsp),%rsi +# comment:fp stack unchanged by fallthrough + +# qhasm: nocopy: +._nocopy: + +# qhasm: bytes_backup = bytes +# asm 1: movq bytes_backup=stack64#8 +# asm 2: movq bytes_backup=408(%rsp) +movq %r9,408(%rsp) + +# qhasm: diag0 = x0 +# asm 1: movdqa diag0=int6464#1 +# asm 2: movdqa diag0=%xmm0 +movdqa 48(%rsp),%xmm0 + +# qhasm: diag1 = x1 +# asm 1: movdqa diag1=int6464#2 +# asm 2: movdqa diag1=%xmm1 +movdqa 0(%rsp),%xmm1 + +# qhasm: diag2 = x2 +# asm 1: movdqa diag2=int6464#3 +# asm 2: movdqa diag2=%xmm2 +movdqa 16(%rsp),%xmm2 + +# qhasm: diag3 = x3 +# asm 1: movdqa diag3=int6464#4 +# asm 2: movdqa diag3=%xmm3 +movdqa 32(%rsp),%xmm3 + +# qhasm: a0 = diag1 +# asm 1: movdqa a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: i = 12 +# asm 1: mov $12,>i=int64#4 +# asm 2: mov $12,>i=%rcx +mov $12,%rcx + +# qhasm: mainloop2: +._mainloop2: + +# qhasm: uint32323232 a0 += diag0 +# asm 1: paddd a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld 
$25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 
2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,? 
i -= 4 +# asm 1: sub $4,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,b0=int6464#8,>b0=int6464#8 +# asm 2: pxor >b0=%xmm7,>b0=%xmm7 +pxor %xmm7,%xmm7 + +# qhasm: uint32323232 b7 >>= 14 +# asm 1: psrld $14, +ja ._mainloop2 + +# qhasm: uint32323232 diag0 += x0 +# asm 1: paddd in0=int64#4 +# asm 2: movd in0=%rcx +movd %xmm0,%rcx + +# qhasm: in12 = diag1 +# asm 1: movd in12=int64#5 +# asm 2: movd in12=%r8 +movd %xmm1,%r8 + +# qhasm: in8 = diag2 +# asm 1: movd in8=int64#6 +# asm 2: movd in8=%r9 +movd %xmm2,%r9 + +# qhasm: in4 = diag3 +# asm 1: movd in4=int64#7 +# asm 2: movd in4=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in1 = diag1 +# asm 1: movd in1=int64#5 +# asm 2: movd in1=%r8 +movd %xmm1,%r8 + +# qhasm: in13 = diag2 +# asm 1: movd in13=int64#6 +# asm 2: movd in13=%r9 +movd %xmm2,%r9 + +# qhasm: in9 = diag3 +# asm 1: movd in9=int64#7 +# asm 2: movd in9=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in10=int64#4 +# asm 2: movd in10=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = diag1 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm1,%r8 + +# qhasm: in2 = diag2 +# asm 1: movd in2=int64#6 +# asm 2: movd in2=%r9 +movd %xmm2,%r9 + +# qhasm: in14 = diag3 +# asm 1: movd in14=int64#7 +# asm 2: movd in14=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in15=int64#4 +# asm 2: movd in15=%rcx +movd %xmm0,%rcx + +# qhasm: in11 = diag1 +# asm 1: movd in11=int64#5 +# asm 2: movd in11=%r8 +movd %xmm1,%r8 + +# qhasm: in7 = diag2 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm2,%r9 + +# qhasm: in3 = diag3 +# asm 1: movd in3=int64#7 +# asm 2: movd in3=%rax +movd %xmm3,%rax + +# qhasm: (uint32) in15 ^= *(uint32 *) (m + 60) +# asm 1: xorl 60(bytes=int64#6 +# asm 2: movq 
bytes=%r9 +movq 408(%rsp),%r9 + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int64#4d +# asm 2: movl in8=%ecx +movl 16(%rsp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int64#5d +# asm 2: movl 4+in9=%r8d +movl 4+32(%rsp),%r8d + +# qhasm: in8 += 1 +# asm 1: add $1,in9=int64#5 +# asm 2: mov in9=%r8 +mov %rcx,%r8 + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %ecx,16(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl ? unsigned +ja ._bytesatleast65 +# comment:fp stack unchanged by jump + +# qhasm: goto bytesatleast64 if !unsigned< +jae ._bytesatleast64 + +# qhasm: m = out +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rdi,%rsi + +# qhasm: out = ctarget +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdx,%rdi + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesatleast64: +._bytesatleast64: +# comment:fp stack unchanged by fallthrough + +# qhasm: done: +._done: + +# qhasm: r11_caller = r11_stack +# asm 1: movq r11_caller=int64#9 +# asm 2: movq r11_caller=%r11 +movq 352(%rsp),%r11 + +# qhasm: r12_caller = r12_stack +# asm 1: movq r12_caller=int64#10 +# asm 2: movq r12_caller=%r12 +movq 360(%rsp),%r12 + +# qhasm: r13_caller = r13_stack +# asm 1: movq r13_caller=int64#11 +# asm 2: movq r13_caller=%r13 +movq 368(%rsp),%r13 + +# qhasm: r14_caller = r14_stack +# asm 1: movq r14_caller=int64#12 +# asm 2: movq r14_caller=%r14 +movq 376(%rsp),%r14 + +# qhasm: r15_caller = r15_stack +# asm 1: movq r15_caller=int64#13 +# asm 2: movq r15_caller=%r15 +movq 384(%rsp),%r15 + +# qhasm: rbx_caller = rbx_stack +# asm 1: movq rbx_caller=int64#14 +# asm 2: movq rbx_caller=%rbx +movq 392(%rsp),%rbx + +# qhasm: rbp_caller = rbp_stack +# asm 1: movq rbp_caller=int64#15 +# asm 2: movq rbp_caller=%rbp +movq 400(%rsp),%rbp + +# qhasm: leave +add %r11,%rsp 
+xor %rax,%rax +xor %rdx,%rdx +ret + +# qhasm: bytesatleast65: +._bytesatleast65: + +# qhasm: bytes -= 64 +# asm 1: sub $64,= 64) { + crypto_core_salsa2012(c,in,k,sigma); + + u = 1; + for (i = 8;i < 16;++i) { + u += (unsigned int) in[i]; + in[i] = u; + u >>= 8; + } + + clen -= 64; + c += 64; + } + + if (clen) { + crypto_core_salsa2012(block,in,k,sigma); + for (i = 0;i < clen;++i) c[i] = block[i]; + } + return 0; +} diff --git a/nacl/crypto_stream/salsa2012/ref/xor.c b/nacl/crypto_stream/salsa2012/ref/xor.c new file mode 100644 index 00000000..90206426 --- /dev/null +++ b/nacl/crypto_stream/salsa2012/ref/xor.c @@ -0,0 +1,52 @@ +/* +version 20080913 +D. J. Bernstein +Public domain. +*/ + +#include "crypto_core_salsa2012.h" +#include "crypto_stream.h" + +typedef unsigned int uint32; + +static const unsigned char sigma[16] = "expand 32-byte k"; + +int crypto_stream_xor( + unsigned char *c, + const unsigned char *m,unsigned long long mlen, + const unsigned char *n, + const unsigned char *k +) +{ + unsigned char in[16]; + unsigned char block[64]; + int i; + unsigned int u; + + if (!mlen) return 0; + + for (i = 0;i < 8;++i) in[i] = n[i]; + for (i = 8;i < 16;++i) in[i] = 0; + + while (mlen >= 64) { + crypto_core_salsa2012(block,in,k,sigma); + for (i = 0;i < 64;++i) c[i] = m[i] ^ block[i]; + + u = 1; + for (i = 8;i < 16;++i) { + u += (unsigned int) in[i]; + in[i] = u; + u >>= 8; + } + + mlen -= 64; + c += 64; + m += 64; + } + + if (mlen) { + crypto_core_salsa2012(block,in,k,sigma); + for (i = 0;i < mlen;++i) c[i] = m[i] ^ block[i]; + } + return 0; +} diff --git a/nacl/crypto_stream/salsa2012/used b/nacl/crypto_stream/salsa2012/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_stream/salsa2012/x86_xmm5/api.h b/nacl/crypto_stream/salsa2012/x86_xmm5/api.h new file mode 100644 index 00000000..c2b18461 --- /dev/null +++ b/nacl/crypto_stream/salsa2012/x86_xmm5/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_NONCEBYTES 8 diff --git 
a/nacl/crypto_stream/salsa2012/x86_xmm5/implementors b/nacl/crypto_stream/salsa2012/x86_xmm5/implementors new file mode 100644 index 00000000..f6fb3c73 --- /dev/null +++ b/nacl/crypto_stream/salsa2012/x86_xmm5/implementors @@ -0,0 +1 @@ +Daniel J. Bernstein diff --git a/nacl/crypto_stream/salsa2012/x86_xmm5/stream.s b/nacl/crypto_stream/salsa2012/x86_xmm5/stream.s new file mode 100644 index 00000000..c511b0d3 --- /dev/null +++ b/nacl/crypto_stream/salsa2012/x86_xmm5/stream.s @@ -0,0 +1,5078 @@ + +# qhasm: int32 a + +# qhasm: stack32 arg1 + +# qhasm: stack32 arg2 + +# qhasm: stack32 arg3 + +# qhasm: stack32 arg4 + +# qhasm: stack32 arg5 + +# qhasm: stack32 arg6 + +# qhasm: input arg1 + +# qhasm: input arg2 + +# qhasm: input arg3 + +# qhasm: input arg4 + +# qhasm: input arg5 + +# qhasm: input arg6 + +# qhasm: int32 eax + +# qhasm: int32 ebx + +# qhasm: int32 esi + +# qhasm: int32 edi + +# qhasm: int32 ebp + +# qhasm: caller eax + +# qhasm: caller ebx + +# qhasm: caller esi + +# qhasm: caller edi + +# qhasm: caller ebp + +# qhasm: int32 k + +# qhasm: int32 kbits + +# qhasm: int32 iv + +# qhasm: int32 i + +# qhasm: stack128 x0 + +# qhasm: stack128 x1 + +# qhasm: stack128 x2 + +# qhasm: stack128 x3 + +# qhasm: int32 m + +# qhasm: stack32 out_stack + +# qhasm: int32 out + +# qhasm: stack32 bytes_stack + +# qhasm: int32 bytes + +# qhasm: stack32 eax_stack + +# qhasm: stack32 ebx_stack + +# qhasm: stack32 esi_stack + +# qhasm: stack32 edi_stack + +# qhasm: stack32 ebp_stack + +# qhasm: int6464 diag0 + +# qhasm: int6464 diag1 + +# qhasm: int6464 diag2 + +# qhasm: int6464 diag3 + +# qhasm: int6464 a0 + +# qhasm: int6464 a1 + +# qhasm: int6464 a2 + +# qhasm: int6464 a3 + +# qhasm: int6464 a4 + +# qhasm: int6464 a5 + +# qhasm: int6464 a6 + +# qhasm: int6464 a7 + +# qhasm: int6464 b0 + +# qhasm: int6464 b1 + +# qhasm: int6464 b2 + +# qhasm: int6464 b3 + +# qhasm: int6464 b4 + +# qhasm: int6464 b5 + +# qhasm: int6464 b6 + +# qhasm: int6464 b7 + +# qhasm: int6464 z0 + +# qhasm: 
int6464 z1 + +# qhasm: int6464 z2 + +# qhasm: int6464 z3 + +# qhasm: int6464 z4 + +# qhasm: int6464 z5 + +# qhasm: int6464 z6 + +# qhasm: int6464 z7 + +# qhasm: int6464 z8 + +# qhasm: int6464 z9 + +# qhasm: int6464 z10 + +# qhasm: int6464 z11 + +# qhasm: int6464 z12 + +# qhasm: int6464 z13 + +# qhasm: int6464 z14 + +# qhasm: int6464 z15 + +# qhasm: stack128 z0_stack + +# qhasm: stack128 z1_stack + +# qhasm: stack128 z2_stack + +# qhasm: stack128 z3_stack + +# qhasm: stack128 z4_stack + +# qhasm: stack128 z5_stack + +# qhasm: stack128 z6_stack + +# qhasm: stack128 z7_stack + +# qhasm: stack128 z8_stack + +# qhasm: stack128 z9_stack + +# qhasm: stack128 z10_stack + +# qhasm: stack128 z11_stack + +# qhasm: stack128 z12_stack + +# qhasm: stack128 z13_stack + +# qhasm: stack128 z14_stack + +# qhasm: stack128 z15_stack + +# qhasm: stack128 orig0 + +# qhasm: stack128 orig1 + +# qhasm: stack128 orig2 + +# qhasm: stack128 orig3 + +# qhasm: stack128 orig4 + +# qhasm: stack128 orig5 + +# qhasm: stack128 orig6 + +# qhasm: stack128 orig7 + +# qhasm: stack128 orig8 + +# qhasm: stack128 orig9 + +# qhasm: stack128 orig10 + +# qhasm: stack128 orig11 + +# qhasm: stack128 orig12 + +# qhasm: stack128 orig13 + +# qhasm: stack128 orig14 + +# qhasm: stack128 orig15 + +# qhasm: int6464 p + +# qhasm: int6464 q + +# qhasm: int6464 r + +# qhasm: int6464 s + +# qhasm: int6464 t + +# qhasm: int6464 u + +# qhasm: int6464 v + +# qhasm: int6464 w + +# qhasm: int6464 mp + +# qhasm: int6464 mq + +# qhasm: int6464 mr + +# qhasm: int6464 ms + +# qhasm: int6464 mt + +# qhasm: int6464 mu + +# qhasm: int6464 mv + +# qhasm: int6464 mw + +# qhasm: int32 in0 + +# qhasm: int32 in1 + +# qhasm: int32 in2 + +# qhasm: int32 in3 + +# qhasm: int32 in4 + +# qhasm: int32 in5 + +# qhasm: int32 in6 + +# qhasm: int32 in7 + +# qhasm: int32 in8 + +# qhasm: int32 in9 + +# qhasm: int32 in10 + +# qhasm: int32 in11 + +# qhasm: int32 in12 + +# qhasm: int32 in13 + +# qhasm: int32 in14 + +# qhasm: int32 in15 + +# qhasm: 
stack512 tmp + +# qhasm: stack32 ctarget + +# qhasm: enter crypto_stream_salsa2012_x86_xmm5 +.text +.p2align 5 +.globl _crypto_stream_salsa2012_x86_xmm5 +.globl crypto_stream_salsa2012_x86_xmm5 +_crypto_stream_salsa2012_x86_xmm5: +crypto_stream_salsa2012_x86_xmm5: +mov %esp,%eax +and $31,%eax +add $704,%eax +sub %eax,%esp + +# qhasm: eax_stack = eax +# asm 1: movl eax_stack=stack32#1 +# asm 2: movl eax_stack=0(%esp) +movl %eax,0(%esp) + +# qhasm: ebx_stack = ebx +# asm 1: movl ebx_stack=stack32#2 +# asm 2: movl ebx_stack=4(%esp) +movl %ebx,4(%esp) + +# qhasm: esi_stack = esi +# asm 1: movl esi_stack=stack32#3 +# asm 2: movl esi_stack=8(%esp) +movl %esi,8(%esp) + +# qhasm: edi_stack = edi +# asm 1: movl edi_stack=stack32#4 +# asm 2: movl edi_stack=12(%esp) +movl %edi,12(%esp) + +# qhasm: ebp_stack = ebp +# asm 1: movl ebp_stack=stack32#5 +# asm 2: movl ebp_stack=16(%esp) +movl %ebp,16(%esp) + +# qhasm: bytes = arg2 +# asm 1: movl bytes=int32#3 +# asm 2: movl bytes=%edx +movl 8(%esp,%eax),%edx + +# qhasm: out = arg1 +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 4(%esp,%eax),%edi + +# qhasm: m = out +# asm 1: mov m=int32#5 +# asm 2: mov m=%esi +mov %edi,%esi + +# qhasm: iv = arg4 +# asm 1: movl iv=int32#4 +# asm 2: movl iv=%ebx +movl 16(%esp,%eax),%ebx + +# qhasm: k = arg5 +# asm 1: movl k=int32#7 +# asm 2: movl k=%ebp +movl 20(%esp,%eax),%ebp + +# qhasm: unsigned>? 
bytes - 0 +# asm 1: cmp $0, +jbe ._done + +# qhasm: a = 0 +# asm 1: mov $0,>a=int32#1 +# asm 2: mov $0,>a=%eax +mov $0,%eax + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %edx,%ecx + +# qhasm: while (i) { *out++ = a; --i } +rep stosb + +# qhasm: out -= bytes +# asm 1: subl eax_stack=stack32#1 +# asm 2: movl eax_stack=0(%esp) +movl %eax,0(%esp) + +# qhasm: ebx_stack = ebx +# asm 1: movl ebx_stack=stack32#2 +# asm 2: movl ebx_stack=4(%esp) +movl %ebx,4(%esp) + +# qhasm: esi_stack = esi +# asm 1: movl esi_stack=stack32#3 +# asm 2: movl esi_stack=8(%esp) +movl %esi,8(%esp) + +# qhasm: edi_stack = edi +# asm 1: movl edi_stack=stack32#4 +# asm 2: movl edi_stack=12(%esp) +movl %edi,12(%esp) + +# qhasm: ebp_stack = ebp +# asm 1: movl ebp_stack=stack32#5 +# asm 2: movl ebp_stack=16(%esp) +movl %ebp,16(%esp) + +# qhasm: out = arg1 +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 4(%esp,%eax),%edi + +# qhasm: m = arg2 +# asm 1: movl m=int32#5 +# asm 2: movl m=%esi +movl 8(%esp,%eax),%esi + +# qhasm: bytes = arg3 +# asm 1: movl bytes=int32#3 +# asm 2: movl bytes=%edx +movl 12(%esp,%eax),%edx + +# qhasm: iv = arg5 +# asm 1: movl iv=int32#4 +# asm 2: movl iv=%ebx +movl 20(%esp,%eax),%ebx + +# qhasm: k = arg6 +# asm 1: movl k=int32#7 +# asm 2: movl k=%ebp +movl 24(%esp,%eax),%ebp + +# qhasm: unsigned>? 
bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: start: +._start: + +# qhasm: out_stack = out +# asm 1: movl out_stack=stack32#6 +# asm 2: movl out_stack=20(%esp) +movl %edi,20(%esp) + +# qhasm: bytes_stack = bytes +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %edx,24(%esp) + +# qhasm: in4 = *(uint32 *) (k + 12) +# asm 1: movl 12(in4=int32#1 +# asm 2: movl 12(in4=%eax +movl 12(%ebp),%eax + +# qhasm: in12 = *(uint32 *) (k + 20) +# asm 1: movl 20(in12=int32#2 +# asm 2: movl 20(in12=%ecx +movl 20(%ebp),%ecx + +# qhasm: ((uint32 *)&x3)[0] = in4 +# asm 1: movl x3=stack128#1 +# asm 2: movl x3=32(%esp) +movl %eax,32(%esp) + +# qhasm: ((uint32 *)&x1)[0] = in12 +# asm 1: movl x1=stack128#2 +# asm 2: movl x1=48(%esp) +movl %ecx,48(%esp) + +# qhasm: in0 = 1634760805 +# asm 1: mov $1634760805,>in0=int32#1 +# asm 2: mov $1634760805,>in0=%eax +mov $1634760805,%eax + +# qhasm: in8 = 0 +# asm 1: mov $0,>in8=int32#2 +# asm 2: mov $0,>in8=%ecx +mov $0,%ecx + +# qhasm: ((uint32 *)&x0)[0] = in0 +# asm 1: movl x0=stack128#3 +# asm 2: movl x0=64(%esp) +movl %eax,64(%esp) + +# qhasm: ((uint32 *)&x2)[0] = in8 +# asm 1: movl x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: in6 = *(uint32 *) (iv + 0) +# asm 1: movl 0(in6=int32#1 +# asm 2: movl 0(in6=%eax +movl 0(%ebx),%eax + +# qhasm: in7 = *(uint32 *) (iv + 4) +# asm 1: movl 4(in7=int32#2 +# asm 2: movl 4(in7=%ecx +movl 4(%ebx),%ecx + +# qhasm: ((uint32 *)&x1)[2] = in6 +# asm 1: movl in9=int32#1 +# asm 2: mov $0,>in9=%eax +mov $0,%eax + +# qhasm: in10 = 2036477234 +# asm 1: mov $2036477234,>in10=int32#2 +# asm 2: mov $2036477234,>in10=%ecx +mov $2036477234,%ecx + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl in1=int32#1 +# asm 2: movl 0(in1=%eax +movl 0(%ebp),%eax + +# qhasm: in2 = *(uint32 *) (k + 4) +# asm 1: movl 4(in2=int32#2 +# asm 2: movl 4(in2=%ecx +movl 4(%ebp),%ecx + +# qhasm: in3 = *(uint32 *) (k + 8) +# asm 1: movl 
8(in3=int32#3 +# asm 2: movl 8(in3=%edx +movl 8(%ebp),%edx + +# qhasm: in5 = 857760878 +# asm 1: mov $857760878,>in5=int32#4 +# asm 2: mov $857760878,>in5=%ebx +mov $857760878,%ebx + +# qhasm: ((uint32 *)&x1)[1] = in1 +# asm 1: movl in11=int32#1 +# asm 2: movl 16(in11=%eax +movl 16(%ebp),%eax + +# qhasm: in13 = *(uint32 *) (k + 24) +# asm 1: movl 24(in13=int32#2 +# asm 2: movl 24(in13=%ecx +movl 24(%ebp),%ecx + +# qhasm: in14 = *(uint32 *) (k + 28) +# asm 1: movl 28(in14=int32#3 +# asm 2: movl 28(in14=%edx +movl 28(%ebp),%edx + +# qhasm: in15 = 1797285236 +# asm 1: mov $1797285236,>in15=int32#4 +# asm 2: mov $1797285236,>in15=%ebx +mov $1797285236,%ebx + +# qhasm: ((uint32 *)&x1)[3] = in11 +# asm 1: movl bytes=int32#1 +# asm 2: movl bytes=%eax +movl 24(%esp),%eax + +# qhasm: unsignedz0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 64(%esp),%xmm0 + +# qhasm: z5 = z0[1,1,1,1] +# asm 1: pshufd $0x55,z5=int6464#2 +# asm 2: pshufd $0x55,z5=%xmm1 +pshufd $0x55,%xmm0,%xmm1 + +# qhasm: z10 = z0[2,2,2,2] +# asm 1: pshufd $0xaa,z10=int6464#3 +# asm 2: pshufd $0xaa,z10=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z15 = z0[3,3,3,3] +# asm 1: pshufd $0xff,z15=int6464#4 +# asm 2: pshufd $0xff,z15=%xmm3 +pshufd $0xff,%xmm0,%xmm3 + +# qhasm: z0 = z0[0,0,0,0] +# asm 1: pshufd $0x00,z0=int6464#1 +# asm 2: pshufd $0x00,z0=%xmm0 +pshufd $0x00,%xmm0,%xmm0 + +# qhasm: orig5 = z5 +# asm 1: movdqa orig5=stack128#5 +# asm 2: movdqa orig5=96(%esp) +movdqa %xmm1,96(%esp) + +# qhasm: orig10 = z10 +# asm 1: movdqa orig10=stack128#6 +# asm 2: movdqa orig10=112(%esp) +movdqa %xmm2,112(%esp) + +# qhasm: orig15 = z15 +# asm 1: movdqa orig15=stack128#7 +# asm 2: movdqa orig15=128(%esp) +movdqa %xmm3,128(%esp) + +# qhasm: orig0 = z0 +# asm 1: movdqa orig0=stack128#8 +# asm 2: movdqa orig0=144(%esp) +movdqa %xmm0,144(%esp) + +# qhasm: z1 = x1 +# asm 1: movdqa z1=int6464#1 +# asm 2: movdqa z1=%xmm0 +movdqa 48(%esp),%xmm0 + +# qhasm: z6 = z1[2,2,2,2] +# asm 1: pshufd $0xaa,z6=int6464#2 +# asm 2: pshufd 
$0xaa,z6=%xmm1 +pshufd $0xaa,%xmm0,%xmm1 + +# qhasm: z11 = z1[3,3,3,3] +# asm 1: pshufd $0xff,z11=int6464#3 +# asm 2: pshufd $0xff,z11=%xmm2 +pshufd $0xff,%xmm0,%xmm2 + +# qhasm: z12 = z1[0,0,0,0] +# asm 1: pshufd $0x00,z12=int6464#4 +# asm 2: pshufd $0x00,z12=%xmm3 +pshufd $0x00,%xmm0,%xmm3 + +# qhasm: z1 = z1[1,1,1,1] +# asm 1: pshufd $0x55,z1=int6464#1 +# asm 2: pshufd $0x55,z1=%xmm0 +pshufd $0x55,%xmm0,%xmm0 + +# qhasm: orig6 = z6 +# asm 1: movdqa orig6=stack128#9 +# asm 2: movdqa orig6=160(%esp) +movdqa %xmm1,160(%esp) + +# qhasm: orig11 = z11 +# asm 1: movdqa orig11=stack128#10 +# asm 2: movdqa orig11=176(%esp) +movdqa %xmm2,176(%esp) + +# qhasm: orig12 = z12 +# asm 1: movdqa orig12=stack128#11 +# asm 2: movdqa orig12=192(%esp) +movdqa %xmm3,192(%esp) + +# qhasm: orig1 = z1 +# asm 1: movdqa orig1=stack128#12 +# asm 2: movdqa orig1=208(%esp) +movdqa %xmm0,208(%esp) + +# qhasm: z2 = x2 +# asm 1: movdqa z2=int6464#1 +# asm 2: movdqa z2=%xmm0 +movdqa 80(%esp),%xmm0 + +# qhasm: z7 = z2[3,3,3,3] +# asm 1: pshufd $0xff,z7=int6464#2 +# asm 2: pshufd $0xff,z7=%xmm1 +pshufd $0xff,%xmm0,%xmm1 + +# qhasm: z13 = z2[1,1,1,1] +# asm 1: pshufd $0x55,z13=int6464#3 +# asm 2: pshufd $0x55,z13=%xmm2 +pshufd $0x55,%xmm0,%xmm2 + +# qhasm: z2 = z2[2,2,2,2] +# asm 1: pshufd $0xaa,z2=int6464#1 +# asm 2: pshufd $0xaa,z2=%xmm0 +pshufd $0xaa,%xmm0,%xmm0 + +# qhasm: orig7 = z7 +# asm 1: movdqa orig7=stack128#13 +# asm 2: movdqa orig7=224(%esp) +movdqa %xmm1,224(%esp) + +# qhasm: orig13 = z13 +# asm 1: movdqa orig13=stack128#14 +# asm 2: movdqa orig13=240(%esp) +movdqa %xmm2,240(%esp) + +# qhasm: orig2 = z2 +# asm 1: movdqa orig2=stack128#15 +# asm 2: movdqa orig2=256(%esp) +movdqa %xmm0,256(%esp) + +# qhasm: z3 = x3 +# asm 1: movdqa z3=int6464#1 +# asm 2: movdqa z3=%xmm0 +movdqa 32(%esp),%xmm0 + +# qhasm: z4 = z3[0,0,0,0] +# asm 1: pshufd $0x00,z4=int6464#2 +# asm 2: pshufd $0x00,z4=%xmm1 +pshufd $0x00,%xmm0,%xmm1 + +# qhasm: z14 = z3[2,2,2,2] +# asm 1: pshufd $0xaa,z14=int6464#3 +# asm 
2: pshufd $0xaa,z14=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z3 = z3[3,3,3,3] +# asm 1: pshufd $0xff,z3=int6464#1 +# asm 2: pshufd $0xff,z3=%xmm0 +pshufd $0xff,%xmm0,%xmm0 + +# qhasm: orig4 = z4 +# asm 1: movdqa orig4=stack128#16 +# asm 2: movdqa orig4=272(%esp) +movdqa %xmm1,272(%esp) + +# qhasm: orig14 = z14 +# asm 1: movdqa orig14=stack128#17 +# asm 2: movdqa orig14=288(%esp) +movdqa %xmm2,288(%esp) + +# qhasm: orig3 = z3 +# asm 1: movdqa orig3=stack128#18 +# asm 2: movdqa orig3=304(%esp) +movdqa %xmm0,304(%esp) + +# qhasm: bytesatleast256: +._bytesatleast256: + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int32#2 +# asm 2: movl in8=%ecx +movl 80(%esp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int32#3 +# asm 2: movl 4+in9=%edx +movl 4+32(%esp),%edx + +# qhasm: ((uint32 *) &orig8)[0] = in8 +# asm 1: movl orig8=stack128#19 +# asm 2: movl orig8=320(%esp) +movl %ecx,320(%esp) + +# qhasm: ((uint32 *) &orig9)[0] = in9 +# asm 1: movl orig9=stack128#20 +# asm 2: movl orig9=336(%esp) +movl %edx,336(%esp) + +# qhasm: carry? 
in8 += 1 +# asm 1: add $1,x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %eax,24(%esp) + +# qhasm: i = 12 +# asm 1: mov $12,>i=int32#1 +# asm 2: mov $12,>i=%eax +mov $12,%eax + +# qhasm: z5 = orig5 +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 96(%esp),%xmm0 + +# qhasm: z10 = orig10 +# asm 1: movdqa z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 112(%esp),%xmm1 + +# qhasm: z15 = orig15 +# asm 1: movdqa z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 128(%esp),%xmm2 + +# qhasm: z14 = orig14 +# asm 1: movdqa z14=int6464#4 +# asm 2: movdqa z14=%xmm3 +movdqa 288(%esp),%xmm3 + +# qhasm: z3 = orig3 +# asm 1: movdqa z3=int6464#5 +# asm 2: movdqa z3=%xmm4 +movdqa 304(%esp),%xmm4 + +# qhasm: z6 = orig6 +# asm 1: movdqa z6=int6464#6 +# asm 2: movdqa z6=%xmm5 +movdqa 160(%esp),%xmm5 + +# qhasm: z11 = orig11 +# asm 1: movdqa z11=int6464#7 +# asm 2: movdqa z11=%xmm6 +movdqa 176(%esp),%xmm6 + +# qhasm: z1 = orig1 +# asm 1: movdqa z1=int6464#8 +# asm 2: movdqa z1=%xmm7 +movdqa 208(%esp),%xmm7 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#21 +# asm 2: movdqa z5_stack=352(%esp) +movdqa %xmm0,352(%esp) + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#22 +# asm 2: movdqa z10_stack=368(%esp) +movdqa %xmm1,368(%esp) + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#23 +# asm 2: movdqa z15_stack=384(%esp) +movdqa %xmm2,384(%esp) + +# qhasm: z14_stack = z14 +# asm 1: movdqa z14_stack=stack128#24 +# asm 2: movdqa z14_stack=400(%esp) +movdqa %xmm3,400(%esp) + +# qhasm: z3_stack = z3 +# asm 1: movdqa z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm4,416(%esp) + +# qhasm: z6_stack = z6 +# asm 1: movdqa z6_stack=stack128#26 +# asm 2: movdqa z6_stack=432(%esp) +movdqa %xmm5,432(%esp) + +# qhasm: z11_stack = z11 +# asm 1: movdqa z11_stack=stack128#27 +# asm 2: movdqa 
z11_stack=448(%esp) +movdqa %xmm6,448(%esp) + +# qhasm: z1_stack = z1 +# asm 1: movdqa z1_stack=stack128#28 +# asm 2: movdqa z1_stack=464(%esp) +movdqa %xmm7,464(%esp) + +# qhasm: z7 = orig7 +# asm 1: movdqa z7=int6464#5 +# asm 2: movdqa z7=%xmm4 +movdqa 224(%esp),%xmm4 + +# qhasm: z13 = orig13 +# asm 1: movdqa z13=int6464#6 +# asm 2: movdqa z13=%xmm5 +movdqa 240(%esp),%xmm5 + +# qhasm: z2 = orig2 +# asm 1: movdqa z2=int6464#7 +# asm 2: movdqa z2=%xmm6 +movdqa 256(%esp),%xmm6 + +# qhasm: z9 = orig9 +# asm 1: movdqa z9=int6464#8 +# asm 2: movdqa z9=%xmm7 +movdqa 336(%esp),%xmm7 + +# qhasm: p = orig0 +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 144(%esp),%xmm0 + +# qhasm: t = orig12 +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 192(%esp),%xmm2 + +# qhasm: q = orig4 +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 272(%esp),%xmm3 + +# qhasm: r = orig8 +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 320(%esp),%xmm1 + +# qhasm: z7_stack = z7 +# asm 1: movdqa z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm4,480(%esp) + +# qhasm: z13_stack = z13 +# asm 1: movdqa z13_stack=stack128#30 +# asm 2: movdqa z13_stack=496(%esp) +movdqa %xmm5,496(%esp) + +# qhasm: z2_stack = z2 +# asm 1: movdqa z2_stack=stack128#31 +# asm 2: movdqa z2_stack=512(%esp) +movdqa %xmm6,512(%esp) + +# qhasm: z9_stack = z9 +# asm 1: movdqa z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm7,528(%esp) + +# qhasm: z0_stack = p +# asm 1: movdqa z0_stack=stack128#33 +# asm 2: movdqa z0_stack=544(%esp) +movdqa %xmm0,544(%esp) + +# qhasm: z12_stack = t +# asm 1: movdqa z12_stack=stack128#34 +# asm 2: movdqa z12_stack=560(%esp) +movdqa %xmm2,560(%esp) + +# qhasm: z4_stack = q +# asm 1: movdqa z4_stack=stack128#35 +# asm 2: movdqa z4_stack=576(%esp) +movdqa %xmm3,576(%esp) + +# qhasm: z8_stack = r +# asm 1: movdqa z8_stack=stack128#36 +# asm 2: movdqa z8_stack=592(%esp) +movdqa %xmm1,592(%esp) + +# qhasm: 
mainloop1: +._mainloop1: + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z4_stack=stack128#33 +# asm 2: movdqa z4_stack=544(%esp) +movdqa %xmm3,544(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z8_stack=stack128#34 +# asm 2: movdqa z8_stack=560(%esp) +movdqa %xmm1,560(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 464(%esp),%xmm2 + +# qhasm: mp = z5_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 352(%esp),%xmm4 + +# qhasm: mq = z9_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 528(%esp),%xmm3 + +# qhasm: mr = z13_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 496(%esp),%xmm5 + +# qhasm: z12_stack = s +# asm 1: movdqa z12_stack=stack128#30 +# asm 2: movdqa z12_stack=496(%esp) +movdqa %xmm6,496(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z0_stack=stack128#21 +# asm 2: movdqa z0_stack=352(%esp) +movdqa %xmm0,352(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 
2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm3,528(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z13_stack=stack128#35 +# asm 2: movdqa z13_stack=576(%esp) +movdqa %xmm5,576(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 432(%esp),%xmm2 + +# qhasm: p = z10_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 368(%esp),%xmm0 + +# qhasm: q = z14_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 400(%esp),%xmm3 + +# qhasm: r = z2_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 512(%esp),%xmm1 + +# qhasm: z1_stack = ms +# asm 1: movdqa z1_stack=stack128#22 +# asm 2: movdqa z1_stack=368(%esp) +movdqa %xmm6,368(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z5_stack=stack128#24 +# asm 2: movdqa z5_stack=400(%esp) +movdqa %xmm4,400(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z14_stack=stack128#36 +# asm 2: movdqa z14_stack=592(%esp) +movdqa %xmm3,592(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# 
asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z2_stack=stack128#26 +# asm 2: movdqa z2_stack=432(%esp) +movdqa %xmm1,432(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 448(%esp),%xmm2 + +# qhasm: mp = z15_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 384(%esp),%xmm4 + +# qhasm: mq = z3_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 416(%esp),%xmm3 + +# qhasm: mr = z7_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 480(%esp),%xmm5 + +# qhasm: z6_stack = s +# asm 1: movdqa z6_stack=stack128#23 +# asm 2: movdqa z6_stack=384(%esp) +movdqa %xmm6,384(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z10_stack=stack128#27 +# asm 2: movdqa z10_stack=448(%esp) +movdqa %xmm0,448(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm3,416(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm5,480(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa 
mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 416(%esp),%xmm2 + +# qhasm: p = z0_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 352(%esp),%xmm0 + +# qhasm: q = z1_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 368(%esp),%xmm3 + +# qhasm: r = z2_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 432(%esp),%xmm1 + +# qhasm: z11_stack = ms +# asm 1: movdqa z11_stack=stack128#21 +# asm 2: movdqa z11_stack=352(%esp) +movdqa %xmm6,352(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z15_stack=stack128#22 +# asm 2: movdqa z15_stack=368(%esp) +movdqa %xmm4,368(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z1_stack=stack128#28 +# asm 2: movdqa z1_stack=464(%esp) +movdqa %xmm3,464(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z2_stack=stack128#31 +# asm 2: movdqa z2_stack=512(%esp) +movdqa %xmm1,512(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 544(%esp),%xmm2 + +# qhasm: mp = z5_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 400(%esp),%xmm4 + +# qhasm: mq = z6_stack +# asm 1: movdqa mq=int6464#4 +# asm 
2: movdqa mq=%xmm3 +movdqa 384(%esp),%xmm3 + +# qhasm: mr = z7_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 480(%esp),%xmm5 + +# qhasm: z3_stack = s +# asm 1: movdqa z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm6,416(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z0_stack=stack128#33 +# asm 2: movdqa z0_stack=544(%esp) +movdqa %xmm0,544(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z6_stack=stack128#26 +# asm 2: movdqa z6_stack=432(%esp) +movdqa %xmm3,432(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm5,480(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 528(%esp),%xmm2 + +# qhasm: p = z10_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 448(%esp),%xmm0 + +# qhasm: q = z11_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 352(%esp),%xmm3 + +# qhasm: r = z8_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 560(%esp),%xmm1 + +# qhasm: z4_stack = ms +# asm 1: movdqa z4_stack=stack128#34 +# asm 2: movdqa z4_stack=560(%esp) +movdqa %xmm6,560(%esp) + +# qhasm: uint32323232 
mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z5_stack=stack128#21 +# asm 2: movdqa z5_stack=352(%esp) +movdqa %xmm4,352(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z11_stack=stack128#27 +# asm 2: movdqa z11_stack=448(%esp) +movdqa %xmm3,448(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z8_stack=stack128#37 +# asm 2: movdqa z8_stack=608(%esp) +movdqa %xmm1,608(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 592(%esp),%xmm2 + +# qhasm: mp = z15_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 368(%esp),%xmm4 + +# qhasm: mq = z12_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 496(%esp),%xmm3 + +# qhasm: mr = z13_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 576(%esp),%xmm5 + +# qhasm: z9_stack = s +# asm 1: movdqa z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm6,528(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z10_stack=stack128#22 +# asm 2: movdqa z10_stack=368(%esp) +movdqa %xmm0,368(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp 
+ +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z12_stack=stack128#35 +# asm 2: movdqa z12_stack=576(%esp) +movdqa %xmm3,576(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z13_stack=stack128#30 +# asm 2: movdqa z13_stack=496(%esp) +movdqa %xmm5,496(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 576(%esp),%xmm2 + +# qhasm: p = z0_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 544(%esp),%xmm0 + +# qhasm: q = z4_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 560(%esp),%xmm3 + +# qhasm: r = z8_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 608(%esp),%xmm1 + +# qhasm: z14_stack = ms +# asm 1: movdqa z14_stack=stack128#24 +# asm 2: movdqa z14_stack=400(%esp) +movdqa %xmm6,400(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z15_stack=stack128#23 +# asm 2: movdqa z15_stack=384(%esp) +movdqa %xmm4,384(%esp) + +# qhasm: unsigned>? 
i -= 2 +# asm 1: sub $2, +ja ._mainloop1 + +# qhasm: out = out_stack +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 20(%esp),%edi + +# qhasm: z0 = z0_stack +# asm 1: movdqa z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 544(%esp),%xmm0 + +# qhasm: z1 = z1_stack +# asm 1: movdqa z1=int6464#2 +# asm 2: movdqa z1=%xmm1 +movdqa 464(%esp),%xmm1 + +# qhasm: z2 = z2_stack +# asm 1: movdqa z2=int6464#3 +# asm 2: movdqa z2=%xmm2 +movdqa 512(%esp),%xmm2 + +# qhasm: z3 = z3_stack +# asm 1: movdqa z3=int6464#4 +# asm 2: movdqa z3=%xmm3 +movdqa 416(%esp),%xmm3 + +# qhasm: uint32323232 z0 += orig0 +# asm 1: paddd in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# 
asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: in0 ^= *(uint32 *) (m + 192) +# asm 1: xorl 192(z4=int6464#1 +# asm 2: movdqa z4=%xmm0 +movdqa 560(%esp),%xmm0 + +# qhasm: z5 = z5_stack +# asm 1: movdqa z5=int6464#2 +# asm 2: movdqa z5=%xmm1 +movdqa 352(%esp),%xmm1 + +# qhasm: z6 = z6_stack +# asm 1: movdqa z6=int6464#3 +# asm 2: movdqa z6=%xmm2 +movdqa 432(%esp),%xmm2 + +# qhasm: z7 = z7_stack +# asm 1: movdqa z7=int6464#4 +# asm 2: movdqa z7=%xmm3 +movdqa 480(%esp),%xmm3 + +# qhasm: uint32323232 z4 += orig4 +# asm 1: paddd in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: in4 ^= *(uint32 *) (m + 208) +# asm 
1: xorl 208(z8=int6464#1 +# asm 2: movdqa z8=%xmm0 +movdqa 608(%esp),%xmm0 + +# qhasm: z9 = z9_stack +# asm 1: movdqa z9=int6464#2 +# asm 2: movdqa z9=%xmm1 +movdqa 528(%esp),%xmm1 + +# qhasm: z10 = z10_stack +# asm 1: movdqa z10=int6464#3 +# asm 2: movdqa z10=%xmm2 +movdqa 368(%esp),%xmm2 + +# qhasm: z11 = z11_stack +# asm 1: movdqa z11=int6464#4 +# asm 2: movdqa z11=%xmm3 +movdqa 448(%esp),%xmm3 + +# qhasm: uint32323232 z8 += orig8 +# asm 1: paddd in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: in8 ^= *(uint32 *) (m + 224) +# asm 1: xorl 224(z12=int6464#1 +# asm 2: movdqa 
z12=%xmm0 +movdqa 576(%esp),%xmm0 + +# qhasm: z13 = z13_stack +# asm 1: movdqa z13=int6464#2 +# asm 2: movdqa z13=%xmm1 +movdqa 496(%esp),%xmm1 + +# qhasm: z14 = z14_stack +# asm 1: movdqa z14=int6464#3 +# asm 2: movdqa z14=%xmm2 +movdqa 400(%esp),%xmm2 + +# qhasm: z15 = z15_stack +# asm 1: movdqa z15=int6464#4 +# asm 2: movdqa z15=%xmm3 +movdqa 384(%esp),%xmm3 + +# qhasm: uint32323232 z12 += orig12 +# asm 1: paddd in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: in12 ^= *(uint32 *) (m + 240) +# asm 1: xorl 240(bytes=int32#1 +# asm 2: movl bytes=%eax 
+movl 24(%esp),%eax + +# qhasm: bytes -= 256 +# asm 1: sub $256,out_stack=stack32#6 +# asm 2: movl out_stack=20(%esp) +movl %edi,20(%esp) + +# qhasm: unsigned? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesbetween1and255: +._bytesbetween1and255: + +# qhasm: unsignedctarget=stack32#6 +# asm 2: movl ctarget=20(%esp) +movl %edi,20(%esp) + +# qhasm: out = &tmp +# asm 1: leal out=int32#6 +# asm 2: leal out=%edi +leal 640(%esp),%edi + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %eax,%ecx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb + +# qhasm: out = &tmp +# asm 1: leal out=int32#6 +# asm 2: leal out=%edi +leal 640(%esp),%edi + +# qhasm: m = &tmp +# asm 1: leal m=int32#5 +# asm 2: leal m=%esi +leal 640(%esp),%esi +# comment:fp stack unchanged by fallthrough + +# qhasm: nocopy: +._nocopy: + +# qhasm: bytes_stack = bytes +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %eax,24(%esp) + +# qhasm: diag0 = x0 +# asm 1: movdqa diag0=int6464#1 +# asm 2: movdqa diag0=%xmm0 +movdqa 64(%esp),%xmm0 + +# qhasm: diag1 = x1 +# asm 1: movdqa diag1=int6464#2 +# asm 2: movdqa diag1=%xmm1 +movdqa 48(%esp),%xmm1 + +# qhasm: diag2 = x2 +# asm 1: movdqa diag2=int6464#3 +# asm 2: movdqa diag2=%xmm2 +movdqa 80(%esp),%xmm2 + +# qhasm: diag3 = x3 +# asm 1: movdqa diag3=int6464#4 +# asm 2: movdqa diag3=%xmm3 +movdqa 32(%esp),%xmm3 + +# qhasm: a0 = diag1 +# asm 1: movdqa a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: i = 12 +# asm 1: mov $12,>i=int32#1 +# asm 2: mov $12,>i=%eax +mov $12,%eax + +# qhasm: mainloop2: +._mainloop2: + +# qhasm: uint32323232 a0 += diag0 +# asm 1: paddd a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa 
%xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: 
uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,? 
i -= 4 +# asm 1: sub $4,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,b0=int6464#8,>b0=int6464#8 +# asm 2: pxor >b0=%xmm7,>b0=%xmm7 +pxor %xmm7,%xmm7 + +# qhasm: uint32323232 b7 >>= 14 +# asm 1: psrld $14, +ja ._mainloop2 + +# qhasm: uint32323232 diag0 += x0 +# asm 1: paddd in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in12 = diag1 +# asm 1: movd in12=int32#2 +# asm 2: movd in12=%ecx +movd %xmm1,%ecx + +# qhasm: in8 = diag2 +# asm 1: movd in8=int32#3 +# asm 2: movd in8=%edx +movd %xmm2,%edx + +# qhasm: in4 = diag3 +# asm 1: movd in4=int32#4 +# asm 2: movd in4=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in5=int32#1 +# asm 2: movd in5=%eax +movd %xmm0,%eax + +# qhasm: in1 = diag1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in13 = diag2 +# asm 1: movd in13=int32#3 +# asm 2: movd in13=%edx +movd %xmm2,%edx + +# qhasm: in9 = diag3 +# asm 1: movd in9=int32#4 +# asm 2: movd in9=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in10=int32#1 +# asm 2: movd in10=%eax +movd %xmm0,%eax + +# qhasm: in6 = diag1 +# asm 1: movd in6=int32#2 +# asm 2: movd in6=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = diag2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in14 = diag3 +# asm 1: movd in14=int32#4 +# asm 2: movd in14=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in15=int32#1 +# asm 2: movd in15=%eax +movd %xmm0,%eax + +# qhasm: in11 = diag1 +# asm 1: movd in11=int32#2 +# asm 2: movd in11=%ecx +movd %xmm1,%ecx + +# qhasm: in7 = diag2 +# asm 1: movd in7=int32#3 +# asm 2: movd in7=%edx +movd %xmm2,%edx + +# qhasm: in3 = diag3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: in15 ^= *(uint32 *) (m + 60) +# asm 1: xorl 60(bytes=int32#1 +# asm 
2: movl bytes=%eax +movl 24(%esp),%eax + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int32#2 +# asm 2: movl in8=%ecx +movl 80(%esp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int32#3 +# asm 2: movl 4+in9=%edx +movl 4+32(%esp),%edx + +# qhasm: carry? in8 += 1 +# asm 1: add $1,x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl ? unsigned +ja ._bytesatleast65 +# comment:fp stack unchanged by jump + +# qhasm: goto bytesatleast64 if !unsigned< +jae ._bytesatleast64 + +# qhasm: m = out +# asm 1: mov m=int32#5 +# asm 2: mov m=%esi +mov %edi,%esi + +# qhasm: out = ctarget +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 20(%esp),%edi + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %eax,%ecx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesatleast64: +._bytesatleast64: +# comment:fp stack unchanged by fallthrough + +# qhasm: done: +._done: + +# qhasm: eax = eax_stack +# asm 1: movl eax=int32#1 +# asm 2: movl eax=%eax +movl 0(%esp),%eax + +# qhasm: ebx = ebx_stack +# asm 1: movl ebx=int32#4 +# asm 2: movl ebx=%ebx +movl 4(%esp),%ebx + +# qhasm: esi = esi_stack +# asm 1: movl esi=int32#5 +# asm 2: movl esi=%esi +movl 8(%esp),%esi + +# qhasm: edi = edi_stack +# asm 1: movl edi=int32#6 +# asm 2: movl edi=%edi +movl 12(%esp),%edi + +# qhasm: ebp = ebp_stack +# asm 1: movl ebp=int32#7 +# asm 2: movl ebp=%ebp +movl 16(%esp),%ebp + +# qhasm: leave +add %eax,%esp +xor %eax,%eax +ret + +# qhasm: bytesatleast65: +._bytesatleast65: + +# qhasm: bytes -= 64 +# asm 1: sub $64,r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + 
+# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: bytes = arg2 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rsi,%r9 + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = out +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rdi,%rsi + +# qhasm: iv = arg3 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rdx,%rdx + +# qhasm: k = arg4 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %rcx,%r10 + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done + +# qhasm: a = 0 +# asm 1: mov $0,>a=int64#7 +# asm 2: mov $0,>a=%rax +mov $0,%rax + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = a; --i } +rep stosb + +# qhasm: out -= bytes +# asm 1: sub r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + +# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: 
movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = arg2 +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rsi,%rsi + +# qhasm: bytes = arg3 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rdx,%r9 + +# qhasm: iv = arg4 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rcx,%rdx + +# qhasm: k = arg5 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %r8,%r10 + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: start: +._start: + +# qhasm: in12 = *(uint32 *) (k + 20) +# asm 1: movl 20(in12=int64#4d +# asm 2: movl 20(in12=%ecx +movl 20(%r10),%ecx + +# qhasm: in1 = *(uint32 *) (k + 0) +# asm 1: movl 0(in1=int64#5d +# asm 2: movl 0(in1=%r8d +movl 0(%r10),%r8d + +# qhasm: in6 = *(uint32 *) (iv + 0) +# asm 1: movl 0(in6=int64#7d +# asm 2: movl 0(in6=%eax +movl 0(%rdx),%eax + +# qhasm: in11 = *(uint32 *) (k + 16) +# asm 1: movl 16(in11=int64#9d +# asm 2: movl 16(in11=%r11d +movl 16(%r10),%r11d + +# qhasm: ((uint32 *)&x1)[0] = in12 +# asm 1: movl x1=stack128#1 +# asm 2: movl x1=0(%rsp) +movl %ecx,0(%rsp) + +# qhasm: ((uint32 *)&x1)[1] = in1 +# asm 1: movl in8=int64#4 +# asm 2: mov $0,>in8=%rcx +mov $0,%rcx + +# qhasm: in13 = *(uint32 *) (k + 24) +# asm 1: movl 24(in13=int64#5d +# asm 2: movl 24(in13=%r8d +movl 24(%r10),%r8d + +# qhasm: in2 = *(uint32 *) (k + 4) +# asm 1: movl 4(in2=int64#7d +# asm 2: movl 4(in2=%eax +movl 4(%r10),%eax + +# qhasm: in7 = *(uint32 *) (iv + 4) +# asm 1: movl 4(in7=int64#3d +# asm 2: movl 4(in7=%edx +movl 4(%rdx),%edx + +# qhasm: ((uint32 *)&x2)[0] = in8 +# asm 1: movl x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %ecx,16(%rsp) + +# qhasm: ((uint32 *)&x2)[1] = in13 +# asm 1: movl in4=int64#3d +# asm 2: movl 12(in4=%edx +movl 12(%r10),%edx + +# qhasm: in9 = 0 +# asm 1: mov $0,>in9=int64#4 +# asm 2: mov $0,>in9=%rcx +mov $0,%rcx + +# 
qhasm: in14 = *(uint32 *) (k + 28) +# asm 1: movl 28(in14=int64#5d +# asm 2: movl 28(in14=%r8d +movl 28(%r10),%r8d + +# qhasm: in3 = *(uint32 *) (k + 8) +# asm 1: movl 8(in3=int64#7d +# asm 2: movl 8(in3=%eax +movl 8(%r10),%eax + +# qhasm: ((uint32 *)&x3)[0] = in4 +# asm 1: movl x3=stack128#3 +# asm 2: movl x3=32(%rsp) +movl %edx,32(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl in0=int64#3 +# asm 2: mov $1634760805,>in0=%rdx +mov $1634760805,%rdx + +# qhasm: in5 = 857760878 +# asm 1: mov $857760878,>in5=int64#4 +# asm 2: mov $857760878,>in5=%rcx +mov $857760878,%rcx + +# qhasm: in10 = 2036477234 +# asm 1: mov $2036477234,>in10=int64#5 +# asm 2: mov $2036477234,>in10=%r8 +mov $2036477234,%r8 + +# qhasm: in15 = 1797285236 +# asm 1: mov $1797285236,>in15=int64#7 +# asm 2: mov $1797285236,>in15=%rax +mov $1797285236,%rax + +# qhasm: ((uint32 *)&x0)[0] = in0 +# asm 1: movl x0=stack128#4 +# asm 2: movl x0=48(%rsp) +movl %edx,48(%rsp) + +# qhasm: ((uint32 *)&x0)[1] = in5 +# asm 1: movl z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 48(%rsp),%xmm0 + +# qhasm: z5 = z0[1,1,1,1] +# asm 1: pshufd $0x55,z5=int6464#2 +# asm 2: pshufd $0x55,z5=%xmm1 +pshufd $0x55,%xmm0,%xmm1 + +# qhasm: z10 = z0[2,2,2,2] +# asm 1: pshufd $0xaa,z10=int6464#3 +# asm 2: pshufd $0xaa,z10=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z15 = z0[3,3,3,3] +# asm 1: pshufd $0xff,z15=int6464#4 +# asm 2: pshufd $0xff,z15=%xmm3 +pshufd $0xff,%xmm0,%xmm3 + +# qhasm: z0 = z0[0,0,0,0] +# asm 1: pshufd $0x00,z0=int6464#1 +# asm 2: pshufd $0x00,z0=%xmm0 +pshufd $0x00,%xmm0,%xmm0 + +# qhasm: orig5 = z5 +# asm 1: movdqa orig5=stack128#5 +# asm 2: movdqa orig5=64(%rsp) +movdqa %xmm1,64(%rsp) + +# qhasm: orig10 = z10 +# asm 1: movdqa orig10=stack128#6 +# asm 2: movdqa orig10=80(%rsp) +movdqa %xmm2,80(%rsp) + +# qhasm: orig15 = z15 +# asm 1: movdqa orig15=stack128#7 +# asm 2: movdqa orig15=96(%rsp) +movdqa %xmm3,96(%rsp) + +# qhasm: orig0 = z0 +# asm 1: movdqa orig0=stack128#8 +# asm 2: movdqa orig0=112(%rsp) 
+movdqa %xmm0,112(%rsp) + +# qhasm: z1 = x1 +# asm 1: movdqa z1=int6464#1 +# asm 2: movdqa z1=%xmm0 +movdqa 0(%rsp),%xmm0 + +# qhasm: z6 = z1[2,2,2,2] +# asm 1: pshufd $0xaa,z6=int6464#2 +# asm 2: pshufd $0xaa,z6=%xmm1 +pshufd $0xaa,%xmm0,%xmm1 + +# qhasm: z11 = z1[3,3,3,3] +# asm 1: pshufd $0xff,z11=int6464#3 +# asm 2: pshufd $0xff,z11=%xmm2 +pshufd $0xff,%xmm0,%xmm2 + +# qhasm: z12 = z1[0,0,0,0] +# asm 1: pshufd $0x00,z12=int6464#4 +# asm 2: pshufd $0x00,z12=%xmm3 +pshufd $0x00,%xmm0,%xmm3 + +# qhasm: z1 = z1[1,1,1,1] +# asm 1: pshufd $0x55,z1=int6464#1 +# asm 2: pshufd $0x55,z1=%xmm0 +pshufd $0x55,%xmm0,%xmm0 + +# qhasm: orig6 = z6 +# asm 1: movdqa orig6=stack128#9 +# asm 2: movdqa orig6=128(%rsp) +movdqa %xmm1,128(%rsp) + +# qhasm: orig11 = z11 +# asm 1: movdqa orig11=stack128#10 +# asm 2: movdqa orig11=144(%rsp) +movdqa %xmm2,144(%rsp) + +# qhasm: orig12 = z12 +# asm 1: movdqa orig12=stack128#11 +# asm 2: movdqa orig12=160(%rsp) +movdqa %xmm3,160(%rsp) + +# qhasm: orig1 = z1 +# asm 1: movdqa orig1=stack128#12 +# asm 2: movdqa orig1=176(%rsp) +movdqa %xmm0,176(%rsp) + +# qhasm: z2 = x2 +# asm 1: movdqa z2=int6464#1 +# asm 2: movdqa z2=%xmm0 +movdqa 16(%rsp),%xmm0 + +# qhasm: z7 = z2[3,3,3,3] +# asm 1: pshufd $0xff,z7=int6464#2 +# asm 2: pshufd $0xff,z7=%xmm1 +pshufd $0xff,%xmm0,%xmm1 + +# qhasm: z13 = z2[1,1,1,1] +# asm 1: pshufd $0x55,z13=int6464#3 +# asm 2: pshufd $0x55,z13=%xmm2 +pshufd $0x55,%xmm0,%xmm2 + +# qhasm: z2 = z2[2,2,2,2] +# asm 1: pshufd $0xaa,z2=int6464#1 +# asm 2: pshufd $0xaa,z2=%xmm0 +pshufd $0xaa,%xmm0,%xmm0 + +# qhasm: orig7 = z7 +# asm 1: movdqa orig7=stack128#13 +# asm 2: movdqa orig7=192(%rsp) +movdqa %xmm1,192(%rsp) + +# qhasm: orig13 = z13 +# asm 1: movdqa orig13=stack128#14 +# asm 2: movdqa orig13=208(%rsp) +movdqa %xmm2,208(%rsp) + +# qhasm: orig2 = z2 +# asm 1: movdqa orig2=stack128#15 +# asm 2: movdqa orig2=224(%rsp) +movdqa %xmm0,224(%rsp) + +# qhasm: z3 = x3 +# asm 1: movdqa z3=int6464#1 +# asm 2: movdqa z3=%xmm0 +movdqa 
32(%rsp),%xmm0 + +# qhasm: z4 = z3[0,0,0,0] +# asm 1: pshufd $0x00,z4=int6464#2 +# asm 2: pshufd $0x00,z4=%xmm1 +pshufd $0x00,%xmm0,%xmm1 + +# qhasm: z14 = z3[2,2,2,2] +# asm 1: pshufd $0xaa,z14=int6464#3 +# asm 2: pshufd $0xaa,z14=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z3 = z3[3,3,3,3] +# asm 1: pshufd $0xff,z3=int6464#1 +# asm 2: pshufd $0xff,z3=%xmm0 +pshufd $0xff,%xmm0,%xmm0 + +# qhasm: orig4 = z4 +# asm 1: movdqa orig4=stack128#16 +# asm 2: movdqa orig4=240(%rsp) +movdqa %xmm1,240(%rsp) + +# qhasm: orig14 = z14 +# asm 1: movdqa orig14=stack128#17 +# asm 2: movdqa orig14=256(%rsp) +movdqa %xmm2,256(%rsp) + +# qhasm: orig3 = z3 +# asm 1: movdqa orig3=stack128#18 +# asm 2: movdqa orig3=272(%rsp) +movdqa %xmm0,272(%rsp) + +# qhasm: bytesatleast256: +._bytesatleast256: + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int64#3d +# asm 2: movl in8=%edx +movl 16(%rsp),%edx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int64#4d +# asm 2: movl 4+in9=%ecx +movl 4+32(%rsp),%ecx + +# qhasm: ((uint32 *) &orig8)[0] = in8 +# asm 1: movl orig8=stack128#19 +# asm 2: movl orig8=288(%rsp) +movl %edx,288(%rsp) + +# qhasm: ((uint32 *) &orig9)[0] = in9 +# asm 1: movl orig9=stack128#20 +# asm 2: movl orig9=304(%rsp) +movl %ecx,304(%rsp) + +# qhasm: in8 += 1 +# asm 1: add $1,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %edx,16(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl bytes_backup=stack64#8 +# asm 2: movq bytes_backup=408(%rsp) +movq %r9,408(%rsp) + +# qhasm: i = 8 +# asm 1: mov $8,>i=int64#3 +# asm 2: mov $8,>i=%rdx +mov $8,%rdx + +# qhasm: z5 = orig5 
+# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 64(%rsp),%xmm0 + +# qhasm: z10 = orig10 +# asm 1: movdqa z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 80(%rsp),%xmm1 + +# qhasm: z15 = orig15 +# asm 1: movdqa z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 96(%rsp),%xmm2 + +# qhasm: z14 = orig14 +# asm 1: movdqa z14=int6464#4 +# asm 2: movdqa z14=%xmm3 +movdqa 256(%rsp),%xmm3 + +# qhasm: z3 = orig3 +# asm 1: movdqa z3=int6464#5 +# asm 2: movdqa z3=%xmm4 +movdqa 272(%rsp),%xmm4 + +# qhasm: z6 = orig6 +# asm 1: movdqa z6=int6464#6 +# asm 2: movdqa z6=%xmm5 +movdqa 128(%rsp),%xmm5 + +# qhasm: z11 = orig11 +# asm 1: movdqa z11=int6464#7 +# asm 2: movdqa z11=%xmm6 +movdqa 144(%rsp),%xmm6 + +# qhasm: z1 = orig1 +# asm 1: movdqa z1=int6464#8 +# asm 2: movdqa z1=%xmm7 +movdqa 176(%rsp),%xmm7 + +# qhasm: z7 = orig7 +# asm 1: movdqa z7=int6464#9 +# asm 2: movdqa z7=%xmm8 +movdqa 192(%rsp),%xmm8 + +# qhasm: z13 = orig13 +# asm 1: movdqa z13=int6464#10 +# asm 2: movdqa z13=%xmm9 +movdqa 208(%rsp),%xmm9 + +# qhasm: z2 = orig2 +# asm 1: movdqa z2=int6464#11 +# asm 2: movdqa z2=%xmm10 +movdqa 224(%rsp),%xmm10 + +# qhasm: z9 = orig9 +# asm 1: movdqa z9=int6464#12 +# asm 2: movdqa z9=%xmm11 +movdqa 304(%rsp),%xmm11 + +# qhasm: z0 = orig0 +# asm 1: movdqa z0=int6464#13 +# asm 2: movdqa z0=%xmm12 +movdqa 112(%rsp),%xmm12 + +# qhasm: z12 = orig12 +# asm 1: movdqa z12=int6464#14 +# asm 2: movdqa z12=%xmm13 +movdqa 160(%rsp),%xmm13 + +# qhasm: z4 = orig4 +# asm 1: movdqa z4=int6464#15 +# asm 2: movdqa z4=%xmm14 +movdqa 240(%rsp),%xmm14 + +# qhasm: z8 = orig8 +# asm 1: movdqa z8=int6464#16 +# asm 2: movdqa z8=%xmm15 +movdqa 288(%rsp),%xmm15 + +# qhasm: mainloop1: +._mainloop1: + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y4 = z12 +# asm 1: movdqa 
y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: uint32323232 y4 += z0 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y9=int6464#2 +# asm 2: movdqa y9=%xmm1 +movdqa %xmm7,%xmm1 + +# qhasm: uint32323232 y9 += z5 +# asm 1: paddd r9=int6464#3 +# asm 2: movdqa r9=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y9 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y8=int6464#2 +# asm 2: movdqa y8=%xmm1 +movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y8 += z4 +# asm 1: paddd r8=int6464#3 +# asm 2: movdqa r8=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y8 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y13=int6464#2 +# asm 2: movdqa y13=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y13 += z9 +# asm 1: paddd r13=int6464#3 +# asm 2: movdqa r13=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y13 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y12=int6464#2 +# asm 2: movdqa y12=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y12 += z8 +# asm 1: paddd r12=int6464#3 +# asm 2: movdqa r12=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y12 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm11,%xmm1 + +# qhasm: uint32323232 y1 += z13 +# asm 1: paddd r1=int6464#3 +# asm 2: movdqa r1=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y1 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm15,%xmm1 + +# qhasm: uint32323232 y0 += z12 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 320(%rsp),%xmm1 + +# qhasm: z0_stack = z0 +# asm 1: movdqa z0_stack=stack128#21 +# asm 2: movdqa z0_stack=320(%rsp) +movdqa %xmm12,320(%rsp) + +# qhasm: y5 = z13 +# asm 1: movdqa y5=int6464#3 +# 
asm 2: movdqa y5=%xmm2 +movdqa %xmm9,%xmm2 + +# qhasm: uint32323232 y5 += z1 +# asm 1: paddd r5=int6464#13 +# asm 2: movdqa r5=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y5 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y14=int6464#3 +# asm 2: movdqa y14=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: uint32323232 y14 += z10 +# asm 1: paddd r14=int6464#13 +# asm 2: movdqa r14=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y14 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 336(%rsp),%xmm2 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#22 +# asm 2: movdqa z5_stack=336(%rsp) +movdqa %xmm0,336(%rsp) + +# qhasm: y3 = z11 +# asm 1: movdqa y3=int6464#1 +# asm 2: movdqa y3=%xmm0 +movdqa %xmm6,%xmm0 + +# qhasm: uint32323232 y3 += z15 +# asm 1: paddd r3=int6464#13 +# asm 2: movdqa r3=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y3 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#1 +# asm 2: movdqa y2=%xmm0 +movdqa %xmm1,%xmm0 + +# qhasm: uint32323232 y2 += z14 +# asm 1: paddd r2=int6464#13 +# asm 2: movdqa r2=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#1 +# asm 2: movdqa y7=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 y7 += z3 +# asm 1: paddd r7=int6464#13 +# asm 2: movdqa r7=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y6=int6464#1 +# asm 2: movdqa y6=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 y6 += z2 +# asm 1: paddd r6=int6464#13 +# asm 2: movdqa r6=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y11=int6464#1 +# asm 2: movdqa y11=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 y11 += z7 +# asm 1: paddd r11=int6464#13 +# asm 2: movdqa r11=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y11 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y10=int6464#1 +# 
asm 2: movdqa y10=%xmm0 +movdqa %xmm10,%xmm0 + +# qhasm: uint32323232 y10 += z6 +# asm 1: paddd r10=int6464#13 +# asm 2: movdqa r10=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y10 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 320(%rsp),%xmm0 + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: y1 = z3 +# asm 1: movdqa y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm4,%xmm1 + +# qhasm: uint32323232 y1 += z0 +# asm 1: paddd r1=int6464#13 +# asm 2: movdqa r1=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y1 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y15=int6464#2 +# asm 2: movdqa y15=%xmm1 +movdqa %xmm8,%xmm1 + +# qhasm: uint32323232 y15 += z11 +# asm 1: paddd r15=int6464#13 +# asm 2: movdqa r15=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y15 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z5=int6464#13 +# asm 2: movdqa z5=%xmm12 +movdqa 336(%rsp),%xmm12 + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y6 = z4 +# asm 1: movdqa y6=int6464#2 +# asm 2: movdqa y6=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y6 += z5 +# asm 1: paddd r6=int6464#3 +# asm 2: movdqa r6=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y6 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#2 +# asm 2: movdqa y2=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y2 += z1 +# asm 1: paddd r2=int6464#3 +# asm 2: movdqa r2=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#2 +# asm 2: movdqa y7=%xmm1 +movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y7 += z6 +# asm 1: paddd r7=int6464#3 +# asm 2: movdqa r7=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y3=int6464#2 +# asm 2: movdqa y3=%xmm1 +movdqa 
%xmm7,%xmm1 + +# qhasm: uint32323232 y3 += z2 +# asm 1: paddd r3=int6464#3 +# asm 2: movdqa r3=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y3 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm5,%xmm1 + +# qhasm: uint32323232 y4 += z7 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm10,%xmm1 + +# qhasm: uint32323232 y0 += z3 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 320(%rsp),%xmm1 + +# qhasm: z0_stack = z0 +# asm 1: movdqa z0_stack=stack128#21 +# asm 2: movdqa z0_stack=320(%rsp) +movdqa %xmm0,320(%rsp) + +# qhasm: y5 = z7 +# asm 1: movdqa y5=int6464#1 +# asm 2: movdqa y5=%xmm0 +movdqa %xmm8,%xmm0 + +# qhasm: uint32323232 y5 += z4 +# asm 1: paddd r5=int6464#3 +# asm 2: movdqa r5=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 y5 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y11=int6464#1 +# asm 2: movdqa y11=%xmm0 +movdqa %xmm11,%xmm0 + +# qhasm: uint32323232 y11 += z10 +# asm 1: paddd r11=int6464#3 +# asm 2: movdqa r11=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 y11 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 336(%rsp),%xmm2 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#22 +# asm 2: movdqa z5_stack=336(%rsp) +movdqa %xmm12,336(%rsp) + +# qhasm: y12 = z14 +# asm 1: movdqa y12=int6464#1 +# asm 2: movdqa y12=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 y12 += z15 +# asm 1: paddd r12=int6464#13 +# asm 2: movdqa r12=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y12 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y8=int6464#1 +# asm 2: movdqa y8=%xmm0 +movdqa %xmm1,%xmm0 + +# qhasm: 
uint32323232 y8 += z11 +# asm 1: paddd r8=int6464#13 +# asm 2: movdqa r8=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y8 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y13=int6464#1 +# asm 2: movdqa y13=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 y13 += z12 +# asm 1: paddd r13=int6464#13 +# asm 2: movdqa r13=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y13 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y9=int6464#1 +# asm 2: movdqa y9=%xmm0 +movdqa %xmm6,%xmm0 + +# qhasm: uint32323232 y9 += z8 +# asm 1: paddd r9=int6464#13 +# asm 2: movdqa r9=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y9 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y14=int6464#1 +# asm 2: movdqa y14=%xmm0 +movdqa %xmm13,%xmm0 + +# qhasm: uint32323232 y14 += z13 +# asm 1: paddd r14=int6464#13 +# asm 2: movdqa r14=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y14 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y10=int6464#1 +# asm 2: movdqa y10=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: uint32323232 y10 += z9 +# asm 1: paddd r10=int6464#13 +# asm 2: movdqa r10=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y10 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y15=int6464#1 +# asm 2: movdqa y15=%xmm0 +movdqa %xmm9,%xmm0 + +# qhasm: uint32323232 y15 += z14 +# asm 1: paddd r15=int6464#13 +# asm 2: movdqa r15=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y15 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z0=int6464#13 +# asm 2: movdqa z0=%xmm12 +movdqa 320(%rsp),%xmm12 + +# qhasm: z5 = z5_stack +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 336(%rsp),%xmm0 + +# qhasm: unsigned>? 
i -= 2 +# asm 1: sub $2, +ja ._mainloop1 + +# qhasm: uint32323232 z0 += orig0 +# asm 1: paddd in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: (uint32) in0 ^= *(uint32 *) (m + 192) +# asm 1: xorl 192(in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# 
qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: (uint32) in4 ^= *(uint32 *) (m + 208) +# asm 1: xorl 208(in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 
2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: (uint32) in8 ^= *(uint32 *) (m + 224) +# asm 1: xorl 224(in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: 
movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: (uint32) in12 ^= *(uint32 *) (m + 240) +# asm 1: xorl 240(bytes=int64#6 +# asm 2: movq bytes=%r9 +movq 408(%rsp),%r9 + +# qhasm: bytes -= 256 +# asm 1: sub $256,? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesbetween1and255: +._bytesbetween1and255: + +# qhasm: unsignedctarget=int64#3 +# asm 2: mov ctarget=%rdx +mov %rdi,%rdx + +# qhasm: out = &tmp +# asm 1: leaq out=int64#1 +# asm 2: leaq out=%rdi +leaq 416(%rsp),%rdi + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb + +# qhasm: out = &tmp +# asm 1: leaq out=int64#1 +# asm 2: leaq out=%rdi +leaq 416(%rsp),%rdi + +# qhasm: m = &tmp +# asm 1: leaq m=int64#2 +# asm 2: leaq m=%rsi +leaq 416(%rsp),%rsi +# comment:fp stack unchanged by fallthrough + +# qhasm: nocopy: +._nocopy: + +# qhasm: bytes_backup = bytes +# asm 1: movq bytes_backup=stack64#8 +# asm 2: movq bytes_backup=408(%rsp) +movq %r9,408(%rsp) + +# qhasm: diag0 = x0 +# asm 1: movdqa diag0=int6464#1 +# asm 2: movdqa diag0=%xmm0 +movdqa 48(%rsp),%xmm0 + +# qhasm: diag1 = x1 +# asm 1: movdqa diag1=int6464#2 +# asm 2: movdqa diag1=%xmm1 +movdqa 0(%rsp),%xmm1 + +# qhasm: diag2 = x2 +# asm 1: movdqa diag2=int6464#3 +# asm 2: movdqa diag2=%xmm2 +movdqa 16(%rsp),%xmm2 + +# qhasm: diag3 = x3 +# asm 1: movdqa diag3=int6464#4 +# asm 2: movdqa diag3=%xmm3 +movdqa 32(%rsp),%xmm3 + +# qhasm: a0 = diag1 +# asm 1: movdqa a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: i = 8 +# asm 1: mov $8,>i=int64#4 +# asm 2: mov $8,>i=%rcx +mov $8,%rcx + +# qhasm: mainloop2: +._mainloop2: + +# qhasm: uint32323232 a0 += diag0 +# asm 1: paddd a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# 
asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 
+movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,? 
i -= 4 +# asm 1: sub $4,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,b0=int6464#8,>b0=int6464#8 +# asm 2: pxor >b0=%xmm7,>b0=%xmm7 +pxor %xmm7,%xmm7 + +# qhasm: uint32323232 b7 >>= 14 +# asm 1: psrld $14, +ja ._mainloop2 + +# qhasm: uint32323232 diag0 += x0 +# asm 1: paddd in0=int64#4 +# asm 2: movd in0=%rcx +movd %xmm0,%rcx + +# qhasm: in12 = diag1 +# asm 1: movd in12=int64#5 +# asm 2: movd in12=%r8 +movd %xmm1,%r8 + +# qhasm: in8 = diag2 +# asm 1: movd in8=int64#6 +# asm 2: movd in8=%r9 +movd %xmm2,%r9 + +# qhasm: in4 = diag3 +# asm 1: movd in4=int64#7 +# asm 2: movd in4=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in1 = diag1 +# asm 1: movd in1=int64#5 +# asm 2: movd in1=%r8 +movd %xmm1,%r8 + +# qhasm: in13 = diag2 +# asm 1: movd in13=int64#6 +# asm 2: movd in13=%r9 +movd %xmm2,%r9 + +# qhasm: in9 = diag3 +# asm 1: movd in9=int64#7 +# asm 2: movd in9=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in10=int64#4 +# asm 2: movd in10=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = diag1 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm1,%r8 + +# qhasm: in2 = diag2 +# asm 1: movd in2=int64#6 +# asm 2: movd in2=%r9 +movd %xmm2,%r9 + +# qhasm: in14 = diag3 +# asm 1: movd in14=int64#7 +# asm 2: movd in14=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in15=int64#4 +# asm 2: movd in15=%rcx +movd %xmm0,%rcx + +# qhasm: in11 = diag1 +# asm 1: movd in11=int64#5 +# asm 2: movd in11=%r8 +movd %xmm1,%r8 + +# qhasm: in7 = diag2 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm2,%r9 + +# qhasm: in3 = diag3 +# asm 1: movd in3=int64#7 +# asm 2: movd in3=%rax +movd %xmm3,%rax + +# qhasm: (uint32) in15 ^= *(uint32 *) (m + 60) +# asm 1: xorl 60(bytes=int64#6 +# asm 2: movq 
bytes=%r9 +movq 408(%rsp),%r9 + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int64#4d +# asm 2: movl in8=%ecx +movl 16(%rsp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int64#5d +# asm 2: movl 4+in9=%r8d +movl 4+32(%rsp),%r8d + +# qhasm: in8 += 1 +# asm 1: add $1,in9=int64#5 +# asm 2: mov in9=%r8 +mov %rcx,%r8 + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %ecx,16(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl ? unsigned +ja ._bytesatleast65 +# comment:fp stack unchanged by jump + +# qhasm: goto bytesatleast64 if !unsigned< +jae ._bytesatleast64 + +# qhasm: m = out +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rdi,%rsi + +# qhasm: out = ctarget +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdx,%rdi + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesatleast64: +._bytesatleast64: +# comment:fp stack unchanged by fallthrough + +# qhasm: done: +._done: + +# qhasm: r11_caller = r11_stack +# asm 1: movq r11_caller=int64#9 +# asm 2: movq r11_caller=%r11 +movq 352(%rsp),%r11 + +# qhasm: r12_caller = r12_stack +# asm 1: movq r12_caller=int64#10 +# asm 2: movq r12_caller=%r12 +movq 360(%rsp),%r12 + +# qhasm: r13_caller = r13_stack +# asm 1: movq r13_caller=int64#11 +# asm 2: movq r13_caller=%r13 +movq 368(%rsp),%r13 + +# qhasm: r14_caller = r14_stack +# asm 1: movq r14_caller=int64#12 +# asm 2: movq r14_caller=%r14 +movq 376(%rsp),%r14 + +# qhasm: r15_caller = r15_stack +# asm 1: movq r15_caller=int64#13 +# asm 2: movq r15_caller=%r15 +movq 384(%rsp),%r15 + +# qhasm: rbx_caller = rbx_stack +# asm 1: movq rbx_caller=int64#14 +# asm 2: movq rbx_caller=%rbx +movq 392(%rsp),%rbx + +# qhasm: rbp_caller = rbp_stack +# asm 1: movq rbp_caller=int64#15 +# asm 2: movq rbp_caller=%rbp +movq 400(%rsp),%rbp + +# qhasm: leave +add %r11,%rsp 
+xor %rax,%rax +xor %rdx,%rdx +ret + +# qhasm: bytesatleast65: +._bytesatleast65: + +# qhasm: bytes -= 64 +# asm 1: sub $64,= 64) { + crypto_core_salsa208(c,in,k,sigma); + + u = 1; + for (i = 8;i < 16;++i) { + u += (unsigned int) in[i]; + in[i] = u; + u >>= 8; + } + + clen -= 64; + c += 64; + } + + if (clen) { + crypto_core_salsa208(block,in,k,sigma); + for (i = 0;i < clen;++i) c[i] = block[i]; + } + return 0; +} diff --git a/nacl/crypto_stream/salsa208/ref/xor.c b/nacl/crypto_stream/salsa208/ref/xor.c new file mode 100644 index 00000000..c017ac42 --- /dev/null +++ b/nacl/crypto_stream/salsa208/ref/xor.c @@ -0,0 +1,52 @@ +/* +version 20080913 +D. J. Bernstein +Public domain. +*/ + +#include "crypto_core_salsa208.h" +#include "crypto_stream.h" + +typedef unsigned int uint32; + +static const unsigned char sigma[16] = "expand 32-byte k"; + +int crypto_stream_xor( + unsigned char *c, + const unsigned char *m,unsigned long long mlen, + const unsigned char *n, + const unsigned char *k +) +{ + unsigned char in[16]; + unsigned char block[64]; + int i; + unsigned int u; + + if (!mlen) return 0; + + for (i = 0;i < 8;++i) in[i] = n[i]; + for (i = 8;i < 16;++i) in[i] = 0; + + while (mlen >= 64) { + crypto_core_salsa208(block,in,k,sigma); + for (i = 0;i < 64;++i) c[i] = m[i] ^ block[i]; + + u = 1; + for (i = 8;i < 16;++i) { + u += (unsigned int) in[i]; + in[i] = u; + u >>= 8; + } + + mlen -= 64; + c += 64; + m += 64; + } + + if (mlen) { + crypto_core_salsa208(block,in,k,sigma); + for (i = 0;i < mlen;++i) c[i] = m[i] ^ block[i]; + } + return 0; +} diff --git a/nacl/crypto_stream/salsa208/used b/nacl/crypto_stream/salsa208/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_stream/salsa208/x86_xmm5/api.h b/nacl/crypto_stream/salsa208/x86_xmm5/api.h new file mode 100644 index 00000000..c2b18461 --- /dev/null +++ b/nacl/crypto_stream/salsa208/x86_xmm5/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_NONCEBYTES 8 diff --git 
a/nacl/crypto_stream/salsa208/x86_xmm5/implementors b/nacl/crypto_stream/salsa208/x86_xmm5/implementors new file mode 100644 index 00000000..f6fb3c73 --- /dev/null +++ b/nacl/crypto_stream/salsa208/x86_xmm5/implementors @@ -0,0 +1 @@ +Daniel J. Bernstein diff --git a/nacl/crypto_stream/salsa208/x86_xmm5/stream.s b/nacl/crypto_stream/salsa208/x86_xmm5/stream.s new file mode 100644 index 00000000..065253a8 --- /dev/null +++ b/nacl/crypto_stream/salsa208/x86_xmm5/stream.s @@ -0,0 +1,5078 @@ + +# qhasm: int32 a + +# qhasm: stack32 arg1 + +# qhasm: stack32 arg2 + +# qhasm: stack32 arg3 + +# qhasm: stack32 arg4 + +# qhasm: stack32 arg5 + +# qhasm: stack32 arg6 + +# qhasm: input arg1 + +# qhasm: input arg2 + +# qhasm: input arg3 + +# qhasm: input arg4 + +# qhasm: input arg5 + +# qhasm: input arg6 + +# qhasm: int32 eax + +# qhasm: int32 ebx + +# qhasm: int32 esi + +# qhasm: int32 edi + +# qhasm: int32 ebp + +# qhasm: caller eax + +# qhasm: caller ebx + +# qhasm: caller esi + +# qhasm: caller edi + +# qhasm: caller ebp + +# qhasm: int32 k + +# qhasm: int32 kbits + +# qhasm: int32 iv + +# qhasm: int32 i + +# qhasm: stack128 x0 + +# qhasm: stack128 x1 + +# qhasm: stack128 x2 + +# qhasm: stack128 x3 + +# qhasm: int32 m + +# qhasm: stack32 out_stack + +# qhasm: int32 out + +# qhasm: stack32 bytes_stack + +# qhasm: int32 bytes + +# qhasm: stack32 eax_stack + +# qhasm: stack32 ebx_stack + +# qhasm: stack32 esi_stack + +# qhasm: stack32 edi_stack + +# qhasm: stack32 ebp_stack + +# qhasm: int6464 diag0 + +# qhasm: int6464 diag1 + +# qhasm: int6464 diag2 + +# qhasm: int6464 diag3 + +# qhasm: int6464 a0 + +# qhasm: int6464 a1 + +# qhasm: int6464 a2 + +# qhasm: int6464 a3 + +# qhasm: int6464 a4 + +# qhasm: int6464 a5 + +# qhasm: int6464 a6 + +# qhasm: int6464 a7 + +# qhasm: int6464 b0 + +# qhasm: int6464 b1 + +# qhasm: int6464 b2 + +# qhasm: int6464 b3 + +# qhasm: int6464 b4 + +# qhasm: int6464 b5 + +# qhasm: int6464 b6 + +# qhasm: int6464 b7 + +# qhasm: int6464 z0 + +# qhasm: int6464 
z1 + +# qhasm: int6464 z2 + +# qhasm: int6464 z3 + +# qhasm: int6464 z4 + +# qhasm: int6464 z5 + +# qhasm: int6464 z6 + +# qhasm: int6464 z7 + +# qhasm: int6464 z8 + +# qhasm: int6464 z9 + +# qhasm: int6464 z10 + +# qhasm: int6464 z11 + +# qhasm: int6464 z12 + +# qhasm: int6464 z13 + +# qhasm: int6464 z14 + +# qhasm: int6464 z15 + +# qhasm: stack128 z0_stack + +# qhasm: stack128 z1_stack + +# qhasm: stack128 z2_stack + +# qhasm: stack128 z3_stack + +# qhasm: stack128 z4_stack + +# qhasm: stack128 z5_stack + +# qhasm: stack128 z6_stack + +# qhasm: stack128 z7_stack + +# qhasm: stack128 z8_stack + +# qhasm: stack128 z9_stack + +# qhasm: stack128 z10_stack + +# qhasm: stack128 z11_stack + +# qhasm: stack128 z12_stack + +# qhasm: stack128 z13_stack + +# qhasm: stack128 z14_stack + +# qhasm: stack128 z15_stack + +# qhasm: stack128 orig0 + +# qhasm: stack128 orig1 + +# qhasm: stack128 orig2 + +# qhasm: stack128 orig3 + +# qhasm: stack128 orig4 + +# qhasm: stack128 orig5 + +# qhasm: stack128 orig6 + +# qhasm: stack128 orig7 + +# qhasm: stack128 orig8 + +# qhasm: stack128 orig9 + +# qhasm: stack128 orig10 + +# qhasm: stack128 orig11 + +# qhasm: stack128 orig12 + +# qhasm: stack128 orig13 + +# qhasm: stack128 orig14 + +# qhasm: stack128 orig15 + +# qhasm: int6464 p + +# qhasm: int6464 q + +# qhasm: int6464 r + +# qhasm: int6464 s + +# qhasm: int6464 t + +# qhasm: int6464 u + +# qhasm: int6464 v + +# qhasm: int6464 w + +# qhasm: int6464 mp + +# qhasm: int6464 mq + +# qhasm: int6464 mr + +# qhasm: int6464 ms + +# qhasm: int6464 mt + +# qhasm: int6464 mu + +# qhasm: int6464 mv + +# qhasm: int6464 mw + +# qhasm: int32 in0 + +# qhasm: int32 in1 + +# qhasm: int32 in2 + +# qhasm: int32 in3 + +# qhasm: int32 in4 + +# qhasm: int32 in5 + +# qhasm: int32 in6 + +# qhasm: int32 in7 + +# qhasm: int32 in8 + +# qhasm: int32 in9 + +# qhasm: int32 in10 + +# qhasm: int32 in11 + +# qhasm: int32 in12 + +# qhasm: int32 in13 + +# qhasm: int32 in14 + +# qhasm: int32 in15 + +# qhasm: stack512 tmp + 
+# qhasm: stack32 ctarget + +# qhasm: enter crypto_stream_salsa208_x86_xmm5 +.text +.p2align 5 +.globl _crypto_stream_salsa208_x86_xmm5 +.globl crypto_stream_salsa208_x86_xmm5 +_crypto_stream_salsa208_x86_xmm5: +crypto_stream_salsa208_x86_xmm5: +mov %esp,%eax +and $31,%eax +add $704,%eax +sub %eax,%esp + +# qhasm: eax_stack = eax +# asm 1: movl eax_stack=stack32#1 +# asm 2: movl eax_stack=0(%esp) +movl %eax,0(%esp) + +# qhasm: ebx_stack = ebx +# asm 1: movl ebx_stack=stack32#2 +# asm 2: movl ebx_stack=4(%esp) +movl %ebx,4(%esp) + +# qhasm: esi_stack = esi +# asm 1: movl esi_stack=stack32#3 +# asm 2: movl esi_stack=8(%esp) +movl %esi,8(%esp) + +# qhasm: edi_stack = edi +# asm 1: movl edi_stack=stack32#4 +# asm 2: movl edi_stack=12(%esp) +movl %edi,12(%esp) + +# qhasm: ebp_stack = ebp +# asm 1: movl ebp_stack=stack32#5 +# asm 2: movl ebp_stack=16(%esp) +movl %ebp,16(%esp) + +# qhasm: bytes = arg2 +# asm 1: movl bytes=int32#3 +# asm 2: movl bytes=%edx +movl 8(%esp,%eax),%edx + +# qhasm: out = arg1 +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 4(%esp,%eax),%edi + +# qhasm: m = out +# asm 1: mov m=int32#5 +# asm 2: mov m=%esi +mov %edi,%esi + +# qhasm: iv = arg4 +# asm 1: movl iv=int32#4 +# asm 2: movl iv=%ebx +movl 16(%esp,%eax),%ebx + +# qhasm: k = arg5 +# asm 1: movl k=int32#7 +# asm 2: movl k=%ebp +movl 20(%esp,%eax),%ebp + +# qhasm: unsigned>? 
bytes - 0 +# asm 1: cmp $0, +jbe ._done + +# qhasm: a = 0 +# asm 1: mov $0,>a=int32#1 +# asm 2: mov $0,>a=%eax +mov $0,%eax + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %edx,%ecx + +# qhasm: while (i) { *out++ = a; --i } +rep stosb + +# qhasm: out -= bytes +# asm 1: subl eax_stack=stack32#1 +# asm 2: movl eax_stack=0(%esp) +movl %eax,0(%esp) + +# qhasm: ebx_stack = ebx +# asm 1: movl ebx_stack=stack32#2 +# asm 2: movl ebx_stack=4(%esp) +movl %ebx,4(%esp) + +# qhasm: esi_stack = esi +# asm 1: movl esi_stack=stack32#3 +# asm 2: movl esi_stack=8(%esp) +movl %esi,8(%esp) + +# qhasm: edi_stack = edi +# asm 1: movl edi_stack=stack32#4 +# asm 2: movl edi_stack=12(%esp) +movl %edi,12(%esp) + +# qhasm: ebp_stack = ebp +# asm 1: movl ebp_stack=stack32#5 +# asm 2: movl ebp_stack=16(%esp) +movl %ebp,16(%esp) + +# qhasm: out = arg1 +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 4(%esp,%eax),%edi + +# qhasm: m = arg2 +# asm 1: movl m=int32#5 +# asm 2: movl m=%esi +movl 8(%esp,%eax),%esi + +# qhasm: bytes = arg3 +# asm 1: movl bytes=int32#3 +# asm 2: movl bytes=%edx +movl 12(%esp,%eax),%edx + +# qhasm: iv = arg5 +# asm 1: movl iv=int32#4 +# asm 2: movl iv=%ebx +movl 20(%esp,%eax),%ebx + +# qhasm: k = arg6 +# asm 1: movl k=int32#7 +# asm 2: movl k=%ebp +movl 24(%esp,%eax),%ebp + +# qhasm: unsigned>? 
bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: start: +._start: + +# qhasm: out_stack = out +# asm 1: movl out_stack=stack32#6 +# asm 2: movl out_stack=20(%esp) +movl %edi,20(%esp) + +# qhasm: bytes_stack = bytes +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %edx,24(%esp) + +# qhasm: in4 = *(uint32 *) (k + 12) +# asm 1: movl 12(in4=int32#1 +# asm 2: movl 12(in4=%eax +movl 12(%ebp),%eax + +# qhasm: in12 = *(uint32 *) (k + 20) +# asm 1: movl 20(in12=int32#2 +# asm 2: movl 20(in12=%ecx +movl 20(%ebp),%ecx + +# qhasm: ((uint32 *)&x3)[0] = in4 +# asm 1: movl x3=stack128#1 +# asm 2: movl x3=32(%esp) +movl %eax,32(%esp) + +# qhasm: ((uint32 *)&x1)[0] = in12 +# asm 1: movl x1=stack128#2 +# asm 2: movl x1=48(%esp) +movl %ecx,48(%esp) + +# qhasm: in0 = 1634760805 +# asm 1: mov $1634760805,>in0=int32#1 +# asm 2: mov $1634760805,>in0=%eax +mov $1634760805,%eax + +# qhasm: in8 = 0 +# asm 1: mov $0,>in8=int32#2 +# asm 2: mov $0,>in8=%ecx +mov $0,%ecx + +# qhasm: ((uint32 *)&x0)[0] = in0 +# asm 1: movl x0=stack128#3 +# asm 2: movl x0=64(%esp) +movl %eax,64(%esp) + +# qhasm: ((uint32 *)&x2)[0] = in8 +# asm 1: movl x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: in6 = *(uint32 *) (iv + 0) +# asm 1: movl 0(in6=int32#1 +# asm 2: movl 0(in6=%eax +movl 0(%ebx),%eax + +# qhasm: in7 = *(uint32 *) (iv + 4) +# asm 1: movl 4(in7=int32#2 +# asm 2: movl 4(in7=%ecx +movl 4(%ebx),%ecx + +# qhasm: ((uint32 *)&x1)[2] = in6 +# asm 1: movl in9=int32#1 +# asm 2: mov $0,>in9=%eax +mov $0,%eax + +# qhasm: in10 = 2036477234 +# asm 1: mov $2036477234,>in10=int32#2 +# asm 2: mov $2036477234,>in10=%ecx +mov $2036477234,%ecx + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl in1=int32#1 +# asm 2: movl 0(in1=%eax +movl 0(%ebp),%eax + +# qhasm: in2 = *(uint32 *) (k + 4) +# asm 1: movl 4(in2=int32#2 +# asm 2: movl 4(in2=%ecx +movl 4(%ebp),%ecx + +# qhasm: in3 = *(uint32 *) (k + 8) +# asm 1: movl 
8(in3=int32#3 +# asm 2: movl 8(in3=%edx +movl 8(%ebp),%edx + +# qhasm: in5 = 857760878 +# asm 1: mov $857760878,>in5=int32#4 +# asm 2: mov $857760878,>in5=%ebx +mov $857760878,%ebx + +# qhasm: ((uint32 *)&x1)[1] = in1 +# asm 1: movl in11=int32#1 +# asm 2: movl 16(in11=%eax +movl 16(%ebp),%eax + +# qhasm: in13 = *(uint32 *) (k + 24) +# asm 1: movl 24(in13=int32#2 +# asm 2: movl 24(in13=%ecx +movl 24(%ebp),%ecx + +# qhasm: in14 = *(uint32 *) (k + 28) +# asm 1: movl 28(in14=int32#3 +# asm 2: movl 28(in14=%edx +movl 28(%ebp),%edx + +# qhasm: in15 = 1797285236 +# asm 1: mov $1797285236,>in15=int32#4 +# asm 2: mov $1797285236,>in15=%ebx +mov $1797285236,%ebx + +# qhasm: ((uint32 *)&x1)[3] = in11 +# asm 1: movl bytes=int32#1 +# asm 2: movl bytes=%eax +movl 24(%esp),%eax + +# qhasm: unsignedz0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 64(%esp),%xmm0 + +# qhasm: z5 = z0[1,1,1,1] +# asm 1: pshufd $0x55,z5=int6464#2 +# asm 2: pshufd $0x55,z5=%xmm1 +pshufd $0x55,%xmm0,%xmm1 + +# qhasm: z10 = z0[2,2,2,2] +# asm 1: pshufd $0xaa,z10=int6464#3 +# asm 2: pshufd $0xaa,z10=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z15 = z0[3,3,3,3] +# asm 1: pshufd $0xff,z15=int6464#4 +# asm 2: pshufd $0xff,z15=%xmm3 +pshufd $0xff,%xmm0,%xmm3 + +# qhasm: z0 = z0[0,0,0,0] +# asm 1: pshufd $0x00,z0=int6464#1 +# asm 2: pshufd $0x00,z0=%xmm0 +pshufd $0x00,%xmm0,%xmm0 + +# qhasm: orig5 = z5 +# asm 1: movdqa orig5=stack128#5 +# asm 2: movdqa orig5=96(%esp) +movdqa %xmm1,96(%esp) + +# qhasm: orig10 = z10 +# asm 1: movdqa orig10=stack128#6 +# asm 2: movdqa orig10=112(%esp) +movdqa %xmm2,112(%esp) + +# qhasm: orig15 = z15 +# asm 1: movdqa orig15=stack128#7 +# asm 2: movdqa orig15=128(%esp) +movdqa %xmm3,128(%esp) + +# qhasm: orig0 = z0 +# asm 1: movdqa orig0=stack128#8 +# asm 2: movdqa orig0=144(%esp) +movdqa %xmm0,144(%esp) + +# qhasm: z1 = x1 +# asm 1: movdqa z1=int6464#1 +# asm 2: movdqa z1=%xmm0 +movdqa 48(%esp),%xmm0 + +# qhasm: z6 = z1[2,2,2,2] +# asm 1: pshufd $0xaa,z6=int6464#2 +# asm 2: pshufd 
$0xaa,z6=%xmm1 +pshufd $0xaa,%xmm0,%xmm1 + +# qhasm: z11 = z1[3,3,3,3] +# asm 1: pshufd $0xff,z11=int6464#3 +# asm 2: pshufd $0xff,z11=%xmm2 +pshufd $0xff,%xmm0,%xmm2 + +# qhasm: z12 = z1[0,0,0,0] +# asm 1: pshufd $0x00,z12=int6464#4 +# asm 2: pshufd $0x00,z12=%xmm3 +pshufd $0x00,%xmm0,%xmm3 + +# qhasm: z1 = z1[1,1,1,1] +# asm 1: pshufd $0x55,z1=int6464#1 +# asm 2: pshufd $0x55,z1=%xmm0 +pshufd $0x55,%xmm0,%xmm0 + +# qhasm: orig6 = z6 +# asm 1: movdqa orig6=stack128#9 +# asm 2: movdqa orig6=160(%esp) +movdqa %xmm1,160(%esp) + +# qhasm: orig11 = z11 +# asm 1: movdqa orig11=stack128#10 +# asm 2: movdqa orig11=176(%esp) +movdqa %xmm2,176(%esp) + +# qhasm: orig12 = z12 +# asm 1: movdqa orig12=stack128#11 +# asm 2: movdqa orig12=192(%esp) +movdqa %xmm3,192(%esp) + +# qhasm: orig1 = z1 +# asm 1: movdqa orig1=stack128#12 +# asm 2: movdqa orig1=208(%esp) +movdqa %xmm0,208(%esp) + +# qhasm: z2 = x2 +# asm 1: movdqa z2=int6464#1 +# asm 2: movdqa z2=%xmm0 +movdqa 80(%esp),%xmm0 + +# qhasm: z7 = z2[3,3,3,3] +# asm 1: pshufd $0xff,z7=int6464#2 +# asm 2: pshufd $0xff,z7=%xmm1 +pshufd $0xff,%xmm0,%xmm1 + +# qhasm: z13 = z2[1,1,1,1] +# asm 1: pshufd $0x55,z13=int6464#3 +# asm 2: pshufd $0x55,z13=%xmm2 +pshufd $0x55,%xmm0,%xmm2 + +# qhasm: z2 = z2[2,2,2,2] +# asm 1: pshufd $0xaa,z2=int6464#1 +# asm 2: pshufd $0xaa,z2=%xmm0 +pshufd $0xaa,%xmm0,%xmm0 + +# qhasm: orig7 = z7 +# asm 1: movdqa orig7=stack128#13 +# asm 2: movdqa orig7=224(%esp) +movdqa %xmm1,224(%esp) + +# qhasm: orig13 = z13 +# asm 1: movdqa orig13=stack128#14 +# asm 2: movdqa orig13=240(%esp) +movdqa %xmm2,240(%esp) + +# qhasm: orig2 = z2 +# asm 1: movdqa orig2=stack128#15 +# asm 2: movdqa orig2=256(%esp) +movdqa %xmm0,256(%esp) + +# qhasm: z3 = x3 +# asm 1: movdqa z3=int6464#1 +# asm 2: movdqa z3=%xmm0 +movdqa 32(%esp),%xmm0 + +# qhasm: z4 = z3[0,0,0,0] +# asm 1: pshufd $0x00,z4=int6464#2 +# asm 2: pshufd $0x00,z4=%xmm1 +pshufd $0x00,%xmm0,%xmm1 + +# qhasm: z14 = z3[2,2,2,2] +# asm 1: pshufd $0xaa,z14=int6464#3 +# asm 
2: pshufd $0xaa,z14=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z3 = z3[3,3,3,3] +# asm 1: pshufd $0xff,z3=int6464#1 +# asm 2: pshufd $0xff,z3=%xmm0 +pshufd $0xff,%xmm0,%xmm0 + +# qhasm: orig4 = z4 +# asm 1: movdqa orig4=stack128#16 +# asm 2: movdqa orig4=272(%esp) +movdqa %xmm1,272(%esp) + +# qhasm: orig14 = z14 +# asm 1: movdqa orig14=stack128#17 +# asm 2: movdqa orig14=288(%esp) +movdqa %xmm2,288(%esp) + +# qhasm: orig3 = z3 +# asm 1: movdqa orig3=stack128#18 +# asm 2: movdqa orig3=304(%esp) +movdqa %xmm0,304(%esp) + +# qhasm: bytesatleast256: +._bytesatleast256: + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int32#2 +# asm 2: movl in8=%ecx +movl 80(%esp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int32#3 +# asm 2: movl 4+in9=%edx +movl 4+32(%esp),%edx + +# qhasm: ((uint32 *) &orig8)[0] = in8 +# asm 1: movl orig8=stack128#19 +# asm 2: movl orig8=320(%esp) +movl %ecx,320(%esp) + +# qhasm: ((uint32 *) &orig9)[0] = in9 +# asm 1: movl orig9=stack128#20 +# asm 2: movl orig9=336(%esp) +movl %edx,336(%esp) + +# qhasm: carry? 
in8 += 1 +# asm 1: add $1,x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %eax,24(%esp) + +# qhasm: i = 8 +# asm 1: mov $8,>i=int32#1 +# asm 2: mov $8,>i=%eax +mov $8,%eax + +# qhasm: z5 = orig5 +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 96(%esp),%xmm0 + +# qhasm: z10 = orig10 +# asm 1: movdqa z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 112(%esp),%xmm1 + +# qhasm: z15 = orig15 +# asm 1: movdqa z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 128(%esp),%xmm2 + +# qhasm: z14 = orig14 +# asm 1: movdqa z14=int6464#4 +# asm 2: movdqa z14=%xmm3 +movdqa 288(%esp),%xmm3 + +# qhasm: z3 = orig3 +# asm 1: movdqa z3=int6464#5 +# asm 2: movdqa z3=%xmm4 +movdqa 304(%esp),%xmm4 + +# qhasm: z6 = orig6 +# asm 1: movdqa z6=int6464#6 +# asm 2: movdqa z6=%xmm5 +movdqa 160(%esp),%xmm5 + +# qhasm: z11 = orig11 +# asm 1: movdqa z11=int6464#7 +# asm 2: movdqa z11=%xmm6 +movdqa 176(%esp),%xmm6 + +# qhasm: z1 = orig1 +# asm 1: movdqa z1=int6464#8 +# asm 2: movdqa z1=%xmm7 +movdqa 208(%esp),%xmm7 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#21 +# asm 2: movdqa z5_stack=352(%esp) +movdqa %xmm0,352(%esp) + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#22 +# asm 2: movdqa z10_stack=368(%esp) +movdqa %xmm1,368(%esp) + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#23 +# asm 2: movdqa z15_stack=384(%esp) +movdqa %xmm2,384(%esp) + +# qhasm: z14_stack = z14 +# asm 1: movdqa z14_stack=stack128#24 +# asm 2: movdqa z14_stack=400(%esp) +movdqa %xmm3,400(%esp) + +# qhasm: z3_stack = z3 +# asm 1: movdqa z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm4,416(%esp) + +# qhasm: z6_stack = z6 +# asm 1: movdqa z6_stack=stack128#26 +# asm 2: movdqa z6_stack=432(%esp) +movdqa %xmm5,432(%esp) + +# qhasm: z11_stack = z11 +# asm 1: movdqa z11_stack=stack128#27 +# asm 2: movdqa z11_stack=448(%esp) 
+movdqa %xmm6,448(%esp) + +# qhasm: z1_stack = z1 +# asm 1: movdqa z1_stack=stack128#28 +# asm 2: movdqa z1_stack=464(%esp) +movdqa %xmm7,464(%esp) + +# qhasm: z7 = orig7 +# asm 1: movdqa z7=int6464#5 +# asm 2: movdqa z7=%xmm4 +movdqa 224(%esp),%xmm4 + +# qhasm: z13 = orig13 +# asm 1: movdqa z13=int6464#6 +# asm 2: movdqa z13=%xmm5 +movdqa 240(%esp),%xmm5 + +# qhasm: z2 = orig2 +# asm 1: movdqa z2=int6464#7 +# asm 2: movdqa z2=%xmm6 +movdqa 256(%esp),%xmm6 + +# qhasm: z9 = orig9 +# asm 1: movdqa z9=int6464#8 +# asm 2: movdqa z9=%xmm7 +movdqa 336(%esp),%xmm7 + +# qhasm: p = orig0 +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 144(%esp),%xmm0 + +# qhasm: t = orig12 +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 192(%esp),%xmm2 + +# qhasm: q = orig4 +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 272(%esp),%xmm3 + +# qhasm: r = orig8 +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 320(%esp),%xmm1 + +# qhasm: z7_stack = z7 +# asm 1: movdqa z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm4,480(%esp) + +# qhasm: z13_stack = z13 +# asm 1: movdqa z13_stack=stack128#30 +# asm 2: movdqa z13_stack=496(%esp) +movdqa %xmm5,496(%esp) + +# qhasm: z2_stack = z2 +# asm 1: movdqa z2_stack=stack128#31 +# asm 2: movdqa z2_stack=512(%esp) +movdqa %xmm6,512(%esp) + +# qhasm: z9_stack = z9 +# asm 1: movdqa z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm7,528(%esp) + +# qhasm: z0_stack = p +# asm 1: movdqa z0_stack=stack128#33 +# asm 2: movdqa z0_stack=544(%esp) +movdqa %xmm0,544(%esp) + +# qhasm: z12_stack = t +# asm 1: movdqa z12_stack=stack128#34 +# asm 2: movdqa z12_stack=560(%esp) +movdqa %xmm2,560(%esp) + +# qhasm: z4_stack = q +# asm 1: movdqa z4_stack=stack128#35 +# asm 2: movdqa z4_stack=576(%esp) +movdqa %xmm3,576(%esp) + +# qhasm: z8_stack = r +# asm 1: movdqa z8_stack=stack128#36 +# asm 2: movdqa z8_stack=592(%esp) +movdqa %xmm1,592(%esp) + +# qhasm: mainloop1: +._mainloop1: + 
+# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z4_stack=stack128#33 +# asm 2: movdqa z4_stack=544(%esp) +movdqa %xmm3,544(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z8_stack=stack128#34 +# asm 2: movdqa z8_stack=560(%esp) +movdqa %xmm1,560(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 464(%esp),%xmm2 + +# qhasm: mp = z5_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 352(%esp),%xmm4 + +# qhasm: mq = z9_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 528(%esp),%xmm3 + +# qhasm: mr = z13_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 496(%esp),%xmm5 + +# qhasm: z12_stack = s +# asm 1: movdqa z12_stack=stack128#30 +# asm 2: movdqa z12_stack=496(%esp) +movdqa %xmm6,496(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z0_stack=stack128#21 +# asm 2: movdqa z0_stack=352(%esp) +movdqa %xmm0,352(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa 
%xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm3,528(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z13_stack=stack128#35 +# asm 2: movdqa z13_stack=576(%esp) +movdqa %xmm5,576(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 432(%esp),%xmm2 + +# qhasm: p = z10_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 368(%esp),%xmm0 + +# qhasm: q = z14_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 400(%esp),%xmm3 + +# qhasm: r = z2_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 512(%esp),%xmm1 + +# qhasm: z1_stack = ms +# asm 1: movdqa z1_stack=stack128#22 +# asm 2: movdqa z1_stack=368(%esp) +movdqa %xmm6,368(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z5_stack=stack128#24 +# asm 2: movdqa z5_stack=400(%esp) +movdqa %xmm4,400(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z14_stack=stack128#36 +# asm 2: movdqa z14_stack=592(%esp) +movdqa %xmm3,592(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 
+# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z2_stack=stack128#26 +# asm 2: movdqa z2_stack=432(%esp) +movdqa %xmm1,432(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 448(%esp),%xmm2 + +# qhasm: mp = z15_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 384(%esp),%xmm4 + +# qhasm: mq = z3_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 416(%esp),%xmm3 + +# qhasm: mr = z7_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 480(%esp),%xmm5 + +# qhasm: z6_stack = s +# asm 1: movdqa z6_stack=stack128#23 +# asm 2: movdqa z6_stack=384(%esp) +movdqa %xmm6,384(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z10_stack=stack128#27 +# asm 2: movdqa z10_stack=448(%esp) +movdqa %xmm0,448(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm3,416(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm5,480(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + 
+# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 416(%esp),%xmm2 + +# qhasm: p = z0_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 352(%esp),%xmm0 + +# qhasm: q = z1_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 368(%esp),%xmm3 + +# qhasm: r = z2_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 432(%esp),%xmm1 + +# qhasm: z11_stack = ms +# asm 1: movdqa z11_stack=stack128#21 +# asm 2: movdqa z11_stack=352(%esp) +movdqa %xmm6,352(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z15_stack=stack128#22 +# asm 2: movdqa z15_stack=368(%esp) +movdqa %xmm4,368(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z1_stack=stack128#28 +# asm 2: movdqa z1_stack=464(%esp) +movdqa %xmm3,464(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z2_stack=stack128#31 +# asm 2: movdqa z2_stack=512(%esp) +movdqa %xmm1,512(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 544(%esp),%xmm2 + +# qhasm: mp = z5_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 400(%esp),%xmm4 + +# qhasm: mq = z6_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 
384(%esp),%xmm3 + +# qhasm: mr = z7_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 480(%esp),%xmm5 + +# qhasm: z3_stack = s +# asm 1: movdqa z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm6,416(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z0_stack=stack128#33 +# asm 2: movdqa z0_stack=544(%esp) +movdqa %xmm0,544(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z6_stack=stack128#26 +# asm 2: movdqa z6_stack=432(%esp) +movdqa %xmm3,432(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm5,480(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 528(%esp),%xmm2 + +# qhasm: p = z10_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 448(%esp),%xmm0 + +# qhasm: q = z11_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 352(%esp),%xmm3 + +# qhasm: r = z8_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 560(%esp),%xmm1 + +# qhasm: z4_stack = ms +# asm 1: movdqa z4_stack=stack128#34 +# asm 2: movdqa z4_stack=560(%esp) +movdqa %xmm6,560(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd 
mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z5_stack=stack128#21 +# asm 2: movdqa z5_stack=352(%esp) +movdqa %xmm4,352(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z11_stack=stack128#27 +# asm 2: movdqa z11_stack=448(%esp) +movdqa %xmm3,448(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z8_stack=stack128#37 +# asm 2: movdqa z8_stack=608(%esp) +movdqa %xmm1,608(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 592(%esp),%xmm2 + +# qhasm: mp = z15_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 368(%esp),%xmm4 + +# qhasm: mq = z12_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 496(%esp),%xmm3 + +# qhasm: mr = z13_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 576(%esp),%xmm5 + +# qhasm: z9_stack = s +# asm 1: movdqa z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm6,528(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z10_stack=stack128#22 +# asm 2: movdqa z10_stack=368(%esp) +movdqa %xmm0,368(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 
to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z12_stack=stack128#35 +# asm 2: movdqa z12_stack=576(%esp) +movdqa %xmm3,576(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z13_stack=stack128#30 +# asm 2: movdqa z13_stack=496(%esp) +movdqa %xmm5,496(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 576(%esp),%xmm2 + +# qhasm: p = z0_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 544(%esp),%xmm0 + +# qhasm: q = z4_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 560(%esp),%xmm3 + +# qhasm: r = z8_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 608(%esp),%xmm1 + +# qhasm: z14_stack = ms +# asm 1: movdqa z14_stack=stack128#24 +# asm 2: movdqa z14_stack=400(%esp) +movdqa %xmm6,400(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z15_stack=stack128#23 +# asm 2: movdqa z15_stack=384(%esp) +movdqa %xmm4,384(%esp) + +# qhasm: unsigned>? 
i -= 2 +# asm 1: sub $2, +ja ._mainloop1 + +# qhasm: out = out_stack +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 20(%esp),%edi + +# qhasm: z0 = z0_stack +# asm 1: movdqa z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 544(%esp),%xmm0 + +# qhasm: z1 = z1_stack +# asm 1: movdqa z1=int6464#2 +# asm 2: movdqa z1=%xmm1 +movdqa 464(%esp),%xmm1 + +# qhasm: z2 = z2_stack +# asm 1: movdqa z2=int6464#3 +# asm 2: movdqa z2=%xmm2 +movdqa 512(%esp),%xmm2 + +# qhasm: z3 = z3_stack +# asm 1: movdqa z3=int6464#4 +# asm 2: movdqa z3=%xmm3 +movdqa 416(%esp),%xmm3 + +# qhasm: uint32323232 z0 += orig0 +# asm 1: paddd in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# 
asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: in0 ^= *(uint32 *) (m + 192) +# asm 1: xorl 192(z4=int6464#1 +# asm 2: movdqa z4=%xmm0 +movdqa 560(%esp),%xmm0 + +# qhasm: z5 = z5_stack +# asm 1: movdqa z5=int6464#2 +# asm 2: movdqa z5=%xmm1 +movdqa 352(%esp),%xmm1 + +# qhasm: z6 = z6_stack +# asm 1: movdqa z6=int6464#3 +# asm 2: movdqa z6=%xmm2 +movdqa 432(%esp),%xmm2 + +# qhasm: z7 = z7_stack +# asm 1: movdqa z7=int6464#4 +# asm 2: movdqa z7=%xmm3 +movdqa 480(%esp),%xmm3 + +# qhasm: uint32323232 z4 += orig4 +# asm 1: paddd in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: in4 ^= *(uint32 *) (m + 208) +# asm 
1: xorl 208(z8=int6464#1 +# asm 2: movdqa z8=%xmm0 +movdqa 608(%esp),%xmm0 + +# qhasm: z9 = z9_stack +# asm 1: movdqa z9=int6464#2 +# asm 2: movdqa z9=%xmm1 +movdqa 528(%esp),%xmm1 + +# qhasm: z10 = z10_stack +# asm 1: movdqa z10=int6464#3 +# asm 2: movdqa z10=%xmm2 +movdqa 368(%esp),%xmm2 + +# qhasm: z11 = z11_stack +# asm 1: movdqa z11=int6464#4 +# asm 2: movdqa z11=%xmm3 +movdqa 448(%esp),%xmm3 + +# qhasm: uint32323232 z8 += orig8 +# asm 1: paddd in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: in8 ^= *(uint32 *) (m + 224) +# asm 1: xorl 224(z12=int6464#1 +# asm 2: movdqa 
z12=%xmm0 +movdqa 576(%esp),%xmm0 + +# qhasm: z13 = z13_stack +# asm 1: movdqa z13=int6464#2 +# asm 2: movdqa z13=%xmm1 +movdqa 496(%esp),%xmm1 + +# qhasm: z14 = z14_stack +# asm 1: movdqa z14=int6464#3 +# asm 2: movdqa z14=%xmm2 +movdqa 400(%esp),%xmm2 + +# qhasm: z15 = z15_stack +# asm 1: movdqa z15=int6464#4 +# asm 2: movdqa z15=%xmm3 +movdqa 384(%esp),%xmm3 + +# qhasm: uint32323232 z12 += orig12 +# asm 1: paddd in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: in12 ^= *(uint32 *) (m + 240) +# asm 1: xorl 240(bytes=int32#1 +# asm 2: movl bytes=%eax 
+movl 24(%esp),%eax + +# qhasm: bytes -= 256 +# asm 1: sub $256,out_stack=stack32#6 +# asm 2: movl out_stack=20(%esp) +movl %edi,20(%esp) + +# qhasm: unsigned? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesbetween1and255: +._bytesbetween1and255: + +# qhasm: unsignedctarget=stack32#6 +# asm 2: movl ctarget=20(%esp) +movl %edi,20(%esp) + +# qhasm: out = &tmp +# asm 1: leal out=int32#6 +# asm 2: leal out=%edi +leal 640(%esp),%edi + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %eax,%ecx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb + +# qhasm: out = &tmp +# asm 1: leal out=int32#6 +# asm 2: leal out=%edi +leal 640(%esp),%edi + +# qhasm: m = &tmp +# asm 1: leal m=int32#5 +# asm 2: leal m=%esi +leal 640(%esp),%esi +# comment:fp stack unchanged by fallthrough + +# qhasm: nocopy: +._nocopy: + +# qhasm: bytes_stack = bytes +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %eax,24(%esp) + +# qhasm: diag0 = x0 +# asm 1: movdqa diag0=int6464#1 +# asm 2: movdqa diag0=%xmm0 +movdqa 64(%esp),%xmm0 + +# qhasm: diag1 = x1 +# asm 1: movdqa diag1=int6464#2 +# asm 2: movdqa diag1=%xmm1 +movdqa 48(%esp),%xmm1 + +# qhasm: diag2 = x2 +# asm 1: movdqa diag2=int6464#3 +# asm 2: movdqa diag2=%xmm2 +movdqa 80(%esp),%xmm2 + +# qhasm: diag3 = x3 +# asm 1: movdqa diag3=int6464#4 +# asm 2: movdqa diag3=%xmm3 +movdqa 32(%esp),%xmm3 + +# qhasm: a0 = diag1 +# asm 1: movdqa a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: i = 8 +# asm 1: mov $8,>i=int32#1 +# asm 2: mov $8,>i=%eax +mov $8,%eax + +# qhasm: mainloop2: +._mainloop2: + +# qhasm: uint32323232 a0 += diag0 +# asm 1: paddd a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa 
%xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: 
uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,? 
i -= 4 +# asm 1: sub $4,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,b0=int6464#8,>b0=int6464#8 +# asm 2: pxor >b0=%xmm7,>b0=%xmm7 +pxor %xmm7,%xmm7 + +# qhasm: uint32323232 b7 >>= 14 +# asm 1: psrld $14, +ja ._mainloop2 + +# qhasm: uint32323232 diag0 += x0 +# asm 1: paddd in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in12 = diag1 +# asm 1: movd in12=int32#2 +# asm 2: movd in12=%ecx +movd %xmm1,%ecx + +# qhasm: in8 = diag2 +# asm 1: movd in8=int32#3 +# asm 2: movd in8=%edx +movd %xmm2,%edx + +# qhasm: in4 = diag3 +# asm 1: movd in4=int32#4 +# asm 2: movd in4=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in5=int32#1 +# asm 2: movd in5=%eax +movd %xmm0,%eax + +# qhasm: in1 = diag1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in13 = diag2 +# asm 1: movd in13=int32#3 +# asm 2: movd in13=%edx +movd %xmm2,%edx + +# qhasm: in9 = diag3 +# asm 1: movd in9=int32#4 +# asm 2: movd in9=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in10=int32#1 +# asm 2: movd in10=%eax +movd %xmm0,%eax + +# qhasm: in6 = diag1 +# asm 1: movd in6=int32#2 +# asm 2: movd in6=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = diag2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in14 = diag3 +# asm 1: movd in14=int32#4 +# asm 2: movd in14=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in15=int32#1 +# asm 2: movd in15=%eax +movd %xmm0,%eax + +# qhasm: in11 = diag1 +# asm 1: movd in11=int32#2 +# asm 2: movd in11=%ecx +movd %xmm1,%ecx + +# qhasm: in7 = diag2 +# asm 1: movd in7=int32#3 +# asm 2: movd in7=%edx +movd %xmm2,%edx + +# qhasm: in3 = diag3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: in15 ^= *(uint32 *) (m + 60) +# asm 1: xorl 60(bytes=int32#1 +# asm 
2: movl bytes=%eax +movl 24(%esp),%eax + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int32#2 +# asm 2: movl in8=%ecx +movl 80(%esp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int32#3 +# asm 2: movl 4+in9=%edx +movl 4+32(%esp),%edx + +# qhasm: carry? in8 += 1 +# asm 1: add $1,x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl ? unsigned +ja ._bytesatleast65 +# comment:fp stack unchanged by jump + +# qhasm: goto bytesatleast64 if !unsigned< +jae ._bytesatleast64 + +# qhasm: m = out +# asm 1: mov m=int32#5 +# asm 2: mov m=%esi +mov %edi,%esi + +# qhasm: out = ctarget +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 20(%esp),%edi + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %eax,%ecx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesatleast64: +._bytesatleast64: +# comment:fp stack unchanged by fallthrough + +# qhasm: done: +._done: + +# qhasm: eax = eax_stack +# asm 1: movl eax=int32#1 +# asm 2: movl eax=%eax +movl 0(%esp),%eax + +# qhasm: ebx = ebx_stack +# asm 1: movl ebx=int32#4 +# asm 2: movl ebx=%ebx +movl 4(%esp),%ebx + +# qhasm: esi = esi_stack +# asm 1: movl esi=int32#5 +# asm 2: movl esi=%esi +movl 8(%esp),%esi + +# qhasm: edi = edi_stack +# asm 1: movl edi=int32#6 +# asm 2: movl edi=%edi +movl 12(%esp),%edi + +# qhasm: ebp = ebp_stack +# asm 1: movl ebp=int32#7 +# asm 2: movl ebp=%ebp +movl 16(%esp),%ebp + +# qhasm: leave +add %eax,%esp +xor %eax,%eax +ret + +# qhasm: bytesatleast65: +._bytesatleast65: + +# qhasm: bytes -= 64 +# asm 1: sub $64, +#include "crypto_stream.h" + +extern unsigned char *alignedcalloc(unsigned long long); + +const char *primitiveimplementation = crypto_stream_IMPLEMENTATION; + +#define MAXTEST_BYTES 10000 +#define CHECKSUM_BYTES 4096 +#define TUNE_BYTES 1536 + +static unsigned char *k; +static unsigned char *n; +static unsigned char *m; 
+static unsigned char *c; +static unsigned char *s; +static unsigned char *k2; +static unsigned char *n2; +static unsigned char *m2; +static unsigned char *c2; +static unsigned char *s2; + +void preallocate(void) +{ +} + +void allocate(void) +{ + k = alignedcalloc(crypto_stream_KEYBYTES); + n = alignedcalloc(crypto_stream_NONCEBYTES); + m = alignedcalloc(MAXTEST_BYTES); + c = alignedcalloc(MAXTEST_BYTES); + s = alignedcalloc(MAXTEST_BYTES); + k2 = alignedcalloc(crypto_stream_KEYBYTES); + n2 = alignedcalloc(crypto_stream_NONCEBYTES); + m2 = alignedcalloc(MAXTEST_BYTES); + c2 = alignedcalloc(MAXTEST_BYTES); + s2 = alignedcalloc(MAXTEST_BYTES); +} + +void predoit(void) +{ +} + +void doit(void) +{ + crypto_stream_xor(c,m,TUNE_BYTES,n,k); +} + +char checksum[crypto_stream_KEYBYTES * 2 + 1]; + +const char *checksum_compute(void) +{ + long long i; + long long j; + + for (i = 0;i < CHECKSUM_BYTES;++i) { + long long mlen = i; + long long clen = i; + long long slen = i; + long long klen = crypto_stream_KEYBYTES; + long long nlen = crypto_stream_NONCEBYTES; + for (j = -16;j < 0;++j) m[j] = random(); + for (j = -16;j < 0;++j) c[j] = random(); + for (j = -16;j < 0;++j) s[j] = random(); + for (j = -16;j < 0;++j) n[j] = random(); + for (j = -16;j < 0;++j) k[j] = random(); + for (j = mlen;j < mlen + 16;++j) m[j] = random(); + for (j = clen;j < clen + 16;++j) c[j] = random(); + for (j = slen;j < slen + 16;++j) s[j] = random(); + for (j = nlen;j < nlen + 16;++j) n[j] = random(); + for (j = klen;j < klen + 16;++j) k[j] = random(); + for (j = -16;j < mlen + 16;++j) m2[j] = m[j]; + for (j = -16;j < clen + 16;++j) c2[j] = c[j]; + for (j = -16;j < slen + 16;++j) s2[j] = s[j]; + for (j = -16;j < nlen + 16;++j) n2[j] = n[j]; + for (j = -16;j < klen + 16;++j) k2[j] = k[j]; + + crypto_stream_xor(c,m,mlen,n,k); + + for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_stream_xor overwrites m"; + for (j = -16;j < slen + 16;++j) if (s[j] != s2[j]) return "crypto_stream_xor 
overwrites s"; + for (j = -16;j < nlen + 16;++j) if (n[j] != n2[j]) return "crypto_stream_xor overwrites n"; + for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_stream_xor overwrites k"; + for (j = -16;j < 0;++j) if (c[j] != c2[j]) return "crypto_stream_xor writes before output"; + for (j = clen;j < clen + 16;++j) if (c[j] != c2[j]) return "crypto_stream_xor writes after output"; + + for (j = -16;j < clen + 16;++j) c2[j] = c[j]; + + crypto_stream(s,slen,n,k); + + for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_stream overwrites m"; + for (j = -16;j < clen + 16;++j) if (c[j] != c2[j]) return "crypto_stream overwrites c"; + for (j = -16;j < nlen + 16;++j) if (n[j] != n2[j]) return "crypto_stream overwrites n"; + for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_stream overwrites k"; + for (j = -16;j < 0;++j) if (s[j] != s2[j]) return "crypto_stream writes before output"; + for (j = slen;j < slen + 16;++j) if (s[j] != s2[j]) return "crypto_stream writes after output"; + + for (j = 0;j < mlen;++j) + if ((s[j] ^ m[j]) != c[j]) return "crypto_stream_xor does not match crypto_stream"; + + for (j = 0;j < clen;++j) k[j % klen] ^= c[j]; + crypto_stream_xor(m,c,clen,n,k); + crypto_stream(s,slen,n,k); + for (j = 0;j < mlen;++j) + if ((s[j] ^ m[j]) != c[j]) return "crypto_stream_xor does not match crypto_stream"; + for (j = 0;j < mlen;++j) n[j % nlen] ^= m[j]; + m[mlen] = 0; + } + + for (i = 0;i < crypto_stream_KEYBYTES;++i) { + checksum[2 * i] = "0123456789abcdef"[15 & (k[i] >> 4)]; + checksum[2 * i + 1] = "0123456789abcdef"[15 & k[i]]; + } + checksum[2 * i] = 0; + + return 0; +} diff --git a/nacl/crypto_stream/wrapper-stream.cpp b/nacl/crypto_stream/wrapper-stream.cpp new file mode 100644 index 00000000..dd10c2f6 --- /dev/null +++ b/nacl/crypto_stream/wrapper-stream.cpp @@ -0,0 +1,12 @@ +#include +using std::string; +#include "crypto_stream.h" + +string crypto_stream(size_t clen,const string &n,const string &k) +{ + if 
(n.size() != crypto_stream_NONCEBYTES) throw "incorrect nonce length"; + if (k.size() != crypto_stream_KEYBYTES) throw "incorrect key length"; + unsigned char c[clen]; + crypto_stream(c,clen,(const unsigned char *) n.c_str(),(const unsigned char *) k.c_str()); + return string((char *) c,clen); +} diff --git a/nacl/crypto_stream/wrapper-xor.cpp b/nacl/crypto_stream/wrapper-xor.cpp new file mode 100644 index 00000000..8d770d1e --- /dev/null +++ b/nacl/crypto_stream/wrapper-xor.cpp @@ -0,0 +1,17 @@ +#include +using std::string; +#include "crypto_stream.h" + +string crypto_stream_xor(const string &m,const string &n,const string &k) +{ + if (n.size() != crypto_stream_NONCEBYTES) throw "incorrect nonce length"; + if (k.size() != crypto_stream_KEYBYTES) throw "incorrect key length"; + size_t mlen = m.size(); + unsigned char c[mlen]; + crypto_stream_xor(c, + (const unsigned char *) m.c_str(),mlen, + (const unsigned char *) n.c_str(), + (const unsigned char *) k.c_str() + ); + return string((char *) c,mlen); +} diff --git a/nacl/crypto_stream/xsalsa20/checksum b/nacl/crypto_stream/xsalsa20/checksum new file mode 100644 index 00000000..cae64c0d --- /dev/null +++ b/nacl/crypto_stream/xsalsa20/checksum @@ -0,0 +1 @@ +201bc58a96adcb6ed339ca33c188af8ca04a4ce68be1e0953309ee09a0cf8e7a diff --git a/nacl/crypto_stream/xsalsa20/ref/api.h b/nacl/crypto_stream/xsalsa20/ref/api.h new file mode 100644 index 00000000..6910a7dc --- /dev/null +++ b/nacl/crypto_stream/xsalsa20/ref/api.h @@ -0,0 +1,2 @@ +#define CRYPTO_KEYBYTES 32 +#define CRYPTO_NONCEBYTES 24 diff --git a/nacl/crypto_stream/xsalsa20/ref/implementors b/nacl/crypto_stream/xsalsa20/ref/implementors new file mode 100644 index 00000000..f6fb3c73 --- /dev/null +++ b/nacl/crypto_stream/xsalsa20/ref/implementors @@ -0,0 +1 @@ +Daniel J. 
Bernstein diff --git a/nacl/crypto_stream/xsalsa20/ref/stream.c b/nacl/crypto_stream/xsalsa20/ref/stream.c new file mode 100644 index 00000000..2d710709 --- /dev/null +++ b/nacl/crypto_stream/xsalsa20/ref/stream.c @@ -0,0 +1,22 @@ +/* +version 20080914 +D. J. Bernstein +Public domain. +*/ + +#include "crypto_core_hsalsa20.h" +#include "crypto_stream_salsa20.h" +#include "crypto_stream.h" + +static const unsigned char sigma[16] = "expand 32-byte k"; + +int crypto_stream( + unsigned char *c,unsigned long long clen, + const unsigned char *n, + const unsigned char *k +) +{ + unsigned char subkey[32]; + crypto_core_hsalsa20(subkey,n,k,sigma); + return crypto_stream_salsa20(c,clen,n + 16,subkey); +} diff --git a/nacl/crypto_stream/xsalsa20/ref/xor.c b/nacl/crypto_stream/xsalsa20/ref/xor.c new file mode 100644 index 00000000..13f3134a --- /dev/null +++ b/nacl/crypto_stream/xsalsa20/ref/xor.c @@ -0,0 +1,23 @@ +/* +version 20080913 +D. J. Bernstein +Public domain. +*/ + +#include "crypto_core_hsalsa20.h" +#include "crypto_stream_salsa20.h" +#include "crypto_stream.h" + +static const unsigned char sigma[16] = "expand 32-byte k"; + +int crypto_stream_xor( + unsigned char *c, + const unsigned char *m,unsigned long long mlen, + const unsigned char *n, + const unsigned char *k +) +{ + unsigned char subkey[32]; + crypto_core_hsalsa20(subkey,n,k,sigma); + return crypto_stream_salsa20_xor(c,m,mlen,n + 16,subkey); +} diff --git a/nacl/crypto_stream/xsalsa20/selected b/nacl/crypto_stream/xsalsa20/selected new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_stream/xsalsa20/used b/nacl/crypto_stream/xsalsa20/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_verify/16/checksum b/nacl/crypto_verify/16/checksum new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/nacl/crypto_verify/16/checksum @@ -0,0 +1 @@ +0 diff --git a/nacl/crypto_verify/16/ref/api.h b/nacl/crypto_verify/16/ref/api.h new file mode 100644 index 
00000000..32be2f97 --- /dev/null +++ b/nacl/crypto_verify/16/ref/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 16 diff --git a/nacl/crypto_verify/16/ref/verify.c b/nacl/crypto_verify/16/ref/verify.c new file mode 100644 index 00000000..d356060c --- /dev/null +++ b/nacl/crypto_verify/16/ref/verify.c @@ -0,0 +1,24 @@ +#include "crypto_verify.h" + +int crypto_verify(const unsigned char *x,const unsigned char *y) +{ + unsigned int differentbits = 0; +#define F(i) differentbits |= x[i] ^ y[i]; + F(0) + F(1) + F(2) + F(3) + F(4) + F(5) + F(6) + F(7) + F(8) + F(9) + F(10) + F(11) + F(12) + F(13) + F(14) + F(15) + return (1 & ((differentbits - 1) >> 8)) - 1; +} diff --git a/nacl/crypto_verify/16/used b/nacl/crypto_verify/16/used new file mode 100644 index 00000000..e69de29b diff --git a/nacl/crypto_verify/32/checksum b/nacl/crypto_verify/32/checksum new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/nacl/crypto_verify/32/checksum @@ -0,0 +1 @@ +0 diff --git a/nacl/crypto_verify/32/ref/api.h b/nacl/crypto_verify/32/ref/api.h new file mode 100644 index 00000000..ae8c7f6a --- /dev/null +++ b/nacl/crypto_verify/32/ref/api.h @@ -0,0 +1 @@ +#define CRYPTO_BYTES 32 diff --git a/nacl/crypto_verify/32/ref/verify.c b/nacl/crypto_verify/32/ref/verify.c new file mode 100644 index 00000000..a0e23afe --- /dev/null +++ b/nacl/crypto_verify/32/ref/verify.c @@ -0,0 +1,40 @@ +#include "crypto_verify.h" + +int crypto_verify(const unsigned char *x,const unsigned char *y) +{ + unsigned int differentbits = 0; +#define F(i) differentbits |= x[i] ^ y[i]; + F(0) + F(1) + F(2) + F(3) + F(4) + F(5) + F(6) + F(7) + F(8) + F(9) + F(10) + F(11) + F(12) + F(13) + F(14) + F(15) + F(16) + F(17) + F(18) + F(19) + F(20) + F(21) + F(22) + F(23) + F(24) + F(25) + F(26) + F(27) + F(28) + F(29) + F(30) + F(31) + return (1 & ((differentbits - 1) >> 8)) - 1; +} diff --git a/nacl/crypto_verify/32/used b/nacl/crypto_verify/32/used new file mode 100644 index 00000000..e69de29b diff --git 
a/nacl/crypto_verify/measure.c b/nacl/crypto_verify/measure.c new file mode 100644 index 00000000..bbfac4f1 --- /dev/null +++ b/nacl/crypto_verify/measure.c @@ -0,0 +1,18 @@ +#include "crypto_verify.h" + +const char *primitiveimplementation = crypto_verify_IMPLEMENTATION; +const char *implementationversion = crypto_verify_VERSION; +const char *sizenames[] = { "inputbytes", 0 }; +const long long sizes[] = { crypto_verify_BYTES }; + +void preallocate(void) +{ +} + +void allocate(void) +{ +} + +void measure(void) +{ +} diff --git a/nacl/crypto_verify/try.c b/nacl/crypto_verify/try.c new file mode 100644 index 00000000..f555cb4e --- /dev/null +++ b/nacl/crypto_verify/try.c @@ -0,0 +1,75 @@ +/* + * crypto_verify/try.c version 20090118 + * D. J. Bernstein + * Public domain. + */ + +#include +#include "crypto_verify.h" + +extern unsigned char *alignedcalloc(unsigned long long); + +const char *primitiveimplementation = crypto_verify_IMPLEMENTATION; + +static unsigned char *x; +static unsigned char *y; + +void preallocate(void) +{ +} + +void allocate(void) +{ + x = alignedcalloc(crypto_verify_BYTES); + y = alignedcalloc(crypto_verify_BYTES); +} + +void predoit(void) +{ +} + +void doit(void) +{ + crypto_verify(x,y); +} + +static const char *check(void) +{ + int r = crypto_verify(x,y); + if (r == 0) { + if (memcmp(x,y,crypto_verify_BYTES)) return "different strings pass verify"; + } else if (r == -1) { + if (!memcmp(x,y,crypto_verify_BYTES)) return "equal strings fail verify"; + } else { + return "weird return value from verify"; + } + return 0; +} + +char checksum[2]; + +const char *checksum_compute(void) +{ + long long tests; + long long i; + long long j; + const char *c; + + for (tests = 0;tests < 100000;++tests) { + for (i = 0;i < crypto_verify_BYTES;++i) x[i] = random(); + for (i = 0;i < crypto_verify_BYTES;++i) y[i] = random(); + c = check(); if (c) return c; + for (i = 0;i < crypto_verify_BYTES;++i) y[i] = x[i]; + c = check(); if (c) return c; + y[random() % 
crypto_verify_BYTES] = random(); + c = check(); if (c) return c; + y[random() % crypto_verify_BYTES] = random(); + c = check(); if (c) return c; + y[random() % crypto_verify_BYTES] = random(); + c = check(); if (c) return c; + } + + checksum[0] = '0'; + checksum[1] = 0; + return 0; +} diff --git a/nacl/crypto_verify/wrapper-empty.cpp b/nacl/crypto_verify/wrapper-empty.cpp new file mode 100644 index 00000000..e69de29b diff --git a/nacl/curvecp/LIBS b/nacl/curvecp/LIBS new file mode 100644 index 00000000..2928c658 --- /dev/null +++ b/nacl/curvecp/LIBS @@ -0,0 +1,31 @@ +blocking.o +byte_copy.o +byte_isequal.o +byte_zero.o +crypto_block.o +die.o +e.o +hexparse.o +load.o +nameparse.o +nanoseconds.o +open_cwd.o +open_lock.o +open_pipe.o +open_read.o +open_write.o +portparse.o +randommod.o +safenonce.o +savesync.o +socket_bind.o +socket_recv.o +socket_send.o +socket_udp.o +uint16_pack.o +uint16_unpack.o +uint32_pack.o +uint32_unpack.o +uint64_pack.o +uint64_unpack.o +writeall.o diff --git a/nacl/curvecp/README b/nacl/curvecp/README new file mode 100644 index 00000000..1048c894 --- /dev/null +++ b/nacl/curvecp/README @@ -0,0 +1,10 @@ +Example of use (with nacl-20110221/build/*/bin in $PATH): + curvecpmakekey serverkey + curvecpprintkey serverkey > serverkey.hex + curvecpserver this.machine.name serverkey \ + 127.0.0.1 10000 31415926535897932384626433832795 \ + curvecpmessage cat /usr/share/dict/words & + curvecpclient this.machine.name `cat serverkey.hex` \ + 127.0.0.1 10000 31415926535897932384626433832795 \ + curvecpmessage -c sh -c 'nacl-sha512 <&6' + nacl-sha512 < /usr/share/dict/words diff --git a/nacl/curvecp/SOURCES b/nacl/curvecp/SOURCES new file mode 100644 index 00000000..3fc29751 --- /dev/null +++ b/nacl/curvecp/SOURCES @@ -0,0 +1,36 @@ +blocking +byte_copy +byte_isequal +byte_zero +crypto_block +die +e +hexparse +load +nameparse +nanoseconds +open_cwd +open_lock +open_pipe +open_read +open_write +portparse +randommod +safenonce +savesync +socket_bind 
+socket_recv +socket_send +socket_udp +uint16_pack +uint16_unpack +uint32_pack +uint32_unpack +uint64_pack +uint64_unpack +writeall +curvecpprintkey +curvecpmakekey +curvecpclient +curvecpserver +curvecpmessage diff --git a/nacl/curvecp/TARGETS b/nacl/curvecp/TARGETS new file mode 100644 index 00000000..ab04272c --- /dev/null +++ b/nacl/curvecp/TARGETS @@ -0,0 +1,5 @@ +curvecpprintkey +curvecpmakekey +curvecpclient +curvecpserver +curvecpmessage diff --git a/nacl/curvecp/blocking.c b/nacl/curvecp/blocking.c new file mode 100644 index 00000000..1594259c --- /dev/null +++ b/nacl/curvecp/blocking.c @@ -0,0 +1,12 @@ +#include +#include "blocking.h" + +void blocking_enable(int fd) +{ + fcntl(fd,F_SETFL,fcntl(fd,F_GETFL,0) & ~O_NONBLOCK); +} + +void blocking_disable(int fd) +{ + fcntl(fd,F_SETFL,fcntl(fd,F_GETFL,0) | O_NONBLOCK); +} diff --git a/nacl/curvecp/blocking.h b/nacl/curvecp/blocking.h new file mode 100644 index 00000000..9ba08a5e --- /dev/null +++ b/nacl/curvecp/blocking.h @@ -0,0 +1,7 @@ +#ifndef BLOCKING_H +#define BLOCKING_H + +extern void blocking_enable(int); +extern void blocking_disable(int); + +#endif diff --git a/nacl/curvecp/byte.h b/nacl/curvecp/byte.h new file mode 100644 index 00000000..5dbfbd96 --- /dev/null +++ b/nacl/curvecp/byte.h @@ -0,0 +1,8 @@ +#ifndef BYTE_H +#define BYTE_H + +extern void byte_zero(void *,long long); +extern void byte_copy(void *,long long,const void *); +extern int byte_isequal(const void *,long long,const void *); + +#endif diff --git a/nacl/curvecp/byte_copy.c b/nacl/curvecp/byte_copy.c new file mode 100644 index 00000000..55f446a4 --- /dev/null +++ b/nacl/curvecp/byte_copy.c @@ -0,0 +1,8 @@ +#include "byte.h" + +void byte_copy(void *yv,long long ylen,const void *xv) +{ + char *y = yv; + const char *x = xv; + while (ylen > 0) { *y++ = *x++; --ylen; } +} diff --git a/nacl/curvecp/byte_isequal.c b/nacl/curvecp/byte_isequal.c new file mode 100644 index 00000000..625d361e --- /dev/null +++ b/nacl/curvecp/byte_isequal.c @@ 
-0,0 +1,10 @@ +#include "byte.h" + +int byte_isequal(const void *yv,long long ylen,const void *xv) +{ + const unsigned char *y = yv; + const unsigned char *x = xv; + unsigned char diff = 0; + while (ylen > 0) { diff |= (*y++ ^ *x++); --ylen; } + return (256 - (unsigned int) diff) >> 8; +} diff --git a/nacl/curvecp/byte_zero.c b/nacl/curvecp/byte_zero.c new file mode 100644 index 00000000..bdc1f799 --- /dev/null +++ b/nacl/curvecp/byte_zero.c @@ -0,0 +1,7 @@ +#include "byte.h" + +void byte_zero(void *yv,long long ylen) +{ + char *y = yv; + while (ylen > 0) { *y++ = 0; --ylen; } +} diff --git a/nacl/curvecp/crypto_block.c b/nacl/curvecp/crypto_block.c new file mode 100644 index 00000000..5c7cf35e --- /dev/null +++ b/nacl/curvecp/crypto_block.c @@ -0,0 +1,35 @@ +#include "crypto_block.h" +#include "crypto_uint64.h" +#include "uint64_unpack.h" +#include "uint64_pack.h" + +/* +TEA with double-size words. +XXX: Switch to crypto_block_aes256. +XXX: Build crypto_stream_aes256 on top of crypto_block_aes256. 
+*/ + +int crypto_block( + unsigned char *out, + const unsigned char *in, + const unsigned char *k +) +{ + crypto_uint64 v0 = uint64_unpack(in + 0); + crypto_uint64 v1 = uint64_unpack(in + 8); + crypto_uint64 k0 = uint64_unpack(k + 0); + crypto_uint64 k1 = uint64_unpack(k + 8); + crypto_uint64 k2 = uint64_unpack(k + 16); + crypto_uint64 k3 = uint64_unpack(k + 24); + crypto_uint64 sum = 0; + crypto_uint64 delta = 0x9e3779b97f4a7c15; + int i; + for (i = 0;i < 32;++i) { + sum += delta; + v0 += ((v1<<7) + k0) ^ (v1 + sum) ^ ((v1>>12) + k1); + v1 += ((v0<<16) + k2) ^ (v0 + sum) ^ ((v0>>8) + k3); + } + uint64_pack(out + 0,v0); + uint64_pack(out + 8,v1); + return 0; +} diff --git a/nacl/curvecp/crypto_block.h b/nacl/curvecp/crypto_block.h new file mode 100644 index 00000000..f13620c4 --- /dev/null +++ b/nacl/curvecp/crypto_block.h @@ -0,0 +1,4 @@ +#define crypto_block_BYTES 16 +#define crypto_block_KEYBYTES 32 + +extern int crypto_block(unsigned char *,const unsigned char *,const unsigned char *); diff --git a/nacl/curvecp/curvecpclient.c b/nacl/curvecp/curvecpclient.c new file mode 100644 index 00000000..00793f00 --- /dev/null +++ b/nacl/curvecp/curvecpclient.c @@ -0,0 +1,476 @@ +#include +#include +#include +#include +#include +#include +#include +#include "e.h" +#include "die.h" +#include "load.h" +#include "open.h" +#include "byte.h" +#include "socket.h" +#include "uint64_pack.h" +#include "uint64_unpack.h" +#include "nanoseconds.h" +#include "hexparse.h" +#include "nameparse.h" +#include "portparse.h" +#include "writeall.h" +#include "safenonce.h" +#include "randommod.h" + +long long recent = 0; + +#define NUMIP 8 +long long hellowait[NUMIP] = { + 1000000000 +, 1500000000 +, 2250000000 +, 3375000000 +, 5062500000 +, 7593750000 +, 11390625000 +, 17085937500 +} ; + +#include "crypto_box.h" +#include "randombytes.h" +#if crypto_box_PUBLICKEYBYTES != 32 +error! +#endif +#if crypto_box_NONCEBYTES != 24 +error! +#endif +#if crypto_box_BOXZEROBYTES != 16 +error! 
+#endif +#if crypto_box_ZEROBYTES != 32 +error! +#endif +#if crypto_box_BEFORENMBYTES != 32 +error! +#endif + +int flagverbose = 1; + +#define USAGE "\ +curvecpclient: how to use:\n\ +curvecpclient: -q (optional): no error messages\n\ +curvecpclient: -Q (optional): print error messages (default)\n\ +curvecpclient: -v (optional): print extra information\n\ +curvecpclient: -c keydir (optional): use this public-key directory\n\ +curvecpclient: sname: server's name\n\ +curvecpclient: pk: server's public key\n\ +curvecpclient: ip: server's IP address\n\ +curvecpclient: port: server's UDP port\n\ +curvecpclient: ext: server's extension\n\ +curvecpclient: prog: run this client\n\ +" + +void die_usage(const char *s) +{ + if (s) die_4(100,USAGE,"curvecpclient: fatal: ",s,"\n"); + die_1(100,USAGE); +} + +void die_fatal(const char *trouble,const char *d,const char *fn) +{ + /* XXX: clean up? OS can do it much more reliably */ + if (!flagverbose) die_0(111); + if (d) { + if (fn) die_9(111,"curvecpclient: fatal: ",trouble," ",d,"/",fn,": ",e_str(errno),"\n"); + die_7(111,"curvecpclient: fatal: ",trouble," ",d,": ",e_str(errno),"\n"); + } + if (errno) die_5(111,"curvecpclient: fatal: ",trouble,": ",e_str(errno),"\n"); + die_3(111,"curvecpclient: fatal: ",trouble,"\n"); +} + +int multiipparse(unsigned char *y,const char *x) +{ + long long pos; + long long pos2; + long long ynum; + long long ypos; + long long j; + long long k; + long long d; + for (j = 0;j < 4 * NUMIP;++j) y[j] = 0; + ynum = 0; + while (ynum < 1000) { + ++ynum; + ypos = randommod(ynum); + for (k = 0;k < 4;++k) { + pos = ypos * 4 + k; + pos2 = (ynum - 1) * 4 + k; + if (pos >= 0 && pos < 4 * NUMIP && pos2 >= 0 && pos2 < 4 * NUMIP) y[pos2] = y[pos]; + d = 0; + for (j = 0;j < 3 && x[j] >= '0' && x[j] <= '9';++j) d = d * 10 + (x[j] - '0'); + if (j == 0) return 0; + x += j; + if (pos >= 0 && pos < 4 * NUMIP) y[pos] = d; + if (k < 3) { + if (*x != '.') return 0; + ++x; + } + } + if (!*x) break; + if (*x != ',') return 0; 
+ ++x; + } + /* if fewer than 8 IP addresses, cycle through them: */ + pos = 0; + pos2 = ynum * 4; + while (pos2 < 4 * NUMIP) { + if (pos >= 0 && pos < 4 * NUMIP && pos2 >= 0 && pos2 < 4 * NUMIP) y[pos2] = y[pos]; + ++pos2; + ++pos; + } + return 1; +} + + +/* routing to the client: */ +unsigned char clientextension[16]; +long long clientextensionloadtime = 0; +int udpfd = -1; + +void clientextension_init(void) +{ + if (recent >= clientextensionloadtime) { + clientextensionloadtime = recent + 30000000000LL; + if (load("/etc/curvecpextension",clientextension,16) == -1) + if (errno == ENOENT || errno == ENAMETOOLONG) + byte_zero(clientextension,16); + } +} + + +/* client security: */ +char *keydir = 0; +unsigned char clientlongtermpk[32]; +unsigned char clientlongtermsk[32]; +unsigned char clientshorttermpk[32]; +unsigned char clientshorttermsk[32]; +crypto_uint64 clientshorttermnonce; +unsigned char vouch[64]; + +void clientshorttermnonce_update(void) +{ + ++clientshorttermnonce; + if (clientshorttermnonce) return; + errno = EPROTO; + die_fatal("nonce space expired",0,0); +} + +/* routing to the server: */ +unsigned char serverip[4 * NUMIP]; +unsigned char serverport[2]; +unsigned char serverextension[16]; + +/* server security: */ +unsigned char servername[256]; +unsigned char serverlongtermpk[32]; +unsigned char servershorttermpk[32]; +unsigned char servercookie[96]; + +/* shared secrets: */ +unsigned char clientshortserverlong[32]; +unsigned char clientshortservershort[32]; +unsigned char clientlongserverlong[32]; + +unsigned char allzero[128] = {0}; + +unsigned char nonce[24]; +unsigned char text[2048]; + +unsigned char packet[4096]; +unsigned char packetip[4]; +unsigned char packetport[2]; +crypto_uint64 packetnonce; +int flagreceivedmessage = 0; +crypto_uint64 receivednonce = 0; + +struct pollfd p[3]; + +int fdwd = -1; + +int tochild[2] = {-1,-1}; +int fromchild[2] = {-1,-1}; +pid_t child = -1; +int childstatus = 0; + +unsigned char childbuf[4096]; +long long 
childbuflen = 0; +unsigned char childmessage[2048]; +long long childmessagelen = 0; + +int main(int argc,char **argv) +{ + long long hellopackets; + long long r; + long long nextaction; + + signal(SIGPIPE,SIG_IGN); + + if (!argv[0]) die_usage(0); + for (;;) { + char *x; + if (!argv[1]) break; + if (argv[1][0] != '-') break; + x = *++argv; + if (x[0] == '-' && x[1] == 0) break; + if (x[0] == '-' && x[1] == '-' && x[2] == 0) break; + while (*++x) { + if (*x == 'q') { flagverbose = 0; continue; } + if (*x == 'Q') { flagverbose = 1; continue; } + if (*x == 'v') { if (flagverbose == 2) flagverbose = 3; else flagverbose = 2; continue; } + if (*x == 'c') { + if (x[1]) { keydir = x + 1; break; } + if (argv[1]) { keydir = *++argv; break; } + } + die_usage(0); + } + } + if (!nameparse(servername,*++argv)) die_usage("sname must be at most 255 bytes, at most 63 bytes between dots"); + if (!hexparse(serverlongtermpk,32,*++argv)) die_usage("pk must be exactly 64 hex characters"); + if (!multiipparse(serverip,*++argv)) die_usage("ip must be a comma-separated series of IPv4 addresses"); + if (!portparse(serverport,*++argv)) die_usage("port must be an integer between 0 and 65535"); + if (!hexparse(serverextension,16,*++argv)) die_usage("ext must be exactly 32 hex characters"); + if (!*++argv) die_usage("missing prog"); + + for (;;) { + r = open_read("/dev/null"); + if (r == -1) die_fatal("unable to open /dev/null",0,0); + if (r > 9) { close(r); break; } + } + + if (keydir) { + fdwd = open_cwd(); + if (fdwd == -1) die_fatal("unable to open current working directory",0,0); + if (chdir(keydir) == -1) die_fatal("unable to change to directory",keydir,0); + if (load("publickey",clientlongtermpk,sizeof clientlongtermpk) == -1) die_fatal("unable to read public key from",keydir,0); + if (load(".expertsonly/secretkey",clientlongtermsk,sizeof clientlongtermsk) == -1) die_fatal("unable to read secret key from",keydir,0); + } else { + crypto_box_keypair(clientlongtermpk,clientlongtermsk); + } + 
+ crypto_box_keypair(clientshorttermpk,clientshorttermsk); + clientshorttermnonce = randommod(281474976710656LL); + crypto_box_beforenm(clientshortserverlong,serverlongtermpk,clientshorttermsk); + crypto_box_beforenm(clientlongserverlong,serverlongtermpk,clientlongtermsk); + + udpfd = socket_udp(); + if (udpfd == -1) die_fatal("unable to create socket",0,0); + + for (hellopackets = 0;hellopackets < NUMIP;++hellopackets) { + recent = nanoseconds(); + + /* send a Hello packet: */ + + clientextension_init(); + + clientshorttermnonce_update(); + byte_copy(nonce,16,"CurveCP-client-H"); + uint64_pack(nonce + 16,clientshorttermnonce); + + byte_copy(packet,8,"QvnQ5XlH"); + byte_copy(packet + 8,16,serverextension); + byte_copy(packet + 24,16,clientextension); + byte_copy(packet + 40,32,clientshorttermpk); + byte_copy(packet + 72,64,allzero); + byte_copy(packet + 136,8,nonce + 16); + crypto_box_afternm(text,allzero,96,nonce,clientshortserverlong); + byte_copy(packet + 144,80,text + 16); + + socket_send(udpfd,packet,224,serverip + 4 * hellopackets,serverport); + + nextaction = recent + hellowait[hellopackets] + randommod(hellowait[hellopackets]); + + for (;;) { + long long timeout = nextaction - recent; + if (timeout <= 0) break; + p[0].fd = udpfd; + p[0].events = POLLIN; + if (poll(p,1,timeout / 1000000 + 1) < 0) p[0].revents = 0; + + do { /* try receiving a Cookie packet: */ + if (!p[0].revents) break; + r = socket_recv(udpfd,packet,sizeof packet,packetip,packetport); + if (r != 200) break; + if (!(byte_isequal(packetip,4,serverip + 4 * hellopackets) & + byte_isequal(packetport,2,serverport) & + byte_isequal(packet,8,"RL3aNMXK") & + byte_isequal(packet + 8,16,clientextension) & + byte_isequal(packet + 24,16,serverextension) + )) break; + byte_copy(nonce,8,"CurveCPK"); + byte_copy(nonce + 8,16,packet + 40); + byte_zero(text,16); + byte_copy(text + 16,144,packet + 56); + if (crypto_box_open_afternm(text,text,160,nonce,clientshortserverlong)) break; + 
byte_copy(servershorttermpk,32,text + 32); + byte_copy(servercookie,96,text + 64); + byte_copy(serverip,4,serverip + 4 * hellopackets); + goto receivedcookie; + } while (0); + + recent = nanoseconds(); + } + } + + errno = ETIMEDOUT; die_fatal("no response from server",0,0); + + receivedcookie: + + crypto_box_beforenm(clientshortservershort,servershorttermpk,clientshorttermsk); + + byte_copy(nonce,8,"CurveCPV"); + if (keydir) { + if (safenonce(nonce + 8,0) == -1) die_fatal("nonce-generation disaster",0,0); + } else { + randombytes(nonce + 8,16); + } + + byte_zero(text,32); + byte_copy(text + 32,32,clientshorttermpk); + crypto_box_afternm(text,text,64,nonce,clientlongserverlong); + byte_copy(vouch,16,nonce + 8); + byte_copy(vouch + 16,48,text + 16); + + /* server is responding, so start child: */ + + if (open_pipe(tochild) == -1) die_fatal("unable to create pipe",0,0); + if (open_pipe(fromchild) == -1) die_fatal("unable to create pipe",0,0); + + child = fork(); + if (child == -1) die_fatal("unable to fork",0,0); + if (child == 0) { + if (keydir) if (fchdir(fdwd) == -1) die_fatal("unable to chdir to original directory",0,0); + close(8); + if (dup(tochild[0]) != 8) die_fatal("unable to dup",0,0); + close(9); + if (dup(fromchild[1]) != 9) die_fatal("unable to dup",0,0); + /* XXX: set up environment variables */ + signal(SIGPIPE,SIG_DFL); + execvp(*argv,argv); + die_fatal("unable to run",*argv,0); + } + + close(fromchild[1]); + close(tochild[0]); + + + for (;;) { + p[0].fd = udpfd; + p[0].events = POLLIN; + p[1].fd = fromchild[0]; + p[1].events = POLLIN; + + if (poll(p,2,-1) < 0) { + p[0].revents = 0; + p[1].revents = 0; + } + + do { /* try receiving a Message packet: */ + if (!p[0].revents) break; + r = socket_recv(udpfd,packet,sizeof packet,packetip,packetport); + if (r < 80) break; + if (r > 1152) break; + if (r & 15) break; + packetnonce = uint64_unpack(packet + 40); + if (flagreceivedmessage && packetnonce <= receivednonce) break; + if 
(!(byte_isequal(packetip,4,serverip + 4 * hellopackets) & + byte_isequal(packetport,2,serverport) & + byte_isequal(packet,8,"RL3aNMXM") & + byte_isequal(packet + 8,16,clientextension) & + byte_isequal(packet + 24,16,serverextension) + )) break; + byte_copy(nonce,16,"CurveCP-server-M"); + byte_copy(nonce + 16,8,packet + 40); + byte_zero(text,16); + byte_copy(text + 16,r - 48,packet + 48); + if (crypto_box_open_afternm(text,text,r - 32,nonce,clientshortservershort)) break; + + if (!flagreceivedmessage) { + flagreceivedmessage = 1; + randombytes(clientlongtermpk,sizeof clientlongtermpk); + randombytes(vouch,sizeof vouch); + randombytes(servername,sizeof servername); + randombytes(servercookie,sizeof servercookie); + } + + receivednonce = packetnonce; + text[31] = (r - 64) >> 4; + /* child is responsible for reading all data immediately, so we won't block: */ + if (writeall(tochild[1],text + 31,r - 63) == -1) goto done; + } while (0); + + do { /* try receiving data from child: */ + long long i; + if (!p[1].revents) break; + r = read(fromchild[0],childbuf,sizeof childbuf); + if (r == -1) if (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN) break; + if (r <= 0) goto done; + childbuflen = r; + for (i = 0;i < childbuflen;++i) { + if (childmessagelen < 0) goto done; + if (childmessagelen >= sizeof childmessage) goto done; + childmessage[childmessagelen++] = childbuf[i]; + if (childmessage[0] & 128) goto done; + if (childmessagelen == 1 + 16 * (unsigned long long) childmessage[0]) { + clientextension_init(); + clientshorttermnonce_update(); + uint64_pack(nonce + 16,clientshorttermnonce); + if (flagreceivedmessage) { + r = childmessagelen - 1; + if (r < 16) goto done; + if (r > 1088) goto done; + byte_copy(nonce,16,"CurveCP-client-M"); + byte_zero(text,32); + byte_copy(text + 32,r,childmessage + 1); + crypto_box_afternm(text,text,r + 32,nonce,clientshortservershort); + byte_copy(packet,8,"QvnQ5XlM"); + byte_copy(packet + 8,16,serverextension); + byte_copy(packet + 
24,16,clientextension); + byte_copy(packet + 40,32,clientshorttermpk); + byte_copy(packet + 72,8,nonce + 16); + byte_copy(packet + 80,r + 16,text + 16); + socket_send(udpfd,packet,r + 96,serverip,serverport); + } else { + r = childmessagelen - 1; + if (r < 16) goto done; + if (r > 640) goto done; + byte_copy(nonce,16,"CurveCP-client-I"); + byte_zero(text,32); + byte_copy(text + 32,32,clientlongtermpk); + byte_copy(text + 64,64,vouch); + byte_copy(text + 128,256,servername); + byte_copy(text + 384,r,childmessage + 1); + crypto_box_afternm(text,text,r + 384,nonce,clientshortservershort); + byte_copy(packet,8,"QvnQ5XlI"); + byte_copy(packet + 8,16,serverextension); + byte_copy(packet + 24,16,clientextension); + byte_copy(packet + 40,32,clientshorttermpk); + byte_copy(packet + 72,96,servercookie); + byte_copy(packet + 168,8,nonce + 16); + byte_copy(packet + 176,r + 368,text + 16); + socket_send(udpfd,packet,r + 544,serverip,serverport); + } + childmessagelen = 0; + } + } + } while (0); + } + + + done: + + do { + r = waitpid(child,&childstatus,0); + } while (r == -1 && errno == EINTR); + + if (!WIFEXITED(childstatus)) { errno = 0; die_fatal("process killed by signal",0,0); } + return WEXITSTATUS(childstatus); +} diff --git a/nacl/curvecp/curvecpmakekey.c b/nacl/curvecp/curvecpmakekey.c new file mode 100644 index 00000000..dfa181b0 --- /dev/null +++ b/nacl/curvecp/curvecpmakekey.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include "die.h" +#include "e.h" +#include "savesync.h" +#include "randombytes.h" +#include "crypto_box.h" + +void die_usage(void) +{ + die_1(111,"curvecpmakekey: usage: curvecpmakekey keydir\n"); +} + +void die_fatal(const char *trouble,const char *d,const char *fn) +{ + if (fn) die_9(111,"curvecpmakekey: fatal: ",trouble," ",d,"/",fn,": ",e_str(errno),"\n"); + die_7(111,"curvecpmakekey: fatal: ",trouble," ",d,": ",e_str(errno),"\n"); +} + +unsigned char pk[crypto_box_PUBLICKEYBYTES]; +unsigned char sk[crypto_box_SECRETKEYBYTES]; +unsigned char 
lock[1]; +unsigned char noncekey[32]; +unsigned char noncecounter[8]; + +void create(const char *d,const char *fn,const unsigned char *x,long long xlen) +{ + if (savesync(fn,x,xlen) == -1) die_fatal("unable to create",d,fn); +} + +int main(int argc,char **argv) +{ + char *d; + + if (!argv[0]) die_usage(); + if (!argv[1]) die_usage(); + d = argv[1]; + + umask(022); + if (mkdir(d,0755) == -1) die_fatal("unable to create directory",d,0); + if (chdir(d) == -1) die_fatal("unable to chdir to directory",d,0); + if (mkdir(".expertsonly",0700) == -1) die_fatal("unable to create directory",d,".expertsonly"); + + crypto_box_keypair(pk,sk); + create(d,"publickey",pk,sizeof pk); + + randombytes(noncekey,sizeof noncekey); + + umask(077); + create(d,".expertsonly/secretkey",sk,sizeof sk); + create(d,".expertsonly/lock",lock,sizeof lock); + create(d,".expertsonly/noncekey",noncekey,sizeof noncekey); + create(d,".expertsonly/noncecounter",noncecounter,sizeof noncecounter); + + return 0; +} diff --git a/nacl/curvecp/curvecpmessage.c b/nacl/curvecp/curvecpmessage.c new file mode 100644 index 00000000..df1e1664 --- /dev/null +++ b/nacl/curvecp/curvecpmessage.c @@ -0,0 +1,654 @@ +#include +#include +#include +#include +#include +#include "open.h" +#include "blocking.h" +#include "e.h" +#include "die.h" +#include "randommod.h" +#include "byte.h" +#include "crypto_uint32.h" +#include "uint16_pack.h" +#include "uint32_pack.h" +#include "uint64_pack.h" +#include "uint16_unpack.h" +#include "uint32_unpack.h" +#include "uint64_unpack.h" +#include "nanoseconds.h" +#include "writeall.h" + +int flagverbose = 1; +int flagserver = 1; +int wantping = 0; /* 1: ping after a second; 2: ping immediately */ + +#define USAGE "\ +curvecpmessage: how to use:\n\ +curvecpmessage: -q (optional): no error messages\n\ +curvecpmessage: -Q (optional): print error messages (default)\n\ +curvecpmessage: -v (optional): print extra information\n\ +curvecpmessage: -c (optional): program is a client; server starts 
first\n\ +curvecpmessage: -C (optional): program is a client that starts first\n\ +curvecpmessage: -s (optional): program is a server (default)\n\ +curvecpmessage: prog: run this program\n\ +" + +void die_usage(const char *s) +{ + if (s) die_4(100,USAGE,"curvecpmessage: fatal: ",s,"\n"); + die_1(100,USAGE); +} + +void die_fatal(const char *trouble,const char *d,const char *fn) +{ + if (!flagverbose) die_0(111); + if (d) { + if (fn) die_9(111,"curvecpmessage: fatal: ",trouble," ",d,"/",fn,": ",e_str(errno),"\n"); + die_7(111,"curvecpmessage: fatal: ",trouble," ",d,": ",e_str(errno),"\n"); + } + if (errno) die_5(111,"curvecpmessage: fatal: ",trouble,": ",e_str(errno),"\n"); + die_3(111,"curvecpmessage: fatal: ",trouble,"\n"); +} + +void die_badmessage(void) +{ + errno = EPROTO; + die_fatal("unable to read from file descriptor 8",0,0); +} + +void die_internalerror(void) +{ + errno = EPROTO; + die_fatal("internal error",0,0); +} + + +int tochild[2] = {-1,-1}; +int fromchild[2] = {-1,-1}; +pid_t child = -1; +int childstatus; + +struct pollfd p[3]; + +long long sendacked = 0; /* number of initial bytes sent and fully acknowledged */ +long long sendbytes = 0; /* number of additional bytes to send */ +unsigned char sendbuf[131072]; /* circular queue with the additional bytes; size must be power of 2 */ +long long sendprocessed = 0; /* within sendbytes, number of bytes absorbed into blocks */ + +crypto_uint16 sendeof = 0; /* 2048 for normal eof after sendbytes, 4096 for error after sendbytes */ +int sendeofprocessed = 0; +int sendeofacked = 0; + +long long totalblocktransmissions = 0; +long long totalblocks = 0; + +#define OUTGOING 128 /* must be power of 2 */ +long long blocknum = 0; /* number of outgoing blocks being tracked */ +long long blockfirst = 0; /* circular queue */ +long long blockpos[OUTGOING]; /* position of block's first byte within stream */ +long long blocklen[OUTGOING]; /* number of bytes in this block */ +crypto_uint16 blockeof[OUTGOING]; /* 0, 2048, 4096 
*/ +long long blocktransmissions[OUTGOING]; +long long blocktime[OUTGOING]; /* time of last message sending this block; 0 means acked */ +long long earliestblocktime = 0; /* if nonzero, minimum of active blocktime values */ +crypto_uint32 blockid[OUTGOING]; /* ID of last message sending this block */ + +#define INCOMING 64 /* must be power of 2 */ +long long messagenum = 0; /* number of messages in incoming queue */ +long long messagefirst = 0; /* position of first message; circular queue */ +unsigned char messagelen[INCOMING]; /* times 16 */ +unsigned char message[INCOMING][1088]; +unsigned char messagetodo[2048]; +long long messagetodolen = 0; + +long long receivebytes = 0; /* number of initial bytes fully received */ +long long receivewritten = 0; /* within receivebytes, number of bytes given to child */ +crypto_uint16 receiveeof = 0; /* 0, 2048, 4096 */ +long long receivetotalbytes = 0; /* total number of bytes in stream, if receiveeof */ +unsigned char receivebuf[131072]; /* circular queue beyond receivewritten; size must be power of 2 */ +unsigned char receivevalid[131072]; /* 1 for byte successfully received; XXX: use buddy structure to speed this up */ + +long long maxblocklen = 512; +crypto_uint32 nextmessageid = 1; + +unsigned char buf[4096]; + +long long lastblocktime = 0; +long long nsecperblock = 1000000000; +long long lastspeedadjustment = 0; +long long lastedge = 0; +long long lastdoubling = 0; + +long long rtt; +long long rtt_delta; +long long rtt_average = 0; +long long rtt_deviation = 0; +long long rtt_lowwater = 0; +long long rtt_highwater = 0; +long long rtt_timeout = 1000000000; +long long rtt_seenrecenthigh = 0; +long long rtt_seenrecentlow = 0; +long long rtt_seenolderhigh = 0; +long long rtt_seenolderlow = 0; +long long rtt_phase = 0; + +long long lastpanic = 0; + +void earliestblocktime_compute(void) /* XXX: use priority queue */ +{ + long long i; + long long pos; + earliestblocktime = 0; + for (i = 0;i < blocknum;++i) { + pos = (blockfirst 
+ i) & (OUTGOING - 1); + if (blocktime[pos]) { + if (!earliestblocktime) + earliestblocktime = blocktime[pos]; + else + if (blocktime[pos] < earliestblocktime) + earliestblocktime = blocktime[pos]; + } + } +} + +void acknowledged(unsigned long long start,unsigned long long stop) +{ + long long i; + long long pos; + if (stop == start) return; + for (i = 0;i < blocknum;++i) { + pos = (blockfirst + i) & (OUTGOING - 1); + if (blockpos[pos] >= start && blockpos[pos] + blocklen[pos] <= stop) { + blocktime[pos] = 0; + totalblocktransmissions += blocktransmissions[pos]; + totalblocks += 1; + } + } + while (blocknum) { + pos = blockfirst & (OUTGOING - 1); + if (blocktime[pos]) break; + sendacked += blocklen[pos]; + sendbytes -= blocklen[pos]; + sendprocessed -= blocklen[pos]; + ++blockfirst; + --blocknum; + } + if (sendeof) + if (start == 0) + if (stop > sendacked + sendbytes) + if (!sendeofacked) { + sendeofacked = 1; + } + earliestblocktime_compute(); +} + +int main(int argc,char **argv) +{ + long long pos; + long long len; + long long u; + long long r; + long long i; + long long k; + long long recent; + long long nextaction; + long long timeout; + struct pollfd *q; + struct pollfd *watch8; + struct pollfd *watchtochild; + struct pollfd *watchfromchild; + + signal(SIGPIPE,SIG_IGN); + + if (!argv[0]) die_usage(0); + for (;;) { + char *x; + if (!argv[1]) break; + if (argv[1][0] != '-') break; + x = *++argv; + if (x[0] == '-' && x[1] == 0) break; + if (x[0] == '-' && x[1] == '-' && x[2] == 0) break; + while (*++x) { + if (*x == 'q') { flagverbose = 0; continue; } + if (*x == 'Q') { flagverbose = 1; continue; } + if (*x == 'v') { if (flagverbose == 2) flagverbose = 3; else flagverbose = 2; continue; } + if (*x == 'c') { flagserver = 0; wantping = 2; continue; } + if (*x == 'C') { flagserver = 0; wantping = 1; continue; } + if (*x == 's') { flagserver = 1; wantping = 0; continue; } + die_usage(0); + } + } + if (!*++argv) die_usage("missing prog"); + + for (;;) { + r = 
open_read("/dev/null"); + if (r == -1) die_fatal("unable to open /dev/null",0,0); + if (r > 9) { close(r); break; } + } + + if (open_pipe(tochild) == -1) die_fatal("unable to create pipe",0,0); + if (open_pipe(fromchild) == -1) die_fatal("unable to create pipe",0,0); + + blocking_enable(tochild[0]); + blocking_enable(fromchild[1]); + + child = fork(); + if (child == -1) die_fatal("unable to fork",0,0); + if (child == 0) { + close(8); + close(9); + if (flagserver) { + close(0); + if (dup(tochild[0]) != 0) die_fatal("unable to dup",0,0); + close(1); + if (dup(fromchild[1]) != 1) die_fatal("unable to dup",0,0); + } else { + close(6); + if (dup(tochild[0]) != 6) die_fatal("unable to dup",0,0); + close(7); + if (dup(fromchild[1]) != 7) die_fatal("unable to dup",0,0); + } + signal(SIGPIPE,SIG_DFL); + execvp(*argv,argv); + die_fatal("unable to run",*argv,0); + } + + close(tochild[0]); + close(fromchild[1]); + + recent = nanoseconds(); + lastspeedadjustment = recent; + if (flagserver) maxblocklen = 1024; + + for (;;) { + if (sendeofacked) + if (receivewritten == receivetotalbytes) + if (receiveeof) + if (tochild[1] < 0) + break; /* XXX: to re-ack should enter a TIME-WAIT state here */ + + q = p; + + watch8 = q; + if (watch8) { q->fd = 8; q->events = POLLIN; ++q; } + + watchtochild = q; + if (tochild[1] < 0) watchtochild = 0; + if (receivewritten >= receivebytes) watchtochild = 0; + if (watchtochild) { q->fd = tochild[1]; q->events = POLLOUT; ++q; } + + watchfromchild = q; + if (sendeof) watchfromchild = 0; + if (sendbytes + 4096 > sizeof sendbuf) watchfromchild = 0; + if (watchfromchild) { q->fd = fromchild[0]; q->events = POLLIN; ++q; } + + nextaction = recent + 60000000000LL; + if (wantping == 1) nextaction = recent + 1000000000; + if (wantping == 2) + if (nextaction > lastblocktime + nsecperblock) nextaction = lastblocktime + nsecperblock; + if (blocknum < OUTGOING) + if (!(sendeof ? 
sendeofprocessed : sendprocessed >= sendbytes)) + if (nextaction > lastblocktime + nsecperblock) nextaction = lastblocktime + nsecperblock; + if (earliestblocktime) + if (earliestblocktime + rtt_timeout > lastblocktime + nsecperblock) + if (earliestblocktime + rtt_timeout < nextaction) + nextaction = earliestblocktime + rtt_timeout; + + if (messagenum) + if (!watchtochild) + nextaction = 0; + + if (nextaction <= recent) + timeout = 0; + else + timeout = (nextaction - recent) / 1000000 + 1; + + if (poll(p,q - p,timeout) < 0) { + watch8 = 0; + watchtochild = 0; + watchfromchild = 0; + } else { + if (watch8) if (!watch8->revents) watch8 = 0; + if (watchtochild) if (!watchtochild->revents) watchtochild = 0; + if (watchfromchild) if (!watchfromchild->revents) watchfromchild = 0; + } + + /* XXX: keepalives */ + + do { /* try receiving data from child: */ + if (!watchfromchild) break; + if (sendeof) break; + if (sendbytes + 4096 > sizeof sendbuf) break; + + pos = (sendacked & (sizeof sendbuf - 1)) + sendbytes; + if (pos < sizeof sendbuf) { + r = read(fromchild[0],sendbuf + pos,sizeof sendbuf - pos); + } else { + r = read(fromchild[0],sendbuf + pos - sizeof sendbuf,sizeof sendbuf - sendbytes); + } + if (r == -1) if (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN) break; + if (r < 0) { sendeof = 4096; break; } + if (r == 0) { sendeof = 2048; break; } + sendbytes += r; + if (sendbytes >= 1152921504606846976LL) die_internalerror(); + } while(0); + + recent = nanoseconds(); + + do { /* try re-sending an old block: */ + if (recent < lastblocktime + nsecperblock) break; + if (earliestblocktime == 0) break; + if (recent < earliestblocktime + rtt_timeout) break; + + for (i = 0;i < blocknum;++i) { + pos = (blockfirst + i) & (OUTGOING - 1); + if (blocktime[pos] == earliestblocktime) { + if (recent > lastpanic + 4 * rtt_timeout) { + nsecperblock *= 2; + lastpanic = recent; + lastedge = recent; + } + goto sendblock; + } + } + } while(0); + + do { /* try sending a new block: 
*/ + if (recent < lastblocktime + nsecperblock) break; + if (blocknum >= OUTGOING) break; + if (!wantping) + if (sendeof ? sendeofprocessed : sendprocessed >= sendbytes) break; + /* XXX: if any Nagle-type processing is desired, do it here */ + + pos = (blockfirst + blocknum) & (OUTGOING - 1); + ++blocknum; + blockpos[pos] = sendacked + sendprocessed; + blocklen[pos] = sendbytes - sendprocessed; + if (blocklen[pos] > maxblocklen) blocklen[pos] = maxblocklen; + if ((blockpos[pos] & (sizeof sendbuf - 1)) + blocklen[pos] > sizeof sendbuf) + blocklen[pos] = sizeof sendbuf - (blockpos[pos] & (sizeof sendbuf - 1)); + /* XXX: or could have the full block in post-buffer space */ + sendprocessed += blocklen[pos]; + blockeof[pos] = 0; + if (sendprocessed == sendbytes) { + blockeof[pos] = sendeof; + if (sendeof) sendeofprocessed = 1; + } + blocktransmissions[pos] = 0; + + sendblock: + + blocktransmissions[pos] += 1; + blocktime[pos] = recent; + blockid[pos] = nextmessageid; + if (!++nextmessageid) ++nextmessageid; + + /* constraints: u multiple of 16; u >= 16; u <= 1088; u >= 48 + blocklen[pos] */ + u = 64 + blocklen[pos]; + if (u <= 192) u = 192; + else if (u <= 320) u = 320; + else if (u <= 576) u = 576; + else if (u <= 1088) u = 1088; + else die_internalerror(); + if (blocklen[pos] < 0 || blocklen[pos] > 1024) die_internalerror(); + + byte_zero(buf + 8,u); + buf[7] = u / 16; + uint32_pack(buf + 8,blockid[pos]); + /* XXX: include any acknowledgments that have piled up */ + uint16_pack(buf + 46,blockeof[pos] | (crypto_uint16) blocklen[pos]); + uint64_pack(buf + 48,blockpos[pos]); + byte_copy(buf + 8 + u - blocklen[pos],blocklen[pos],sendbuf + (blockpos[pos] & (sizeof sendbuf - 1))); + + if (writeall(9,buf + 7,u + 1) == -1) die_fatal("unable to write descriptor 9",0,0); + lastblocktime = recent; + wantping = 0; + + earliestblocktime_compute(); + } while(0); + + do { /* try receiving messages: */ + if (!watch8) break; + r = read(8,buf,sizeof buf); + if (r == -1) if (errno == 
EINTR || errno == EWOULDBLOCK || errno == EAGAIN) break; + if (r == 0) die_badmessage(); + if (r < 0) die_fatal("unable to read from file descriptor 8",0,0); + for (k = 0;k < r;++k) { + messagetodo[messagetodolen++] = buf[k]; + u = 16 * (unsigned long long) messagetodo[0]; + if (u < 16) die_badmessage(); + if (u > 1088) die_badmessage(); + if (messagetodolen == 1 + u) { + if (messagenum < INCOMING) { + pos = (messagefirst + messagenum) & (INCOMING - 1); + messagelen[pos] = messagetodo[0]; + byte_copy(message[pos],u,messagetodo + 1); + ++messagenum; + } else { + ; /* drop tail */ + } + messagetodolen = 0; + } + } + } while(0); + + do { /* try processing a message: */ + if (!messagenum) break; + if (tochild[1] >= 0 && receivewritten < receivebytes) break; + + maxblocklen = 1024; + + pos = messagefirst & (INCOMING - 1); + len = 16 * (unsigned long long) messagelen[pos]; + do { /* handle this message if it's comprehensible: */ + unsigned long long D; + unsigned long long SF; + unsigned long long startbyte; + unsigned long long stopbyte; + crypto_uint32 id; + long long i; + + if (len < 48) break; + if (len > 1088) break; + + id = uint32_unpack(message[pos] + 4); + for (i = 0;i < blocknum;++i) { + k = (blockfirst + i) & (OUTGOING - 1); + if (blockid[k] == id) { + rtt = recent - blocktime[k]; + if (!rtt_average) { + nsecperblock = rtt; + rtt_average = rtt; + rtt_deviation = rtt / 2; + rtt_highwater = rtt; + rtt_lowwater = rtt; + } + + /* Jacobson's retransmission timeout calculation: */ + rtt_delta = rtt - rtt_average; + rtt_average += rtt_delta / 8; + if (rtt_delta < 0) rtt_delta = -rtt_delta; + rtt_delta -= rtt_deviation; + rtt_deviation += rtt_delta / 4; + rtt_timeout = rtt_average + 4 * rtt_deviation; + /* adjust for delayed acks with anti-spiking: */ + rtt_timeout += 8 * nsecperblock; + + /* recognizing top and bottom of congestion cycle: */ + rtt_delta = rtt - rtt_highwater; + rtt_highwater += rtt_delta / 1024; + rtt_delta = rtt - rtt_lowwater; + if (rtt_delta > 0) 
rtt_lowwater += rtt_delta / 8192; + else rtt_lowwater += rtt_delta / 256; + + if (rtt_average > rtt_highwater + 5000000) rtt_seenrecenthigh = 1; + else if (rtt_average < rtt_lowwater) rtt_seenrecentlow = 1; + + if (recent >= lastspeedadjustment + 16 * nsecperblock) { + if (recent - lastspeedadjustment > 10000000000LL) { + nsecperblock = 1000000000; /* slow restart */ + nsecperblock += randommod(nsecperblock / 8); + } + + lastspeedadjustment = recent; + + if (nsecperblock >= 131072) { + /* additive increase: adjust 1/N by a constant c */ + /* rtt-fair additive increase: adjust 1/N by a constant c every nanosecond */ + /* approximation: adjust 1/N by cN every N nanoseconds */ + /* i.e., N <- 1/(1/N + cN) = N/(1 + cN^2) every N nanoseconds */ + if (nsecperblock < 16777216) { + /* N/(1+cN^2) approx N - cN^3 */ + u = nsecperblock / 131072; + nsecperblock -= u * u * u; + } else { + double d = nsecperblock; + nsecperblock = d/(1 + d*d / 2251799813685248.0); + } + } + + if (rtt_phase == 0) { + if (rtt_seenolderhigh) { + rtt_phase = 1; + lastedge = recent; + nsecperblock += randommod(nsecperblock / 4); + } + } else { + if (rtt_seenolderlow) { + rtt_phase = 0; + } + } + + rtt_seenolderhigh = rtt_seenrecenthigh; + rtt_seenolderlow = rtt_seenrecentlow; + rtt_seenrecenthigh = 0; + rtt_seenrecentlow = 0; + } + + do { + if (recent - lastedge < 60000000000LL) { + if (recent < lastdoubling + 4 * nsecperblock + 64 * rtt_timeout + 5000000000LL) break; + } else { + if (recent < lastdoubling + 4 * nsecperblock + 2 * rtt_timeout) break; + } + if (nsecperblock <= 65535) break; + + nsecperblock /= 2; + lastdoubling = recent; + if (lastedge) lastedge = recent; + } while(0); + } + } + + stopbyte = uint64_unpack(message[pos] + 8); + acknowledged(0,stopbyte); + startbyte = stopbyte + (unsigned long long) uint32_unpack(message[pos] + 16); + stopbyte = startbyte + (unsigned long long) uint16_unpack(message[pos] + 20); + acknowledged(startbyte,stopbyte); + startbyte = stopbyte + (unsigned long 
long) uint16_unpack(message[pos] + 22); + stopbyte = startbyte + (unsigned long long) uint16_unpack(message[pos] + 24); + acknowledged(startbyte,stopbyte); + startbyte = stopbyte + (unsigned long long) uint16_unpack(message[pos] + 26); + stopbyte = startbyte + (unsigned long long) uint16_unpack(message[pos] + 28); + acknowledged(startbyte,stopbyte); + startbyte = stopbyte + (unsigned long long) uint16_unpack(message[pos] + 30); + stopbyte = startbyte + (unsigned long long) uint16_unpack(message[pos] + 32); + acknowledged(startbyte,stopbyte); + startbyte = stopbyte + (unsigned long long) uint16_unpack(message[pos] + 34); + stopbyte = startbyte + (unsigned long long) uint16_unpack(message[pos] + 36); + acknowledged(startbyte,stopbyte); + + D = uint16_unpack(message[pos] + 38); + SF = D & (2048 + 4096); + D -= SF; + if (D > 1024) break; + if (48 + D > len) break; + + startbyte = uint64_unpack(message[pos] + 40); + stopbyte = startbyte + D; + + if (stopbyte > receivewritten + sizeof receivebuf) { + break; + /* of course, flow control would avoid this case */ + } + + if (SF) { + receiveeof = SF; + receivetotalbytes = stopbyte; + } + + for (k = 0;k < D;++k) { + unsigned char ch = message[pos][len - D + k]; + unsigned long long where = startbyte + k; + if (where >= receivewritten && where < receivewritten + sizeof receivebuf) { + receivevalid[where & (sizeof receivebuf - 1)] = 1; + receivebuf[where & (sizeof receivebuf - 1)] = ch; + } + } + for (;;) { + if (receivebytes >= receivewritten + sizeof receivebuf) break; + if (!receivevalid[receivebytes & (sizeof receivebuf - 1)]) break; + ++receivebytes; + } + + if (!uint32_unpack(message[pos])) break; /* never acknowledge a pure acknowledgment */ + + /* XXX: delay acknowledgments */ + u = 192; + byte_zero(buf + 8,u); + buf[7] = u / 16; + byte_copy(buf + 12,4,message[pos]); + if (receiveeof && receivebytes == receivetotalbytes) { + uint64_pack(buf + 16,receivebytes + 1); + } else + uint64_pack(buf + 16,receivebytes); + /* XXX: 
incorporate selective acknowledgments */ + + if (writeall(9,buf + 7,u + 1) == -1) die_fatal("unable to write descriptor 9",0,0); + } while(0); + + ++messagefirst; + --messagenum; + } while(0); + + do { /* try sending data to child: */ + if (!watchtochild) break; + if (tochild[1] < 0) { receivewritten = receivebytes; break; } + if (receivewritten >= receivebytes) break; + + pos = receivewritten & (sizeof receivebuf - 1); + len = receivebytes - receivewritten; + if (pos + len > sizeof receivebuf) len = sizeof receivebuf - pos; + r = write(tochild[1],receivebuf + pos,len); + if (r == -1) if (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN) break; + if (r <= 0) { + close(tochild[1]); + tochild[1] = -1; + break; + } + byte_zero(receivevalid + pos,r); + receivewritten += r; + } while(0); + + do { /* try closing pipe to child: */ + if (!receiveeof) break; + if (receivewritten < receivetotalbytes) break; + if (tochild[1] < 0) break; + + if (receiveeof == 4096) + ; /* XXX: UNIX doesn't provide a way to signal an error through a pipe */ + close(tochild[1]); + tochild[1] = -1; + } while(0); + + } + + + do { + r = waitpid(child,&childstatus,0); + } while (r == -1 && errno == EINTR); + + if (!WIFEXITED(childstatus)) { errno = 0; die_fatal("process killed by signal",0,0); } + return WEXITSTATUS(childstatus); +} diff --git a/nacl/curvecp/curvecpprintkey.c b/nacl/curvecp/curvecpprintkey.c new file mode 100644 index 00000000..8fd26bcf --- /dev/null +++ b/nacl/curvecp/curvecpprintkey.c @@ -0,0 +1,46 @@ +#include +#include "die.h" +#include "e.h" +#include "load.h" +#include "writeall.h" +#include "crypto_box.h" + +unsigned char pk[crypto_box_PUBLICKEYBYTES]; +unsigned char out[crypto_box_PUBLICKEYBYTES * 2 + 1]; + +void die_usage(void) +{ + die_1(111,"curvecpprintkey: usage: curvecpprintkey keydir\n"); +} + +void die_fatal(const char *trouble,const char *d,const char *fn) +{ + if (d) { + if (fn) die_9(111,"curvecpmakekey: fatal: ",trouble," ",d,"/",fn,": 
",e_str(errno),"\n"); + die_7(111,"curvecpmakekey: fatal: ",trouble," ",d,": ",e_str(errno),"\n"); + } + die_5(111,"curvecpmakekey: fatal: ",trouble,": ",e_str(errno),"\n"); +} + +int main(int argc,char **argv) +{ + char *d; + long long j; + + if (!argv[0]) die_usage(); + if (!argv[1]) die_usage(); + d = argv[1]; + + if (chdir(d) == -1) die_fatal("unable to chdir to directory",d,0); + if (load("publickey",pk,sizeof pk) == -1) die_fatal("unable to read",d,"publickey"); + + for (j = 0;j < crypto_box_PUBLICKEYBYTES;++j) { + out[2 * j + 0] = "0123456789abcdef"[15 & (int) (pk[j] >> 4)]; + out[2 * j + 1] = "0123456789abcdef"[15 & (int) (pk[j] >> 0)]; + } + out[2 * j] = '\n'; + + if (writeall(1,out,sizeof out) == -1) die_fatal("unable to write output",0,0); + + return 0; +} diff --git a/nacl/curvecp/curvecpserver.c b/nacl/curvecp/curvecpserver.c new file mode 100644 index 00000000..82cc6670 --- /dev/null +++ b/nacl/curvecp/curvecpserver.c @@ -0,0 +1,497 @@ +#include +#include +#include +#include +#include +#include +#include +#include "e.h" +#include "die.h" +#include "byte.h" +#include "open.h" +#include "load.h" +#include "socket.h" +#include "uint64_pack.h" +#include "uint64_unpack.h" +#include "writeall.h" +#include "nanoseconds.h" +#include "safenonce.h" +#include "nameparse.h" +#include "hexparse.h" +#include "portparse.h" +#include "randommod.h" + +#include "randombytes.h" +#include "crypto_box.h" +#include "crypto_secretbox.h" +#if crypto_box_PUBLICKEYBYTES != 32 +error! +#endif +#if crypto_box_NONCEBYTES != 24 +error! +#endif +#if crypto_box_BOXZEROBYTES != 16 +error! +#endif +#if crypto_box_ZEROBYTES != 32 +error! +#endif +#if crypto_box_BEFORENMBYTES != 32 +error! +#endif +#if crypto_secretbox_KEYBYTES != 32 +error! +#endif +#if crypto_secretbox_NONCEBYTES != 24 +error! +#endif +#if crypto_secretbox_BOXZEROBYTES != 16 +error! +#endif +#if crypto_secretbox_ZEROBYTES != 32 +error! 
/* Parse a dotted-quad IPv4 address x ("a.b.c.d") into y[0..3].
   Each component must be 1-3 decimal digits and fit in a byte.
   Returns 1 on success, 0 on any malformed input.
   Fix over the original: components above 255 (e.g. "999.1.1.1") were
   silently truncated modulo 256 on assignment to the unsigned char
   output; they are now rejected.  Also drops the always-true
   "k >= 0 && k < 4" guard inside the loop. */
int ipparse(unsigned char *y,const char *x)
{
  long long j;
  long long k;
  long long d;

  for (k = 0;k < 4;++k) y[k] = 0;
  for (k = 0;k < 4;++k) {
    d = 0;
    for (j = 0;j < 3 && x[j] >= '0' && x[j] <= '9';++j) d = d * 10 + (x[j] - '0');
    if (j == 0) return 0;        /* component missing */
    if (d > 255) return 0;       /* component out of byte range */
    x += j;
    y[k] = (unsigned char) d;
    if (k < 3) {
      if (*x != '.') return 0;   /* components are dot-separated */
      ++x;
    }
  }
  if (*x) return 0;              /* trailing junk */
  return 1;
}

/* Parse a decimal client-concurrency limit into *y.
   Accepts 1..65535 (at most 9 digits read); returns 1 on success,
   0 on non-numeric input or out-of-range values. */
int maxparse(long long *y,const char *x)
{
  long long d;
  long long j;

  d = 0;
  for (j = 0;j < 9 && x[j] >= '0' && x[j] <= '9';++j) d = d * 10 + (x[j] - '0');
  if (x[j]) return 0;
  if (d < 1) return 0;
  if (d > 65535) return 0;
  *y = d;
  return 1;
}
-1; + +/* server security: */ +char *keydir = 0; +unsigned char servername[256]; +unsigned char serverlongtermsk[32]; +unsigned char servershorttermpk[32]; +unsigned char servershorttermsk[32]; + +/* routing to the client: */ +unsigned char clientextension[16]; + +/* client security: */ +unsigned char clientlongtermpk[32]; +unsigned char clientshorttermpk[32]; + +/* shared secrets: */ +unsigned char clientshortserverlong[32]; +unsigned char clientshortservershort[32]; +unsigned char clientlongserverlong[32]; + +unsigned char allzero[128] = {0}; + +unsigned char nonce[24]; +unsigned char text[2048]; + +unsigned char packetip[4]; +unsigned char packetport[2]; +unsigned char packet[4096]; +crypto_uint64 packetnonce; + +#define MESSAGELEN 1104 + +struct activeclient { + unsigned char clientshorttermpk[32]; + unsigned char clientshortservershort[32]; + crypto_uint64 receivednonce; + crypto_uint64 sentnonce; + long long messagelen; + pid_t child; + int tochild; + int fromchild; + unsigned char clientextension[16]; + unsigned char clientip[4]; + unsigned char clientport[2]; + unsigned char message[MESSAGELEN]; +} ; + +const char *strmaxactiveclients = "100"; +long long maxactiveclients = 0; +long long numactiveclients = 0; +struct activeclient *activeclients = 0; +struct pollfd *p; + +int fdwd = -1; + +int pi0[2]; +int pi1[2]; + +unsigned char childbuf[4096]; +long long childbuflen = 0; +unsigned char childmessage[2048]; +long long childmessagelen = 0; + +int main(int argc,char **argv) +{ + long long r; + long long i; + long long k; + + signal(SIGPIPE,SIG_IGN); + signal(SIGCHLD,SIG_IGN); + + if (!argv[0]) die_usage(0); + for (;;) { + char *x; + if (!argv[1]) break; + if (argv[1][0] != '-') break; + x = *++argv; + if (x[0] == '-' && x[1] == 0) break; + if (x[0] == '-' && x[1] == '-' && x[2] == 0) break; + while (*++x) { + if (*x == 'q') { flagverbose = 0; continue; } + if (*x == 'Q') { flagverbose = 1; continue; } + if (*x == 'v') { if (flagverbose == 2) flagverbose = 3; 
else flagverbose = 2; continue; } + if (*x == 'c') { + if (x[1]) { strmaxactiveclients = x + 1; break; } + if (argv[1]) { strmaxactiveclients = *++argv; break; } + } + die_usage(0); + } + } + if (!maxparse(&maxactiveclients,strmaxactiveclients)) die_usage("concurrency must be between 1 and 65535"); + if (!nameparse(servername,*++argv)) die_usage("sname must be at most 255 bytes, at most 63 bytes between dots"); + keydir = *++argv; if (!keydir) die_usage("missing keydir"); + if (!ipparse(serverip,*++argv)) die_usage("ip must be an IPv4 address"); + if (!portparse(serverport,*++argv)) die_usage("port must be an integer between 0 and 65535"); + if (!hexparse(serverextension,16,*++argv)) die_usage("ext must be exactly 32 hex characters"); + if (!*++argv) die_usage("missing prog"); + + for (;;) { + r = open_read("/dev/null"); + if (r == -1) die_fatal("unable to open /dev/null",0,0); + if (r > 9) { close(r); break; } + } + + activeclients = malloc(maxactiveclients * sizeof(struct activeclient)); + if (!activeclients) die_fatal("unable to create activeclients array",0,0); + randombytes((void *) activeclients,maxactiveclients * sizeof(struct activeclient)); + for (i = 0;i < maxactiveclients;++i) { + activeclients[i].child = -1; + activeclients[i].tochild = -1; + activeclients[i].fromchild = -1; + activeclients[i].receivednonce = 0; + activeclients[i].sentnonce = randommod(281474976710656LL); + } + + p = malloc((1 + maxactiveclients) * sizeof(struct pollfd)); + if (!p) die_fatal("unable to create poll array",0,0); + + fdwd = open_cwd(); + if (fdwd == -1) die_fatal("unable to open current directory",0,0); + + if (chdir(keydir) == -1) die_fatal("unable to chdir to",keydir,0); + if (load(".expertsonly/secretkey",serverlongtermsk,sizeof serverlongtermsk) == -1) die_fatal("unable to read secret key from",keydir,0); + + udpfd = socket_udp(); + if (udpfd == -1) die_fatal("unable to create socket",0,0); + if (socket_bind(udpfd,serverip,serverport) == -1) die_fatal("unable to bind 
socket",0,0); + + randombytes(minutekey,sizeof minutekey); + randombytes(lastminutekey,sizeof lastminutekey); + nextminute = nanoseconds() + 60000000000ULL; + + for (;;) { + long long timeout = nextminute - nanoseconds(); + if (timeout <= 0) { + timeout = 60000000000ULL; + byte_copy(lastminutekey,sizeof lastminutekey,minutekey); + randombytes(minutekey,sizeof minutekey); + nextminute = nanoseconds() + timeout; + randombytes(packet,sizeof packet); + randombytes(packetip,sizeof packetip); + randombytes(packetport,sizeof packetport); + randombytes(clientshorttermpk,sizeof clientshorttermpk); + randombytes(clientshortserverlong,sizeof clientshortserverlong); + randombytes(nonce,sizeof nonce); + randombytes(text,sizeof text); + randombytes(childbuf,sizeof childbuf); + randombytes(childmessage,sizeof childmessage); + randombytes(servershorttermpk,sizeof servershorttermpk); + randombytes(servershorttermsk,sizeof servershorttermsk); + } + + for (i = 0;i < numactiveclients;++i) { + p[i].fd = activeclients[i].fromchild; + p[i].events = POLLIN; + } + p[numactiveclients].fd = udpfd; + p[numactiveclients].events = POLLIN; + if (poll(p,1 + numactiveclients,timeout / 1000000 + 1) < 0) continue; + + do { /* try receiving a packet: */ + if (!p[numactiveclients].revents) break; + r = socket_recv(udpfd,packet,sizeof packet,packetip,packetport); + if (r < 80) break; + if (r > 1184) break; + if (r & 15) break; + if (!(byte_isequal(packet,7,"QvnQ5Xl") & byte_isequal(packet + 8,16,serverextension))) break; + byte_copy(clientextension,16,packet + 24); + if (packet[7] == 'H') { /* Hello packet: */ + if (r != 224) break; + byte_copy(clientshorttermpk,32,packet + 40); + crypto_box_beforenm(clientshortserverlong,clientshorttermpk,serverlongtermsk); + byte_copy(nonce,16,"CurveCP-client-H"); + byte_copy(nonce + 16,8,packet + 136); + byte_zero(text,16); + byte_copy(text + 16,80,packet + 144); + if (crypto_box_open_afternm(text,text,96,nonce,clientshortserverlong)) break; + + /* send Cookie 
packet: */ + + crypto_box_keypair(servershorttermpk,servershorttermsk); + byte_zero(text + 64,32); + byte_copy(text + 96,32,clientshorttermpk); + byte_copy(text + 128,32,servershorttermsk); + byte_copy(nonce,8,"minute-k"); + if (safenonce(nonce + 8,1) == -1) die_fatal("nonce-generation disaster",0,0); + crypto_secretbox(text + 64,text + 64,96,nonce,minutekey); + byte_copy(text + 64,16,nonce + 8); + + byte_zero(text,32); + byte_copy(text + 32,32,servershorttermpk); + byte_copy(nonce,8,"CurveCPK"); /* reusing the other 16 bytes */ + crypto_box_afternm(text,text,160,nonce,clientshortserverlong); + + byte_copy(packet,8,"RL3aNMXK"); + byte_copy(packet + 8,16,clientextension); + byte_copy(packet + 24,16,serverextension); + byte_copy(packet + 40,16,nonce + 8); + byte_copy(packet + 56,144,text + 16); + + socket_send(udpfd,packet,200,packetip,packetport); + } + if (packet[7] == 'I') { /* Initiate packet: */ + if (r < 560) break; + for (i = 0;i < numactiveclients;++i) /* XXX use better data structure */ + if (byte_isequal(activeclients[i].clientshorttermpk,32,packet + 40)) + break; + if (i < numactiveclients) { + packetnonce = uint64_unpack(packet + 168); + if (packetnonce <= activeclients[i].receivednonce) break; + byte_copy(nonce,16,"CurveCP-client-I"); + byte_copy(nonce + 16,8,packet + 168); + byte_zero(text,16); + byte_copy(text + 16,r - 176,packet + 176); + if (crypto_box_open_afternm(text,text,r - 160,nonce,activeclients[i].clientshortservershort)) break; + + /* XXX: update clientip, clientextension; but not if client has spoken recently */ + activeclients[i].receivednonce = packetnonce; + text[383] = (r - 544) >> 4; + if (writeall(activeclients[i].tochild,text + 383,r - 543) == -1) + ; /* child is gone; will see eof later */ + break; + } + if (i == maxactiveclients) break; + + byte_copy(nonce,8,"minute-k"); + byte_copy(nonce + 8,16,packet + 72); + byte_zero(text,16); + byte_copy(text + 16,80,packet + 88); + if (crypto_secretbox_open(text,text,96,nonce,minutekey)) { + 
byte_zero(text,16); + byte_copy(text + 16,80,packet + 88); + if (crypto_secretbox_open(text,text,96,nonce,lastminutekey)) break; + } + if (!byte_isequal(packet + 40,32,text + 32)) break; + byte_copy(servershorttermsk,32,text + 64); + byte_copy(clientshorttermpk,32,packet + 40); + crypto_box_beforenm(clientshortservershort,clientshorttermpk,servershorttermsk); + + byte_copy(nonce,16,"CurveCP-client-I"); + byte_copy(nonce + 16,8,packet + 168); + byte_zero(text,16); + byte_copy(text + 16,r - 176,packet + 176); + if (crypto_box_open_afternm(text,text,r - 160,nonce,clientshortservershort)) break; + + if (!byte_isequal(text + 128,256,servername)) break; + + /* XXX skip if client authentication is not desired: */ + byte_copy(clientlongtermpk,32,text + 32); + /* XXX impose policy limitations on clients: known, maxconn */ + /* XXX for known clients, retrieve shared secret from cache: */ + crypto_box_beforenm(clientlongserverlong,clientlongtermpk,serverlongtermsk); + byte_copy(nonce,8,"CurveCPV"); + byte_copy(nonce + 8,16,text + 64); + byte_zero(text + 64,16); + if (crypto_box_open_afternm(text + 64,text + 64,64,nonce,clientlongserverlong)) break; + if (!byte_isequal(text + 96,32,clientshorttermpk)) break; + + if (open_pipe(pi0) == -1) break; /* XXX: error message */ + if (open_pipe(pi1) == -1) { close(pi0[0]); close(pi0[1]); break; } /* XXX: error message */ + + activeclients[i].child = fork(); + if (activeclients[i].child == -1) { + close(pi0[0]); close(pi0[1]); + close(pi1[0]); close(pi1[1]); + break; /* XXX: error message */ + } + if (activeclients[i].child == 0) { + if (fchdir(fdwd) == -1) die_fatal("unable to chdir to original directory",0,0); + close(8); + if (dup(pi0[0]) != 8) die_fatal("unable to dup",0,0); + close(9); + if (dup(pi1[1]) != 9) die_fatal("unable to dup",0,0); + /* XXX: set up environment variables */ + signal(SIGPIPE,SIG_DFL); + signal(SIGCHLD,SIG_DFL); + execvp(*argv,argv); + die_fatal("unable to run",*argv,0); + } + + activeclients[i].tochild = 
pi0[1]; close(pi0[0]); + activeclients[i].fromchild = pi1[0]; close(pi1[1]); + activeclients[i].messagelen = 0; + byte_copy(activeclients[i].clientshorttermpk,32,clientshorttermpk); + byte_copy(activeclients[i].clientshortservershort,32,clientshortservershort); + activeclients[i].receivednonce = uint64_unpack(packet + 168); + byte_copy(activeclients[i].clientextension,16,clientextension); + byte_copy(activeclients[i].clientip,4,packetip); + byte_copy(activeclients[i].clientport,2,packetport); + ++numactiveclients; + + text[383] = (r - 544) >> 4; + if (writeall(activeclients[i].tochild,text + 383,r - 543) == -1) + ; /* child is gone; will see eof later */ + } + if (packet[7] == 'M') { /* Message packet: */ + if (r < 112) break; + for (i = 0;i < numactiveclients;++i) /* XXX use better data structure */ + if (byte_isequal(activeclients[i].clientshorttermpk,32,packet + 40)) + break; + if (i < numactiveclients) { + packetnonce = uint64_unpack(packet + 72); + if (packetnonce <= activeclients[i].receivednonce) break; + byte_copy(nonce,16,"CurveCP-client-M"); + byte_copy(nonce + 16,8,packet + 72); + byte_zero(text,16); + byte_copy(text + 16,r - 80,packet + 80); + if (crypto_box_open_afternm(text,text,r - 64,nonce,activeclients[i].clientshortservershort)) break; + + /* XXX: update clientip, clientextension */ + activeclients[i].receivednonce = packetnonce; + text[31] = (r - 96) >> 4; + if (writeall(activeclients[i].tochild,text + 31,r - 95) == -1) + ; /* child is gone; will see eof later */ + break; + } + } + } while (0); + + for (i = numactiveclients - 1;i >= 0;--i) { + do { + if (!p[i].revents) break; + r = read(activeclients[i].fromchild,childbuf,sizeof childbuf); + if (r == -1) if (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN) break; + if (r <= 0) goto endconnection; + childbuflen = r; + for (k = 0;k < childbuflen;++k) { + r = activeclients[i].messagelen; + if (r < 0) goto endconnection; + if (r >= MESSAGELEN) goto endconnection; + 
activeclients[i].message[r] = childbuf[k]; + if (r == 0) if (childbuf[k] & 128) goto endconnection; + activeclients[i].messagelen = r + 1; + if (r == 16 * (unsigned long long) activeclients[i].message[0]) { + if (r < 16) goto endconnection; + if (r > 1088) goto endconnection; + byte_copy(nonce,16,"CurveCP-server-M"); + uint64_pack(nonce + 16,++activeclients[i].sentnonce); + byte_zero(text,32); + byte_copy(text + 32,r,activeclients[i].message + 1); + crypto_box_afternm(text,text,r + 32,nonce,activeclients[i].clientshortservershort); + byte_copy(packet,8,"RL3aNMXM"); + byte_copy(packet + 8,16,clientextension); + byte_copy(packet + 24,16,serverextension); + byte_copy(packet + 40,8,nonce + 16); + byte_copy(packet + 48,r + 16,text + 16); + socket_send(udpfd,packet,r + 64,activeclients[i].clientip,activeclients[i].clientport); + activeclients[i].messagelen = 0; + } + } + break; + + endconnection: + + /* XXX: cache cookie if it's recent */ + close(activeclients[i].fromchild); activeclients[i].fromchild = -1; + close(activeclients[i].tochild); activeclients[i].tochild = -1; + --numactiveclients; + activeclients[i] = activeclients[numactiveclients]; + randombytes((void *) &activeclients[numactiveclients],sizeof(struct activeclient)); + } while (0); + } + } +} diff --git a/nacl/curvecp/die.c b/nacl/curvecp/die.c new file mode 100644 index 00000000..2220cf38 --- /dev/null +++ b/nacl/curvecp/die.c @@ -0,0 +1,42 @@ +#include +#include "writeall.h" +#include "die.h" + +void die_9(int e + ,const char *s0 + ,const char *s1 + ,const char *s2 + ,const char *s3 + ,const char *s4 + ,const char *s5 + ,const char *s6 + ,const char *s7 + ,const char *s8 +) +{ + const char *s[9]; + const char *x; + char buf[1024]; + int buflen = 0; + int i; + + s[0] = s0; + s[1] = s1; + s[2] = s2; + s[3] = s3; + s[4] = s4; + s[5] = s5; + s[6] = s6; + s[7] = s7; + s[8] = s8; + for (i = 0;i < 9;++i) { + x = s[i]; + if (!x) continue; + while (*x) { + if (buflen == sizeof buf) { writeall(2,buf,buflen); buflen 
= 0; } + buf[buflen++] = *x++; + } + } + writeall(2,buf,buflen); + _exit(e); +} diff --git a/nacl/curvecp/die.h b/nacl/curvecp/die.h new file mode 100644 index 00000000..52ec7616 --- /dev/null +++ b/nacl/curvecp/die.h @@ -0,0 +1,16 @@ +#ifndef DIE_H +#define DIE_H + +extern void die_9(int,const char *,const char *,const char *,const char *,const char *,const char *,const char *,const char *,const char *); + +#define die_8(x,a,b,c,d,e,f,g,h) die_9(x,a,b,c,d,e,f,g,h,0) +#define die_7(x,a,b,c,d,e,f,g) die_8(x,a,b,c,d,e,f,g,0) +#define die_6(x,a,b,c,d,e,f) die_7(x,a,b,c,d,e,f,0) +#define die_5(x,a,b,c,d,e) die_6(x,a,b,c,d,e,0) +#define die_4(x,a,b,c,d) die_5(x,a,b,c,d,0) +#define die_3(x,a,b,c) die_4(x,a,b,c,0) +#define die_2(x,a,b) die_3(x,a,b,0) +#define die_1(x,a) die_2(x,a,0) +#define die_0(x) die_1(x,0) + +#endif diff --git a/nacl/curvecp/e.c b/nacl/curvecp/e.c new file mode 100644 index 00000000..00ff7fd9 --- /dev/null +++ b/nacl/curvecp/e.c @@ -0,0 +1,106 @@ +#include "e.h" + +#define X(e,s) if (i == e) return s; + +const char *e_str(int i) +{ + X(0,"no error"); + X(EINTR,"interrupted system call") + X(ENOMEM,"out of memory") + X(ENOENT,"file does not exist") + X(ETXTBSY,"text busy") + X(EIO,"input/output error") + X(EEXIST,"file already exists") + X(ETIMEDOUT,"timed out") + X(EINPROGRESS,"operation in progress") + X(EAGAIN,"temporary failure") + X(EWOULDBLOCK,"input/output would block") + X(EPIPE,"broken pipe") + X(EPERM,"permission denied") + X(EACCES,"access denied") + X(ENODEV,"device not configured") + X(EPROTO,"protocol error") + X(EISDIR,"is a directory") + X(ESRCH,"no such process") + X(E2BIG,"argument list too long") + X(ENOEXEC,"exec format error") + X(EBADF,"file descriptor not open") + X(ECHILD,"no child processes") + X(EDEADLK,"operation would cause deadlock") + X(EFAULT,"bad address") + X(ENOTBLK,"not a block device") + X(EBUSY,"device busy") + X(EXDEV,"cross-device link") + X(ENODEV,"device does not support operation") + X(ENOTDIR,"not a 
directory") + X(EINVAL,"invalid argument") + X(ENFILE,"system cannot open more files") + X(EMFILE,"process cannot open more files") + X(ENOTTY,"not a tty") + X(EFBIG,"file too big") + X(ENOSPC,"out of disk space") + X(ESPIPE,"unseekable descriptor") + X(EROFS,"read-only file system") + X(EMLINK,"too many links") + X(EDOM,"input out of range") + X(ERANGE,"output out of range") + X(EALREADY,"operation already in progress") + X(ENOTSOCK,"not a socket") + X(EDESTADDRREQ,"destination address required") + X(EMSGSIZE,"message too long") + X(EPROTOTYPE,"incorrect protocol type") + X(ENOPROTOOPT,"protocol not available") + X(EPROTONOSUPPORT,"protocol not supported") + X(ESOCKTNOSUPPORT,"socket type not supported") + X(EOPNOTSUPP,"operation not supported") + X(EPFNOSUPPORT,"protocol family not supported") + X(EAFNOSUPPORT,"address family not supported") + X(EADDRINUSE,"address already used") + X(EADDRNOTAVAIL,"address not available") + X(ENETDOWN,"network down") + X(ENETUNREACH,"network unreachable") + X(ENETRESET,"network reset") + X(ECONNABORTED,"connection aborted") + X(ECONNRESET,"connection reset") + X(ENOBUFS,"out of buffer space") + X(EISCONN,"already connected") + X(ENOTCONN,"not connected") + X(ESHUTDOWN,"socket shut down") + X(ETOOMANYREFS,"too many references") + X(ECONNREFUSED,"connection refused") + X(ELOOP,"symbolic link loop") + X(ENAMETOOLONG,"file name too long") + X(EHOSTDOWN,"host down") + X(EHOSTUNREACH,"host unreachable") + X(ENOTEMPTY,"directory not empty") + X(EPROCLIM,"too many processes") + X(EUSERS,"too many users") + X(EDQUOT,"disk quota exceeded") + X(ESTALE,"stale NFS file handle") + X(EREMOTE,"too many levels of remote in path") + X(EBADRPC,"RPC structure is bad") + X(ERPCMISMATCH,"RPC version mismatch") + X(EPROGUNAVAIL,"RPC program unavailable") + X(EPROGMISMATCH,"program version mismatch") + X(EPROCUNAVAIL,"bad procedure for program") + X(ENOLCK,"no locks available") + X(ENOSYS,"system call not available") + X(EFTYPE,"bad file type") + 
X(EAUTH,"authentication error") + X(ENEEDAUTH,"not authenticated") + X(ENOSTR,"not a stream device") + X(ETIME,"timer expired") + X(ENOSR,"out of stream resources") + X(ENOMSG,"no message of desired type") + X(EBADMSG,"bad message type") + X(EIDRM,"identifier removed") + X(ENONET,"machine not on network") + X(EREMOTE,"object not local") + X(ENOLINK,"link severed") + X(EADV,"advertise error") + X(ESRMNT,"srmount error") + X(ECOMM,"communication error") + X(EMULTIHOP,"multihop attempted") + X(EREMCHG,"remote address changed") + return "unknown error"; +} diff --git a/nacl/curvecp/e.h b/nacl/curvecp/e.h new file mode 100644 index 00000000..add0768b --- /dev/null +++ b/nacl/curvecp/e.h @@ -0,0 +1,438 @@ +#ifndef E_H +#define E_H + +#include + +extern const char *e_str(int); + +#ifndef EPERM +#define EPERM (-5001) +#endif +#ifndef ENOENT +#define ENOENT (-5002) +#endif +#ifndef ESRCH +#define ESRCH (-5003) +#endif +#ifndef EINTR +#define EINTR (-5004) +#endif +#ifndef EIO +#define EIO (-5005) +#endif +#ifndef ENXIO +#define ENXIO (-5006) +#endif +#ifndef E2BIG +#define E2BIG (-5007) +#endif +#ifndef ENOEXEC +#define ENOEXEC (-5008) +#endif +#ifndef EBADF +#define EBADF (-5009) +#endif +#ifndef ECHILD +#define ECHILD (-5010) +#endif +#ifndef EAGAIN +#define EAGAIN (-5011) +#endif +#ifndef EWOULDBLOCK +#define EWOULDBLOCK (-7011) +#endif +#ifndef ENOMEM +#define ENOMEM (-5012) +#endif +#ifndef EACCES +#define EACCES (-5013) +#endif +#ifndef EFAULT +#define EFAULT (-5014) +#endif +#ifndef ENOTBLK +#define ENOTBLK (-5015) +#endif +#ifndef EBUSY +#define EBUSY (-5016) +#endif +#ifndef EEXIST +#define EEXIST (-5017) +#endif +#ifndef EXDEV +#define EXDEV (-5018) +#endif +#ifndef ENODEV +#define ENODEV (-5019) +#endif +#ifndef ENOTDIR +#define ENOTDIR (-5020) +#endif +#ifndef EISDIR +#define EISDIR (-5021) +#endif +#ifndef EINVAL +#define EINVAL (-5022) +#endif +#ifndef ENFILE +#define ENFILE (-5023) +#endif +#ifndef EMFILE +#define EMFILE (-5024) +#endif +#ifndef ENOTTY 
+#define ENOTTY (-5025) +#endif +#ifndef ETXTBSY +#define ETXTBSY (-5026) +#endif +#ifndef EFBIG +#define EFBIG (-5027) +#endif +#ifndef ENOSPC +#define ENOSPC (-5028) +#endif +#ifndef ESPIPE +#define ESPIPE (-5029) +#endif +#ifndef EROFS +#define EROFS (-5030) +#endif +#ifndef EMLINK +#define EMLINK (-5031) +#endif +#ifndef EPIPE +#define EPIPE (-5032) +#endif +#ifndef EDOM +#define EDOM (-5033) +#endif +#ifndef ERANGE +#define ERANGE (-5034) +#endif +#ifndef EDEADLK +#define EDEADLK (-5035) +#endif +#ifndef EDEADLOCK +#define EDEADLOCK (-7035) +#endif +#ifndef ENAMETOOLONG +#define ENAMETOOLONG (-5036) +#endif +#ifndef ENOLCK +#define ENOLCK (-5037) +#endif +#ifndef ENOSYS +#define ENOSYS (-5038) +#endif +#ifndef ENOTEMPTY +#define ENOTEMPTY (-5039) +#endif +#ifndef ELOOP +#define ELOOP (-5040) +#endif +#ifndef ENOMSG +#define ENOMSG (-5042) +#endif +#ifndef EIDRM +#define EIDRM (-5043) +#endif +#ifndef ECHRNG +#define ECHRNG (-5044) +#endif +#ifndef EL2NSYNC +#define EL2NSYNC (-5045) +#endif +#ifndef EL3HLT +#define EL3HLT (-5046) +#endif +#ifndef EL3RST +#define EL3RST (-5047) +#endif +#ifndef ELNRNG +#define ELNRNG (-5048) +#endif +#ifndef EUNATCH +#define EUNATCH (-5049) +#endif +#ifndef ENOCSI +#define ENOCSI (-5050) +#endif +#ifndef EL2HLT +#define EL2HLT (-5051) +#endif +#ifndef EBADE +#define EBADE (-5052) +#endif +#ifndef EBADR +#define EBADR (-5053) +#endif +#ifndef EXFULL +#define EXFULL (-5054) +#endif +#ifndef ENOANO +#define ENOANO (-5055) +#endif +#ifndef EBADRQC +#define EBADRQC (-5056) +#endif +#ifndef EBADSLT +#define EBADSLT (-5057) +#endif +#ifndef EBFONT +#define EBFONT (-5059) +#endif +#ifndef ENOSTR +#define ENOSTR (-5060) +#endif +#ifndef ENODATA +#define ENODATA (-5061) +#endif +#ifndef ETIME +#define ETIME (-5062) +#endif +#ifndef ENOSR +#define ENOSR (-5063) +#endif +#ifndef ENONET +#define ENONET (-5064) +#endif +#ifndef ENOPKG +#define ENOPKG (-5065) +#endif +#ifndef EREMOTE +#define EREMOTE (-5066) +#endif +#ifndef ENOLINK +#define 
ENOLINK (-5067) +#endif +#ifndef EADV +#define EADV (-5068) +#endif +#ifndef ESRMNT +#define ESRMNT (-5069) +#endif +#ifndef ECOMM +#define ECOMM (-5070) +#endif +#ifndef EPROTO +#define EPROTO (-5071) +#endif +#ifndef EMULTIHOP +#define EMULTIHOP (-5072) +#endif +#ifndef EDOTDOT +#define EDOTDOT (-5073) +#endif +#ifndef EBADMSG +#define EBADMSG (-5074) +#endif +#ifndef EOVERFLOW +#define EOVERFLOW (-5075) +#endif +#ifndef ENOTUNIQ +#define ENOTUNIQ (-5076) +#endif +#ifndef EBADFD +#define EBADFD (-5077) +#endif +#ifndef EREMCHG +#define EREMCHG (-5078) +#endif +#ifndef ELIBACC +#define ELIBACC (-5079) +#endif +#ifndef ELIBBAD +#define ELIBBAD (-5080) +#endif +#ifndef ELIBSCN +#define ELIBSCN (-5081) +#endif +#ifndef ELIBMAX +#define ELIBMAX (-5082) +#endif +#ifndef ELIBEXEC +#define ELIBEXEC (-5083) +#endif +#ifndef EILSEQ +#define EILSEQ (-5084) +#endif +#ifndef ERESTART +#define ERESTART (-5085) +#endif +#ifndef ESTRPIPE +#define ESTRPIPE (-5086) +#endif +#ifndef EUSERS +#define EUSERS (-5087) +#endif +#ifndef ENOTSOCK +#define ENOTSOCK (-5088) +#endif +#ifndef EDESTADDRREQ +#define EDESTADDRREQ (-5089) +#endif +#ifndef EMSGSIZE +#define EMSGSIZE (-5090) +#endif +#ifndef EPROTOTYPE +#define EPROTOTYPE (-5091) +#endif +#ifndef ENOPROTOOPT +#define ENOPROTOOPT (-5092) +#endif +#ifndef EPROTONOSUPPORT +#define EPROTONOSUPPORT (-5093) +#endif +#ifndef ESOCKTNOSUPPORT +#define ESOCKTNOSUPPORT (-5094) +#endif +#ifndef EOPNOTSUPP +#define EOPNOTSUPP (-5095) +#endif +#ifndef EPFNOSUPPORT +#define EPFNOSUPPORT (-5096) +#endif +#ifndef EAFNOSUPPORT +#define EAFNOSUPPORT (-5097) +#endif +#ifndef EADDRINUSE +#define EADDRINUSE (-5098) +#endif +#ifndef EADDRNOTAVAIL +#define EADDRNOTAVAIL (-5099) +#endif +#ifndef ENETDOWN +#define ENETDOWN (-5100) +#endif +#ifndef ENETUNREACH +#define ENETUNREACH (-5101) +#endif +#ifndef ENETRESET +#define ENETRESET (-5102) +#endif +#ifndef ECONNABORTED +#define ECONNABORTED (-5103) +#endif +#ifndef ECONNRESET +#define ECONNRESET (-5104) 
+#endif +#ifndef ENOBUFS +#define ENOBUFS (-5105) +#endif +#ifndef EISCONN +#define EISCONN (-5106) +#endif +#ifndef ENOTCONN +#define ENOTCONN (-5107) +#endif +#ifndef ESHUTDOWN +#define ESHUTDOWN (-5108) +#endif +#ifndef ETOOMANYREFS +#define ETOOMANYREFS (-5109) +#endif +#ifndef ETIMEDOUT +#define ETIMEDOUT (-5110) +#endif +#ifndef ECONNREFUSED +#define ECONNREFUSED (-5111) +#endif +#ifndef EHOSTDOWN +#define EHOSTDOWN (-5112) +#endif +#ifndef EHOSTUNREACH +#define EHOSTUNREACH (-5113) +#endif +#ifndef EALREADY +#define EALREADY (-5114) +#endif +#ifndef EINPROGRESS +#define EINPROGRESS (-5115) +#endif +#ifndef ESTALE +#define ESTALE (-5116) +#endif +#ifndef EUCLEAN +#define EUCLEAN (-5117) +#endif +#ifndef ENOTNAM +#define ENOTNAM (-5118) +#endif +#ifndef ENAVAIL +#define ENAVAIL (-5119) +#endif +#ifndef EISNAM +#define EISNAM (-5120) +#endif +#ifndef EREMOTEIO +#define EREMOTEIO (-5121) +#endif +#ifndef EDQUOT +#define EDQUOT (-5122) +#endif +#ifndef ENOMEDIUM +#define ENOMEDIUM (-5123) +#endif +#ifndef EMEDIUMTYPE +#define EMEDIUMTYPE (-5124) +#endif +#ifndef ECANCELED +#define ECANCELED (-5125) +#endif +#ifndef ENOKEY +#define ENOKEY (-5126) +#endif +#ifndef EKEYEXPIRED +#define EKEYEXPIRED (-5127) +#endif +#ifndef EKEYREVOKED +#define EKEYREVOKED (-5128) +#endif +#ifndef EKEYREJECTED +#define EKEYREJECTED (-5129) +#endif +#ifndef EOWNERDEAD +#define EOWNERDEAD (-5130) +#endif +#ifndef ENOTRECOVERABLE +#define ENOTRECOVERABLE (-5131) +#endif +#ifndef ERFKILL +#define ERFKILL (-5132) +#endif +#ifndef EPROCLIM +#define EPROCLIM (-6067) +#endif +#ifndef EBADRPC +#define EBADRPC (-6072) +#endif +#ifndef ERPCMISMATCH +#define ERPCMISMATCH (-6073) +#endif +#ifndef EPROGUNAVAIL +#define EPROGUNAVAIL (-6074) +#endif +#ifndef EPROGMISMATCH +#define EPROGMISMATCH (-6075) +#endif +#ifndef EPROCUNAVAIL +#define EPROCUNAVAIL (-6076) +#endif +#ifndef EFTYPE +#define EFTYPE (-6079) +#endif +#ifndef EAUTH +#define EAUTH (-6080) +#endif +#ifndef ENEEDAUTH +#define ENEEDAUTH 
/* Decode exactly 2*len hex characters from x into len bytes at y.
   Both upper- and lowercase digits are accepted.  Returns 1 on
   success, 0 on NULL input, a non-hex character, or a string whose
   length is not exactly 2*len. */

static int nibble(char c)
{
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
  return -1;
}

int hexparse(unsigned char *y,long long len,const char *x)
{
  long long i;

  if (!x) return 0;

  for (i = 0;i < len;++i) {
    int hi;
    int lo;

    hi = nibble(x[2 * i]);
    if (hi == -1) return 0;
    lo = nibble(x[2 * i + 1]);
    if (lo == -1) return 0;
    y[i] = (unsigned char) (16 * hi + lo);
  }

  if (x[2 * len]) return 0; /* reject trailing characters */
  return 1;
}
/* Encode a dotted name x into the 256-byte length-prefixed form at s:
   each label is stored as one length byte followed by the label bytes,
   terminated by a zero length byte; the rest of s is zero-filled.
   Returns 1 on success, 0 if x is NULL, a label exceeds 63 bytes,
   or the encoding would not fit in 256 bytes. */

int nameparse(unsigned char *s,const char *x)
{
  long long out;
  long long labellen;
  long long k;

  if (!x) return 0;

  /* start from an all-zero 256-byte buffer */
  for (out = 0;out < 256;++out) s[out] = 0;
  out = 0;

  while (*x) {
    /* skip dot separators (this also tolerates repeated dots) */
    if (*x == '.') { ++x; continue; }

    /* measure the label: chars up to the next dot or end of string */
    labellen = 0;
    while (x[labellen] && x[labellen] != '.') ++labellen;
    if (labellen > 63) return 0;     /* per-label length limit */

    if (out >= 256) return 0;
    s[out++] = (unsigned char) labellen;
    for (k = 0;k < labellen;++k) {
      if (out >= 256) return 0;
      s[out++] = (unsigned char) *x++;
    }
  }

  /* terminating zero-length label */
  if (out >= 256) return 0;
  s[out++] = 0;
  return 1;
}
/* Open fn for reading, non-blocking, with close-on-exec set.
   Returns the file descriptor, or -1 on failure (errno from open). */
int open_read(const char *fn)
{
#ifdef O_CLOEXEC
  /* preferred path: the kernel applies close-on-exec atomically at open */
  return open(fn,O_RDONLY | O_NONBLOCK | O_CLOEXEC);
#else
  /* fallback for platforms without O_CLOEXEC: set the flag afterwards */
  int fd;
  fd = open(fn,O_RDONLY | O_NONBLOCK);
  if (fd == -1) return -1;
  fcntl(fd,F_SETFD,1);
  return fd;
#endif
}
/* Parse a decimal port number from x into y[0..1], big-endian
   (y[0] = high byte, y[1] = low byte).
   Returns 1 on success; 0 if x is empty, contains a non-digit,
   has more than 5 digits, or exceeds the 16-bit port range.
   Fix over the original: values 65536..99999 were previously accepted
   and silently truncated to their low 16 bits, yielding a wrong port. */
int portparse(unsigned char *y,const char *x)
{
  long long d = 0;
  long long j;
  for (j = 0;j < 5 && x[j] >= '0' && x[j] <= '9';++j)
    d = d * 10 + (x[j] - '0');
  if (j == 0) return 0;        /* no digits at all */
  if (x[j]) return 0;          /* trailing garbage or >5 digits */
  if (d > 65535) return 0;     /* out of 16-bit port range */
  y[0] = d >> 8;
  y[1] = d;
  return 1;
}
/* Counter state for nonce generation.  This process may use counter values
   c with counterlow <= c < counterhigh; the on-disk file always records a
   value >= counterhigh, so counters are never reused even after a crash. */
static crypto_uint64 counterlow = 0;   /* next counter value to hand out */
static crypto_uint64 counterhigh = 0;  /* first counter NOT reserved for us */

static unsigned char flagkeyloaded = 0; /* set once noncekey[] is read from disk */
static unsigned char noncekey[32];      /* key for crypto_block, from .expertsonly/noncekey */
static unsigned char data[16];          /* scratch: counter (8 bytes) || random (8 bytes) */

/* Generate a 16-byte nonce into y.  flaglongterm selects how many counter
   values to reserve from disk per refill (large batch vs. one at a time).
   Returns 0 on success, -1 on any lock/load/save failure.  Not thread-safe
   (static state, per the header comment above). */
int safenonce(unsigned char *y,int flaglongterm)
{
  /* One-time setup: read the 32-byte nonce key, under the file lock. */
  if (!flagkeyloaded) {
    int fdlock;
    fdlock = open_lock(".expertsonly/lock");
    if (fdlock == -1) return -1;
    if (load(".expertsonly/noncekey",noncekey,sizeof noncekey) == -1) { close(fdlock); return -1; }
    close(fdlock);
    flagkeyloaded = 1;
  }

  /* Out of reserved counters: take the lock, read the on-disk counter,
     reserve a fresh range, and persist the new high mark BEFORE using
     any counter from it (savesync fsyncs, so a crash cannot reuse one). */
  if (counterlow >= counterhigh) {
    int fdlock;
    fdlock = open_lock(".expertsonly/lock");
    if (fdlock == -1) return -1;
    if (load(".expertsonly/noncecounter",data,8) == -1) { close(fdlock); return -1; }
    counterlow = uint64_unpack(data);
    /* long-term callers reserve 2^20 counters at once to amortize the
       disk round-trip; short-term callers reserve exactly one */
    if (flaglongterm)
      counterhigh = counterlow + 1048576;
    else
      counterhigh = counterlow + 1;
    uint64_pack(data,counterhigh);
    if (savesync(".expertsonly/noncecounter",data,8) == -1) { close(fdlock); return -1; }
    close(fdlock);
  }

  /* Nonce y = crypto_block(counter || 8 random bytes, noncekey). */
  randombytes(data + 8,8);
  uint64_pack(data,counterlow++);
  crypto_block(y,data,noncekey);

  return 0;
}
/* Write xlen bytes from x to the file fn and force them to disk.
   Returns 0 on success, -1 on failure (open, write, or fsync error).
   Note: the descriptor is always closed, but a failed write may leave
   a partially written file behind. */
int savesync(const char *fn,const void *x,long long xlen)
{
  int result = -1;
  int fd = open_write(fn);
  if (fd == -1) return -1;
  /* only fsync if every byte was written; writeall returns 0 or -1 */
  if (writeall(fd,x,xlen) == 0)
    result = fsync(fd);
  close(fd);
  return result;
}
/* Receive one UDP datagram on fd into x (at most xlen bytes), recording
   the sender in ip (4 raw bytes of sin_addr) and port (2 raw bytes of
   sin_port, network byte order).  Returns the recvfrom result: the
   datagram length, or -1 with errno set. */
long long socket_recv(int fd,unsigned char *x,long long xlen,unsigned char *ip,unsigned char *port)
{
  struct sockaddr_in sa;
  socklen_t salen;
  int r;

  /* reject nonsensical lengths; cap each receive at 1 MB */
  if (xlen < 0) { errno = EPROTO; return -1; }
  if (xlen > 1048576) xlen = 1048576;

  byte_zero(&sa,sizeof sa);
  salen = sizeof sa;
  r = recvfrom(fd,x,xlen,0,(struct sockaddr *) &sa,&salen);
  /* ip/port are filled even when recvfrom fails; sa was zeroed above,
     so callers see all-zero address bytes in that case */
  byte_copy(ip,4,&sa.sin_addr);
  byte_copy(port,2,&sa.sin_port);
  return r;
}
IP_MTU_DISCOVER +#ifdef IP_PMTUDISC_DONT + const int x = IP_PMTUDISC_DONT; + setsockopt(fd,SOL_IP,IP_MTU_DISCOVER,&x,sizeof x); +#endif +#endif +} + +int socket_udp(void) +{ + int fd = socket(PF_INET,SOCK_DGRAM,0); + if (fd == -1) return -1; + fcntl(fd,F_SETFD,1); + blocking_disable(fd); + enable_bsd_fragmentation(fd); + enable_linux_fragmentation(fd); + return fd; +} diff --git a/nacl/curvecp/uint16_pack.c b/nacl/curvecp/uint16_pack.c new file mode 100644 index 00000000..f3761035 --- /dev/null +++ b/nacl/curvecp/uint16_pack.c @@ -0,0 +1,7 @@ +#include "uint16_pack.h" + +void uint16_pack(unsigned char *y,crypto_uint16 x) +{ + *y++ = x; x >>= 8; + *y++ = x; x >>= 8; +} diff --git a/nacl/curvecp/uint16_pack.h b/nacl/curvecp/uint16_pack.h new file mode 100644 index 00000000..6c5b65e1 --- /dev/null +++ b/nacl/curvecp/uint16_pack.h @@ -0,0 +1,8 @@ +#ifndef UINT16_PACK_H +#define UINT16_PACK_H + +#include "crypto_uint16.h" + +extern void uint16_pack(unsigned char *,crypto_uint16); + +#endif diff --git a/nacl/curvecp/uint16_unpack.c b/nacl/curvecp/uint16_unpack.c new file mode 100644 index 00000000..b4e74ee4 --- /dev/null +++ b/nacl/curvecp/uint16_unpack.c @@ -0,0 +1,9 @@ +#include "uint16_unpack.h" + +crypto_uint16 uint16_unpack(const unsigned char *x) +{ + crypto_uint16 result; + result = x[1]; + result <<= 8; result |= x[0]; + return result; +} diff --git a/nacl/curvecp/uint16_unpack.h b/nacl/curvecp/uint16_unpack.h new file mode 100644 index 00000000..3e3aedfc --- /dev/null +++ b/nacl/curvecp/uint16_unpack.h @@ -0,0 +1,8 @@ +#ifndef UINT16_UNPACK_H +#define UINT16_UNPACK_H + +#include "crypto_uint16.h" + +extern crypto_uint16 uint16_unpack(const unsigned char *); + +#endif diff --git a/nacl/curvecp/uint32_pack.c b/nacl/curvecp/uint32_pack.c new file mode 100644 index 00000000..d54fe542 --- /dev/null +++ b/nacl/curvecp/uint32_pack.c @@ -0,0 +1,9 @@ +#include "uint32_pack.h" + +void uint32_pack(unsigned char *y,crypto_uint32 x) +{ + *y++ = x; x >>= 8; + *y++ = x; x >>= 
8; + *y++ = x; x >>= 8; + *y++ = x; x >>= 8; +} diff --git a/nacl/curvecp/uint32_pack.h b/nacl/curvecp/uint32_pack.h new file mode 100644 index 00000000..efdf7919 --- /dev/null +++ b/nacl/curvecp/uint32_pack.h @@ -0,0 +1,8 @@ +#ifndef UINT32_PACK_H +#define UINT32_PACK_H + +#include "crypto_uint32.h" + +extern void uint32_pack(unsigned char *,crypto_uint32); + +#endif diff --git a/nacl/curvecp/uint32_unpack.c b/nacl/curvecp/uint32_unpack.c new file mode 100644 index 00000000..adde6987 --- /dev/null +++ b/nacl/curvecp/uint32_unpack.c @@ -0,0 +1,11 @@ +#include "uint32_unpack.h" + +crypto_uint32 uint32_unpack(const unsigned char *x) +{ + crypto_uint32 result; + result = x[3]; + result <<= 8; result |= x[2]; + result <<= 8; result |= x[1]; + result <<= 8; result |= x[0]; + return result; +} diff --git a/nacl/curvecp/uint32_unpack.h b/nacl/curvecp/uint32_unpack.h new file mode 100644 index 00000000..dd65f365 --- /dev/null +++ b/nacl/curvecp/uint32_unpack.h @@ -0,0 +1,8 @@ +#ifndef UINT32_UNPACK_H +#define UINT32_UNPACK_H + +#include "crypto_uint32.h" + +extern crypto_uint32 uint32_unpack(const unsigned char *); + +#endif diff --git a/nacl/curvecp/uint64_pack.c b/nacl/curvecp/uint64_pack.c new file mode 100644 index 00000000..898a80a3 --- /dev/null +++ b/nacl/curvecp/uint64_pack.c @@ -0,0 +1,13 @@ +#include "uint64_pack.h" + +void uint64_pack(unsigned char *y,crypto_uint64 x) +{ + *y++ = x; x >>= 8; + *y++ = x; x >>= 8; + *y++ = x; x >>= 8; + *y++ = x; x >>= 8; + *y++ = x; x >>= 8; + *y++ = x; x >>= 8; + *y++ = x; x >>= 8; + *y++ = x; x >>= 8; +} diff --git a/nacl/curvecp/uint64_pack.h b/nacl/curvecp/uint64_pack.h new file mode 100644 index 00000000..be8330fd --- /dev/null +++ b/nacl/curvecp/uint64_pack.h @@ -0,0 +1,8 @@ +#ifndef UINT64_PACK_H +#define UINT64_PACK_H + +#include "crypto_uint64.h" + +extern void uint64_pack(unsigned char *,crypto_uint64); + +#endif diff --git a/nacl/curvecp/uint64_unpack.c b/nacl/curvecp/uint64_unpack.c new file mode 100644 index 
/* Write all xlen bytes from x to fd, retrying on EINTR/EAGAIN/EWOULDBLOCK
   (polling for writability before each retry) and splitting the data into
   writes of at most 1 MB.  Returns 0 once everything is written, -1 on a
   hard write error.
   Fix over the original: pointer arithmetic was performed on a
   `const void *` (a GNU extension, invalid in ISO C); a byte pointer is
   used instead. */
int writeall(int fd,const void *x,long long xlen)
{
  const unsigned char *buf = x;   /* ISO C forbids arithmetic on void * */
  long long w;
  while (xlen > 0) {
    w = xlen;
    if (w > 1048576) w = 1048576;  /* cap each write() at 1 MB */
    w = write(fd,buf,w);
    if (w < 0) {
      if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {
        /* transient: block until fd is writable, then retry */
        struct pollfd p;
        p.fd = fd;
        p.events = POLLOUT | POLLERR;
        poll(&p,1,-1);
        continue;
      }
      return -1;                   /* hard error */
    }
    buf += w;
    xlen -= w;
  }
  return 0;
}
+ +version=`cat version` +project=nacl +shorthostname=`hostname | sed 's/\..*//' | tr -cd '[a-z][A-Z][0-9]'` + +top="`pwd`/build/$shorthostname" +bin="$top/bin" +lib="$top/lib" +include="$top/include" +work="$top/work" + +PATH="/usr/local/bin:$PATH" +PATH="/usr/sfw/bin:$PATH" +PATH="$bin:$PATH" +export PATH + +LD_LIBRARY_PATH="/usr/local/lib/sparcv9:/usr/local/lib:$LD_LIBRARY_PATH" +LD_LIBRARY_PATH="/usr/sfw/lib/sparcv9:/usr/sfw/lib:$LD_LIBRARY_PATH" +export LD_LIBRARY_PATH + +# and wacky MacOS X +DYLD_LIBRARY_PATH="/usr/local/lib/sparcv9:/usr/local/lib:$DYLD_LIBRARY_PATH" +DYLD_LIBRARY_PATH="/usr/sfw/lib/sparcv9:/usr/sfw/lib:$DYLD_LIBRARY_PATH" +export DYLD_LIBRARY_PATH + +# and work around bug in GNU sort +LANG=C +export LANG + +rm -rf "$top" +mkdir -p "$top" +mkdir -p "$bin" +mkdir -p "$lib" +mkdir -p "$include" + +exec >"$top/log" +exec 2>&1 +exec 5>"$top/data" +exec "$work/${project}_base.c" + okc-$abi \ + | while read compiler + do + ( cd "$work" && $compiler -c ${project}_base.c ) && break + done + okar-$abi cr "$lib/$abi/lib${project}.a" "$work/${project}_base.o" + ( ranlib "$lib/$abi/lib${project}.a" || exit 0 ) +done + +# loop over operations +cat OPERATIONS \ +| while read o +do + [ -d "$o" ] || continue + + selected='' + [ -f "$o/selected" ] && selected=`cat "$o/selected"` + + # for each operation, loop over primitives + ls "$o" \ + | sort \ + | while read p + do + [ -d "$o/$p" ] || continue + expectedchecksum='' + [ -f "$o/$p/checksum" ] && expectedchecksum=`cat "$o/$p/checksum"` + op="${o}_${p}" + + startdate=`date +%Y%m%d` + + # for each operation primitive, loop over abis + okabi \ + | while read abi + do + echo "=== `date` === $abi $o/$p" + libs=`"oklibs-$abi"` + libs="$lib/$abi/cpucycles.o $libs" + [ -f "$lib/$abi/lib${project}.a" ] && libs="$lib/$abi/lib${project}.a $libs" + + rm -rf "$work" + mkdir -p "$work" + mkdir -p "$work/best" + + # for each operation primitive abi, loop over implementations + find "$o/$p" -follow -name "api.h" \ + | sort 
\ + | while read doth + do + implementationdir=`dirname $doth` + opi=`echo "$implementationdir" | tr ./- ___` + + echo "=== `date` === $abi $implementationdir" + + rm -rf "$work/compile" + mkdir -p "$work/compile" + + cfiles=`ls "$implementationdir" | grep '\.c$' || :` + sfiles=`ls "$implementationdir" | grep '\.[sS]$' || :` + cppfiles=`ls "$o" | grep '\.cpp$' || :` + + cp -p "$o"/*.c "$work/compile/" + cp -p "$o"/*.cpp "$work/compile/" + + cp -pr "$implementationdir"/* "$work/compile" + + cp -p "try-anything.c" "$work/compile/try-anything.c" + cp -p "measure-anything.c" "$work/compile/measure-anything.c" + + cp -p MACROS "$work/compile/MACROS" + cp -p PROTOTYPES.c "$work/compile/PROTOTYPES.c" + cp -p PROTOTYPES.cpp "$work/compile/PROTOTYPES.cpp" + + ( + cd "$work/compile" + ( + echo "#ifndef ${o}_H" + echo "#define ${o}_H" + echo "" + echo "#include \"${op}.h\"" + echo "" + egrep "${o}"'$|'"${o}"'\(|'"${o}"'_' < MACROS \ + | sed "s/$o/$op/" | while read mop + do + echo "#define ${mop} ${mop}" | sed "s/$op/$o/" + done + echo "#define ${o}_PRIMITIVE \"${p}\"" + echo "#define ${o}_IMPLEMENTATION ${op}_IMPLEMENTATION" + echo "#define ${o}_VERSION ${op}_VERSION" + echo "" + echo "#endif" + ) > "$o.h" + ( + echo "#ifndef ${op}_H" + echo "#define ${op}_H" + echo "" + sed 's/[ ]CRYPTO_/ '"${opi}"'_/g' < api.h + echo '#ifdef __cplusplus' + echo '#include ' + egrep "${o}"'$|'"${o}"'\(|'"${o}"'_' < PROTOTYPES.cpp \ + | sed "s/$o/$opi/" + echo 'extern "C" {' + echo '#endif' + egrep "${o}"'$|'"${o}"'\(|'"${o}"'_' < PROTOTYPES.c \ + | sed "s/$o/$opi/" + echo '#ifdef __cplusplus' + echo '}' + echo '#endif' + echo "" + egrep "${o}"'$|'"${o}"'\(|'"${o}"'_' < MACROS \ + | sed "s/$o/$opi/" | while read mopi + do + echo "#define ${mopi} ${mopi}" | sed "s/$opi/$op/" + done + echo "#define ${op}_IMPLEMENTATION \"${implementationdir}\"" + echo "#ifndef ${opi}_VERSION" + echo "#define ${opi}_VERSION \"-\"" + echo "#endif" + echo "#define ${op}_VERSION ${opi}_VERSION" + echo "" + echo 
"#endif" + ) > "$op.h" + + okc-$abi \ + | while read compiler + do + echo "=== `date` === $abi $implementationdir $compiler" + compilerword=`echo "$compiler" | tr ' ' '_'` + ok=1 + for f in $cfiles $sfiles + do + if [ "$ok" = 1 ] + then + $compiler \ + -I. -I"$include" -I"$include/$abi" \ + -c "$f" >../errors 2>&1 || ok=0 + ( if [ `wc -l < ../errors` -lt 25 ] + then + cat ../errors + else + head ../errors + echo ... + tail ../errors + fi + ) \ + | while read err + do + echo "$version $shorthostname $abi $startdate $o $p fromcompiler $implementationdir $compilerword $f $err" >&5 + done + fi + done + + [ "$ok" = 1 ] || continue + okar-$abi cr "$op.a" *.o || continue + ranlib "$op.a" + + $compiler \ + -I. -I"$include" -I"$include/$abi" \ + -o try try.c try-anything.c \ + "$op.a" $libs >../errors 2>&1 || ok=0 + cat ../errors \ + | while read err + do + echo "$version $shorthostname $abi $startdate $o $p fromcompiler $implementationdir $compilerword try.c $err" >&5 + done + [ "$ok" = 1 ] || continue + + if sh -c './try || exit $?' >../outputs 2>../errors + then + checksum=`awk '{print $1}' < ../outputs` + cycles=`awk '{print $2}' < ../outputs` + checksumcycles=`awk '{print $3}' < ../outputs` + cyclespersecond=`awk '{print $4}' < ../outputs` + impl=`awk '{print $5}' < ../outputs` + else + echo "$version $shorthostname $abi $startdate $o $p tryfails $implementationdir $compilerword error $?" 
>&5 + cat ../outputs ../errors \ + | while read err + do + echo "$version $shorthostname $abi $startdate $o $p tryfails $implementationdir $compilerword $err" >&5 + done + continue + fi + + checksumok=fails + [ "x$expectedchecksum" = "x$checksum" ] && checksumok=ok + [ "x$expectedchecksum" = "x" ] && checksumok=unknown + echo "$version $shorthostname $abi $startdate $o $p try $checksum $checksumok $cycles $checksumcycles $cyclespersecond $impl $compilerword" >&5 + [ "$checksumok" = fails ] && continue + + [ -s ../bestmedian ] && [ `cat ../bestmedian` -le $cycles ] && continue + echo "$cycles" > ../bestmedian + + $compiler -D'COMPILER="'"$compiler"'"' \ + -DLOOPS=1 \ + -I. -I"$include" -I"$include/$abi" \ + -o measure measure.c measure-anything.c \ + "$op.a" $libs >../errors 2>&1 || ok=0 + cat ../errors \ + | while read err + do + echo "$version $shorthostname $abi $startdate $o $p fromcompiler $implementationdir $compilerword measure.c $err" >&5 + done + [ "$ok" = 1 ] || continue + + for f in $cppfiles + do + okcpp-$abi \ + | while read cppcompiler + do + echo "=== `date` === $abi $implementationdir $cppcompiler" + $cppcompiler \ + -I. 
-I"$include" -I"$include/$abi" \ + -c "$f" && break + done + done + + rm -f ../best/*.o ../best/measure || continue + for f in *.o + do + cp -p "$f" "../best/${opi}-$f" + done + cp -p "$op.h" "../$op.h" + cp -p "$o.h" "../$o.h" + cp -p measure ../best/measure + done + ) + done + + echo "=== `date` === $abi $o/$p measuring" + + "$work/best/measure" \ + | while read measurement + do + echo "$version $shorthostname $abi $startdate $o $p $measurement" >&5 + done + + [ -f "$o/$p/used" ] \ + && okar-$abi cr "$lib/$abi/lib${project}.a" "$work/best"/*.o \ + && ( ranlib "$lib/$abi/lib${project}.a" || exit 0 ) \ + && cp -p "$work/$op.h" "$include/$abi/$op.h" \ + && [ -f "$o/$p/selected" ] \ + && cp -p "$work/$o.h" "$include/$abi/$o.h" \ + || : + done + done +done + +for language in c cpp +do + for bintype in commandline tests + do + ls $bintype \ + | sed -n 's/\.'$language'$//p' \ + | sort \ + | while read cmd + do + echo "=== `date` === starting $bintype/$cmd" + + rm -rf "$work" + mkdir -p "$work/compile" + + cp "$bintype/$cmd.$language" "$work/compile/$cmd.$language" + [ "$bintype" = tests ] && cp -p "$bintype/$cmd.out" "$work/compile/$cmd.out" + + okabi \ + | while read abi + do + [ -x "$bin/$cmd" ] && break + + libs=`"oklibs-$abi"` + libs="$lib/$abi/cpucycles.o $libs" + libs="$libs $lib/$abi/randombytes.o" + + ok${language}-$abi \ + | while read compiler + do + [ -x "$bin/$cmd" ] && break + + echo "=== `date` === $bintype/$cmd $abi $compiler" + ( + cd "$work/compile" + if $compiler \ + -I"$include" -I"$include/$abi" \ + -o "$cmd" "$cmd.${language}" \ + "$lib/$abi/lib${project}.a" $libs + then + case "$bintype" in + commandline) cp -p "$cmd" "$bin/$cmd" ;; + tests) "./$cmd" | cmp - "$cmd.out" || "./$cmd" ;; + esac + fi + ) + done + done + done + done +done + +echo "=== `date` === starting curvecp" + +okabi \ +| awk ' + { if ($1=="amd64" || $1=="ia64" || $1=="ppc64" || $1=="sparcv9" || $1=="mips64") print 1,$1 + else if ($1 == "mips32") print 2,$1 + else print 3,$1 + } +' 
\ +| sort \ +| while read okabipriority abi +do + [ -x "$bin/curvecpmessage" ] && break + libs=`"oklibs-$abi"` + libs="$lib/$abi/cpucycles.o $libs" + libs="$libs $lib/$abi/randombytes.o" + + okc-$abi \ + | while read compiler + do + [ -x "$bin/curvecpmessage" ] && break + + echo "=== `date` === curvecp $abi $compiler" + rm -rf "$work" + mkdir -p "$work/compile" + cp curvecp/* "$work/compile" + ( + cd "$work/compile" + cat SOURCES \ + | while read x + do + $compiler -I"$include" -I"$include/$abi" -c "$x.c" + done + + if okar-$abi cr curvecplibs.a `cat LIBS` + then + cat TARGETS \ + | while read x + do + $compiler -I"$include" -I"$include/$abi" \ + -o "$x" "$x.o" \ + curvecplibs.a "$lib/$abi/lib${project}.a" $libs \ + && cp -p "$x" "$bin/$x" + done + fi + ) + done + +done + +echo "=== `date` === finishing" diff --git a/nacl/inttypes/crypto_int16.c b/nacl/inttypes/crypto_int16.c new file mode 100644 index 00000000..bc160669 --- /dev/null +++ b/nacl/inttypes/crypto_int16.c @@ -0,0 +1,3 @@ +#include "crypto_int16.h" +#include "signed.h" +DOIT(16,crypto_int16) diff --git a/nacl/inttypes/crypto_int32.c b/nacl/inttypes/crypto_int32.c new file mode 100644 index 00000000..520e6822 --- /dev/null +++ b/nacl/inttypes/crypto_int32.c @@ -0,0 +1,3 @@ +#include "crypto_int32.h" +#include "signed.h" +DOIT(32,crypto_int32) diff --git a/nacl/inttypes/crypto_int64.c b/nacl/inttypes/crypto_int64.c new file mode 100644 index 00000000..77e815bf --- /dev/null +++ b/nacl/inttypes/crypto_int64.c @@ -0,0 +1,3 @@ +#include "crypto_int64.h" +#include "signed.h" +DOIT(64,crypto_int64) diff --git a/nacl/inttypes/crypto_int8.c b/nacl/inttypes/crypto_int8.c new file mode 100644 index 00000000..5966c62e --- /dev/null +++ b/nacl/inttypes/crypto_int8.c @@ -0,0 +1,3 @@ +#include "crypto_int8.h" +#include "signed.h" +DOIT(8,crypto_int8) diff --git a/nacl/inttypes/crypto_uint16.c b/nacl/inttypes/crypto_uint16.c new file mode 100644 index 00000000..16ce4a69 --- /dev/null +++ 
/* Build-time probe: DOIT (from unsigned.h) expands to a main() whose exit
   status is 0 only if crypto_uint32 behaves as a 32-bit unsigned type.
   The inttypes/do script compiles and runs this for each candidate type. */
#include "crypto_uint32.h"
#include "unsigned.h"
DOIT(32,crypto_uint32)
/* DOIT(bits,target) expands to a main() that probes whether `target` is a
   signed integer type of exactly `bits` bits:
   - doubling 1 `bits` times must not reach zero early (type is at least
     `bits` wide) but must reach zero at the end (type is no wider);
   - after the wrap x is 0, so x - 1 must be negative (x > 0 fails),
     confirming the type is signed.
   Exit status is 0 on success, 100 on any mismatch.
   NOTE(review): this deliberately relies on signed overflow wrapping,
   which is undefined behavior in ISO C; it is a runtime probe aimed at
   the concrete compilers the build script tries. */
#define DOIT(bits,target) \
int main() \
{ \
  target x; \
  int i; \
 \
  x = 1; \
  for (i = 0;i < bits;++i) { \
    if (x == 0) return 100; \
    x += x; \
  } \
  if (x != 0) return 100; \
  x -= 1; \
  if (x > 0) return 100; \
 \
  return 0; \
}
/* Deterministic pseudo-random byte stream for the measurement harness,
   built on D. J. Bernstein's SURF function over a fixed seed.
   Fix over the original: uint32 was typedef'd as (signed) int, making the
   shifts in ROTATE and the wrapping additions in surf() undefined
   behavior; SURF is defined over unsigned 32-bit words. */

typedef unsigned int uint32;

/* fixed seed: the first 32 decimal digits of pi */
static uint32 seed[32] = { 3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3,2,3,8,4,6,2,6,4,3,3,8,3,2,7,9,5 } ;
static uint32 in[12];    /* input/counter block; in[0..3] incremented per refill */
static uint32 out[8];    /* latest SURF output words */
static int outleft = 0;  /* how many words of out[] still hold an unread byte */

#define ROTATE(x,b) (((x) << (b)) | ((x) >> (32 - (b))))
#define MUSH(i,b) x = t[i] += (((x ^ seed[i]) + sum) ^ ROTATE(x,b));

/* One SURF evaluation: mixes in[] under seed[] into out[]. */
static void surf(void)
{
  uint32 t[12]; uint32 x; uint32 sum = 0;
  int r; int i; int loop;

  for (i = 0;i < 12;++i) t[i] = in[i] ^ seed[12 + i];
  for (i = 0;i < 8;++i) out[i] = seed[24 + i];
  x = t[11];
  for (loop = 0;loop < 2;++loop) {
    for (r = 0;r < 16;++r) {
      sum += 0x9e3779b9;
      MUSH(0,5) MUSH(1,7) MUSH(2,9) MUSH(3,13)
      MUSH(4,5) MUSH(5,7) MUSH(6,9) MUSH(7,13)
      MUSH(8,5) MUSH(9,7) MUSH(10,9) MUSH(11,13)
    }
    for (i = 0;i < 8;++i) out[i] ^= t[i + 4];
  }
}

/* Fill x[0..xlen-1] with pseudo-random bytes.  Each refill increments the
   counter in in[0..3] and reruns surf(); one byte (the low 8 bits) is
   taken from each of the 8 output words per refill. */
void randombytes(unsigned char *x,unsigned long long xlen)
{
  while (xlen > 0) {
    if (!outleft) {
      if (!++in[0]) if (!++in[1]) if (!++in[2]) ++in[3];
      surf();
      outleft = 8;
    }
    *x = out[--outleft];
    ++x;
    --xlen;
  }
}
/* will never deallocate so shifting is ok */ + x += 63 & (-(unsigned long) x); + return x; +} + +static long long cyclespersecond; + +static void printimplementations(void) +{ + int i; + + printword("implementation"); + printword(primitiveimplementation); + printword(implementationversion); + printf("\n"); fflush(stdout); + + for (i = 0;sizenames[i];++i) { + printword(sizenames[i]); + printnum(sizes[i]); + printf("\n"); fflush(stdout); + } + + printword("cpuid"); + printword(cpuid); + printf("\n"); fflush(stdout); + + printword("cpucycles_persecond"); + printnum(cyclespersecond); + printf("\n"); fflush(stdout); + + printword("cpucycles_implementation"); + printword(cpucycles_implementation); + printf("\n"); fflush(stdout); + + printword("compiler"); + printword(COMPILER); +#if defined(__VERSION__) && !defined(__ICC) + printword(__VERSION__); +#elif defined(__xlc__) + printword(__xlc__); +#elif defined(__ICC) + { + char buf[256]; + + sprintf(buf, "%d.%d.%d", __ICC/100, __ICC%100, + __INTEL_COMPILER_BUILD_DATE); + printword(buf); + } +#elif defined(__PGIC__) + { + char buf[256]; + + sprintf(buf, "%d.%d.%d", __PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__); + printword(buf); + } +#elif defined(__SUNPRO_C) + { + char buf[256]; + int major, minor, micro; + + micro = __SUNPRO_C & 0xf; + minor = (__SUNPRO_C >> 4) & 0xf; + major = (__SUNPRO_C >> 8) & 0xf; + + if (micro) + sprintf(buf, "%d.%d.%d", major, minor, micro); + else + sprintf(buf, "%d.%d", major, minor); + printword(buf); + } +#else + printword("unknown compiler version"); +#endif + printf("\n"); fflush(stdout); +} + +void printentry(long long mbytes,const char *measuring,long long *m,long long mlen) +{ + long long i; + long long j; + long long belowj; + long long abovej; + + printword(measuring); + if (mbytes >= 0) printnum(mbytes); else printword(""); + if (mlen > 0) { + for (j = 0;j + 1 < mlen;++j) { + belowj = 0; + for (i = 0;i < mlen;++i) if (m[i] < m[j]) ++belowj; + abovej = 0; + for (i = 0;i < mlen;++i) if 
(m[i] > m[j]) ++abovej; + if (belowj * 2 < mlen && abovej * 2 < mlen) break; + } + printnum(m[j]); + if (mlen > 1) { + for (i = 0;i < mlen;++i) printnum(m[i]); + } + } + printf("\n"); fflush(stdout); +} + +void limits() +{ +#ifdef RLIM_INFINITY + struct rlimit r; + r.rlim_cur = 0; + r.rlim_max = 0; +#ifdef RLIMIT_NOFILE + setrlimit(RLIMIT_NOFILE,&r); +#endif +#ifdef RLIMIT_NPROC + setrlimit(RLIMIT_NPROC,&r); +#endif +#ifdef RLIMIT_CORE + setrlimit(RLIMIT_CORE,&r); +#endif +#endif +} + +int main() +{ + cyclespersecond = cpucycles_persecond(); + preallocate(); + limits(); + printimplementations(); + allocate(); + measure(); + return 0; +} diff --git a/nacl/okcompilers/abiname.c b/nacl/okcompilers/abiname.c new file mode 100644 index 00000000..38373201 --- /dev/null +++ b/nacl/okcompilers/abiname.c @@ -0,0 +1,45 @@ +#include + +const char *abi(void) +{ +#if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) + return "amd64"; +#elif defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386) + return "x86"; +#elif defined(__ia64__) || defined(__IA64__) || defined(__M_IA64) + return "ia64"; +#elif defined(__SPU__) + return "cellspu"; +#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) || defined(_ARCH_PPC64) + return "ppc64"; +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) + return "ppc32"; +#elif defined(__sparcv9__) || defined(__sparcv9) + return "sparcv9"; +#elif defined(__sparc_v8__) + return "sparcv8"; +#elif defined(__sparc__) || defined(__sparc) + if (sizeof(long) == 4) return "sparcv8"; + return "sparcv9"; +#elif defined(__ARM_EABI__) + return "armeabi"; +#elif defined(__arm__) + return "arm"; +#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) +# if defined(_ABIO32) + return "mipso32"; +# elif defined(_ABIN32) + return "mips32"; +# else + return "mips64"; +# endif +#else + return "default"; +#endif 
+} + +int main(int argc,char **argv) +{ + printf("%s %s\n",argv[1],abi()); + return 0; +} diff --git a/nacl/okcompilers/archivers b/nacl/okcompilers/archivers new file mode 100644 index 00000000..d5851c37 --- /dev/null +++ b/nacl/okcompilers/archivers @@ -0,0 +1,2 @@ +ar +ar -X64 diff --git a/nacl/okcompilers/c b/nacl/okcompilers/c new file mode 100644 index 00000000..7218da3a --- /dev/null +++ b/nacl/okcompilers/c @@ -0,0 +1,8 @@ +gcc -m64 -O3 -fomit-frame-pointer -funroll-loops +gcc -m64 -O -fomit-frame-pointer +gcc -m64 -fomit-frame-pointer +gcc -m32 -O3 -fomit-frame-pointer -funroll-loops +gcc -m32 -O -fomit-frame-pointer +gcc -m32 -fomit-frame-pointer +spu-gcc -mstdmain -march=cell -O3 -funroll-loops -fomit-frame-pointer -Drandom=rand -Dsrandom=srand +spu-gcc -mstdmain -march=cell -O -fomit-frame-pointer -Drandom=rand -Dsrandom=srand diff --git a/nacl/okcompilers/cpp b/nacl/okcompilers/cpp new file mode 100644 index 00000000..d1b9ae6d --- /dev/null +++ b/nacl/okcompilers/cpp @@ -0,0 +1,8 @@ +g++ -m64 -O3 -fomit-frame-pointer -funroll-loops +g++ -m64 -O -fomit-frame-pointer +g++ -m64 -fomit-frame-pointer +g++ -m32 -O3 -fomit-frame-pointer -funroll-loops +g++ -m32 -O -fomit-frame-pointer +g++ -m32 -fomit-frame-pointer +spu-g++ -mstdmain -march=cell -O3 -funroll-loops -fomit-frame-pointer -Drandom=rand -Dsrandom=srand +spu-g++ -mstdmain -march=cell -O -fomit-frame-pointer -Drandom=rand -Dsrandom=srand diff --git a/nacl/okcompilers/do b/nacl/okcompilers/do new file mode 100755 index 00000000..372b7e00 --- /dev/null +++ b/nacl/okcompilers/do @@ -0,0 +1,196 @@ +#!/bin/sh -e + +mkdir oldbin +mkdir bin + +for language in c cpp +do + exec <$language + exec 9>${language}-works + + while read c options + do + echo "=== `date` === checking $c $options" >&2 + rm -f test* + ( + echo "#!/bin/sh" + echo 'PATH="'"$PATH"'"' + echo 'export PATH' + echo "$c" "$options" '"$@"' + ) > test-okc + chmod 755 test-okc + cat lib.c main.c > test.$language || continue + ./test-okc -o test 
test.$language || continue + ./test || continue + cp main.c test1.$language || continue + cp lib.c test2.$language || continue + ./test-okc -c test1.$language || continue + ./test-okc -c test2.$language || continue + ./test-okc -o test1 test1.o test2.o || continue + ./test1 || continue + echo "=== `date` === success: $c $options is ok" + echo "$c $options" >&9 + done + + mv ${language}-works $language +done + +exec oldbin/okabi +chmod 755 oldbin/okabi +echo "#!/bin/sh" >&7 + +while : +do + exec ${language}-compatible + exec 9>${language}-incompatible + echo "=== `date` === checking compatibility with $c $options" >&2 + exec <$language + while read c2 options2 + do + echo "=== `date` === checking $c2 $options2" >&2 + works=1 + rm -f test* + ( + echo "#!/bin/sh" + echo 'PATH="'"$PATH"'"' + echo 'export PATH' + echo "$c" "$options" '"$@"' + ) > test-okc + chmod 755 test-okc + ( + echo "#!/bin/sh" + echo 'PATH="'"$PATH"'"' + echo 'export PATH' + echo "$c2" "$options2" '"$@"' + ) > test-okc2 + chmod 755 test-okc2 + if cp main.c test5.c \ + && cp main.cpp test5.cpp \ + && cp lib.c test6.c \ + && ./test-okc2 -c test5.$language \ + && ./test-okc -c test6.c \ + && ./test-okc2 -o test5 test5.o test6.o \ + && ./test5 + then + echo "=== `date` === success: $c2 $options2 is compatible" >&2 + echo "$c2 $options2" >&8 + else + echo "$c2 $options2" >&9 + fi + done + done + + abi=`awk '{print length($0),$0}' < c-compatible \ + | sort -n | head -1 | sed 's/ *$//' | sed 's/^[^ ]* //' | tr ' /' '__'` + + echo "echo '"$abi"'" >&7 + + syslibs="" + for i in -lm -lnsl -lsocket -lrt + do + echo "=== `date` === checking $i" >&2 + ( + echo "#!/bin/sh" + echo 'PATH="'"$PATH"'"' + echo 'export PATH' + echo "$c" "$options" '"$@"' "$i" "$syslibs" + ) > test-okclink + chmod 755 test-okclink + cat lib.c main.c > test.c || continue + ./test-okclink -o test test.c $i $syslibs || continue + ./test || continue + syslibs="$i $syslibs" + ( + echo '#!/bin/sh' + echo 'echo "'"$syslibs"'"' + ) > 
"oldbin/oklibs-$abi" + chmod 755 "oldbin/oklibs-$abi" + done + + foundokar=0 + exec &2 + ( + echo "#!/bin/sh" + echo 'PATH="'"$PATH"'"' + echo 'export PATH' + echo "$a" '"$@"' + ) > test-okar + chmod 755 test-okar + cp main.c test9.c || continue + cp lib.c test10.c || continue + ./test-okc -c test10.c || continue + ./test-okar cr test10.a test10.o || continue + ranlib test10.a || echo "=== `date` === no ranlib; continuing anyway" >&2 + ./test-okc -o test9 test9.c test10.a || continue + ./test9 || continue + cp -p test-okar "oldbin/okar-$abi" + echo "=== `date` === success: archiver $a is ok" >&2 + foundokar=1 + break + done + + case $foundokar in + 0) + echo "=== `date` === giving up; no archivers work" >&2 + exit 111 + ;; + esac + + for language in c cpp + do + mv ${language}-incompatible ${language} + exec <${language}-compatible + exec 9>"oldbin/ok${language}-$abi" + chmod 755 "oldbin/ok${language}-$abi" + + echo "#!/bin/sh" >&9 + while read c2 options2 + do + echo "echo '"$c2 $options2"'" >&9 + done + done +done + +exec 7>/dev/null + +oldbin/okabi \ +| while read abi +do + oldbin/okc-$abi \ + | head -1 \ + | while read c + do + $c -o abiname abiname.c \ + && ./abiname "$abi" + done +done > abinames + +numabinames=`awk '{print $2}' < abinames | sort -u | wc -l` +numabis=`oldbin/okabi | wc -l` +if [ "$numabis" = "$numabinames" ] +then + exec bin/okabi + chmod 755 bin/okabi + echo '#!/bin/sh' >&7 + while read oldabi newabi + do + mv "oldbin/okc-$oldabi" "bin/okc-$newabi" + mv "oldbin/okcpp-$oldabi" "bin/okcpp-$newabi" + mv "oldbin/okar-$oldabi" "bin/okar-$newabi" + mv "oldbin/oklibs-$oldabi" "bin/oklibs-$newabi" + echo "echo $newabi" >&7 + done +else + cp -p oldbin/* bin +fi diff --git a/nacl/okcompilers/lib.c b/nacl/okcompilers/lib.c new file mode 100644 index 00000000..cf2e3790 --- /dev/null +++ b/nacl/okcompilers/lib.c @@ -0,0 +1,29 @@ +int not3(int n) +{ + return n != 3; +} + +int bytes(int n) +{ + return (n + 7) / 8; +} + +long long shr32(long long n) +{ + 
return n >> 32; +} + +double double5(void) +{ + return 5.0; +} + +int intbytes(void) +{ + return sizeof(int); +} + +int longbytes(void) +{ + return sizeof(long); +} diff --git a/nacl/okcompilers/lib.cpp b/nacl/okcompilers/lib.cpp new file mode 100644 index 00000000..ea956244 --- /dev/null +++ b/nacl/okcompilers/lib.cpp @@ -0,0 +1,19 @@ +int not3(int n) +{ + return n != 3; +} + +int bytes(int n) +{ + return (n + 7) / 8; +} + +long long shr32(long long n) +{ + return n >> 32; +} + +double double5(void) +{ + return 5.0; +} diff --git a/nacl/okcompilers/main.c b/nacl/okcompilers/main.c new file mode 100644 index 00000000..3b7efa25 --- /dev/null +++ b/nacl/okcompilers/main.c @@ -0,0 +1,25 @@ +extern int not3(int); +extern int bytes(int); +extern long long shr32(long long); +extern double double5(void); +extern int longbytes(void); +extern int intbytes(void); + +int main(int argc,char **argv) +{ + if (intbytes() != sizeof(int)) return 100; + if (longbytes() != sizeof(long)) return 100; + + if (not3(3)) return 100; + + /* on ppc32, gcc -mpowerpc64 produces SIGILL for >>32 */ + if (!not3(shr32(1))) return 100; + + /* on pentium 1, gcc -march=pentium2 produces SIGILL for (...+7)/8 */ + if (bytes(not3(1)) != 1) return 100; + + /* on pentium 1, gcc -march=prescott produces SIGILL for double comparison */ + if (double5() < 0) return 100; + + return 0; +} diff --git a/nacl/okcompilers/main.cpp b/nacl/okcompilers/main.cpp new file mode 100644 index 00000000..6255102c --- /dev/null +++ b/nacl/okcompilers/main.cpp @@ -0,0 +1,22 @@ +extern "C" { + extern int not3(int); + extern int bytes(int); + extern long long shr32(long long); + extern double double5(void); +} + +int main(int argc,char **argv) +{ + if (not3(3)) return 100; + + /* on ppc32, gcc -mpowerpc64 produces SIGILL for >>32 */ + if (!not3(shr32(1))) return 100; + + /* on pentium 1, gcc -march=pentium2 produces SIGILL for (...+7)/8 */ + if (bytes(not3(1)) != 1) return 100; + + /* on pentium 1, gcc -march=prescott produces 
SIGILL for double comparison */ + if (double5() < 0) return 100; + + return 0; +} diff --git a/nacl/randombytes/devurandom.c b/nacl/randombytes/devurandom.c new file mode 100644 index 00000000..f3b8d418 --- /dev/null +++ b/nacl/randombytes/devurandom.c @@ -0,0 +1,34 @@ +#include +#include +#include +#include + +/* it's really stupid that there isn't a syscall for this */ + +static int fd = -1; + +void randombytes(unsigned char *x,unsigned long long xlen) +{ + int i; + + if (fd == -1) { + for (;;) { + fd = open("/dev/urandom",O_RDONLY); + if (fd != -1) break; + sleep(1); + } + } + + while (xlen > 0) { + if (xlen < 1048576) i = xlen; else i = 1048576; + + i = read(fd,x,i); + if (i < 1) { + sleep(1); + continue; + } + + x += i; + xlen -= i; + } +} diff --git a/nacl/randombytes/devurandom.h b/nacl/randombytes/devurandom.h new file mode 100644 index 00000000..2e0caf8a --- /dev/null +++ b/nacl/randombytes/devurandom.h @@ -0,0 +1,24 @@ +/* +randombytes/devurandom.h version 20080713 +D. J. Bernstein +Public domain. +*/ + +#ifndef randombytes_devurandom_H +#define randombytes_devurandom_H + +#ifdef __cplusplus +extern "C" { +#endif + +extern void randombytes(unsigned char *,unsigned long long); + +#ifdef __cplusplus +} +#endif + +#ifndef randombytes_implementation +#define randombytes_implementation "devurandom" +#endif + +#endif diff --git a/nacl/randombytes/do b/nacl/randombytes/do new file mode 100644 index 00000000..42586282 --- /dev/null +++ b/nacl/randombytes/do @@ -0,0 +1,43 @@ +#!/bin/sh -e + +okabi | ( + while read abi + do + + rm -f randombytes.o randombytes.h + + ( + echo devurandom + ) | ( + while read n + do + okc-$abi | ( + while read c + do + echo "=== `date` === Trying $n.c with $c..." 
>&2 + rm -f test randombytes-impl.o randombytes-impl.h randombytes-impl.c + cp $n.c randombytes-impl.c || continue + cp $n.h randombytes-impl.h || continue + $c -c randombytes-impl.c || continue + $c -o test test.c randombytes-impl.o || continue + ./test || continue + echo "=== `date` === Success. Using $n.c." >&2 + mkdir -p lib/$abi + mv randombytes-impl.o lib/$abi/randombytes.o + mkdir -p include/$abi + mv randombytes-impl.h include/$abi/randombytes.h + exit 0 + done + exit 111 + ) && exit 0 + done + exit 111 + ) || ( + echo ===== Giving up. >&2 + rm -f test randombytes-impl.o randombytes-impl.h randombytes-impl.c + exit 111 + ) || exit 111 + + done + exit 0 +) || exit 111 diff --git a/nacl/randombytes/test.c b/nacl/randombytes/test.c new file mode 100644 index 00000000..646811ca --- /dev/null +++ b/nacl/randombytes/test.c @@ -0,0 +1,15 @@ +#include "randombytes-impl.h" + +unsigned char x[65536]; +unsigned long long freq[256]; + +int main() +{ + unsigned long long i; + + randombytes(x,sizeof x); + for (i = 0;i < 256;++i) freq[i] = 0; + for (i = 0;i < sizeof x;++i) ++freq[255 & (int) x[i]]; + for (i = 0;i < 256;++i) if (!freq[i]) return 111; + return 0; +} diff --git a/nacl/tests/auth.c b/nacl/tests/auth.c new file mode 100644 index 00000000..5086624e --- /dev/null +++ b/nacl/tests/auth.c @@ -0,0 +1,19 @@ +#include +#include "crypto_auth_hmacsha512256.h" + +/* "Test Case 2" from RFC 4231 */ +unsigned char key[32] = "Jefe"; +unsigned char c[28] = "what do ya want for nothing?"; + +unsigned char a[32]; + +main() +{ + int i; + crypto_auth_hmacsha512256(a,c,sizeof c,key); + for (i = 0;i < 32;++i) { + printf(",0x%02x",(unsigned int) a[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/auth.out b/nacl/tests/auth.out new file mode 100644 index 00000000..35e5909d --- /dev/null +++ b/nacl/tests/auth.out @@ -0,0 +1,4 @@ +,0x16,0x4b,0x7a,0x7b,0xfc,0xf8,0x19,0xe2 +,0xe3,0x95,0xfb,0xe7,0x3b,0x56,0xe0,0xa3 +,0x87,0xbd,0x64,0x22,0x2e,0x83,0x1f,0xd6 
+,0x10,0x27,0x0c,0xd7,0xea,0x25,0x05,0x54 diff --git a/nacl/tests/auth2.c b/nacl/tests/auth2.c new file mode 100644 index 00000000..ba191de4 --- /dev/null +++ b/nacl/tests/auth2.c @@ -0,0 +1,34 @@ +/* "Test Case AUTH256-4" from RFC 4868 */ + +#include +#include "crypto_auth_hmacsha256.h" + +unsigned char key[32] = { + 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08 +,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 +,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18 +,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20 +} ; + +unsigned char c[50] = { + 0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd +} ; + +unsigned char a[32]; + +main() +{ + int i; + crypto_auth_hmacsha256(a,c,sizeof c,key); + for (i = 0;i < 32;++i) { + printf(",0x%02x",(unsigned int) a[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/auth2.out b/nacl/tests/auth2.out new file mode 100644 index 00000000..955951a2 --- /dev/null +++ b/nacl/tests/auth2.out @@ -0,0 +1,4 @@ +,0x37,0x2e,0xfc,0xf9,0xb4,0x0b,0x35,0xc2 +,0x11,0x5b,0x13,0x46,0x90,0x3d,0x2e,0xf4 +,0x2f,0xce,0xd4,0x6f,0x08,0x46,0xe7,0x25 +,0x7b,0xb1,0x56,0xd3,0xd7,0xb3,0x0d,0x3f diff --git a/nacl/tests/auth3.c b/nacl/tests/auth3.c new file mode 100644 index 00000000..b713b388 --- /dev/null +++ b/nacl/tests/auth3.c @@ -0,0 +1,34 @@ +/* "Test Case AUTH256-4" from RFC 4868 */ + +#include +#include "crypto_auth_hmacsha256.h" + +unsigned char key[32] = { + 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08 +,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 +,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18 +,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20 +} ; + +unsigned char c[50] = { + 0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd 
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd +} ; + +unsigned char a[32] = { + 0x37,0x2e,0xfc,0xf9,0xb4,0x0b,0x35,0xc2 +,0x11,0x5b,0x13,0x46,0x90,0x3d,0x2e,0xf4 +,0x2f,0xce,0xd4,0x6f,0x08,0x46,0xe7,0x25 +,0x7b,0xb1,0x56,0xd3,0xd7,0xb3,0x0d,0x3f +} ; + +main() +{ + printf("%d\n",crypto_auth_hmacsha256_verify(a,c,sizeof c,key)); + return 0; +} diff --git a/nacl/tests/auth3.out b/nacl/tests/auth3.out new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/nacl/tests/auth3.out @@ -0,0 +1 @@ +0 diff --git a/nacl/tests/auth4.cpp b/nacl/tests/auth4.cpp new file mode 100644 index 00000000..a94837d2 --- /dev/null +++ b/nacl/tests/auth4.cpp @@ -0,0 +1,44 @@ +/* "Test Case AUTH256-4" from RFC 4868 */ + +#include +using std::string; +#include +#include "crypto_auth_hmacsha256.h" + +char key_bytes[32] = { + 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08 +,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10 +,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18 +,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20 +} ; + +char c_bytes[50] = { + 0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd +,0xcd,0xcd +} ; + +char a_bytes[32] = { + 0x37,0x2e,0xfc,0xf9,0xb4,0x0b,0x35,0xc2 +,0x11,0x5b,0x13,0x46,0x90,0x3d,0x2e,0xf4 +,0x2f,0xce,0xd4,0x6f,0x08,0x46,0xe7,0x25 +,0x7b,0xb1,0x56,0xd3,0xd7,0xb3,0x0d,0x3f +} ; + +main() +{ + string key(key_bytes,sizeof key_bytes); + string c(c_bytes,sizeof c_bytes); + string a(a_bytes,sizeof a_bytes); + try { + crypto_auth_hmacsha256_verify(a,c,key); + printf("0\n"); + } catch(const char *s) { + printf("%s\n",s); + } + return 0; +} diff --git a/nacl/tests/auth4.out b/nacl/tests/auth4.out new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/nacl/tests/auth4.out @@ -0,0 +1 @@ +0 diff --git a/nacl/tests/auth5.c 
b/nacl/tests/auth5.c new file mode 100644 index 00000000..d304a073 --- /dev/null +++ b/nacl/tests/auth5.c @@ -0,0 +1,36 @@ +#include +#include +#include "crypto_auth_hmacsha512256.h" +#include "randombytes.h" + +unsigned char key[32]; +unsigned char c[10000]; +unsigned char a[32]; + +main() +{ + int clen; + int i; + for (clen = 0;clen < 10000;++clen) { + randombytes(key,sizeof key); + randombytes(c,clen); + crypto_auth_hmacsha512256(a,c,clen,key); + if (crypto_auth_hmacsha512256_verify(a,c,clen,key) != 0) { + printf("fail %d\n",clen); + return 100; + } + if (clen > 0) { + c[random() % clen] += 1 + (random() % 255); + if (crypto_auth_hmacsha512256_verify(a,c,clen,key) == 0) { + printf("forgery %d\n",clen); + return 100; + } + a[random() % sizeof a] += 1 + (random() % 255); + if (crypto_auth_hmacsha512256_verify(a,c,clen,key) == 0) { + printf("forgery %d\n",clen); + return 100; + } + } + } + return 0; +} diff --git a/nacl/tests/auth5.out b/nacl/tests/auth5.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/auth6.cpp b/nacl/tests/auth6.cpp new file mode 100644 index 00000000..dffb6388 --- /dev/null +++ b/nacl/tests/auth6.cpp @@ -0,0 +1,46 @@ +#include +using std::string; +#include +#include +#include "crypto_auth_hmacsha512256.h" +#include "randombytes.h" + +main() +{ + int clen; + int i; + for (clen = 0;clen < 10000;++clen) { + unsigned char key_bytes[32]; + randombytes(key_bytes,sizeof key_bytes); + string key((char *) key_bytes,sizeof key_bytes); + unsigned char c_bytes[clen]; + randombytes(c_bytes,sizeof c_bytes); + string c((char *) c_bytes,sizeof c_bytes); + string a = crypto_auth_hmacsha512256(c,key); + try { + crypto_auth_hmacsha512256_verify(a,c,key); + } catch(const char *s) { + printf("fail %d %s\n",clen,s); + return 100; + } + if (clen > 0) { + size_t pos = random() % clen; + c.replace(pos,1,1,c[pos] + 1 + (random() % 255)); + try { + crypto_auth_hmacsha512256_verify(a,c,key); + printf("forgery %d\n",clen); + } catch(const char *s) { 
+ ; + } + pos = random() % a.size(); + a.replace(pos,1,1,a[pos] + 1 + (random() % 255)); + try { + crypto_auth_hmacsha512256_verify(a,c,key); + printf("forgery %d\n",clen); + } catch(const char *s) { + ; + } + } + } + return 0; +} diff --git a/nacl/tests/auth6.out b/nacl/tests/auth6.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/box.c b/nacl/tests/box.c new file mode 100644 index 00000000..b57a9883 --- /dev/null +++ b/nacl/tests/box.c @@ -0,0 +1,63 @@ +#include +#include "crypto_box_curve25519xsalsa20poly1305.h" + +unsigned char alicesk[32] = { + 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d +,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45 +,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a +,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a +} ; + +unsigned char bobpk[32] = { + 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4 +,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37 +,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d +,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f +} ; + +unsigned char nonce[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +// API requires first 32 bytes to be 0 +unsigned char m[163] = { + 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5 +,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b +,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4 +,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc +,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a +,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29 +,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4 +,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31 +,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d +,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57 +,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a +,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde +,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd +,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52 +,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40 +,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64 +,0x5e,0x07,0x05 +} ; + +unsigned char 
c[163]; + +main() +{ + int i; + crypto_box_curve25519xsalsa20poly1305( + c,m,163,nonce,bobpk,alicesk + ); + for (i = 16;i < 163;++i) { + printf(",0x%02x",(unsigned int) c[i]); + if (i % 8 == 7) printf("\n"); + } + printf("\n"); + return 0; +} diff --git a/nacl/tests/box.out b/nacl/tests/box.out new file mode 100644 index 00000000..2b6c51ea --- /dev/null +++ b/nacl/tests/box.out @@ -0,0 +1,19 @@ +,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 +,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a +,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 diff --git a/nacl/tests/box2.c b/nacl/tests/box2.c new file mode 100644 index 00000000..0a531142 --- /dev/null +++ b/nacl/tests/box2.c @@ -0,0 +1,64 @@ +#include +#include "crypto_box_curve25519xsalsa20poly1305.h" + +unsigned char bobsk[32] = { + 0x5d,0xab,0x08,0x7e,0x62,0x4a,0x8a,0x4b +,0x79,0xe1,0x7f,0x8b,0x83,0x80,0x0e,0xe6 +,0x6f,0x3b,0xb1,0x29,0x26,0x18,0xb6,0xfd +,0x1c,0x2f,0x8b,0x27,0xff,0x88,0xe0,0xeb +} ; + +unsigned char alicepk[32] = { + 0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54 +,0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a +,0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4 +,0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a +} ; + +unsigned char nonce[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +// API requires first 16 bytes to be 0 
+unsigned char c[163] = { + 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 +,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a +,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 +} ; + +unsigned char m[163]; + +main() +{ + int i; + if (crypto_box_curve25519xsalsa20poly1305_open( + m,c,163,nonce,alicepk,bobsk + ) == 0) { + for (i = 32;i < 163;++i) { + printf(",0x%02x",(unsigned int) m[i]); + if (i % 8 == 7) printf("\n"); + } + printf("\n"); + } + return 0; +} diff --git a/nacl/tests/box2.out b/nacl/tests/box2.out new file mode 100644 index 00000000..c61d4557 --- /dev/null +++ b/nacl/tests/box2.out @@ -0,0 +1,17 @@ +,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5 +,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b +,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4 +,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc +,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a +,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29 +,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4 +,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31 +,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d +,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57 +,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a +,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde +,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd +,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52 +,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40 +,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64 +,0x5e,0x07,0x05 diff --git 
a/nacl/tests/box3.cpp b/nacl/tests/box3.cpp new file mode 100644 index 00000000..db89dd03 --- /dev/null +++ b/nacl/tests/box3.cpp @@ -0,0 +1,60 @@ +#include +using std::string; +#include +#include "crypto_box_curve25519xsalsa20poly1305.h" + +char alicesk_bytes[32] = { + 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d +,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45 +,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a +,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a +} ; + +char bobpk_bytes[32] = { + 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4 +,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37 +,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d +,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f +} ; + +char nonce_bytes[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +char m_bytes[131] = { + 0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5 +,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b +,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4 +,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc +,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a +,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29 +,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4 +,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31 +,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d +,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57 +,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a +,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde +,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd +,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52 +,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40 +,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64 +,0x5e,0x07,0x05 +} ; + +main() +{ + int i; + string m(m_bytes,sizeof m_bytes); + string nonce(nonce_bytes,sizeof nonce_bytes); + string bobpk(bobpk_bytes,sizeof bobpk_bytes); + string alicesk(alicesk_bytes,sizeof alicesk_bytes); + string c = crypto_box_curve25519xsalsa20poly1305(m,nonce,bobpk,alicesk); + for (i = 0;i < c.size();++i) { + printf(",0x%02x",(unsigned int) (unsigned char) c[i]); + if (i % 8 == 7) printf("\n"); + } + printf("\n"); + return 0; +} diff --git a/nacl/tests/box3.out 
b/nacl/tests/box3.out new file mode 100644 index 00000000..2b6c51ea --- /dev/null +++ b/nacl/tests/box3.out @@ -0,0 +1,19 @@ +,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 +,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a +,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 diff --git a/nacl/tests/box4.cpp b/nacl/tests/box4.cpp new file mode 100644 index 00000000..7f48fcd6 --- /dev/null +++ b/nacl/tests/box4.cpp @@ -0,0 +1,66 @@ +#include +using std::string; +#include +#include "crypto_box_curve25519xsalsa20poly1305.h" + +char bobsk_bytes[32] = { + 0x5d,0xab,0x08,0x7e,0x62,0x4a,0x8a,0x4b +,0x79,0xe1,0x7f,0x8b,0x83,0x80,0x0e,0xe6 +,0x6f,0x3b,0xb1,0x29,0x26,0x18,0xb6,0xfd +,0x1c,0x2f,0x8b,0x27,0xff,0x88,0xe0,0xeb +} ; + +char alicepk_bytes[32] = { + 0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54 +,0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a +,0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4 +,0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a +} ; + +char nonce_bytes[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +char c_bytes[147] = { + 0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 +,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a 
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 +} ; + +main() +{ + int i; + string c(c_bytes,sizeof c_bytes); + string nonce(nonce_bytes,sizeof nonce_bytes); + string alicepk(alicepk_bytes,sizeof alicepk_bytes); + string bobsk(bobsk_bytes,sizeof bobsk_bytes); + try { + string m = crypto_box_curve25519xsalsa20poly1305_open(c,nonce,alicepk,bobsk); + for (i = 0;i < m.size();++i) { + printf(",0x%02x",(unsigned int) (unsigned char) m[i]); + if (i % 8 == 7) printf("\n"); + } + printf("\n"); + } catch(const char *s) { + printf("%s\n",s); + } + return 0; +} diff --git a/nacl/tests/box4.out b/nacl/tests/box4.out new file mode 100644 index 00000000..c61d4557 --- /dev/null +++ b/nacl/tests/box4.out @@ -0,0 +1,17 @@ +,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5 +,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b +,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4 +,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc +,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a +,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29 +,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4 +,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31 +,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d +,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57 +,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a +,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde +,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd +,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52 +,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40 +,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64 +,0x5e,0x07,0x05 diff --git a/nacl/tests/box5.cpp b/nacl/tests/box5.cpp new file mode 100644 index 00000000..366e2e30 --- /dev/null +++ 
b/nacl/tests/box5.cpp @@ -0,0 +1,30 @@ +#include +using std::string; +#include +#include "crypto_box.h" +#include "randombytes.h" + +main() +{ + int mlen; + for (mlen = 0;mlen < 1000;++mlen) { + string alicesk; + string alicepk = crypto_box_keypair(&alicesk); + string bobsk; + string bobpk = crypto_box_keypair(&bobsk); + unsigned char nbytes[crypto_box_NONCEBYTES]; + randombytes(nbytes,crypto_box_NONCEBYTES); + string n((char *) nbytes,crypto_box_NONCEBYTES); + unsigned char mbytes[mlen]; + randombytes(mbytes,mlen); + string m((char *) mbytes,mlen); + string c = crypto_box(m,n,bobpk,alicesk); + try { + string m2 = crypto_box_open(c,n,alicepk,bobsk); + if (m != m2) printf("bad decryption\n"); + } catch(const char *s) { + printf("%s\n",s); + } + } + return 0; +} diff --git a/nacl/tests/box5.out b/nacl/tests/box5.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/box6.cpp b/nacl/tests/box6.cpp new file mode 100644 index 00000000..bab18105 --- /dev/null +++ b/nacl/tests/box6.cpp @@ -0,0 +1,43 @@ +#include +using std::string; +#include +#include +#include "crypto_box.h" +#include "randombytes.h" + +main() +{ + int mlen; + for (mlen = 0;mlen < 1000;++mlen) { + string alicesk; + string alicepk = crypto_box_keypair(&alicesk); + string bobsk; + string bobpk = crypto_box_keypair(&bobsk); + unsigned char nbytes[crypto_box_NONCEBYTES]; + randombytes(nbytes,crypto_box_NONCEBYTES); + string n((char *) nbytes,crypto_box_NONCEBYTES); + unsigned char mbytes[mlen]; + randombytes(mbytes,mlen); + string m((char *) mbytes,mlen); + string c = crypto_box(m,n,bobpk,alicesk); + int caught = 0; + while (caught < 10) { + c.replace(random() % c.size(),1,1,random()); + try { + string m2 = crypto_box_open(c,n,alicepk,bobsk); + if (m != m2) { + printf("forgery\n"); + return 100; + } + } catch(const char *s) { + if (string(s) == string("ciphertext fails verification")) + ++caught; + else { + printf("%s\n",s); + return 111; + } + } + } + } + return 0; +} diff --git 
a/nacl/tests/box6.out b/nacl/tests/box6.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/box7.c b/nacl/tests/box7.c new file mode 100644 index 00000000..809301c1 --- /dev/null +++ b/nacl/tests/box7.c @@ -0,0 +1,36 @@ +#include +#include "crypto_box.h" +#include "randombytes.h" + +unsigned char alicesk[crypto_box_SECRETKEYBYTES]; +unsigned char alicepk[crypto_box_PUBLICKEYBYTES]; +unsigned char bobsk[crypto_box_SECRETKEYBYTES]; +unsigned char bobpk[crypto_box_PUBLICKEYBYTES]; +unsigned char n[crypto_box_NONCEBYTES]; +unsigned char m[10000]; +unsigned char c[10000]; +unsigned char m2[10000]; + +main() +{ + int mlen; + int i; + + for (mlen = 0;mlen < 1000 && mlen + crypto_box_ZEROBYTES < sizeof m;++mlen) { + crypto_box_keypair(alicepk,alicesk); + crypto_box_keypair(bobpk,bobsk); + randombytes(n,crypto_box_NONCEBYTES); + randombytes(m + crypto_box_ZEROBYTES,mlen); + crypto_box(c,m,mlen + crypto_box_ZEROBYTES,n,bobpk,alicesk); + if (crypto_box_open(m2,c,mlen + crypto_box_ZEROBYTES,n,alicepk,bobsk) == 0) { + for (i = 0;i < mlen + crypto_box_ZEROBYTES;++i) + if (m2[i] != m[i]) { + printf("bad decryption\n"); + break; + } + } else { + printf("ciphertext fails verification\n"); + } + } + return 0; +} diff --git a/nacl/tests/box7.out b/nacl/tests/box7.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/box8.c b/nacl/tests/box8.c new file mode 100644 index 00000000..dac676ef --- /dev/null +++ b/nacl/tests/box8.c @@ -0,0 +1,41 @@ +#include +#include "crypto_box.h" +#include "randombytes.h" + +unsigned char alicesk[crypto_box_SECRETKEYBYTES]; +unsigned char alicepk[crypto_box_PUBLICKEYBYTES]; +unsigned char bobsk[crypto_box_SECRETKEYBYTES]; +unsigned char bobpk[crypto_box_PUBLICKEYBYTES]; +unsigned char n[crypto_box_NONCEBYTES]; +unsigned char m[10000]; +unsigned char c[10000]; +unsigned char m2[10000]; + +main() +{ + int mlen; + int i; + int caught; + + for (mlen = 0;mlen < 1000 && mlen + crypto_box_ZEROBYTES < sizeof m;++mlen) { 
+ crypto_box_keypair(alicepk,alicesk); + crypto_box_keypair(bobpk,bobsk); + randombytes(n,crypto_box_NONCEBYTES); + randombytes(m + crypto_box_ZEROBYTES,mlen); + crypto_box(c,m,mlen + crypto_box_ZEROBYTES,n,bobpk,alicesk); + caught = 0; + while (caught < 10) { + c[random() % (mlen + crypto_box_ZEROBYTES)] = random(); + if (crypto_box_open(m2,c,mlen + crypto_box_ZEROBYTES,n,alicepk,bobsk) == 0) { + for (i = 0;i < mlen + crypto_box_ZEROBYTES;++i) + if (m2[i] != m[i]) { + printf("forgery\n"); + return 100; + } + } else { + ++caught; + } + } + } + return 0; +} diff --git a/nacl/tests/box8.out b/nacl/tests/box8.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/core1.c b/nacl/tests/core1.c new file mode 100644 index 00000000..9a8fc51d --- /dev/null +++ b/nacl/tests/core1.c @@ -0,0 +1,30 @@ +#include +#include "crypto_core_hsalsa20.h" + +unsigned char shared[32] = { + 0x4a,0x5d,0x9d,0x5b,0xa4,0xce,0x2d,0xe1 +,0x72,0x8e,0x3b,0xf4,0x80,0x35,0x0f,0x25 +,0xe0,0x7e,0x21,0xc9,0x47,0xd1,0x9e,0x33 +,0x76,0xf0,0x9b,0x3c,0x1e,0x16,0x17,0x42 +} ; + +unsigned char zero[32] = { 0 }; + +unsigned char c[16] = { + 0x65,0x78,0x70,0x61,0x6e,0x64,0x20,0x33 +,0x32,0x2d,0x62,0x79,0x74,0x65,0x20,0x6b +} ; + +unsigned char firstkey[32]; + +main() +{ + int i; + crypto_core_hsalsa20(firstkey,zero,shared,c); + for (i = 0;i < 32;++i) { + if (i > 0) printf(","); else printf(" "); + printf("0x%02x",(unsigned int) firstkey[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/core1.out b/nacl/tests/core1.out new file mode 100644 index 00000000..715a489d --- /dev/null +++ b/nacl/tests/core1.out @@ -0,0 +1,4 @@ + 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 diff --git a/nacl/tests/core2.c b/nacl/tests/core2.c new file mode 100644 index 00000000..08402285 --- /dev/null +++ b/nacl/tests/core2.c @@ -0,0 +1,33 @@ +#include +#include 
"crypto_core_hsalsa20.h" + +unsigned char firstkey[32] = { + 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 +} ; + +unsigned char nonceprefix[16] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +} ; + +unsigned char c[16] = { + 0x65,0x78,0x70,0x61,0x6e,0x64,0x20,0x33 +,0x32,0x2d,0x62,0x79,0x74,0x65,0x20,0x6b +} ; + +unsigned char secondkey[32]; + +main() +{ + int i; + crypto_core_hsalsa20(secondkey,nonceprefix,firstkey,c); + for (i = 0;i < 32;++i) { + if (i > 0) printf(","); else printf(" "); + printf("0x%02x",(unsigned int) secondkey[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/core2.out b/nacl/tests/core2.out new file mode 100644 index 00000000..f4682af0 --- /dev/null +++ b/nacl/tests/core2.out @@ -0,0 +1,4 @@ + 0xdc,0x90,0x8d,0xda,0x0b,0x93,0x44,0xa9 +,0x53,0x62,0x9b,0x73,0x38,0x20,0x77,0x88 +,0x80,0xf3,0xce,0xb4,0x21,0xbb,0x61,0xb9 +,0x1c,0xbd,0x4c,0x3e,0x66,0x25,0x6c,0xe4 diff --git a/nacl/tests/core3.c b/nacl/tests/core3.c new file mode 100644 index 00000000..4c759a5b --- /dev/null +++ b/nacl/tests/core3.c @@ -0,0 +1,41 @@ +#include +#include "crypto_core_salsa20.h" +#include "crypto_hash_sha256.h" + +unsigned char secondkey[32] = { + 0xdc,0x90,0x8d,0xda,0x0b,0x93,0x44,0xa9 +,0x53,0x62,0x9b,0x73,0x38,0x20,0x77,0x88 +,0x80,0xf3,0xce,0xb4,0x21,0xbb,0x61,0xb9 +,0x1c,0xbd,0x4c,0x3e,0x66,0x25,0x6c,0xe4 +} ; + +unsigned char noncesuffix[8] = { + 0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +unsigned char c[16] = { + 0x65,0x78,0x70,0x61,0x6e,0x64,0x20,0x33 +,0x32,0x2d,0x62,0x79,0x74,0x65,0x20,0x6b +} ; + +unsigned char in[16] = { 0 } ; + +unsigned char output[64 * 256 * 256]; + +unsigned char h[32]; + +main() +{ + int i; + long long pos = 0; + for (i = 0;i < 8;++i) in[i] = noncesuffix[i]; + do { + do { + crypto_core_salsa20(output + pos,in,secondkey,c); + pos += 64; + } 
while (++in[8]); + } while (++in[9]); + crypto_hash_sha256(h,output,sizeof output); + for (i = 0;i < 32;++i) printf("%02x",h[i]); printf("\n"); + return 0; +} diff --git a/nacl/tests/core3.out b/nacl/tests/core3.out new file mode 100644 index 00000000..5fa208c1 --- /dev/null +++ b/nacl/tests/core3.out @@ -0,0 +1 @@ +662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2 diff --git a/nacl/tests/core4.c b/nacl/tests/core4.c new file mode 100644 index 00000000..1f238c5e --- /dev/null +++ b/nacl/tests/core4.c @@ -0,0 +1,33 @@ +#include +#include "crypto_core_salsa20.h" + +unsigned char k[32] = { + 1, 2, 3, 4, 5, 6, 7, 8 +, 9, 10, 11, 12, 13, 14, 15, 16 +,201,202,203,204,205,206,207,208 +,209,210,211,212,213,214,215,216 +} ; + +unsigned char in[16] = { + 101,102,103,104,105,106,107,108 +,109,110,111,112,113,114,115,116 +} ; + +unsigned char c[16] = { + 101,120,112, 97,110,100, 32, 51 +, 50, 45, 98,121,116,101, 32,107 +} ; + +unsigned char out[64]; + +main() +{ + int i; + crypto_core_salsa20(out,in,k,c); + for (i = 0;i < 64;++i) { + if (i > 0) printf(","); else printf(" "); + printf("%3d",(unsigned int) out[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/core4.out b/nacl/tests/core4.out new file mode 100644 index 00000000..d04e5b5e --- /dev/null +++ b/nacl/tests/core4.out @@ -0,0 +1,8 @@ + 69, 37, 68, 39, 41, 15,107,193 +,255,139,122, 6,170,233,217, 98 +, 89,144,182,106, 21, 51,200, 65 +,239, 49,222, 34,215,114, 40,126 +,104,197, 7,225,197,153, 31, 2 +,102, 78, 76,176, 84,245,246,184 +,177,160,133,130, 6, 72,149,119 +,192,195,132,236,234,103,246, 74 diff --git a/nacl/tests/core5.c b/nacl/tests/core5.c new file mode 100644 index 00000000..6353477d --- /dev/null +++ b/nacl/tests/core5.c @@ -0,0 +1,32 @@ +#include +#include "crypto_core_hsalsa20.h" + +unsigned char k[32] = { + 0xee,0x30,0x4f,0xca,0x27,0x00,0x8d,0x8c +,0x12,0x6f,0x90,0x02,0x79,0x01,0xd8,0x0f +,0x7f,0x1d,0x8b,0x8d,0xc9,0x36,0xcf,0x3b 
+,0x9f,0x81,0x96,0x92,0x82,0x7e,0x57,0x77 +} ; + +unsigned char in[16] = { + 0x81,0x91,0x8e,0xf2,0xa5,0xe0,0xda,0x9b +,0x3e,0x90,0x60,0x52,0x1e,0x4b,0xb3,0x52 +} ; + +unsigned char c[16] = { + 101,120,112, 97,110,100, 32, 51 +, 50, 45, 98,121,116,101, 32,107 +} ; + +unsigned char out[32]; + +main() +{ + int i; + crypto_core_hsalsa20(out,in,k,c); + for (i = 0;i < 32;++i) { + printf(",0x%02x",(unsigned int) out[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/core5.out b/nacl/tests/core5.out new file mode 100644 index 00000000..562cf717 --- /dev/null +++ b/nacl/tests/core5.out @@ -0,0 +1,4 @@ +,0xbc,0x1b,0x30,0xfc,0x07,0x2c,0xc1,0x40 +,0x75,0xe4,0xba,0xa7,0x31,0xb5,0xa8,0x45 +,0xea,0x9b,0x11,0xe9,0xa5,0x19,0x1f,0x94 +,0xe1,0x8c,0xba,0x8f,0xd8,0x21,0xa7,0xcd diff --git a/nacl/tests/core6.c b/nacl/tests/core6.c new file mode 100644 index 00000000..67f35df9 --- /dev/null +++ b/nacl/tests/core6.c @@ -0,0 +1,47 @@ +#include +#include "crypto_core_salsa20.h" + +unsigned char k[32] = { + 0xee,0x30,0x4f,0xca,0x27,0x00,0x8d,0x8c +,0x12,0x6f,0x90,0x02,0x79,0x01,0xd8,0x0f +,0x7f,0x1d,0x8b,0x8d,0xc9,0x36,0xcf,0x3b +,0x9f,0x81,0x96,0x92,0x82,0x7e,0x57,0x77 +} ; + +unsigned char in[16] = { + 0x81,0x91,0x8e,0xf2,0xa5,0xe0,0xda,0x9b +,0x3e,0x90,0x60,0x52,0x1e,0x4b,0xb3,0x52 +} ; + +unsigned char c[16] = { + 101,120,112, 97,110,100, 32, 51 +, 50, 45, 98,121,116,101, 32,107 +} ; + +unsigned char out[64]; + +void print(unsigned char *x,unsigned char *y) +{ + int i; + unsigned int borrow = 0; + for (i = 0;i < 4;++i) { + unsigned int xi = x[i]; + unsigned int yi = y[i]; + printf(",0x%02x",255 & (xi - yi - borrow)); + borrow = (xi < yi + borrow); + } +} + +main() +{ + crypto_core_salsa20(out,in,k,c); + print(out,c); + print(out + 20,c + 4); printf("\n"); + print(out + 40,c + 8); + print(out + 60,c + 12); printf("\n"); + print(out + 24,in); + print(out + 28,in + 4); printf("\n"); + print(out + 32,in + 8); + print(out + 36,in + 12); printf("\n"); + return 0; 
+} diff --git a/nacl/tests/core6.out b/nacl/tests/core6.out new file mode 100644 index 00000000..562cf717 --- /dev/null +++ b/nacl/tests/core6.out @@ -0,0 +1,4 @@ +,0xbc,0x1b,0x30,0xfc,0x07,0x2c,0xc1,0x40 +,0x75,0xe4,0xba,0xa7,0x31,0xb5,0xa8,0x45 +,0xea,0x9b,0x11,0xe9,0xa5,0x19,0x1f,0x94 +,0xe1,0x8c,0xba,0x8f,0xd8,0x21,0xa7,0xcd diff --git a/nacl/tests/hash.c b/nacl/tests/hash.c new file mode 100644 index 00000000..8de470aa --- /dev/null +++ b/nacl/tests/hash.c @@ -0,0 +1,14 @@ +#include +#include "crypto_hash.h" + +unsigned char x[8] = "testing\n"; +unsigned char h[crypto_hash_BYTES]; + +int main() +{ + int i; + crypto_hash(h,x,sizeof x); + for (i = 0;i < crypto_hash_BYTES;++i) printf("%02x",(unsigned int) h[i]); + printf("\n"); + return 0; +} diff --git a/nacl/tests/hash.out b/nacl/tests/hash.out new file mode 100644 index 00000000..df582172 --- /dev/null +++ b/nacl/tests/hash.out @@ -0,0 +1 @@ +24f950aac7b9ea9b3cb728228a0c82b67c39e96b4b344798870d5daee93e3ae5931baae8c7cacfea4b629452c38026a81d138bc7aad1af3ef7bfd5ec646d6c28 diff --git a/nacl/tests/hash2.cpp b/nacl/tests/hash2.cpp new file mode 100644 index 00000000..6594620d --- /dev/null +++ b/nacl/tests/hash2.cpp @@ -0,0 +1,18 @@ +#include +#include +using std::string; +using std::cout; +using std::hex; +#include "crypto_hash.h" + +int main() +{ + string x = "testing\n"; + string h = crypto_hash(x); + for (int i = 0;i < h.size();++i) { + cout << hex << (15 & (int) (h[i] >> 4)); + cout << hex << (15 & (int) h[i]); + } + cout << "\n"; + return 0; +} diff --git a/nacl/tests/hash2.out b/nacl/tests/hash2.out new file mode 100644 index 00000000..df582172 --- /dev/null +++ b/nacl/tests/hash2.out @@ -0,0 +1 @@ +24f950aac7b9ea9b3cb728228a0c82b67c39e96b4b344798870d5daee93e3ae5931baae8c7cacfea4b629452c38026a81d138bc7aad1af3ef7bfd5ec646d6c28 diff --git a/nacl/tests/hash3.c b/nacl/tests/hash3.c new file mode 100644 index 00000000..10b89b90 --- /dev/null +++ b/nacl/tests/hash3.c @@ -0,0 +1,14 @@ +#include +#include 
"crypto_hash_sha512.h" + +unsigned char x[8] = "testing\n"; +unsigned char h[crypto_hash_sha512_BYTES]; + +int main() +{ + int i; + crypto_hash_sha512(h,x,sizeof x); + for (i = 0;i < crypto_hash_sha512_BYTES;++i) printf("%02x",(unsigned int) h[i]); + printf("\n"); + return 0; +} diff --git a/nacl/tests/hash3.out b/nacl/tests/hash3.out new file mode 100644 index 00000000..df582172 --- /dev/null +++ b/nacl/tests/hash3.out @@ -0,0 +1 @@ +24f950aac7b9ea9b3cb728228a0c82b67c39e96b4b344798870d5daee93e3ae5931baae8c7cacfea4b629452c38026a81d138bc7aad1af3ef7bfd5ec646d6c28 diff --git a/nacl/tests/hash4.cpp b/nacl/tests/hash4.cpp new file mode 100644 index 00000000..1d0a3f37 --- /dev/null +++ b/nacl/tests/hash4.cpp @@ -0,0 +1,18 @@ +#include +#include +using std::string; +using std::cout; +using std::hex; +#include "crypto_hash_sha512.h" + +int main() +{ + string x = "testing\n"; + string h = crypto_hash_sha512(x); + for (int i = 0;i < h.size();++i) { + cout << hex << (15 & (int) (h[i] >> 4)); + cout << hex << (15 & (int) h[i]); + } + cout << "\n"; + return 0; +} diff --git a/nacl/tests/hash4.out b/nacl/tests/hash4.out new file mode 100644 index 00000000..df582172 --- /dev/null +++ b/nacl/tests/hash4.out @@ -0,0 +1 @@ +24f950aac7b9ea9b3cb728228a0c82b67c39e96b4b344798870d5daee93e3ae5931baae8c7cacfea4b629452c38026a81d138bc7aad1af3ef7bfd5ec646d6c28 diff --git a/nacl/tests/onetimeauth.c b/nacl/tests/onetimeauth.c new file mode 100644 index 00000000..60a2df14 --- /dev/null +++ b/nacl/tests/onetimeauth.c @@ -0,0 +1,42 @@ +#include +#include "crypto_onetimeauth_poly1305.h" + +unsigned char rs[32] = { + 0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91 +,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25 +,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65 +,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80 +} ; + +unsigned char c[131] = { + 0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a 
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 +} ; + +unsigned char a[16]; + +main() +{ + int i; + crypto_onetimeauth_poly1305(a,c,131,rs); + for (i = 0;i < 16;++i) { + printf(",0x%02x",(unsigned int) a[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/onetimeauth.out b/nacl/tests/onetimeauth.out new file mode 100644 index 00000000..6d914615 --- /dev/null +++ b/nacl/tests/onetimeauth.out @@ -0,0 +1,2 @@ +,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 diff --git a/nacl/tests/onetimeauth2.c b/nacl/tests/onetimeauth2.c new file mode 100644 index 00000000..64c1a9cd --- /dev/null +++ b/nacl/tests/onetimeauth2.c @@ -0,0 +1,40 @@ +#include +#include "crypto_onetimeauth_poly1305.h" + +unsigned char rs[32] = { + 0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91 +,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25 +,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65 +,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80 +} ; + +unsigned char c[131] = { + 0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a +,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde 
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 +} ; + +unsigned char a[16] = { + 0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 +} ; + +main() +{ + printf("%d\n",crypto_onetimeauth_poly1305_verify(a,c,131,rs)); + return 0; +} diff --git a/nacl/tests/onetimeauth2.out b/nacl/tests/onetimeauth2.out new file mode 100644 index 00000000..573541ac --- /dev/null +++ b/nacl/tests/onetimeauth2.out @@ -0,0 +1 @@ +0 diff --git a/nacl/tests/onetimeauth5.cpp b/nacl/tests/onetimeauth5.cpp new file mode 100644 index 00000000..884892ac --- /dev/null +++ b/nacl/tests/onetimeauth5.cpp @@ -0,0 +1,46 @@ +#include +using std::string; +#include +#include "crypto_onetimeauth_poly1305.h" + +char rs_bytes[32] = { + 0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91 +,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25 +,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65 +,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80 +} ; + +char c_bytes[131] = { + 0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a +,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 +} ; + +unsigned char a[16]; + +main() +{ + int i; + string c(c_bytes,sizeof c_bytes); + string rs(rs_bytes,sizeof rs_bytes); + string a = crypto_onetimeauth_poly1305(c,rs); + for (i = 0;i < a.size();++i) { + printf(",0x%02x",(unsigned int) (unsigned char) a[i]); + if (i % 8 == 7) 
printf("\n"); + } + return 0; +} diff --git a/nacl/tests/onetimeauth5.out b/nacl/tests/onetimeauth5.out new file mode 100644 index 00000000..6d914615 --- /dev/null +++ b/nacl/tests/onetimeauth5.out @@ -0,0 +1,2 @@ +,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 diff --git a/nacl/tests/onetimeauth6.cpp b/nacl/tests/onetimeauth6.cpp new file mode 100644 index 00000000..d79d4613 --- /dev/null +++ b/nacl/tests/onetimeauth6.cpp @@ -0,0 +1,50 @@ +#include +using std::string; +#include +#include "crypto_onetimeauth_poly1305.h" + +char rs_bytes[32] = { + 0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91 +,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25 +,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65 +,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80 +} ; + +char c_bytes[131] = { + 0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a +,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 +} ; + +char a_bytes[16] = { + 0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 +} ; + +main() +{ + string rs(rs_bytes,sizeof rs_bytes); + string c(c_bytes,sizeof c_bytes); + string a(a_bytes,sizeof a_bytes); + try { + crypto_onetimeauth_poly1305_verify(a,c,rs); + printf("0\n"); + } catch(const char *s) { + printf("%s\n",s); + } + return 0; +} diff --git a/nacl/tests/onetimeauth6.out b/nacl/tests/onetimeauth6.out new file mode 100644 index 00000000..573541ac --- /dev/null +++ 
b/nacl/tests/onetimeauth6.out @@ -0,0 +1 @@ +0 diff --git a/nacl/tests/onetimeauth7.c b/nacl/tests/onetimeauth7.c new file mode 100644 index 00000000..349b8751 --- /dev/null +++ b/nacl/tests/onetimeauth7.c @@ -0,0 +1,36 @@ +#include +#include +#include "crypto_onetimeauth_poly1305.h" +#include "randombytes.h" + +unsigned char key[32]; +unsigned char c[10000]; +unsigned char a[16]; + +main() +{ + int clen; + int i; + for (clen = 0;clen < 10000;++clen) { + randombytes(key,sizeof key); + randombytes(c,clen); + crypto_onetimeauth_poly1305(a,c,clen,key); + if (crypto_onetimeauth_poly1305_verify(a,c,clen,key) != 0) { + printf("fail %d\n",clen); + return 100; + } + if (clen > 0) { + c[random() % clen] += 1 + (random() % 255); + if (crypto_onetimeauth_poly1305_verify(a,c,clen,key) == 0) { + printf("forgery %d\n",clen); + return 100; + } + a[random() % sizeof a] += 1 + (random() % 255); + if (crypto_onetimeauth_poly1305_verify(a,c,clen,key) == 0) { + printf("forgery %d\n",clen); + return 100; + } + } + } + return 0; +} diff --git a/nacl/tests/onetimeauth7.out b/nacl/tests/onetimeauth7.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/onetimeauth8.cpp b/nacl/tests/onetimeauth8.cpp new file mode 100644 index 00000000..ce554fb4 --- /dev/null +++ b/nacl/tests/onetimeauth8.cpp @@ -0,0 +1,46 @@ +#include +using std::string; +#include +#include +#include "crypto_onetimeauth_poly1305.h" +#include "randombytes.h" + +main() +{ + int clen; + int i; + for (clen = 0;clen < 10000;++clen) { + unsigned char key_bytes[32]; + randombytes(key_bytes,sizeof key_bytes); + string key((char *) key_bytes,sizeof key_bytes); + unsigned char c_bytes[clen]; + randombytes(c_bytes,sizeof c_bytes); + string c((char *) c_bytes,sizeof c_bytes); + string a = crypto_onetimeauth_poly1305(c,key); + try { + crypto_onetimeauth_poly1305_verify(a,c,key); + } catch(const char *s) { + printf("fail %d %s\n",clen,s); + return 100; + } + if (clen > 0) { + size_t pos = random() % clen; + 
c.replace(pos,1,1,c[pos] + 1 + (random() % 255)); + try { + crypto_onetimeauth_poly1305_verify(a,c,key); + printf("forgery %d\n",clen); + } catch(const char *s) { + ; + } + pos = random() % a.size(); + a.replace(pos,1,1,a[pos] + 1 + (random() % 255)); + try { + crypto_onetimeauth_poly1305_verify(a,c,key); + printf("forgery %d\n",clen); + } catch(const char *s) { + ; + } + } + } + return 0; +} diff --git a/nacl/tests/onetimeauth8.out b/nacl/tests/onetimeauth8.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/scalarmult.c b/nacl/tests/scalarmult.c new file mode 100644 index 00000000..d9265954 --- /dev/null +++ b/nacl/tests/scalarmult.c @@ -0,0 +1,23 @@ +#include +#include "crypto_scalarmult_curve25519.h" + +unsigned char alicesk[32] = { + 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d +,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45 +,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a +,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a +} ; + +unsigned char alicepk[32]; + +main() +{ + int i; + crypto_scalarmult_curve25519_base(alicepk,alicesk); + for (i = 0;i < 32;++i) { + if (i > 0) printf(","); else printf(" "); + printf("0x%02x",(unsigned int) alicepk[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/scalarmult.out b/nacl/tests/scalarmult.out new file mode 100644 index 00000000..ddd130d6 --- /dev/null +++ b/nacl/tests/scalarmult.out @@ -0,0 +1,4 @@ + 0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54 +,0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a +,0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4 +,0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a diff --git a/nacl/tests/scalarmult2.c b/nacl/tests/scalarmult2.c new file mode 100644 index 00000000..90e6360d --- /dev/null +++ b/nacl/tests/scalarmult2.c @@ -0,0 +1,23 @@ +#include +#include "crypto_scalarmult_curve25519.h" + +unsigned char bobsk[32] = { + 0x5d,0xab,0x08,0x7e,0x62,0x4a,0x8a,0x4b +,0x79,0xe1,0x7f,0x8b,0x83,0x80,0x0e,0xe6 +,0x6f,0x3b,0xb1,0x29,0x26,0x18,0xb6,0xfd +,0x1c,0x2f,0x8b,0x27,0xff,0x88,0xe0,0xeb +} ; + 
+unsigned char bobpk[32]; + +main() +{ + int i; + crypto_scalarmult_curve25519_base(bobpk,bobsk); + for (i = 0;i < 32;++i) { + if (i > 0) printf(","); else printf(" "); + printf("0x%02x",(unsigned int) bobpk[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/scalarmult2.out b/nacl/tests/scalarmult2.out new file mode 100644 index 00000000..b5391865 --- /dev/null +++ b/nacl/tests/scalarmult2.out @@ -0,0 +1,4 @@ + 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4 +,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37 +,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d +,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f diff --git a/nacl/tests/scalarmult3.cpp b/nacl/tests/scalarmult3.cpp new file mode 100644 index 00000000..4e8fef3d --- /dev/null +++ b/nacl/tests/scalarmult3.cpp @@ -0,0 +1,31 @@ +#include +#include +#include +using std::string; +using std::cout; +using std::setfill; +using std::setw; +using std::hex; +#include "crypto_scalarmult_curve25519.h" + +char alicesk_bytes[32] = { + 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d +,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45 +,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a +,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a +} ; + +main() +{ + int i; + cout << setfill('0'); + string alicesk(alicesk_bytes,sizeof alicesk_bytes); + string alicepk = crypto_scalarmult_curve25519_base(alicesk); + for (i = 0;i < alicepk.size();++i) { + unsigned char c = alicepk[i]; + if (i > 0) cout << ","; else cout << " "; + cout << "0x" << hex << setw(2) << (unsigned int) c; + if (i % 8 == 7) cout << "\n"; + } + return 0; +} diff --git a/nacl/tests/scalarmult3.out b/nacl/tests/scalarmult3.out new file mode 100644 index 00000000..ddd130d6 --- /dev/null +++ b/nacl/tests/scalarmult3.out @@ -0,0 +1,4 @@ + 0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54 +,0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a +,0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4 +,0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a diff --git a/nacl/tests/scalarmult4.cpp b/nacl/tests/scalarmult4.cpp new file mode 100644 index 
00000000..8e4d64e9 --- /dev/null +++ b/nacl/tests/scalarmult4.cpp @@ -0,0 +1,31 @@ +#include +#include +#include +using std::string; +using std::cout; +using std::setfill; +using std::setw; +using std::hex; +#include "crypto_scalarmult_curve25519.h" + +char bobsk_bytes[32] = { + 0x5d,0xab,0x08,0x7e,0x62,0x4a,0x8a,0x4b +,0x79,0xe1,0x7f,0x8b,0x83,0x80,0x0e,0xe6 +,0x6f,0x3b,0xb1,0x29,0x26,0x18,0xb6,0xfd +,0x1c,0x2f,0x8b,0x27,0xff,0x88,0xe0,0xeb +} ; + +main() +{ + int i; + cout << setfill('0'); + string bobsk(bobsk_bytes,sizeof bobsk_bytes); + string bobpk = crypto_scalarmult_curve25519_base(bobsk); + for (i = 0;i < bobpk.size();++i) { + unsigned char c = bobpk[i]; + if (i > 0) cout << ","; else cout << " "; + cout << "0x" << hex << setw(2) << (unsigned int) c; + if (i % 8 == 7) cout << "\n"; + } + return 0; +} diff --git a/nacl/tests/scalarmult4.out b/nacl/tests/scalarmult4.out new file mode 100644 index 00000000..b5391865 --- /dev/null +++ b/nacl/tests/scalarmult4.out @@ -0,0 +1,4 @@ + 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4 +,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37 +,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d +,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f diff --git a/nacl/tests/scalarmult5.c b/nacl/tests/scalarmult5.c new file mode 100644 index 00000000..14f8159d --- /dev/null +++ b/nacl/tests/scalarmult5.c @@ -0,0 +1,30 @@ +#include +#include "crypto_scalarmult_curve25519.h" + +unsigned char alicesk[32] = { + 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d +,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45 +,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a +,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a +} ; + +unsigned char bobpk[32] = { + 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4 +,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37 +,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d +,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f +} ; + +unsigned char k[32]; + +main() +{ + int i; + crypto_scalarmult_curve25519(k,alicesk,bobpk); + for (i = 0;i < 32;++i) { + if (i > 0) printf(","); else printf(" "); + printf("0x%02x",(unsigned 
int) k[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/scalarmult5.out b/nacl/tests/scalarmult5.out new file mode 100644 index 00000000..bec21130 --- /dev/null +++ b/nacl/tests/scalarmult5.out @@ -0,0 +1,4 @@ + 0x4a,0x5d,0x9d,0x5b,0xa4,0xce,0x2d,0xe1 +,0x72,0x8e,0x3b,0xf4,0x80,0x35,0x0f,0x25 +,0xe0,0x7e,0x21,0xc9,0x47,0xd1,0x9e,0x33 +,0x76,0xf0,0x9b,0x3c,0x1e,0x16,0x17,0x42 diff --git a/nacl/tests/scalarmult6.c b/nacl/tests/scalarmult6.c new file mode 100644 index 00000000..89bf9bdd --- /dev/null +++ b/nacl/tests/scalarmult6.c @@ -0,0 +1,30 @@ +#include +#include "crypto_scalarmult_curve25519.h" + +unsigned char bobsk[32] = { + 0x5d,0xab,0x08,0x7e,0x62,0x4a,0x8a,0x4b +,0x79,0xe1,0x7f,0x8b,0x83,0x80,0x0e,0xe6 +,0x6f,0x3b,0xb1,0x29,0x26,0x18,0xb6,0xfd +,0x1c,0x2f,0x8b,0x27,0xff,0x88,0xe0,0xeb +} ; + +unsigned char alicepk[32] = { + 0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54 +,0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a +,0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4 +,0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a +} ; + +unsigned char k[32]; + +main() +{ + int i; + crypto_scalarmult_curve25519(k,bobsk,alicepk); + for (i = 0;i < 32;++i) { + if (i > 0) printf(","); else printf(" "); + printf("0x%02x",(unsigned int) k[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/scalarmult6.out b/nacl/tests/scalarmult6.out new file mode 100644 index 00000000..bec21130 --- /dev/null +++ b/nacl/tests/scalarmult6.out @@ -0,0 +1,4 @@ + 0x4a,0x5d,0x9d,0x5b,0xa4,0xce,0x2d,0xe1 +,0x72,0x8e,0x3b,0xf4,0x80,0x35,0x0f,0x25 +,0xe0,0x7e,0x21,0xc9,0x47,0xd1,0x9e,0x33 +,0x76,0xf0,0x9b,0x3c,0x1e,0x16,0x17,0x42 diff --git a/nacl/tests/scalarmult7.cpp b/nacl/tests/scalarmult7.cpp new file mode 100644 index 00000000..8382d747 --- /dev/null +++ b/nacl/tests/scalarmult7.cpp @@ -0,0 +1,32 @@ +#include +using std::string; +#include +#include "crypto_scalarmult_curve25519.h" + +char alicesk_bytes[32] = { + 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d 
+,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45 +,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a +,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a +} ; + +char bobpk_bytes[32] = { + 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4 +,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37 +,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d +,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f +} ; + +main() +{ + int i; + string alicesk(alicesk_bytes,sizeof alicesk_bytes); + string bobpk(bobpk_bytes,sizeof bobpk_bytes); + string k = crypto_scalarmult_curve25519(alicesk,bobpk); + for (i = 0;i < k.size();++i) { + if (i > 0) printf(","); else printf(" "); + printf("0x%02x",(unsigned int) (unsigned char) k[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/scalarmult7.out b/nacl/tests/scalarmult7.out new file mode 100644 index 00000000..bec21130 --- /dev/null +++ b/nacl/tests/scalarmult7.out @@ -0,0 +1,4 @@ + 0x4a,0x5d,0x9d,0x5b,0xa4,0xce,0x2d,0xe1 +,0x72,0x8e,0x3b,0xf4,0x80,0x35,0x0f,0x25 +,0xe0,0x7e,0x21,0xc9,0x47,0xd1,0x9e,0x33 +,0x76,0xf0,0x9b,0x3c,0x1e,0x16,0x17,0x42 diff --git a/nacl/tests/secretbox.c b/nacl/tests/secretbox.c new file mode 100644 index 00000000..773f5b62 --- /dev/null +++ b/nacl/tests/secretbox.c @@ -0,0 +1,56 @@ +#include +#include "crypto_secretbox_xsalsa20poly1305.h" + +unsigned char firstkey[32] = { + 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 +} ; + +unsigned char nonce[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +// API requires first 32 bytes to be 0 +unsigned char m[163] = { + 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5 +,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b +,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4 +,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc 
+,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a +,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29 +,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4 +,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31 +,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d +,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57 +,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a +,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde +,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd +,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52 +,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40 +,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64 +,0x5e,0x07,0x05 +} ; + +unsigned char c[163]; + +main() +{ + int i; + crypto_secretbox_xsalsa20poly1305( + c,m,163,nonce,firstkey + ); + for (i = 16;i < 163;++i) { + printf(",0x%02x",(unsigned int) c[i]); + if (i % 8 == 7) printf("\n"); + } + printf("\n"); + return 0; +} diff --git a/nacl/tests/secretbox.out b/nacl/tests/secretbox.out new file mode 100644 index 00000000..2b6c51ea --- /dev/null +++ b/nacl/tests/secretbox.out @@ -0,0 +1,19 @@ +,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 +,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a +,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 diff --git a/nacl/tests/secretbox2.c b/nacl/tests/secretbox2.c new file mode 100644 index 00000000..b6a2a937 --- /dev/null +++ b/nacl/tests/secretbox2.c @@ -0,0 +1,57 @@ +#include +#include "crypto_secretbox_xsalsa20poly1305.h" + +unsigned char firstkey[32] = { + 
0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 +} ; + +unsigned char nonce[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +// API requires first 16 bytes to be 0 +unsigned char c[163] = { + 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 +,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a +,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 +} ; + +unsigned char m[163]; + +main() +{ + int i; + if (crypto_secretbox_xsalsa20poly1305_open( + m,c,163,nonce,firstkey + ) == 0) { + for (i = 32;i < 163;++i) { + printf(",0x%02x",(unsigned int) m[i]); + if (i % 8 == 7) printf("\n"); + } + printf("\n"); + } + return 0; +} diff --git a/nacl/tests/secretbox2.out b/nacl/tests/secretbox2.out new file mode 100644 index 00000000..c61d4557 --- /dev/null +++ b/nacl/tests/secretbox2.out @@ -0,0 +1,17 @@ +,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5 +,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b +,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4 +,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc +,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a +,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29 +,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4 
+,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31 +,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d +,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57 +,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a +,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde +,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd +,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52 +,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40 +,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64 +,0x5e,0x07,0x05 diff --git a/nacl/tests/secretbox3.cpp b/nacl/tests/secretbox3.cpp new file mode 100644 index 00000000..39ca7c53 --- /dev/null +++ b/nacl/tests/secretbox3.cpp @@ -0,0 +1,52 @@ +#include +using std::string; +#include +#include "crypto_secretbox_xsalsa20poly1305.h" + +char firstkey_bytes[32] = { + 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 +} ; + +char nonce_bytes[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +char m_bytes[131] = { + 0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5 +,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b +,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4 +,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc +,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a +,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29 +,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4 +,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31 +,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d +,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57 +,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a +,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde +,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd +,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52 +,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40 +,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64 +,0x5e,0x07,0x05 +} ; + +main() +{ + int i; + string m(m_bytes,sizeof m_bytes); + string nonce(nonce_bytes,sizeof nonce_bytes); + string firstkey(firstkey_bytes,sizeof firstkey_bytes); + string c = crypto_secretbox_xsalsa20poly1305(m,nonce,firstkey); + for (i = 0;i < 
c.size();++i) { + printf(",0x%02x",(unsigned int) (unsigned char) c[i]); + if (i % 8 == 7) printf("\n"); + } + printf("\n"); + return 0; +} diff --git a/nacl/tests/secretbox3.out b/nacl/tests/secretbox3.out new file mode 100644 index 00000000..2b6c51ea --- /dev/null +++ b/nacl/tests/secretbox3.out @@ -0,0 +1,19 @@ +,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 +,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a +,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 diff --git a/nacl/tests/secretbox4.cpp b/nacl/tests/secretbox4.cpp new file mode 100644 index 00000000..416e4d9e --- /dev/null +++ b/nacl/tests/secretbox4.cpp @@ -0,0 +1,54 @@ +#include +using std::string; +#include +#include "crypto_secretbox_xsalsa20poly1305.h" + +char firstkey_bytes[32] = { + 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 +} ; + +char nonce_bytes[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +char c_bytes[147] = { + 0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5 +,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9 +,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a 
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 +} ; + +main() +{ + int i; + string firstkey(firstkey_bytes,sizeof firstkey_bytes); + string nonce(nonce_bytes,sizeof nonce_bytes); + string c(c_bytes,sizeof c_bytes); + string m = crypto_secretbox_xsalsa20poly1305_open(c,nonce,firstkey); + for (i = 0;i < m.size();++i) { + printf(",0x%02x",(unsigned int) (unsigned char) m[i]); + if (i % 8 == 7) printf("\n"); + } + printf("\n"); + return 0; +} diff --git a/nacl/tests/secretbox4.out b/nacl/tests/secretbox4.out new file mode 100644 index 00000000..c61d4557 --- /dev/null +++ b/nacl/tests/secretbox4.out @@ -0,0 +1,17 @@ +,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5 +,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b +,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4 +,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc +,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a +,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29 +,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4 +,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31 +,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d +,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57 +,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a +,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde +,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd +,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52 +,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40 +,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64 +,0x5e,0x07,0x05 diff --git a/nacl/tests/secretbox5.cpp b/nacl/tests/secretbox5.cpp new file mode 100644 index 00000000..e8cc0eeb --- /dev/null +++ b/nacl/tests/secretbox5.cpp @@ -0,0 +1,29 @@ +#include +using std::string; +#include 
+#include "crypto_secretbox.h" +#include "randombytes.h" + +main() +{ + int mlen; + for (mlen = 0;mlen < 1000;++mlen) { + unsigned char kbytes[crypto_secretbox_KEYBYTES]; + randombytes(kbytes,crypto_secretbox_KEYBYTES); + string k((char *) kbytes,crypto_secretbox_KEYBYTES); + unsigned char nbytes[crypto_secretbox_NONCEBYTES]; + randombytes(nbytes,crypto_secretbox_NONCEBYTES); + string n((char *) nbytes,crypto_secretbox_NONCEBYTES); + unsigned char mbytes[mlen]; + randombytes(mbytes,mlen); + string m((char *) mbytes,mlen); + string c = crypto_secretbox(m,n,k); + try { + string m2 = crypto_secretbox_open(c,n,k); + if (m != m2) printf("bad decryption\n"); + } catch(const char *s) { + printf("%s\n",s); + } + } + return 0; +} diff --git a/nacl/tests/secretbox5.out b/nacl/tests/secretbox5.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/secretbox6.cpp b/nacl/tests/secretbox6.cpp new file mode 100644 index 00000000..e8274006 --- /dev/null +++ b/nacl/tests/secretbox6.cpp @@ -0,0 +1,42 @@ +#include +using std::string; +#include +#include +#include "crypto_secretbox.h" +#include "randombytes.h" + +main() +{ + int mlen; + for (mlen = 0;mlen < 1000;++mlen) { + unsigned char kbytes[crypto_secretbox_KEYBYTES]; + randombytes(kbytes,crypto_secretbox_KEYBYTES); + string k((char *) kbytes,crypto_secretbox_KEYBYTES); + unsigned char nbytes[crypto_secretbox_NONCEBYTES]; + randombytes(nbytes,crypto_secretbox_NONCEBYTES); + string n((char *) nbytes,crypto_secretbox_NONCEBYTES); + unsigned char mbytes[mlen]; + randombytes(mbytes,mlen); + string m((char *) mbytes,mlen); + string c = crypto_secretbox(m,n,k); + int caught = 0; + while (caught < 10) { + c.replace(random() % c.size(),1,1,random()); + try { + string m2 = crypto_secretbox_open(c,n,k); + if (m != m2) { + printf("forgery\n"); + return 100; + } + } catch(const char *s) { + if (string(s) == string("ciphertext fails verification")) + ++caught; + else { + printf("%s\n",s); + return 111; + } + } + } + } + 
return 0; +} diff --git a/nacl/tests/secretbox6.out b/nacl/tests/secretbox6.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/secretbox7.c b/nacl/tests/secretbox7.c new file mode 100644 index 00000000..d4be9b49 --- /dev/null +++ b/nacl/tests/secretbox7.c @@ -0,0 +1,32 @@ +#include +#include "crypto_secretbox.h" +#include "randombytes.h" + +unsigned char k[crypto_secretbox_KEYBYTES]; +unsigned char n[crypto_secretbox_NONCEBYTES]; +unsigned char m[10000]; +unsigned char c[10000]; +unsigned char m2[10000]; + +main() +{ + int mlen; + int i; + + for (mlen = 0;mlen < 1000 && mlen + crypto_secretbox_ZEROBYTES < sizeof m;++mlen) { + randombytes(k,crypto_secretbox_KEYBYTES); + randombytes(n,crypto_secretbox_NONCEBYTES); + randombytes(m + crypto_secretbox_ZEROBYTES,mlen); + crypto_secretbox(c,m,mlen + crypto_secretbox_ZEROBYTES,n,k); + if (crypto_secretbox_open(m2,c,mlen + crypto_secretbox_ZEROBYTES,n,k) == 0) { + for (i = 0;i < mlen + crypto_secretbox_ZEROBYTES;++i) + if (m2[i] != m[i]) { + printf("bad decryption\n"); + break; + } + } else { + printf("ciphertext fails verification\n"); + } + } + return 0; +} diff --git a/nacl/tests/secretbox7.out b/nacl/tests/secretbox7.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/secretbox8.c b/nacl/tests/secretbox8.c new file mode 100644 index 00000000..a6c75c23 --- /dev/null +++ b/nacl/tests/secretbox8.c @@ -0,0 +1,37 @@ +#include +#include "crypto_secretbox.h" +#include "randombytes.h" + +unsigned char k[crypto_secretbox_KEYBYTES]; +unsigned char n[crypto_secretbox_NONCEBYTES]; +unsigned char m[10000]; +unsigned char c[10000]; +unsigned char m2[10000]; + +main() +{ + int mlen; + int i; + int caught; + + for (mlen = 0;mlen < 1000 && mlen + crypto_secretbox_ZEROBYTES < sizeof m;++mlen) { + randombytes(k,crypto_secretbox_KEYBYTES); + randombytes(n,crypto_secretbox_NONCEBYTES); + randombytes(m + crypto_secretbox_ZEROBYTES,mlen); + crypto_secretbox(c,m,mlen + 
crypto_secretbox_ZEROBYTES,n,k); + caught = 0; + while (caught < 10) { + c[random() % (mlen + crypto_secretbox_ZEROBYTES)] = random(); + if (crypto_secretbox_open(m2,c,mlen + crypto_secretbox_ZEROBYTES,n,k) == 0) { + for (i = 0;i < mlen + crypto_secretbox_ZEROBYTES;++i) + if (m2[i] != m[i]) { + printf("forgery\n"); + return 100; + } + } else { + ++caught; + } + } + } + return 0; +} diff --git a/nacl/tests/secretbox8.out b/nacl/tests/secretbox8.out new file mode 100644 index 00000000..e69de29b diff --git a/nacl/tests/stream.c b/nacl/tests/stream.c new file mode 100644 index 00000000..ebb39398 --- /dev/null +++ b/nacl/tests/stream.c @@ -0,0 +1,29 @@ +#include +#include "crypto_stream_xsalsa20.h" +#include "crypto_hash_sha256.h" + +unsigned char firstkey[32] = { + 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 +} ; + +unsigned char nonce[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +unsigned char output[4194304]; + +unsigned char h[32]; + +main() +{ + int i; + crypto_stream_xsalsa20(output,4194304,nonce,firstkey); + crypto_hash_sha256(h,output,sizeof output); + for (i = 0;i < 32;++i) printf("%02x",h[i]); printf("\n"); + return 0; +} diff --git a/nacl/tests/stream.out b/nacl/tests/stream.out new file mode 100644 index 00000000..5fa208c1 --- /dev/null +++ b/nacl/tests/stream.out @@ -0,0 +1 @@ +662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2 diff --git a/nacl/tests/stream2.c b/nacl/tests/stream2.c new file mode 100644 index 00000000..12f13de4 --- /dev/null +++ b/nacl/tests/stream2.c @@ -0,0 +1,27 @@ +#include +#include "crypto_stream_salsa20.h" +#include "crypto_hash_sha256.h" + +unsigned char secondkey[32] = { + 0xdc,0x90,0x8d,0xda,0x0b,0x93,0x44,0xa9 +,0x53,0x62,0x9b,0x73,0x38,0x20,0x77,0x88 +,0x80,0xf3,0xce,0xb4,0x21,0xbb,0x61,0xb9 
+,0x1c,0xbd,0x4c,0x3e,0x66,0x25,0x6c,0xe4 +} ; + +unsigned char noncesuffix[8] = { + 0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +unsigned char output[4194304]; + +unsigned char h[32]; + +main() +{ + int i; + crypto_stream_salsa20(output,4194304,noncesuffix,secondkey); + crypto_hash_sha256(h,output,sizeof output); + for (i = 0;i < 32;++i) printf("%02x",h[i]); printf("\n"); + return 0; +} diff --git a/nacl/tests/stream2.out b/nacl/tests/stream2.out new file mode 100644 index 00000000..5fa208c1 --- /dev/null +++ b/nacl/tests/stream2.out @@ -0,0 +1 @@ +662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2 diff --git a/nacl/tests/stream3.c b/nacl/tests/stream3.c new file mode 100644 index 00000000..7798dc18 --- /dev/null +++ b/nacl/tests/stream3.c @@ -0,0 +1,28 @@ +#include +#include "crypto_stream_xsalsa20.h" + +unsigned char firstkey[32] = { + 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 +} ; + +unsigned char nonce[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +unsigned char rs[32]; + +main() +{ + int i; + crypto_stream_xsalsa20(rs,32,nonce,firstkey); + for (i = 0;i < 32;++i) { + printf(",0x%02x",(unsigned int) rs[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/stream3.out b/nacl/tests/stream3.out new file mode 100644 index 00000000..9cd78798 --- /dev/null +++ b/nacl/tests/stream3.out @@ -0,0 +1,4 @@ +,0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91 +,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25 +,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65 +,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80 diff --git a/nacl/tests/stream4.c b/nacl/tests/stream4.c new file mode 100644 index 00000000..84d8c523 --- /dev/null +++ b/nacl/tests/stream4.c @@ -0,0 +1,53 @@ +#include +#include "crypto_stream_xsalsa20.h" + +unsigned char firstkey[32] = { + 
0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 +} ; + +unsigned char nonce[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +unsigned char m[163] = { + 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5 +,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b +,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4 +,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc +,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a +,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29 +,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4 +,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31 +,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d +,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57 +,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a +,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde +,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd +,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52 +,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40 +,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64 +,0x5e,0x07,0x05 +} ; + +unsigned char c[163]; + +main() +{ + int i; + crypto_stream_xsalsa20_xor(c,m,163,nonce,firstkey); + for (i = 32;i < 163;++i) { + printf(",0x%02x",(unsigned int) c[i]); + if (i % 8 == 7) printf("\n"); + } + printf("\n"); + return 0; +} diff --git a/nacl/tests/stream4.out b/nacl/tests/stream4.out new file mode 100644 index 00000000..0d3d8e94 --- /dev/null +++ b/nacl/tests/stream4.out @@ -0,0 +1,17 @@ +,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce +,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a +,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae 
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 diff --git a/nacl/tests/stream5.cpp b/nacl/tests/stream5.cpp new file mode 100644 index 00000000..66f3839b --- /dev/null +++ b/nacl/tests/stream5.cpp @@ -0,0 +1,29 @@ +#include +using std::string; +#include +#include "crypto_stream_xsalsa20.h" +#include "crypto_hash_sha256.h" + +char firstkey_bytes[32] = { + 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 +} ; + +char nonce_bytes[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +main() +{ + int i; + string firstkey(firstkey_bytes,sizeof firstkey_bytes); + string nonce(nonce_bytes,sizeof nonce_bytes); + string output = crypto_stream_xsalsa20(4194304,nonce,firstkey); + string h = crypto_hash_sha256(output); + for (i = 0;i < 32;++i) printf("%02x",(unsigned int) (unsigned char) h[i]); printf("\n"); + return 0; +} diff --git a/nacl/tests/stream5.out b/nacl/tests/stream5.out new file mode 100644 index 00000000..5fa208c1 --- /dev/null +++ b/nacl/tests/stream5.out @@ -0,0 +1 @@ +662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2 diff --git a/nacl/tests/stream6.cpp b/nacl/tests/stream6.cpp new file mode 100644 index 00000000..d9ed61f7 --- /dev/null +++ b/nacl/tests/stream6.cpp @@ -0,0 +1,27 @@ +#include +using std::string; +#include +#include "crypto_stream_salsa20.h" +#include "crypto_hash_sha256.h" + +char secondkey_bytes[32] = { + 0xdc,0x90,0x8d,0xda,0x0b,0x93,0x44,0xa9 +,0x53,0x62,0x9b,0x73,0x38,0x20,0x77,0x88 +,0x80,0xf3,0xce,0xb4,0x21,0xbb,0x61,0xb9 +,0x1c,0xbd,0x4c,0x3e,0x66,0x25,0x6c,0xe4 +} ; + +char noncesuffix_bytes[8] = { + 
0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +main() +{ + int i; + string secondkey(secondkey_bytes,sizeof secondkey_bytes); + string noncesuffix(noncesuffix_bytes,sizeof noncesuffix_bytes); + string output = crypto_stream_salsa20(4194304,noncesuffix,secondkey); + string h = crypto_hash_sha256(output); + for (i = 0;i < 32;++i) printf("%02x",(unsigned int) (unsigned char) h[i]); printf("\n"); + return 0; +} diff --git a/nacl/tests/stream6.out b/nacl/tests/stream6.out new file mode 100644 index 00000000..5fa208c1 --- /dev/null +++ b/nacl/tests/stream6.out @@ -0,0 +1 @@ +662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2 diff --git a/nacl/tests/stream7.cpp b/nacl/tests/stream7.cpp new file mode 100644 index 00000000..d2f106e5 --- /dev/null +++ b/nacl/tests/stream7.cpp @@ -0,0 +1,30 @@ +#include +using std::string; +#include +#include "crypto_stream_xsalsa20.h" + +char firstkey_bytes[32] = { + 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 +} ; + +char nonce_bytes[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +main() +{ + int i; + string firstkey(firstkey_bytes,sizeof firstkey_bytes); + string nonce(nonce_bytes,sizeof nonce_bytes); + string rs = crypto_stream_xsalsa20(32,nonce,firstkey); + for (i = 0;i < rs.size();++i) { + printf(",0x%02x",(unsigned int) (unsigned char) rs[i]); + if (i % 8 == 7) printf("\n"); + } + return 0; +} diff --git a/nacl/tests/stream7.out b/nacl/tests/stream7.out new file mode 100644 index 00000000..9cd78798 --- /dev/null +++ b/nacl/tests/stream7.out @@ -0,0 +1,4 @@ +,0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91 +,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25 +,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65 +,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80 diff --git a/nacl/tests/stream8.cpp b/nacl/tests/stream8.cpp new file mode 100644 index 
00000000..ea95d68f --- /dev/null +++ b/nacl/tests/stream8.cpp @@ -0,0 +1,56 @@ +#include +using std::string; +#include +#include "crypto_stream_xsalsa20.h" + +char firstkey_bytes[32] = { + 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4 +,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7 +,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2 +,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89 +} ; + +char nonce_bytes[24] = { + 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73 +,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6 +,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37 +} ; + +char m_bytes[163] = { + 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +, 0, 0, 0, 0, 0, 0, 0, 0 +,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5 +,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b +,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4 +,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc +,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a +,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29 +,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4 +,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31 +,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d +,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57 +,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a +,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde +,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd +,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52 +,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40 +,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64 +,0x5e,0x07,0x05 +} ; + +main() +{ + int i; + string firstkey(firstkey_bytes,sizeof firstkey_bytes); + string nonce(nonce_bytes,sizeof nonce_bytes); + string m(m_bytes,sizeof m_bytes); + string c = crypto_stream_xsalsa20_xor(m,nonce,firstkey); + for (i = 32;i < c.size();++i) { + printf(",0x%02x",(unsigned int) (unsigned char) c[i]); + if (i % 8 == 7) printf("\n"); + } + printf("\n"); + return 0; +} diff --git a/nacl/tests/stream8.out b/nacl/tests/stream8.out new file mode 100644 index 00000000..0d3d8e94 --- /dev/null +++ b/nacl/tests/stream8.out @@ -0,0 +1,17 @@ +,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73 +,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce 
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4 +,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a +,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b +,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72 +,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2 +,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38 +,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a +,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae +,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea +,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda +,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde +,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3 +,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6 +,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74 +,0xe3,0x55,0xa5 diff --git a/nacl/try-anything.c b/nacl/try-anything.c new file mode 100644 index 00000000..b6847473 --- /dev/null +++ b/nacl/try-anything.c @@ -0,0 +1,173 @@ +/* + * try-anything.c version 20090215 + * D. J. Bernstein + * Public domain. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cpucycles.h" + +typedef int uint32; + +static uint32 seed[32] = { 3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3,2,3,8,4,6,2,6,4,3,3,8,3,2,7,9,5 } ; +static uint32 in[12]; +static uint32 out[8]; +static int outleft = 0; + +#define ROTATE(x,b) (((x) << (b)) | ((x) >> (32 - (b)))) +#define MUSH(i,b) x = t[i] += (((x ^ seed[i]) + sum) ^ ROTATE(x,b)); + +static void surf(void) +{ + uint32 t[12]; uint32 x; uint32 sum = 0; + int r; int i; int loop; + + for (i = 0;i < 12;++i) t[i] = in[i] ^ seed[12 + i]; + for (i = 0;i < 8;++i) out[i] = seed[24 + i]; + x = t[11]; + for (loop = 0;loop < 2;++loop) { + for (r = 0;r < 16;++r) { + sum += 0x9e3779b9; + MUSH(0,5) MUSH(1,7) MUSH(2,9) MUSH(3,13) + MUSH(4,5) MUSH(5,7) MUSH(6,9) MUSH(7,13) + MUSH(8,5) MUSH(9,7) MUSH(10,9) MUSH(11,13) + } + for (i = 0;i < 8;++i) out[i] ^= t[i + 4]; + } +} + +void randombytes(unsigned char *x,unsigned long long xlen) +{ + while (xlen > 0) { + if (!outleft) { + if (!++in[0]) if (!++in[1]) if (!++in[2]) ++in[3]; + surf(); + outleft = 8; + } + *x = out[--outleft]; + ++x; + --xlen; + } +} + 
+extern void preallocate(void); +extern void allocate(void); +extern void predoit(void); +extern void doit(void); +extern char checksum[]; +extern const char *checksum_compute(void); +extern const char *primitiveimplementation; + +static void printword(const char *s) +{ + if (!*s) putchar('-'); + while (*s) { + if (*s == ' ') putchar('_'); + else if (*s == '\t') putchar('_'); + else if (*s == '\r') putchar('_'); + else if (*s == '\n') putchar('_'); + else putchar(*s); + ++s; + } + putchar(' '); +} + +static void printnum(long long x) +{ + printf("%lld ",x); +} + +static void fail(const char *why) +{ + printf("%s\n",why); + exit(111); +} + +unsigned char *alignedcalloc(unsigned long long len) +{ + unsigned char *x = (unsigned char *) calloc(1,len + 256); + long long i; + if (!x) fail("out of memory"); + /* will never deallocate so shifting is ok */ + for (i = 0;i < len + 256;++i) x[i] = random(); + x += 64; + x += 63 & (-(unsigned long) x); + for (i = 0;i < len;++i) x[i] = 0; + return x; +} + +#define TIMINGS 63 +static long long cycles[TIMINGS + 1]; + +void limits() +{ +#ifdef RLIM_INFINITY + struct rlimit r; + r.rlim_cur = 0; + r.rlim_max = 0; +#ifdef RLIMIT_NOFILE + setrlimit(RLIMIT_NOFILE,&r); +#endif +#ifdef RLIMIT_NPROC + setrlimit(RLIMIT_NPROC,&r); +#endif +#ifdef RLIMIT_CORE + setrlimit(RLIMIT_CORE,&r); +#endif +#endif +} + +int main() +{ + long long i; + long long j; + long long abovej; + long long belowj; + long long checksumcycles; + long long cyclespersecond; + const char *problem; + + cyclespersecond = cpucycles_persecond(); + preallocate(); + limits(); + + allocate(); + srandom(getpid()); + + cycles[0] = cpucycles(); + problem = checksum_compute(); if (problem) fail(problem); + cycles[1] = cpucycles(); + checksumcycles = cycles[1] - cycles[0]; + + predoit(); + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + } + for (i = 0;i <= TIMINGS;++i) { + cycles[i] = cpucycles(); + doit(); + } + for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - 
cycles[i]; + for (j = 0;j < TIMINGS;++j) { + belowj = 0; + for (i = 0;i < TIMINGS;++i) if (cycles[i] < cycles[j]) ++belowj; + abovej = 0; + for (i = 0;i < TIMINGS;++i) if (cycles[i] > cycles[j]) ++abovej; + if (belowj * 2 < TIMINGS && abovej * 2 < TIMINGS) break; + } + + printword(checksum); + printnum(cycles[j]); + printnum(checksumcycles); + printnum(cyclespersecond); + printword(primitiveimplementation); + printf("\n"); + return 0; +} diff --git a/nacl/version b/nacl/version new file mode 100644 index 00000000..97840598 --- /dev/null +++ b/nacl/version @@ -0,0 +1 @@ +20110221 -- cgit v1.2.3