base64_sse3_test.cpp
September 18, 2017 · View on GitHub
#include <stdio.h>
#include <string.h>
#include <intrin.h>
#include "smmintrin.h"
// Scalar model of the per-byte hash that the vector decoder computes with
// psrld + pshufb + pavgb.
//   lut : 16-entry table, indexed by the low nibble of c
//   c   : input byte value
// Returns the rounded-up average of the shifted input and the table lookup.
size_t hash(unsigned char* lut, size_t c){
    // psrld
    // note: the 0xE0 simulates the shifting in of three bits from the next byte...
    const size_t shifted = (c >> 3) | 0xE0;

    // pshufb: yields zero when the high bit of the index byte is set,
    // otherwise the table entry selected by the low nibble
    const size_t looked_up = (c & 0x80) ? 0x00 : lut[c & 0x0F];

    // pavg: average, rounding up
    return (shifted + looked_up + 1) / 2;
}
// I'm lazy... just use the real thing
// Signed saturating 8-bit add (paddsb) of a and b, computed with the actual
// SSE2 instruction so the scalar checks match the vector code exactly.
// Both operands are interpreted as signed bytes; the low byte of the result
// is returned.
unsigned char adds8(unsigned char a, unsigned char b){
    __m128i x = _mm_cvtsi32_si128(a);
    __m128i y = _mm_cvtsi32_si128(b);
    __m128i r = _mm_adds_epi8(x, y);
    // only the lowest byte lane carries the result of a + b
    return (unsigned char)_mm_cvtsi128_si32(r);
}
// The 64 byte values accepted as base64 input, in ascending order.
// Entry i decodes to decoded_chars[i].
unsigned char valid_chars[64] = {
    // 0x2B, 0x2F
    0x2B, 0x2F,
    // 0x30..0x39
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    // 0x41..0x5A
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D,
    0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,
    // 0x61..0x7A
    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
    0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A
};
// Expected 6-bit value for each entry of valid_chars (same index).
unsigned char decoded_chars[64] = {
    // for 0x2B, 0x2F
    62, 63,
    // for 0x30..0x39
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
    // for 0x41..0x5A
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
    13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
    // for 0x61..0x7A
    26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
    39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
};
// The 64 byte values in 0x00..0x7F that are not in valid_chars.
// all signed chars (high bit set, 0x80..0xFF) are also invalid; those are
// covered by a separate loop in check_invalid_char_detection().
unsigned char invalid_chars[64] = {
    // 0x00..0x2A
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A,
    // 0x2C..0x2E
    0x2C, 0x2D, 0x2E,
    // 0x3A..0x40 (includes 0x3D, the equal sign)
    0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40,
    // 0x5B..0x60
    0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60,
    // 0x7B..0x7F
    0x7B, 0x7C, 0x7D, 0x7E, 0x7F
};
// Lookup table passed to hash() when decoding; indexed by the low nibble of
// the input byte.
unsigned char delta_asso[16] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F
};

// Indexed by the low nibble of hash(delta_asso, c); the selected entry is
// saturating-added to c to produce the decoded value.
unsigned char delta_values[16] = {
    0x00, 0x00, 0x00, 0x13, 0x04, 0xBF, 0xBF, 0xB9,
    0xB9, 0x00, 0x10, 0xC3, 0xBF, 0xBF, 0xB9, 0xB9
};

// Lookup table passed to hash() when validating; indexed by the low nibble of
// the input byte.
unsigned char check_asso[16] = {
    0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F
};

// Indexed by the low nibble of hash(check_asso, c); the selected entry is
// saturating-added to c, and a result with the high bit set marks c invalid.
unsigned char check_values[16] = {
    0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xD5, 0xA6,
    0xB5, 0x86, 0xD1, 0x80, 0xB1, 0x80, 0x91, 0x80
};
void check_decode(){ for( size_t i = 0; i < 64; i++ ){ unsigned char c = valid_chars[i]; unsigned char h = hash(delta_asso, c); unsigned char v = adds8(delta_values[h & 0x0F],c); if( decoded_chars[i] != v ){ printf("FAIL: %02X decoded to %02X \n",c,v); } }
// set equal sign (0x3D) to zero in decode, because why not?
unsigned char x = adds8(delta_values[hash(delta_asso, 0x3D) & 0x0F], 0x3D);
if( x != 0 ) printf("FAIL: %02X decoded to %02X \n",0x3D,x);
}
void check_invalid_char_detection() { for( size_t i = 0; i < 64; i++ ){ unsigned char c = valid_chars[i]; unsigned char h = hash(check_asso, c); unsigned char v = adds8(check_values[h & 0x0F],c); if(v >= 0x80){ printf("FAIL: valid char 0x%02X not detected\n",c); } }
for( size_t i = 0; i < 64; i++ ){
unsigned char c = invalid_chars[i];
unsigned char h = hash(check_asso, c);
unsigned char v = adds8(check_values[h & 0x0F], c);
if(v < 0x80){
printf("FAIL: invalid char 0x%02X not detected\n",c);
printf( "%02X %02X %02X\n", h, check_values[h & 0x0F], v );
}
}
// note:
// all check_values are signed...
// because using sat_adds8: signed + signed = signed
// with signed input it is possible hash overflows
// in which case, 0 + signed = signed
for( int i = 0; i < 16; i++ ){
unsigned char c = check_values[i];
if((c & 0x80) == 0){
printf("FAIL: check_value %02X is unsigned\n", c);
}
}
for( size_t i = 128; i < 256; i++ ){
unsigned char c = i;
unsigned char h = hash(check_asso, c);
unsigned char v = adds8(check_values[h & 0x0F], c);
if(v < 0x80){
printf("FAIL: invalid char 0x%02X not detected\n",c);
}
}
}
// hash must produce unsigned results for unsigned input void check_unsigned_hash(){ for( size_t i = 0; i < 128; i++ ){ unsigned char h_c = hash(check_asso, i); if(h_c >= 0x80){ printf("FAIL: %02X hashes to %02X\n",i,h_c); } unsigned char h_d = hash(delta_asso, i); if(h_d >= 0x80){ printf("FAIL: %02X hashes to %02X\n",i,h_d); } } }
void print_hash( unsigned char* lut ){ for( size_t i = 0; i < 128; i++ ){ unsigned char c = i; unsigned char h = hash(lut, c); if((i & 0x0F) == 0)printf("\n"); printf( "%01X ", h & 0x0F); } printf("\n"); }
// Base64 sample input that main() feeds to base64_decode_ssse3(); the encoded
// text begins "Call me Ishmael" and the data ends with "==" padding.
static const char moby_dick_base64[] = "Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZ" "yBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIG" "FuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGh" "vdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5" "IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZ" "mYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldm" "VyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV" "2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5l" "dmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2Zma" "W4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bm" "VyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3V" "jaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1v" "cmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwa" "W5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbG" "UncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8" "gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlz" "dG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRoc" "m93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2" "hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV" "0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1l" "IG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd" "2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg==";
// Decodes base64 text with SSE intrinsics, 16 input characters (12 output
// bytes) per iteration.
//   dst_void : output buffer. Each iteration stores a full 16 bytes (12
//              decoded bytes followed by 4 zero bytes) but advances by only
//              12, so the buffer needs 4 bytes of slack after the data.
//   src_void : input characters.
//   length   : input length in bytes; only the leading (length & ~15) bytes
//              are considered, any remainder is left undecoded.
// Decoding stops at the first 16-byte group containing a character that the
// check tables reject; that group is not written.
// NOTE(review): the function returns true on every path, including the early
// stop, so the return value carries no information for the caller.
bool base64_decode_ssse3( void* dst_void, void* src_void, size_t length ) {
    // same contents as the file-scope tables the scalar checks exercise
    const __m128i delta_asso_lut = _mm_setr_epi8(
        0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F
    );
    const __m128i delta_values_lut = _mm_setr_epi8(
        0x00, 0x00, 0x00, 0x13, 0x04, 0xBF, 0xBF, 0xB9,
        0xB9, 0x00, 0x10, 0xC3, 0xBF, 0xBF, 0xB9, 0xB9
    );
    const __m128i check_asso_lut = _mm_setr_epi8(
        0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
        0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F
    );
    const __m128i check_values_lut = _mm_setr_epi8(
        0x80, 0x80, 0x80, 0x80, 0xCF, 0xBF, 0xD5, 0xA6,
        0xB5, 0x86, 0xD1, 0x80, 0xB1, 0x80, 0x91, 0x80
    );
    // reverses each 32-bit lane's low three bytes and packs them together;
    // the -1 selectors make pshufb write zero to the last four bytes
    const __m128i pack_shuffle = _mm_setr_epi8(
         2,  1,  0,  6,  5,  4, 10,  9,
         8, 14, 13, 12, -1, -1, -1, -1);
    // byte pairs (v0, v1) -> 16-bit v0 * 0x40 + v1
    const __m128i merge_bytes = _mm_set1_epi32(0x01400140);
    // word pairs (w0, w1) -> 32-bit w0 * 0x1000 + w1
    const __m128i merge_words = _mm_set1_epi32(0x00011000);

    unsigned char* in = (unsigned char*)src_void;
    unsigned char* out = (unsigned char*)dst_void;
    unsigned char* const in_end = in + (length & ~15);

    while( in != in_end ){
        const __m128i chars = _mm_loadu_si128((__m128i *)in);
        const __m128i shifted = _mm_srli_epi32(chars, 3);

        // hash = avg(lut[low nibble], input >> 3), once per table
        const __m128i delta_hash = _mm_avg_epu8(_mm_shuffle_epi8(delta_asso_lut, chars), shifted);
        const __m128i check_hash = _mm_avg_epu8(_mm_shuffle_epi8(check_asso_lut, chars), shifted);

        // saturating add of the selected table entry to the input byte
        __m128i sextets = _mm_adds_epi8(_mm_shuffle_epi8(delta_values_lut, delta_hash), chars);
        const __m128i verdict = _mm_adds_epi8(_mm_shuffle_epi8(check_values_lut, check_hash), chars);

        // any byte with its high bit set marks an invalid input character
        if( _mm_movemask_epi8(verdict) != 0 ){
            break;
        }

        // squeeze four 6-bit values into the low 24 bits of each 32-bit lane
        sextets = _mm_maddubs_epi16(sextets, merge_bytes);
        sextets = _mm_madd_epi16(sextets, merge_words);
        sextets = _mm_shuffle_epi8(sextets, pack_shuffle);

        _mm_storeu_si128((__m128i *)out, sextets);
        out += 12;
        in += 16;
    }
    return true;
}
// Test driver: prints both hash tables, runs the scalar table checks, then
// decodes the sample text with the vector decoder and prints the result.
int main () {
    print_hash( check_asso );
    print_hash( delta_asso );

    check_unsigned_hash();
    check_decode();
    check_invalid_char_detection();

    // zero-filled so the decoded bytes are NUL-terminated for printing
    static unsigned char out[0x4000];
    memset(out,0,sizeof(out));

    // Pass the real input length (without the terminating NUL). The original
    // passed -1, which made the decoder's end pointer wrap around and left
    // termination entirely to hitting an invalid character, possibly after
    // loading bytes beyond the end of the literal. The decoded output is the
    // same: the 16-byte group holding the "==" padding is rejected either way.
    base64_decode_ssse3(out, (void*)moby_dick_base64, sizeof(moby_dick_base64) - 1);

    printf( "\n\n%s", (const char*)out);
    printf("\npress enter to exit\n");
    getchar();
    return 0;
}