/* Base64 and UTF8 decoding routines in C by Pieter Suurmond, august 22, 2003. */

#include <stdio.h>

/*------------------------------------------------------------------------------------------*/
/* Decodes the NUL-terminated base64 string that src points to and writes the
   decoded bytes, followed by a terminating 0 byte, to dst.

   dst must be large enough: at most 3 bytes per 4 input characters, plus 1 for
   the terminator. It is the caller's responsibility to provide that.

   '=' padding is accepted only in the last one or two positions of the final
   group of four characters; padding positions produce no output bytes.

   Returns 0 on success, or:
     1  a character is not 7 bit ASCII,
     2  a character is not a base64 character (this includes a misplaced '=' and
        any character following a padded group),
     3  the input ended in the middle of a group of four characters.
   On error, dst holds the bytes of every group that was completely decoded
   before the error, followed by the terminator. */
int decodeBase64(const char *src, char *dst)
{
    /* Reverse table of
       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".
       64 marks a character outside the alphabet; '=' is handled separately. */
    static const unsigned char b64reverse[128] = {
        64,64,64,64, 64,64,64,64, 64,64,64,64, 64,64,64,64, /*   0- 15 */
        64,64,64,64, 64,64,64,64, 64,64,64,64, 64,64,64,64, /*  16- 31 */
        64,64,64,64, 64,64,64,64, 64,64,64,62, 64,64,64,63, /*  32- 47 (43=+->62; 47=/->63) */
        52,53,54,55, 56,57,58,59, 60,61,64,64, 64,64,64,64, /*  48- 63 (48=0->52) */
        64, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14, /*  64- 79 (65=A->0) */
        15,16,17,18, 19,20,21,22, 23,24,25,64, 64,64,64,64, /*  80- 95 */
        64,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, /*  96-111 (97=a->26) */
        41,42,43,44, 45,46,47,48, 49,50,51,64, 64,64,64,64  /* 112-127 */
    };
    unsigned char *out = (unsigned char *)dst;
    unsigned long accu = 0UL;   /* Must be able to hold 24 bits. */
    int err = 0;
    int filled = 0;             /* Characters collected in the current group (0..3). */
    int pads = 0;               /* '=' characters seen in the current group. */
    int finished = 0;           /* Set once a padded (i.e. final) group was decoded. */
    unsigned char c;

    while ((c = (unsigned char)*src++) != 0) {
        unsigned char sextet;

        if (c > 127) {          /* ERROR: not a 7 bit ASCII character! */
            err = 1;
            break;
        }
        if (finished) {         /* ERROR: data after the padded final group! */
            err = 2;
            break;
        }
        if (c == '=') {
            if (filled < 2) {   /* ERROR: padding in one of the first two positions! */
                err = 2;
                break;
            }
            pads++;
            sextet = 0;         /* Padding contributes zero bits. */
        } else {
            if (pads) {         /* ERROR: data character after '=' in the same group! */
                err = 2;
                break;
            }
            sextet = b64reverse[c];
            if (sextet > 63) {  /* ERROR: not a base64 character! */
                err = 2;
                break;
            }
        }

        accu = (accu << 6) | sextet;    /* Paste 6 bits to the accumulator. */

        if (++filled == 4) {
            /* 24 bits collected: emit 3 bytes, minus one per padding character. */
            int nbytes = 3 - pads;
            int i;

            for (i = 0; i < nbytes; i++)
                out[i] = (unsigned char)((accu >> (16 - 8 * i)) & 0xFFUL);
            out += nbytes;

            finished = (pads != 0);
            accu = 0UL;
            filled = 0;
            pads = 0;
        }
    }

    /* Report a truncated group only when no more specific error was found. */
    if (!err && filled)
        err = 3;                /* ERROR: not enough characters! */

    *out = 0;                   /* Terminate the C string. */
    return err;
}

/*------------------------------------------------------------------------------------------*/
/* Decodes the NUL-terminated UTF8 string that src points to and writes the
   decoded code points, followed by a terminating 0, to dst (an array of
   unsigned shorts, one element per code point).

   dst must be large enough: at most one element per input byte, plus 1 for the
   terminator. It is the caller's responsibility to provide that.

   Only sequences of 1, 2 or 3 bytes are supported:

     Unicode range    0-127      0xxx.xxxx
     Unicode range  128-2047     110x.xxxx 10xx.xxxx
     Unicode range 2048-65535    1110.xxxx 10xx.xxxx 10xx.xxxx

   Overlong encodings and surrogate values are not rejected.

   Returns 0 on success, or:
     1  input ended after the first byte of a multi-byte sequence,
     2  invalid second byte,
     3  input ended after the second byte of a 3 byte sequence,
     4  invalid third byte,
     5  invalid first byte (a stray continuation byte, or the start of a
        sequence longer than 3 bytes).
   On error, dst holds the code points decoded before the error, followed by
   the terminator. */
int decodeUTF8(const char *src, unsigned short *dst)
{
    /* Read the input as unsigned bytes so that values >= 128 are never
       sign-extended on platforms where plain char is signed. */
    const unsigned char *in = (const unsigned char *)src;
    int err = 0;
    unsigned lead;

    while ((lead = *in++) != 0) {
        unsigned code = lead;

        if (lead >= 0x80) {                         /* 2 OR MORE BYTES: */
            unsigned second = *in++;

            if (second == 0) {                      /* Not enough bytes! */
                err = 1;
                break;
            }
            if ((second & 0xC0) != 0x80) {          /* Invalid second byte! */
                err = 2;
                break;
            }

            if ((lead & 0xF0) == 0xE0) {            /* 3 BYTES: */
                unsigned third = *in++;

                if (third == 0) {                   /* Not enough bytes! */
                    err = 3;
                    break;
                }
                if ((third & 0xC0) != 0x80) {       /* Invalid third byte! */
                    err = 4;
                    break;
                }
                /* Keep only the payload bits: 4 + 6 + 6. */
                code = ((lead & 0x0F) << 12) | ((second & 0x3F) << 6) | (third & 0x3F);
            } else if ((lead & 0xE0) == 0xC0) {     /* 2 BYTES: */
                /* Keep only the payload bits: 5 + 6. */
                code = ((lead & 0x1F) << 6) | (second & 0x3F);
            } else {                                /* Invalid first byte! */
                err = 5;
                break;
            }
        }
        *dst++ = (unsigned short)code;
    }

    *dst = 0;                                       /* Terminate the array. */
    return err;
}

/*----------------------------------------------------------------*/
/* Writes the 0-terminated array of code points that utf8 points to to stdout,
   one byte per element. Only the low 8 bits of each element are written, so
   values above 255 are truncated. */
void printfUTF8(const unsigned short *utf8)
{
    unsigned short c;

    while ((c = *utf8++) != 0)
        putchar((unsigned char)c);
}

/*--------------------------------------------------------------------------*/
/* Demo driver: decodes a base64 sample, then decodes the result as UTF8, and
   prints every stage. Define BASE64_UTF8_NO_MAIN to leave it out when the
   routines above are built into another program. */
#ifndef BASE64_UTF8_NO_MAIN
int main(void)
{
    const char *in = "Vml2acOp";    /* Vml2acOp TcOka2k= UGlldGVyIFN1dXJtb25k */
    char buf1[128];
    unsigned short buf2[128];
    int e;

    printf("input: %s.\n", in);

    e = decodeBase64(in, buf1);
    if (e) {
        printf("decodeBase64()=%d.\n", e);
        return 0;
    }
    printf("base64: %s.\n", buf1);

    e = decodeUTF8(buf1, buf2);
    if (e) {
        printf("decodeUTF8()=%d.\n", e);
        return 0;
    }
    printf("utf8: ");
    printfUTF8(buf2);
    printf(".\n");

    return 0;
}
#endif /* BASE64_UTF8_NO_MAIN */