unicode字符和多字节字符的相互转换接口
扫描二维码
随时随地手机看文章
发现开源代码的可利用资源真多,从sqlite3的源码中抠出了几个字符转换接口,稍微改造下了发现还挺好用的。下面是实现代码:
/* ** Convert a UTF-8 string to microsoft unicode (UTF-16?). ** ** Space to hold the returned string is obtained from malloc. */ static WCHAR *utf8ToUnicode(const char *zFilename){ int nChar; WCHAR *zWideFilename; nChar = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0); zWideFilename = static_cast(malloc(nChar*sizeof(zWideFilename[0]))); if( zWideFilename==0 ){ return 0; } nChar = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, zWideFilename, nChar); if( nChar==0 ){ free(zWideFilename); zWideFilename = 0; } return zWideFilename; } /* ** Convert microsoft unicode to UTF-8. Space to hold the returned string is ** obtained from malloc(). */ static char *unicodeToUtf8(const WCHAR *zWideFilename){ int nByte; char *zFilename; nByte = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0); zFilename = static_cast(malloc( nByte )); if( zFilename==0 ){ return 0; } nByte = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, zFilename, nByte, 0, 0); if( nByte == 0 ) { free(zFilename); zFilename = 0; } return zFilename; } /* ** Convert an ansi string to microsoft unicode, based on the ** current codepage settings for file apis. ** ** Space to hold the returned string is obtained ** from malloc. */ static WCHAR *mbcsToUnicode(const char *zFilename){ int nByte; WCHAR *zMbcsFilename; int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP; nByte = MultiByteToWideChar(codepage, 0, zFilename, -1, NULL,0)*sizeof(WCHAR); zMbcsFilename = static_cast(malloc( nByte*sizeof(zMbcsFilename[0]))); if( zMbcsFilename==0 ){ return 0; } nByte = MultiByteToWideChar(codepage, 0, zFilename, -1, zMbcsFilename, nByte); if( nByte==0 ) { free(zMbcsFilename); zMbcsFilename = 0; } return zMbcsFilename; } /* ** Convert microsoft unicode to multibyte character string, based on the ** user's Ansi codepage. ** ** Space to hold the returned string is obtained from ** malloc(). */ static char* unicodeToMbcs(const WCHAR* zWideFilename){ int nByte; char *zFilename; int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP; nByte = WideCharToMultiByte(codepage, 0, zWideFilename, -1, 0, 0, 0, 0); zFilename = static_cast(malloc(nByte )); if( zFilename==0 ){ return 0; } nByte = WideCharToMultiByte(codepage, 0, zWideFilename, -1, zFilename, nByte, 0, 0); if( nByte == 0 ){ free(zFilename); zFilename = 0; } return zFilename; } /* ** Convert multibyte character string to UTF-8. Space to hold the ** returned string is obtained from malloc(). */ static char* mbcsToUtf8(const char *zFilename){ char *zFilenameUtf8; WCHAR *zTmpWide; zTmpWide = mbcsToUnicode(zFilename); if( zTmpWide==0 ){ return 0; } zFilenameUtf8 = unicodeToUtf8(zTmpWide); free(zTmpWide); return zFilenameUtf8; } /* ** Convert UTF-8 to multibyte character string. Space to hold the ** returned string is obtained from malloc(). */ static char* utf8ToMbcs(const char *zFilename){ char *zFilenameMbcs; WCHAR* zTmpWide; zTmpWide = utf8ToUnicode(zFilename); if( zTmpWide==0 ){ return 0; } zFilenameMbcs = unicodeToMbcs(zTmpWide); free(zTmpWide); return zFilenameMbcs; } std::string MbcsToUtf8( const char* pszMbcs ) { std::string str; WCHAR *pwchar=0; CHAR *pchar=0; int len=0; int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP; len=MultiByteToWideChar(codepage, 0, pszMbcs, -1, NULL,0); pwchar=new WCHAR[len]; if(pwchar!=0) { len = MultiByteToWideChar(codepage, 0, pszMbcs, -1, pwchar, len); if( len!=0 ) { len = WideCharToMultiByte(CP_UTF8, 0, pwchar, -1, 0, 0, 0, 0); pchar=new CHAR[len]; if(pchar!=0) { len = WideCharToMultiByte(CP_UTF8, 0, pwchar, -1, pchar, len,0, 0); if(len!=0) { str = pchar; } delete pchar; } delete pwchar; } } return str; }
要测试这些接口,为此我写了一个测试工程,是读取一个xml文件把里面的字符进行转换的,测试工程的代码下载地址如下:
unicode字符和多字节字符的相互转换接口及测试工程