Subject: Patch: Encoding Manager enhancements
From: Andrew Dunbar (hippietrail@yahoo.com)
Date: Mon Jun 11 2001 - 05:40:49 CDT
This patch detects the names the local iconv actually uses for
little-endian and big-endian UCS-2, and provides functions to get these
names when needed instead of hard-coding them. Different iconv
implementations recognize different names. Specifically, the libiconv we
are using has the wrong names.
I've also provided getNativeUnicodeEncodingName(). This will
return UTF-8 on *nix, Be, and QNX; UCS-2LE on Windows; and whatever
Macs use on Macs (:
Andrew Dunbar.
-- http://linguaphile.sourceforge.net
Index: src/af/ev/win/Makefile =================================================================== RCS file: /cvsroot/abi/src/af/ev/win/Makefile,v retrieving revision 1.12 diff -u -r1.12 Makefile --- src/af/ev/win/Makefile 2000/02/14 11:24:49 1.12 +++ src/af/ev/win/Makefile 2001/06/11 10:21:05 @@ -28,6 +28,8 @@ ev_Win32Toolbar.cpp \ ev_Win32Toolbar_ViewListener.cpp +INCLUDES= -I$(ABI_XX_ROOT)/../libiconv/include + TARGETS= $(OBJS) include $(ABI_ROOT)/src/config/abi_rules.mk Index: src/af/ev/win/ev_Win32Keyboard.cpp =================================================================== RCS file: /cvsroot/abi/src/af/ev/win/ev_Win32Keyboard.cpp,v retrieving revision 1.25 diff -u -r1.25 ev_Win32Keyboard.cpp --- src/af/ev/win/ev_Win32Keyboard.cpp 2001/06/07 15:51:42 1.25 +++ src/af/ev/win/ev_Win32Keyboard.cpp 2001/06/11 10:21:12 @@ -31,6 +31,7 @@ #include "ev_EditMethod.h" #include "ev_EditBinding.h" #include "ev_EditEventMapper.h" +#include "xap_EncodingManager.h" #ifdef UT_DEBUG #define MSG(keydata,args) do { if ( ! (keyData & 0x40000000)) UT_DEBUGMSG args ; } while (0) @@ -193,11 +194,12 @@ if( GetLocaleInfo( LOWORD( hKeyboardLayout ), LOCALE_IDEFAULTANSICODEPAGE, &szCodePage[2], sizeof( szCodePage ) / sizeof( szCodePage[0] ) - 2 ) ) { // Unicode locale? - // TODO Does NT use UCS-2-BE internally on non-Intel CPUs? 
if( !strcmp( szCodePage, "CP0" ) ) { + const char *szUCS2Name = XAP_EncodingManager::get_instance()->getNativeUnicodeEncodingName(); + UT_ASSERT(szUCS2Name); m_bIsUnicodeInput = true; - strcpy( szCodePage, "UCS-2-LE" ); + strcpy( szCodePage, szUCS2Name ); } else m_bIsUnicodeInput = false; Index: src/af/util/xp/ut_Encoding.cpp =================================================================== RCS file: /cvsroot/abi/src/af/util/xp/ut_Encoding.cpp,v retrieving revision 1.1 diff -u -r1.1 ut_Encoding.cpp --- src/af/util/xp/ut_Encoding.cpp 2001/06/07 15:51:59 1.1 +++ src/af/util/xp/ut_Encoding.cpp 2001/06/11 10:21:16 @@ -167,8 +167,10 @@ {enc_tis620, NULL, XAP_STRING_ID_ENC_21}, {enc_ucs2be, NULL, XAP_STRING_ID_ENC_53}, {enc_ucs2le, NULL, XAP_STRING_ID_ENC_54}, + // UCS-4 be and le {enc_utf7, NULL, XAP_STRING_ID_ENC_51}, {enc_utf8, NULL, XAP_STRING_ID_ENC_52}, + // UTF-16, UTF-32 be and le {enc_viscii, NULL, XAP_STRING_ID_ENC_24}, }; Index: src/af/xap/win/xap_Win32EncodingManager.cpp =================================================================== RCS file: /cvsroot/abi/src/af/xap/win/xap_Win32EncodingManager.cpp,v retrieving revision 1.4 diff -u -r1.4 xap_Win32EncodingManager.cpp --- src/af/xap/win/xap_Win32EncodingManager.cpp 2001/06/07 15:52:18 1.4 +++ src/af/xap/win/xap_Win32EncodingManager.cpp 2001/06/11 10:21:22 @@ -28,11 +28,14 @@ XAP_Win32EncodingManager::~XAP_Win32EncodingManager() {} -static const char* NativeEncodingName, *LanguageISOName, *LanguageISOTerritory; +static const char* NativeEncodingName, *NativeUnicodeEncodingName, *LanguageISOName, *LanguageISOTerritory; const char* XAP_Win32EncodingManager::getNativeEncodingName() const { return NativeEncodingName; }; +const char* XAP_Win32EncodingManager::getNativeUnicodeEncodingName() const +{ return NativeUnicodeEncodingName; }; + const char* XAP_Win32EncodingManager::getLanguageISOName() const { return LanguageISOName; }; @@ -52,14 +55,17 @@ LanguageISOName = "en"; LanguageISOTerritory = NULL; + // 
Unicode Encoding Name + // TODO Does NT use UCS-2BE internally on non-Intel CPUs? + NativeUnicodeEncodingName = getUCS2LEName(); + // Encoding if (GetLocaleInfo(LOCALE_USER_DEFAULT,LOCALE_IDEFAULTANSICODEPAGE,szLocaleInfo,sizeof(szLocaleInfo)/sizeof(szLocaleInfo[0]))) { // Windows Unicode locale? if (!strcmp(szLocaleInfo,"0")) { - // TODO Does NT use UCS-2-BE internally on non-Intel CPUs? - NativeEncodingName = "UCS-2-LE"; + NativeEncodingName = NativeUnicodeEncodingName; m_bIsUnicodeLocale = true; } else Index: src/af/xap/win/xap_Win32EncodingManager.h =================================================================== RCS file: /cvsroot/abi/src/af/xap/win/xap_Win32EncodingManager.h,v retrieving revision 1.1 diff -u -r1.1 xap_Win32EncodingManager.h --- src/af/xap/win/xap_Win32EncodingManager.h 2001/05/25 18:12:44 1.1 +++ src/af/xap/win/xap_Win32EncodingManager.h 2001/06/11 10:21:22 @@ -15,6 +15,7 @@ public: const char* getNativeEncodingName() const; + const char* getNativeUnicodeEncodingName() const; inline virtual bool isUnicodeLocale() const {return m_bIsUnicodeLocale;} const char* getLanguageISOName() const; const char* getLanguageISOTerritory() const; Index: src/af/xap/xp/xap_EncodingManager.cpp =================================================================== RCS file: /cvsroot/abi/src/af/xap/xp/xap_EncodingManager.cpp,v retrieving revision 1.34 diff -u -r1.34 xap_EncodingManager.cpp --- src/af/xap/xp/xap_EncodingManager.cpp 2001/06/07 15:52:24 1.34 +++ src/af/xap/xp/xap_EncodingManager.cpp 2001/06/11 10:21:39 @@ -40,6 +40,23 @@ return "ISO-8859-1"; /* this will definitely work*/ } +const char* XAP_EncodingManager::getNativeUnicodeEncodingName() const +{ + return "UTF-8"; /* this will definitely work*/ +} + +static const char* UCS2BEName, *UCS2LEName; + +const char* XAP_EncodingManager::getUCS2BEName() const +{ + return UCS2BEName; +} + +const char* XAP_EncodingManager::getUCS2LEName() const +{ + return UCS2LEName; +} + #define VALID_ICONV_HANDLE(i) ((i) != 
(iconv_t)-1) XAP_EncodingManager::~XAP_EncodingManager() { @@ -754,6 +1757,51 @@ *terrname = getLanguageISOTerritory(), *enc = getNativeEncodingName(); + // UCS-2 Encoding Names + static const char * (szUCS2BENames[]) = { + "UCS-2BE", // preferred + "UCS-2-BE", // older libiconv + "UNICODEBIG", // older glibc + "UNICODE-1-1", // in libiconv source + "UTF-16BE", // superset + "UTF-16-BE", // my guess + 0 }; + static const char * (szUCS2LENames[]) = { + "UCS-2LE", // preferred + "UCS-2-LE", // older libiconv + "UNICODELITTLE", // older glibc + "UTF-16LE", // superset + "UTF-16-LE", // my guess + 0 }; + const char ** p; + iconv_t iconv_handle; + for (p = szUCS2BENames; *p; ++p) + { + if ((iconv_handle = iconv_open(*p,*p)) != (iconv_t)-1) + { + iconv_close(iconv_handle); + UCS2BEName = *p; + break; + } + } + for (p = szUCS2LENames; *p; ++p) + { + if ((iconv_handle = iconv_open(*p,*p)) != (iconv_t)-1) + { + iconv_close(iconv_handle); + UCS2LEName = *p; + break; + } + } + if (UCS2BEName) + UT_DEBUGMSG(("This iconv supports UCS-2BE as \"%s\"\n",UCS2BEName)); + else + UT_DEBUGMSG(("This iconv does not support UCS-2BE!\n")); + if (UCS2LEName) + UT_DEBUGMSG(("This iconv supports UCS-2LE as \"%s\"\n",UCS2LEName)); + else + UT_DEBUGMSG(("This iconv does not support UCS-2LE!\n")); + if(!strcmp(enc, "UTF-8") || !strcmp(enc, "UTF8") || !strcmp(enc, "utf-8") || !strcmp(enc, "utf8")) m_bIsUnicodeLocale = true; else Index: src/af/xap/xp/xap_EncodingManager.h =================================================================== RCS file: /cvsroot/abi/src/af/xap/xp/xap_EncodingManager.h,v retrieving revision 1.20 diff -u -r1.20 xap_EncodingManager.h --- src/af/xap/xp/xap_EncodingManager.h 2001/06/07 15:52:24 1.20 +++ src/af/xap/xp/xap_EncodingManager.h 2001/06/11 10:21:43 @@ -52,9 +52,26 @@ /* this shouldn't return NULL. Don't free or write to returned string. The string should be uppercased (extra font tarballs assume this). - TODO isn't iconv case sensitive? 
Mac encoding names are mixed case! */ virtual const char* getNativeEncodingName() const; + + /* + this can return NULL. Don't free or write to returned string. + The string should be uppercased (extra font tarballs assume this). + */ + virtual const char* getNativeUnicodeEncodingName() const; + + /* + this can return NULL. Don't free or write to returned string. + The string should be uppercased (extra font tarballs assume this). + */ + virtual const char* getUCS2BEName() const; + + /* + this can return NULL. Don't free or write to returned string. + The string should be uppercased (extra font tarballs assume this). + */ + virtual const char* getUCS2LEName() const; /* This should return true for any Unicode locale: Index: src/wp/impexp/xp/ie_imp_Text.cpp =================================================================== RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp_Text.cpp,v retrieving revision 1.26 diff -u -r1.26 ie_imp_Text.cpp --- src/wp/impexp/xp/ie_imp_Text.cpp 2001/06/07 15:52:42 1.26 +++ src/wp/impexp/xp/ie_imp_Text.cpp 2001/06/11 10:22:44 @@ -355,11 +363,10 @@ eUcs2 = IE_Imp_Text_Sniffer::_recognizeUCS2(szBuf, iNumbytes, true); - // TODO Old libiconv uses UCS-2-BE, new uses UCS-2BE if (eUcs2 == IE_Imp_Text_Sniffer::UE_BigEnd) - _setEncoding("UCS-2-BE"); + _setEncoding(XAP_EncodingManager::get_instance()->getUCS2BEName()); else if (eUcs2 == IE_Imp_Text_Sniffer::UE_LittleEnd) - _setEncoding("UCS-2-LE"); + _setEncoding(XAP_EncodingManager::get_instance()->getUCS2LEName()); } return UT_OK; @@ -558,11 +566,10 @@ // Attempt to guess whether we're pasting 8 bit or unicode text IE_Imp_Text_Sniffer::UCS2_Endian eUcs2 = IE_Imp_Text_Sniffer::_recognizeUCS2((const char *)pData, lenData, true); - // TODO Old libiconv uses UCS-2-BE, new uses UCS-2BE if (eUcs2 == IE_Imp_Text_Sniffer::UE_BigEnd) - _setEncoding("UCS-2-BE"); + _setEncoding(XAP_EncodingManager::get_instance()->getUCS2BEName()); else if (eUcs2 == IE_Imp_Text_Sniffer::UE_LittleEnd) - 
_setEncoding("UCS-2-LE"); + _setEncoding(XAP_EncodingManager::get_instance()->getUCS2LEName()); else _setEncoding(XAP_EncodingManager::get_instance()->getNativeEncodingName());
_________________________________________________________ Do You Yahoo!? Get your free @yahoo.com address at http://mail.yahoo.com
This archive was generated by hypermail 2b25 : Mon Jun 11 2001 - 05:38:33 CDT