Use uint16 instead of CharCodePage in CodePage.h/Endian.h

This commit is contained in:
2022-06-13 18:15:31 +08:00
parent fb765038f0
commit 1874288c2a
4 changed files with 104 additions and 90 deletions

View File

@@ -7,52 +7,58 @@ namespace hgl
{ {
struct CodePageAndCharSet struct CodePageAndCharSet
{ {
CharCodePage codepage; uint16 codepage;
CharSetName charset; CharSetName charset;
}; };
#define HGL_CODE_PAGE_AND_CHAR_SET(codepage,charset) {uint16(CharCodePage::codepage),charset}
constexpr struct CodePageAndCharSet CodePage2CharSet[]= constexpr struct CodePageAndCharSet CodePage2CharSet[]=
{ {
{CharCodePage::NONE, "us-ascii" },
{CharCodePage::GBK, "gbk" }, HGL_CODE_PAGE_AND_CHAR_SET(NONE, "us-ascii" ),
{CharCodePage::Big5, "big5" },
{CharCodePage::GB2312, "gb2312" },
{CharCodePage::GB18030, "gb18030" },
{CharCodePage::ShiftJIS, "shift-jis" }, HGL_CODE_PAGE_AND_CHAR_SET(GBK, "gbk" ),
{CharCodePage::JISX, "iso-2022-jp" }, HGL_CODE_PAGE_AND_CHAR_SET(Big5, "big5" ),
HGL_CODE_PAGE_AND_CHAR_SET(GB2312, "gb2312" ),
HGL_CODE_PAGE_AND_CHAR_SET(GB18030, "gb18030" ),
HGL_CODE_PAGE_AND_CHAR_SET(ShiftJIS, "shift-jis" ),
HGL_CODE_PAGE_AND_CHAR_SET(EUC_JP, "EUC-JP" ),
HGL_CODE_PAGE_AND_CHAR_SET(ISO2022JP, "iso-2022-jp" ),
HGL_CODE_PAGE_AND_CHAR_SET(csISO2022JP, "csISO2022JP" ),
HGL_CODE_PAGE_AND_CHAR_SET(JISX, "iso-2022-jp" ),
{CharCodePage::Korean, "ks_c_5601-1987"}, HGL_CODE_PAGE_AND_CHAR_SET(Korean, "ks_c_5601-1987"),
{CharCodePage::MacJanpan, "x-mac-japanese" }, HGL_CODE_PAGE_AND_CHAR_SET(MacJanpan, "x-mac-japanese" ),
{CharCodePage::MacTraditionalChinese, "x-mac-chinesetrad" }, HGL_CODE_PAGE_AND_CHAR_SET(MacTraditionalChinese, "x-mac-chinesetrad" ),
{CharCodePage::MacSimplifiedChinese, "x-mac-chinesesimp" }, HGL_CODE_PAGE_AND_CHAR_SET(MacSimplifiedChinese, "x-mac-chinesesimp" ),
{CharCodePage::ISO_8859_1, "iso-8859-1"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_1, "iso-8859-1"),
{CharCodePage::ISO_8859_2, "iso-8859-2"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_2, "iso-8859-2"),
{CharCodePage::ISO_8859_3, "iso-8859-3"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_3, "iso-8859-3"),
{CharCodePage::ISO_8859_4, "iso-8859-4"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_4, "iso-8859-4"),
{CharCodePage::ISO_8859_5, "iso-8859-5"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_5, "iso-8859-5"),
{CharCodePage::ISO_8859_6, "iso-8859-6"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_6, "iso-8859-6"),
{CharCodePage::ISO_8859_7, "iso-8859-7"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_7, "iso-8859-7"),
{CharCodePage::ISO_8859_8, "iso-8859-8"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_8, "iso-8859-8"),
{CharCodePage::ISO_8859_9, "iso-8859-9"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_9, "iso-8859-9"),
{CharCodePage::ISO_8859_13, "iso-8859-13"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_13, "iso-8859-13"),
{CharCodePage::ISO_8859_15, "iso-8859-15"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_15, "iso-8859-15"),
{CharCodePage::UTF7, "utf-7" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF7, "utf-7" ),
{CharCodePage::UTF8, "utf-8" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF8, "utf-8" ),
{CharCodePage::UTF16LE, "utf-16le" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF16LE, "utf-16le" ),
{CharCodePage::UTF16BE, "utf-16be" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF16BE, "utf-16be" ),
{CharCodePage::UTF32LE, "utf-32le" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF32LE, "utf-32le" ),
{CharCodePage::UTF32BE, "utf-32be" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF32BE, "utf-32be" ),
};//const struct };//const struct
constexpr int CharSetCount=sizeof(CodePage2CharSet)/sizeof(CodePageAndCharSet); constexpr int CharSetCount=sizeof(CodePage2CharSet)/sizeof(CodePageAndCharSet);
inline const char *FindCharSet(CharCodePage ccp) inline const char *FindCharSet(uint16 ccp)
{ {
for(int i=0;i<CharSetCount;i++) for(int i=0;i<CharSetCount;i++)
if(CodePage2CharSet[i].codepage==ccp) if(CodePage2CharSet[i].codepage==ccp)
@@ -63,84 +69,87 @@ namespace hgl
constexpr struct CodePageAndCharSet CodeSet2CharPage[]= constexpr struct CodePageAndCharSet CodeSet2CharPage[]=
{ {
{CharCodePage::NONE, "us-ascii" }, HGL_CODE_PAGE_AND_CHAR_SET(NONE, "us-ascii" ),
HGL_CODE_PAGE_AND_CHAR_SET(GBK, "gbk" ),
HGL_CODE_PAGE_AND_CHAR_SET(Big5, "big5" ),
HGL_CODE_PAGE_AND_CHAR_SET(Big5, "bigfive" ),
{CharCodePage::GBK, "gbk" }, HGL_CODE_PAGE_AND_CHAR_SET(GB2312, "gb2312" ),
HGL_CODE_PAGE_AND_CHAR_SET(GB18030, "gb18030" ),
HGL_CODE_PAGE_AND_CHAR_SET(ShiftJIS, "shift_jis" ),
HGL_CODE_PAGE_AND_CHAR_SET(EUC_JP, "EUC-JP" ),
HGL_CODE_PAGE_AND_CHAR_SET(ISO2022JP, "iso-2022-jp" ),
HGL_CODE_PAGE_AND_CHAR_SET(csISO2022JP, "csISO2022JP" ),
HGL_CODE_PAGE_AND_CHAR_SET(JISX, "iso-2022-jp" ),
{CharCodePage::Big5, "big5" }, HGL_CODE_PAGE_AND_CHAR_SET(Korean, "ks_c_5601-1987"),
{CharCodePage::Big5, "bigfive" },
{CharCodePage::GB2312, "gb2312" }, HGL_CODE_PAGE_AND_CHAR_SET(MacJanpan, "x-mac-japanese" ),
{CharCodePage::GB18030, "gb18030" }, HGL_CODE_PAGE_AND_CHAR_SET(MacTraditionalChinese, "x-mac-chinesetrad" ),
HGL_CODE_PAGE_AND_CHAR_SET(MacSimplifiedChinese, "x-mac-chinesesimp" ),
{CharCodePage::ShiftJIS, "shift_jis" },
{CharCodePage::JISX, "iso-2022-jp" },
{CharCodePage::Korean, "ks_c_5601-1987"},
{CharCodePage::MacJanpan, "x-mac-japanese" },
{CharCodePage::MacTraditionalChinese, "x-mac-chinesetrad" },
{CharCodePage::MacSimplifiedChinese, "x-mac-chinesesimp" },
{CharCodePage::ISO_8859_1, "iso-8859-1"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_1, "iso-8859-1"),
{CharCodePage::ISO_8859_2, "iso-8859-2"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_2, "iso-8859-2"),
{CharCodePage::ISO_8859_3, "iso-8859-3"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_3, "iso-8859-3"),
{CharCodePage::ISO_8859_4, "iso-8859-4"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_4, "iso-8859-4"),
{CharCodePage::ISO_8859_5, "iso-8859-5"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_5, "iso-8859-5"),
{CharCodePage::ISO_8859_6, "iso-8859-6"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_6, "iso-8859-6"),
{CharCodePage::ISO_8859_7, "iso-8859-7"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_7, "iso-8859-7"),
{CharCodePage::ISO_8859_8, "iso-8859-8"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_8, "iso-8859-8"),
{CharCodePage::ISO_8859_9, "iso-8859-9"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_9, "iso-8859-9"),
{CharCodePage::ISO_8859_13, "iso-8859-13"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_13, "iso-8859-13"),
{CharCodePage::ISO_8859_15, "iso-8859-15"}, HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_15, "iso-8859-15"),
{CharCodePage::UTF7, "utf-7" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF7, "utf-7" ),
{CharCodePage::UTF8, "utf-8" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF8, "utf-8" ),
{CharCodePage::UTF16LE, "utf-16le" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF16LE, "utf-16le" ),
{CharCodePage::UTF16BE, "utf-16be" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF16BE, "utf-16be" ),
{CharCodePage::UTF32LE, "utf-32le" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF32LE, "utf-32le" ),
{CharCodePage::UTF32BE, "utf-32be" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF32BE, "utf-32be" ),
{CharCodePage::UTF16LE, "utf-16" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF16LE, "utf-16" ),
{CharCodePage::UTF16BE, "unicodeFFFE"}, HGL_CODE_PAGE_AND_CHAR_SET(UTF16BE, "unicodeFFFE"),
{CharCodePage::UTF16LE, "ucs-2le" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF16LE, "ucs-2le" ),
{CharCodePage::UTF16BE, "ucs-2be" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF16BE, "ucs-2be" ),
{CharCodePage::UTF32LE, "ucs-4le" }, HGL_CODE_PAGE_AND_CHAR_SET(UTF32LE, "ucs-4le" ),
{CharCodePage::UTF32BE, "ucs-4be" } HGL_CODE_PAGE_AND_CHAR_SET(UTF32BE, "ucs-4be" )
};//const struct CharSet Characters };//const struct CharSet Characters
constexpr int CharPageCount=sizeof(CodeSet2CharPage)/sizeof(CodePageAndCharSet); constexpr int CharPageCount=sizeof(CodeSet2CharPage)/sizeof(CodePageAndCharSet);
inline CharCodePage FindCodePage(const u8char *char_set) inline uint16 FindCodePage(const u8char *char_set)
{ {
for(int i=0;i<CharPageCount;i++) for(int i=0;i<CharPageCount;i++)
if(!charset_cmp(CodePage2CharSet[i].charset,char_set)) if(!charset_cmp(CodePage2CharSet[i].charset,char_set))
return CodePage2CharSet[i].codepage; return CodePage2CharSet[i].codepage;
return CharCodePage::NONE; return (uint16)CharCodePage::NONE;
} }
struct CharSet struct CharSet
{ {
CharCodePage codepage; uint16 codepage;
CharSetName charset; CharSetName charset;
public: public:
CharSet() CharSet()
{ {
codepage=CharCodePage::NONE; codepage=0;
hgl::strcpy(charset,CHAR_SET_NAME_MAX_LENGTH,"us-ascii"); hgl::strcpy(charset,CHAR_SET_NAME_MAX_LENGTH,"us-ascii");
} }
CharSet(CharCodePage ccp,const char *cs) CharSet(uint16 ccp,const char *cs)
{ {
codepage=ccp; codepage=ccp;
hgl::strcpy(charset,CHAR_SET_NAME_MAX_LENGTH,cs); hgl::strcpy(charset,CHAR_SET_NAME_MAX_LENGTH,cs);
} }
CharSet(CharCodePage); CharSet(uint16);
CharSet(const u8char *); CharSet(const u8char *);
CharSet(const CodePageAndCharSet &cs) CharSet(const CodePageAndCharSet &cs)
@@ -153,7 +162,7 @@ namespace hgl
CompOperator(const CharSet &,_Comp) CompOperator(const CharSet &,_Comp)
};//struct CharacterSet };//struct CharacterSet
inline CharSet::CharSet(CharCodePage ccp) inline CharSet::CharSet(uint16 ccp)
{ {
codepage=ccp; codepage=ccp;
hgl::strcpy(charset,CHAR_SET_NAME_MAX_LENGTH,FindCharSet(ccp)); hgl::strcpy(charset,CHAR_SET_NAME_MAX_LENGTH,FindCharSet(ccp));

View File

@@ -24,6 +24,9 @@ namespace hgl
//日文 //日文
ShiftJIS =932, ///<日文ShiftJIS ShiftJIS =932, ///<日文ShiftJIS
EUC_JP =20932, ///<日文JIS 0208-1990 and 0212-1990
ISO2022JP =50220, ///<ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
csISO2022JP =50221, ///<ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
JISX =50222, ///<日文JIS X/ISO 2022 JISX =50222, ///<日文JIS X/ISO 2022
//韩文 //韩文
@@ -105,7 +108,7 @@ namespace hgl
ByteOrderMask bom; ///<字节序枚举 ByteOrderMask bom; ///<字节序枚举
const CharSetName *char_set;///<字符集名称 const CharSetName *char_set;///<字符集名称
CharCodePage code_page; ///<代码页 uint16 code_page; ///<代码页
}; };
/** /**
@@ -113,11 +116,11 @@ namespace hgl
*/ */
constexpr BOMFileHeader BOMData[size_t(ByteOrderMask::RANGE_SIZE)]= constexpr BOMFileHeader BOMData[size_t(ByteOrderMask::RANGE_SIZE)]=
{ {
{3,{0xEF,0xBB,0xBF} ,ByteOrderMask::UTF8, &utf8_charset ,CharCodePage::UTF8 }, {3,{0xEF,0xBB,0xBF} ,ByteOrderMask::UTF8, &utf8_charset ,(uint16)CharCodePage::UTF8 },
{2,{0xFF,0xFE} ,ByteOrderMask::UTF16LE,&utf16le_charset ,CharCodePage::UTF16LE }, {2,{0xFF,0xFE} ,ByteOrderMask::UTF16LE,&utf16le_charset ,(uint16)CharCodePage::UTF16LE },
{2,{0xFE,0xFF} ,ByteOrderMask::UTF16BE,&utf16be_charset ,CharCodePage::UTF16BE }, {2,{0xFE,0xFF} ,ByteOrderMask::UTF16BE,&utf16be_charset ,(uint16)CharCodePage::UTF16BE },
{4,{0xFF,0xFE,0x00,0x00},ByteOrderMask::UTF32LE,&utf32le_charset ,CharCodePage::UTF32LE }, {4,{0xFF,0xFE,0x00,0x00},ByteOrderMask::UTF32LE,&utf32le_charset ,(uint16)CharCodePage::UTF32LE },
{4,{0x00,0x00,0xFE,0xFF},ByteOrderMask::UTF32BE,&utf32be_charset ,CharCodePage::UTF32BE } {4,{0x00,0x00,0xFE,0xFF},ByteOrderMask::UTF32BE,&utf32be_charset ,(uint16)CharCodePage::UTF32BE }
}; };
inline ByteOrderMask CheckBOM(const void *data) inline ByteOrderMask CheckBOM(const void *data)

View File

@@ -5,13 +5,15 @@
#include<math.h> #include<math.h>
namespace hgl namespace hgl
{ {
#define HGL_OFFICAL_WEB "www.hyzgame.com" #define HGL_OFFICAL_WEB "www.hyzgame.com"
#define HGL_OFFICAL_WEB_U8 u8"www.hyzgame.com" #define HGL_OFFICAL_WEB_U8 U8_TEXT("www.hyzgame.com")
#define HGL_OFFICAL_WEB_URL "http://www.hyzgame.com" #define HGL_OFFICAL_WEB_OS OS_TEXT("www.hyzgame.com")
#define HGL_OFFICAL_WEB_URL_u8 u8"http://www.hyzgame.com" #define HGL_OFFICAL_WEB_URL "http://www.hyzgame.com"
#define HGL_OFFICAL_WEB_URL_u8 U8_TEXT("http://www.hyzgame.com")
#define HGL_OFFICAL_WEB_URL_OS OS_TEXT("http://www.hyzgame.com")
#define HGL_COPYRIGHT_STRING "(C)Copyright 2022 www.hyzgame.com" #define HGL_COPYRIGHT_STRING "(C)Copyright 2022 www.hyzgame.com"
#define HGL_COPYRIGHT_STRING_U8 u8"(C)Copyright 2022 www.hyzgame.com" #define HGL_COPYRIGHT_STRING_U8 U8_TEXT("(C)Copyright 2022 www.hyzgame.com")
#define HGL_COPYRIGHT_STRING_OS OS_TEXT("(C)Copyright 2022 www.hyzgame.com") #define HGL_COPYRIGHT_STRING_OS OS_TEXT("(C)Copyright 2022 www.hyzgame.com")
#define ENUM_CLASS_RANGE(begin,end) BEGIN_RANGE=begin,END_RANGE=end,RANGE_SIZE=(END_RANGE-BEGIN_RANGE)+1 #define ENUM_CLASS_RANGE(begin,end) BEGIN_RANGE=begin,END_RANGE=end,RANGE_SIZE=(END_RANGE-BEGIN_RANGE)+1

View File

@@ -4,9 +4,9 @@ namespace hgl
{ {
CharSet DefaultCharSet(); CharSet DefaultCharSet();
CharSet UTF8CharSet (CharCodePage::UTF8, utf8_charset ); CharSet UTF8CharSet ((uint16)CharCodePage::UTF8, utf8_charset );
CharSet UTF16LECharSet (CharCodePage::UTF16LE,utf16le_charset ); CharSet UTF16LECharSet ((uint16)CharCodePage::UTF16LE,utf16le_charset );
CharSet UTF16BECharSet (CharCodePage::UTF16BE,utf16be_charset ); CharSet UTF16BECharSet ((uint16)CharCodePage::UTF16BE,utf16be_charset );
int u16_to_u8(u8char *dst,int dst_size,const u16char *src,const int src_size) int u16_to_u8(u8char *dst,int dst_size,const u16char *src,const int src_size)
{ {