used uint16 instead of CharCodePage at CodePage.h/Endian.h

This commit is contained in:
2022-06-13 18:15:31 +08:00
parent fb765038f0
commit 1874288c2a
4 changed files with 104 additions and 90 deletions

View File

@@ -7,52 +7,58 @@ namespace hgl
{
/**
 * One row of a code page / character set name lookup table:
 * a numeric code page paired with the name of its character set.
 */
struct CodePageAndCharSet
{
// Code page identifier.
// NOTE(review): the member is listed twice because this listing interleaves the
// pre-change line (CharCodePage) with the post-change line (uint16); per the commit
// title the uint16 form is the one that replaces the enum-typed member.
CharCodePage codepage;
uint16 codepage;
// Character set name associated with the code page.
CharSetName charset;
};
// Builds one CodePageAndCharSet initializer: the named CharCodePage enumerator
// converted to uint16, followed by the character set name string.
#define HGL_CODE_PAGE_AND_CHAR_SET(codepage,charset) {uint16(CharCodePage::codepage),charset}
// Code page -> character set name table (one name per code page entry).
// NOTE(review): rows written as {CharCodePage::X, ...} are the pre-change form and
// rows written with HGL_CODE_PAGE_AND_CHAR_SET(...) are the post-change form; this
// listing interleaves both. The macro rows also add EUC_JP, ISO2022JP and csISO2022JP.
constexpr struct CodePageAndCharSet CodePage2CharSet[]=
{
{CharCodePage::NONE, "us-ascii" },
{CharCodePage::GBK, "gbk" },
{CharCodePage::Big5, "big5" },
{CharCodePage::GB2312, "gb2312" },
{CharCodePage::GB18030, "gb18030" },
HGL_CODE_PAGE_AND_CHAR_SET(NONE, "us-ascii" ),
{CharCodePage::ShiftJIS, "shift-jis" },
{CharCodePage::JISX, "iso-2022-jp" },
HGL_CODE_PAGE_AND_CHAR_SET(GBK, "gbk" ),
HGL_CODE_PAGE_AND_CHAR_SET(Big5, "big5" ),
HGL_CODE_PAGE_AND_CHAR_SET(GB2312, "gb2312" ),
HGL_CODE_PAGE_AND_CHAR_SET(GB18030, "gb18030" ),
HGL_CODE_PAGE_AND_CHAR_SET(ShiftJIS, "shift-jis" ),
HGL_CODE_PAGE_AND_CHAR_SET(EUC_JP, "EUC-JP" ),
HGL_CODE_PAGE_AND_CHAR_SET(ISO2022JP, "iso-2022-jp" ),
HGL_CODE_PAGE_AND_CHAR_SET(csISO2022JP, "csISO2022JP" ),
HGL_CODE_PAGE_AND_CHAR_SET(JISX, "iso-2022-jp" ),
{CharCodePage::Korean, "ks_c_5601-1987"},
HGL_CODE_PAGE_AND_CHAR_SET(Korean, "ks_c_5601-1987"),
{CharCodePage::MacJanpan, "x-mac-japanese" },
{CharCodePage::MacTraditionalChinese, "x-mac-chinesetrad" },
{CharCodePage::MacSimplifiedChinese, "x-mac-chinesesimp" },
HGL_CODE_PAGE_AND_CHAR_SET(MacJanpan, "x-mac-japanese" ),
HGL_CODE_PAGE_AND_CHAR_SET(MacTraditionalChinese, "x-mac-chinesetrad" ),
HGL_CODE_PAGE_AND_CHAR_SET(MacSimplifiedChinese, "x-mac-chinesesimp" ),
{CharCodePage::ISO_8859_1, "iso-8859-1"},
{CharCodePage::ISO_8859_2, "iso-8859-2"},
{CharCodePage::ISO_8859_3, "iso-8859-3"},
{CharCodePage::ISO_8859_4, "iso-8859-4"},
{CharCodePage::ISO_8859_5, "iso-8859-5"},
{CharCodePage::ISO_8859_6, "iso-8859-6"},
{CharCodePage::ISO_8859_7, "iso-8859-7"},
{CharCodePage::ISO_8859_8, "iso-8859-8"},
{CharCodePage::ISO_8859_9, "iso-8859-9"},
{CharCodePage::ISO_8859_13, "iso-8859-13"},
{CharCodePage::ISO_8859_15, "iso-8859-15"},
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_1, "iso-8859-1"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_2, "iso-8859-2"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_3, "iso-8859-3"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_4, "iso-8859-4"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_5, "iso-8859-5"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_6, "iso-8859-6"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_7, "iso-8859-7"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_8, "iso-8859-8"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_9, "iso-8859-9"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_13, "iso-8859-13"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_15, "iso-8859-15"),
{CharCodePage::UTF7, "utf-7" },
{CharCodePage::UTF8, "utf-8" },
HGL_CODE_PAGE_AND_CHAR_SET(UTF7, "utf-7" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF8, "utf-8" ),
{CharCodePage::UTF16LE, "utf-16le" },
{CharCodePage::UTF16BE, "utf-16be" },
{CharCodePage::UTF32LE, "utf-32le" },
{CharCodePage::UTF32BE, "utf-32be" },
HGL_CODE_PAGE_AND_CHAR_SET(UTF16LE, "utf-16le" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF16BE, "utf-16be" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF32LE, "utf-32le" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF32BE, "utf-32be" ),
};//const struct
// Number of rows in CodePage2CharSet (total array size divided by the size of one row).
constexpr int CharSetCount=sizeof(CodePage2CharSet)/sizeof(CodePageAndCharSet);
inline const char *FindCharSet(CharCodePage ccp)
inline const char *FindCharSet(uint16 ccp)
{
for(int i=0;i<CharSetCount;i++)
if(CodePage2CharSet[i].codepage==ccp)
@@ -63,84 +69,87 @@ namespace hgl
// Character set name -> code page table. Besides one name per code page it also
// lists alias names that map to an already listed code page (e.g. "bigfive",
// "utf-16", "unicodeFFFE", "ucs-2le"), so it has more rows than there are code pages.
// NOTE(review): rows written as {CharCodePage::X, ...} are the pre-change form and
// rows written with HGL_CODE_PAGE_AND_CHAR_SET(...) are the post-change form; this
// listing interleaves both.
// NOTE(review): "iso-2022-jp" is listed for both ISO2022JP and JISX, so a
// first-match scan by name can only ever yield the ISO2022JP row.
constexpr struct CodePageAndCharSet CodeSet2CharPage[]=
{
{CharCodePage::NONE, "us-ascii" },
HGL_CODE_PAGE_AND_CHAR_SET(NONE, "us-ascii" ),
HGL_CODE_PAGE_AND_CHAR_SET(GBK, "gbk" ),
HGL_CODE_PAGE_AND_CHAR_SET(Big5, "big5" ),
HGL_CODE_PAGE_AND_CHAR_SET(Big5, "bigfive" ),
{CharCodePage::GBK, "gbk" },
HGL_CODE_PAGE_AND_CHAR_SET(GB2312, "gb2312" ),
HGL_CODE_PAGE_AND_CHAR_SET(GB18030, "gb18030" ),
HGL_CODE_PAGE_AND_CHAR_SET(ShiftJIS, "shift_jis" ),
HGL_CODE_PAGE_AND_CHAR_SET(EUC_JP, "EUC-JP" ),
HGL_CODE_PAGE_AND_CHAR_SET(ISO2022JP, "iso-2022-jp" ),
HGL_CODE_PAGE_AND_CHAR_SET(csISO2022JP, "csISO2022JP" ),
HGL_CODE_PAGE_AND_CHAR_SET(JISX, "iso-2022-jp" ),
{CharCodePage::Big5, "big5" },
{CharCodePage::Big5, "bigfive" },
HGL_CODE_PAGE_AND_CHAR_SET(Korean, "ks_c_5601-1987"),
{CharCodePage::GB2312, "gb2312" },
{CharCodePage::GB18030, "gb18030" },
{CharCodePage::ShiftJIS, "shift_jis" },
{CharCodePage::JISX, "iso-2022-jp" },
{CharCodePage::Korean, "ks_c_5601-1987"},
{CharCodePage::MacJanpan, "x-mac-japanese" },
{CharCodePage::MacTraditionalChinese, "x-mac-chinesetrad" },
{CharCodePage::MacSimplifiedChinese, "x-mac-chinesesimp" },
HGL_CODE_PAGE_AND_CHAR_SET(MacJanpan, "x-mac-japanese" ),
HGL_CODE_PAGE_AND_CHAR_SET(MacTraditionalChinese, "x-mac-chinesetrad" ),
HGL_CODE_PAGE_AND_CHAR_SET(MacSimplifiedChinese, "x-mac-chinesesimp" ),
{CharCodePage::ISO_8859_1, "iso-8859-1"},
{CharCodePage::ISO_8859_2, "iso-8859-2"},
{CharCodePage::ISO_8859_3, "iso-8859-3"},
{CharCodePage::ISO_8859_4, "iso-8859-4"},
{CharCodePage::ISO_8859_5, "iso-8859-5"},
{CharCodePage::ISO_8859_6, "iso-8859-6"},
{CharCodePage::ISO_8859_7, "iso-8859-7"},
{CharCodePage::ISO_8859_8, "iso-8859-8"},
{CharCodePage::ISO_8859_9, "iso-8859-9"},
{CharCodePage::ISO_8859_13, "iso-8859-13"},
{CharCodePage::ISO_8859_15, "iso-8859-15"},
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_1, "iso-8859-1"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_2, "iso-8859-2"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_3, "iso-8859-3"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_4, "iso-8859-4"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_5, "iso-8859-5"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_6, "iso-8859-6"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_7, "iso-8859-7"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_8, "iso-8859-8"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_9, "iso-8859-9"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_13, "iso-8859-13"),
HGL_CODE_PAGE_AND_CHAR_SET(ISO_8859_15, "iso-8859-15"),
{CharCodePage::UTF7, "utf-7" },
{CharCodePage::UTF8, "utf-8" },
{CharCodePage::UTF16LE, "utf-16le" },
{CharCodePage::UTF16BE, "utf-16be" },
{CharCodePage::UTF32LE, "utf-32le" },
{CharCodePage::UTF32BE, "utf-32be" },
HGL_CODE_PAGE_AND_CHAR_SET(UTF7, "utf-7" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF8, "utf-8" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF16LE, "utf-16le" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF16BE, "utf-16be" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF32LE, "utf-32le" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF32BE, "utf-32be" ),
{CharCodePage::UTF16LE, "utf-16" },
{CharCodePage::UTF16BE, "unicodeFFFE"},
HGL_CODE_PAGE_AND_CHAR_SET(UTF16LE, "utf-16" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF16BE, "unicodeFFFE"),
{CharCodePage::UTF16LE, "ucs-2le" },
{CharCodePage::UTF16BE, "ucs-2be" },
{CharCodePage::UTF32LE, "ucs-4le" },
{CharCodePage::UTF32BE, "ucs-4be" }
HGL_CODE_PAGE_AND_CHAR_SET(UTF16LE, "ucs-2le" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF16BE, "ucs-2be" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF32LE, "ucs-4le" ),
HGL_CODE_PAGE_AND_CHAR_SET(UTF32BE, "ucs-4be" )
};//const struct CharSet Characters
// Number of rows in CodeSet2CharPage (NOT CodePage2CharSet): total array size
// divided by the size of one row. Only valid as a bound when indexing CodeSet2CharPage.
constexpr int CharPageCount=sizeof(CodeSet2CharPage)/sizeof(CodePageAndCharSet);
inline CharCodePage FindCodePage(const u8char *char_set)
inline uint16 FindCodePage(const u8char *char_set)
{
for(int i=0;i<CharPageCount;i++)
if(!charset_cmp(CodePage2CharSet[i].charset,char_set))
return CodePage2CharSet[i].codepage;
return CharCodePage::NONE;
return (uint16)CharCodePage::NONE;
}
struct CharSet
{
CharCodePage codepage;
uint16 codepage;
CharSetName charset;
public:
CharSet()
{
codepage=CharCodePage::NONE;
codepage=0;
hgl::strcpy(charset,CHAR_SET_NAME_MAX_LENGTH,"us-ascii");
}
/**
 * Construct from an explicit code page and character set name.
 *
 * @param ccp code page value stored in codepage
 * @param cs  character set name; copied into charset with hgl::strcpy using
 *            CHAR_SET_NAME_MAX_LENGTH as the destination limit
 *
 * NOTE(review): two signature lines are shown because this listing interleaves the
 * pre-change (CharCodePage) and post-change (uint16) versions of the same constructor.
 */
CharSet(CharCodePage ccp,const char *cs)
CharSet(uint16 ccp,const char *cs)
{
codepage=ccp;
hgl::strcpy(charset,CHAR_SET_NAME_MAX_LENGTH,cs);
}
CharSet(CharCodePage);
CharSet(uint16);
CharSet(const u8char *);
CharSet(const CodePageAndCharSet &cs)
@@ -153,7 +162,7 @@ namespace hgl
CompOperator(const CharSet &,_Comp)
};//struct CharacterSet
inline CharSet::CharSet(CharCodePage ccp)
inline CharSet::CharSet(uint16 ccp)
{
codepage=ccp;
hgl::strcpy(charset,CHAR_SET_NAME_MAX_LENGTH,FindCharSet(ccp));

View File

@@ -24,6 +24,9 @@ namespace hgl
//日文
ShiftJIS =932, ///<日文ShiftJIS
EUC_JP =20932, ///<日文JIS 0208-1990 and 0212-1990
ISO2022JP =50220, ///<ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
csISO2022JP =50221, ///<ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
JISX =50222, ///<日文JIS X/ISO 2022
//韩文
@@ -105,7 +108,7 @@ namespace hgl
ByteOrderMask bom; ///<字节序枚举
const CharSetName *char_set;///<字符集名称
CharCodePage code_page; ///<代码页
uint16 code_page; ///<代码页
};
/**
@@ -113,11 +116,11 @@ namespace hgl
*/
// Table of BOMFileHeader descriptors, sized by ByteOrderMask::RANGE_SIZE.
// Rows are listed in the order UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE; each row
// ends with the ByteOrderMask value, a pointer to the charset name and the code page.
// The leading number and byte list are presumably the BOM length and the BOM bytes
// themselves (the first BOMFileHeader members are not visible here — confirm).
// NOTE(review): every row appears twice because this listing interleaves the
// pre-change rows (CharCodePage values) with the post-change rows ((uint16) casts).
constexpr BOMFileHeader BOMData[size_t(ByteOrderMask::RANGE_SIZE)]=
{
{3,{0xEF,0xBB,0xBF} ,ByteOrderMask::UTF8, &utf8_charset ,CharCodePage::UTF8 },
{2,{0xFF,0xFE} ,ByteOrderMask::UTF16LE,&utf16le_charset ,CharCodePage::UTF16LE },
{2,{0xFE,0xFF} ,ByteOrderMask::UTF16BE,&utf16be_charset ,CharCodePage::UTF16BE },
{4,{0xFF,0xFE,0x00,0x00},ByteOrderMask::UTF32LE,&utf32le_charset ,CharCodePage::UTF32LE },
{4,{0x00,0x00,0xFE,0xFF},ByteOrderMask::UTF32BE,&utf32be_charset ,CharCodePage::UTF32BE }
{3,{0xEF,0xBB,0xBF} ,ByteOrderMask::UTF8, &utf8_charset ,(uint16)CharCodePage::UTF8 },
{2,{0xFF,0xFE} ,ByteOrderMask::UTF16LE,&utf16le_charset ,(uint16)CharCodePage::UTF16LE },
{2,{0xFE,0xFF} ,ByteOrderMask::UTF16BE,&utf16be_charset ,(uint16)CharCodePage::UTF16BE },
{4,{0xFF,0xFE,0x00,0x00},ByteOrderMask::UTF32LE,&utf32le_charset ,(uint16)CharCodePage::UTF32LE },
{4,{0x00,0x00,0xFE,0xFF},ByteOrderMask::UTF32BE,&utf32be_charset ,(uint16)CharCodePage::UTF32BE }
};
inline ByteOrderMask CheckBOM(const void *data)

View File

@@ -5,13 +5,15 @@
#include<math.h>
namespace hgl
{
// Project branding strings: web site host name, web site URL and copyright notice.
// Each string is provided as a plain narrow literal plus variants wrapped in
// U8_TEXT(...) and OS_TEXT(...) (both macros are defined outside this view).
// NOTE(review): the first four #define lines and the u8"..." copyright line are the
// pre-change forms; this listing interleaves them with their post-change replacements.
#define HGL_OFFICAL_WEB "www.hyzgame.com"
#define HGL_OFFICAL_WEB_U8 u8"www.hyzgame.com"
#define HGL_OFFICAL_WEB_URL "http://www.hyzgame.com"
#define HGL_OFFICAL_WEB_URL_u8 u8"http://www.hyzgame.com"
#define HGL_OFFICAL_WEB "www.hyzgame.com"
#define HGL_OFFICAL_WEB_U8 U8_TEXT("www.hyzgame.com")
#define HGL_OFFICAL_WEB_OS OS_TEXT("www.hyzgame.com")
#define HGL_OFFICAL_WEB_URL "http://www.hyzgame.com"
#define HGL_OFFICAL_WEB_URL_u8 U8_TEXT("http://www.hyzgame.com")
#define HGL_OFFICAL_WEB_URL_OS OS_TEXT("http://www.hyzgame.com")
#define HGL_COPYRIGHT_STRING "(C)Copyright 2022 www.hyzgame.com"
#define HGL_COPYRIGHT_STRING_U8 u8"(C)Copyright 2022 www.hyzgame.com"
#define HGL_COPYRIGHT_STRING_U8 U8_TEXT("(C)Copyright 2022 www.hyzgame.com")
#define HGL_COPYRIGHT_STRING_OS OS_TEXT("(C)Copyright 2022 www.hyzgame.com")
// Expands, inside an enum definition, to three enumerators: BEGIN_RANGE (= begin),
// END_RANGE (= end) and RANGE_SIZE (= END_RANGE - BEGIN_RANGE + 1, i.e. an inclusive count).
#define ENUM_CLASS_RANGE(begin,end) BEGIN_RANGE=begin,END_RANGE=end,RANGE_SIZE=(END_RANGE-BEGIN_RANGE)+1

View File

@@ -4,9 +4,9 @@ namespace hgl
{
// NOTE(review): with empty parentheses this line parses as the declaration of a
// function named DefaultCharSet returning CharSet, not as a default-constructed
// object — confirm which of the two is intended.
CharSet DefaultCharSet();
// Predefined CharSet objects for UTF-8, UTF-16LE and UTF-16BE, each built from the
// code page value and the matching charset name.
// NOTE(review): each object appears twice because this listing interleaves the
// pre-change lines (CharCodePage argument) with the post-change lines ((uint16) cast).
CharSet UTF8CharSet (CharCodePage::UTF8, utf8_charset );
CharSet UTF16LECharSet (CharCodePage::UTF16LE,utf16le_charset );
CharSet UTF16BECharSet (CharCodePage::UTF16BE,utf16be_charset );
CharSet UTF8CharSet ((uint16)CharCodePage::UTF8, utf8_charset );
CharSet UTF16LECharSet ((uint16)CharCodePage::UTF16LE,utf16le_charset );
CharSet UTF16BECharSet ((uint16)CharCodePage::UTF16BE,utf16be_charset );
int u16_to_u8(u8char *dst,int dst_size,const u16char *src,const int src_size)
{