You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nuttx.apache.org by "Donny9 (via GitHub)" <gi...@apache.org> on 2023/03/13 12:36:52 UTC
[GitHub] [nuttx] Donny9 opened a new pull request, #8802: libc/locale: support iconv_open,iconv,iconv_close
Donny9 opened a new pull request, #8802:
URL: https://github.com/apache/nuttx/pull/8802
## Summary
libc/locale: support iconv_open,iconv,iconv_close
Refs to:https://github.com/esmil/musl/tree/master/src/locale
Control which encodings are built in via the config options: LIBC_LOCALE_JAPANESE/LIBC_LOCALE_CHINESE/LIBC_LOCALE_KOREAN/LIBC_LOCALE_CODEPAGES
## Impact
support iconv function
## Testing
local compile test.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] Donny9 commented on a diff in pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "Donny9 (via GitHub)" <gi...@apache.org>.
Donny9 commented on code in PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#discussion_r1136563771
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
Review Comment:
Done!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] Donny9 commented on a diff in pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "Donny9 (via GitHub)" <gi...@apache.org>.
Donny9 commented on code in PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#discussion_r1136563280
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
Review Comment:
Done!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] Donny9 commented on pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "Donny9 (via GitHub)" <gi...@apache.org>.
Donny9 commented on PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#issuecomment-1471198556
> my question was why you are referring this particular (seemingly old) fork of musl
> instead of the official one: https://git.musl-libc.org/cgit/musl
@yamt Sorry, I wrote the wrong link for musl; it's https://github.com/bminor/musl, which is close to the official version.
djz:musl$ git remote -v
origin git@github.com:bminor/musl.git (fetch)
origin git@github.com:bminor/musl.git (push)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] Donny9 commented on pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "Donny9 (via GitHub)" <gi...@apache.org>.
Donny9 commented on PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#issuecomment-1469298799
>
> > Refs to:https://github.com/esmil/musl/tree/master/src/locale
>
> do you mean you based this on this version of musl? why?
@yamt Because the musl library has a complete locale feature set and implementation. Do you have any suggestions about this port? Should another library be used?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] btashton merged pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "btashton (via GitHub)" <gi...@apache.org>.
btashton merged PR #8802:
URL: https://github.com/apache/nuttx/pull/8802
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] yamt commented on pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "yamt (via GitHub)" <gi...@apache.org>.
yamt commented on PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#issuecomment-1470268763
> > > Refs to:https://github.com/esmil/musl/tree/master/src/locale
> >
> >
> > do you mean you based this on this version of musl? why?
> > @yamt Because musl library has complete feature and achieve about locale, what are you have some suggests about this porting? other library?
my question was why you are referring to this particular (seemingly old) fork of musl
instead of the official one: https://git.musl-libc.org/cgit/musl
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] Donny9 commented on a diff in pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "Donny9 (via GitHub)" <gi...@apache.org>.
Donny9 commented on code in PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#discussion_r1136563094
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
Review Comment:
Done!
##########
include/iconv.h:
##########
@@ -0,0 +1,60 @@
+/********************************************************************************
+ * include/iconv.h
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ********************************************************************************/
Review Comment:
Done!
##########
include/iconv.h:
##########
@@ -0,0 +1,60 @@
+/********************************************************************************
Review Comment:
Done!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] xiaoxiang781216 commented on pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "xiaoxiang781216 (via GitHub)" <gi...@apache.org>.
xiaoxiang781216 commented on PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#issuecomment-1522174667
> Please get CI pass
@pkarashchenko the warning comes from big5.h, which is an encoding table, so it is better to keep it as is.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] Donny9 commented on pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "Donny9 (via GitHub)" <gi...@apache.org>.
Donny9 commented on PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#issuecomment-1466073146
Let's ignore the coding style issues in these header files.
```
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:1: error: Missing file header comment block
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:5: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:11: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:17: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:23: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:29: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:34: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:40: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:46: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:52: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:58: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:64: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:1:69: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:2:0: error: Relative file path does not match actual file
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:2:5: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:2:11: error: Missing whitespace after comma
Error: /home/runner/work/nuttx/nuttx/nuttx/libs/libc/locale/big5.h:2:[17](https://github.com/apache/nuttx/actions/runs/4404962420/jobs/7715248014#step:3:18): error: Missing whitespace after comma
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] pkarashchenko commented on a diff in pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "pkarashchenko (via GitHub)" <gi...@apache.org>.
pkarashchenko commented on code in PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#discussion_r1136312153
##########
include/iconv.h:
##########
@@ -0,0 +1,60 @@
+/********************************************************************************
Review Comment:
```suggestion
/****************************************************************************
```
##########
include/iconv.h:
##########
@@ -0,0 +1,60 @@
+/********************************************************************************
+ * include/iconv.h
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ********************************************************************************/
Review Comment:
```suggestion
****************************************************************************/
```
and same in all similar places
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
Review Comment:
```suggestion
# include "codepages.h"
```
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
Review Comment:
```suggestion
if (*(FAR char *)name == '\0')
```
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
+ {
+ if (!fuzzycmp(name, s))
+ {
+ for (; *s; s += strlen((FAR void *)s) + 1);
+ return s + 1 - g_charmaps;
+ }
+
+ s += strlen((FAR void *)s)+1;
+ if (*s == 0)
Review Comment:
```suggestion
if (*s == '\0')
```
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
+ {
+ if (!fuzzycmp(name, s))
+ {
+ for (; *s; s += strlen((FAR void *)s) + 1);
+ return s + 1 - g_charmaps;
+ }
+
+ s += strlen((FAR void *)s)+1;
Review Comment:
```suggestion
s += strlen((FAR void *)s) + 1;
```
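Why cast to `void *` here? `s` is a `FAR const unsigned char *` and `strlen()` takes a `const char *`, so casting to `FAR const char *` would state the intent and keep the `const` qualifier, which the `void *` cast drops.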
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
+ {
+ if (!fuzzycmp(name, s))
+ {
+ for (; *s; s += strlen((FAR void *)s) + 1);
+ return s + 1 - g_charmaps;
+ }
+
+ s += strlen((FAR void *)s)+1;
+ if (*s == 0)
+ {
+ if (s[1] > 0200)
+ {
+ s += 2;
+ }
+ else
+ {
+ s += 2 + (64U - s[1]) * 5;
+ }
+ }
+ }
+
+ return -1;
+}
+
+static iconv_t combine_to_from(size_t t, size_t f)
+{
+ return (FAR void *)(f << 16 | t << 1 | 1);
+}
+
+static size_t extract_from(iconv_t cd)
+{
+ return (size_t)cd >> 16;
+}
+
+static size_t extract_to(iconv_t cd)
+{
+ return (size_t)cd >> 1 & 0x7fff;
+}
+
+static unsigned get_16(FAR const unsigned char *s, int e)
+{
+ e &= 1;
+ return s[e] << 8 | s[1 - e];
+}
+
+static void put_16(FAR unsigned char *s, unsigned c, int e)
+{
+ e &= 1;
+ s[e] = c >> 8;
+ s[1 - e] = c;
+}
+
+static unsigned get_32(FAR const unsigned char *s, int e)
+{
+ e &= 3;
+ return (s[e] + 0U) << 24 | s[e ^ 1] << 16 | s[e ^ 2] << 8 | s[e ^ 3];
+}
+
+static void put_32(FAR unsigned char *s, unsigned c, int e)
+{
+ e &= 3;
+ s[e ^ 0] = c >> 24;
+ s[e ^ 1] = c >> 16;
+ s[e ^ 2] = c >> 8;
+ s[e ^ 3] = c;
+}
+
+static unsigned legacy_map(const unsigned char *map, unsigned c)
+{
+ unsigned x;
+
+ if (c < 4 * map[0 - 1])
+ {
+ return c;
+ }
+
+ x = c - 4 * map[0 - 1];
+ x = (map[x * 5 / 4] >> (2 * x % 8)) |
+ ((map[x * 5 / 4 + 1] << (8 - 2 * x % 8)) & 1023);
+ return x < 256 ? x : g_legacy_chars[x - 256];
+}
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static unsigned uni_to_jis(unsigned c)
+{
+ unsigned nel = sizeof(g_rev_jis) / sizeof(*g_rev_jis);
Review Comment:
We can use the `nitems()` macro here instead of the open-coded `sizeof(g_rev_jis) / sizeof(*g_rev_jis)`.
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
Review Comment:
```suggestion
for (s = g_charmaps; *s != '\0'; )
```
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
Review Comment:
```suggestion
#include "legacychars.h"
```
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
+ {
+ if (!fuzzycmp(name, s))
+ {
+ for (; *s; s += strlen((FAR void *)s) + 1);
Review Comment:
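Why cast to `void *` in the `strlen()` call? `s` is a `FAR const unsigned char *` and `strlen()` takes a `const char *`, so casting to `FAR const char *` would state the intent and keep the `const` qualifier, which the `void *` cast drops.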
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
+ {
+ if (!fuzzycmp(name, s))
+ {
+ for (; *s; s += strlen((FAR void *)s) + 1);
+ return s + 1 - g_charmaps;
+ }
+
+ s += strlen((FAR void *)s)+1;
+ if (*s == 0)
+ {
+ if (s[1] > 0200)
+ {
+ s += 2;
+ }
+ else
+ {
+ s += 2 + (64U - s[1]) * 5;
+ }
+ }
+ }
+
+ return -1;
+}
+
+static iconv_t combine_to_from(size_t t, size_t f)
+{
+ return (FAR void *)(f << 16 | t << 1 | 1);
Review Comment:
why cast to `void *` and not to `iconv_t`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] pkarashchenko commented on pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "pkarashchenko (via GitHub)" <gi...@apache.org>.
pkarashchenko commented on PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#issuecomment-1522058645
Please get the CI to pass.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] Donny9 commented on pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "Donny9 (via GitHub)" <gi...@apache.org>.
Donny9 commented on PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#issuecomment-1505103505
> > > my question was why you are referring to this particular (seemingly old) fork of musl
> > > instead of the official one: https://git.musl-libc.org/cgit/musl
> >
> >
> > @yamt Sorry, I gave the wrong link for musl; it's https://github.com/bminor/musl, which is close to the official version.
> > djz:musl$ git remote -v
> > origin git@github.com:bminor/musl.git (fetch)
> > origin git@github.com:bminor/musl.git (push)
>
> ok. it makes sense then.
@yamt Please merge this pr, thank you~
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] acassis commented on pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "acassis (via GitHub)" <gi...@apache.org>.
acassis commented on PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#issuecomment-1468927957
@Donny9 is it possible to include an example application to test these locales? Chinese, Korean and Japanese users may enjoy seeing their native languages supported. Does it require fonts to display in the NXText example or the LVGL demo?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] yamt commented on pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "yamt (via GitHub)" <gi...@apache.org>.
yamt commented on PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#issuecomment-1478731296
> > my question was why you are referring to this particular (seemingly old) fork of musl
> > instead of the official one: https://git.musl-libc.org/cgit/musl
>
> @yamt Sorry, I gave the wrong link for musl; it's https://github.com/bminor/musl, which is close to the official version.
>
> djz:musl$ git remote -v
> origin git@github.com:bminor/musl.git (fetch)
> origin git@github.com:bminor/musl.git (push)
ok. it makes sense then.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] yamt commented on pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "yamt (via GitHub)" <gi...@apache.org>.
yamt commented on PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#issuecomment-1469046560
> Refs to:https://github.com/esmil/musl/tree/master/src/locale
do you mean you based this on this version of musl? why?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] Donny9 commented on a diff in pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "Donny9 (via GitHub)" <gi...@apache.org>.
Donny9 commented on code in PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#discussion_r1136562842
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
+ {
+ if (!fuzzycmp(name, s))
+ {
+ for (; *s; s += strlen((FAR void *)s) + 1);
+ return s + 1 - g_charmaps;
+ }
+
+ s += strlen((FAR void *)s)+1;
+ if (*s == 0)
+ {
+ if (s[1] > 0200)
+ {
+ s += 2;
+ }
+ else
+ {
+ s += 2 + (64U - s[1]) * 5;
+ }
+ }
+ }
+
+ return -1;
+}
+
+static iconv_t combine_to_from(size_t t, size_t f)
+{
+ return (FAR void *)(f << 16 | t << 1 | 1);
+}
+
+static size_t extract_from(iconv_t cd)
+{
+ return (size_t)cd >> 16;
+}
+
+static size_t extract_to(iconv_t cd)
+{
+ return (size_t)cd >> 1 & 0x7fff;
+}
+
+static unsigned get_16(FAR const unsigned char *s, int e)
+{
+ e &= 1;
+ return s[e] << 8 | s[1 - e];
+}
+
+static void put_16(FAR unsigned char *s, unsigned c, int e)
+{
+ e &= 1;
+ s[e] = c >> 8;
+ s[1 - e] = c;
+}
+
+static unsigned get_32(FAR const unsigned char *s, int e)
+{
+ e &= 3;
+ return (s[e] + 0U) << 24 | s[e ^ 1] << 16 | s[e ^ 2] << 8 | s[e ^ 3];
+}
+
+static void put_32(FAR unsigned char *s, unsigned c, int e)
+{
+ e &= 3;
+ s[e ^ 0] = c >> 24;
+ s[e ^ 1] = c >> 16;
+ s[e ^ 2] = c >> 8;
+ s[e ^ 3] = c;
+}
+
+static unsigned legacy_map(const unsigned char *map, unsigned c)
+{
+ unsigned x;
+
+ if (c < 4 * map[0 - 1])
+ {
+ return c;
+ }
+
+ x = c - 4 * map[0 - 1];
+ x = (map[x * 5 / 4] >> (2 * x % 8)) |
+ ((map[x * 5 / 4 + 1] << (8 - 2 * x % 8)) & 1023);
+ return x < 256 ? x : g_legacy_chars[x - 256];
+}
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static unsigned uni_to_jis(unsigned c)
+{
+ unsigned nel = sizeof(g_rev_jis) / sizeof(*g_rev_jis);
Review Comment:
Done!
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
+ {
+ if (!fuzzycmp(name, s))
+ {
+ for (; *s; s += strlen((FAR void *)s) + 1);
+ return s + 1 - g_charmaps;
+ }
+
+ s += strlen((FAR void *)s)+1;
+ if (*s == 0)
+ {
+ if (s[1] > 0200)
+ {
+ s += 2;
+ }
+ else
+ {
+ s += 2 + (64U - s[1]) * 5;
+ }
+ }
+ }
+
+ return -1;
+}
+
+static iconv_t combine_to_from(size_t t, size_t f)
+{
+ return (FAR void *)(f << 16 | t << 1 | 1);
Review Comment:
Done!
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
+ {
+ if (!fuzzycmp(name, s))
+ {
+ for (; *s; s += strlen((FAR void *)s) + 1);
+ return s + 1 - g_charmaps;
+ }
+
+ s += strlen((FAR void *)s)+1;
Review Comment:
Done!
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
+ {
+ if (!fuzzycmp(name, s))
+ {
+ for (; *s; s += strlen((FAR void *)s) + 1);
+ return s + 1 - g_charmaps;
+ }
+
+ s += strlen((FAR void *)s)+1;
+ if (*s == 0)
Review Comment:
Done!
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
+ {
+ /* "utf8" */
+
+ name = g_charmaps;
+ }
+
+ for (s = g_charmaps; *s; )
+ {
+ if (!fuzzycmp(name, s))
+ {
+ for (; *s; s += strlen((FAR void *)s) + 1);
Review Comment:
Done!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [nuttx] Donny9 commented on a diff in pull request #8802: libc/locale: support iconv_open,iconv,iconv_close
Posted by "Donny9 (via GitHub)" <gi...@apache.org>.
Donny9 commented on code in PR #8802:
URL: https://github.com/apache/nuttx/pull/8802#discussion_r1136562961
##########
libs/libc/locale/lib_iconv.c:
##########
@@ -0,0 +1,1423 @@
+/****************************************************************************
+ * libs/libc/locale/lib_iconv.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership. The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <iconv.h>
+#include <errno.h>
+#include <wchar.h>
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdint.h>
+#include <locale.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+#define UTF_32BE 0300
+#define UTF_16LE 0301
+#define UTF_16BE 0302
+#define UTF_32LE 0303
+#define UCS2BE 0304
+#define UCS2LE 0305
+#define WCHAR_T 0306
+#define US_ASCII 0307
+#define UTF_8 0310
+#define UTF_16 0312
+#define UTF_32 0313
+#define UCS2 0314
+#define EUC_JP 0320
+#define SHIFT_JIS 0321
+#define ISO2022_JP 0322
+#define GB18030 0330
+#define GBK 0331
+#define GB2312 0332
+#define BIG5 0340
+#define EUC_KR 0350
+
+/****************************************************************************
+ * Private Types
+ ****************************************************************************/
+
+struct stateful_cd
+{
+ iconv_t base_cd;
+ unsigned state;
+};
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* Definitions of g_charmaps. Each charmap consists of:
+ * 1. Empty-string-terminated list of null-terminated aliases.
+ * 2. Special type code or number of elided quads of entries.
+ * 3. Character table (size determined by field 2), consisting
+ * of 5 bytes for every 4 characters, interpreted as 10-bit
+ * indices into the g_legacy_chars table.
+ */
+
+static const unsigned char g_charmaps[] =
+{
+ "utf8\0char\0\0\310"
+ "wchart\0\0\306"
+ "ucs2be\0\0\304"
+ "ucs2le\0\0\305"
+ "utf16be\0\0\302"
+ "utf16le\0\0\301"
+ "ucs4be\0utf32be\0\0\300"
+ "ucs4le\0utf32le\0\0\303"
+ "ascii\0usascii\0iso646\0iso646us\0\0\307"
+ "utf16\0\0\312"
+ "ucs4\0utf32\0\0\313"
+ "ucs2\0\0\314"
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+ "eucjp\0\0\320"
+ "shiftjis\0sjis\0\0\321"
+ "iso2022jp\0\0\322"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+ "gb18030\0\0\330"
+ "gbk\0\0\331"
+ "gb2312\0\0\332"
+ "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+ "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
+#endif
+#ifdef CONFIG_LIBC_LOCALE_CODEPAGES
+ #include "codepages.h"
+#endif
+};
+
+/* Table of characters that appear in legacy 8-bit codepages,
+ * limited to 1024 slots (10 bit indices). The first 256 entries
+ * are elided since those characters are obviously all included.
+ */
+
+static const unsigned short g_legacy_chars[] =
+{
+ #include "legacychars.h"
+};
+
+#ifdef CONFIG_LIBC_LOCALE_JAPANESE
+static const unsigned short g_jis0208[84][94] =
+{
+ #include "jis0208.h"
+};
+
+static const unsigned short g_rev_jis[] =
+{
+ #include "revjis.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_CHINESE
+static const unsigned short g_gb18030[126][190] =
+{
+ #include "gb18030.h"
+};
+
+static const unsigned short g_big5[89][157] =
+{
+ #include "big5.h"
+};
+
+static const unsigned short g_hkscs[] =
+{
+ #include "hkscs.h"
+};
+#endif
+
+#ifdef CONFIG_LIBC_LOCALE_KOREAN
+static const unsigned short g_ksc[93][94] =
+{
+ #include "ksc.h"
+};
+#endif
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+static int fuzzycmp(FAR const unsigned char *a, FAR const unsigned char *b)
+{
+ for (; *a && *b; a++, b++)
+ {
+ while (*a && (*a | 32U) - 'a' > 26 && *a - '0' > 10U)
+ a++;
+
+ if ((*a | 32U) != *b)
+ {
+ return 1;
+ }
+ }
+
+ return *a != *b;
+}
+
+static size_t find_charmap(FAR const void *name)
+{
+ FAR const unsigned char *s;
+
+ if (*(FAR char *)name == 0)
Review Comment:
Done!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@nuttx.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org