You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nuttx.apache.org by ac...@apache.org on 2020/07/31 21:01:14 UTC
[incubator-nuttx] branch master updated: FAT Filesystem: UTF8
support for long filenames,
bugfixes. New CONFIG_FAT_LFN_UTF8: UTF8 strings are converted to UCS2-LFN
Bugfix in fat_createalias: space is now also converted to underbar. Change
(bugfix) in fat_getlfname: init characters (0xff) and '\0' are rewound as
well.
This is an automated email from the ASF dual-hosted git repository.
acassis pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nuttx.git
The following commit(s) were added to refs/heads/master by this push:
new 070f4ed FAT Filesystem: UTF8 support for long filenames, bugfixes. New CONFIG_FAT_LFN_UTF8: UTF8 strings are converted to UCS2-LFN Bugfix in fat_createalias: space is now also converted to underbar. Change (bugfix) in fat_getlfname: init characters (0xff) and '\0' are rewound as well.
070f4ed is described below
commit 070f4ed7e9f7b94c442822e4bfefdf633ebc1868
Author: Johannes Schock <jo...@nivus.com>
AuthorDate: Sun Jul 26 12:52:22 2020 +0200
FAT Filesystem: UTF8 support for long filenames, bugfixes.
New CONFIG_FAT_LFN_UTF8: UTF8 strings are converted to UCS2-LFN
Bugfix in fat_createalias: space is now also converted to underbar.
Change (bugfix) in fat_getlfname: init characters (0xff) and '\0' are rewound as well.
---
fs/fat/Kconfig | 7 ++
fs/fat/fs_fat32.h | 10 +-
fs/fat/fs_fat32dirent.c | 277 ++++++++++++++++++++++++++++++++++++++++++------
3 files changed, 260 insertions(+), 34 deletions(-)
diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig
index fd3bd46..d855767 100644
--- a/fs/fat/Kconfig
+++ b/fs/fat/Kconfig
@@ -58,6 +58,13 @@ config FAT_LFN_ALIAS_HASH
filename. This method is similar to what is used by Windows 2000 and
later.
+config FAT_LFN_UTF8
+ bool "Allow UTF8 long filenames"
+ depends on FAT_LFN
+ default n
+ ---help---
+ UTF8 long filenames are accepted and converted to UCS2.
+
config FAT_LFN_ALIAS_TRAILCHARS
int "Number of trailing characters to use for 8.3 alias"
depends on FAT_LFN
diff --git a/fs/fat/fs_fat32.h b/fs/fat/fs_fat32.h
index 1a916b9..c88179d 100644
--- a/fs/fat/fs_fat32.h
+++ b/fs/fat/fs_fat32.h
@@ -943,6 +943,14 @@ struct fat_dirseq_s
#endif
};
+#ifdef CONFIG_FAT_LFN
+# ifdef CONFIG_FAT_LFN_UTF8
+typedef wchar_t lfnchar;
+# else
+typedef uint8_t lfnchar;
+# endif
+#endif
+
/* This structure is used internally for describing directory entries */
struct fat_dirinfo_s
@@ -950,7 +958,7 @@ struct fat_dirinfo_s
/* The file/directory name */
#ifdef CONFIG_FAT_LFN
- uint8_t fd_lfname[LDIR_MAXFNAME + 1]; /* Long filename with terminator */
+ lfnchar fd_lfname[LDIR_MAXFNAME + 1]; /* Long filename with terminator */
#endif
uint8_t fd_name[DIR_MAXFNAME]; /* Short 8.3 alias filename (no terminator) */
diff --git a/fs/fat/fs_fat32dirent.c b/fs/fat/fs_fat32dirent.c
index 8f7f7c5..bf85f80 100644
--- a/fs/fat/fs_fat32dirent.c
+++ b/fs/fat/fs_fat32dirent.c
@@ -108,10 +108,10 @@ static int fat_path2dirname(FAR const char **path,
static int fat_findsfnentry(FAR struct fat_mountpt_s *fs,
FAR struct fat_dirinfo_s *dirinfo);
#ifdef CONFIG_FAT_LFN
-static bool fat_cmplfnchunk(FAR uint8_t *chunk, FAR const uint8_t *substr,
+static bool fat_cmplfnchunk(FAR uint8_t *chunk, FAR const lfnchar *substr,
int nchunk);
static bool fat_cmplfname(FAR const uint8_t *direntry,
- FAR const uint8_t *substr);
+ FAR const lfnchar *substr);
static inline int fat_findlfnentry(FAR struct fat_mountpt_s *fs,
FAR struct fat_dirinfo_s *dirinfo);
@@ -125,7 +125,7 @@ static inline int fat_allocatelfnentry(FAR struct fat_mountpt_s *fs,
static inline int fat_getsfname(FAR uint8_t *direntry, FAR char *buffer,
unsigned int buflen);
#ifdef CONFIG_FAT_LFN
-static void fat_getlfnchunk(FAR uint8_t *chunk, FAR uint8_t *dest,
+static void fat_getlfnchunk(FAR uint8_t *chunk, FAR lfnchar *dest,
int nchunk);
static inline int fat_getlfname(FAR struct fat_mountpt_s *fs,
FAR struct fs_dirent_s *dir);
@@ -134,7 +134,7 @@ static int fat_putsfname(FAR struct fat_mountpt_s *fs,
FAR struct fat_dirinfo_s *dirinfo);
#ifdef CONFIG_FAT_LFN
static void fat_initlfname(FAR uint8_t *chunk, int nchunk);
-static void fat_putlfnchunk(FAR uint8_t *chunk, FAR const uint8_t *src,
+static void fat_putlfnchunk(FAR uint8_t *chunk, FAR const lfnchar *src,
int nchunk);
static int fat_putlfname(FAR struct fat_mountpt_s *fs,
FAR struct fat_dirinfo_s *dirinfo);
@@ -143,11 +143,103 @@ static int fat_putsfdirentry(FAR struct fat_mountpt_s *fs,
FAR struct fat_dirinfo_s *dirinfo,
uint8_t attributes, uint32_t fattime);
+#if defined(CONFIG_FAT_LFN) && defined(CONFIG_FAT_LFN_UTF8)
+static int fat_utf8toucs(FAR const char **str, lfnchar *ucs);
+static int fat_ucstoutf8(FAR uint8_t *dest, uint8_t offset, lfnchar ucs);
+#endif
+
/****************************************************************************
* Private Functions
****************************************************************************/
/****************************************************************************
+ * Name: fat_utf8toucs
+ *
+ * Description:
+ * Convert the next characters from UTF8 to UCS2.
+ *
+ ****************************************************************************/
+#if defined(CONFIG_FAT_LFN) && defined(CONFIG_FAT_LFN_UTF8)
+static int fat_utf8toucs(FAR const char **str, lfnchar *ucs)
+{
+ uint8_t chr;
+ lfnchar tucs;
+ int ret = ERROR;
+
+ *ucs = '\0';
+ chr = *((*str)++);
+
+ if ((chr & 0x80) == 0x00)
+ {
+ tucs = (lfnchar)chr;
+ ret = OK;
+ }
+ else if ((chr & 0xe0) == 0xc0)
+ {
+ tucs = ((lfnchar)(chr & ~0xe0)) << 6;
+ chr = *((*str)++);
+ if ((chr & 0xc0) == 0x80)
+ {
+ tucs |= (lfnchar)(chr & ~0xc0);
+ ret = OK;
+ }
+ }
+ else if ((chr & 0xf0) == 0xe0)
+ {
+ tucs = ((lfnchar)(chr & ~0xf0)) << 12;
+ chr = *((*str)++);
+ if ((chr & 0xc0) == 0x80)
+ {
+ tucs |= (lfnchar)(chr & ~0xc0) << 6;
+ chr = *((*str)++);
+ if ((chr & 0xc0) == 0x80)
+ {
+ tucs |= (lfnchar)(chr & ~0xc0);
+ ret = OK;
+ }
+ }
+ }
+
+ if (ret == OK)
+ {
+ *ucs = tucs;
+ }
+
+ return ret;
+}
+#endif
+
+/****************************************************************************
+ * Name: fat_utf8toucs
+ *
+ * Description:
+ * Convert the next character from UCS2 to UTF8, reverse.
+ *
+ ****************************************************************************/
+#if defined(CONFIG_FAT_LFN) && defined(CONFIG_FAT_LFN_UTF8)
+static int fat_ucstoutf8(FAR uint8_t *dest, uint8_t offset, lfnchar ucs)
+{
+ if (ucs < 128 && offset >= 1)
+ {
+ dest[--offset] = (uint8_t)(ucs & 0xff);
+ }
+ else if (ucs < 2048 && offset >= 2)
+ {
+ dest[--offset] = (uint8_t)((ucs >> 0) & ~0xc0) | 0x80;
+ dest[--offset] = (uint8_t)((ucs >> 6) & ~0xe0) | 0xc0;
+ }
+ else if (offset >= 3)
+ {
+ dest[--offset] = (uint8_t)((ucs >> 0) & ~0xc0) | 0x80;
+ dest[--offset] = (uint8_t)((ucs >> 6) & ~0xc0) | 0x80;
+ dest[--offset] = (uint8_t)((ucs >> 12) & ~0xf0) | 0xe0;
+ }
+
+ return offset;
+}
+#endif
+
+/****************************************************************************
* Name: fat_lfnchecksum
*
* Description:
@@ -478,16 +570,23 @@ static inline int fat_parselfname(FAR const char **path,
FAR char *terminator)
{
FAR const char *node = *path;
- uint8_t ch;
+ lfnchar ch;
int ndx = 0;
/* Loop until the name is successfully parsed or an error occurs */
for (; ; )
{
- /* Get the next byte from the path */
+ /* Get the next character from the path */
+# ifdef CONFIG_FAT_LFN_UTF8
+ if (fat_utf8toucs(&node, &ch) != OK)
+ {
+ goto errout;
+ }
+# else
ch = *node++;
+# endif
/* Check if this the last byte in this node of the name */
@@ -499,14 +598,19 @@ static inline int fat_parselfname(FAR const char **path,
/* Return the remaining sub-string and the terminating character. */
- *terminator = ch;
+ *terminator = (char)ch;
*path = node;
return OK;
}
/* Accept only the printable character set (including space) */
+# ifdef CONFIG_FAT_LFN_UTF8
+ /* We assume all ucs2 characters printable REVISIT? */
+ else if (ch < ' ')
+# else
else if (!isprint(ch))
+# endif
{
goto errout;
}
@@ -564,19 +668,25 @@ errout:
#ifdef CONFIG_FAT_LFN
static inline int fat_createalias(FAR struct fat_dirinfo_s *dirinfo)
{
- uint8_t ch; /* Current character being processed */
- char *ext; /* Pointer to the extension substring */
- char *src; /* Pointer to the long file name source */
- int len; /* Total length of the long file name */
- int namechars; /* Number of characters available in long name */
- int extchars; /* Number of characters available in long name extension */
- int endndx; /* Maximum index into the short name array */
- int ndx; /* Index to store next character */
+ uint8_t ch; /* Current character being processed */
+ lfnchar *ext; /* Pointer to the extension substring */
+ lfnchar *src; /* Pointer to the long file name source */
+ int len; /* Total length of the long file name */
+ int namechars; /* Number of characters available in long name */
+ int extchars; /* Number of characters available in long name extension */
+ int endndx; /* Maximum index into the short name array */
+ int ndx; /* Index to store next character */
/* First, let's decide what is name and what is extension */
- len = strlen((FAR char *)dirinfo->fd_lfname);
- ext = strrchr((FAR char *)dirinfo->fd_lfname, '.');
+ for (len = 0, ext = NULL; dirinfo->fd_lfname[len] != '\0'; len++)
+ {
+ if (dirinfo->fd_lfname[len] == '.')
+ {
+ ext = &dirinfo->fd_lfname[len];
+ }
+ }
+
if (ext)
{
ptrdiff_t tmp;
@@ -585,7 +695,7 @@ static inline int fat_createalias(FAR struct fat_dirinfo_s *dirinfo)
* beginning of the string is then the name length.
*/
- tmp = ext - (FAR char *)dirinfo->fd_lfname;
+ tmp = ext - (FAR lfnchar *)dirinfo->fd_lfname;
namechars = tmp;
/* And the rest, excluding the '.' is the extension. */
@@ -637,7 +747,7 @@ static inline int fat_createalias(FAR struct fat_dirinfo_s *dirinfo)
}
else
{
- src = (FAR char *)dirinfo->fd_lfname;
+ src = (FAR lfnchar *)dirinfo->fd_lfname;
}
/* Then copy the name and extension, handling upper case conversions and
@@ -653,7 +763,25 @@ static inline int fat_createalias(FAR struct fat_dirinfo_s *dirinfo)
* encounter the end of null-terminated the long file name string.
*/
+# ifdef CONFIG_FAT_LFN_UTF8
+ /* Make sure ch is within printable characters */
+
+ if (*src > 0x7f)
+ {
+ ch = (uint8_t)(*src++ & 0x1f) + 'A';
+ if (ch >= '[')
+ {
+ ch -= ('[' - '0');
+ }
+ }
+ else
+ {
+ ch = *src++;
+ }
+# else
ch = *src++;
+# endif
+
if (ch == '\0')
{
/* This is the end of the source string. Do we need to add ~1. We
@@ -679,7 +807,7 @@ static inline int fat_createalias(FAR struct fat_dirinfo_s *dirinfo)
*/
if (ch == '+' || ch == ',' || ch == '.' || ch == ';' ||
- ch == '=' || ch == '[' || ch == ']' || ch == '|')
+ ch == '=' || ch == '[' || ch == ']' || ch == '|' || ch == ' ')
{
/* Use the underbar character instead */
@@ -834,7 +962,11 @@ static inline int fat_uniquealias(FAR struct fat_mountpt_s *fs,
* can not occur in positions 0 or 7:
*/
- for (tilde = 1; tilde < 7 && dirinfo->fd_name[tilde] != '~'; tilde++);
+ for (tilde = 1; tilde < 7 && dirinfo->fd_name[tilde] != '~'; tilde++)
+ {
+ /* Empty */
+ }
+
if (tilde >= 7)
{
return -EINVAL;
@@ -1090,11 +1222,11 @@ static int fat_findsfnentry(FAR struct fat_mountpt_s *fs,
****************************************************************************/
#ifdef CONFIG_FAT_LFN
-static bool fat_cmplfnchunk(FAR uint8_t *chunk, FAR const uint8_t *substr,
+static bool fat_cmplfnchunk(FAR uint8_t *chunk, FAR const lfnchar *substr,
int nchunk)
{
wchar_t wch;
- uint8_t ch;
+ lfnchar ch;
int i;
/* Check bytes 1-nchunk */
@@ -1120,7 +1252,11 @@ static bool fat_cmplfnchunk(FAR uint8_t *chunk, FAR const uint8_t *substr,
*/
wch = (wchar_t)fat_getuint16((FAR uint8_t *)chunk);
+# ifdef CONFIG_FAT_LFN_UTF8
+ if (wch != ch)
+# else
if ((wch & 0xff) != (wchar_t)ch)
+# endif
{
return false;
}
@@ -1155,7 +1291,7 @@ static bool fat_cmplfnchunk(FAR uint8_t *chunk, FAR const uint8_t *substr,
#ifdef CONFIG_FAT_LFN
static bool fat_cmplfname(FAR const uint8_t *direntry,
- FAR const uint8_t *substr)
+ FAR const lfnchar *substr)
{
FAR uint8_t *chunk;
int len;
@@ -1165,7 +1301,10 @@ static bool fat_cmplfname(FAR const uint8_t *direntry,
* terminator).
*/
- len = strlen((FAR char *)substr) + 1;
+ for (len = 1; substr[len - 1] != '\0'; len++)
+ {
+ /* Empty */
+ }
/* Check bytes 1-5 */
@@ -1221,7 +1360,11 @@ static inline int fat_findlfnentry(FAR struct fat_mountpt_s *fs,
* LDIR_MAXFNAME+1 we do not have to check the length of the string).
*/
- namelen = strlen((FAR char *)dirinfo->fd_lfname);
+ for (namelen = 0; dirinfo->fd_lfname[namelen] != '\0'; namelen++)
+ {
+ /* Empty */
+ }
+
DEBUGASSERT(namelen <= LDIR_MAXFNAME + 1);
/* How many LFN directory entries are we expecting? */
@@ -1522,7 +1665,11 @@ static inline int fat_allocatelfnentry(FAR struct fat_mountpt_s *fs,
* LDIR_MAXFNAME+1 we do not have to check the length of the string).
*/
- namelen = strlen((FAR char *)dirinfo->fd_lfname);
+ for (namelen = 0; dirinfo->fd_lfname[namelen] != '\0'; namelen++)
+ {
+ /* Empty */
+ }
+
DEBUGASSERT(namelen <= LDIR_MAXFNAME + 1);
/* How many LFN directory entries are we expecting? */
@@ -1768,7 +1915,7 @@ static inline int fat_getsfname(FAR uint8_t *direntry, FAR char *buffer,
****************************************************************************/
#ifdef CONFIG_FAT_LFN
-static void fat_getlfnchunk(FAR uint8_t *chunk, FAR uint8_t *dest,
+static void fat_getlfnchunk(FAR uint8_t *chunk, FAR lfnchar *dest,
int nchunk)
{
wchar_t wch;
@@ -1784,7 +1931,11 @@ static void fat_getlfnchunk(FAR uint8_t *chunk, FAR uint8_t *dest,
*/
wch = (wchar_t)fat_getuint16(chunk);
+# ifdef CONFIG_FAT_LFN_UTF8
+ *dest++ = wch;
+# else
*dest++ = (uint8_t)(wch & 0xff);
+# endif
chunk += sizeof(wchar_t);
}
}
@@ -1804,7 +1955,7 @@ static inline int fat_getlfname(FAR struct fat_mountpt_s *fs,
FAR struct fs_dirent_s *dir)
{
FAR uint8_t *direntry;
- uint8_t lfname[LDIR_MAXLFNCHARS];
+ lfnchar lfname[LDIR_MAXLFNCHARS];
uint16_t diroffset;
uint8_t seqno;
uint8_t rawseq;
@@ -1840,6 +1991,49 @@ static inline int fat_getlfname(FAR struct fat_mountpt_s *fs,
for (; ; )
{
+# ifdef CONFIG_FAT_LFN_UTF8
+ /* Get the string offset associated with the "last" entry. */
+
+ /* Extract and convert the unicode name */
+
+ fat_getlfnchunk(LDIR_PTRWCHAR1_5(direntry), lfname, 5);
+ fat_getlfnchunk(LDIR_PTRWCHAR6_11(direntry), &lfname[5], 6);
+ fat_getlfnchunk(LDIR_PTRWCHAR12_13(direntry), &lfname[11], 2);
+
+ /* Ignore trailing spaces on the "last" directory entry. The
+ * number of characters available is LDIR_MAXLFNCHARS or that
+ * minus the number of trailing spaces on the "last" directory
+ * entry.
+ */
+
+ nsrc = LDIR_MAXLFNCHARS;
+ if ((seqno & LDIR0_LAST) != 0)
+ {
+ /* Reduce the number of characters by the number of trailing
+ * spaces, init chars (0xffff) and '\0'.
+ */
+
+ for (; nsrc > 0 && (lfname[nsrc - 1] == ' ' ||
+ lfname[nsrc - 1] == '\0' ||
+ lfname[nsrc - 1] == 0xffff); nsrc--);
+
+ /* Add a null terminator to the destination string (the actual
+ * length of the destination buffer is NAME_MAX+1, so the NUL
+ * terminator will fit).
+ */
+
+ dir->fd_dir.d_name[NAME_MAX] = '\0';
+ offset = NAME_MAX;
+ }
+
+ /* Then transfer the characters */
+
+ for (i = nsrc - 1; i >= 0; i--)
+ {
+ offset = fat_ucstoutf8((FAR uint8_t *)dir->fd_dir.d_name,
+ offset, lfname[i]);
+ }
+# else
/* Get the string offset associated with the "last" entry. */
offset = (rawseq - 1) * LDIR_MAXLFNCHARS;
@@ -1864,10 +2058,12 @@ static inline int fat_getlfname(FAR struct fat_mountpt_s *fs,
if ((seqno & LDIR0_LAST) != 0)
{
/* Reduce the number of characters by the number of trailing
- * spaces.
+ * spaces, init chars (0xff) and '\0'.
*/
- for (; nsrc > 0 && lfname[nsrc - 1] == ' '; nsrc--);
+ for (; nsrc > 0 && (lfname[nsrc - 1] == ' ' ||
+ lfname[nsrc - 1] == '\0' ||
+ lfname[nsrc - 1] == 0xff); nsrc--);
/* Further reduce the length so that it fits in the destination
* buffer.
@@ -1893,6 +2089,7 @@ static inline int fat_getlfname(FAR struct fat_mountpt_s *fs,
dir->fd_dir.d_name[offset + i] = lfname[i];
}
}
+#endif
/* Read next directory entry */
@@ -1919,6 +2116,16 @@ static inline int fat_getlfname(FAR struct fat_mountpt_s *fs,
seqno = --rawseq;
if (seqno < 1)
{
+# ifdef CONFIG_FAT_LFN_UTF8
+ /* We must left align the d_name after utf8 processing */
+
+ if (offset > 0)
+ {
+ memmove(dir->fd_dir.d_name, &dir->fd_dir.d_name[offset],
+ (NAME_MAX + 1) - offset);
+ }
+# endif
+
/* We just completed processing the "first" long file name entry
* and we just read the short file name entry. Verify that the
* checksum of the short file name matches the checksum that we
@@ -2014,7 +2221,7 @@ static void fat_initlfname(FAR uint8_t *chunk, int nchunk)
****************************************************************************/
#ifdef CONFIG_FAT_LFN
-static void fat_putlfnchunk(FAR uint8_t *chunk, FAR const uint8_t *src,
+static void fat_putlfnchunk(FAR uint8_t *chunk, FAR const lfnchar *src,
int nchunk)
{
uint16_t wch;
@@ -2071,7 +2278,11 @@ static int fat_putlfname(FAR struct fat_mountpt_s *fs,
* LDIR_MAXLFNCHARS (13).
*/
- namelen = strlen((FAR char *)dirinfo->fd_lfname);
+ for (namelen = 0; dirinfo->fd_lfname[namelen] != '\0'; namelen++)
+ {
+ /* Empty */
+ }
+
DEBUGASSERT(namelen <= LDIR_MAXFNAME + 1);
/* How many LFN directory entries do we need to write? */