^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * fs/cifs/cifs_unicode.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (c) International Business Machines Corp., 2000,2009
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Modified by Steve French (sfrench@us.ibm.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include "cifs_fs_sb.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include "cifs_unicode.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include "cifs_uniupr.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include "cifspdu.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include "cifsglob.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include "cifs_debug.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) int cifs_remap(struct cifs_sb_info *cifs_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) int map_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) map_type = SFM_MAP_UNI_RSVD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) map_type = SFU_MAP_UNI_RSVD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) map_type = NO_MAP_UNI_RSVD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) return map_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) /* Convert character using the SFU - "Services for Unix" remapping range */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) static bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) convert_sfu_char(const __u16 src_char, char *target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * BB: Cannot handle remapping UNI_SLASH until all the calls to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * build_path_from_dentry are modified, as they use slash as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * separator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) switch (src_char) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) case UNI_COLON:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) *target = ':';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) case UNI_ASTERISK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) *target = '*';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) case UNI_QUESTION:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) *target = '?';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) case UNI_PIPE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) *target = '|';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) case UNI_GRTRTHAN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) *target = '>';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) case UNI_LESSTHAN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) *target = '<';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) /* Convert character using the SFM - "Services for Mac" remapping range */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) static bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) convert_sfm_char(const __u16 src_char, char *target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) if (src_char >= 0xF001 && src_char <= 0xF01F) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) *target = src_char - 0xF000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) switch (src_char) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) case SFM_COLON:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) *target = ':';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) case SFM_DOUBLEQUOTE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) *target = '"';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) case SFM_ASTERISK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) *target = '*';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) case SFM_QUESTION:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) *target = '?';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) case SFM_PIPE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) *target = '|';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) case SFM_GRTRTHAN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) *target = '>';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) case SFM_LESSTHAN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) *target = '<';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) case SFM_SPACE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) *target = ' ';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) case SFM_PERIOD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) *target = '.';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) * cifs_mapchar - convert a host-endian char to proper char in codepage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * @target - where converted character should be copied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) * @src_char - 2 byte host-endian source character
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) * @cp - codepage to which character should be converted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) * @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) * This function handles the conversion of a single character. It is the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) * responsibility of the caller to ensure that the target buffer is large
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) int maptype)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) int len = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) __u16 src_char;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) src_char = *from;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) else if ((maptype == SFU_MAP_UNI_RSVD) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) convert_sfu_char(src_char, target))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) /* if character not one of seven in special remap set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) if (len <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) goto surrogate_pair;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) surrogate_pair:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) /* convert SURROGATE_PAIR and IVS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) if (strcmp(cp->charset, "utf8"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) goto unknown;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) if (len <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) goto unknown;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) unknown:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) *target = '?';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) len = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) * cifs_from_utf16 - convert utf16le string to local charset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) * @to - destination buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) * @from - source buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) * @tolen - destination buffer size (in bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) * @fromlen - source buffer size (in bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) * @codepage - codepage to which characters should be converted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) * @mapchar - should characters be remapped according to the mapchars option?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) * Convert a little-endian utf16le string (as sent by the server) to a string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) * in the provided codepage. The tolen and fromlen parameters are to ensure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) * that the code doesn't walk off of the end of the buffer (which is always
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) * a danger if the alignment of the source buffer is off). The destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) * string is always properly null terminated and fits in the destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) * buffer. Returns the length of the destination string in bytes (including
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) * null terminator).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) * Note that some windows versions actually send multiword UTF-16 characters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) * instead of straight UTF16-2. The linux nls routines however aren't able to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) * deal with those characters properly. In the event that we get some of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) * those characters, they won't be translated properly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) const struct nls_table *codepage, int map_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) int i, charlen, safelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) int outlen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) int nullsize = nls_nullsize(codepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) int fromwords = fromlen / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) char tmp[NLS_MAX_CHARSET_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) * because the chars can be of varying widths, we need to take care
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) * not to overflow the destination buffer when we get close to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) * end of it. Until we get to this offset, we don't need to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) * for overflow however.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) for (i = 0; i < fromwords; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) ftmp[0] = get_unaligned_le16(&from[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) if (ftmp[0] == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) if (i + 1 < fromwords)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) ftmp[1] = get_unaligned_le16(&from[i + 1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) ftmp[1] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) if (i + 2 < fromwords)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) ftmp[2] = get_unaligned_le16(&from[i + 2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) ftmp[2] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) * check to see if converting this character might make the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) * conversion bleed into the null terminator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) if (outlen >= safelen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) charlen = cifs_mapchar(tmp, ftmp, codepage, map_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) if ((outlen + charlen) > (tolen - nullsize))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) /* put converted char into 'to' buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) outlen += charlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) /* charlen (=bytes of UTF-8 for 1 character)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) * 4bytes UTF-8(surrogate pair) is charlen=4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) * (4bytes UTF-16 code)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) if (charlen == 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) else if (charlen >= 5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) /* 5-6bytes UTF-8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) i += 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) /* properly null-terminate string */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) for (i = 0; i < nullsize; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) to[outlen++] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) return outlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) * NAME: cifs_strtoUTF16()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) * FUNCTION: Convert character string to unicode string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) cifs_strtoUTF16(__le16 *to, const char *from, int len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) const struct nls_table *codepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) int charlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) wchar_t wchar_to; /* needed to quiet sparse */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) /* special case for utf8 to handle no plane0 chars */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) if (!strcmp(codepage->charset, "utf8")) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) * convert utf8 -> utf16, we assume we have enough space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) * as caller should have assumed conversion does not overflow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) * in destination len is length in wchar_t units (16bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) (wchar_t *) to, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) /* if success terminate and exit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) if (i >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) goto success;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) * if fails fall back to UCS encoding as this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) * function should not return negative values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) * currently can fail only if source contains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) * invalid encoded characters
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) charlen = codepage->char2uni(from, len, &wchar_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) if (charlen < 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) *from, charlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) /* A question mark */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) wchar_to = 0x003f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) charlen = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) put_unaligned_le16(wchar_to, &to[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) success:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) put_unaligned_le16(0, &to[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) * cifs_utf16_bytes - how long will a string be after conversion?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) * @utf16 - pointer to input string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) * @maxbytes - don't go past this many bytes of input string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) * @codepage - destination codepage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) * Walk a utf16le string and return the number of bytes that the string will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) * be after being converted to the given charset, not including any null
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) * termination required. Don't walk past maxbytes in the source buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) cifs_utf16_bytes(const __le16 *from, int maxbytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) const struct nls_table *codepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) int charlen, outlen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) int maxwords = maxbytes / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) char tmp[NLS_MAX_CHARSET_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) __u16 ftmp[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) for (i = 0; i < maxwords; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) ftmp[0] = get_unaligned_le16(&from[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) if (ftmp[0] == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) if (i + 1 < maxwords)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) ftmp[1] = get_unaligned_le16(&from[i + 1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) ftmp[1] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) if (i + 2 < maxwords)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) ftmp[2] = get_unaligned_le16(&from[i + 2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) ftmp[2] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) outlen += charlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) return outlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) * cifs_strndup_from_utf16 - copy a string from wire format to the local
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) * codepage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) * @src - source string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) * @maxlen - don't walk past this many bytes in the source string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) * @is_unicode - is this a unicode string?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) * @codepage - destination codepage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) * Take a string given by the server, convert it to the local codepage and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) * put it in a new buffer. Returns a pointer to the new string or NULL on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) * error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) char *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) cifs_strndup_from_utf16(const char *src, const int maxlen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) const bool is_unicode, const struct nls_table *codepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) char *dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) if (is_unicode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) len += nls_nullsize(codepage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) dst = kmalloc(len, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) if (!dst)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) NO_MAP_UNI_RSVD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) dst = kstrndup(src, maxlen, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) return dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) static __le16 convert_to_sfu_char(char src_char)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) __le16 dest_char;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) switch (src_char) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) case ':':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) dest_char = cpu_to_le16(UNI_COLON);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) case '*':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) dest_char = cpu_to_le16(UNI_ASTERISK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) case '?':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) dest_char = cpu_to_le16(UNI_QUESTION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) case '<':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) dest_char = cpu_to_le16(UNI_LESSTHAN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) case '>':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) dest_char = cpu_to_le16(UNI_GRTRTHAN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) case '|':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) dest_char = cpu_to_le16(UNI_PIPE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) dest_char = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) return dest_char;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) static __le16 convert_to_sfm_char(char src_char, bool end_of_string)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) __le16 dest_char;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) if (src_char >= 0x01 && src_char <= 0x1F) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) dest_char = cpu_to_le16(src_char + 0xF000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) return dest_char;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) switch (src_char) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) case ':':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) dest_char = cpu_to_le16(SFM_COLON);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) case '"':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) dest_char = cpu_to_le16(SFM_DOUBLEQUOTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) case '*':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) dest_char = cpu_to_le16(SFM_ASTERISK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) case '?':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) dest_char = cpu_to_le16(SFM_QUESTION);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) case '<':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) dest_char = cpu_to_le16(SFM_LESSTHAN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) case '>':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) dest_char = cpu_to_le16(SFM_GRTRTHAN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) case '|':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) dest_char = cpu_to_le16(SFM_PIPE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) case '.':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) if (end_of_string)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) dest_char = cpu_to_le16(SFM_PERIOD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) dest_char = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) case ' ':
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) if (end_of_string)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) dest_char = cpu_to_le16(SFM_SPACE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) dest_char = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) dest_char = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) return dest_char;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) * Convert 16 bit Unicode pathname to wire format from string in current code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) * page. Conversion may involve remapping up the six characters that are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) * only legal in POSIX-like OS (if they are present in the string). Path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) * names are little endian 16 bit Unicode on the wire
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) const struct nls_table *cp, int map_chars)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) int i, charlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) int j = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) char src_char;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) __le16 dst_char;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) wchar_t tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) wchar_t *wchar_to; /* UTF-16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) unicode_t u;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) if (map_chars == NO_MAP_UNI_RSVD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) return cifs_strtoUTF16(target, source, PATH_MAX, cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) wchar_to = kzalloc(6, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) for (i = 0; i < srclen; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) src_char = source[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) charlen = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) /* check if end of string */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) if (src_char == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) goto ctoUTF16_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) /* see if we must remap this char */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) if (map_chars == SFU_MAP_UNI_RSVD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) dst_char = convert_to_sfu_char(src_char);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) else if (map_chars == SFM_MAP_UNI_RSVD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) bool end_of_string;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) * Remap spaces and periods found at the end of every
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) * component of the path. The special cases of '.' and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) * '..' do not need to be dealt with explicitly because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) * they are addressed in namei.c:link_path_walk().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) **/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) if ((i == srclen - 1) || (source[i+1] == '\\'))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) end_of_string = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) end_of_string = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) dst_char = convert_to_sfm_char(src_char, end_of_string);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) dst_char = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) * FIXME: We can not handle remapping backslash (UNI_SLASH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) * until all the calls to build_path_from_dentry are modified,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) * as they use backslash as separator.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) if (dst_char == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) charlen = cp->char2uni(source + i, srclen - i, &tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) dst_char = cpu_to_le16(tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) * if no match, use question mark, which at least in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) * some cases serves as wild card
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) if (charlen > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) goto ctoUTF16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) /* convert SURROGATE_PAIR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) if (strcmp(cp->charset, "utf8") || !wchar_to)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) goto unknown;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) if (*(source + i) & 0x80) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) charlen = utf8_to_utf32(source + i, 6, &u);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) if (charlen < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) goto unknown;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) goto unknown;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) ret = utf8s_to_utf16s(source + i, charlen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) UTF16_LITTLE_ENDIAN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) wchar_to, 6);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) goto unknown;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) i += charlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) dst_char = cpu_to_le16(*wchar_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) if (charlen <= 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) /* 1-3bytes UTF-8 to 2bytes UTF-16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) put_unaligned(dst_char, &target[j]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) else if (charlen == 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) * (charlen=3+4 or 4+4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) put_unaligned(dst_char, &target[j]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) dst_char = cpu_to_le16(*(wchar_to + 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) j++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) put_unaligned(dst_char, &target[j]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) } else if (charlen >= 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) /* 5-6bytes UTF-8 to 6bytes UTF-16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) put_unaligned(dst_char, &target[j]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) dst_char = cpu_to_le16(*(wchar_to + 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) j++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) put_unaligned(dst_char, &target[j]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) dst_char = cpu_to_le16(*(wchar_to + 2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) j++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) put_unaligned(dst_char, &target[j]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) unknown:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) dst_char = cpu_to_le16(0x003f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) charlen = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) ctoUTF16:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) * character may take more than one byte in the source string,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) * but will take exactly two bytes in the target string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) i += charlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) put_unaligned(dst_char, &target[j]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) ctoUTF16_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) put_unaligned(0, &target[j]); /* Null terminate target unicode string */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) kfree(wchar_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) return j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) * cifs_local_to_utf16_bytes - how long will a string be after conversion?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) * @from - pointer to input string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) * @maxbytes - don't go past this many bytes of input string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) * @codepage - source codepage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) * Walk a string and return the number of bytes that the string will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) * be after being converted to the given charset, not including any null
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) * termination required. Don't walk past maxbytes in the source buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) cifs_local_to_utf16_bytes(const char *from, int len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) const struct nls_table *codepage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) int charlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) wchar_t wchar_to;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) charlen = codepage->char2uni(from, len, &wchar_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) /* Failed conversion defaults to a question mark */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) if (charlen < 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) charlen = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) return 2 * i; /* UTF16 characters are two bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) * @src - source string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) * @maxlen - don't walk past this many bytes in the source string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) * @utf16_len - the length of the allocated string in bytes (including null)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) * @cp - source codepage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) * @remap - map special chars
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) * Take a string convert it from the local codepage to UTF16 and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) * put it in a new buffer. Returns a pointer to the new string or NULL on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) * error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) __le16 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) cifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) const struct nls_table *cp, int remap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) __le16 *dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) len = cifs_local_to_utf16_bytes(src, maxlen, cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) len += 2; /* NULL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) dst = kmalloc(len, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) if (!dst) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) *utf16_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) cifsConvertToUTF16(dst, src, strlen(src), cp, remap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) *utf16_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) return dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) }