^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * unicode.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * PURPOSE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Routines for converting between UTF-8 and OSTA Compressed Unicode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Also handles filename mangling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * DESCRIPTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * OSTA Compressed Unicode is explained in the OSTA UDF specification.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * http://www.osta.org/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * UTF-8 is explained in IETF RFC 3629.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * https://www.rfc-editor.org/rfc/rfc3629
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * COPYRIGHT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * This file is distributed under the terms of the GNU General Public
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * License (GPL). Copies of the GPL can be obtained from:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * ftp://prep.ai.mit.edu/pub/gnu/GPL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * Each contributing author retains all rights to their own work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include "udfdecl.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <linux/string.h> /* for memset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <linux/nls.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <linux/crc-itu-t.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include "udf_sb.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
/* Offset added when a surrogate pair is combined into one code point */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #define PLANE_SIZE 0x10000
/* Largest code point accepted; UNICODE_MAX + 1 is used as "invalid" marker */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #define UNICODE_MAX 0x10ffff
/* (c & SURROGATE_MASK) == SURROGATE_PAIR identifies a surrogate code unit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #define SURROGATE_MASK 0xfffff800
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #define SURROGATE_PAIR 0x0000d800
/* Bit set in a low (second) surrogate and clear in a high (first) one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #define SURROGATE_LOW 0x00000400
/* Number of payload bits in each surrogate, and the mask extracting them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #define SURROGATE_CHAR_BITS 10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #define SURROGATE_CHAR_MASK ((1 << SURROGATE_CHAR_BITS) - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38)
/* Replacement emitted for characters that cannot be converted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #define ILLEGAL_CHAR_MARK '_'
/* Character that introduces a filename extension */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #define EXT_MARK '.'
/* First character of the CRC suffix inserted into mangled names */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #define CRC_MARK '#'
/* How many trailing characters are searched for EXT_MARK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #define EXT_SIZE 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) /* Number of chars we need to store generated CRC to make filename unique */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #define CRC_LEN 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) static unicode_t get_utf16_char(const uint8_t *str_i, int str_i_max_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) int str_i_idx, int u_ch, unicode_t *ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) unicode_t c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) int start_idx = str_i_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) /* Expand OSTA compressed Unicode to Unicode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) c = str_i[str_i_idx++];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) if (u_ch > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) c = (c << 8) | str_i[str_i_idx++];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) if ((c & SURROGATE_MASK) == SURROGATE_PAIR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) unicode_t next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) /* Trailing surrogate char */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) if (str_i_idx >= str_i_max_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) c = UNICODE_MAX + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) /* Low surrogate must follow the high one... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) if (c & SURROGATE_LOW) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) c = UNICODE_MAX + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) WARN_ON_ONCE(u_ch != 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) next = str_i[str_i_idx++] << 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) next |= str_i[str_i_idx++];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) if ((next & SURROGATE_MASK) != SURROGATE_PAIR ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) !(next & SURROGATE_LOW)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) c = UNICODE_MAX + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) c = PLANE_SIZE +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) ((c & SURROGATE_CHAR_MASK) << SURROGATE_CHAR_BITS) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) (next & SURROGATE_CHAR_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) *ret = c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) return str_i_idx - start_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) int *str_o_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) const uint8_t *str_i, int str_i_max_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) int *str_i_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) int u_ch, int *needsCRC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) int (*conv_f)(wchar_t, unsigned char *, int),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) int translate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) unicode_t c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) int illChar = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) int len, gotch = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) while (!gotch && *str_i_idx < str_i_max_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) if (*str_o_idx >= str_o_max_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) *needsCRC = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) return gotch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) len = get_utf16_char(str_i, str_i_max_len, *str_i_idx, u_ch,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) &c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) /* These chars cannot be converted. Replace them. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) if (c == 0 || c > UNICODE_MAX || (conv_f && c > MAX_WCHAR_T) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) (translate && c == '/')) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) illChar = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) if (!translate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) gotch = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) } else if (illChar)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) gotch = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) *str_i_idx += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) if (illChar) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) *needsCRC = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) c = ILLEGAL_CHAR_MARK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) gotch = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) if (gotch) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) if (conv_f) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) len = conv_f(c, &str_o[*str_o_idx],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) str_o_max_len - *str_o_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) len = utf32_to_utf8(c, &str_o[*str_o_idx],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) str_o_max_len - *str_o_idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) if (len < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) len = -ENAMETOOLONG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) /* Valid character? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) if (len >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) *str_o_idx += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) else if (len == -ENAMETOOLONG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) *needsCRC = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) gotch = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) str_o[(*str_o_idx)++] = ILLEGAL_CHAR_MARK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) *needsCRC = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) return gotch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) static int udf_name_from_CS0(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) uint8_t *str_o, int str_max_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) const uint8_t *ocu, int ocu_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) int translate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) uint32_t c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) uint8_t cmp_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) int idx, len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) int u_ch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) int needsCRC = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) int ext_i_len, ext_max_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) int str_o_len = 0; /* Length of resulting output */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) int ext_o_len = 0; /* Extension output length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) int ext_crc_len = 0; /* Extension output length if used with CRC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) int i_ext = -1; /* Extension position in input buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) int o_crc = 0; /* Rightmost possible output pos for CRC+ext */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) unsigned short valueCRC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) uint8_t crc[CRC_LEN];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) int (*conv_f)(wchar_t, unsigned char *, int);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) if (str_max_len <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) if (ocu_len == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) memset(str_o, 0, str_max_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) if (UDF_SB(sb)->s_nls_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) conv_f = UDF_SB(sb)->s_nls_map->uni2char;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) conv_f = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) cmp_id = ocu[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) if (cmp_id != 8 && cmp_id != 16) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) memset(str_o, 0, str_max_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) pr_err("unknown compression code (%u)\n", cmp_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) u_ch = cmp_id >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) ocu++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) ocu_len--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) if (ocu_len % u_ch) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) pr_err("incorrect filename length (%d)\n", ocu_len + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) if (translate) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) /* Look for extension */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) for (idx = ocu_len - u_ch, ext_i_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) (idx >= 0) && (ext_i_len < EXT_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) idx -= u_ch, ext_i_len++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) c = ocu[idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) if (u_ch > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) c = (c << 8) | ocu[idx + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) if (c == EXT_MARK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) if (ext_i_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) i_ext = idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) if (i_ext >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) /* Convert extension */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) ext_max_len = min_t(int, sizeof(ext), str_max_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) ext[ext_o_len++] = EXT_MARK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) idx = i_ext + u_ch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) while (udf_name_conv_char(ext, ext_max_len, &ext_o_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) ocu, ocu_len, &idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) u_ch, &needsCRC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) conv_f, translate)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) if ((ext_o_len + CRC_LEN) < str_max_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) ext_crc_len = ext_o_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) if (translate && (idx == i_ext)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) if (str_o_len > (str_max_len - ext_o_len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) needsCRC = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) if (!udf_name_conv_char(str_o, str_max_len, &str_o_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) ocu, ocu_len, &idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) u_ch, &needsCRC, conv_f, translate))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) if (translate &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) (str_o_len <= (str_max_len - ext_o_len - CRC_LEN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) o_crc = str_o_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) if (translate) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) if (str_o_len <= 2 && str_o[0] == '.' &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) (str_o_len == 1 || str_o[1] == '.'))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) needsCRC = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) if (needsCRC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) str_o_len = o_crc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) valueCRC = crc_itu_t(0, ocu, ocu_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) crc[0] = CRC_MARK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) crc[1] = hex_asc_upper_hi(valueCRC >> 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) crc[2] = hex_asc_upper_lo(valueCRC >> 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) crc[3] = hex_asc_upper_hi(valueCRC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) crc[4] = hex_asc_upper_lo(valueCRC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) len = min_t(int, CRC_LEN, str_max_len - str_o_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) memcpy(&str_o[str_o_len], crc, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) str_o_len += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) ext_o_len = ext_crc_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) if (ext_o_len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) memcpy(&str_o[str_o_len], ext, ext_o_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) str_o_len += ext_o_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) return str_o_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) static int udf_name_to_CS0(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) uint8_t *ocu, int ocu_max_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) const uint8_t *str_i, int str_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) int i, len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) unsigned int max_val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) int u_len, u_ch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) unicode_t uni_char;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) int (*conv_f)(const unsigned char *, int, wchar_t *);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) if (ocu_max_len <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) if (UDF_SB(sb)->s_nls_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) conv_f = UDF_SB(sb)->s_nls_map->char2uni;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) conv_f = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) memset(ocu, 0, ocu_max_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) ocu[0] = 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) max_val = 0xff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) u_ch = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) try_again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) u_len = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) for (i = 0; i < str_len; i += len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) /* Name didn't fit? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) if (u_len + u_ch > ocu_max_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) if (conv_f) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) wchar_t wchar;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) len = conv_f(&str_i[i], str_len - i, &wchar);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) if (len > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) uni_char = wchar;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) len = utf8_to_utf32(&str_i[i], str_len - i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) &uni_char);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) /* Invalid character, deal with it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) if (len <= 0 || uni_char > UNICODE_MAX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) len = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) uni_char = '?';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) if (uni_char > max_val) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) unicode_t c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) if (max_val == 0xff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) max_val = 0xffff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) ocu[0] = 0x10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) u_ch = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) goto try_again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) * Use UTF-16 encoding for chars outside we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) * cannot encode directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) if (u_len + 2 * u_ch > ocu_max_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) uni_char -= PLANE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) c = SURROGATE_PAIR |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) ((uni_char >> SURROGATE_CHAR_BITS) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) SURROGATE_CHAR_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) ocu[u_len++] = (uint8_t)(c >> 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) ocu[u_len++] = (uint8_t)(c & 0xff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) uni_char = SURROGATE_PAIR | SURROGATE_LOW |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) (uni_char & SURROGATE_CHAR_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) if (max_val == 0xffff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) ocu[u_len++] = (uint8_t)(uni_char >> 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) ocu[u_len++] = (uint8_t)(uni_char & 0xff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) return u_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) * Convert CS0 dstring to output charset. Warning: This function may truncate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) * input string if it is too long as it is used for informational strings only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) * and it is better to truncate the string than to refuse mounting a media.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) */
/*
 * Convert a fixed-size CS0 dstring field to the output charset.
 *
 * sb: superblock selecting the output charset
 * utf_o, o_len: output buffer and its size
 * ocu_i, i_len: dstring field and its total size; the last byte of the
 *	field is read as the length of the string stored in it
 *
 * A stored length that does not fit the field is reported and clamped
 * instead of failing.  Returns whatever udf_name_from_CS0() returns for the
 * (possibly clamped) string with translation disabled.
 */
int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len,
		      const uint8_t *ocu_i, int i_len)
{
	int s_len = 0;

	if (i_len > 0) {
		s_len = ocu_i[i_len - 1];
		if (s_len >= i_len) {
			pr_warn("incorrect dstring lengths (%d/%d),"
				" truncating\n", s_len, i_len);
			s_len = i_len - 1;
			/*
			 * 2-byte encoding? The bytes after the compression
			 * ID must form whole characters, so drop one byte
			 * when their count (s_len - 1) is odd.
			 */
			if (ocu_i[0] == 16 && s_len > 0)
				s_len -= (s_len - 1) & 1;
		}
	}

	return udf_name_from_CS0(sb, utf_o, o_len, ocu_i, s_len, 0);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) uint8_t *dname, int dlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) if (!slen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) if (dlen <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) ret = udf_name_from_CS0(sb, dname, dlen, sname, slen, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) /* Zero length filename isn't valid... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396)
/*
 * Encode a filename into CS0 for storing on disk.
 *
 * sname, slen: name in the mount charset and its length
 * dname, dlen: output buffer for the CS0 name and its size
 *
 * Returns the result of udf_name_to_CS0(): the encoded length, or 0 when
 * the name does not fit.
 */
int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
		     uint8_t *dname, int dlen)
{
	int cs0_len = udf_name_to_CS0(sb, dname, dlen, sname, slen);

	return cs0_len;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402)