^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * LZMA2 decoder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Authors: Lasse Collin <lasse.collin@tukaani.org>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Igor Pavlov <https://7-zip.org/>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * This file has been put into the public domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * You can do whatever you want with this file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include "xz_private.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include "xz_lzma2.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * Range decoder initialization eats the first five bytes of each LZMA chunk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #define RC_INIT_BYTES 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) /*
 * Minimum number of usable input buffer bytes needed to safely decode
 * one LZMA symbol. The worst case is that we decode 22 bits using
 * probabilities and 26 direct bits. This may consume at most 20 bytes
 * of input. However, lzma_main() does an extra normalization before
 * returning, thus we need to put 21 here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #define LZMA_IN_REQUIRED 21
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * Dictionary (history buffer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * These are always true:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * start <= pos <= full <= end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * pos <= limit <= end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) *
 * In multi-call mode, these are also true:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * end == size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * size <= size_max
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * allocated <= size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * Most of these variables are size_t to support single-call mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * in which the dictionary variables address the actual output
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * buffer directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) struct dictionary {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) /* Beginning of the history buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) uint8_t *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) /* Old position in buf (before decoding more data) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) size_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) /* Position in buf */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) size_t pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) /*
 * How full the dictionary is. This is used to detect corrupt input that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * would read beyond the beginning of the uncompressed stream.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) size_t full;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) /* Write limit; we don't write to buf[limit] or later bytes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) size_t limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) * End of the dictionary buffer. In multi-call mode, this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * the same as the dictionary size. In single-call mode, this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * indicates the size of the output buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) size_t end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) * Size of the dictionary as specified in Block Header. This is used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * together with "full" to detect corrupt input that would make us
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * read beyond the beginning of the uncompressed stream.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) uint32_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * Maximum allowed dictionary size in multi-call mode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * This is ignored in single-call mode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) uint32_t size_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) * Amount of memory currently allocated for the dictionary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) * size_max is always the same as the allocated size.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) uint32_t allocated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) /* Operation mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) enum xz_mode mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) };
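
/*
 * Example (a sketch with hypothetical numbers): in multi-call mode with a
 * 64 KiB dictionary, end == size == 65536 and buf is allocated by this
 * file. If 70000 bytes have been decoded so far, pos has wrapped around
 * once, e.g. pos == 4464, while full has saturated at 65536 because the
 * whole buffer now holds history. In single-call mode buf points into the
 * caller's output buffer instead, and end is simply the space remaining
 * in that buffer.
 */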
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) /* Range decoder */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) struct rc_dec {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) uint32_t range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) uint32_t code;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) * Number of initializing bytes remaining to be read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) * by rc_read_init().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) uint32_t init_bytes_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) * Buffer from which we read our input. It can be either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) * temp.buf or the caller-provided input buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) const uint8_t *in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) size_t in_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) size_t in_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) /* Probabilities for a length decoder. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) struct lzma_len_dec {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) /* Probability of match length being at least 10 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) uint16_t choice;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) /* Probability of match length being at least 18 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) uint16_t choice2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) /* Probabilities for match lengths 2-9 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) /* Probabilities for match lengths 10-17 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) /* Probabilities for match lengths 18-273 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) uint16_t high[LEN_HIGH_SYMBOLS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) };
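
/*
 * Example (a sketch of how the three trees map to lengths): if choice
 * decodes to 0, the length comes from "low" and is 2-9; if choice is 1
 * and choice2 is 0, it comes from "mid" and is 10-17; otherwise it comes
 * from "high" and is 18-273. See lzma_len() below for the exact
 * arithmetic.
 */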
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) struct lzma_dec {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) /* Distances of latest four matches */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) uint32_t rep0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) uint32_t rep1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) uint32_t rep2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) uint32_t rep3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) /* Types of the most recently seen LZMA symbols */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) enum lzma_state state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) * Length of a match. This is updated so that dict_repeat can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) * be called again to finish repeating the whole match.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) uint32_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) * LZMA properties or related bit masks (number of literal
 * context bits, a mask derived from the number of literal
 * position bits, and a mask derived from the number of
 * position bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) uint32_t lc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) uint32_t literal_pos_mask; /* (1 << lp) - 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) uint32_t pos_mask; /* (1 << pb) - 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) uint16_t is_match[STATES][POS_STATES_MAX];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) uint16_t is_rep[STATES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) * If 0, distance of a repeated match is rep0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) * Otherwise check is_rep1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) uint16_t is_rep0[STATES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) * If 0, distance of a repeated match is rep1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) * Otherwise check is_rep2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) uint16_t is_rep1[STATES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) uint16_t is_rep2[STATES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) /*
 * If 0, the repeated match has a length of one byte. Otherwise
 * the length is decoded from rep_len_dec.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) uint16_t is_rep0_long[STATES][POS_STATES_MAX];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) * Probability tree for the highest two bits of the match
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) * distance. There is a separate probability tree for match
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) /*
 * Probability trees for additional bits for match distance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) * when the distance is in the range [4, 127].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) * Probability tree for the lowest four bits of a match
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) * distance that is equal to or greater than 128.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) uint16_t dist_align[ALIGN_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) /* Length of a normal match */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) struct lzma_len_dec match_len_dec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) /* Length of a repeated match */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) struct lzma_len_dec rep_len_dec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) /* Probabilities of literals */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) struct lzma2_dec {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) /* Position in xz_dec_lzma2_run(). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) enum lzma2_seq {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) SEQ_CONTROL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) SEQ_UNCOMPRESSED_1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) SEQ_UNCOMPRESSED_2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) SEQ_COMPRESSED_0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) SEQ_COMPRESSED_1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) SEQ_PROPERTIES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) SEQ_LZMA_PREPARE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) SEQ_LZMA_RUN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) SEQ_COPY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) } sequence;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) /* Next position after decoding the compressed size of the chunk. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) enum lzma2_seq next_sequence;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) uint32_t uncompressed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) * Compressed size of LZMA chunk or compressed/uncompressed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) * size of uncompressed chunk (64 KiB at maximum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) uint32_t compressed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) /*
 * True if dictionary reset is needed. This is true before
 * the first chunk (LZMA or uncompressed).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) bool need_dict_reset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) /*
 * True if new LZMA properties are needed. This is true
 * before the first LZMA chunk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) bool need_props;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) struct xz_dec_lzma2 {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) * The order below is important on x86 to reduce code size and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) * it shouldn't hurt on other platforms. Everything up to and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) * including lzma.pos_mask are in the first 128 bytes on x86-32,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) * which allows using smaller instructions to access those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) * variables. On x86-64, fewer variables fit into the first 128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) * bytes, but this is still the best order without sacrificing
 * readability by splitting the structures.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) struct rc_dec rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) struct dictionary dict;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) struct lzma2_dec lzma2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) struct lzma_dec lzma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) /*
 * Temporary buffer which holds a small number of input bytes between
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) * decoder calls. See lzma2_lzma() for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) uint32_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) uint8_t buf[3 * LZMA_IN_REQUIRED];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) } temp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) /**************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) * Dictionary *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) **************/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) * Reset the dictionary state. When in single-call mode, set up the beginning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) * of the dictionary to point to the actual output buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) static void dict_reset(struct dictionary *dict, struct xz_buf *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) if (DEC_IS_SINGLE(dict->mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) dict->buf = b->out + b->out_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) dict->end = b->out_size - b->out_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) dict->start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) dict->pos = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) dict->limit = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) dict->full = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) /* Set dictionary write limit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) static void dict_limit(struct dictionary *dict, size_t out_max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) if (dict->end - dict->pos <= out_max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) dict->limit = dict->end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) dict->limit = dict->pos + out_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) }
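
/*
 * Example (a sketch, hypothetical numbers): if 300 bytes remain before
 * dict->end and the caller can still accept 1000 output bytes, the limit
 * is clamped to dict->end; if the caller can accept only 100 bytes, the
 * limit becomes dict->pos + 100 so decoding stops exactly when the
 * caller's output buffer would be full.
 */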
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) /* Return true if at least one byte can be written into the dictionary. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) static inline bool dict_has_space(const struct dictionary *dict)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) return dict->pos < dict->limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) * Get a byte from the dictionary at the given distance. The distance is
 * assumed to be valid, or as a special case, zero when the dictionary is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) * still empty. This special case is needed for single-call decoding to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) * avoid writing a '\0' to the end of the destination buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) size_t offset = dict->pos - dist - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) if (dist >= dict->pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) offset += dict->end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) return dict->full > 0 ? dict->buf[offset] : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) }
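
/*
 * Example (a sketch, hypothetical numbers): with pos == 5, dist == 7 and
 * end == 4096, the first line gives offset == 5 - 7 - 1, which underflows;
 * because dist >= pos, end is added back, yielding offset == 4093, i.e.
 * the byte written eight positions earlier, counted across the circular
 * buffer's wrap-around point.
 */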
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) * Put one byte into the dictionary. It is assumed that there is space for it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) static inline void dict_put(struct dictionary *dict, uint8_t byte)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) dict->buf[dict->pos++] = byte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) if (dict->full < dict->pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) dict->full = dict->pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) /*
 * Repeat the given number of bytes from the given distance. If the distance is
 * invalid, false is returned. On success, true is returned and *len is
 * updated to indicate how many bytes are left to be repeated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) static bool dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) size_t back;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) uint32_t left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) if (dist >= dict->full || dist >= dict->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) left = min_t(size_t, dict->limit - dict->pos, *len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) *len -= left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) back = dict->pos - dist - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) if (dist >= dict->pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) back += dict->end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) dict->buf[dict->pos++] = dict->buf[back++];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) if (back == dict->end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) back = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) } while (--left > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) if (dict->full < dict->pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) dict->full = dict->pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) }
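
/*
 * Example (a sketch, hypothetical numbers): if *len is 50 but only 30
 * bytes fit below dict->limit, 30 bytes are copied and *len is left at
 * 20; lzma_main() notices the nonzero s->lzma.len on the next call and
 * resumes the repeat once dict_has_space() is true again.
 */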
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) /* Copy uncompressed data as is from input to dictionary and output buffers. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) uint32_t *left)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) size_t copy_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) while (*left > 0 && b->in_pos < b->in_size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) && b->out_pos < b->out_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) copy_size = min(b->in_size - b->in_pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) b->out_size - b->out_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) if (copy_size > dict->end - dict->pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) copy_size = dict->end - dict->pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) if (copy_size > *left)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) copy_size = *left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) *left -= copy_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) * If doing in-place decompression in single-call mode and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) * uncompressed size of the file is larger than the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) * thought (i.e. it is invalid input!), the buffers below may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) * overlap and cause undefined behavior with memcpy().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) * With valid inputs memcpy() would be fine here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) dict->pos += copy_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) if (dict->full < dict->pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) dict->full = dict->pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) if (DEC_IS_MULTI(dict->mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) if (dict->pos == dict->end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) dict->pos = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) * Like above but for multi-call mode: use memmove()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) * to avoid undefined behavior with invalid input.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) memmove(b->out + b->out_pos, b->in + b->in_pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) copy_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) dict->start = dict->pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) b->out_pos += copy_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) b->in_pos += copy_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) * Flush pending data from dictionary to b->out. It is assumed that there is
 * enough space in b->out. This is guaranteed because the caller uses
 * dict_limit() before decoding data into the dictionary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) size_t copy_size = dict->pos - dict->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) if (DEC_IS_MULTI(dict->mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) if (dict->pos == dict->end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) dict->pos = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) * These buffers cannot overlap even if doing in-place
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) * decompression because in multi-call mode dict->buf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) * has been allocated by us in this file; it's not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) * provided by the caller like in single-call mode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) memcpy(b->out + b->out_pos, dict->buf + dict->start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) copy_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) dict->start = dict->pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) b->out_pos += copy_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) return copy_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) /*****************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) * Range decoder *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) *****************/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) /* Reset the range decoder. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) static void rc_reset(struct rc_dec *rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) rc->range = (uint32_t)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) rc->code = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) rc->init_bytes_left = RC_INIT_BYTES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) * Read the first five initial bytes into rc->code if they haven't been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) * read already. (Yes, the first byte gets completely ignored.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) static bool rc_read_init(struct rc_dec *rc, struct xz_buf *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) while (rc->init_bytes_left > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) if (b->in_pos == b->in_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) rc->code = (rc->code << 8) + b->in[b->in_pos++];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) --rc->init_bytes_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) /* Return true if there may not be enough input for the next decoding loop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) static inline bool rc_limit_exceeded(const struct rc_dec *rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) return rc->in_pos > rc->in_limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) /*
 * Return true if it is possible (from the point of view of the range
 * decoder) that we have reached the end of the LZMA chunk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) static inline bool rc_is_finished(const struct rc_dec *rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) return rc->code == 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) /* Read the next input byte if needed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) static __always_inline void rc_normalize(struct rc_dec *rc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) if (rc->range < RC_TOP_VALUE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) rc->range <<= RC_SHIFT_BITS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) /*
 * Decode one bit. In some versions, this function has been split into three
 * functions so that the compiler is supposed to be able to more easily avoid
 * an extra branch. In this particular version of the LZMA decoder, this
 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
 * on x86). Using a non-split version results in nicer-looking code too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) * NOTE: This must return an int. Do not make it return a bool or the speed
 * of the code generated by GCC 3.x decreases by 10-15 %. (GCC 4.3 doesn't care,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) static __always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) uint32_t bound;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) int bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) rc_normalize(rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) if (rc->code < bound) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) rc->range = bound;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) bit = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) rc->range -= bound;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) rc->code -= bound;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) *prob -= *prob >> RC_MOVE_BITS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) bit = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) return bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) }
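
/*
 * Worked example (a sketch, assuming the usual constants from xz_lzma2.h:
 * RC_BIT_MODEL_TOTAL_BITS == 11, RC_BIT_MODEL_TOTAL == 2048 and
 * RC_MOVE_BITS == 5): with a freshly reset probability
 * *prob == RC_BIT_MODEL_TOTAL / 2 == 1024, bound is about half of range,
 * so both bit values start out equally likely. After decoding a 0 bit,
 * *prob += (2048 - 1024) >> 5, i.e. 1056, making further 0 bits slightly
 * cheaper to code.
 */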
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) /* Decode a bittree starting from the most significant bit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) static __always_inline uint32_t rc_bittree(struct rc_dec *rc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) uint16_t *probs, uint32_t limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) uint32_t symbol = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) if (rc_bit(rc, &probs[symbol]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) symbol = (symbol << 1) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) symbol <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) } while (symbol < limit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) return symbol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) }
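
/*
 * Example (a sketch): with limit == 8 the loop runs three times and the
 * returned symbol is in the range [8, 15]; the decoded 3-bit value is
 * symbol - 8. Callers remove the extra top bit either by subtracting the
 * limit (as lzma_len() does) or by truncating to uint8_t (as
 * lzma_literal() does with its 0x100 limit).
 */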
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) /* Decode a bittree starting from the least significant bit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) static __always_inline void rc_bittree_reverse(struct rc_dec *rc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) uint16_t *probs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) uint32_t *dest, uint32_t limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) uint32_t symbol = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) uint32_t i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) if (rc_bit(rc, &probs[symbol])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) symbol = (symbol << 1) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) *dest += 1 << i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) symbol <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) } while (++i < limit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) /* Decode direct bits (fixed fifty-fifty probability) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) uint32_t mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) rc_normalize(rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) rc->range >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) rc->code -= rc->range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) mask = (uint32_t)0 - (rc->code >> 31);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) rc->code += rc->range & mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) *dest = (*dest << 1) + (mask + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) } while (--limit > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) }
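
/*
 * A note on the branchless trick above (a sketch): after the subtraction,
 * the sign bit of rc->code tells whether it went below zero. If it did,
 * mask becomes 0xFFFFFFFF, the subtraction is undone by adding range back,
 * and the decoded bit (mask + 1) is 0; otherwise mask is 0 and the decoded
 * bit is 1. This avoids an unpredictable branch for the fifty-fifty bits.
 */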
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) /********
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) * LZMA *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) ********/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) /* Get pointer to literal coder probability array. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) uint32_t prev_byte = dict_get(&s->dict, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) uint32_t low = prev_byte >> (8 - s->lzma.lc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) return s->lzma.literal[low + high];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) }
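
/*
 * Example (a sketch, using the common lc = 3, lp = 0 properties): low is
 * the previous byte's top three bits and high is always zero, so the
 * previous byte alone selects one of eight literal probability arrays.
 * With lp > 0, the low bits of the dictionary position would select
 * additional arrays.
 */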
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) /* Decode a literal (one 8-bit byte) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) static void lzma_literal(struct xz_dec_lzma2 *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) uint16_t *probs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) uint32_t symbol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) uint32_t match_byte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) uint32_t match_bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) uint32_t offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) uint32_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) probs = lzma_literal_probs(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) if (lzma_state_is_literal(s->lzma.state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) symbol = rc_bittree(&s->rc, probs, 0x100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) symbol = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) offset = 0x100;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) match_bit = match_byte & offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) match_byte <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) i = offset + match_bit + symbol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) if (rc_bit(&s->rc, &probs[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) symbol = (symbol << 1) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) offset &= match_bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) symbol <<= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) offset &= ~match_bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) } while (symbol < 0x100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) dict_put(&s->dict, (uint8_t)symbol);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) lzma_state_literal(&s->lzma.state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) /* Decode the length of the match into s->lzma.len. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) uint32_t pos_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) uint16_t *probs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) uint32_t limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) if (!rc_bit(&s->rc, &l->choice)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) probs = l->low[pos_state];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) limit = LEN_LOW_SYMBOLS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) s->lzma.len = MATCH_LEN_MIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) if (!rc_bit(&s->rc, &l->choice2)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) probs = l->mid[pos_state];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) limit = LEN_MID_SYMBOLS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) probs = l->high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) limit = LEN_HIGH_SYMBOLS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) + LEN_MID_SYMBOLS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) }
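
/*
 * Worked example (a sketch, assuming MATCH_LEN_MIN == 2 and
 * LEN_LOW_SYMBOLS == 8 as in xz_lzma2.h): if choice decodes to 1 and
 * choice2 to 0, the "mid" tree is used and len starts at 2 + 8 == 10;
 * rc_bittree() then returns a value in [8, 15], so a return value of 11
 * gives len == 10 + (11 - 8) == 13, which lies in the documented 10-17
 * range for the mid tree.
 */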
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) /* Decode a match. The distance will be stored in s->lzma.rep0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) uint16_t *probs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) uint32_t dist_slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) uint32_t limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) lzma_state_match(&s->lzma.state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) s->lzma.rep3 = s->lzma.rep2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) s->lzma.rep2 = s->lzma.rep1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) s->lzma.rep1 = s->lzma.rep0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) lzma_len(s, &s->lzma.match_len_dec, pos_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) if (dist_slot < DIST_MODEL_START) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) s->lzma.rep0 = dist_slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) limit = (dist_slot >> 1) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) s->lzma.rep0 = 2 + (dist_slot & 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) if (dist_slot < DIST_MODEL_END) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) s->lzma.rep0 <<= limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) probs = s->lzma.dist_special + s->lzma.rep0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) - dist_slot - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) rc_bittree_reverse(&s->rc, probs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) &s->lzma.rep0, limit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) s->lzma.rep0 <<= ALIGN_BITS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) rc_bittree_reverse(&s->rc, s->lzma.dist_align,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) &s->lzma.rep0, ALIGN_BITS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) }
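
/*
 * Example (a sketch): dist_slots 0-3 are the distances 0-3 themselves.
 * For dist_slot == 6, limit is (6 >> 1) - 1 == 2 footer bits and the base
 * distance is (2 + (6 & 1)) << 2 == 8, so after reading two bits from
 * dist_special the distance is in [8, 11]. For slots >= DIST_MODEL_END
 * the middle bits come from rc_direct() and only the lowest four bits
 * come from dist_align.
 */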
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) * Decode a repeated match. The distance is one of the four most recently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) * seen matches. The distance will be stored in s->lzma.rep0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) uint32_t tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) s->lzma.state][pos_state])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) lzma_state_short_rep(&s->lzma.state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) s->lzma.len = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) tmp = s->lzma.rep1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) tmp = s->lzma.rep2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) tmp = s->lzma.rep3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) s->lzma.rep3 = s->lzma.rep2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) s->lzma.rep2 = s->lzma.rep1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) s->lzma.rep1 = s->lzma.rep0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) s->lzma.rep0 = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) lzma_state_long_rep(&s->lzma.state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) lzma_len(s, &s->lzma.rep_len_dec, pos_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) /* LZMA decoder core */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) static bool lzma_main(struct xz_dec_lzma2 *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) uint32_t pos_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) /*
 * If the dictionary limit was reached during the previous call, try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) * finish the possibly pending repeat in the dictionary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) if (dict_has_space(&s->dict) && s->lzma.len > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) * Decode more LZMA symbols. One iteration may consume up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) * LZMA_IN_REQUIRED - 1 bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) pos_state = s->dict.pos & s->lzma.pos_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) if (!rc_bit(&s->rc, &s->lzma.is_match[
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) s->lzma.state][pos_state])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) lzma_literal(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) lzma_rep_match(s, pos_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) lzma_match(s, pos_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) * Having the range decoder always normalized when we are outside
 * this function makes it easier to correctly handle the end of the chunk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) rc_normalize(&s->rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) /*
 * Reset the LZMA decoder and range decoder state. The dictionary is not reset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) * here, because LZMA state may be reset without resetting the dictionary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) static void lzma_reset(struct xz_dec_lzma2 *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) uint16_t *probs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) s->lzma.state = STATE_LIT_LIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) s->lzma.rep0 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) s->lzma.rep1 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) s->lzma.rep2 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) s->lzma.rep3 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) * All probabilities are initialized to the same value. This hack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) * makes the code smaller by avoiding a separate loop for each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) * probability array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) *
 * This could be optimized so that only the part of the literal
 * probabilities that is actually required gets initialized. In
 * the common case we would write 12 KiB less.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) probs = s->lzma.is_match[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) for (i = 0; i < PROBS_TOTAL; ++i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) probs[i] = RC_BIT_MODEL_TOTAL / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) rc_reset(&s->rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) * from the decoded lp and pb values. On success, the LZMA decoder state is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) * reset and true is returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) static bool lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) if (props > (4 * 5 + 4) * 9 + 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) s->lzma.pos_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) while (props >= 9 * 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) props -= 9 * 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) ++s->lzma.pos_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) s->lzma.literal_pos_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) while (props >= 9) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) props -= 9;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) ++s->lzma.literal_pos_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) s->lzma.lc = props;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) lzma_reset(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) }
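/*
 * Illustrative example (not used by the code): the common lc=3, lp=0, pb=2
 * settings are encoded as the single byte (pb * 5 + lp) * 9 + lc
 * = (2 * 5 + 0) * 9 + 3 = 93, and feeding 93 to lzma_props() yields
 * pos_mask = (1 << 2) - 1 = 3, literal_pos_mask = (1 << 0) - 1 = 0 and
 * lc = 3.
 */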
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) /*********
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) * LZMA2 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) *********/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) * wrapper function takes care of making the LZMA decoder's assumption safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) * As long as there is plenty of input left to be decoded in the current LZMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) * chunk, we decode directly from the caller-supplied input buffer until
 * there are LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) * s->temp.buf, which (hopefully) gets filled on the next call to this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) * function. We decode a few bytes from the temporary buffer so that we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) * continue decoding from the caller-supplied input buffer again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) */
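/*
 * Rough worked example of the buffering (the numbers are illustrative):
 * with LZMA_IN_REQUIRED == 21, a chunk that still has 1000 compressed bytes
 * left and a caller buffer holding 100 of them gets
 * s->rc.in_limit = b->in_size - 21, so lzma_main() can decode straight from
 * b->in until no more than 21 input bytes remain (assuming the dictionary
 * limit is not hit first). Any remaining tail shorter than LZMA_IN_REQUIRED
 * bytes is then copied into s->temp.buf at the end of lzma2_lzma() and
 * finished together with the next input buffer.
 */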
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) static bool lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) size_t in_avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) uint32_t tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) in_avail = b->in_size - b->in_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) if (s->temp.size > 0 || s->lzma2.compressed == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) if (tmp > s->lzma2.compressed - s->temp.size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) tmp = s->lzma2.compressed - s->temp.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) if (tmp > in_avail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) tmp = in_avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) if (s->temp.size + tmp == s->lzma2.compressed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) memzero(s->temp.buf + s->temp.size + tmp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) sizeof(s->temp.buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) - s->temp.size - tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) s->rc.in_limit = s->temp.size + tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) s->temp.size += tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) b->in_pos += tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) s->rc.in = s->temp.buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) s->rc.in_pos = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) s->lzma2.compressed -= s->rc.in_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) if (s->rc.in_pos < s->temp.size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) s->temp.size -= s->rc.in_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) s->temp.size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) b->in_pos += s->rc.in_pos - s->temp.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) s->temp.size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) in_avail = b->in_size - b->in_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) if (in_avail >= LZMA_IN_REQUIRED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) s->rc.in = b->in;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) s->rc.in_pos = b->in_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) s->rc.in_limit = b->in_pos + s->lzma2.compressed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) if (!lzma_main(s))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) in_avail = s->rc.in_pos - b->in_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) if (in_avail > s->lzma2.compressed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) s->lzma2.compressed -= in_avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) b->in_pos = s->rc.in_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) in_avail = b->in_size - b->in_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) if (in_avail < LZMA_IN_REQUIRED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) if (in_avail > s->lzma2.compressed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) in_avail = s->lzma2.compressed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) s->temp.size = in_avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) b->in_pos += in_avail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) * Take care of the LZMA2 control layer, and forward the job of actual LZMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) * decoding or copying of uncompressed chunks to other functions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) struct xz_buf *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) uint32_t tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
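		/*
		 * Note that SEQ_LZMA_RUN must be allowed to run even when
		 * no new input is available: the previous call may have
		 * stopped only because the output buffer was full, and
		 * progress can still be made from a partially copied match
		 * (s->lzma.len > 0) or from chunk bytes already buffered in
		 * s->temp.buf.
		 */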
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) switch (s->lzma2.sequence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) case SEQ_CONTROL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) * LZMA2 control byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) * Exact values:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * 0x00 End marker
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) * 0x01 Dictionary reset followed by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) * an uncompressed chunk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) * 0x02 Uncompressed chunk (no dictionary reset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) * Highest three bits (s->control & 0xE0):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) * 0xE0 Dictionary reset, new properties and state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) * reset, followed by LZMA compressed chunk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) * 0xC0 New properties and state reset, followed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) * by LZMA compressed chunk (no dictionary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) * reset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) * 0xA0 State reset using old properties,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) * followed by LZMA compressed chunk (no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) * dictionary reset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) * 0x80 LZMA chunk (no dictionary or state reset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) * For LZMA compressed chunks, the lowest five bits
			 * (s->control & 0x1F) are the highest bits of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) * uncompressed size (bits 16-20).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) * A new LZMA2 stream must begin with a dictionary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) * reset. The first LZMA chunk must set new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) * properties and reset the LZMA state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) * Values that don't match anything described above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) * are invalid and we return XZ_DATA_ERROR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) */
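			/*
			 * Worked example (illustrative only): the chunk
			 * header bytes 0xE4 0x12 0x33 0x00 0x57 0x5D request
			 * a dictionary reset plus new properties and state
			 * reset (the 0xE0 bits of 0xE4), give an
			 * uncompressed size of
			 * (0x04 << 16) + (0x12 << 8) + 0x33 + 1 = 0x41234
			 * bytes and a compressed size of
			 * (0x00 << 8) + 0x57 + 1 = 0x58 bytes, and end with
			 * the properties byte 0x5D. The SEQ_UNCOMPRESSED_*,
			 * SEQ_COMPRESSED_* and SEQ_PROPERTIES cases below
			 * consume those bytes one at a time.
			 */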
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) tmp = b->in[b->in_pos++];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) if (tmp == 0x00)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) return XZ_STREAM_END;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) if (tmp >= 0xE0 || tmp == 0x01) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) s->lzma2.need_props = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) s->lzma2.need_dict_reset = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) dict_reset(&s->dict, b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) } else if (s->lzma2.need_dict_reset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) return XZ_DATA_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) if (tmp >= 0x80) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) s->lzma2.uncompressed = (tmp & 0x1F) << 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) if (tmp >= 0xC0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) * When there are new properties,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * state reset is done at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) * SEQ_PROPERTIES.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) s->lzma2.need_props = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) s->lzma2.next_sequence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) = SEQ_PROPERTIES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) } else if (s->lzma2.need_props) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) return XZ_DATA_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) s->lzma2.next_sequence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) = SEQ_LZMA_PREPARE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) if (tmp >= 0xA0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) lzma_reset(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) if (tmp > 0x02)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) return XZ_DATA_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) s->lzma2.sequence = SEQ_COMPRESSED_0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) s->lzma2.next_sequence = SEQ_COPY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) case SEQ_UNCOMPRESSED_1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) s->lzma2.uncompressed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) += (uint32_t)b->in[b->in_pos++] << 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) case SEQ_UNCOMPRESSED_2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) s->lzma2.uncompressed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) += (uint32_t)b->in[b->in_pos++] + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) s->lzma2.sequence = SEQ_COMPRESSED_0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) case SEQ_COMPRESSED_0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) s->lzma2.compressed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) = (uint32_t)b->in[b->in_pos++] << 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) s->lzma2.sequence = SEQ_COMPRESSED_1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) case SEQ_COMPRESSED_1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) s->lzma2.compressed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) += (uint32_t)b->in[b->in_pos++] + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) s->lzma2.sequence = s->lzma2.next_sequence;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) case SEQ_PROPERTIES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) if (!lzma_props(s, b->in[b->in_pos++]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) return XZ_DATA_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) s->lzma2.sequence = SEQ_LZMA_PREPARE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) /* fall through */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) case SEQ_LZMA_PREPARE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) if (s->lzma2.compressed < RC_INIT_BYTES)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) return XZ_DATA_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) if (!rc_read_init(&s->rc, b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) return XZ_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) s->lzma2.compressed -= RC_INIT_BYTES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) s->lzma2.sequence = SEQ_LZMA_RUN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) /* fall through */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) case SEQ_LZMA_RUN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) /*
			 * Set the dictionary limit to indicate how much we
			 * want to decode at most. Decode new data into the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) * dictionary. Flush the new data from dictionary to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) * b->out. Check if we finished decoding this chunk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) * In case the dictionary got full but we didn't fill
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) * the output buffer yet, we may run this loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) * multiple times without changing s->lzma2.sequence.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) */
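			/*
			 * For example (illustrative numbers): with 4 KiB of
			 * output space left and 64 KiB of the chunk still
			 * undecoded, dict_limit() allows at most 4 KiB of
			 * new data and dict_flush() moves it to b->out, so
			 * this case typically has to run again before
			 * s->lzma2.uncompressed reaches zero.
			 */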
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) dict_limit(&s->dict, min_t(size_t,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) b->out_size - b->out_pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) s->lzma2.uncompressed));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) if (!lzma2_lzma(s, b))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) return XZ_DATA_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) s->lzma2.uncompressed -= dict_flush(&s->dict, b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) if (s->lzma2.uncompressed == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) if (s->lzma2.compressed > 0 || s->lzma.len > 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) || !rc_is_finished(&s->rc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) return XZ_DATA_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) rc_reset(&s->rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) s->lzma2.sequence = SEQ_CONTROL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) } else if (b->out_pos == b->out_size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) || (b->in_pos == b->in_size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) && s->temp.size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) < s->lzma2.compressed)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) return XZ_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) case SEQ_COPY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) if (s->lzma2.compressed > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) return XZ_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) s->lzma2.sequence = SEQ_CONTROL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) return XZ_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) uint32_t dict_max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) if (s == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) s->dict.mode = mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) s->dict.size_max = dict_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) if (DEC_IS_PREALLOC(mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) s->dict.buf = vmalloc(dict_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) if (s->dict.buf == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) kfree(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) } else if (DEC_IS_DYNALLOC(mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) s->dict.buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) s->dict.allocated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) }
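	/*
	 * In single-call mode (DEC_IS_SINGLE) nothing needs to be allocated
	 * here: dict_reset() later points the dictionary directly at the
	 * caller's output buffer.
	 */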
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) /* This limits dictionary size to 3 GiB to keep parsing simpler. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) if (props > 39)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) return XZ_OPTIONS_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) s->dict.size = 2 + (props & 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) s->dict.size <<= (props >> 1) + 11;
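	/*
	 * For example, props == 0 gives 2 << 11 = 4 KiB, props == 19 gives
	 * 3 << 20 = 3 MiB and the maximum props == 39 gives 3 << 30 = 3 GiB.
	 */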
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) if (DEC_IS_MULTI(s->dict.mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) if (s->dict.size > s->dict.size_max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) return XZ_MEMLIMIT_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) s->dict.end = s->dict.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) if (DEC_IS_DYNALLOC(s->dict.mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) if (s->dict.allocated < s->dict.size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) s->dict.allocated = s->dict.size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) vfree(s->dict.buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) s->dict.buf = vmalloc(s->dict.size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) if (s->dict.buf == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) s->dict.allocated = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) return XZ_MEM_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) s->lzma.len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) s->lzma2.sequence = SEQ_CONTROL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) s->lzma2.need_dict_reset = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) s->temp.size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) return XZ_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) if (DEC_IS_MULTI(s->dict.mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) vfree(s->dict.buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) kfree(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) }
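
/*
 * A minimal usage sketch, illustrative only: in_buf/in_len, out_buf/out_len
 * and dict_props are placeholders, the memory limit of 1 << 26 is arbitrary,
 * and in practice this decoder is normally driven by the .xz stream decoder
 * rather than called directly.
 *
 *	struct xz_dec_lzma2 *s = xz_dec_lzma2_create(XZ_DYNALLOC, 1 << 26);
 *	struct xz_buf b = {
 *		.in = in_buf, .in_pos = 0, .in_size = in_len,
 *		.out = out_buf, .out_pos = 0, .out_size = out_len
 *	};
 *	enum xz_ret ret;
 *
 *	if (s == NULL || xz_dec_lzma2_reset(s, dict_props) != XZ_OK)
 *		return -1;
 *
 *	do
 *		ret = xz_dec_lzma2_run(s, &b);
 *	while (ret == XZ_OK && b.in_pos < b.in_size);
 *
 *	xz_dec_lzma2_end(s);
 *
 * Here dict_props is the dictionary size byte from the LZMA2 filter
 * properties, ret is XZ_STREAM_END when the 0x00 end marker was reached,
 * and a multi-call caller would refill b.in and drain b.out between
 * xz_dec_lzma2_run() calls instead of stopping when the input runs out.
 */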