^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * Huffman encoder, part of New Generation Entropy library
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2013-2016, Yann Collet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Redistribution and use in source and binary forms, with or without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * modification, are permitted provided that the following conditions are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * met:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * * Redistributions of source code must retain the above copyright
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * notice, this list of conditions and the following disclaimer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * * Redistributions in binary form must reproduce the above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * copyright notice, this list of conditions and the following disclaimer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * in the documentation and/or other materials provided with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * distribution.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * This program is free software; you can redistribute it and/or modify it under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * the terms of the GNU General Public License version 2 as published by the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * Free Software Foundation. This program is dual-licensed; you may select
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * either version 2 of the GNU General Public License ("GPL") or BSD license
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * ("BSD").
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * You can contact the author at :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) /* **************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * Includes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) ****************************************************************/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #include "bitstream.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #include "fse.h" /* header compression */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) #include "huf.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #include <linux/string.h> /* memcpy, memset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) /* **************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * Error Management
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) ****************************************************************/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) #define HUF_STATIC_ASSERT(c) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) } /* use only *after* variable declarations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) #define CHECK_V_F(e, f) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) size_t const e = f; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) if (ERR_isError(e)) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) return f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) #define CHECK_F(f) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) CHECK_V_F(_var_err__, f); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) /* **************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * Utils
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) ****************************************************************/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) /* *******************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * HUF : Huffman block compression
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) *********************************************************/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) /* HUF_compressWeights() :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * Same as FSE_compress(), but dedicated to huff0's weights compression.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * The use case needs much less stack memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) size_t HUF_compressWeights_wksp(void *dst, size_t dstSize, const void *weightTable, size_t wtSize, void *workspace, size_t workspaceSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) BYTE *const ostart = (BYTE *)dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) BYTE *op = ostart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) BYTE *const oend = ostart + dstSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) U32 maxSymbolValue = HUF_TABLELOG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) FSE_CTable *CTable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) U32 *count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) S16 *norm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) size_t spaceUsed32 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) HUF_STATIC_ASSERT(sizeof(FSE_CTable) == sizeof(U32));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) CTable = (FSE_CTable *)((U32 *)workspace + spaceUsed32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) spaceUsed32 += FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) count = (U32 *)workspace + spaceUsed32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) spaceUsed32 += HUF_TABLELOG_MAX + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) norm = (S16 *)((U32 *)workspace + spaceUsed32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) spaceUsed32 += ALIGN(sizeof(S16) * (HUF_TABLELOG_MAX + 1), sizeof(U32)) >> 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) if ((spaceUsed32 << 2) > workspaceSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) return ERROR(tableLog_tooLarge);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) workspace = (U32 *)workspace + spaceUsed32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) workspaceSize -= (spaceUsed32 << 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) /* init conditions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) if (wtSize <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) return 0; /* Not compressible */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) /* Scan input and build symbol stats */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) if (maxCount == wtSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) return 1; /* only a single symbol in src : rle */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) if (maxCount == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) return 0; /* each symbol present maximum once => not compressible */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) CHECK_F(FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) /* Write table description header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) CHECK_V_F(hSize, FSE_writeNCount(op, oend - op, norm, maxSymbolValue, tableLog));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) op += hSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) /* Compress */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) CHECK_F(FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, workspace, workspaceSize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) if (cSize == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) return 0; /* not enough space for compressed data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) op += cSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) return op - ostart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) struct HUF_CElt_s {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) U16 val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) BYTE nbBits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) }; /* typedef'd to HUF_CElt within "huf.h" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) /*! HUF_writeCTable_wksp() :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) `CTable` : Huffman tree to save, using huf representation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) @return : size of saved CTable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, U32 maxSymbolValue, U32 huffLog, void *workspace, size_t workspaceSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) BYTE *op = (BYTE *)dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) U32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) BYTE *bitsToWeight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) BYTE *huffWeight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) size_t spaceUsed32 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) bitsToWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) spaceUsed32 += ALIGN(HUF_TABLELOG_MAX + 1, sizeof(U32)) >> 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX, sizeof(U32)) >> 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) if ((spaceUsed32 << 2) > workspaceSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) return ERROR(tableLog_tooLarge);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) workspace = (U32 *)workspace + spaceUsed32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) workspaceSize -= (spaceUsed32 << 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) /* check conditions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) return ERROR(maxSymbolValue_tooLarge);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) /* convert to weight */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) bitsToWeight[0] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) for (n = 1; n < huffLog + 1; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) for (n = 0; n < maxSymbolValue; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) huffWeight[n] = bitsToWeight[CTable[n].nbBits];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) /* attempt weights compression by FSE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) CHECK_V_F(hSize, HUF_compressWeights_wksp(op + 1, maxDstSize - 1, huffWeight, maxSymbolValue, workspace, workspaceSize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) if ((hSize > 1) & (hSize < maxSymbolValue / 2)) { /* FSE compressed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) op[0] = (BYTE)hSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) return hSize + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) /* write raw values as 4-bits (max : 15) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) if (maxSymbolValue > (256 - 128))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) if (((maxSymbolValue + 1) / 2) + 1 > maxDstSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue - 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) for (n = 0; n < maxSymbolValue; n += 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) op[(n / 2) + 1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n + 1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) return ((maxSymbolValue + 1) / 2) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) size_t HUF_readCTable_wksp(HUF_CElt *CTable, U32 maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) U32 *rankVal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) BYTE *huffWeight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) U32 tableLog = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) U32 nbSymbols = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) size_t readSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) size_t spaceUsed32 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) rankVal = (U32 *)workspace + spaceUsed32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) if ((spaceUsed32 << 2) > workspaceSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) return ERROR(tableLog_tooLarge);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) workspace = (U32 *)workspace + spaceUsed32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) workspaceSize -= (spaceUsed32 << 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) /* get symbol weights */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) readSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) if (ERR_isError(readSize))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) return readSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) /* check result */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) if (tableLog > HUF_TABLELOG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) return ERROR(tableLog_tooLarge);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) if (nbSymbols > maxSymbolValue + 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) return ERROR(maxSymbolValue_tooSmall);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) /* Prepare base value per rank */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) U32 n, nextRankStart = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) for (n = 1; n <= tableLog; n++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) U32 curr = nextRankStart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) nextRankStart += (rankVal[n] << (n - 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) rankVal[n] = curr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) /* fill nbBits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) U32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) for (n = 0; n < nbSymbols; n++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) const U32 w = huffWeight[n];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) /* fill val */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) U16 nbPerRank[HUF_TABLELOG_MAX + 2] = {0}; /* support w=0=>n=tableLog+1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) U16 valPerRank[HUF_TABLELOG_MAX + 2] = {0};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) U32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) for (n = 0; n < nbSymbols; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) nbPerRank[CTable[n].nbBits]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) /* determine stating value per rank */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) valPerRank[tableLog + 1] = 0; /* for w==0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) U16 min = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) U32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) for (n = tableLog; n > 0; n--) { /* start at n=tablelog <-> w=1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) valPerRank[n] = min; /* get starting value within each rank */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) min += nbPerRank[n];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) min >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) /* assign value within rank, symbol order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) U32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) for (n = 0; n <= maxSymbolValue; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) CTable[n].val = valPerRank[CTable[n].nbBits]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) return readSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) typedef struct nodeElt_s {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) U32 count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) U16 parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) BYTE byte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) BYTE nbBits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) } nodeElt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) static U32 HUF_setMaxHeight(nodeElt *huffNode, U32 lastNonNull, U32 maxNbBits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) const U32 largestBits = huffNode[lastNonNull].nbBits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) if (largestBits <= maxNbBits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) return largestBits; /* early exit : no elt > maxNbBits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) /* there are several too large elements (at least >= 2) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) int totalCost = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) const U32 baseCost = 1 << (largestBits - maxNbBits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) U32 n = lastNonNull;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) while (huffNode[n].nbBits > maxNbBits) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) huffNode[n].nbBits = (BYTE)maxNbBits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) n--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) } /* n stops at huffNode[n].nbBits <= maxNbBits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) while (huffNode[n].nbBits == maxNbBits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) n--; /* n end at index of smallest symbol using < maxNbBits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) /* renorm totalCost */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) /* repay normalized cost */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) U32 const noSymbol = 0xF0F0F0F0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) U32 rankLast[HUF_TABLELOG_MAX + 2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) int pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) /* Get pos of last (smallest) symbol per rank */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) memset(rankLast, 0xF0, sizeof(rankLast));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) U32 currNbBits = maxNbBits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) for (pos = n; pos >= 0; pos--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) if (huffNode[pos].nbBits >= currNbBits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) currNbBits = huffNode[pos].nbBits; /* < maxNbBits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) rankLast[maxNbBits - currNbBits] = pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) while (totalCost > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) for (; nBitsToDecrease > 1; nBitsToDecrease--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) U32 highPos = rankLast[nBitsToDecrease];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) U32 lowPos = rankLast[nBitsToDecrease - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) if (highPos == noSymbol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) if (lowPos == noSymbol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) U32 const highTotal = huffNode[highPos].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) U32 const lowTotal = 2 * huffNode[lowPos].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) if (highTotal <= lowTotal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) while ((nBitsToDecrease <= HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) nBitsToDecrease++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) totalCost -= 1 << (nBitsToDecrease - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) if (rankLast[nBitsToDecrease - 1] == noSymbol)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) rankLast[nBitsToDecrease - 1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) huffNode[rankLast[nBitsToDecrease]].nbBits++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) rankLast[nBitsToDecrease] = noSymbol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) rankLast[nBitsToDecrease]--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits - nBitsToDecrease)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) } /* while (totalCost > 0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) while (totalCost < 0) { /* Sometimes, cost correction overshoot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) (using maxNbBits) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) while (huffNode[n].nbBits == maxNbBits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) n--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) huffNode[n + 1].nbBits--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) rankLast[1] = n + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) totalCost++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) huffNode[rankLast[1] + 1].nbBits--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) rankLast[1]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) totalCost++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) } /* there are several too large elements (at least >= 2) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) return maxNbBits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) typedef struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) U32 base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) U32 curr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) } rankPos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) static void HUF_sort(nodeElt *huffNode, const U32 *count, U32 maxSymbolValue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) rankPos rank[32];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) U32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) memset(rank, 0, sizeof(rank));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) for (n = 0; n <= maxSymbolValue; n++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) U32 r = BIT_highbit32(count[n] + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) rank[r].base++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) for (n = 30; n > 0; n--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) rank[n - 1].base += rank[n].base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) for (n = 0; n < 32; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) rank[n].curr = rank[n].base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) for (n = 0; n <= maxSymbolValue; n++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) U32 const c = count[n];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) U32 const r = BIT_highbit32(c + 1) + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) U32 pos = rank[r].curr++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) while ((pos > rank[r].base) && (c > huffNode[pos - 1].count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) huffNode[pos] = huffNode[pos - 1], pos--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) huffNode[pos].count = c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) huffNode[pos].byte = (BYTE)n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) /** HUF_buildCTable_wksp() :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) #define STARTNODE (HUF_SYMBOLVALUE_MAX + 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) typedef nodeElt huffNodeTable[2 * HUF_SYMBOLVALUE_MAX + 1 + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) nodeElt *const huffNode0 = (nodeElt *)workSpace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) nodeElt *const huffNode = huffNode0 + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) U32 n, nonNullRank;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) int lowS, lowN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) U16 nodeNb = STARTNODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) U32 nodeRoot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) /* safety checks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) if (wkspSize < sizeof(huffNodeTable))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) return ERROR(GENERIC); /* workSpace is not large enough */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) if (maxNbBits == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) maxNbBits = HUF_TABLELOG_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) return ERROR(GENERIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) memset(huffNode0, 0, sizeof(huffNodeTable));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) /* sort, decreasing order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) HUF_sort(huffNode, count, maxSymbolValue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) /* init for parents */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) nonNullRank = maxSymbolValue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) while (huffNode[nonNullRank].count == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) nonNullRank--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) lowS = nonNullRank;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) nodeRoot = nodeNb + lowS - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) lowN = nodeNb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS - 1].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) huffNode[lowS].parent = huffNode[lowS - 1].parent = nodeNb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) nodeNb++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) lowS -= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) for (n = nodeNb; n <= nodeRoot; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) huffNode[n].count = (U32)(1U << 30);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) huffNode0[0].count = (U32)(1U << 31); /* fake entry, strong barrier */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) /* create parents */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) while (nodeNb <= nodeRoot) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) huffNode[n1].parent = huffNode[n2].parent = nodeNb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) nodeNb++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) /* distribute weights (unlimited tree height) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) huffNode[nodeRoot].nbBits = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) for (n = nodeRoot - 1; n >= STARTNODE; n--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) for (n = 0; n <= nonNullRank; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) /* enforce maxTableLog */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) /* fill result into tree (val, nbBits) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) U16 nbPerRank[HUF_TABLELOG_MAX + 1] = {0};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) U16 valPerRank[HUF_TABLELOG_MAX + 1] = {0};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) if (maxNbBits > HUF_TABLELOG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) return ERROR(GENERIC); /* check fit into table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) for (n = 0; n <= nonNullRank; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) nbPerRank[huffNode[n].nbBits]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) /* determine stating value per rank */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) U16 min = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) for (n = maxNbBits; n > 0; n--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) valPerRank[n] = min; /* get starting value within each rank */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) min += nbPerRank[n];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) min >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) for (n = 0; n <= maxSymbolValue; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) for (n = 0; n <= maxSymbolValue; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) return maxNbBits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) static size_t HUF_estimateCompressedSize(HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) size_t nbBits = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) int s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) for (s = 0; s <= (int)maxSymbolValue; ++s) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) nbBits += CTable[s].nbBits * count[s];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) return nbBits >> 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) static int HUF_validateCTable(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) int bad = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) int s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) for (s = 0; s <= (int)maxSymbolValue; ++s) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) return !bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) static void HUF_encodeSymbol(BIT_CStream_t *bitCPtr, U32 symbol, const HUF_CElt *CTable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) #define HUF_FLUSHBITS(s) BIT_flushBits(s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) #define HUF_FLUSHBITS_1(stream) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) HUF_FLUSHBITS(stream)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) #define HUF_FLUSHBITS_2(stream) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) HUF_FLUSHBITS(stream)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) const BYTE *ip = (const BYTE *)src;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) BYTE *const ostart = (BYTE *)dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) BYTE *const oend = ostart + dstSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) BYTE *op = ostart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) size_t n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) BIT_CStream_t bitC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) /* init */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) if (dstSize < 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) return 0; /* not enough space to compress */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) size_t const initErr = BIT_initCStream(&bitC, op, oend - op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) if (HUF_isError(initErr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) n = srcSize & ~3; /* join to mod 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) switch (srcSize & 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) /* fall through */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) /* fall through */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) case 0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) default:;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) for (; n > 0; n -= 4) { /* note : n&3==0 at this stage */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) HUF_encodeSymbol(&bitC, ip[n - 1], CTable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) HUF_FLUSHBITS_1(&bitC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) HUF_encodeSymbol(&bitC, ip[n - 2], CTable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) HUF_FLUSHBITS_2(&bitC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) HUF_encodeSymbol(&bitC, ip[n - 3], CTable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) HUF_FLUSHBITS_1(&bitC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) HUF_encodeSymbol(&bitC, ip[n - 4], CTable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) HUF_FLUSHBITS(&bitC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) return BIT_closeCStream(&bitC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) size_t const segmentSize = (srcSize + 3) / 4; /* first 3 segments */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) const BYTE *ip = (const BYTE *)src;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) const BYTE *const iend = ip + srcSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) BYTE *const ostart = (BYTE *)dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) BYTE *const oend = ostart + dstSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) BYTE *op = ostart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) if (dstSize < 6 + 1 + 1 + 1 + 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) return 0; /* minimum space to compress successfully */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) if (srcSize < 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) return 0; /* no saving possible : too small input */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) op += 6; /* jumpTable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) if (cSize == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) ZSTD_writeLE16(ostart, (U16)cSize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) op += cSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) ip += segmentSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) if (cSize == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) ZSTD_writeLE16(ostart + 2, (U16)cSize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) op += cSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) ip += segmentSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) if (cSize == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) ZSTD_writeLE16(ostart + 4, (U16)cSize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) op += cSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) ip += segmentSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, iend - ip, CTable));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) if (cSize == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) op += cSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) return op - ostart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) static size_t HUF_compressCTable_internal(BYTE *const ostart, BYTE *op, BYTE *const oend, const void *src, size_t srcSize, unsigned singleStream,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) const HUF_CElt *CTable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) size_t const cSize =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) singleStream ? HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) if (HUF_isError(cSize)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) return cSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) if (cSize == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) } /* uncompressible */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) op += cSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) /* check compressibility */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) if ((size_t)(op - ostart) >= srcSize - 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) return op - ostart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) /* `workSpace` must a table of at least 1024 unsigned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) static size_t HUF_compress_internal(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) unsigned singleStream, void *workSpace, size_t wkspSize, HUF_CElt *oldHufTable, HUF_repeat *repeat, int preferRepeat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) BYTE *const ostart = (BYTE *)dst;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) BYTE *const oend = ostart + dstSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) BYTE *op = ostart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) U32 *count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) HUF_CElt *CTable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) /* checks & inits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) return ERROR(GENERIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) if (!srcSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) if (!dstSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) return 0; /* cannot fit within dst budget */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) if (srcSize > HUF_BLOCKSIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) return ERROR(srcSize_wrong); /* curr block size limit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) if (huffLog > HUF_TABLELOG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) return ERROR(tableLog_tooLarge);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) if (!maxSymbolValue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) if (!huffLog)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) huffLog = HUF_TABLELOG_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) count = (U32 *)workSpace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) workSpace = (BYTE *)workSpace + countSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) wkspSize -= countSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) CTable = (HUF_CElt *)workSpace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) workSpace = (BYTE *)workSpace + CTableSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) wkspSize -= CTableSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) /* Scan input and build symbol stats */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) CHECK_V_F(largest, FSE_count_wksp(count, &maxSymbolValue, (const BYTE *)src, srcSize, (U32 *)workSpace));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) if (largest == srcSize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) *ostart = ((const BYTE *)src)[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) } /* single symbol, rle */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) if (largest <= (srcSize >> 7) + 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) return 0; /* Fast heuristic : not compressible enough */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) /* Check validity of previous table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) *repeat = HUF_repeat_none;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) /* Heuristic : use existing table for small inputs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) /* Build Huffman Tree */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) CHECK_V_F(maxBits, HUF_buildCTable_wksp(CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) huffLog = (U32)maxBits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) /* Zero the unused symbols so we can check it for validity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) /* Write table description header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, CTable, maxSymbolValue, huffLog, workSpace, wkspSize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) /* Check if using the previous table will be beneficial */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) if (repeat && *repeat != HUF_repeat_none) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) /* Use the new table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) if (hSize + 12ul >= srcSize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) op += hSize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) if (repeat) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) *repeat = HUF_repeat_none;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) if (oldHufTable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) memcpy(oldHufTable, CTable, CTableSize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) } /* Save the new table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) size_t wkspSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) preferRepeat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) size_t wkspSize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) preferRepeat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) }