^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * lib/ts_fsm.c A naive finite state machine text search approach
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Authors: Thomas Graf <tgraf@suug.ch>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * ==========================================================================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * A finite state machine consists of n states (struct ts_fsm_token)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * representing the pattern as a finite automaton. The data is read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * sequentially on an octet basis. Every state token specifies the number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * of recurrences and the type of value accepted which can be either a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * specific character or ctype based set of characters. The available
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * type of recurrences include 1, (0|1), [0 n], and [1 n].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * The algorithm differs between strict/non-strict mode specifying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * whether the pattern has to start at the first octet. Strict mode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * is enabled by default and can be disabled by inserting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * TS_FSM_HEAD_IGNORE as the first token in the chain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * The runtime performance of the algorithm should be around O(n),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * however while in strict mode the average runtime can be better.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <linux/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include <linux/ctype.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <linux/textsearch.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include <linux/textsearch_fsm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) struct ts_fsm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) unsigned int ntokens;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) struct ts_fsm_token tokens[];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) /* other values derived from ctype.h */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #define _A 0x100 /* ascii */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #define _W 0x200 /* wildcard */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) /* Map to _ctype flags and some magic numbers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) static const u16 token_map[TS_FSM_TYPE_MAX+1] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) [TS_FSM_SPECIFIC] = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) [TS_FSM_WILDCARD] = _W,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) [TS_FSM_CNTRL] = _C,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) [TS_FSM_LOWER] = _L,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) [TS_FSM_UPPER] = _U,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) [TS_FSM_PUNCT] = _P,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) [TS_FSM_SPACE] = _S,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) [TS_FSM_DIGIT] = _D,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) [TS_FSM_XDIGIT] = _D | _X,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) [TS_FSM_ALPHA] = _U | _L,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) [TS_FSM_ALNUM] = _U | _L | _D,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) [TS_FSM_PRINT] = _P | _U | _L | _D | _SP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) [TS_FSM_GRAPH] = _P | _U | _L | _D,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) [TS_FSM_ASCII] = _A,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) static const u16 token_lookup_tbl[256] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 0- 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 4- 7 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) _W|_A|_C, _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C|_S, /* 8- 11 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C, _W|_A|_C, /* 12- 15 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 16- 19 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 20- 23 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 24- 27 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 28- 31 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) _W|_A|_S|_SP, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 32- 35 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 36- 39 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 40- 43 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 44- 47 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) _W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 48- 51 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) _W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 52- 55 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) _W|_A|_D, _W|_A|_D, _W|_A|_P, _W|_A|_P, /* 56- 59 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 60- 63 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) _W|_A|_P, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, /* 64- 67 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U, /* 68- 71 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 72- 75 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 76- 79 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 80- 83 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 84- 87 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_P, /* 88- 91 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 92- 95 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) _W|_A|_P, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, /* 96- 99 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L, /* 100-103 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 104-107 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 108-111 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 112-115 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 116-119 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_P, /* 120-123 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_C, /* 124-127 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) _W, _W, _W, _W, /* 128-131 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) _W, _W, _W, _W, /* 132-135 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) _W, _W, _W, _W, /* 136-139 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) _W, _W, _W, _W, /* 140-143 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) _W, _W, _W, _W, /* 144-147 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) _W, _W, _W, _W, /* 148-151 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) _W, _W, _W, _W, /* 152-155 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) _W, _W, _W, _W, /* 156-159 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) _W|_S|_SP, _W|_P, _W|_P, _W|_P, /* 160-163 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) _W|_P, _W|_P, _W|_P, _W|_P, /* 164-167 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) _W|_P, _W|_P, _W|_P, _W|_P, /* 168-171 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) _W|_P, _W|_P, _W|_P, _W|_P, /* 172-175 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) _W|_P, _W|_P, _W|_P, _W|_P, /* 176-179 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) _W|_P, _W|_P, _W|_P, _W|_P, /* 180-183 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) _W|_P, _W|_P, _W|_P, _W|_P, /* 184-187 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) _W|_P, _W|_P, _W|_P, _W|_P, /* 188-191 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) _W|_U, _W|_U, _W|_U, _W|_U, /* 192-195 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) _W|_U, _W|_U, _W|_U, _W|_U, /* 196-199 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) _W|_U, _W|_U, _W|_U, _W|_U, /* 200-203 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) _W|_U, _W|_U, _W|_U, _W|_U, /* 204-207 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) _W|_U, _W|_U, _W|_U, _W|_U, /* 208-211 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) _W|_U, _W|_U, _W|_U, _W|_P, /* 212-215 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) _W|_U, _W|_U, _W|_U, _W|_U, /* 216-219 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) _W|_U, _W|_U, _W|_U, _W|_L, /* 220-223 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) _W|_L, _W|_L, _W|_L, _W|_L, /* 224-227 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) _W|_L, _W|_L, _W|_L, _W|_L, /* 228-231 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) _W|_L, _W|_L, _W|_L, _W|_L, /* 232-235 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) _W|_L, _W|_L, _W|_L, _W|_L, /* 236-239 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) _W|_L, _W|_L, _W|_L, _W|_L, /* 240-243 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) _W|_L, _W|_L, _W|_L, _W|_P, /* 244-247 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) _W|_L, _W|_L, _W|_L, _W|_L, /* 248-251 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) _W|_L, _W|_L, _W|_L, _W|_L}; /* 252-255 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) static inline int match_token(struct ts_fsm_token *t, u8 d)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) if (t->type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) return (token_lookup_tbl[d] & t->type) != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) return t->value == d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) static unsigned int fsm_find(struct ts_config *conf, struct ts_state *state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) struct ts_fsm *fsm = ts_config_priv(conf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) struct ts_fsm_token *cur = NULL, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) unsigned int match_start, block_idx = 0, tok_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) unsigned block_len = 0, strict, consumed = state->offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) const u8 *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) #define GET_NEXT_BLOCK() \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) ({ consumed += block_idx; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) block_idx = 0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) block_len = conf->get_next_block(consumed, &data, conf, state); })
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) #define TOKEN_MISMATCH() \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) if (strict) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) goto no_match; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) block_idx++; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) goto startover; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) } while(0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) #define end_of_data() unlikely(block_idx >= block_len && !GET_NEXT_BLOCK())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) if (end_of_data())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) goto no_match;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) strict = fsm->tokens[0].recur != TS_FSM_HEAD_IGNORE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) startover:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) match_start = consumed + block_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) for (tok_idx = 0; tok_idx < fsm->ntokens; tok_idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) cur = &fsm->tokens[tok_idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) if (likely(tok_idx < (fsm->ntokens - 1)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) next = &fsm->tokens[tok_idx + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) next = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) switch (cur->recur) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) case TS_FSM_SINGLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) if (end_of_data())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) goto no_match;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) if (!match_token(cur, data[block_idx]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) TOKEN_MISMATCH();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) case TS_FSM_PERHAPS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) if (end_of_data() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) !match_token(cur, data[block_idx]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) case TS_FSM_MULTI:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) if (end_of_data())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) goto no_match;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) if (!match_token(cur, data[block_idx]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) TOKEN_MISMATCH();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) block_idx++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) /* fall through */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) case TS_FSM_ANY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) if (next == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) goto found_match;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) if (end_of_data())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) while (!match_token(next, data[block_idx])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) if (!match_token(cur, data[block_idx]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) TOKEN_MISMATCH();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) block_idx++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) if (end_of_data())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) goto no_match;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) * Optimization: Prefer small local loop over jumping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) * back and forth until garbage at head is munched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) case TS_FSM_HEAD_IGNORE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) if (end_of_data())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) while (!match_token(next, data[block_idx])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) * Special case, don't start over upon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) * a mismatch, give the user the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) * chance to specify the type of data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) * allowed to be ignored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) if (!match_token(cur, data[block_idx]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) goto no_match;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) block_idx++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) if (end_of_data())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) goto no_match;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) match_start = consumed + block_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) block_idx++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) if (end_of_data())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) goto found_match;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) no_match:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) return UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) found_match:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) state->offset = consumed + block_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) return match_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) static struct ts_config *fsm_init(const void *pattern, unsigned int len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) gfp_t gfp_mask, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) int i, err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) struct ts_config *conf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) struct ts_fsm *fsm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) struct ts_fsm_token *tokens = (struct ts_fsm_token *) pattern;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) unsigned int ntokens = len / sizeof(*tokens);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) size_t priv_size = sizeof(*fsm) + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) if (len % sizeof(struct ts_fsm_token) || ntokens < 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) goto errout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) if (flags & TS_IGNORECASE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) goto errout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) for (i = 0; i < ntokens; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) struct ts_fsm_token *t = &tokens[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) if (t->type > TS_FSM_TYPE_MAX || t->recur > TS_FSM_RECUR_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) goto errout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) if (t->recur == TS_FSM_HEAD_IGNORE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) (i != 0 || i == (ntokens - 1)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) goto errout;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) conf = alloc_ts_config(priv_size, gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) if (IS_ERR(conf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) return conf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) conf->flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) fsm = ts_config_priv(conf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) fsm->ntokens = ntokens;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) memcpy(fsm->tokens, pattern, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) for (i = 0; i < fsm->ntokens; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) struct ts_fsm_token *t = &fsm->tokens[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) t->type = token_map[t->type];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) return conf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) errout:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) return ERR_PTR(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) static void *fsm_get_pattern(struct ts_config *conf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) struct ts_fsm *fsm = ts_config_priv(conf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) return fsm->tokens;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) static unsigned int fsm_get_pattern_len(struct ts_config *conf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) struct ts_fsm *fsm = ts_config_priv(conf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) return fsm->ntokens * sizeof(struct ts_fsm_token);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) static struct ts_ops fsm_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) .name = "fsm",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) .find = fsm_find,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) .init = fsm_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) .get_pattern = fsm_get_pattern,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) .get_pattern_len = fsm_get_pattern_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) .owner = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) .list = LIST_HEAD_INIT(fsm_ops.list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) static int __init init_fsm(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) return textsearch_register(&fsm_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) static void __exit exit_fsm(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) textsearch_unregister(&fsm_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) MODULE_LICENSE("GPL");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) module_init(init_fsm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) module_exit(exit_fsm);