^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Regression1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Description:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Salman Qazi describes the following radix-tree bug:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
 * In the following case, we can get a deadlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * 0. The radix tree contains two items, one has the index 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * 1. The reader (in this case find_get_pages) takes the rcu_read_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * 2. The reader acquires slot(s) for item(s) including the index 0 item.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * 3. The non-zero index item is deleted, and as a consequence the other item
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * is moved to the root of the tree. The place where it used to be is queued
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * for deletion after the readers finish.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * 3b. The zero item is deleted, removing it from the direct slot, it remains in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * the rcu-delayed indirect node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * 4. The reader looks at the index 0 slot, and finds that the page has 0 ref
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * 5. The reader looks at it again, hoping that the item will either be freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * or the ref count will increase. This never happens, as the slot it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * looking at will never be updated. Also, this slot can never be reclaimed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * because the reader is holding rcu_read_lock and is in an infinite loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * The fix is to re-use the same "indirect" pointer case that requires a slot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * lookup retry into a general "retry the lookup" bit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * Running:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * This test should run to completion in a few seconds. The above bug would
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * cause it to hang indefinitely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * Upstream commit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * Not yet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #include <linux/gfp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #include <linux/radix-tree.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #include <linux/rcupdate.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #include <stdlib.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #include <pthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #include <stdio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #include <assert.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #include "regression.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
/* Shared radix tree operated on concurrently by the writer and reader
 * threads in regression1_fn(). */
static RADIX_TREE(mt_tree, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
/*
 * Minimal stand-in for the kernel's struct page: just enough state to
 * reproduce the lookup/delete race described in the file header.
 */
struct page {
	pthread_mutex_t lock;	/* serialises reads/writes of count */
	struct rcu_head rcu;	/* defers freeing until RCU readers drain */
	int count;		/* refcount stand-in: 1 at allocation, dropped to 0 before free */
	unsigned long index;	/* index the page is inserted at */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) static struct page *page_alloc(int index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) struct page *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) p = malloc(sizeof(struct page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) p->count = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) p->index = index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) pthread_mutex_init(&p->lock, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) return p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) static void page_rcu_free(struct rcu_head *rcu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) struct page *p = container_of(rcu, struct page, rcu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) assert(!p->count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) pthread_mutex_destroy(&p->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) free(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73)
/* Free @p via call_rcu() rather than immediately, so concurrent lockless
 * readers in find_get_pages() never see reclaimed memory. */
static void page_free(struct page *p)
{
	call_rcu(&p->rcu, page_rcu_free);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78)
/*
 * Lockless lookup modelled on the kernel's find_get_pages(): walk mt_tree
 * under rcu_read_lock() starting at @start and collect page pointers into
 * @pages, returning how many were found.
 *
 * NOTE(review): @nr_pages is not used to bound the walk — the caller must
 * size @pages for every possible match (here the tree holds at most two
 * entries, so pages[10] in the caller is ample). Confirm before reusing.
 */
static unsigned find_get_pages(unsigned long start,
		unsigned int nr_pages, struct page **pages)
{
	XA_STATE(xas, &mt_tree, start);
	struct page *page;
	unsigned int ret = 0;

	rcu_read_lock();
	xas_for_each(&xas, page, ULONG_MAX) {
		/* Skip retry entries left by a concurrent delete; this is
		 * the "retry the lookup" fix this regression test covers. */
		if (xas_retry(&xas, page))
			continue;

		pthread_mutex_lock(&page->lock);
		if (!page->count)
			goto unlock;

		/* don't actually update page refcount */
		pthread_mutex_unlock(&page->lock);

		/* Has the page moved? */
		if (unlikely(page != xas_reload(&xas)))
			goto put_page;

		pages[ret] = page;
		ret++;
		continue;
unlock:
		pthread_mutex_unlock(&page->lock);
put_page:
		/* Slot went stale under us; restart the walk from here
		 * instead of spinning on the dead slot (the original bug). */
		xas_reset(&xas);
	}
	rcu_read_unlock();
	return ret;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) static pthread_barrier_t worker_barrier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115)
/*
 * Worker thread body. All threads rendezvous at worker_barrier;
 * pthread_barrier_wait() returns PTHREAD_BARRIER_SERIAL_THREAD in exactly
 * one thread, which becomes the writer — the others become readers.
 *
 * Writer: repeatedly inserts items at indices 0 and 1, then deletes
 * index 1 (shrinking the tree so the index-0 item moves to the root)
 * and then index 0, dropping each refcount under the tree lock and
 * freeing via RCU. This is the shrink/delete sequence that left stale
 * slots behind in the original bug.
 *
 * Reader: hammers find_get_pages() over the same window; with the bug
 * present the reader spins forever on a stale slot (step 5 in the file
 * header), so the test hangs instead of completing.
 *
 * @arg carries the thread ordinal from regression1_test() but is unused.
 */
static void *regression1_fn(void *arg)
{
	rcu_register_thread();

	if (pthread_barrier_wait(&worker_barrier) ==
			PTHREAD_BARRIER_SERIAL_THREAD) {
		int j;

		for (j = 0; j < 1000000; j++) {
			struct page *p;

			p = page_alloc(0);
			xa_lock(&mt_tree);
			radix_tree_insert(&mt_tree, 0, p);
			xa_unlock(&mt_tree);

			p = page_alloc(1);
			xa_lock(&mt_tree);
			radix_tree_insert(&mt_tree, 1, p);
			xa_unlock(&mt_tree);

			/* Delete index 1: tree shrinks, item 0 moves to
			 * the root, old node is queued for RCU freeing. */
			xa_lock(&mt_tree);
			p = radix_tree_delete(&mt_tree, 1);
			pthread_mutex_lock(&p->lock);
			p->count--;
			pthread_mutex_unlock(&p->lock);
			xa_unlock(&mt_tree);
			page_free(p);

			/* Delete index 0: the rcu-delayed node a reader may
			 * still hold now points at a dead slot. */
			xa_lock(&mt_tree);
			p = radix_tree_delete(&mt_tree, 0);
			pthread_mutex_lock(&p->lock);
			p->count--;
			pthread_mutex_unlock(&p->lock);
			xa_unlock(&mt_tree);
			page_free(p);
		}
	} else {
		int j;

		for (j = 0; j < 100000000; j++) {
			struct page *pages[10];

			find_get_pages(0, 10, pages);
		}
	}

	rcu_unregister_thread();

	return NULL;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
/* Worker thread handles; allocated and freed by regression1_test(). */
static pthread_t *threads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) void regression1_test(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) int nr_threads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) long arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) /* Regression #1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) printv(1, "running regression test 1, should finish in under a minute\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) nr_threads = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) pthread_barrier_init(&worker_barrier, NULL, nr_threads);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) threads = malloc(nr_threads * sizeof(pthread_t *));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) for (i = 0; i < nr_threads; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) arg = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) perror("pthread_create");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) exit(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) for (i = 0; i < nr_threads; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) if (pthread_join(threads[i], NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) perror("pthread_join");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) exit(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) free(threads);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) printv(1, "regression test 1, done\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) }