| // SPDX-License-Identifier: GPL-2.0 |
| |
| /* |
|  * Copyright 2016-2019 HabanaLabs, Ltd. |
|  * All Rights Reserved. |
|  */ |
| |
| #include <uapi/misc/habanalabs.h> |
| #include "habanalabs.h" |
| #include "../include/hw_ip/mmu/mmu_general.h" |
| |
| #include <linux/uaccess.h> |
| #include <linux/slab.h> |
| #include <linux/genalloc.h> |
| |
| #define HL_MMU_DEBUG 0 |
| |
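| /* |
|  * The va ranges in the context object hold lists of the available chunks of |
|  * device virtual memory: one range for regular host allocations, one for host |
|  * huge-page allocations and one for DRAM allocations. |
|  * |
|  * On each mapping of physical pages, a suitable virtual block is taken from |
|  * the relevant list. If the block is larger than the requested size, it is |
|  * split and the remainder stays in the list. |
|  * |
|  * On each unmapping, the virtual block is returned to the list and merged |
|  * with adjacent blocks if together they form a contiguous chunk. |
|  */ |
| |
| /* |
|  * alloc_device_memory - allocate device memory |
|  * |
|  * @ctx                 : current context |
|  * @args                : host parameters containing the requested size |
|  * @ret_handle          : result handle |
|  * |
|  * This function does the following: |
|  * - Allocate the requested size rounded up to 'dram_page_size' pages |
|  * - Return a unique handle for the allocation |
|  */ |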
| static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, |
| <------><------><------><------>u32 *ret_handle) |
| { |
| <------>struct hl_device *hdev = ctx->hdev; |
| <------>struct hl_vm *vm = &hdev->vm; |
| <------>struct hl_vm_phys_pg_pack *phys_pg_pack; |
| <------>u64 paddr = 0, total_size, num_pgs, i; |
| <------>u32 num_curr_pgs, page_size, page_shift; |
| <------>int handle, rc; |
| <------>bool contiguous; |
| |
| <------>num_curr_pgs = 0; |
| <------>page_size = hdev->asic_prop.dram_page_size; |
| <------>page_shift = __ffs(page_size); |
| <------>num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift; |
| <------>total_size = num_pgs << page_shift; |
| |
| <------>if (!total_size) { |
| <------><------>dev_err(hdev->dev, "Cannot allocate 0 bytes\n"); |
| <------><------>return -EINVAL; |
| <------>} |
| |
| <------>contiguous = args->flags & HL_MEM_CONTIGUOUS; |
| |
| <------>if (contiguous) { |
| <------><------>paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size); |
| <------><------>if (!paddr) { |
| <------><------><------>dev_err(hdev->dev, |
| <------><------><------><------>"failed to allocate %llu contiguous pages with total size of %llu\n", |
| <------><------><------><------>num_pgs, total_size); |
| <------><------><------>return -ENOMEM; |
| <------><------>} |
| <------>} |
| |
| <------>phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); |
| <------>if (!phys_pg_pack) { |
| <------><------>rc = -ENOMEM; |
| <------><------>goto pages_pack_err; |
| <------>} |
| |
| <------>phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK; |
| <------>phys_pg_pack->asid = ctx->asid; |
| <------>phys_pg_pack->npages = num_pgs; |
| <------>phys_pg_pack->page_size = page_size; |
| <------>phys_pg_pack->total_size = total_size; |
| <------>phys_pg_pack->flags = args->flags; |
| <------>phys_pg_pack->contiguous = contiguous; |
| |
| <------>phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL); |
| <------>if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) { |
| <------><------>rc = -ENOMEM; |
| <------><------>goto pages_arr_err; |
| <------>} |
| |
| <------>if (phys_pg_pack->contiguous) { |
| <------><------>for (i = 0 ; i < num_pgs ; i++) |
| <------><------><------>phys_pg_pack->pages[i] = paddr + i * page_size; |
| <------>} else { |
| <------><------>for (i = 0 ; i < num_pgs ; i++) { |
| <------><------><------>phys_pg_pack->pages[i] = (u64) gen_pool_alloc( |
| <------><------><------><------><------><------><------>vm->dram_pg_pool, |
| <------><------><------><------><------><------><------>page_size); |
| <------><------><------>if (!phys_pg_pack->pages[i]) { |
| <------><------><------><------>dev_err(hdev->dev, |
| <------><------><------><------><------>"Failed to allocate device memory (out of memory)\n"); |
| <------><------><------><------>rc = -ENOMEM; |
| <------><------><------><------>goto page_err; |
| <------><------><------>} |
| |
| <------><------><------>num_curr_pgs++; |
| <------><------>} |
| <------>} |
| |
| <------>spin_lock(&vm->idr_lock); |
| <------>handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0, |
| <------><------><------><------>GFP_ATOMIC); |
| <------>spin_unlock(&vm->idr_lock); |
| |
| <------>if (handle < 0) { |
| <------><------>dev_err(hdev->dev, "Failed to get handle for page\n"); |
| <------><------>rc = -EFAULT; |
| <------><------>goto idr_err; |
| <------>} |
| |
| <------>for (i = 0 ; i < num_pgs ; i++) |
| <------><------>kref_get(&vm->dram_pg_pool_refcount); |
| |
| <------>phys_pg_pack->handle = handle; |
| |
| <------>atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem); |
| <------>atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem); |
| |
| <------>*ret_handle = handle; |
| |
| <------>return 0; |
| |
| idr_err: |
| page_err: |
| <------>if (!phys_pg_pack->contiguous) |
| <------><------>for (i = 0 ; i < num_curr_pgs ; i++) |
| <------><------><------>gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i], |
| <------><------><------><------><------>page_size); |
| |
| <------>kvfree(phys_pg_pack->pages); |
| pages_arr_err: |
| <------>kfree(phys_pg_pack); |
| pages_pack_err: |
| <------>if (contiguous) |
| <------><------>gen_pool_free(vm->dram_pg_pool, paddr, total_size); |
| |
| <------>return rc; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
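| /* |
|  * dma_map_host_va - pin host memory and DMA map it |
|  * |
|  * @hdev                : habanalabs device structure |
|  * @addr                : the host virtual address of the memory area |
|  * @size                : the size of the memory area |
|  * @p_userptr           : pointer to result userptr structure |
|  * |
|  * This function does the following: |
|  * - Allocate a userptr structure and pin the host memory behind it |
|  * - Map the resulting scatter-gather table for bidirectional DMA |
|  */ |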
| static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, |
| <------><------><------><------>struct hl_userptr **p_userptr) |
| { |
| <------>struct hl_userptr *userptr; |
| <------>int rc; |
| |
| <------>userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); |
| <------>if (!userptr) { |
| <------><------>rc = -ENOMEM; |
| <------><------>goto userptr_err; |
| <------>} |
| |
| <------>rc = hl_pin_host_memory(hdev, addr, size, userptr); |
| <------>if (rc) { |
| <------><------>dev_err(hdev->dev, "Failed to pin host memory\n"); |
| <------><------>goto pin_err; |
| <------>} |
| |
| <------>rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, |
| <------><------><------><------><------>userptr->sgt->nents, DMA_BIDIRECTIONAL); |
| <------>if (rc) { |
| <------><------>dev_err(hdev->dev, "failed to map sgt with DMA region\n"); |
| <------><------>goto dma_map_err; |
| <------>} |
| |
| <------>userptr->dma_mapped = true; |
| <------>userptr->dir = DMA_BIDIRECTIONAL; |
| <------>userptr->vm_type = VM_TYPE_USERPTR; |
| |
| <------>*p_userptr = userptr; |
| |
| <------>return 0; |
| |
| dma_map_err: |
| <------>hl_unpin_host_memory(hdev, userptr); |
| pin_err: |
| <------>kfree(userptr); |
| userptr_err: |
| |
| <------>return rc; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
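| /* |
|  * dma_unmap_host_va - undo dma_map_host_va |
|  * |
|  * @hdev                : habanalabs device structure |
|  * @userptr             : userptr to unpin, unmap and free |
|  */ |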
| static void dma_unmap_host_va(struct hl_device *hdev, |
| <------><------><------><------>struct hl_userptr *userptr) |
| { |
| <------>hl_unpin_host_memory(hdev, userptr); |
| <------>kfree(userptr); |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
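| /* |
|  * dram_pg_pool_do_release - free DRAM pool resources |
|  * |
|  * @ref                 : pointer to the reference count object |
|  * |
|  * Called when the last reference to the DRAM page pool is dropped. Destroys |
|  * the physical pages handles idr and the pool itself. |
|  */ |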
| static void dram_pg_pool_do_release(struct kref *ref) |
| { |
| <------>struct hl_vm *vm = container_of(ref, struct hl_vm, |
| <------><------><------>dram_pg_pool_refcount); |
| |
| <------>/* |
| <------> * free the idr here as only here we know for sure that there are no |
| <------> * allocated physical pages and hence there are no handles in use |
| <------> */ |
| <------>idr_destroy(&vm->phys_pg_pack_handles); |
| <------>gen_pool_destroy(vm->dram_pg_pool); |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
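| /* |
|  * free_phys_pg_pack - free physical page pack |
|  * |
|  * @hdev                : habanalabs device structure |
|  * @phys_pg_pack        : physical page pack to free |
|  * |
|  * This function does the following: |
|  * - For DRAM memory only, return each physical block to the general pool and |
|  *   drop the pool reference taken for it |
|  * - Free the hl_vm_phys_pg_pack structure |
|  */ |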
| static void free_phys_pg_pack(struct hl_device *hdev, |
| <------><------><------><------>struct hl_vm_phys_pg_pack *phys_pg_pack) |
| { |
| <------>struct hl_vm *vm = &hdev->vm; |
| <------>u64 i; |
| |
| <------>if (!phys_pg_pack->created_from_userptr) { |
| <------><------>if (phys_pg_pack->contiguous) { |
| <------><------><------>gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], |
| <------><------><------><------><------>phys_pg_pack->total_size); |
| |
| <------><------><------>for (i = 0; i < phys_pg_pack->npages ; i++) |
| <------><------><------><------>kref_put(&vm->dram_pg_pool_refcount, |
| <------><------><------><------><------>dram_pg_pool_do_release); |
| <------><------>} else { |
| <------><------><------>for (i = 0 ; i < phys_pg_pack->npages ; i++) { |
| <------><------><------><------>gen_pool_free(vm->dram_pg_pool, |
| <------><------><------><------><------><------>phys_pg_pack->pages[i], |
| <------><------><------><------><------><------>phys_pg_pack->page_size); |
| <------><------><------><------>kref_put(&vm->dram_pg_pool_refcount, |
| <------><------><------><------><------>dram_pg_pool_do_release); |
| <------><------><------>} |
| <------><------>} |
| <------>} |
| |
| <------>kvfree(phys_pg_pack->pages); |
| <------>kfree(phys_pg_pack); |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
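| /* |
|  * free_device_memory - free device memory |
|  * |
|  * @ctx                 : current context |
|  * @handle              : handle of the memory chunk to free |
|  * |
|  * This function frees the device memory related to the given handle, unless |
|  * the memory is still mapped, in which case -EINVAL is returned. |
|  */ |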
| static int free_device_memory(struct hl_ctx *ctx, u32 handle) |
| { |
| <------>struct hl_device *hdev = ctx->hdev; |
| <------>struct hl_vm *vm = &hdev->vm; |
| <------>struct hl_vm_phys_pg_pack *phys_pg_pack; |
| |
| <------>spin_lock(&vm->idr_lock); |
| <------>phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); |
| <------>if (phys_pg_pack) { |
| <------><------>if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) { |
| <------><------><------>dev_err(hdev->dev, "handle %u is mapped, cannot free\n", |
| <------><------><------><------>handle); |
| <------><------><------>spin_unlock(&vm->idr_lock); |
| <------><------><------>return -EINVAL; |
| <------><------>} |
| |
| <------><------>/* |
| <------><------> * must remove from idr before the freeing of the physical |
| <------><------> * pages as the refcount of the pool is also the trigger of the |
| <------><------> * idr destroy |
| <------><------> */ |
| <------><------>idr_remove(&vm->phys_pg_pack_handles, handle); |
| <------><------>spin_unlock(&vm->idr_lock); |
| |
| <------><------>atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem); |
| <------><------>atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem); |
| |
| <------><------>free_phys_pg_pack(hdev, phys_pg_pack); |
| <------>} else { |
| <------><------>spin_unlock(&vm->idr_lock); |
| <------><------>dev_err(hdev->dev, |
| <------><------><------>"free device memory failed, no match for handle %u\n", |
| <------><------><------>handle); |
| <------><------>return -EINVAL; |
| <------>} |
| |
| <------>return 0; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
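| /* |
|  * clear_va_list_locked - free virtual addresses list |
|  * |
|  * @hdev                : habanalabs device structure |
|  * @va_list             : list of virtual addresses to free |
|  * |
|  * This function should be called only when the va_list lock is taken. |
|  */ |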
| static void clear_va_list_locked(struct hl_device *hdev, |
| <------><------>struct list_head *va_list) |
| { |
| <------>struct hl_vm_va_block *va_block, *tmp; |
| |
| <------>list_for_each_entry_safe(va_block, tmp, va_list, node) { |
| <------><------>list_del(&va_block->node); |
| <------><------>kfree(va_block); |
| <------>} |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
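| /* |
|  * print_va_list_locked - print virtual addresses list |
|  * |
|  * @hdev                : habanalabs device structure |
|  * @va_list             : list of virtual addresses to print |
|  * |
|  * Debug helper, compiled in only when HL_MMU_DEBUG is set. This function |
|  * should be called only when the va_list lock is taken. |
|  */ |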
| static void print_va_list_locked(struct hl_device *hdev, |
| <------><------>struct list_head *va_list) |
| { |
| #if HL_MMU_DEBUG |
| <------>struct hl_vm_va_block *va_block; |
| |
| <------>dev_dbg(hdev->dev, "print va list:\n"); |
| |
| <------>list_for_each_entry(va_block, va_list, node) |
| <------><------>dev_dbg(hdev->dev, |
| <------><------><------>"va block, start: 0x%llx, end: 0x%llx, size: %llu\n", |
| <------><------><------>va_block->start, va_block->end, va_block->size); |
| #endif |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
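| /* |
|  * merge_va_blocks_locked - merge a virtual block if possible |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * @va_list             : pointer to the virtual addresses block list |
|  * @va_block            : virtual block to merge with adjacent blocks |
|  * |
|  * This function merges the given block with its lower and/or upper neighbor |
|  * when their addresses are contiguous. It should be called only when the |
|  * va_list lock is taken. |
|  */ |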
| static void merge_va_blocks_locked(struct hl_device *hdev, |
| <------><------>struct list_head *va_list, struct hl_vm_va_block *va_block) |
| { |
| <------>struct hl_vm_va_block *prev, *next; |
| |
| <------>prev = list_prev_entry(va_block, node); |
| <------>if (&prev->node != va_list && prev->end + 1 == va_block->start) { |
| <------><------>prev->end = va_block->end; |
| <------><------>prev->size = prev->end - prev->start; |
| <------><------>list_del(&va_block->node); |
| <------><------>kfree(va_block); |
| <------><------>va_block = prev; |
| <------>} |
| |
| <------>next = list_next_entry(va_block, node); |
| <------>if (&next->node != va_list && va_block->end + 1 == next->start) { |
| <------><------>next->start = va_block->start; |
| <------><------>next->size = next->end - next->start; |
| <------><------>list_del(&va_block->node); |
| <------><------>kfree(va_block); |
| <------>} |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
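| /* |
|  * add_va_block_locked - add a virtual block to the virtual addresses list |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * @va_list             : pointer to the virtual addresses block list |
|  * @start               : start virtual address |
|  * @end                 : end virtual address |
|  * |
|  * This function does the following: |
|  * - Add the given block to the list, keeping it sorted by start address |
|  * - Merge it with adjacent blocks if a contiguous block can be created |
|  * - This function should be called only when the va_list lock is taken |
|  */ |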
| static int add_va_block_locked(struct hl_device *hdev, |
| <------><------>struct list_head *va_list, u64 start, u64 end) |
| { |
| <------>struct hl_vm_va_block *va_block, *res = NULL; |
| <------>u64 size = end - start; |
| |
| <------>print_va_list_locked(hdev, va_list); |
| |
| <------>list_for_each_entry(va_block, va_list, node) { |
| <------><------>/* a returned block must not overlap any existing free block */ |
| <------><------>if (hl_mem_area_crosses_range(start, size, va_block->start, |
| <------><------><------><------>va_block->end)) { |
| <------><------><------>dev_err(hdev->dev, |
| <------><------><------><------>"block crossing ranges at start 0x%llx, end 0x%llx\n", |
| <------><------><------><------>va_block->start, va_block->end); |
| <------><------><------>return -EINVAL; |
| <------><------>} |
| |
| <------><------>if (va_block->end < start) |
| <------><------><------>res = va_block; |
| <------>} |
| |
| <------>va_block = kmalloc(sizeof(*va_block), GFP_KERNEL); |
| <------>if (!va_block) |
| <------><------>return -ENOMEM; |
| |
| <------>va_block->start = start; |
| <------>va_block->end = end; |
| <------>va_block->size = size; |
| |
| <------>if (!res) |
| <------><------>list_add(&va_block->node, va_list); |
| <------>else |
| <------><------>list_add(&va_block->node, &res->node); |
| |
| <------>merge_va_blocks_locked(hdev, va_list, va_block); |
| |
| <------>print_va_list_locked(hdev, va_list); |
| |
| <------>return 0; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
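| /* |
|  * add_va_block - wrapper of add_va_block_locked |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * @va_range            : pointer to the virtual addresses range object |
|  * @start               : start virtual address |
|  * @end                 : end virtual address |
|  * |
|  * This function takes the range lock and calls add_va_block_locked. |
|  */ |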
| static inline int add_va_block(struct hl_device *hdev, |
| <------><------>struct hl_va_range *va_range, u64 start, u64 end) |
| { |
| <------>int rc; |
| |
| <------>mutex_lock(&va_range->lock); |
| <------>rc = add_va_block_locked(hdev, &va_range->list, start, end); |
| <------>mutex_unlock(&va_range->lock); |
| |
| <------>return rc; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
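| /* |
|  * get_va_block - get a virtual block for the given size and alignment |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * @va_range            : pointer to the virtual addresses range |
|  * @size                : requested block size |
|  * @hint_addr           : hint for the requested address by the user |
|  * @va_block_align      : required alignment of the virtual block start address |
|  * |
|  * This function does the following: |
|  * - Iterate over the virtual block list to find a suitable block for the |
|  *   given size and alignment, preferring the hint address if it fits |
|  * - Reserve the requested space and update the list |
|  * - Return the start address of the reserved block, or 0 on failure |
|  */ |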
| static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range, |
| <------><------><------>u64 size, u64 hint_addr, u32 va_block_align) |
| { |
| <------>struct hl_vm_va_block *va_block, *new_va_block = NULL; |
| <------>u64 valid_start, valid_size, prev_start, prev_end, align_mask, |
| <------><------>res_valid_start = 0, res_valid_size = 0; |
| <------>bool add_prev = false; |
| |
| <------>align_mask = ~((u64)va_block_align - 1); |
| |
| <------>/* ignore the hint address if it is not aligned to the block alignment */ |
| <------>if (hint_addr & (va_block_align - 1)) |
| <------><------>hint_addr = 0; |
| |
| <------>mutex_lock(&va_range->lock); |
| |
| <------>print_va_list_locked(hdev, &va_range->list); |
| |
| <------>list_for_each_entry(va_block, &va_range->list, node) { |
| <------><------>/* calc the first possible aligned address in this block */ |
| <------><------>valid_start = va_block->start; |
| |
| <------><------>if (valid_start & (va_block_align - 1)) { |
| <------><------><------>valid_start &= align_mask; |
| <------><------><------>valid_start += va_block_align; |
| <------><------><------>if (valid_start > va_block->end) |
| <------><------><------><------>continue; |
| <------><------>} |
| |
| <------><------>valid_size = va_block->end - valid_start; |
| |
| <------><------>if (valid_size >= size && |
| <------><------><------>(!new_va_block || valid_size < res_valid_size)) { |
| <------><------><------>new_va_block = va_block; |
| <------><------><------>res_valid_start = valid_start; |
| <------><------><------>res_valid_size = valid_size; |
| <------><------>} |
| |
| <------><------>if (hint_addr && hint_addr >= valid_start && |
| <------><------><------><------>((hint_addr + size) <= va_block->end)) { |
| <------><------><------>new_va_block = va_block; |
| <------><------><------>res_valid_start = hint_addr; |
| <------><------><------>res_valid_size = valid_size; |
| <------><------><------>break; |
| <------><------>} |
| <------>} |
| |
| <------>if (!new_va_block) { |
| <------><------>dev_err(hdev->dev, "no available va block for size %llu\n", |
| <------><------><------><------>size); |
| <------><------>goto out; |
| <------>} |
| |
| <------>if (res_valid_start > new_va_block->start) { |
| <------><------>prev_start = new_va_block->start; |
| <------><------>prev_end = res_valid_start - 1; |
| |
| <------><------>new_va_block->start = res_valid_start; |
| <------><------>new_va_block->size = res_valid_size; |
| |
| <------><------>add_prev = true; |
| <------>} |
| |
| <------>if (new_va_block->size > size) { |
| <------><------>new_va_block->start += size; |
| <------><------>new_va_block->size = new_va_block->end - new_va_block->start; |
| <------>} else { |
| <------><------>list_del(&new_va_block->node); |
| <------><------>kfree(new_va_block); |
| <------>} |
| |
| <------>if (add_prev) |
| <------><------>add_va_block_locked(hdev, &va_range->list, prev_start, |
| <------><------><------><------>prev_end); |
| |
| <------>print_va_list_locked(hdev, &va_range->list); |
| out: |
| <------>mutex_unlock(&va_range->lock); |
| |
| <------>return res_valid_start; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
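| /* |
|  * get_sg_info - get number of pages and the DMA address from SG list entry |
|  * |
|  * @sg                  : the SG list entry |
|  * @dma_addr            : pointer to DMA address to return |
|  * |
|  * Returns the number of PAGE_SIZE pages this entry covers, rounded up. |
|  */ |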
| static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) |
| { |
| <------>*dma_addr = sg_dma_address(sg); |
| |
| <------>return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) + |
| <------><------><------>(PAGE_SIZE - 1)) >> PAGE_SHIFT; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
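| /* |
|  * init_phys_pg_pack_from_userptr - initialize physical page pack from host |
|  *                                  memory |
|  * |
|  * @ctx                 : current context |
|  * @userptr             : userptr to initialize from |
|  * @pphys_pg_pack       : result pointer |
|  * |
|  * This function does the following: |
|  * - Create a physical page pack from the DMA addresses of the pinned host |
|  *   memory |
|  * - Use huge pages (pmmu_huge page size) when all DMA chunks are suitably |
|  *   aligned and sized |
|  */ |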
| static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, |
| <------><------><------><------>struct hl_userptr *userptr, |
| <------><------><------><------>struct hl_vm_phys_pg_pack **pphys_pg_pack) |
| { |
| <------>struct hl_vm_phys_pg_pack *phys_pg_pack; |
| <------>struct scatterlist *sg; |
| <------>dma_addr_t dma_addr; |
| <------>u64 page_mask, total_npages; |
| <------>u32 npages, page_size = PAGE_SIZE, |
| <------><------>huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size; |
| <------>bool first = true, is_huge_page_opt = true; |
| <------>int rc, i, j; |
| <------>u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); |
| |
| <------>phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); |
| <------>if (!phys_pg_pack) |
| <------><------>return -ENOMEM; |
| |
| <------>phys_pg_pack->vm_type = userptr->vm_type; |
| <------>phys_pg_pack->created_from_userptr = true; |
| <------>phys_pg_pack->asid = ctx->asid; |
| <------>atomic_set(&phys_pg_pack->mapping_cnt, 1); |
| |
| <------>/* Only if all dma_addrs are aligned to 2MB and their |
| <------> * sizes are at least 2MB, we can use huge page mapping. |
| <------> * We limit the 2MB optimization to this condition, |
| <------> * since later on we acquire the related VA range as one |
| <------> * consecutive block. |
| <------> */ |
| <------>total_npages = 0; |
| <------>for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { |
| <------><------>npages = get_sg_info(sg, &dma_addr); |
| |
| <------><------>total_npages += npages; |
| |
| <------><------>if ((npages % pgs_in_huge_page) || |
| <------><------><------><------><------>(dma_addr & (huge_page_size - 1))) |
| <------><------><------>is_huge_page_opt = false; |
| <------>} |
| |
| <------>if (is_huge_page_opt) { |
| <------><------>page_size = huge_page_size; |
| <------><------>do_div(total_npages, pgs_in_huge_page); |
| <------>} |
| |
| <------>page_mask = ~(((u64) page_size) - 1); |
| |
| <------>phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64), |
| <------><------><------><------><------><------>GFP_KERNEL); |
| <------>if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) { |
| <------><------>rc = -ENOMEM; |
| <------><------>goto page_pack_arr_mem_err; |
| <------>} |
| |
| <------>phys_pg_pack->npages = total_npages; |
| <------>phys_pg_pack->page_size = page_size; |
| <------>phys_pg_pack->total_size = total_npages * page_size; |
| |
| <------>j = 0; |
| <------>for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { |
| <------><------>npages = get_sg_info(sg, &dma_addr); |
| |
| <------><------>/* align down to physical page size and save the offset */ |
| <------><------>if (first) { |
| <------><------><------>first = false; |
| <------><------><------>phys_pg_pack->offset = dma_addr & (page_size - 1); |
| <------><------><------>dma_addr &= page_mask; |
| <------><------>} |
| |
| <------><------>while (npages) { |
| <------><------><------>phys_pg_pack->pages[j++] = dma_addr; |
| <------><------><------>dma_addr += page_size; |
| |
| <------><------><------>if (is_huge_page_opt) |
| <------><------><------><------>npages -= pgs_in_huge_page; |
| <------><------><------>else |
| <------><------><------><------>npages--; |
| <------><------>} |
| <------>} |
| |
| <------>*pphys_pg_pack = phys_pg_pack; |
| |
| <------>return 0; |
| |
| page_pack_arr_mem_err: |
| <------>kfree(phys_pg_pack); |
| |
| <------>return rc; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
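| /* |
|  * map_phys_pg_pack - map the physical page pack |
|  * |
|  * @ctx                 : current context |
|  * @vaddr               : start address of the virtual area to map from |
|  * @phys_pg_pack        : the pack of physical pages to map to |
|  * |
|  * This function does the following: |
|  * - Map each physical chunk to its matching virtual address |
|  * - Keep the number of successful mappings so they can be unmapped on failure |
|  * - Return 0 on success, error code otherwise |
|  */ |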
| static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, |
| <------><------><------><------>struct hl_vm_phys_pg_pack *phys_pg_pack) |
| { |
| <------>struct hl_device *hdev = ctx->hdev; |
| <------>u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; |
| <------>u32 page_size = phys_pg_pack->page_size; |
| <------>int rc = 0; |
| |
| <------>for (i = 0 ; i < phys_pg_pack->npages ; i++) { |
| <------><------>paddr = phys_pg_pack->pages[i]; |
| |
| <------><------>rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size, |
| <------><------><------><------>(i + 1) == phys_pg_pack->npages); |
| <------><------>if (rc) { |
| <------><------><------>dev_err(hdev->dev, |
| <------><------><------><------>"map failed for handle %u, npages: %llu, mapped: %llu", |
| <------><------><------><------>phys_pg_pack->handle, phys_pg_pack->npages, |
| <------><------><------><------>mapped_pg_cnt); |
| <------><------><------>goto err; |
| <------><------>} |
| |
| <------><------>mapped_pg_cnt++; |
| <------><------>next_vaddr += page_size; |
| <------>} |
| |
| <------>return 0; |
| |
| err: |
| <------>next_vaddr = vaddr; |
| <------>for (i = 0 ; i < mapped_pg_cnt ; i++) { |
| <------><------>if (hl_mmu_unmap(ctx, next_vaddr, page_size, |
| <------><------><------><------><------>(i + 1) == mapped_pg_cnt)) |
| <------><------><------>dev_warn_ratelimited(hdev->dev, |
| <------><------><------><------>"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n", |
| <------><------><------><------><------>phys_pg_pack->handle, next_vaddr, |
| <------><------><------><------><------>phys_pg_pack->pages[i], page_size); |
| |
| <------><------>next_vaddr += page_size; |
| <------>} |
| |
| <------>return rc; |
| } |
| |
| |
| |
| |
| |
| |
| |
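| /* |
|  * unmap_phys_pg_pack - unmap the physical page pack |
|  * |
|  * @ctx                 : current context |
|  * @vaddr               : start address of the virtual area to unmap |
|  * @phys_pg_pack        : the pack of physical pages to unmap |
|  */ |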
| static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, |
| <------><------><------><------>struct hl_vm_phys_pg_pack *phys_pg_pack) |
| { |
| <------>struct hl_device *hdev = ctx->hdev; |
| <------>u64 next_vaddr, i; |
| <------>u32 page_size; |
| |
| <------>page_size = phys_pg_pack->page_size; |
| <------>next_vaddr = vaddr; |
| |
| <------>for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { |
| <------><------>if (hl_mmu_unmap(ctx, next_vaddr, page_size, |
| <------><------><------><------> (i + 1) == phys_pg_pack->npages)) |
| <------><------><------>dev_warn_ratelimited(hdev->dev, |
| <------><------><------>"unmap failed for vaddr: 0x%llx\n", next_vaddr); |
| |
| <------><------>/* |
| <------><------> * unmapping on Palladium can be really long, so avoid a CPU |
| <------><------> * soft lockup bug by sleeping a little between unmapping pages |
| <------><------> */ |
| <------><------>if (hdev->pldm) |
| <------><------><------>usleep_range(500, 1000); |
| <------>} |
| } |
| |
| static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, |
| <------><------><------><------>u64 *paddr) |
| { |
| <------>struct hl_device *hdev = ctx->hdev; |
| <------>struct hl_vm *vm = &hdev->vm; |
| <------>struct hl_vm_phys_pg_pack *phys_pg_pack; |
| <------>u32 handle; |
| |
| <------>handle = lower_32_bits(args->map_device.handle); |
| <------>spin_lock(&vm->idr_lock); |
| <------>phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); |
| <------>if (!phys_pg_pack) { |
| <------><------>spin_unlock(&vm->idr_lock); |
| <------><------>dev_err(hdev->dev, "no match for handle %u\n", handle); |
| <------><------>return -EINVAL; |
| <------>} |
| |
| <------>*paddr = phys_pg_pack->pages[0]; |
| |
| <------>spin_unlock(&vm->idr_lock); |
| |
| <------>return 0; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
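| /* |
|  * map_device_va - map the given memory |
|  * |
|  * @ctx                 : current context |
|  * @args                : host parameters with device memory handle or host |
|  *                        virtual address and size |
|  * @device_addr         : pointer to result device virtual address |
|  * |
|  * This function does the following: |
|  * - If given a physical device memory handle, map it to a device virtual |
|  *   block and return the start address of this block |
|  * - If given a host virtual address and size, find the related physical |
|  *   pages, map a device virtual block to these pages and return the start |
|  *   address of this block |
|  */ |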
| static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, |
| <------><------>u64 *device_addr) |
| { |
| <------>struct hl_device *hdev = ctx->hdev; |
| <------>struct hl_vm *vm = &hdev->vm; |
| <------>struct hl_vm_phys_pg_pack *phys_pg_pack; |
| <------>struct hl_userptr *userptr = NULL; |
| <------>struct hl_vm_hash_node *hnode; |
| <------>struct hl_va_range *va_range; |
| <------>enum vm_type_t *vm_type; |
| <------>u64 ret_vaddr, hint_addr; |
| <------>u32 handle = 0, va_block_align; |
| <------>int rc; |
| <------>bool is_userptr = args->flags & HL_MEM_USERPTR; |
| |
| <------>/* Assume failure */ |
| <------>*device_addr = 0; |
| |
| <------>if (is_userptr) { |
| <------><------>u64 addr = args->map_host.host_virt_addr, |
| <------><------><------>size = args->map_host.mem_size; |
| <------><------>u32 page_size = hdev->asic_prop.pmmu.page_size, |
| <------><------><------>huge_page_size = hdev->asic_prop.pmmu_huge.page_size; |
| |
| <------><------>rc = dma_map_host_va(hdev, addr, size, &userptr); |
| <------><------>if (rc) { |
| <------><------><------>dev_err(hdev->dev, "failed to get userptr from va\n"); |
| <------><------><------>return rc; |
| <------><------>} |
| |
| <------><------>rc = init_phys_pg_pack_from_userptr(ctx, userptr, |
| <------><------><------><------>&phys_pg_pack); |
| <------><------>if (rc) { |
| <------><------><------>dev_err(hdev->dev, |
| <------><------><------><------>"unable to init page pack for vaddr 0x%llx\n", |
| <------><------><------><------>addr); |
| <------><------><------>goto init_page_pack_err; |
| <------><------>} |
| |
| <------><------>vm_type = (enum vm_type_t *) userptr; |
| <------><------>hint_addr = args->map_host.hint_addr; |
| <------><------>handle = phys_pg_pack->handle; |
| |
| <------><------>/* get required alignment for the VA block */ |
| <------><------>if (phys_pg_pack->page_size == page_size) { |
| <------><------><------>va_range = ctx->host_va_range; |
| |
| <------><------><------>/* |
| <------><------><------> * huge page alignment may be needed in case of regular |
| <------><------><------> * page mapping, depending on the host VA alignment |
| <------><------><------> */ |
| <------><------><------>if (addr & (huge_page_size - 1)) |
| <------><------><------><------>va_block_align = page_size; |
| <------><------><------>else |
| <------><------><------><------>va_block_align = huge_page_size; |
| <------><------>} else { |
| <------><------><------>/* |
| <------><------><------> * huge page alignment is needed in case of huge page |
| <------><------><------> * mapping |
| <------><------><------> */ |
| <------><------><------>va_range = ctx->host_huge_va_range; |
| <------><------><------>va_block_align = huge_page_size; |
| <------><------>} |
| <------>} else { |
| <------><------>handle = lower_32_bits(args->map_device.handle); |
| |
| <------><------>spin_lock(&vm->idr_lock); |
| <------><------>phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); |
| <------><------>if (!phys_pg_pack) { |
| <------><------><------>spin_unlock(&vm->idr_lock); |
| <------><------><------>dev_err(hdev->dev, |
| <------><------><------><------>"no match for handle %u\n", handle); |
| <------><------><------>return -EINVAL; |
| <------><------>} |
| |
| <------><------>/* increment now to avoid freeing device memory while mapping */ |
| <------><------>atomic_inc(&phys_pg_pack->mapping_cnt); |
| |
| <------><------>spin_unlock(&vm->idr_lock); |
| |
| <------><------>vm_type = (enum vm_type_t *) phys_pg_pack; |
| |
| <------><------>hint_addr = args->map_device.hint_addr; |
| |
| <------><------>/* DRAM VA alignment is the same as the DRAM page size */ |
| <------><------>va_range = ctx->dram_va_range; |
| <------><------>va_block_align = hdev->asic_prop.dmmu.page_size; |
| <------>} |
| |
| <------>/* |
| <------> * relevant for mapping device physical memory only, as host memory is |
| <------> * implicitly shared |
| <------> */ |
| <------>if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) && |
| <------><------><------>phys_pg_pack->asid != ctx->asid) { |
| <------><------>dev_err(hdev->dev, |
| <------><------><------>"Failed to map memory, handle %u is not shared\n", |
| <------><------><------>handle); |
| <------><------>rc = -EPERM; |
| <------><------>goto shared_err; |
| <------>} |
| |
| <------>hnode = kzalloc(sizeof(*hnode), GFP_KERNEL); |
| <------>if (!hnode) { |
| <------><------>rc = -ENOMEM; |
| <------><------>goto hnode_err; |
| <------>} |
| |
| <------>ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size, |
| <------><------><------><------><------>hint_addr, va_block_align); |
| <------>if (!ret_vaddr) { |
| <------><------>dev_err(hdev->dev, "no available va block for handle %u\n", |
| <------><------><------><------>handle); |
| <------><------>rc = -ENOMEM; |
| <------><------>goto va_block_err; |
| <------>} |
| |
| <------>mutex_lock(&ctx->mmu_lock); |
| |
| <------>rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); |
| <------>if (rc) { |
| <------><------>mutex_unlock(&ctx->mmu_lock); |
| <------><------>dev_err(hdev->dev, "mapping page pack failed for handle %u\n", |
| <------><------><------><------>handle); |
| <------><------>goto map_err; |
| <------>} |
| |
| <------>rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type); |
| |
| <------>mutex_unlock(&ctx->mmu_lock); |
| |
| <------>if (rc) { |
| <------><------>dev_err(hdev->dev, |
| <------><------><------>"mapping handle %u failed due to MMU cache invalidation\n", |
| <------><------><------>handle); |
| <------><------>goto map_err; |
| <------>} |
| |
| <------>ret_vaddr += phys_pg_pack->offset; |
| |
| <------>hnode->ptr = vm_type; |
| <------>hnode->vaddr = ret_vaddr; |
| |
| <------>mutex_lock(&ctx->mem_hash_lock); |
| <------>hash_add(ctx->mem_hash, &hnode->node, ret_vaddr); |
| <------>mutex_unlock(&ctx->mem_hash_lock); |
| |
| <------>*device_addr = ret_vaddr; |
| |
| <------>if (is_userptr) |
| <------><------>free_phys_pg_pack(hdev, phys_pg_pack); |
| |
| <------>return 0; |
| |
| map_err: |
| <------>if (add_va_block(hdev, va_range, ret_vaddr, |
| <------><------><------><------>ret_vaddr + phys_pg_pack->total_size - 1)) |
| <------><------>dev_warn(hdev->dev, |
| <------><------><------>"release va block failed for handle 0x%x, vaddr: 0x%llx\n", |
| <------><------><------><------>handle, ret_vaddr); |
| |
| va_block_err: |
| <------>kfree(hnode); |
| hnode_err: |
| shared_err: |
| <------>atomic_dec(&phys_pg_pack->mapping_cnt); |
| <------>if (is_userptr) |
| <------><------>free_phys_pg_pack(hdev, phys_pg_pack); |
| init_page_pack_err: |
| <------>if (is_userptr) |
| <------><------>dma_unmap_host_va(hdev, userptr); |
| |
| <------>return rc; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
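| /* |
|  * unmap_device_va - unmap the given device virtual address |
|  * |
|  * @ctx                 : current context |
|  * @vaddr               : device virtual address to unmap |
|  * @ctx_free            : true if in context free flow, false otherwise |
|  * |
|  * This function does the following: |
|  * - Unmap the physical pages related to the given virtual address |
|  * - Return the device virtual block to the virtual block list |
|  */ |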
| static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) |
| { |
| <------>struct hl_device *hdev = ctx->hdev; |
| <------>struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; |
| <------>struct hl_vm_hash_node *hnode = NULL; |
| <------>struct hl_userptr *userptr = NULL; |
| <------>struct hl_va_range *va_range; |
| <------>enum vm_type_t *vm_type; |
| <------>bool is_userptr; |
| <------>int rc = 0; |
| |
| <------>/* protect from double entrance */ |
| <------>mutex_lock(&ctx->mem_hash_lock); |
| <------>hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr) |
| <------><------>if (vaddr == hnode->vaddr) |
| <------><------><------>break; |
| |
| <------>if (!hnode) { |
| <------><------>mutex_unlock(&ctx->mem_hash_lock); |
| <------><------>dev_err(hdev->dev, |
| <------><------><------>"unmap failed, no mem hnode for vaddr 0x%llx\n", |
| <------><------><------>vaddr); |
| <------><------>return -EINVAL; |
| <------>} |
| |
| <------>hash_del(&hnode->node); |
| <------>mutex_unlock(&ctx->mem_hash_lock); |
| |
| <------>vm_type = hnode->ptr; |
| |
| <------>if (*vm_type == VM_TYPE_USERPTR) { |
| <------><------>is_userptr = true; |
| <------><------>userptr = hnode->ptr; |
| <------><------>rc = init_phys_pg_pack_from_userptr(ctx, userptr, |
| <------><------><------><------><------><------><------>&phys_pg_pack); |
| <------><------>if (rc) { |
| <------><------><------>dev_err(hdev->dev, |
| <------><------><------><------>"unable to init page pack for vaddr 0x%llx\n", |
| <------><------><------><------>vaddr); |
| <------><------><------>goto vm_type_err; |
| <------><------>} |
| |
| <------><------>if (phys_pg_pack->page_size == |
| <------><------><------><------><------>hdev->asic_prop.pmmu.page_size) |
| <------><------><------>va_range = ctx->host_va_range; |
| <------><------>else |
| <------><------><------>va_range = ctx->host_huge_va_range; |
| <------>} else if (*vm_type == VM_TYPE_PHYS_PACK) { |
| <------><------>is_userptr = false; |
| <------><------>va_range = ctx->dram_va_range; |
| <------><------>phys_pg_pack = hnode->ptr; |
| <------>} else { |
| <------><------>dev_warn(hdev->dev, |
| <------><------><------>"unmap failed, unknown vm desc for vaddr 0x%llx\n", |
| <------><------><------><------>vaddr); |
| <------><------>rc = -EFAULT; |
| <------><------>goto vm_type_err; |
| <------>} |
| |
| <------>if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) { |
| <------><------>dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr); |
| <------><------>rc = -EINVAL; |
| <------><------>goto mapping_cnt_err; |
| <------>} |
| |
| <------>vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); |
| |
| <------>mutex_lock(&ctx->mmu_lock); |
| |
| <------>unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); |
| |
| <------>/* |
| <------> * During context free this function is called in a loop to clean all |
| <------> * the context mappings. Hence the cache invalidation can be called once |
| <------> * at the loop end rather than for each iteration |
| <------> */ |
| <------>if (!ctx_free) |
| <------><------>rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true, |
| <------><------><------><------><------><------><------><------>*vm_type); |
| |
| <------>mutex_unlock(&ctx->mmu_lock); |
| |
| <------>/* |
| <------> * If the context is closing we don't need to check for the MMU cache |
| <------> * invalidation return code and update the VA free list as in this flow |
| <------> * we invalidate the MMU cache outside of this unmap function and the VA |
| <------> * free list will be freed anyway. |
| <------> */ |
| <------>if (!ctx_free) { |
| <------><------>int tmp_rc; |
| |
| <------><------>if (rc) |
| <------><------><------>dev_err(hdev->dev, |
| <------><------><------><------>"unmapping vaddr 0x%llx failed due to MMU cache invalidation\n", |
| <------><------><------><------>vaddr); |
| |
| <------><------>tmp_rc = add_va_block(hdev, va_range, vaddr, |
| <------><------><------><------><------>vaddr + phys_pg_pack->total_size - 1); |
| <------><------>if (tmp_rc) { |
| <------><------><------>dev_warn(hdev->dev, |
| <------><------><------><------><------>"add va block failed for vaddr: 0x%llx\n", |
| <------><------><------><------><------>vaddr); |
| <------><------><------>if (!rc) |
| <------><------><------><------>rc = tmp_rc; |
| <------><------>} |
| <------>} |
| |
| <------>atomic_dec(&phys_pg_pack->mapping_cnt); |
| <------>kfree(hnode); |
| |
| <------>if (is_userptr) { |
| <------><------>free_phys_pg_pack(hdev, phys_pg_pack); |
| <------><------>dma_unmap_host_va(hdev, userptr); |
| <------>} |
| |
| <------>return rc; |
| |
| mapping_cnt_err: |
| <------>if (is_userptr) |
| <------><------>free_phys_pg_pack(hdev, phys_pg_pack); |
| vm_type_err: |
| <------>mutex_lock(&ctx->mem_hash_lock); |
| <------>hash_add(ctx->mem_hash, &hnode->node, vaddr); |
| <------>mutex_unlock(&ctx->mem_hash_lock); |
| |
| <------>return rc; |
| } |
| |
| static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) |
| { |
| <------>struct hl_device *hdev = hpriv->hdev; |
| <------>struct hl_ctx *ctx = hpriv->ctx; |
| <------>u64 device_addr = 0; |
| <------>u32 handle = 0; |
| <------>int rc; |
| |
| <------>switch (args->in.op) { |
| <------>case HL_MEM_OP_ALLOC: |
| <------><------>if (args->in.alloc.mem_size == 0) { |
| <------><------><------>dev_err(hdev->dev, |
| <------><------><------><------>"alloc size must be larger than 0\n"); |
| <------><------><------>rc = -EINVAL; |
| <------><------><------>goto out; |
| <------><------>} |
| |
| <------><------>/* Force contiguous as there are no real MMU |
| <------><------> * translations to overcome physical memory gaps |
| <------><------> */ |
| <------><------>args->in.flags |= HL_MEM_CONTIGUOUS; |
| <------><------>rc = alloc_device_memory(ctx, &args->in, &handle); |
| |
| <------><------>memset(args, 0, sizeof(*args)); |
| <------><------>args->out.handle = (__u64) handle; |
| <------><------>break; |
| |
| <------>case HL_MEM_OP_FREE: |
| <------><------>rc = free_device_memory(ctx, args->in.free.handle); |
| <------><------>break; |
| |
| <------>case HL_MEM_OP_MAP: |
| <------><------>if (args->in.flags & HL_MEM_USERPTR) { |
| <------><------><------>device_addr = args->in.map_host.host_virt_addr; |
| <------><------><------>rc = 0; |
| <------><------>} else { |
| <------><------><------>rc = get_paddr_from_handle(ctx, &args->in, |
| <------><------><------><------><------>&device_addr); |
| <------><------>} |
| |
| <------><------>memset(args, 0, sizeof(*args)); |
| <------><------>args->out.device_virt_addr = device_addr; |
| <------><------>break; |
| |
| <------>case HL_MEM_OP_UNMAP: |
| <------><------>rc = 0; |
| <------><------>break; |
| |
| <------>default: |
| <------><------>dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); |
| <------><------>rc = -ENOTTY; |
| <------><------>break; |
| <------>} |
| |
| out: |
| <------>return rc; |
| } |
| |
| int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) |
| { |
| <------>union hl_mem_args *args = data; |
| <------>struct hl_device *hdev = hpriv->hdev; |
| <------>struct hl_ctx *ctx = hpriv->ctx; |
| <------>u64 device_addr = 0; |
| <------>u32 handle = 0; |
| <------>int rc; |
| |
| <------>if (hl_device_disabled_or_in_reset(hdev)) { |
| <------><------>dev_warn_ratelimited(hdev->dev, |
| <------><------><------>"Device is %s. Can't execute MEMORY IOCTL\n", |
| <------><------><------>atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); |
| <------><------>return -EBUSY; |
| <------>} |
| |
| <------>if (!hdev->mmu_enable) |
| <------><------>return mem_ioctl_no_mmu(hpriv, args); |
| |
| <------>switch (args->in.op) { |
| <------>case HL_MEM_OP_ALLOC: |
| <------><------>if (!hdev->dram_supports_virtual_memory) { |
| <------><------><------>dev_err(hdev->dev, "DRAM alloc is not supported\n"); |
| <------><------><------>rc = -EINVAL; |
| <------><------><------>goto out; |
| <------><------>} |
| |
| <------><------>if (args->in.alloc.mem_size == 0) { |
| <------><------><------>dev_err(hdev->dev, |
| <------><------><------><------>"alloc size must be larger than 0\n"); |
| <------><------><------>rc = -EINVAL; |
| <------><------><------>goto out; |
| <------><------>} |
| <------><------>rc = alloc_device_memory(ctx, &args->in, &handle); |
| |
| <------><------>memset(args, 0, sizeof(*args)); |
| <------><------>args->out.handle = (__u64) handle; |
| <------><------>break; |
| |
| <------>case HL_MEM_OP_FREE: |
| <------><------>rc = free_device_memory(ctx, args->in.free.handle); |
| <------><------>break; |
| |
| <------>case HL_MEM_OP_MAP: |
| <------><------>rc = map_device_va(ctx, &args->in, &device_addr); |
| |
| <------><------>memset(args, 0, sizeof(*args)); |
| <------><------>args->out.device_virt_addr = device_addr; |
| <------><------>break; |
| |
| <------>case HL_MEM_OP_UNMAP: |
| <------><------>rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr, |
| <------><------><------><------><------>false); |
| <------><------>break; |
| |
| <------>default: |
| <------><------>dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); |
| <------><------>rc = -ENOTTY; |
| <------><------>break; |
| <------>} |
| |
| out: |
| <------>return rc; |
| } |
| |
| static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, |
| <------><------><------><------>u32 npages, u64 start, u32 offset, |
| <------><------><------><------>struct hl_userptr *userptr) |
| { |
| <------>int rc; |
| |
| <------>if (!access_ok((void __user *) (uintptr_t) addr, size)) { |
| <------><------>dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); |
| <------><------>return -EFAULT; |
| <------>} |
| |
| <------>userptr->vec = frame_vector_create(npages); |
| <------>if (!userptr->vec) { |
| <------><------>dev_err(hdev->dev, "Failed to create frame vector\n"); |
| <------><------>return -ENOMEM; |
| <------>} |
| |
| <------>rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, |
| <------><------><------><------>userptr->vec); |
| |
| <------>if (rc != npages) { |
| <------><------>dev_err(hdev->dev, |
| <------><------><------>"Failed to map host memory, user ptr probably wrong\n"); |
| <------><------>if (rc < 0) |
| <------><------><------>goto destroy_framevec; |
| <------><------>rc = -EFAULT; |
| <------><------>goto put_framevec; |
| <------>} |
| |
| <------>if (frame_vector_to_pages(userptr->vec) < 0) { |
| <------><------>dev_err(hdev->dev, |
| <------><------><------>"Failed to translate frame vector to pages\n"); |
| <------><------>rc = -EFAULT; |
| <------><------>goto put_framevec; |
| <------>} |
| |
| <------>rc = sg_alloc_table_from_pages(userptr->sgt, |
| <------><------><------><------><------>frame_vector_pages(userptr->vec), |
| <------><------><------><------><------>npages, offset, size, GFP_ATOMIC); |
| <------>if (rc < 0) { |
| <------><------>dev_err(hdev->dev, "failed to create SG table from pages\n"); |
| <------><------>goto put_framevec; |
| <------>} |
| |
| <------>return 0; |
| |
| put_framevec: |
| <------>put_vaddr_frames(userptr->vec); |
| destroy_framevec: |
| <------>frame_vector_destroy(userptr->vec); |
| <------>return rc; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
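| /* |
|  * hl_pin_host_memory - pins a chunk of host memory |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * @addr                : the host virtual address of the memory area |
|  * @size                : the size of the memory area |
|  * @userptr             : pointer to hl_userptr structure |
|  * |
|  * This function does the following: |
|  * - Pins the physical pages |
|  * - Creates an SG list from those pages |
|  */ |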
| int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, |
| <------><------><------><------><------>struct hl_userptr *userptr) |
| { |
| <------>u64 start, end; |
| <------>u32 npages, offset; |
| <------>int rc; |
| |
| <------>if (!size) { |
| <------><------>dev_err(hdev->dev, "size to pin is invalid - %llu\n", size); |
| <------><------>return -EINVAL; |
| <------>} |
| |
| <------>/* |
| <------> * If the combination of the address and size requested for this memory |
| <------> * region causes an integer overflow, return error. |
| <------> */ |
| <------>if (((addr + size) < addr) || |
| <------><------><------>PAGE_ALIGN(addr + size) < (addr + size)) { |
| <------><------>dev_err(hdev->dev, |
| <------><------><------>"user pointer 0x%llx + %llu causes integer overflow\n", |
| <------><------><------>addr, size); |
| <------><------>return -EINVAL; |
| <------>} |
| |
| <------>/* |
| <------> * This function can be called also from data path, hence use atomic |
| <------> * always as it is not a big allocation. |
| <------> */ |
| <------>userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); |
| <------>if (!userptr->sgt) |
| <------><------>return -ENOMEM; |
| |
| <------>start = addr & PAGE_MASK; |
| <------>offset = addr & ~PAGE_MASK; |
| <------>end = PAGE_ALIGN(addr + size); |
| <------>npages = (end - start) >> PAGE_SHIFT; |
| |
| <------>userptr->size = size; |
| <------>userptr->addr = addr; |
| <------>userptr->dma_mapped = false; |
| <------>INIT_LIST_HEAD(&userptr->job_node); |
| |
| <------>rc = get_user_memory(hdev, addr, size, npages, start, offset, |
| <------><------><------><------>userptr); |
| <------>if (rc) { |
| <------><------>dev_err(hdev->dev, |
| <------><------><------>"failed to get user memory for address 0x%llx\n", |
| <------><------><------>addr); |
| <------><------>goto free_sgt; |
| <------>} |
| |
| <------>hl_debugfs_add_userptr(hdev, userptr); |
| |
| <------>return 0; |
| |
| free_sgt: |
| <------>kfree(userptr->sgt); |
| <------>return rc; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
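| /* |
|  * hl_unpin_host_memory - unpins a chunk of host memory |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * @userptr             : pointer to hl_userptr structure |
|  * |
|  * This function does the following: |
|  * - DMA unmaps the SG list if it was mapped |
|  * - Unpins the physical pages related to the host memory |
|  * - Frees the SG list |
|  */ |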
| void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) |
| { |
| <------>struct page **pages; |
| |
| <------>hl_debugfs_remove_userptr(hdev, userptr); |
| |
| <------>if (userptr->dma_mapped) |
| <------><------>hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl, |
| <------><------><------><------><------><------><------>userptr->sgt->nents, |
| <------><------><------><------><------><------><------>userptr->dir); |
| |
| <------>pages = frame_vector_pages(userptr->vec); |
| <------>if (!IS_ERR(pages)) { |
| <------><------>int i; |
| |
| <------><------>for (i = 0; i < frame_vector_count(userptr->vec); i++) |
| <------><------><------>set_page_dirty_lock(pages[i]); |
| <------>} |
| <------>put_vaddr_frames(userptr->vec); |
| <------>frame_vector_destroy(userptr->vec); |
| |
| <------>list_del(&userptr->job_node); |
| |
| <------>sg_free_table(userptr->sgt); |
| <------>kfree(userptr->sgt); |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
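| /* |
|  * hl_userptr_delete_list - clear userptr list |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * @userptr_list        : pointer to the list to clear |
|  * |
|  * This function iterates over the list, unpins the host memory and frees the |
|  * userptr structure of each entry, then re-initializes the list head. |
|  */ |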
| void hl_userptr_delete_list(struct hl_device *hdev, |
| <------><------><------><------>struct list_head *userptr_list) |
| { |
| <------>struct hl_userptr *userptr, *tmp; |
| |
| <------>list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) { |
| <------><------>hl_unpin_host_memory(hdev, userptr); |
| <------><------>kfree(userptr); |
| <------>} |
| |
| <------>INIT_LIST_HEAD(userptr_list); |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
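| /* |
|  * hl_userptr_is_pinned - returns whether the given userptr is pinned |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * @addr                : user address to check |
|  * @size                : size to check |
|  * @userptr_list        : pointer to the list to search in |
|  * @userptr             : returns the first matching entry, if found |
|  * |
|  * This function iterates over the list and checks if the given address and |
|  * size match a pinned entry. |
|  */ |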
| bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, |
| <------><------><------><------>u32 size, struct list_head *userptr_list, |
| <------><------><------><------>struct hl_userptr **userptr) |
| { |
| <------>list_for_each_entry((*userptr), userptr_list, job_node) { |
| <------><------>if ((addr == (*userptr)->addr) && (size == (*userptr)->size)) |
| <------><------><------>return true; |
| <------>} |
| |
| <------>return false; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
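| /* |
|  * va_range_init - initialize virtual addresses range |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * @va_range            : pointer to the range to initialize |
|  * @start               : range start address |
|  * @end                 : range end address |
|  * |
|  * This function aligns the range to PAGE_SIZE and initializes its virtual |
|  * addresses list with a single block spanning the whole range. |
|  */ |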
| static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, |
| <------><------><------><------>u64 start, u64 end) |
| { |
| <------>int rc; |
| |
| <------>INIT_LIST_HEAD(&va_range->list); |
| |
| <------>/* PAGE_SIZE alignment: round start up and end down to full pages */ |
| |
| <------>if (start & (PAGE_SIZE - 1)) { |
| <------><------>start &= PAGE_MASK; |
| <------><------>start += PAGE_SIZE; |
| <------>} |
| |
| <------>if (end & (PAGE_SIZE - 1)) |
| <------><------>end &= PAGE_MASK; |
| |
| <------>if (start >= end) { |
| <------><------>dev_err(hdev->dev, "too small vm range for va list\n"); |
| <------><------>return -EFAULT; |
| <------>} |
| |
| <------>rc = add_va_block(hdev, va_range, start, end); |
| |
| <------>if (rc) { |
| <------><------>dev_err(hdev->dev, "Failed to init host va list\n"); |
| <------><------>return rc; |
| <------>} |
| |
| <------>va_range->start_addr = start; |
| <------>va_range->end_addr = end; |
| |
| <------>return 0; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
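| /* |
|  * va_range_fini - clear a virtual addresses range |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * @va_range            : pointer to the virtual addresses range |
|  * |
|  * This function frees the virtual addresses block list, its lock and the |
|  * range object itself. |
|  */ |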
| static void va_range_fini(struct hl_device *hdev, |
| <------><------>struct hl_va_range *va_range) |
| { |
| <------>mutex_lock(&va_range->lock); |
| <------>clear_va_list_locked(hdev, &va_range->list); |
| <------>mutex_unlock(&va_range->lock); |
| |
| <------>mutex_destroy(&va_range->lock); |
| <------>kfree(va_range); |
| } |
| |
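| /* |
|  * vm_ctx_init_with_ranges - initialize virtual memory for context |
|  * |
|  * @ctx                 : pointer to the habanalabs context structure |
|  * @host_range_start    : host virtual addresses range start |
|  * @host_range_end      : host virtual addresses range end |
|  * @host_huge_range_start : host huge-page virtual addresses range start |
|  * @host_huge_range_end : host huge-page virtual addresses range end |
|  * @dram_range_start    : dram virtual addresses range start |
|  * @dram_range_end      : dram virtual addresses range end |
|  * |
|  * This function initializes the following: |
|  * - MMU for context |
|  * - Virtual address to area descriptor hashtable |
|  * - Virtual block lists of available virtual memory |
|  */ |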
| static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, |
| <------><------><------><------><------>u64 host_range_start, |
| <------><------><------><------><------>u64 host_range_end, |
| <------><------><------><------><------>u64 host_huge_range_start, |
| <------><------><------><------><------>u64 host_huge_range_end, |
| <------><------><------><------><------>u64 dram_range_start, |
| <------><------><------><------><------>u64 dram_range_end) |
| { |
| <------>struct hl_device *hdev = ctx->hdev; |
| <------>int rc; |
| |
| <------>ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL); |
| <------>if (!ctx->host_va_range) |
| <------><------>return -ENOMEM; |
| |
| <------>ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range), |
| <------><------><------><------><------><------>GFP_KERNEL); |
| <------>if (!ctx->host_huge_va_range) { |
| <------><------>rc = -ENOMEM; |
| <------><------>goto host_huge_va_range_err; |
| <------>} |
| |
| <------>ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL); |
| <------>if (!ctx->dram_va_range) { |
| <------><------>rc = -ENOMEM; |
| <------><------>goto dram_va_range_err; |
| <------>} |
| |
| <------>rc = hl_mmu_ctx_init(ctx); |
| <------>if (rc) { |
| <------><------>dev_err(hdev->dev, "failed to init context %d\n", ctx->asid); |
| <------><------>goto mmu_ctx_err; |
| <------>} |
| |
| <------>mutex_init(&ctx->mem_hash_lock); |
| <------>hash_init(ctx->mem_hash); |
| |
| <------>mutex_init(&ctx->host_va_range->lock); |
| |
| <------>rc = va_range_init(hdev, ctx->host_va_range, host_range_start, |
| <------><------><------><------>host_range_end); |
| <------>if (rc) { |
| <------><------>dev_err(hdev->dev, "failed to init host vm range\n"); |
| <------><------>goto host_page_range_err; |
| <------>} |
| |
| <------>if (hdev->pmmu_huge_range) { |
| <------><------>mutex_init(&ctx->host_huge_va_range->lock); |
| |
| <------><------>rc = va_range_init(hdev, ctx->host_huge_va_range, |
| <------><------><------><------><------>host_huge_range_start, |
| <------><------><------><------><------>host_huge_range_end); |
| <------><------>if (rc) { |
| <------><------><------>dev_err(hdev->dev, |
| <------><------><------><------>"failed to init host huge vm range\n"); |
| <------><------><------>goto host_hpage_range_err; |
| <------><------>} |
| <------>} else { |
| <------><------>kfree(ctx->host_huge_va_range); |
| <------><------>ctx->host_huge_va_range = ctx->host_va_range; |
| <------>} |
| |
| <------>mutex_init(&ctx->dram_va_range->lock); |
| |
| <------>rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start, |
| <------><------><------>dram_range_end); |
| <------>if (rc) { |
| <------><------>dev_err(hdev->dev, "failed to init dram vm range\n"); |
| <------><------>goto dram_vm_err; |
| <------>} |
| |
| <------>hl_debugfs_add_ctx_mem_hash(hdev, ctx); |
| |
| <------>return 0; |
| |
| dram_vm_err: |
| <------>mutex_destroy(&ctx->dram_va_range->lock); |
| |
| <------>if (hdev->pmmu_huge_range) { |
| <------><------>mutex_lock(&ctx->host_huge_va_range->lock); |
| <------><------>clear_va_list_locked(hdev, &ctx->host_huge_va_range->list); |
| <------><------>mutex_unlock(&ctx->host_huge_va_range->lock); |
| <------>} |
| host_hpage_range_err: |
| <------>if (hdev->pmmu_huge_range) |
| <------><------>mutex_destroy(&ctx->host_huge_va_range->lock); |
| <------>mutex_lock(&ctx->host_va_range->lock); |
| <------>clear_va_list_locked(hdev, &ctx->host_va_range->list); |
| <------>mutex_unlock(&ctx->host_va_range->lock); |
| host_page_range_err: |
| <------>mutex_destroy(&ctx->host_va_range->lock); |
| <------>mutex_destroy(&ctx->mem_hash_lock); |
| <------>hl_mmu_ctx_fini(ctx); |
| mmu_ctx_err: |
| <------>kfree(ctx->dram_va_range); |
| dram_va_range_err: |
| <------>kfree(ctx->host_huge_va_range); |
| host_huge_va_range_err: |
| <------>kfree(ctx->host_va_range); |
| |
| <------>return rc; |
| } |
| |
| int hl_vm_ctx_init(struct hl_ctx *ctx) |
| { |
| <------>struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; |
| <------>u64 host_range_start, host_range_end, host_huge_range_start, |
| <------><------>host_huge_range_end, dram_range_start, dram_range_end; |
| |
| <------>atomic64_set(&ctx->dram_phys_mem, 0); |
| |
| <------>/* |
| <------> * - If MMU is enabled, init the ranges as usual. |
| <------> * - If MMU is disabled, in case of host mapping, the returned address |
| <------> * is the given one. |
| <------> * In case of DRAM mapping, the returned address is the physical |
| <------> * address of the memory related to the given handle. |
| <------> */ |
| <------>if (ctx->hdev->mmu_enable) { |
| <------><------>dram_range_start = prop->dmmu.start_addr; |
| <------><------>dram_range_end = prop->dmmu.end_addr; |
| <------><------>host_range_start = prop->pmmu.start_addr; |
| <------><------>host_range_end = prop->pmmu.end_addr; |
| <------><------>host_huge_range_start = prop->pmmu_huge.start_addr; |
| <------><------>host_huge_range_end = prop->pmmu_huge.end_addr; |
| <------>} else { |
| <------><------>dram_range_start = prop->dram_user_base_address; |
| <------><------>dram_range_end = prop->dram_end_address; |
| <------><------>host_range_start = prop->dram_user_base_address; |
| <------><------>host_range_end = prop->dram_end_address; |
| <------><------>host_huge_range_start = prop->dram_user_base_address; |
| <------><------>host_huge_range_end = prop->dram_end_address; |
| <------>} |
| |
| <------>return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, |
| <------><------><------><------><------>host_huge_range_start, |
| <------><------><------><------><------>host_huge_range_end, |
| <------><------><------><------><------>dram_range_start, |
| <------><------><------><------><------>dram_range_end); |
| } |
| |
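| /* |
|  * hl_vm_ctx_fini - virtual memory teardown of context |
|  * |
|  * @ctx                 : pointer to the habanalabs context structure |
|  * |
|  * This function performs the following: |
|  * - Unmaps any context memory mappings that are still alive |
|  * - Frees any physical page packs of this context that are still allocated |
|  * - Frees the virtual block lists and the MMU tables of the context |
|  */ |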
| void hl_vm_ctx_fini(struct hl_ctx *ctx) |
| { |
| <------>struct hl_device *hdev = ctx->hdev; |
| <------>struct hl_vm *vm = &hdev->vm; |
| <------>struct hl_vm_phys_pg_pack *phys_pg_list; |
| <------>struct hl_vm_hash_node *hnode; |
| <------>struct hlist_node *tmp_node; |
| <------>int i; |
| |
| <------>hl_debugfs_remove_ctx_mem_hash(hdev, ctx); |
| |
| <------>/* |
| <------> * Clearly something went wrong on hard reset so no point in printing |
| <------> * another side effect error |
| <------> */ |
| <------>if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash)) |
| <------><------>dev_notice(hdev->dev, |
| <------><------><------>"user released device without removing its memory mappings\n"); |
| |
| <------>hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) { |
| <------><------>dev_dbg(hdev->dev, |
| <------><------><------>"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n", |
| <------><------><------>hnode->vaddr, ctx->asid); |
| <------><------>unmap_device_va(ctx, hnode->vaddr, true); |
| <------>} |
| |
| <------>/* invalidate the cache once after the unmapping loop */ |
| <------>hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); |
| <------>hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK); |
| |
| <------>spin_lock(&vm->idr_lock); |
| <------>idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) |
| <------><------>if (phys_pg_list->asid == ctx->asid) { |
| <------><------><------>dev_dbg(hdev->dev, |
| <------><------><------><------>"page list 0x%px of asid %d is still alive\n", |
| <------><------><------><------>phys_pg_list, ctx->asid); |
| <------><------><------>atomic64_sub(phys_pg_list->total_size, |
| <------><------><------><------><------>&hdev->dram_used_mem); |
| <------><------><------>free_phys_pg_pack(hdev, phys_pg_list); |
| <------><------><------>idr_remove(&vm->phys_pg_pack_handles, i); |
| <------><------>} |
| <------>spin_unlock(&vm->idr_lock); |
| |
| <------>va_range_fini(hdev, ctx->dram_va_range); |
| <------>if (hdev->pmmu_huge_range) |
| <------><------>va_range_fini(hdev, ctx->host_huge_va_range); |
| <------>va_range_fini(hdev, ctx->host_va_range); |
| |
| <------>mutex_destroy(&ctx->mem_hash_lock); |
| <------>hl_mmu_ctx_fini(ctx); |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
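| /* |
|  * hl_vm_init - initialize virtual memory module |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * |
|  * This function initializes the following: |
|  * - DRAM physical pages pool, with page size taken from the asic properties |
|  * - Idr for device memory allocation handles |
|  */ |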
| int hl_vm_init(struct hl_device *hdev) |
| { |
| <------>struct asic_fixed_properties *prop = &hdev->asic_prop; |
| <------>struct hl_vm *vm = &hdev->vm; |
| <------>int rc; |
| |
| <------>vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1); |
| <------>if (!vm->dram_pg_pool) { |
| <------><------>dev_err(hdev->dev, "Failed to create dram page pool\n"); |
| <------><------>return -ENOMEM; |
| <------>} |
| |
| <------>kref_init(&vm->dram_pg_pool_refcount); |
| |
| <------>rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address, |
| <------><------><------>prop->dram_end_address - prop->dram_user_base_address, |
| <------><------><------>-1); |
| |
| <------>if (rc) { |
| <------><------>dev_err(hdev->dev, |
| <------><------><------>"Failed to add memory to dram page pool %d\n", rc); |
| <------><------>goto pool_add_err; |
| <------>} |
| |
| <------>spin_lock_init(&vm->idr_lock); |
| <------>idr_init(&vm->phys_pg_pack_handles); |
| |
| <------>atomic64_set(&hdev->dram_used_mem, 0); |
| |
| <------>vm->init_done = true; |
| |
| <------>return 0; |
| |
| pool_add_err: |
| <------>gen_pool_destroy(vm->dram_pg_pool); |
| |
| <------>return rc; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
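| /* |
|  * hl_vm_fini - virtual memory module teardown |
|  * |
|  * @hdev                : pointer to the habanalabs device structure |
|  * |
|  * This function drops the reference of the DRAM physical pages pool, which |
|  * destroys the pool and the handles idr once no allocations are left. |
|  */ |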
| void hl_vm_fini(struct hl_device *hdev) |
| { |
| <------>struct hl_vm *vm = &hdev->vm; |
| |
| <------>if (!vm->init_done) |
| <------><------>return; |
| |
| <------>/* |
| <------> * At this point all the contexts should be freed and hence no DRAM |
| <------> * memory should be in use. Hence the DRAM pool should be freed here. |
| <------> */ |
| <------>if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1) |
| <------><------>dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n", |
| <------><------><------><------>__func__); |
| |
| <------>vm->init_done = false; |
| } |
| |