Multitarget-tracker/Detector/gpu_allocator.cpp at master · Shinung/Multitarget-tracker · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#include "gpu_allocator.h"

#include <stdexcept>
#include <cstring>
#include <cuda_runtime.h>

// Page offset on Kepler/Maxwell.
// Granularity in bytes (128 KiB): GPUAllocator::grow() rounds every
// reservation up to a multiple of this value before advancing its cursor.
#define ALIGNMENT (128*1024)

/**
 * Creates the allocator by reserving one device buffer of `size` bytes with
 * cudaMalloc and pointing the allocation cursor at its start.
 *
 * @param size  Capacity of the device buffer, in bytes.
 * @throws std::runtime_error if cudaMalloc does not return cudaSuccess.
 */
GPUAllocator::GPUAllocator(size_t size)
    : total_size_(size),
      current_size_(0)
{
    const cudaError_t status = cudaMalloc(&base_ptr_, total_size_);
    if (cudaSuccess != status) {
        throw std::runtime_error("Could not allocate GPU memory");
    }

    // Nothing has been handed out yet, so the cursor starts at the base.
    current_ptr_ = base_ptr_;
}

/**
 * Returns the device buffer obtained in the constructor to the CUDA runtime.
 * The status code of cudaFree is deliberately discarded.
 */
GPUAllocator::~GPUAllocator()
{
    static_cast<void>(cudaFree(base_ptr_));
}

/**
 * Rounds `v` up to the nearest multiple of `alignment`.
 *
 * Works in size_t so byte counts of 2 GiB and above are neither truncated to
 * 32 bits nor turned negative on return.
 *
 * @param v          Value to round up.
 * @param alignment  Required multiple; 0 is treated as "no alignment".
 * @return The smallest multiple of `alignment` that is >= `v`.
 */
static size_t align_up(size_t v, size_t alignment)
{
    // Avoid a division by zero; there is nothing to align to.
    if (alignment == 0)
        return v;

    // Using the remainder avoids the `v + alignment - 1` intermediate, which
    // can wrap around even when the rounded result itself is representable.
    const size_t remainder = v % alignment;
    return (remainder == 0) ? v : v + (alignment - remainder);
}

/**
 * Reserves `size` bytes from the pre-allocated device buffer.
 *
 * On success `*dev_ptr` receives the current cursor position and the cursor
 * advances by `size` rounded up to a multiple of ALIGNMENT. On failure
 * `*dev_ptr` is left untouched.
 *
 * @param dev_ptr  Receives the start address of the reserved region.
 * @param size     Number of bytes requested.
 * @return cudaSuccess, or cudaErrorMemoryAllocation when `size` bytes do not
 *         fit in the remaining space. A request that would fill the buffer
 *         exactly is also rejected.
 */
cudaError_t GPUAllocator::grow(void** dev_ptr, size_t size)
{
    // Same decision as `current_size_ + size >= total_size_`, but written so
    // the sum cannot wrap around for very large requests. current_size_ may
    // already be past total_size_ because the cursor advances by the aligned
    // size rather than the requested size.
    if (current_size_ >= total_size_ || size >= total_size_ - current_size_)
        return cudaErrorMemoryAllocation;

    // Round up in size_t arithmetic so that requests of 2 GiB and above are
    // not truncated to 32 bits.
    const size_t alignment = ALIGNMENT;
    const size_t remainder = size % alignment;
    const size_t aligned_size = (remainder == 0) ? size : size + (alignment - remainder);

    *dev_ptr = current_ptr_;
    current_ptr_ = static_cast<char*>(current_ptr_) + aligned_size;
    current_size_ += aligned_size;

    return cudaSuccess;
}

/**
 * Rewinds the allocator: the used-byte counter goes back to zero and the
 * cursor returns to the start of the device buffer. The buffer itself is
 * neither freed nor cleared here.
 */
void GPUAllocator::reset()
{
    current_size_ = 0;
    current_ptr_ = base_ptr_;
}

/**
 * Carves storage for a `rows` x `cols` matrix out of the device buffer and
 * wires it into `mat`.
 *
 * Both dimensions are padded up to a multiple of 16 elements before the byte
 * size is computed. On success `mat->data` points at the reserved region,
 * `mat->step` holds the padded row length in bytes and `mat->refcount` points
 * at a freshly allocated int.
 *
 * @param mat       Matrix header to fill in.
 * @param rows      Number of rows; negative values are rejected.
 * @param cols      Number of columns; negative values are rejected.
 * @param elemSize  Size of one element in bytes.
 * @return true on success, false if the dimensions are negative or the
 *         buffer cannot satisfy the request.
 */
bool GPUAllocator::allocate(cv::cuda::GpuMat* mat, int rows, int cols, size_t elemSize)
{
    // Negative dimensions would turn into enormous unsigned sizes below.
    if (rows < 0 || cols < 0)
        return false;

    const size_t padded_width  = static_cast<size_t>(align_up(cols, 16));
    const size_t padded_height = static_cast<size_t>(align_up(rows, 16));

    // Keep the byte count in size_t: narrowing it to int corrupts the size
    // of large matrices.
    const size_t total_size = elemSize * padded_width * padded_height;

    const cudaError_t status = grow(reinterpret_cast<void**>(&mat->data), total_size);
    if (status != cudaSuccess)
        return false;

    mat->step = padded_width * elemSize;
    // The counter value is left uninitialized here; presumably the caller
    // sets it after allocate() returns - TODO confirm against the GpuMat code.
    mat->refcount = new int;

    return true;
}

/**
 * Releases the reference counter attached to `mat`. The matrix's device
 * storage is not touched here.
 *
 * @param mat  Matrix header whose `refcount` is deleted.
 */
void GPUAllocator::free(cv::cuda::GpuMat* mat)
{
    int* const counter = mat->refcount;
    delete counter;
}