Tricks and Tips: Timing CPU and GPU (CUDA) operations

In this post, timing of both C/C++ as well as CUDA operations is shortly discussed.
You will just need to add the 4 files reported below to your project and #include the two header files as:

// --- Timing includes
#include "TimingCPU.h"
#include "TimingGPU.cuh"

The two classes can be used as follows.

Timing CPU section

TimingCPU timer_CPU;

timer_CPU.StartCounter();
CPU perations to be timed
std::cout << "CPU Timing = " << timer_CPU.GetCounter() << " ms" << std::endl;

Timing GPU section

TimingGPU timer_GPU;
timer_GPU.StartCounter();
GPU perations to be timed
std::cout << "GPU Timing = " << timer_GPU.GetCounter() << " ms" << std::endl;

In both the cases, the timing is in milliseconds. Also, the two classes can be used under linux or windows.

Here are the 4 files:

TimingCPU.cpp

/**************/
/* TIMING CPU */
/**************/

#include "TimingCPU.h"

#ifdef __linux__

    #include <sys/time.h>
    #include <stdio.h>

    TimingCPU::TimingCPU(): cur_time_(0) { StartCounter(); }

    TimingCPU::~TimingCPU() { }

    void TimingCPU::StartCounter()
    {
        struct timeval time;
        if(gettimeofday( &time, 0 )) return;
        cur_time_ = 1000000 * time.tv_sec + time.tv_usec;
    }

    double TimingCPU::GetCounter()
    {
        struct timeval time;
        if(gettimeofday( &time, 0 )) return -1;

        long cur_time = 1000000 * time.tv_sec + time.tv_usec;
        double sec = (cur_time - cur_time_) / 1000000.0;
        if(sec < 0) sec += 86400;
        cur_time_ = cur_time;

        return 1000.*sec;
    }

#elif _WIN32 || _WIN64
    #include <windows.h>
    #include <iostream>

    struct PrivateTimingCPU {
        double  PCFreq;
        __int64 CounterStart;
    };

    // --- Default constructor
    TimingCPU::TimingCPU() { privateTimingCPU = new PrivateTimingCPU; (*privateTimingCPU).PCFreq = 0.0; (*privateTimingCPU).CounterStart = 0; }

    // --- Default destructor
    TimingCPU::~TimingCPU() { }

    // --- Starts the timing
    void TimingCPU::StartCounter()
    {
        LARGE_INTEGER li;
        if(!QueryPerformanceFrequency(&li)) std::cout << "QueryPerformanceFrequency failed!n";

        (*privateTimingCPU).PCFreq = double(li.QuadPart)/1000.0;

        QueryPerformanceCounter(&li);
        (*privateTimingCPU).CounterStart = li.QuadPart;
    }

    // --- Gets the timing counter in ms
    double TimingCPU::GetCounter()
    {
        LARGE_INTEGER li;
        QueryPerformanceCounter(&li);
        return double(li.QuadPart-(*privateTimingCPU).CounterStart)/(*privateTimingCPU).PCFreq;
    }
#endif

TimingCPU.h

// 1 micro-second accuracy
// Returns the time in seconds

#ifndef __TIMINGCPU_H__
#define __TIMINGCPU_H__

#ifdef __linux__

    class TimingCPU {

        private:
            long cur_time_;

        public:

            TimingCPU();

            ~TimingCPU();

            void StartCounter();

            double GetCounter();
    };

#elif _WIN32 || _WIN64

    struct PrivateTimingCPU;

    class TimingCPU
    {
        private:
            PrivateTimingCPU *privateTimingCPU;

        public:

            TimingCPU();

            ~TimingCPU();

            void StartCounter();

            double GetCounter();

    }; // TimingCPU class

#endif

#endif

TimingGPU.cu

/**************/
/* TIMING GPU */
/**************/

#include "TimingGPU.cuh"

#include <cuda.h>
#include <cuda_runtime.h>

struct PrivateTimingGPU {
    cudaEvent_t     start;
    cudaEvent_t     stop;
};

// default constructor
TimingGPU::TimingGPU() { privateTimingGPU = new PrivateTimingGPU; }

// default destructor
TimingGPU::~TimingGPU() { }

void TimingGPU::StartCounter()
{
    cudaEventCreate(&((*privateTimingGPU).start));
    cudaEventCreate(&((*privateTimingGPU).stop));
    cudaEventRecord((*privateTimingGPU).start,0);
}

void TimingGPU::StartCounterFlags()
{
    int eventflags = cudaEventBlockingSync;

    cudaEventCreateWithFlags(&((*privateTimingGPU).start),eventflags);
    cudaEventCreateWithFlags(&((*privateTimingGPU).stop),eventflags);
    cudaEventRecord((*privateTimingGPU).start,0);
}

// Gets the counter in ms
float TimingGPU::GetCounter()
{
    float   time;
    cudaEventRecord((*privateTimingGPU).stop, 0);
    cudaEventSynchronize((*privateTimingGPU).stop);
    cudaEventElapsedTime(&time,(*privateTimingGPU).start,(*privateTimingGPU).stop);
    return time;
}

TimingGPU.cuh

#ifndef __TIMING_CUH__
#define __TIMING_CUH__

/**************/
/* TIMING GPU */
/**************/

// Events are a part of CUDA API and provide a system independent way to measure execution times on CUDA devices with approximately 0.5
// microsecond precision.

struct PrivateTimingGPU;

class TimingGPU
{
    private:
        PrivateTimingGPU *privateTimingGPU;

    public:

        TimingGPU();

        ~TimingGPU();

        void StartCounter();
        void StartCounterFlags();

        float GetCounter();

}; // TimingCPU class

#endif

Leave a Reply

Your email address will not be published. Required fields are marked *