Simpson's integration of real valued functions by CUDA Thrust

Simpson’s integration can be easily implemented by using CUDA Thrust. You basically need five steps:

  1. 1. Generate the Simpson’s quadrature weights;
  2. 2. Generate the function sampling points;
  3. 3. Generate the function values;
  4. 4. Calculate the elementwise product between the quadrature weights and the function values;
  5. 5. Sum the above products.

Step #1 requires creating an array with elements repeated many times, namely, 1 4 2 4 2 4 … 1 for the Simpson’s case.
This can be accomplished by borrowing Robert Crovella’s approach in cuda thrust library repeat vector multiple times.

Step #2 can be accomplished by using couting_iterators and borrowing talonmies approach in Purpose and usage of counting_iterators in CUDA Thrust library.

Step #3 is an application of thrust::transform.

Steps #4 and #5 can be accomplished together by thrust::inner_product.

This approach can be exploited also for use when other quadrature integration rules are of interest.

Here is the code:

#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/inner_product.h>
#include <thrust/functional.h>
#include <thrust/fill.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

// for printing
#include <thrust/copy.h>
#include <ostream>

#define STRIDE 2
#define N 100

#define pi_f  3.14159265358979f        // Greek pi in single precision

struct sin_functor
   __host__ __device__
   float operator()(float x) const
      return sin(2.f*pi_f*x);

template <typename Iterator>

class strided_range

   typedef typename thrust::iterator_difference<Iterator>::type difference_type;

   struct stride_functor : public thrust::unary_function<difference_type,difference_type>
      difference_type stride;

      stride_functor(difference_type stride)
        : stride(stride) {}

      __host__ __device__
      difference_type operator()(const difference_type& i) const
         return stride * i;

   typedef typename thrust::counting_iterator<difference_type>                   

   typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;

   typedef typename thrust::permutation_iterator<Iterator,TransformIterator> PermutationIterator;

   // type of the strided_range iterator
   typedef PermutationIterator iterator;

   // construct strided_range for the range [first,last)
   strided_range(Iterator first, Iterator last, difference_type stride)
     : first(first), last(last), stride(stride) {}

   iterator begin(void) const
      return PermutationIterator(first, TransformIterator(CountingIterator(0), stride_functor(stride)));

   iterator end(void) const
      return begin() + ((last - first) + (stride - 1)) / stride;


   Iterator first;
   Iterator last;
   difference_type stride;


int main(void)

   // --- Generate the integration coefficients
   thrust::host_vector<float> h_coefficients(STRIDE);
   h_coefficients[0] = 4.f;
   h_coefficients[1] = 2.f;

   thrust::device_vector<float> d_coefficients(N);

   typedef thrust::device_vector<float>::iterator Iterator;

   strided_range<Iterator> pos1(d_coefficients.begin()+1, d_coefficients.end()-2, STRIDE);

   strided_range<Iterator> pos2(d_coefficients.begin()+2, d_coefficients.end()-1, STRIDE);

   thrust::fill(pos1.begin(), pos1.end(), h_coefficients[0]);

   thrust::fill(pos2.begin(), pos2.end(), h_coefficients[1]);

   d_coefficients[0]  = 1.f;

   d_coefficients[N-1]  = 1.f;

   // print the generated d_coefficients
   std::cout << "d_coefficients: ";
   thrust::copy(d_coefficients.begin(), d_coefficients.end(), std::ostream_iterator<float>(std::cout, " "));  std::cout << std::endl;

   // --- Generate sampling points
   float a     = 0.f;
   float b     = .5f;

   float Dx    = (b-a)/(float)(N-1);

   thrust::device_vector<float> d_x(N);






   for(int i=0; i<N; i++) {
      float val = d_x[i];
      printf("%d %fn", i, val);

   // --- Calculate function values

   thrust::device_vector<float> d_y(N);

   thrust::transform(d_x.begin(), d_x.end(), d_y.begin(), sin_functor());

   for(int i=0; i<N; i++) {
      float val = d_y[i];
      printf("%d %fn", i, val);

   // --- Calculate integral

   float integral = (Dx/3.f) * thrust::inner_product(d_y.begin(), d_y.begin() + N, d_coefficients.begin(), 0.0f);
   printf("The integral is = %fn", integral);

   return 0;


Leave a Reply

Your email address will not be published. Required fields are marked *