Need help debugging Stride Iterator

I writing an application that takes finds unique tuples in a collection, with one several important caveats: (1) the application should be able to process arbitrarily-large tuples, as large as 30 dimensions in fact (2) furthermore, the tuple size is unknown at compile-time and varies depending on the input data. As these two factors rule out the use of thrust tuple, which is limited to 10 dimensions) which must be known at compile time, I’m trying to adapt the stride iterator from here to thrust. Here is my implementation

#pragma once

#include <thrust/iterator/iterator_traits.h>

template<typename T>
class StrideIterator
{
public:
	typedef typename thrust::iterator_traits<T>::pointer pointer;
	typedef typename thrust::iterator_traits<T>::reference reference;
	typedef typename thrust::iterator_traits<T>::value_type value_type;
	typedef typename thrust::iterator_traits<T>::difference_type difference_type;
	typedef typename thrust::iterator_traits<T>::iterator_category iterator_category;
	typedef StrideIterator<T> Self;

	StrideIterator( T ptr = NULL , difference_type dim  = 0 , difference_type size = 0 )
		: m_ptr( ptr )
		, m_dim( dim )
		, m_size( size )
	{
	}

	StrideIterator( const Self& strideIter )
		: m_ptr( strideIter.m_ptr )
		, m_dim( strideIter.m_dim )
		, m_size( strideIter.m_size )
	{
	}

	// operators -- TODO: bounds check
	Self& operator++()
	{
		m_ptr += m_dim;
		return *this;
	}

	Self operator++( int )
	{
		Self tmp = *this;
		m_ptr += m_dim;
		return tmp;
	}

	Self& operator--()
	{
		m_ptr -= m_dim;
		return *this;
	}

	Self operator--(int)
	{
		Self tmp = *this;
		m_ptr -= m_dim;
		return tmp;
	}

	Self& operator+=( difference_type step )
	{
		m_ptr += step * m_dim;
		return *this;
	}

	Self& operator-=( difference_type step )
	{
		m_ptr -= step * m_dim;
		return *this;
	}

	reference operator[]( difference_type step )
	{
		return m_ptr[step * m_dim];
	}

	/////

	friend difference_type operator-( const Self& iter1 , const Self& iter2 )
	{
		// Assert( iter1.m_dim , iter2.m_dim );
		return ( iter1.m_ptr - iter2.m_ptr ) / iter1.m_dim;
	}

	friend bool operator!=( const Self& iter1 , const Self& iter2 )
	{
		return false;
	}

	reference operator*( )
	{
		return *m_ptr;
	}

	// TODO: implement
	/*	
	friend bool operator<( const Self& iter1 , const Self& iter2 );
	friend bool operator==( const Self& iter1 , const Self& iter2 );
	friend self operator+( const Self& iter, difference_type step );
	friend self operator+( difference_type step , const Self& iter );
	*/

private:
	T 			m_ptr;
	difference_type 	m_dim;
	difference_type		m_size;
};

Here’s my code where I try to use the sort an array of tuples using the stride iterator

#include "StrideIterator.cuh"

#include <thrust/unique.h>
#include <thrust/execution_policy.h>

typedef unsigned short ushort;

int main()
{
	ushort dim = 2;
	unsigned size = 10;

	// AoS form
	/*
	2 , 1 ,			// tuple 0
	1 , 2 ,			// tuple 1
	1 , 1 ,			// ...

	1 , 1 ,

	1 , 3 ,
	2 , 2 ,
	3 , 1 ,
	2 , 1 ,
	1 , 2 ,
	1 , 1			// tuple 9
	*/

	// SoA representation
	ushort data[] = {
			    2 , 1 , 1 , 1 , 1 , 2 , 3 , 2 , 1 , 1 ,
			    1 , 2 , 1 , 1 , 3 , 2 , 1 , 1 , 2 , 1
			};

	StrideIterator<ushort*> first( data , dim , size );
	StrideIterator<ushort*> last( data + size , dim , size );

	thrust::unique( thrust::device , first , last );
}

On compiling the program (Demo.cu) thus:

nvcc -ccbin g++ -m64 -gencode arch=compute_30,code=sm_30 -o Demo Demo.cu

I get the following error-novel

/usr/include/thrust/iterator/detail/zip_iterator_base.h(96): error: no operator "*" matches these operands
            operand types are: * const StridedIterator<ushort *>
          detected during:
            instantiation of "thrust::detail::dereference_iterator::apply<Iterator>::type thrust::detail::dereference_iterator::operator()(const Iterator &) [with Iterator=StridedIterator<ushort *>]" 
/usr/include/thrust/detail/tuple_transform.h(102): here
            instantiation of "thrust::detail::tuple_meta_transform<Tuple, UnaryMetaFunction, thrust::tuple_size<Tuple>::value>::type thrust::detail::tuple_transform_functor<Tuple, UnaryMetaFunction, UnaryFunction, 2U>::do_it_on_the_host_or_device(const Tuple &, UnaryFunction) [with Tuple=thrust::tuple<StridedIterator<ushort *>, thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, UnaryMetaFunction=thrust::detail::dereference_iterator::apply, UnaryFunction=thrust::detail::dereference_iterator]" 
/usr/include/thrust/detail/tuple_transform.h(412): here
            instantiation of "thrust::detail::tuple_meta_transform<Tuple, UnaryMetaFunction, thrust::tuple_size<Tuple>::value>::type thrust::detail::tuple_host_device_transform<UnaryMetaFunction,Tuple,UnaryFunction>(const Tuple &, UnaryFunction) [with UnaryMetaFunction=thrust::detail::dereference_iterator::apply, Tuple=thrust::tuple<StridedIterator<ushort *>, thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, UnaryFunction=thrust::detail::dereference_iterator]" 
/usr/include/thrust/iterator/detail/zip_iterator.inl(74): here
            instantiation of "thrust::detail::zip_iterator_base<IteratorTuple>::type::reference thrust::zip_iterator<IteratorTuple>::dereference() const [with IteratorTuple=thrust::tuple<StridedIterator<ushort *>, thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>]" 
/usr/include/thrust/iterator/iterator_facade.h(128): here
            instantiation of "Facade::reference thrust::iterator_core_access::dereference(const Facade &) [with Facade=thrust::zip_iterator<thrust::tuple<StridedIterator<ushort *>, thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>]" 
/usr/include/thrust/iterator/iterator_facade.h(310): here
            [ 25 instantiation contexts not shown ]
            instantiation of "thrust::detail::temporary_array<T, System>::temporary_array(thrust::execution_policy<System> &, InputIterator, InputIterator) [with T=ushort, System=thrust::system::cuda::detail::par_t, InputIterator=StridedIterator<ushort *>]" 
/usr/include/thrust/system/detail/generic/unique.inl(69): here
            instantiation of "ForwardIterator thrust::system::detail::generic::unique(thrust::execution_policy<DerivedPolicy> &, ForwardIterator, ForwardIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/detail/unique.inl(59): here
            instantiation of "ForwardIterator thrust::unique(const thrust::detail::execution_policy_base<DerivedPolicy> &, ForwardIterator, ForwardIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/system/detail/generic/unique.inl(54): here
            instantiation of "ForwardIterator thrust::system::detail::generic::unique(thrust::execution_policy<DerivedPolicy> &, ForwardIterator, ForwardIterator) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>]" 
/usr/include/thrust/detail/unique.inl(44): here
            instantiation of "ForwardIterator thrust::unique(const thrust::detail::execution_policy_base<DerivedPolicy> &, ForwardIterator, ForwardIterator) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>]" 
Demo.cu(39): here

/usr/include/thrust/system/cuda/detail/copy_if.inl(122): error: no operator "+" matches these operands
            operand types are: StridedIterator<ushort *> + unsigned int
          detected during:
            instantiation of "void thrust::system::cuda::detail::copy_if_detail::copy_if_intervals_closure<InputIterator1, InputIterator2, InputIterator3, Decomposition, OutputIterator, Context>::operator()() [with InputIterator1=thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, InputIterator2=thrust::transform_iterator<thrust::detail::predicate_to_integral<thrust::identity<int>, signed long>, thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, signed long, thrust::use_default>, InputIterator3=thrust::detail::normal_iterator<thrust::pointer<signed long, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, Decomposition=thrust::system::detail::internal::uniform_decomposition<signed long>, OutputIterator=StridedIterator<ushort *>, Context=thrust::system::cuda::detail::detail::statically_blocked_thread_array<256U>]" 
/usr/include/thrust/system/cuda/detail/detail/launch_closure.inl(52): here
            instantiation of "void thrust::system::cuda::detail::detail::launch_closure_by_value(Closure) [with Closure=thrust::system::cuda::detail::copy_if_detail::copy_if_intervals_closure<thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::transform_iterator<thrust::detail::predicate_to_integral<thrust::identity<int>, signed long>, thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, signed long, thrust::use_default>, thrust::detail::normal_iterator<thrust::pointer<signed long, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::system::detail::internal::uniform_decomposition<signed long>, StridedIterator<ushort *>, thrust::system::cuda::detail::detail::statically_blocked_thread_array<256U>>]" 
/usr/include/thrust/system/cuda/detail/detail/launch_closure.inl(81): here
            instantiation of "thrust::system::cuda::detail::detail::closure_launcher_base<Closure, launch_by_value>::launch_function_t thrust::system::cuda::detail::detail::closure_launcher_base<Closure, launch_by_value>::get_launch_function() [with Closure=thrust::system::cuda::detail::copy_if_detail::copy_if_intervals_closure<thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::transform_iterator<thrust::detail::predicate_to_integral<thrust::identity<int>, signed long>, thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, signed long, thrust::use_default>, thrust::detail::normal_iterator<thrust::pointer<signed long, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::system::detail::internal::uniform_decomposition<signed long>, StridedIterator<ushort *>, thrust::system::cuda::detail::detail::statically_blocked_thread_array<256U>>, launch_by_value=true]" 
/usr/include/thrust/system/cuda/detail/detail/launch_closure.inl(89): here
            instantiation of "void thrust::system::cuda::detail::detail::closure_launcher_base<Closure, launch_by_value>::launch(thrust::system::cuda::detail::execution_policy<DerivedPolicy> &, Closure, Size1, Size2, Size3) [with Closure=thrust::system::cuda::detail::copy_if_detail::copy_if_intervals_closure<thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::transform_iterator<thrust::detail::predicate_to_integral<thrust::identity<int>, signed long>, thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, signed long, thrust::use_default>, thrust::detail::normal_iterator<thrust::pointer<signed long, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::system::detail::internal::uniform_decomposition<signed long>, StridedIterator<ushort *>, thrust::system::cuda::detail::detail::statically_blocked_thread_array<256U>>, launch_by_value=true, DerivedPolicy=thrust::system::cuda::detail::par_t, Size1=signed long, Size2=unsigned int, Size3=unsigned int]" 
/usr/include/thrust/system/cuda/detail/detail/launch_closure.inl(170): here
            instantiation of "void thrust::system::cuda::detail::detail::closure_launcher<Closure>::launch(thrust::system::cuda::detail::execution_policy<DerivedPolicy> &, Closure, Size1, Size2, Size3) [with Closure=thrust::system::cuda::detail::copy_if_detail::copy_if_intervals_closure<thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::transform_iterator<thrust::detail::predicate_to_integral<thrust::identity<int>, signed long>, thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, signed long, thrust::use_default>, thrust::detail::normal_iterator<thrust::pointer<signed long, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::system::detail::internal::uniform_decomposition<signed long>, StridedIterator<ushort *>, thrust::system::cuda::detail::detail::statically_blocked_thread_array<256U>>, DerivedPolicy=thrust::system::cuda::detail::par_t, Size1=signed long, Size2=unsigned int, Size3=unsigned int]" 
/usr/include/thrust/system/cuda/detail/detail/launch_closure.inl(193): here
            [ 6 instantiation contexts not shown ]
            instantiation of "OutputIterator thrust::unique_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, OutputIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, InputIterator=thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, OutputIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/system/detail/generic/unique.inl(71): here
            instantiation of "ForwardIterator thrust::system::detail::generic::unique(thrust::execution_policy<DerivedPolicy> &, ForwardIterator, ForwardIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/detail/unique.inl(59): here
            instantiation of "ForwardIterator thrust::unique(const thrust::detail::execution_policy_base<DerivedPolicy> &, ForwardIterator, ForwardIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/system/detail/generic/unique.inl(54): here
            instantiation of "ForwardIterator thrust::system::detail::generic::unique(thrust::execution_policy<DerivedPolicy> &, ForwardIterator, ForwardIterator) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>]" 
/usr/include/thrust/detail/unique.inl(44): here
            instantiation of "ForwardIterator thrust::unique(const thrust::detail::execution_policy_base<DerivedPolicy> &, ForwardIterator, ForwardIterator) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>]" 
Demo.cu(39): here

/usr/include/thrust/system/cuda/detail/copy_if.inl(158): error: no operator "+" matches these operands
            operand types are: StridedIterator<ushort *> + unsigned int
          detected during:
            instantiation of "void thrust::system::cuda::detail::copy_if_detail::copy_if_intervals_closure<InputIterator1, InputIterator2, InputIterator3, Decomposition, OutputIterator, Context>::operator()() [with InputIterator1=thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, InputIterator2=thrust::transform_iterator<thrust::detail::predicate_to_integral<thrust::identity<int>, signed long>, thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, signed long, thrust::use_default>, InputIterator3=thrust::detail::normal_iterator<thrust::pointer<signed long, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, Decomposition=thrust::system::detail::internal::uniform_decomposition<signed long>, OutputIterator=StridedIterator<ushort *>, Context=thrust::system::cuda::detail::detail::statically_blocked_thread_array<256U>]" 
/usr/include/thrust/system/cuda/detail/detail/launch_closure.inl(52): here
            instantiation of "void thrust::system::cuda::detail::detail::launch_closure_by_value(Closure) [with Closure=thrust::system::cuda::detail::copy_if_detail::copy_if_intervals_closure<thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::transform_iterator<thrust::detail::predicate_to_integral<thrust::identity<int>, signed long>, thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, signed long, thrust::use_default>, thrust::detail::normal_iterator<thrust::pointer<signed long, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::system::detail::internal::uniform_decomposition<signed long>, StridedIterator<ushort *>, thrust::system::cuda::detail::detail::statically_blocked_thread_array<256U>>]" 
/usr/include/thrust/system/cuda/detail/detail/launch_closure.inl(81): here
            instantiation of "thrust::system::cuda::detail::detail::closure_launcher_base<Closure, launch_by_value>::launch_function_t thrust::system::cuda::detail::detail::closure_launcher_base<Closure, launch_by_value>::get_launch_function() [with Closure=thrust::system::cuda::detail::copy_if_detail::copy_if_intervals_closure<thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::transform_iterator<thrust::detail::predicate_to_integral<thrust::identity<int>, signed long>, thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, signed long, thrust::use_default>, thrust::detail::normal_iterator<thrust::pointer<signed long, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::system::detail::internal::uniform_decomposition<signed long>, StridedIterator<ushort *>, thrust::system::cuda::detail::detail::statically_blocked_thread_array<256U>>, launch_by_value=true]" 
/usr/include/thrust/system/cuda/detail/detail/launch_closure.inl(89): here
            instantiation of "void thrust::system::cuda::detail::detail::closure_launcher_base<Closure, launch_by_value>::launch(thrust::system::cuda::detail::execution_policy<DerivedPolicy> &, Closure, Size1, Size2, Size3) [with Closure=thrust::system::cuda::detail::copy_if_detail::copy_if_intervals_closure<thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::transform_iterator<thrust::detail::predicate_to_integral<thrust::identity<int>, signed long>, thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, signed long, thrust::use_default>, thrust::detail::normal_iterator<thrust::pointer<signed long, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::system::detail::internal::uniform_decomposition<signed long>, StridedIterator<ushort *>, thrust::system::cuda::detail::detail::statically_blocked_thread_array<256U>>, launch_by_value=true, DerivedPolicy=thrust::system::cuda::detail::par_t, Size1=signed long, Size2=unsigned int, Size3=unsigned int]" 
/usr/include/thrust/system/cuda/detail/detail/launch_closure.inl(170): here
            instantiation of "void thrust::system::cuda::detail::detail::closure_launcher<Closure>::launch(thrust::system::cuda::detail::execution_policy<DerivedPolicy> &, Closure, Size1, Size2, Size3) [with Closure=thrust::system::cuda::detail::copy_if_detail::copy_if_intervals_closure<thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::transform_iterator<thrust::detail::predicate_to_integral<thrust::identity<int>, signed long>, thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, signed long, thrust::use_default>, thrust::detail::normal_iterator<thrust::pointer<signed long, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, thrust::system::detail::internal::uniform_decomposition<signed long>, StridedIterator<ushort *>, thrust::system::cuda::detail::detail::statically_blocked_thread_array<256U>>, DerivedPolicy=thrust::system::cuda::detail::par_t, Size1=signed long, Size2=unsigned int, Size3=unsigned int]" 
/usr/include/thrust/system/cuda/detail/detail/launch_closure.inl(193): here
            [ 6 instantiation contexts not shown ]
            instantiation of "OutputIterator thrust::unique_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, OutputIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, InputIterator=thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, OutputIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/system/detail/generic/unique.inl(71): here
            instantiation of "ForwardIterator thrust::system::detail::generic::unique(thrust::execution_policy<DerivedPolicy> &, ForwardIterator, ForwardIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/detail/unique.inl(59): here
            instantiation of "ForwardIterator thrust::unique(const thrust::detail::execution_policy_base<DerivedPolicy> &, ForwardIterator, ForwardIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/system/detail/generic/unique.inl(54): here
            instantiation of "ForwardIterator thrust::system::detail::generic::unique(thrust::execution_policy<DerivedPolicy> &, ForwardIterator, ForwardIterator) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>]" 
/usr/include/thrust/detail/unique.inl(44): here
            instantiation of "ForwardIterator thrust::unique(const thrust::detail::execution_policy_base<DerivedPolicy> &, ForwardIterator, ForwardIterator) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>]" 
Demo.cu(39): here

/usr/include/thrust/system/cuda/detail/copy_if.inl(214): error: no operator "+" matches these operands
            operand types are: StridedIterator<ushort *> + thrust::reference<signed long, thrust::pointer<signed long, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>, thrust::use_default>
          detected during:
            instantiation of "OutputIterator thrust::system::cuda::detail::copy_if_detail::copy_if(thrust::system::cuda::detail::execution_policy<DerivedPolicy> &, InputIterator1, InputIterator1, InputIterator2, OutputIterator, Predicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, InputIterator1=thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, InputIterator2=thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, OutputIterator=StridedIterator<ushort *>, Predicate=thrust::identity<int>]" 
(251): here
            instantiation of "OutputIterator thrust::system::cuda::detail::copy_if(thrust::system::cuda::detail::execution_policy<DerivedPolicy> &, InputIterator1, InputIterator1, InputIterator2, OutputIterator, Predicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, InputIterator1=thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, InputIterator2=thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, OutputIterator=StridedIterator<ushort *>, Predicate=thrust::identity<int>]" 
/usr/include/thrust/detail/copy_if.inl(60): here
            instantiation of "OutputIterator thrust::copy_if(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator1, InputIterator1, InputIterator2, OutputIterator, Predicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, InputIterator1=thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, InputIterator2=thrust::detail::normal_iterator<thrust::pointer<int, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, OutputIterator=StridedIterator<ushort *>, Predicate=thrust::identity<int>]" 
/usr/include/thrust/system/detail/generic/unique.inl(110): here
            instantiation of "OutputIterator thrust::system::detail::generic::unique_copy(thrust::execution_policy<DerivedPolicy> &, InputIterator, InputIterator, OutputIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, InputIterator=thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, OutputIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/detail/unique.inl(91): here
            instantiation of "OutputIterator thrust::unique_copy(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, OutputIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, InputIterator=thrust::detail::normal_iterator<thrust::pointer<ushort, thrust::system::cuda::detail::par_t, thrust::use_default, thrust::use_default>>, OutputIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/system/detail/generic/unique.inl(71): here
            instantiation of "ForwardIterator thrust::system::detail::generic::unique(thrust::execution_policy<DerivedPolicy> &, ForwardIterator, ForwardIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/detail/unique.inl(59): here
            instantiation of "ForwardIterator thrust::unique(const thrust::detail::execution_policy_base<DerivedPolicy> &, ForwardIterator, ForwardIterator, BinaryPredicate) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>, BinaryPredicate=thrust::equal_to<ushort>]" 
/usr/include/thrust/system/detail/generic/unique.inl(54): here
            instantiation of "ForwardIterator thrust::system::detail::generic::unique(thrust::execution_policy<DerivedPolicy> &, ForwardIterator, ForwardIterator) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>]" 
/usr/include/thrust/detail/unique.inl(44): here
            instantiation of "ForwardIterator thrust::unique(const thrust::detail::execution_policy_base<DerivedPolicy> &, ForwardIterator, ForwardIterator) [with DerivedPolicy=thrust::system::cuda::detail::par_t, ForwardIterator=StridedIterator<ushort *>]" 
Demo.cu(39): here

4 errors detected in the compilation of "/tmp/tmpxft_00001755_00000000-9_Demo.cpp1.ii".

Please advise.

Oops … Thanks to the syntax highlighter here I can now ‘parse’ the error messages. I need to implement operator “+” and “*”.

You don’t want a stride iterator.

You want a zip iterator that easily extend beyond the hard-coded limitation of template arguments.

Strided accesses in CUDA are probably not what you want. Instead, you want a 2d representation.

Imagine the memory layout a bit more like this:

data_0: x x x x x x x x ...
data_1: y y y y y y y y ...
data_2: z z z z z z z z ...
...

You want to access the data more like this, pulling in (x, y, z, …) for each column in that arrangement. This is because you’ll get higher load coalescence than other access patterns. Or at least are more likely to induce that in the general case.

So instead, you need a simple array of arrays and its respective length.

The only reason why this is limiting in C++ is because you won’t be able to have poly-typed arrays. So zip_iterator is still “better” in this sense because it’s more flexible.

Honestly, you’re better off figuring out how to rewrite your problem to avoid these massive arrays that don’t fit into the current toolsets.

Using iterator_traits on a generic name like T is confusing as well. Normally T represents something like a value type and not another iterator.

Thanks MutantJohn. Unfortunately, it is impossible to recast the problem in lower dimensions, if that’s what you mean.

Furthermore, the problem, as I understand it is that (1) the dimension of a tuple is not known beforehand or rather varies from problem to problem (2) the maximum tuple dimension that the thrust function make_tuple supports is 10.

BTW, although I’ve implemented the missing iterators, I’m still getting weird compile-time errors with my attempt at a Stride Iterator. Therefore, I am investigating using the permutation iterator as suggested in this stack-overflow thread.

Okay, here’s my implementation. Its a shameless rip off of the strided range example

#include <vector>
#include <iostream>

#include <thrust/unique.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>

typedef unsigned short ushort;

template<typename Iterator>
struct StridedIterator
{
	typedef typename thrust::iterator_difference<Iterator>::type difference_type;

	struct StrideFunctor : public thrust::unary_function<difference_type,difference_type>
	{
		difference_type m_stride;

		StrideFunctor(difference_type stride ): m_stride( stride )
		{
		}

		__host__ __device__
		difference_type operator()(const difference_type& i) const
		{ 
		    return m_stride * i;
		}
	};

	typedef typename thrust::counting_iterator<difference_type>                   CountingIterator;
	typedef typename thrust::transform_iterator<StrideFunctor, CountingIterator>  TransformIterator;
	typedef typename thrust::permutation_iterator<Iterator,TransformIterator>     PermutationIterator;

	typedef PermutationIterator Type;

	StridedIterator( Iterator startIter , Iterator endIter , difference_type stride )
		: m_startIter( startIter )
		, m_endIter( endIter )
		, m_stride( stride )
	{
	}

    Type begin() const
    {
        return PermutationIterator( m_startIter , TransformIterator( CountingIterator(0) , StrideFunctor( m_stride ) ) );
    }

    Type end() const
    {
        return begin() + ( ( m_endIter - m_startIter ) + ( m_stride - 1 ) ) / m_stride;
    }

    Iterator 		m_startIter;
    Iterator 		m_endIter;
    difference_type m_stride;
};

int main()
{
	std::vector<ushort> input{
						2 , 1 , 1 , 1 , 1 , 2 , 3 , 2 , 1 , 1 ,
						1 , 2 , 1 , 1 , 3 , 2 , 1 , 1 , 2 , 1
					};

	thrust::host_vector<ushort> hData = input;
	//std::cout << hData.size() << std::endl;
	ushort dim = 2;

	thrust::device_vector<ushort> data = hData;
	typedef thrust::device_vector<ushort>::iterator Iterator;

	StridedIterator<Iterator> stridedIter( data.begin() , data.end() , dim );
	auto iter = thrust::unique( thrust::device , stridedIter.begin() , stridedIter.end() );
	std::cout << stridedIter.end() - stridedIter.begin() << std::endl;
	std::cout << iter - stridedIter.begin() << std::endl;

	hData = data;
}