PROSAGA码农传奇-Tex-CUDA从非均匀采样中重新取样

<div class =“post-text”itemprop =“text”>
  
    可能有很多方法。例如，你可以使用基本的
    <a href="https://stackoverflow.com/questions/21658518/search-an-ordered-array-in-a-cuda-kernel">
      cuda二元搜索
    </A>
     以线程并行的方式。我会演示一个
    <a href="https://github.com/thrust/thrust/wiki/Quick-Start-Guide" rel="nofollow noreferrer">
      推力
    </A>
     实现。
  
  
    出于本讨论的目的，我将假设两个数据集（已知的采样点和所需的采样点）是任意放置的（即我不假设任一样本集均匀间隔）。但是我会规定或要求所需的样本点
    
      完全包含
    
     在已知的样本点内。我相信这是明智的，因为通常的线性插值需要在所需采样点两侧的已知采样点。
  
  
    因此我们将使用这样的数据集：
  
   <pre>
 <code>
 o: 1,3,7
f(o): 3,7,15
 x: 1.5, 2.5, 4.5, 5.0, 6.0, 6.5
f(x): ?, ?, ?, ?, ?, ?

</code>
 </pre>
  
    我们看到了
     <code>
 f
 </code>
     是已知的功能值，对应于
     <code>
 f(o) = 2o+1
 </code>
    ，在这种情况下是一条直线（虽然这种方法不要求已知的采样点适合任何特定的功能）。
     <code>
 x
 </code>
     表示我们希望根据已知值插入功能值的指数（
     <code>
 f(o)
 </code>
    ）。我们的愿望是计算
     <code>
 f(x)
 </code>
     通过插值从最近的
     <code>
 f(o)
 </code>
     点。请注意，我们的数据集是所有值的
     <code>
 x
 </code>
     介于最小值（1）和最大值（7）之间
     <code>
 o
 </code>
     值。这是我之前说过的规定/要求。
  
  
    我们的推力方法是使用矢量化二进制搜索
     <code>
 thrust::upper_bound
 </code>
    ，找到每个所需的“插入点”
     <code>
 x
 </code>
     价值适合
     <code>
 o
 </code>
     序列。这给了我们正确的邻居，并且左邻居（右1）进行插值。一旦我们知道了插入点，那么选择例如这个算法将是一个简单的扩展。该
    
      二
    
     离开了
    
      二
    
     如果我们想要使用除线性插值之外的其他东西，那么右邻居（或更多）。
  
  
    插入点然后给我们左右邻居，我们使用这些信息传递到适当的制作
     <code>
 thrust::transform
 </code>
     操作
     <code>
 x
 </code>
     矢量（所需的插值点）和a
     <code>
 thrust::tuple
 </code>
     （通过
     <code>
 thrust::zip_iterator
 </code>
    ）提供：
  
  <UL>
    <LI>
      正确的邻居指数
    </LI>
    <LI>
      正确的邻居功能价值
    </LI>
    <LI>
      左邻居指数
    </LI>
    <LI>
      左邻居功能值
    </LI>
  </UL>
  
    使用这些数量，加上所需的指数（
     <code>
 x
 </code>
    ），插值很简单。
  
  
    的
      编辑：
    </强>
     受其他答案的启发，我决定采用一种避免并行二进制搜索的方法，而是使用前缀和方法来识别插入索引。
     <code>
 x
 </code>
     中的数据
     <code>
 o
 </code>
     数据。这种方法假定
    
      都
    
     <code>
 x
 </code>
     和
     <code>
 o
 </code>
     序列被排序。
  
  
    我们将从merge_by_key操作开始。我们将合并
     <code>
 x
 </code>
     同
     <code>
 o
 </code>
    ，建立排序（这似乎比二分搜索更有效）。该
     <code>
 x
 </code>
     和
     <code>
 o
 </code>
     数量将是“键”，值将全部为1
     <code>
 o
 </code>
     并且全部为0
     <code>
 x
 </code>
    。然后使用我们的示例数据，merge_by_key将产生以下结果：
  
   <pre>
 <code>
 o keys: 1,3,7
o vals: 1,1,1

x keys:  1.5,2.5,4.5,5.0,6.0,6.5
x vals:    0,  0,  0,  0,  0,  0

merged keys:  1, 1.5, 2.5,   3, 4.5, 5.0, 6.0, 6.5,   7
merged vals:  1,   0,   0,   1,   0,   0,   0,   0,   1

</code>
 </pre>
  
    当我们对合并的val执行前缀sum（包含扫描）时，我们得到：
  
   <pre>
 <code>
 ins. ind.: 1, 1, 1, 2, 2, 2, 2, 2, 3

</code>
 </pre>
  
    然后我们可以执行copy_if操作来仅提取与之关联的插入索引
     <code>
 x
 </code>
     vals（其合并的val为零）生成在步骤1中生成的相同插入索引序列：
  
   <pre>
 <code>
 d_i: 1, 1, 2, 2, 2, 2

</code>
 </pre>
  
    然后，方法2的其余部分可以使用完全相同的剩余插值代码（
     <code>
 thrust::transform
 </code>
    ）如方法1中所使用的那样。
  
  
    这是一个完整的示例，显示了两种方法：
  
   <pre>
 <code>
 $ cat t1224.cu
#include <thrust/device_vector.h>
#include <thrust/binary_search.h>
#include <thrust/transform.h>
#include <thrust/copy.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <iostream>
#include <thrust/merge.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/scan.h>

struct interp_func
{
 template <typename T>
 __host__ __device__
 float operator()(float t1, T t2){ // m = (y1-y0)/(x1-x0) y = m(x-x0) + y0
 return ((thrust::get<1>(t2) - thrust::get<3>(t2))/(thrust::get<0>(t2) - thrust::get<2>(t2)))*(t1 - thrust::get<2>(t2)) + thrust::get<3>(t2);
 }
};

using namespace thrust::placeholders;

int main(){

// sample data
  float o[] = {1.0f, 3.0f, 7.0f}; // unevenly spaced sample points for function f
  float f[] = {3.0f, 7.0f, 15.0f}; // f(o) = 2o+1
  float x[] = {1.5f, 2.5f, 4.5f, 5.0f, 6.0f, 6.5f}; // additional desired sample points for f
  int so = sizeof(o)/sizeof(o[0]);
  int sx = sizeof(x)/sizeof(x[0]);

// setup data on device
 thrust::device_vector<float> d_o(o, o+so);
 thrust::device_vector<float> d_f(f, f+so);
 thrust::device_vector<float> d_x(x, x+sx);
 thrust::device_vector<int> d_i(sx); // insertion indices
 thrust::device_vector<float> d_r(sx); // results
 // method 1: binary search
 // perform search for insertion indices
 thrust::upper_bound(d_o.begin(), d_o.end(), d_x.begin(), d_x.end(), d_i.begin());
 // then perform linear interpolation based on left and right neighbors
 std::cout << "Method 1 insertion indices:" << std::endl;
 thrust::copy(d_i.begin(), d_i.end(), std::ostream_iterator<int>(std::cout, ","));
 std::cout << std::endl;
 thrust::transform(d_x.begin(), d_x.end(), thrust::make_zip_iterator(thrust::make_tuple(thrust::make_permutation_iterator(d_o.begin(), d_i.begin()), thrust::make_permutation_iterator(d_f.begin(), d_i.begin()), thrust::make_permutation_iterator(d_o.begin(), thrust::make_transform_iterator(d_i.begin(), _1-1)), thrust::make_permutation_iterator(d_f.begin(), thrust::make_transform_iterator(d_i.begin(), _1-1)))), d_r.begin(), interp_func());

// output results
 std::cout << "Interpolation points:" << std::endl;
 thrust::copy(d_x.begin(), d_x.end(), std::ostream_iterator<float>(std::cout, ","));
 std::cout << std::endl << "Interpolated values:" << std::endl;
 thrust::copy(d_r.begin(), d_r.end(), std::ostream_iterator<float>(std::cout, ","));
 std::cout << std::endl << "Expected values:" << std::endl;
 for (int i = 0; i < sx; i++) std::cout << 2*x[i]+1 << ",";
 std::cout << std::endl;

//method 2: merge + prefix sum
 thrust::device_vector<float> d_kr(sx+so);
 thrust::device_vector<int> d_vr(sx+so);
 thrust::device_vector<int> d_s(sx+so);
 thrust::merge_by_key(d_o.begin(), d_o.end(), d_x.begin(), d_x.end(), thrust::constant_iterator<int>(1), thrust::constant_iterator<int>(0), d_kr.begin(), d_vr.begin());
 thrust::inclusive_scan(d_vr.begin(), d_vr.end(), d_s.begin());
 thrust::copy_if(d_s.begin(), d_s.end(), d_vr.begin(), d_i.begin(), _1 == 0);
 std::cout << "Method 2 insertion indices:" << std::endl;
 thrust::copy(d_i.begin(), d_i.end(), std::ostream_iterator<int>(std::cout, ","));
 std::cout << std::endl;
 // remainder of solution method would be identical to end of method 1 starting with the thrust::transform
 return 0;
}
$ nvcc -o t1224 t1224.cu
$ ./t1224
Method 1 insertion indices:
1,1,2,2,2,2,
Interpolation points:
1.5,2.5,4.5,5,6,6.5,
Interpolated values:
4,6,10,11,13,14,
Expected values:
4,6,10,11,13,14,
Method 2 insertion indices:
1,1,2,2,2,2,
$

</code>
 </pre>
  
    同样，一旦我们知道了插入点，选择2个右边和2个左边邻居进行更多涉及的插值将是一个简单的扩展。我们只需修改传递给transform（插值）仿函数的zip迭代器，并修改仿函数本身来实现想要的算术。
  
  
    另请注意，此方法假定输入
     <code>
 o
 </code>
     序列已经排序。如果不是，则需要添加按键排序
     <code>
 o
 </code>
     （键）用
     <code>
 f
 </code>
     （值）。该
     <code>
 x
 </code>
     不需要为方法1排序序列，但必须对方法2进行排序（合并需要对两个序列进行排序）。
  
</DIV>