const int row_dst = blockDim.x*blockIdx.x + threadIdx.x;
- if (i0 >= n_dims) {
- const int i = row_dst*ne0 + i0;
-
- dst[i + 0] = x[i + 0];
- dst[i + 1] = x[i + 1];
-
- return;
- }
-
const int row_x = row_dst % ne1;
const int channel_x = row_dst / ne1;
const int idst = row_dst*ne0 + i0;
const int ix = channel_x*s2 + row_x*s1 + i0;
+ if (i0 >= n_dims) {
+ dst[idst + 0] = x[ix + 0];
+ dst[idst + 1] = x[ix + 1];
+
+ return;
+ }
+
const float theta_base = pos[channel_x]*powf(theta_scale, i0/2.0f);
const float freq_factor = has_ff ? freq_factors[i0/2] : 1.0f;
const int row_dst = blockDim.x*blockIdx.x + threadIdx.x;
- if (i0 >= n_dims) {
- const int i = row_dst*ne0 + i0;
-
- dst[i + 0] = x[i + 0];
- dst[i + 1] = x[i + 1];
-
- return;
- }
-
const int row_x = row_dst % ne1;
const int channel_x = row_dst / ne1;
const int idst = row_dst*ne0 + i0/2;
const int ix = channel_x*s2 + row_x*s1 + i0/2;
+ if (i0 >= n_dims) {
+ dst[idst + i0/2 + 0] = x[ix + i0/2 + 0];
+ dst[idst + i0/2 + 1] = x[ix + i0/2 + 1];
+
+ return;
+ }
+
const float theta_base = pos[channel_x]*powf(theta_scale, i0/2.0f);
const float freq_factor = has_ff ? freq_factors[i0/2] : 1.0f;
const int row_dst = blockDim.x*blockIdx.x + threadIdx.x;
- if (i0 >= n_dims) {
- const int i = row_dst*ne0 + i0;
-
- dst[i + 0] = x[i + 0];
- dst[i + 1] = x[i + 1];
-
- return;
- }
-
const int row_x = row_dst % ne1;
const int channel_x = row_dst / ne1;
const int idst = row_dst*ne0 + i0/2;
const int ix = channel_x*s2 + row_x*s1 + i0/2;
+ if (i0 >= n_dims) {
+ dst[idst + i0/2 + 0] = x[ix + i0/2 + 0];
+ dst[idst + i0/2 + 1] = x[ix + i0/2 + 1];
+
+ return;
+ }
+
const int sect_dims = sections.v[0] + sections.v[1] + sections.v[2] + sections.v[3];
const int sec_w = sections.v[1] + sections.v[0];
const int sector = (i0 / 2) % sect_dims;
const int row = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2);
- if (i0 >= n_dims) {
- const int i = row * ne0 + i0;
- *reinterpret_cast<sycl::vec<T, 2> *>(dst + i) = *reinterpret_cast<const sycl::vec<T, 2> *>(x + i);
- return;
- }
-
const int row0 = row % ne1;
const int channel0 = row / ne1;
const int i = row * ne0 + i0;
const int i2 = channel0 * s2 + row0 * s1 + i0;
+ if (i0 >= n_dims) {
+ *reinterpret_cast<sycl::vec<T, 2> *>(dst + i) = *reinterpret_cast<const sycl::vec<T, 2> *>(x + i2);
+ return;
+ }
+
const float theta_base = pos[channel0] * sycl::pow(theta_scale, i0 / 2.0f);
const float freq_factor = has_ff ? freq_factors[i0 / 2] : 1.0f;
const int row = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2);
- if (i0 >= n_dims) {
- const int i = row * ne0 + i0;
- *reinterpret_cast<sycl::vec<T, 2> *>(dst + i) = *reinterpret_cast<const sycl::vec<T, 2> *>(x + i);
- return;
- }
-
const int row0 = row % ne1;
const int channel0 = row / ne1;
const int i = row * ne0 + i0 / 2;
const int i2 = channel0 * s2 + row0 * s1 + i0 / 2;
+ if (i0 >= n_dims) {
+ *reinterpret_cast<sycl::vec<T, 2> *>(dst + i + i0 / 2) = *reinterpret_cast<const sycl::vec<T, 2> *>(x + i2 + i0 / 2);
+ return;
+ }
+
const float theta_base = pos[channel0] * sycl::pow(theta_scale, i0 / 2.0f);
const float freq_factor = has_ff ? freq_factors[i0 / 2] : 1.0f;
}
const int row_dst = (item_ct1.get_group(2) * item_ct1.get_local_range(2)) + item_ct1.get_local_id(2);
- if (i0 >= n_dims) {
- const int i = row_dst*ne0 + i0;
- *reinterpret_cast<sycl::vec<T, 2> *>(dst + i) = *reinterpret_cast<const sycl::vec<T, 2> *>(x + i);
- return;
- }
-
const int row_x = row_dst % ne1;
const int channel_x = row_dst / ne1;
const int idst = (row_dst * ne0) + (i0 / 2);
const size_t ix = ((size_t) channel_x * s2) + ((size_t) row_x * s1) + (i0 / 2);
+ if (i0 >= n_dims) {
+ *reinterpret_cast<sycl::vec<T, 2> *>(dst + idst + i0 / 2) = *reinterpret_cast<const sycl::vec<T, 2> *>(x + i0 / 2 + ix);
+ return;
+ }
+
const int sect_dims = sections.v[0] + sections.v[1] + sections.v[2] + sections.v[3];
const int sec_w = sections.v[1] + sections.v[0];
const int sector = (i0 / 2) % sect_dims;