#include "rawimage.h"

#ifdef __SSE2__
#include <emmintrin.h>

#include <cstddef>
#include <cstdint>
#include <limits>

namespace {

/// Interleaves `blocks` 16-byte groups taken from separate R, G, B (and
/// optionally A) planes into packed 4-channel pixels.
///
/// Each iteration reads one 16-byte vector per plane and writes four 16-byte
/// vectors (64 bytes) to `dst`. The element width is determined solely by the
/// `lo`/`hi` callables, which must wrap the matching `_mm_unpacklo_*` /
/// `_mm_unpackhi_*` intrinsics.
///
/// @tparam hasAlpha  when true the fourth channel is read from `a`;
///                   otherwise every pixel gets the constant `opaque`.
/// @param r,g,b      start of the respective source planes.
/// @param a          start of the alpha plane; never dereferenced when
///                   `hasAlpha` is false.
/// @param dst        destination; receives `blocks * 4` vectors.
/// @param blocks     number of 16-byte groups to convert per plane.
/// @param opaque     vector holding the "fully opaque" value in every lane.
/// @param lo,hi      element-wise unpack-low / unpack-high operations.
template<bool hasAlpha, typename UnpackLo, typename UnpackHi>
inline void interleaveBlocks(const __m128i *r, const __m128i *g, const __m128i *b,
                             [[maybe_unused]] const __m128i *a,
                             __m128i *dst, size_t blocks, __m128i opaque,
                             UnpackLo lo, UnpackHi hi)
{
    for(size_t i = 0; i < blocks; i++) {
        // Unaligned loads/stores: no alignment requirement on the buffers.
        const __m128i vr = _mm_loadu_si128(r + i);
        const __m128i vg = _mm_loadu_si128(g + i);
        const __m128i vb = _mm_loadu_si128(b + i);
        __m128i va = opaque;
        // The alpha plane has to advance with `i` just like the colour planes.
        if constexpr(hasAlpha) va = _mm_loadu_si128(a + i);

        // Two-stage unpack: (r,b) and (g,a) are zipped first, and zipping the
        // two results yields r,g,b,a order per pixel.
        __m128i rb = lo(vr, vb);
        __m128i ga = lo(vg, va);
        _mm_storeu_si128(dst + i*4,     lo(rb, ga));
        _mm_storeu_si128(dst + i*4 + 1, hi(rb, ga));

        rb = hi(vr, vb);
        ga = hi(vg, va);
        _mm_storeu_si128(dst + i*4 + 2, lo(rb, ga));
        _mm_storeu_si128(dst + i*4 + 3, hi(rb, ga));
    }
}

/// Scalar conversion of pixels [first, count) that did not fill a whole SIMD
/// block.
///
/// Plane `c` of the source starts at element `c * count`; pixel `i` of the
/// destination occupies elements `i*4 .. i*4 + 3`.
///
/// @tparam E      element type used for the copy.
/// @tparam ch     number of source planes; when it is 3 the fourth output
///                channel is filled with `opaque`.
/// @param in      planar source buffer.
/// @param out     interleaved 4-channel destination buffer.
/// @param first   index of the first pixel to convert.
/// @param count   total number of pixels (also the plane stride in elements).
/// @param opaque  value written to the fourth channel for 3-plane input.
template<typename E, int ch>
inline void interleaveTail(const void *in, void *out, size_t first, size_t count,
                           [[maybe_unused]] E opaque)
{
    const E *src = static_cast<const E*>(in);
    E *dst = static_cast<E*>(out);

    for(size_t i = first; i < count; i++) {
        for(int c = 0; c < ch; c++)
            dst[i*4 + c] = src[i + static_cast<size_t>(c) * count];
        if constexpr(ch == 3) dst[i*4 + 3] = opaque;
    }
}

} // namespace

/// Converts a planar image (all R samples, then all G, then all B, then
/// optionally all A) into interleaved 4-channel pixels using SSE2.
///
/// Whole 16-byte groups are processed with SIMD; the remaining pixels are
/// converted one by one. Only element sizes of 1, 2 and 4 bytes are handled;
/// for any other `sizeof(T)` the function writes nothing.
///
/// For 3-plane input the fourth channel is filled with all-ones for 1- and
/// 2-byte elements and for 4-byte integer elements, and with 1.0f for 4-byte
/// non-integer elements.
///
/// NOTE(review): the template parameter list below is assumed to be
/// `<typename T, int ch>` — confirm that it matches the declaration in
/// rawimage.h.
///
/// @tparam T     sample type.
/// @tparam ch    number of source planes (3 or 4).
/// @param in     source: `ch` consecutive planes of `count` samples each.
/// @param out    destination: `count * 4` samples.
/// @param count  number of pixels.
template<typename T, int ch>
void fromPlanarSSE(const void *in, void *out, size_t count)
{
    constexpr bool hasAlpha = (ch == 4);

    const T *src = static_cast<const T*>(in);
    const __m128i *r = reinterpret_cast<const __m128i*>(src);
    const __m128i *g = reinterpret_cast<const __m128i*>(src + count);
    const __m128i *b = reinterpret_cast<const __m128i*>(src + count*2);
    // Only form the alpha-plane pointer when that plane actually exists.
    const __m128i *a = hasAlpha ? reinterpret_cast<const __m128i*>(src + count*3) : nullptr;
    __m128i *dst = static_cast<__m128i*>(out);

    if constexpr(sizeof(T) == 1) {
        const auto lo = [](__m128i x, __m128i y) { return _mm_unpacklo_epi8(x, y); };
        const auto hi = [](__m128i x, __m128i y) { return _mm_unpackhi_epi8(x, y); };
        const size_t blocks = count / 16;   // 16 samples per vector
        interleaveBlocks<hasAlpha>(r, g, b, a, dst, blocks, _mm_set1_epi8(-1), lo, hi);
        interleaveTail<uint8_t, ch>(in, out, blocks * 16, count, 0xff);
    }
    else if constexpr(sizeof(T) == 2) {
        const auto lo = [](__m128i x, __m128i y) { return _mm_unpacklo_epi16(x, y); };
        const auto hi = [](__m128i x, __m128i y) { return _mm_unpackhi_epi16(x, y); };
        const size_t blocks = count / 8;    // 8 samples per vector
        interleaveBlocks<hasAlpha>(r, g, b, a, dst, blocks, _mm_set1_epi16(-1), lo, hi);
        interleaveTail<uint16_t, ch>(in, out, blocks * 8, count, 0xffff);
    }
    else if constexpr(sizeof(T) == 4) {
        const auto lo = [](__m128i x, __m128i y) { return _mm_unpacklo_epi32(x, y); };
        const auto hi = [](__m128i x, __m128i y) { return _mm_unpackhi_epi32(x, y); };
        const size_t blocks = count / 4;    // 4 samples per vector
        if constexpr(std::numeric_limits<T>::is_integer) {
            interleaveBlocks<hasAlpha>(r, g, b, a, dst, blocks, _mm_set1_epi32(-1), lo, hi);
            interleaveTail<uint32_t, ch>(in, out, blocks * 4, count, 0xffffffffu);
        }
        else {
            // Non-integer 4-byte samples: opaque alpha is 1.0f.
            interleaveBlocks<hasAlpha>(r, g, b, a, dst, blocks,
                                       _mm_castps_si128(_mm_set1_ps(1.0f)), lo, hi);
            interleaveTail<float, ch>(in, out, blocks * 4, count, 1.0f);
        }
    }
}

// NOTE(review): the argument lists of these eight explicit instantiations are
// assumed to be {uint8_t, uint16_t, uint32_t, float} x {3, 4} — confirm
// against the users of fromPlanarSSE.
template void fromPlanarSSE<uint8_t, 3>(const void *in, void *out, size_t count);
template void fromPlanarSSE<uint8_t, 4>(const void *in, void *out, size_t count);
template void fromPlanarSSE<uint16_t, 3>(const void *in, void *out, size_t count);
template void fromPlanarSSE<uint16_t, 4>(const void *in, void *out, size_t count);
template void fromPlanarSSE<uint32_t, 3>(const void *in, void *out, size_t count);
template void fromPlanarSSE<uint32_t, 4>(const void *in, void *out, size_t count);
template void fromPlanarSSE<float, 3>(const void *in, void *out, size_t count);
template void fromPlanarSSE<float, 4>(const void *in, void *out, size_t count);

#endif