// Listing metadata from the code viewer this file was copied from: 294 lines, 9.3 KiB, C++.
#include "mainwindow.h"
#include "serfile.h"

#include <QApplication>
#include <QElapsedTimer>
#include <QMutex>
#include <QQueue>
#include <QThread>
#include <QWaitCondition>

#include <opencv2/opencv.hpp>

#include <immintrin.h>

#include <cassert>
#include <cmath>
#include <complex>
#include <cstddef>
#include <stdexcept>
#include <utility>
#include <vector>

// One FFT twiddle factor, stored as separate real and imaginary parts.
// Both members default to zero so a default-constructed Twiddle never holds
// indeterminate values.
struct Twiddle {
    float re = 0.0f;  // real part
    float im = 0.0f;  // imaginary part
};

// Complex multiply on split (structure-of-arrays) operands:
//   (are + i*aim) * (bre + i*bim) = (are*bre - aim*bim) + i*(are*bim + aim*bre)
//
// A single __m256 can only carry four complex results, so only lanes 0..3 of
// the operands contribute to the return value:
//   result lanes 0..3 : real parts of products 0..3
//   result lanes 4..7 : imaginary parts of products 0..3
// Lanes 4..7 of the inputs are multiplied but their products are discarded.
inline __m256 cmul_avx2(__m256 are, __m256 aim, __m256 bre, __m256 bim) {
    const __m256 ac = _mm256_mul_ps(are, bre);
    const __m256 bd = _mm256_mul_ps(aim, bim);
    const __m256 ad = _mm256_mul_ps(are, bim);
    const __m256 bc = _mm256_mul_ps(aim, bre);

    const __m256 real = _mm256_sub_ps(ac, bd);  // ac - bd
    const __m256 imag = _mm256_add_ps(ad, bc);  // ad + bc

    // Selector 0x20: low 128 bits <- low half of `real`, high 128 bits <- low
    // half of `imag`. A lane-wise blend would instead pair real parts 0..3 with
    // imaginary parts 4..7, i.e. halves of two different sets of products.
    return _mm256_permute2f128_ps(real, imag, 0x20);
}

// Precompute twiddles
|
||
static std::vector<Twiddle> make_twiddles(int N) {
|
||
std::vector<Twiddle> W(N/2);
|
||
const float PI = std::acos(-1.0f);
|
||
for(int k = 0; k < N/2; ++k) {
|
||
float angle = -2.0f * PI * k / N;
|
||
W[k].re = std::cos(angle);
|
||
W[k].im = std::sin(angle);
|
||
}
|
||
return W;
|
||
}
|
||
|
||
// Stockham FFT with AVX2 for complex<float> (AoS: interleaved real, imag)
|
||
void stockham_fft_avx2(std::complex<float>* data, std::complex<float>* temp,
|
||
int N, bool inverse = false)
|
||
{
|
||
assert((N & (N - 1)) == 0); // power of two
|
||
auto W = make_twiddles(N);
|
||
const float inv_sign = inverse ? +1.0f : -1.0f;
|
||
|
||
std::complex<float>* in = data;
|
||
std::complex<float>* out = temp;
|
||
|
||
int logN = 0;
|
||
while ((1 << logN) < N) ++logN;
|
||
|
||
for(int stage = 0; stage < logN; ++stage) {
|
||
int m = 1 << (stage + 1);
|
||
int half_m = m >> 1;
|
||
// stride between groups
|
||
int group_stride = N / m;
|
||
|
||
for(int k = 0; k < N; k += m) {
|
||
for(int j = 0; j < half_m; ++j) {
|
||
// twiddle W_index:
|
||
int w_index = j * group_stride;
|
||
float w_re = W[w_index].re;
|
||
float w_im = inv_sign * W[w_index].im; // invert sign for inverse
|
||
|
||
// Load w_re, w_im (we can broadcast them)
|
||
__m256 w_re_b = _mm256_set1_ps(w_re);
|
||
__m256 w_im_b = _mm256_set1_ps(w_im);
|
||
|
||
// Process 4 complex numbers at once in the j position strides
|
||
// The 4 complex numbers are from positions:
|
||
// in[k + j + 0*half_m], in[k + j + 1*half_m], in[k + j + 2*half_m], in[k + j + 3*half_m]
|
||
// But that depends on how many half_m, whether half_m >=4 etc.
|
||
// For simplicity, require half_m >=4 in vectorized branch.
|
||
|
||
if (half_m >= 4 && (j + 3*half_m) < N) {
|
||
// Load real parts
|
||
float *ptr_u = reinterpret_cast<float*>(&in[k + j]);
|
||
float *ptr_t0 = reinterpret_cast<float*>(&in[k + j + half_m]);
|
||
// Assuming interleaved: data layout: [Re0, Im0, Re1, Im1, ...]
|
||
// We need gather 4 complex u's and t's with step half_m.
|
||
|
||
// Load u (4 complex): u0, u1, u2, u3
|
||
__m256 u0 = _mm256_loadu_ps(reinterpret_cast<float*>(&in[k + j]));
|
||
__m256 t0 = _mm256_loadu_ps(reinterpret_cast<float*>(&in[k + j + half_m]));
|
||
|
||
// Complex multiply t0 * w
|
||
// Split t0 into re, im
|
||
__m256 t0_re = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 0, 2, 0)); // pick re lanes
|
||
__m256 t0_im = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 1, 3, 1)); // pick im
|
||
|
||
__m256 mul = cmul_avx2(t0_re, t0_im, w_re_b, w_im_b);
|
||
|
||
// Now compute:
|
||
// out[k/2 + j + 0] = u + t * w
|
||
// out[k/2 + j + N/2 + j] = u - t * w
|
||
// But with vector, we do elementwise addition/subtraction
|
||
|
||
__m256 sum = _mm256_add_ps(u0, mul);
|
||
__m256 diff = _mm256_sub_ps(u0, mul);
|
||
|
||
// Store sum and diff to their respective locations in out
|
||
// Need to compute positions:
|
||
|
||
// Position for “sum”:
|
||
std::complex<float>* out_sum = &out[k/2 + j];
|
||
std::complex<float>* out_diff = &out[k/2 + j + N/2];
|
||
|
||
// Store
|
||
_mm256_storeu_ps(reinterpret_cast<float*>(out_sum), sum);
|
||
_mm256_storeu_ps(reinterpret_cast<float*>(out_diff), diff);
|
||
|
||
} else {
|
||
// Fallback scalar for j's not fitting vectorization
|
||
auto u = in[k + j];
|
||
auto t = in[k + j + half_m] * std::complex<float>(w_re, w_im);
|
||
|
||
out[k/2 + j] = u + t;
|
||
out[k/2 + j + N/2] = u - t;
|
||
}
|
||
}
|
||
}
|
||
|
||
// Swap in/out buffers
|
||
std::swap(in, out);
|
||
}
|
||
|
||
// If number of stages is odd, data is currently in temp
|
||
if (logN & 1) {
|
||
for(int i = 0; i < N; ++i)
|
||
data[i] = in[i];
|
||
}
|
||
|
||
// Normalize for inverse
|
||
if (inverse) {
|
||
float invN = 1.0f / N;
|
||
for(int i = 0; i < N; ++i) {
|
||
data[i] *= invN;
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
// In-place iterative radix-2 Cooley-Tukey FFT.
//
//   x   : samples, overwritten with the transform. Sizes 0 and 1 are returned
//         unchanged.
//   inv : false -> forward transform using exp(-2*pi*i*k/N);
//         true  -> inverse transform, including the 1/N normalisation.
//
// Throws std::invalid_argument when x.size() is not a power of two: the
// butterfly passes would otherwise index past the end of the vector.
void fft(std::vector<std::complex<float>> &x, bool inv = false)
{
    const std::size_t N = x.size();
    if (N <= 1) {
        return;
    }
    if ((N & (N - 1)) != 0) {
        throw std::invalid_argument("fft: input length must be a power of two");
    }

    // Bit-reversal permutation; `rev` tracks the bit-reversed value of `i`.
    std::size_t rev = 0;
    for (std::size_t i = 1; i < N; ++i) {
        std::size_t bit = N >> 1;
        for (; rev & bit; bit >>= 1) {
            rev ^= bit;
        }
        rev ^= bit;

        if (i < rev) {
            std::swap(x[i], x[rev]);
        }
    }

    const double pi = std::acos(-1.0);

    // Butterfly passes over sub-transforms of length 2, 4, ..., N.
    for (std::size_t len = 2; len <= N; len <<= 1) {
        const std::size_t half = len >> 1;
        const double angle = (inv ? 2.0 : -2.0) * pi / static_cast<double>(len);
        const std::complex<double> wlen(std::cos(angle), std::sin(angle));

        for (std::size_t base = 0; base < N; base += len) {
            // The twiddle recurrence runs in double so rounding error does not
            // build up over the len/2 successive multiplications.
            std::complex<double> w(1.0, 0.0);
            for (std::size_t k = 0; k < half; ++k) {
                const std::complex<float> wf(static_cast<float>(w.real()),
                                             static_cast<float>(w.imag()));
                const std::complex<float> u = x[base + k];
                const std::complex<float> v = x[base + k + half] * wf;
                x[base + k] = u + v;
                x[base + k + half] = u - v;
                w *= wlen;
            }
        }
    }

    if (inv) {
        const float scale = static_cast<float>(N);
        for (auto &value : x) {
            value /= scale;
        }
    }
}

//光流
|
||
void opticalflow()
|
||
{
|
||
cv::Mat frame1, prvs;
|
||
SERFileReader ser;
|
||
ser.open("/media/data/indi_2025-09-29/indi_record_2025-09-29@17-29-08.ser");
|
||
frame1 = cv::Mat(ser.height(), ser.width(), CV_16U);
|
||
ser.getFrame(0, (char*)frame1.data);
|
||
|
||
cv::Mat frame2(ser.height(), ser.width(), CV_16U);
|
||
for(int i=0; i<ser.frameCount(); i++)
|
||
{
|
||
ser.getFrame(i, (char*)frame2.data);
|
||
if (frame2.empty())
|
||
break;
|
||
|
||
cv::Mat flow(prvs.size(), CV_32FC2);
|
||
cv::calcOpticalFlowFarneback(frame1, frame2, flow, 0.5, 3, 40, 3, 5, 1.2, 0);
|
||
|
||
|
||
std::vector<cv::Mat> flow_xy(2);
|
||
cv::split(flow, flow_xy);
|
||
cv::Mat flow_x = flow_xy[0];
|
||
cv::Mat flow_y = flow_xy[1];
|
||
|
||
// --- Build map_x and map_y for remapping
|
||
cv::Mat map_x(frame1.size(), CV_32FC1);
|
||
cv::Mat map_y(frame1.size(), CV_32FC1);
|
||
|
||
for (int y = 0; y < frame1.rows; y++) {
|
||
for (int x = 0; x < frame1.cols; x++) {
|
||
map_x.at<float>(y, x) = x + flow_x.at<float>(y, x);
|
||
map_y.at<float>(y, x) = y + flow_y.at<float>(y, x);
|
||
}
|
||
}
|
||
|
||
// --- Warp img1 to align it with img2 using the optical flow
|
||
cv::Mat warped;
|
||
cv::remap(frame2, warped, map_x, map_y, cv::INTER_LANCZOS4);
|
||
cv::imshow("orig", frame2);
|
||
cv::imshow("warp", warped);
|
||
/*int key = cv::waitKey(3);
|
||
if (key == 'q' || key == 27)
|
||
break;
|
||
continue;*/
|
||
|
||
// visualization
|
||
cv::Mat flow_parts[2];
|
||
split(flow, flow_parts);
|
||
cv::Mat magnitude, angle, magn_norm;
|
||
cv::cartToPolar(flow_parts[0], flow_parts[1], magnitude, angle, true);
|
||
cv::normalize(magnitude, magn_norm, 0.0f, 1.0f, cv::NORM_MINMAX);
|
||
cv::imshow("mag", magn_norm);
|
||
/*angle *= ((1.f / 360.f) * (180.f / 255.f));
|
||
//build hsv image
|
||
cv::Mat _hsv[3], hsv, hsv8, bgr;
|
||
_hsv[0] = angle;
|
||
_hsv[1] = cv::Mat::ones(angle.size(), CV_32F);
|
||
_hsv[2] = magn_norm;
|
||
merge(_hsv, 3, hsv);
|
||
hsv.convertTo(hsv8, CV_8U, 255.0);
|
||
cvtColor(hsv8, bgr, cv::COLOR_HSV2BGR);
|
||
imshow("frame2", bgr);*/
|
||
int keyboard = cv::waitKey(30);
|
||
if (keyboard == 'q' || keyboard == 27)
|
||
break;
|
||
}
|
||
}
|
||
|
||
int main(int argc, char *argv[])
|
||
{
|
||
QApplication a(argc, argv);
|
||
MainWindow w;
|
||
w.show();
|
||
return a.exec();
|
||
}
|