diff options
| author | fschildt <florian.schildt@protonmail.com> | 2025-12-11 07:56:17 +0100 |
|---|---|---|
| committer | fschildt <florian.schildt@protonmail.com> | 2025-12-11 07:56:17 +0100 |
| commit | ef11873681a1eaf45f66a4c20cbc9863c4a19318 (patch) | |
| tree | 2ba4b52b476ddaf97af44a604c9610a0a6338267 /src/renderer/RSoftwareBackend.cpp | |
| parent | daa76b3841809f3b8ee7e15e76581595752d9217 (diff) | |
renderer: improve perf for clear (avx2) and rect
Diffstat (limited to 'src/renderer/RSoftwareBackend.cpp')
| -rw-r--r-- | src/renderer/RSoftwareBackend.cpp | 63 |
1 file changed, 46 insertions, 17 deletions
diff --git a/src/renderer/RSoftwareBackend.cpp b/src/renderer/RSoftwareBackend.cpp index 2606553..754a30f 100644 --- a/src/renderer/RSoftwareBackend.cpp +++ b/src/renderer/RSoftwareBackend.cpp @@ -8,6 +8,7 @@ #include <algorithm> #include <cstdlib> #include <cstdio> +#include <immintrin.h> RSoftwareBackend::RSoftwareBackend(Renderer& renderer) @@ -17,6 +18,8 @@ RSoftwareBackend::RSoftwareBackend(Renderer& renderer) m_canvas.gshift = 8; m_canvas.bshift = 16; m_canvas.ashift = 24; + m_canvas.w = 0; + m_canvas.h = 0; m_canvas.pixels = nullptr; @@ -77,16 +80,30 @@ RSoftwareBackend::Draw() void RSoftwareBackend::Resize(int32_t w, int32_t h) { - size_t realloc_size = (size_t)(w * h) * sizeof(m_canvas.pixels[0]); - void *realloc_data = realloc(m_canvas.pixels, realloc_size); - if (!realloc_data) { - printf("could not resize offscreen buffer\n"); + if ((m_canvas.w == w && m_canvas.h == h)) { return; } + size_t alignment = 32; + size_t new_size = static_cast<size_t>(w) * static_cast<size_t>(h) * sizeof(m_canvas.pixels[0]); + if (new_size == 0) { + _mm_free(m_canvas.pixels); + m_canvas.pixels = nullptr; + m_canvas.w = 0; + m_canvas.h = 0; + return; + } + + uint32_t* new_pixels = (uint32_t*)_mm_malloc(new_size, alignment); + if (!new_pixels) { + printf("_mm_malloc failed for resizing canvas\n"); + return; + } + + _mm_free(m_canvas.pixels); m_canvas.w = w; m_canvas.h = h; - m_canvas.pixels = (uint32_t*)realloc_data; + m_canvas.pixels = new_pixels; glViewport(0, 0, w, h); } @@ -109,14 +126,24 @@ RSoftwareBackend::DrawClear() uint32_t gshift = m_canvas.gshift; uint32_t bshift = m_canvas.bshift; - for (int32_t y = 0; y < m_canvas.h; y++) { - for (int32_t x = 0; x < m_canvas.w; x++) { - uint32_t r = (uint32_t)(color.r * 255.0f); - uint32_t g = (uint32_t)(color.g * 255.0f); - uint32_t b = (uint32_t)(color.b * 255.0f); - uint32_t val = r << rshift | g << gshift | b << bshift; - m_canvas.pixels[y*m_canvas.w + x] = val; - } + uint32_t r = static_cast<uint32_t>(color.r * 255.0f); + 
uint32_t g = static_cast<uint32_t>(color.g * 255.0f); + uint32_t b = static_cast<uint32_t>(color.b * 255.0f); + uint32_t val = r << rshift | g << gshift | b << bshift; + + size_t pixel_count = size_t(m_canvas.w) * size_t(m_canvas.h); + size_t chunk_count = pixel_count / 8; + size_t chunk_rest = pixel_count % 8; + uint32_t* pixels = m_canvas.pixels; + + __m256i vec_val = _mm256_set1_epi32((int32_t)val); + for (size_t i = 0; i < chunk_count; i++) { + _mm256_storeu_si256(reinterpret_cast<__m256i*>(pixels), vec_val); + pixels += 8; + } + + for (size_t i = 0; i < chunk_rest; i++) { + *pixels++ = val; } } @@ -144,13 +171,15 @@ RSoftwareBackend::DrawRectangle(REntity_Rectangle& entity) uint32_t rshift = m_canvas.rshift; uint32_t gshift = m_canvas.gshift; uint32_t bshift = m_canvas.bshift; + + uint32_t r = (uint32_t)(entity.color.r * 255.0f); + uint32_t g = (uint32_t)(entity.color.g * 255.0f); + uint32_t b = (uint32_t)(entity.color.b * 255.0f); + uint32_t val = r << rshift | g << gshift | b << bshift; + for (int32_t y = ymin; y <= ymax; ++y) { uint32_t *pixel = m_canvas.pixels + y * m_canvas.w + xmin; for (int32_t x = xmin; x <= xmax; ++x) { - uint32_t r = (uint32_t)(entity.color.r * 255.0f); - uint32_t g = (uint32_t)(entity.color.g * 255.0f); - uint32_t b = (uint32_t)(entity.color.b * 255.0f); - uint32_t val = r << rshift | g << gshift | b << bshift; *pixel++ = val; } } |
