aboutsummaryrefslogtreecommitdiff
path: root/src/renderer
diff options
context:
space:
mode:
author fschildt <florian.schildt@protonmail.com> 2025-12-11 07:56:17 +0100
committer fschildt <florian.schildt@protonmail.com> 2025-12-11 07:56:17 +0100
commit ef11873681a1eaf45f66a4c20cbc9863c4a19318 (patch)
tree 2ba4b52b476ddaf97af44a604c9610a0a6338267 /src/renderer
parent daa76b3841809f3b8ee7e15e76581595752d9217 (diff)
renderer: improve perf for clear (avx2) and rect
Diffstat (limited to 'src/renderer')
-rw-r--r-- src/renderer/RSoftwareBackend.cpp 63
1 files changed, 46 insertions, 17 deletions
diff --git a/src/renderer/RSoftwareBackend.cpp b/src/renderer/RSoftwareBackend.cpp
index 2606553..754a30f 100644
--- a/src/renderer/RSoftwareBackend.cpp
+++ b/src/renderer/RSoftwareBackend.cpp
@@ -8,6 +8,7 @@
#include <algorithm>
#include <cstdlib>
#include <cstdio>
+#include <immintrin.h>
RSoftwareBackend::RSoftwareBackend(Renderer& renderer)
@@ -17,6 +18,8 @@ RSoftwareBackend::RSoftwareBackend(Renderer& renderer)
m_canvas.gshift = 8;
m_canvas.bshift = 16;
m_canvas.ashift = 24;
+ m_canvas.w = 0;
+ m_canvas.h = 0;
m_canvas.pixels = nullptr;
@@ -77,16 +80,30 @@ RSoftwareBackend::Draw()
// Resize the software canvas to w x h pixels.
//
// This is a diff hunk: lines starting with "-" are the removed realloc-based
// path, lines starting with "+" are the replacement. After the patch:
//   * an unchanged size returns immediately;
//   * a zero-sized request frees the buffer and resets w/h/pixels;
//   * otherwise a fresh buffer is obtained with _mm_malloc, the old one is
//     released, and glViewport is updated.
// The previous pixel contents are not copied into the new buffer.
// If the allocation fails, a message is printed and the old buffer and
// dimensions are left untouched.
void
RSoftwareBackend::Resize(int32_t w, int32_t h)
{
- size_t realloc_size = (size_t)(w * h) * sizeof(m_canvas.pixels[0]);
- void *realloc_data = realloc(m_canvas.pixels, realloc_size);
- if (!realloc_data) {
- printf("could not resize offscreen buffer\n");
// Nothing to do when the requested size matches the current canvas.
+ if ((m_canvas.w == w && m_canvas.h == h)) {
return;
}
// 32-byte alignment; presumably chosen to suit the 256-bit AVX2 stores used
// when clearing the canvas — confirm against DrawClear.
+ size_t alignment = 32;
// Each factor is widened to size_t before multiplying, unlike the removed
// (size_t)(w * h) which multiplied in int32_t first.
// NOTE(review): a negative w or h would convert to a very large size_t here;
// callers are presumably expected to pass non-negative sizes — confirm.
+ size_t new_size = static_cast<size_t>(w) * static_cast<size_t>(h) * sizeof(m_canvas.pixels[0]);
// Zero-area canvas: drop the buffer entirely instead of allocating 0 bytes.
+ if (new_size == 0) {
+ _mm_free(m_canvas.pixels);
+ m_canvas.pixels = nullptr;
+ m_canvas.w = 0;
+ m_canvas.h = 0;
+ return;
+ }
+
+ uint32_t* new_pixels = (uint32_t*)_mm_malloc(new_size, alignment);
+ if (!new_pixels) {
+ printf("_mm_malloc failed for resizing canvas\n");
+ return;
+ }
+
// Release the previous buffer only after the new allocation has succeeded.
+ _mm_free(m_canvas.pixels);
m_canvas.w = w;
m_canvas.h = h;
- m_canvas.pixels = (uint32_t*)realloc_data;
+ m_canvas.pixels = new_pixels;
glViewport(0, 0, w, h);
}
@@ -109,14 +126,24 @@ RSoftwareBackend::DrawClear()
uint32_t gshift = m_canvas.gshift;
uint32_t bshift = m_canvas.bshift;
- for (int32_t y = 0; y < m_canvas.h; y++) {
- for (int32_t x = 0; x < m_canvas.w; x++) {
- uint32_t r = (uint32_t)(color.r * 255.0f);
- uint32_t g = (uint32_t)(color.g * 255.0f);
- uint32_t b = (uint32_t)(color.b * 255.0f);
- uint32_t val = r << rshift | g << gshift | b << bshift;
- m_canvas.pixels[y*m_canvas.w + x] = val;
- }
+ uint32_t r = static_cast<uint32_t>(color.r * 255.0f);
+ uint32_t g = static_cast<uint32_t>(color.g * 255.0f);
+ uint32_t b = static_cast<uint32_t>(color.b * 255.0f);
+ uint32_t val = r << rshift | g << gshift | b << bshift;
+
+ size_t pixel_count = size_t(m_canvas.w) * size_t(m_canvas.h);
+ size_t chunk_count = pixel_count / 8;
+ size_t chunk_rest = pixel_count % 8;
+ uint32_t* pixels = m_canvas.pixels;
+
+ __m256i vec_val = _mm256_set1_epi32((int32_t)val);
+ for (size_t i = 0; i < chunk_count; i++) {
+ _mm256_storeu_si256(reinterpret_cast<__m256i*>(pixels), vec_val);
+ pixels += 8;
+ }
+
+ for (size_t i = 0; i < chunk_rest; i++) {
+ *pixels++ = val;
}
}
@@ -144,13 +171,15 @@ RSoftwareBackend::DrawRectangle(REntity_Rectangle& entity)
uint32_t rshift = m_canvas.rshift;
uint32_t gshift = m_canvas.gshift;
uint32_t bshift = m_canvas.bshift;
+
+ uint32_t r = (uint32_t)(entity.color.r * 255.0f);
+ uint32_t g = (uint32_t)(entity.color.g * 255.0f);
+ uint32_t b = (uint32_t)(entity.color.b * 255.0f);
+ uint32_t val = r << rshift | g << gshift | b << bshift;
+
for (int32_t y = ymin; y <= ymax; ++y) {
uint32_t *pixel = m_canvas.pixels + y * m_canvas.w + xmin;
for (int32_t x = xmin; x <= xmax; ++x) {
- uint32_t r = (uint32_t)(entity.color.r * 255.0f);
- uint32_t g = (uint32_t)(entity.color.g * 255.0f);
- uint32_t b = (uint32_t)(entity.color.b * 255.0f);
- uint32_t val = r << rshift | g << gshift | b << bshift;
*pixel++ = val;
}
}