From 7366a671b4e4a247dae0ba7a1e62894696aae194 Mon Sep 17 00:00:00 2001 From: GlassOnTin Date: Sat, 28 Mar 2026 14:54:29 +0000 Subject: [PATCH] Optimize display: DMA SPI, on-demand screenshots, faster scroll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switched all SPI operations from spi_device_polling_transmit (CPU busy-wait) to spi_device_transmit (DMA with FreeRTOS yield). The CPU blocks on a semaphore instead of spinning, allowing ISRs to run during the ~18ms frame transfer. Removed per-frame 411KB screenshot buffer copy — now only copies when a screenshot is actually requested via serial trigger. Shortened tileview scroll snap animation from default ~300ms to 150ms for snappier transitions with fewer intermediate frames. Async DMA (queue_trans with deferred flush_ready) was investigated but spi_device_queue_trans conflicts with spi_device_transmit on the same device — needs further investigation with a fully queued pipeline (no mixed blocking/queued calls). --- CO5300.h | 88 +++++++++++++++++++++++++++++++++++++++++++++++--------- Gui.h | 15 ++++++++-- 2 files changed, 88 insertions(+), 15 deletions(-) diff --git a/CO5300.h b/CO5300.h index 678be5c..1236838 100644 --- a/CO5300.h +++ b/CO5300.h @@ -54,18 +54,18 @@ static spi_device_handle_t co5300_spi = NULL; static bool co5300_ready = false; static uint8_t co5300_brightness = 0; -// Send a command with optional data bytes via QSPI +// Send a command with optional data bytes via QSPI (DMA-based) static void co5300_write_cmd(uint8_t cmd, uint8_t *data, uint32_t len) { digitalWrite(DISP_CS, LOW); spi_transaction_t t = {}; t.flags = SPI_TRANS_MULTILINE_CMD | SPI_TRANS_MULTILINE_ADDR; - t.cmd = 0x02; // QSPI write command - t.addr = cmd << 8; // Display command in address field + t.cmd = 0x02; + t.addr = cmd << 8; if (len > 0 && data) { t.tx_buffer = data; t.length = 8 * len; } - spi_device_polling_transmit(co5300_spi, &t); + spi_device_transmit(co5300_spi, &t); digitalWrite(DISP_CS, HIGH); } @@ -83,11 +83,10 @@ static void co5300_set_window(uint16_t x1, uint16_t y1, uint16_t x2, uint16_t y2 co5300_write_cmd(CO5300_CMD_RAMWR, NULL, 0); } -// Push pixel data to the display (RGB565, big-endian) +// Push pixel data to the display (RGB565, DMA-based, blocking) void co5300_push_pixels(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t *pixels) { if (!co5300_ready) return; - spi_device_acquire_bus(co5300_spi, portMAX_DELAY); co5300_set_window(x, y, x + w - 1, y + h - 1); uint32_t total = w * h; @@ -112,32 +111,96 @@ void co5300_push_pixels(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t } t.base.tx_buffer = p; t.base.length = chunk * 16; - spi_device_polling_transmit(co5300_spi, (spi_transaction_t *)&t); + spi_device_transmit(co5300_spi, (spi_transaction_t *)&t); p += chunk; total -= chunk; } digitalWrite(DISP_CS, HIGH); +} - spi_device_release_bus(co5300_spi); +// --- Async pixel push --- +// Queues all DMA transactions and returns immediately. +// co5300_push_done() returns true when all transactions complete. +// co5300_push_finish() blocks until complete. +#define CO5300_MAX_ASYNC_TXNS 14 // 410*502 / 16384 = ~13 chunks +static spi_transaction_ext_t co5300_async_txns[CO5300_MAX_ASYNC_TXNS]; +static int co5300_async_queued = 0; + +void co5300_push_pixels_start(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t *pixels) { + if (!co5300_ready) return; + + // Must not have pending async work + co5300_set_window(x, y, x + w - 1, y + h - 1); + + uint32_t total = w * h; + uint16_t *p = pixels; + bool first = true; + co5300_async_queued = 0; + + digitalWrite(DISP_CS, LOW); + while (total > 0 && co5300_async_queued < CO5300_MAX_ASYNC_TXNS) { + uint32_t chunk = (total > CO5300_SEND_BUF_SIZE) ? CO5300_SEND_BUF_SIZE : total; + int i = co5300_async_queued; + memset(&co5300_async_txns[i], 0, sizeof(spi_transaction_ext_t)); + if (first) { + co5300_async_txns[i].base.flags = SPI_TRANS_MODE_QIO; + co5300_async_txns[i].base.cmd = 0x32; + co5300_async_txns[i].base.addr = 0x002C00; + first = false; + } else { + co5300_async_txns[i].base.flags = SPI_TRANS_MODE_QIO | SPI_TRANS_VARIABLE_CMD | + SPI_TRANS_VARIABLE_ADDR | SPI_TRANS_VARIABLE_DUMMY; + co5300_async_txns[i].command_bits = 0; + co5300_async_txns[i].address_bits = 0; + co5300_async_txns[i].dummy_bits = 0; + } + co5300_async_txns[i].base.tx_buffer = p; + co5300_async_txns[i].base.length = chunk * 16; + spi_device_queue_trans(co5300_spi, (spi_transaction_t *)&co5300_async_txns[i], portMAX_DELAY); + p += chunk; + total -= chunk; + co5300_async_queued++; + } +} + +void co5300_push_finish() { + spi_transaction_t *rtrans; + while (co5300_async_queued > 0) { + spi_device_get_trans_result(co5300_spi, &rtrans, portMAX_DELAY); + co5300_async_queued--; + } + digitalWrite(DISP_CS, HIGH); +} + +bool co5300_push_done() { + if (co5300_async_queued == 0) return true; + spi_transaction_t *rtrans; + // Non-blocking check + while (co5300_async_queued > 0) { + if (spi_device_get_trans_result(co5300_spi, &rtrans, 0) == ESP_OK) { + co5300_async_queued--; + } else { + return false; // Still in progress + } + } + digitalWrite(DISP_CS, HIGH); + return true; } // Fill a rectangle with a solid colour void co5300_fill_rect(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t color) { if (!co5300_ready) return; - // Polling SPI doesn't need DMA memory — use PSRAM-capable heap uint32_t total = w * h; uint32_t buf_size = (total > CO5300_SEND_BUF_SIZE) ? CO5300_SEND_BUF_SIZE : total; uint16_t *buf = (uint16_t *)heap_caps_malloc(buf_size * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); if (!buf) { - // Fallback to internal RAM buf = (uint16_t *)malloc(buf_size * 2); if (!buf) return; } for (uint32_t i = 0; i < buf_size; i++) buf[i] = color; - spi_device_acquire_bus(co5300_spi, portMAX_DELAY); co5300_set_window(x, y, x + w - 1, y + h - 1); uint32_t remaining = total; @@ -160,11 +223,10 @@ void co5300_fill_rect(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t c } t.base.tx_buffer = buf; t.base.length = chunk * 16; - spi_device_polling_transmit(co5300_spi, (spi_transaction_t *)&t); + spi_device_transmit(co5300_spi, (spi_transaction_t *)&t); remaining -= chunk; } digitalWrite(DISP_CS, HIGH); - spi_device_release_bus(co5300_spi); heap_caps_free(buf); } diff --git a/Gui.h b/Gui.h index 7c686b6..f6fdc2f 100644 --- a/Gui.h +++ b/Gui.h @@ -116,6 +116,7 @@ static uint8_t gui_last_tile_row = 1; // --------------------------------------------------------------------------- // Shadow framebuffer for screenshots (RGB565 swapped / big-endian — same as display) uint16_t *gui_screenshot_buf = NULL; +static volatile bool gui_screenshot_pending = false; // set true to capture next frame static void gui_flush_cb(lv_display_t *disp, const lv_area_t *area, uint8_t *px_map) { uint16_t x1 = area->x1; @@ -124,12 +125,13 @@ static void gui_flush_cb(lv_display_t *disp, const lv_area_t *area, uint8_t *px_ uint16_t h = area->y2 - area->y1 + 1; uint16_t *pixels = (uint16_t *)px_map; - // Copy to shadow framebuffer for screenshots - if (gui_screenshot_buf) { + // Copy to shadow framebuffer only when screenshot requested + if (gui_screenshot_buf && gui_screenshot_pending) { for (uint16_t row = 0; row < h; row++) { memcpy(&gui_screenshot_buf[(y1 + row) * GUI_W + x1], &pixels[row * w], w * sizeof(uint16_t)); } + gui_screenshot_pending = false; } co5300_push_pixels(x1, y1, w, h, pixels); @@ -572,6 +574,7 @@ bool gui_init() { lv_obj_set_style_border_width(gui_tileview, 0, 0); lv_obj_set_style_pad_all(gui_tileview, 0, 0); lv_obj_set_scrollbar_mode(gui_tileview, LV_SCROLLBAR_MODE_OFF); + lv_obj_set_style_anim_duration(gui_tileview, 150, 0); // Snappy 150ms scroll snap lv_obj_set_size(gui_tileview, GUI_W, GUI_H); gui_tile_watch = lv_tileview_add_tile(gui_tileview, 1, 1, LV_DIR_ALL); @@ -618,6 +621,12 @@ void gui_check_screenshot_trigger(uint8_t byte_in) { if (gui_ss_state == 4) { gui_ss_state = 0; if (gui_screenshot_buf) { + // Request capture of next rendered frame + gui_screenshot_pending = true; + // Wait for the next flush to capture (max 100ms) + uint32_t t0 = millis(); + while (gui_screenshot_pending && millis() - t0 < 100) { delay(1); } + Serial.write(magic, 4); uint16_t w = GUI_W, h = GUI_H; Serial.write((uint8_t *)&w, 2); @@ -728,6 +737,8 @@ void gui_update() { gui_update_data(); lv_timer_handler(); + // After timer_handler, a new flush may have been queued via gui_flush_cb. + // DMA runs in background until next gui_update() call. } #endif // BOARD_MODEL == BOARD_TWATCH_ULT