Optimize display: DMA SPI, on-demand screenshots, faster scroll

Switched all SPI operations from spi_device_polling_transmit (CPU
busy-wait) to spi_device_transmit (DMA with FreeRTOS yield). The
calling task now blocks on a semaphore instead of spinning, so other
tasks and ISRs can use the CPU during the ~18ms frame transfer.

Removed per-frame 411KB screenshot buffer copy — now only copies
when a screenshot is actually requested via serial trigger.

Shortened tileview scroll snap animation from default ~300ms to
150ms for snappier transitions with fewer intermediate frames.

Async DMA (queue_trans with deferred flush_ready) was investigated
but spi_device_queue_trans conflicts with spi_device_transmit on
the same device — needs further investigation with a fully queued
pipeline (no mixed blocking/queued calls).
This commit is contained in:
GlassOnTin 2026-03-28 14:54:29 +00:00
commit 7366a671b4
2 changed files with 88 additions and 15 deletions

View file

@ -54,18 +54,18 @@ static spi_device_handle_t co5300_spi = NULL;
// Guard flag: co5300_push_pixels(), co5300_push_pixels_start() and
// co5300_fill_rect() return immediately while this is false.
static bool co5300_ready = false;
// Brightness value tracked for the panel.
// NOTE(review): readers/writers are outside this chunk — confirm range and units.
static uint8_t co5300_brightness = 0;
// Send a command with optional data bytes via QSPI (DMA-based).
//
// cmd  - display command byte; it travels in the address field of the transaction.
// data - optional parameter bytes for the command, may be NULL.
// len  - number of bytes in data; ignored when data is NULL.
//
// CS is driven manually around the transfer. The transaction is sent exactly
// once with spi_device_transmit(), which blocks the calling task until the
// transfer has finished, so CS is only raised after the bytes are out.
static void co5300_write_cmd(uint8_t cmd, uint8_t *data, uint32_t len) {
    digitalWrite(DISP_CS, LOW);

    spi_transaction_t t = {};
    t.flags = SPI_TRANS_MULTILINE_CMD | SPI_TRANS_MULTILINE_ADDR;
    t.cmd = 0x02;       // QSPI write command
    t.addr = cmd << 8;  // Display command in address field
    if (len > 0 && data) {
        t.tx_buffer = data;
        t.length = 8 * len;  // length is expressed in bits
    }
    spi_device_transmit(co5300_spi, &t);

    digitalWrite(DISP_CS, HIGH);
}
@ -83,11 +83,10 @@ static void co5300_set_window(uint16_t x1, uint16_t y1, uint16_t x2, uint16_t y2
co5300_write_cmd(CO5300_CMD_RAMWR, NULL, 0);
}
// Push pixel data to the display (RGB565, big-endian)
// Push pixel data to the display (RGB565, DMA-based, blocking)
void co5300_push_pixels(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t *pixels) {
if (!co5300_ready) return;
spi_device_acquire_bus(co5300_spi, portMAX_DELAY);
co5300_set_window(x, y, x + w - 1, y + h - 1);
uint32_t total = w * h;
@ -112,32 +111,96 @@ void co5300_push_pixels(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t
}
t.base.tx_buffer = p;
t.base.length = chunk * 16;
spi_device_polling_transmit(co5300_spi, (spi_transaction_t *)&t);
spi_device_transmit(co5300_spi, (spi_transaction_t *)&t);
p += chunk;
total -= chunk;
}
digitalWrite(DISP_CS, HIGH);
}
spi_device_release_bus(co5300_spi);
// --- Async pixel push ---
// co5300_push_pixels_start() queues the DMA transactions and returns without
// waiting for them.
// co5300_push_done() polls without blocking and returns true when all
// transactions complete.
// co5300_push_finish() blocks until complete.
// Both completion functions raise CS once the last result has been collected.

// Upper bound on the number of transactions queued by one push.
// NOTE(review): the arithmetic below assumes a 410x502 frame and a
// CO5300_SEND_BUF_SIZE of 16384 pixels — confirm against the definitions.
#define CO5300_MAX_ASYNC_TXNS 14 // 410*502 / 16384 = ~13 chunks
// Descriptors live in static storage because the driver is handed pointers to
// them and their results are only collected after co5300_push_pixels_start()
// has returned.
static spi_transaction_ext_t co5300_async_txns[CO5300_MAX_ASYNC_TXNS];
// Number of queued transactions whose results have not been collected yet.
static int co5300_async_queued = 0;
void co5300_push_pixels_start(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t *pixels) {
if (!co5300_ready) return;
// Must not have pending async work
co5300_set_window(x, y, x + w - 1, y + h - 1);
uint32_t total = w * h;
uint16_t *p = pixels;
bool first = true;
co5300_async_queued = 0;
digitalWrite(DISP_CS, LOW);
while (total > 0 && co5300_async_queued < CO5300_MAX_ASYNC_TXNS) {
uint32_t chunk = (total > CO5300_SEND_BUF_SIZE) ? CO5300_SEND_BUF_SIZE : total;
int i = co5300_async_queued;
memset(&co5300_async_txns[i], 0, sizeof(spi_transaction_ext_t));
if (first) {
co5300_async_txns[i].base.flags = SPI_TRANS_MODE_QIO;
co5300_async_txns[i].base.cmd = 0x32;
co5300_async_txns[i].base.addr = 0x002C00;
first = false;
} else {
co5300_async_txns[i].base.flags = SPI_TRANS_MODE_QIO | SPI_TRANS_VARIABLE_CMD |
SPI_TRANS_VARIABLE_ADDR | SPI_TRANS_VARIABLE_DUMMY;
co5300_async_txns[i].command_bits = 0;
co5300_async_txns[i].address_bits = 0;
co5300_async_txns[i].dummy_bits = 0;
}
co5300_async_txns[i].base.tx_buffer = p;
co5300_async_txns[i].base.length = chunk * 16;
spi_device_queue_trans(co5300_spi, (spi_transaction_t *)&co5300_async_txns[i], portMAX_DELAY);
p += chunk;
total -= chunk;
co5300_async_queued++;
}
}
void co5300_push_finish() {
spi_transaction_t *rtrans;
while (co5300_async_queued > 0) {
spi_device_get_trans_result(co5300_spi, &rtrans, portMAX_DELAY);
co5300_async_queued--;
}
digitalWrite(DISP_CS, HIGH);
}
bool co5300_push_done() {
if (co5300_async_queued == 0) return true;
spi_transaction_t *rtrans;
// Non-blocking check
while (co5300_async_queued > 0) {
if (spi_device_get_trans_result(co5300_spi, &rtrans, 0) == ESP_OK) {
co5300_async_queued--;
} else {
return false; // Still in progress
}
}
digitalWrite(DISP_CS, HIGH);
return true;
}
// Fill a rectangle with a solid colour
void co5300_fill_rect(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t color) {
if (!co5300_ready) return;
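// Fill buffer is taken from the PSRAM-capable heap, falling back to internal RAM.
// NOTE(review): transfers now use spi_device_transmit (DMA) rather than polling —
// confirm the SPI driver can DMA from this buffer on the target chip.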
uint32_t total = w * h;
uint32_t buf_size = (total > CO5300_SEND_BUF_SIZE) ? CO5300_SEND_BUF_SIZE : total;
uint16_t *buf = (uint16_t *)heap_caps_malloc(buf_size * 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
if (!buf) {
// Fallback to internal RAM
buf = (uint16_t *)malloc(buf_size * 2);
if (!buf) return;
}
for (uint32_t i = 0; i < buf_size; i++) buf[i] = color;
spi_device_acquire_bus(co5300_spi, portMAX_DELAY);
co5300_set_window(x, y, x + w - 1, y + h - 1);
uint32_t remaining = total;
@ -160,11 +223,10 @@ void co5300_fill_rect(uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t c
}
t.base.tx_buffer = buf;
t.base.length = chunk * 16;
spi_device_polling_transmit(co5300_spi, (spi_transaction_t *)&t);
spi_device_transmit(co5300_spi, (spi_transaction_t *)&t);
remaining -= chunk;
}
digitalWrite(DISP_CS, HIGH);
spi_device_release_bus(co5300_spi);
heap_caps_free(buf);
}

15
Gui.h
View file

@ -116,6 +116,7 @@ static uint8_t gui_last_tile_row = 1;
// ---------------------------------------------------------------------------
// Shadow framebuffer for screenshots (RGB565 swapped / big-endian — same as display)
uint16_t *gui_screenshot_buf = NULL;
static volatile bool gui_screenshot_pending = false; // set true to capture next frame
static void gui_flush_cb(lv_display_t *disp, const lv_area_t *area, uint8_t *px_map) {
uint16_t x1 = area->x1;
@ -124,12 +125,13 @@ static void gui_flush_cb(lv_display_t *disp, const lv_area_t *area, uint8_t *px_
uint16_t h = area->y2 - area->y1 + 1;
uint16_t *pixels = (uint16_t *)px_map;
// Copy to shadow framebuffer for screenshots
if (gui_screenshot_buf) {
// Copy to shadow framebuffer only when screenshot requested
if (gui_screenshot_buf && gui_screenshot_pending) {
for (uint16_t row = 0; row < h; row++) {
memcpy(&gui_screenshot_buf[(y1 + row) * GUI_W + x1],
&pixels[row * w], w * sizeof(uint16_t));
}
gui_screenshot_pending = false;
}
co5300_push_pixels(x1, y1, w, h, pixels);
@ -572,6 +574,7 @@ bool gui_init() {
lv_obj_set_style_border_width(gui_tileview, 0, 0);
lv_obj_set_style_pad_all(gui_tileview, 0, 0);
lv_obj_set_scrollbar_mode(gui_tileview, LV_SCROLLBAR_MODE_OFF);
lv_obj_set_style_anim_duration(gui_tileview, 150, 0); // Snappy 150ms scroll snap
lv_obj_set_size(gui_tileview, GUI_W, GUI_H);
gui_tile_watch = lv_tileview_add_tile(gui_tileview, 1, 1, LV_DIR_ALL);
@ -618,6 +621,12 @@ void gui_check_screenshot_trigger(uint8_t byte_in) {
if (gui_ss_state == 4) {
gui_ss_state = 0;
if (gui_screenshot_buf) {
// Request capture of next rendered frame
gui_screenshot_pending = true;
// Wait for the next flush to capture (max 100ms)
uint32_t t0 = millis();
while (gui_screenshot_pending && millis() - t0 < 100) { delay(1); }
Serial.write(magic, 4);
uint16_t w = GUI_W, h = GUI_H;
Serial.write((uint8_t *)&w, 2);
@ -728,6 +737,8 @@ void gui_update() {
gui_update_data();
lv_timer_handler();
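// lv_timer_handler() may have invoked gui_flush_cb for newly rendered areas.
// NOTE(review): gui_flush_cb pushes pixels with the blocking co5300_push_pixels(),
// so nothing runs in the background here until the async push path is adopted.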
}
#endif // BOARD_MODEL == BOARD_TWATCH_ULT