From 6aa3b9de8732ecc672eab15ac04262df40af24f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?= Date: Wed, 21 Jan 2026 18:12:13 +0100 Subject: [PATCH] improve handling of VAE decode failures --- examples/cli/main.cpp | 30 +++++++++++++++++++++--------- ggml_extend.hpp | 15 +++++++++------ stable-diffusion.cpp | 25 +++++++++++++++++-------- upscaler.cpp | 2 +- 4 files changed, 48 insertions(+), 24 deletions(-) diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index ddc282817..6438fde16 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -409,7 +409,7 @@ bool save_results(const SDCliParams& cli_params, auto write_image = [&](const fs::path& path, int idx) { const sd_image_t& img = results[idx]; if (!img.data) - return; + return false; std::string params = get_image_params(cli_params, ctx_params, gen_params, gen_params.seed + idx); int ok = 0; @@ -419,8 +419,11 @@ bool save_results(const SDCliParams& cli_params, ok = stbi_write_png(path.string().c_str(), img.width, img.height, img.channel, img.data, 0, params.c_str()); } LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? 
"success" : "failure"); + return ok != 0; }; + int successful_results = 0; + if (std::regex_search(cli_params.output_path, format_specifier_regex)) { if (!is_jpg && ext_lower != ".png") ext = ".png"; @@ -429,9 +432,12 @@ bool save_results(const SDCliParams& cli_params, for (int i = 0; i < num_results; ++i) { fs::path img_path = format_frame_idx(pattern.string(), output_begin_idx + i); - write_image(img_path, i); + if (write_image(img_path, i)) { + successful_results++; + } } - return true; + LOG_INFO("%d/%d images saved", successful_results, num_results); + return successful_results != 0; } if (cli_params.mode == VID_GEN && num_results > 1) { @@ -439,9 +445,13 @@ bool save_results(const SDCliParams& cli_params, ext = ".avi"; fs::path video_path = base_path; video_path += ext; - create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps); - LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str()); - return true; + if (create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps) == 0) { + LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str()); + return true; + } else { + LOG_ERROR("Failed to save result MJPG AVI video to '%s'", video_path.string().c_str()); + return false; + } } if (!is_jpg && ext_lower != ".png") @@ -453,10 +463,12 @@ bool save_results(const SDCliParams& cli_params, img_path += "_" + std::to_string(output_begin_idx + i); } img_path += ext; - write_image(img_path, i); + if (write_image(img_path, i)) { + successful_results++; + } } - - return true; + LOG_INFO("%d/%d images saved", successful_results, num_results); + return successful_results != 0; } int main(int argc, const char* argv[]) { diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 9d5ea316b..0b84b02ee 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -778,7 +778,7 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_silu_act(ggml_context* ctx, ggml_tensor* return x; } -typedef std::function<void(ggml_tensor*, ggml_tensor*, bool)> 
on_tile_process; +typedef std::function<bool(ggml_tensor*, ggml_tensor*, bool)> on_tile_process; __STATIC_INLINE__ void sd_tiling_calc_tiles(int& num_tiles_dim, float& tile_overlap_factor_dim, @@ -929,12 +929,15 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input, int64_t t1 = ggml_time_ms(); ggml_ext_tensor_split_2d(input, input_tile, x_in, y_in); - on_processing(input_tile, output_tile, false); - ggml_ext_tensor_merge_2d(output_tile, output, x_out, y_out, overlap_x_out, overlap_y_out, dx, dy); + if (on_processing(input_tile, output_tile, false)) { + ggml_ext_tensor_merge_2d(output_tile, output, x_out, y_out, overlap_x_out, overlap_y_out, dx, dy); - int64_t t2 = ggml_time_ms(); - last_time = (t2 - t1) / 1000.0f; - pretty_progress(tile_count, num_tiles, last_time); + int64_t t2 = ggml_time_ms(); + last_time = (t2 - t1) / 1000.0f; + pretty_progress(tile_count, num_tiles, last_time); + } else { + LOG_ERROR("Failed to process patch %d at (%d, %d)", tile_count, x, y); + } tile_count++; } last_x = false; diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index b181f994b..803d3f4f6 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -1542,7 +1542,7 @@ class StableDiffusionGGML { if (vae_tiling_params.enabled) { // split latent in 32x32 tiles and compute in several steps auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) { - first_stage_model->compute(n_threads, in, true, &out, nullptr); + return first_stage_model->compute(n_threads, in, true, &out, nullptr); }; silent_tiling(latents, result, get_vae_scale_factor(), 32, 0.5f, on_tiling); @@ -1561,7 +1561,7 @@ class StableDiffusionGGML { if (vae_tiling_params.enabled) { // split latent in 64x64 tiles and compute in several steps auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) { - tae_first_stage->compute(n_threads, in, true, &out, nullptr); + return tae_first_stage->compute(n_threads, in, true, &out, nullptr); }; silent_tiling(latents, result, get_vae_scale_factor(), 64, 0.5f, on_tiling); } else 
{ @@ -2530,7 +2530,7 @@ class StableDiffusionGGML { LOG_DEBUG("VAE Tile size: %dx%d", tile_size_x, tile_size_y); auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) { - first_stage_model->compute(n_threads, in, false, &out, work_ctx); + return first_stage_model->compute(n_threads, in, false, &out, work_ctx); }; sd_tiling_non_square(x, result, vae_scale_factor, tile_size_x, tile_size_y, tile_overlap, on_tiling); } else { @@ -2541,7 +2541,7 @@ class StableDiffusionGGML { if (vae_tiling_params.enabled && !encode_video) { // split latent in 32x32 tiles and compute in several steps auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) { - tae_first_stage->compute(n_threads, in, false, &out, nullptr); + return tae_first_stage->compute(n_threads, in, false, &out, nullptr); }; sd_tiling(x, result, vae_scale_factor, 64, 0.5f, on_tiling); } else { @@ -2659,11 +2659,15 @@ class StableDiffusionGGML { // split latent in 32x32 tiles and compute in several steps auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) { - first_stage_model->compute(n_threads, in, true, &out, nullptr); + return first_stage_model->compute(n_threads, in, true, &out, nullptr); }; sd_tiling_non_square(x, result, vae_scale_factor, tile_size_x, tile_size_y, tile_overlap, on_tiling); } else { - first_stage_model->compute(n_threads, x, true, &result, work_ctx); + if (!first_stage_model->compute(n_threads, x, true, &result, work_ctx)) { + LOG_ERROR("Failed to decode latents"); + first_stage_model->free_compute_buffer(); + return nullptr; + } } first_stage_model->free_compute_buffer(); process_vae_output_tensor(result); @@ -2671,11 +2675,15 @@ class StableDiffusionGGML { if (vae_tiling_params.enabled) { // split latent in 64x64 tiles and compute in several steps auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) { - tae_first_stage->compute(n_threads, in, true, &out); + return tae_first_stage->compute(n_threads, in, true, &out); }; sd_tiling(x, result, 
vae_scale_factor, 64, 0.5f, on_tiling); } else { - tae_first_stage->compute(n_threads, x, true, &result); + if (!tae_first_stage->compute(n_threads, x, true, &result)) { + LOG_ERROR("Failed to decode latents"); + tae_first_stage->free_compute_buffer(); + return nullptr; + } } tae_first_stage->free_compute_buffer(); } @@ -3440,6 +3448,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx, ggml_free(work_ctx); return nullptr; } + memset(result_images, 0, batch_count * sizeof(sd_image_t)); for (size_t i = 0; i < decoded_images.size(); i++) { result_images[i].width = width; diff --git a/upscaler.cpp b/upscaler.cpp index 29ac981e6..fd0dc8242 100644 --- a/upscaler.cpp +++ b/upscaler.cpp @@ -89,7 +89,7 @@ struct UpscalerGGML { ggml_tensor* upscaled = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, output_width, output_height, 3, 1); auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) { - esrgan_upscaler->compute(n_threads, in, &out); + return esrgan_upscaler->compute(n_threads, in, &out); }; int64_t t0 = ggml_time_ms(); sd_tiling(input_image_tensor, upscaled, esrgan_upscaler->scale, esrgan_upscaler->tile_size, 0.25f, on_tiling);