diff --git a/src/rgbcx.cpp b/src/rgbcx.cpp index cd5291f..85f47b1 100644 --- a/src/rgbcx.cpp +++ b/src/rgbcx.cpp @@ -2,16 +2,19 @@ // High-performance scalar BC1-5 encoders. Public Domain or MIT license (you choose - see below), written by Richard Geldreich 2020 . #include "rgbcx.h" -#include "blocks.h" -#include "color.h" -#include "tables.h" -#include "util.h" + #include #include #include +#include #include #include +#include "blocks.h" +#include "color.h" +#include "tables.h" +#include "util.h" + namespace rgbcx { static const uint32_t TOTAL_ORDER_4_0_16 = 15; @@ -40,14 +43,10 @@ struct hist4 { } inline bool operator==(const hist4 &h) const { - if (m_hist[0] != h.m_hist[0]) - return false; - if (m_hist[1] != h.m_hist[1]) - return false; - if (m_hist[2] != h.m_hist[2]) - return false; - if (m_hist[3] != h.m_hist[3]) - return false; + if (m_hist[0] != h.m_hist[0]) return false; + if (m_hist[1] != h.m_hist[1]) return false; + if (m_hist[2] != h.m_hist[2]) return false; + if (m_hist[3] != h.m_hist[3]) return false; return true; } @@ -80,12 +79,9 @@ struct hist3 { } inline bool operator==(const hist3 &h) const { - if (m_hist[0] != h.m_hist[0]) - return false; - if (m_hist[1] != h.m_hist[1]) - return false; - if (m_hist[2] != h.m_hist[2]) - return false; + if (m_hist[0] != h.m_hist[0]) return false; + if (m_hist[1] != h.m_hist[1]) return false; + if (m_hist[2] != h.m_hist[2]) return false; return true; } @@ -161,15 +157,15 @@ static inline int interp_half_5_6_amd(int c0, int c1) { static inline int interp_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) { // assert(scale_5_to_8(v0) == c0 && scale5To8(v1) == c1); switch (mode) { - case bc1_approx_mode::cBC1NVidia: - return interp_5_nv(v0, v1); - case bc1_approx_mode::cBC1AMD: - return interp_5_6_amd(c0, c1); - default: - case bc1_approx_mode::cBC1Ideal: - return interp_5_6_ideal(c0, c1); - case bc1_approx_mode::cBC1IdealRound4: - return interp_5_6_ideal_round(c0, c1); + case bc1_approx_mode::cBC1NVidia: + return interp_5_nv(v0, v1); + case bc1_approx_mode::cBC1AMD: + return interp_5_6_amd(c0, c1); + default: + case bc1_approx_mode::cBC1Ideal: + return interp_5_6_ideal(c0, c1); + case bc1_approx_mode::cBC1IdealRound4: + return interp_5_6_ideal_round(c0, c1); } } @@ -178,29 +174,29 @@ static inline int interp_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) (void)v1; // assert(scale_6_to_8(v0) == c0 && scale6To8(v1) == c1); switch (mode) { - case bc1_approx_mode::cBC1NVidia: - return interp_6_nv(c0, c1); - case bc1_approx_mode::cBC1AMD: - return interp_5_6_amd(c0, c1); - default: - case bc1_approx_mode::cBC1Ideal: - return interp_5_6_ideal(c0, c1); - case bc1_approx_mode::cBC1IdealRound4: - return interp_5_6_ideal_round(c0, c1); + case bc1_approx_mode::cBC1NVidia: + return interp_6_nv(c0, c1); + case bc1_approx_mode::cBC1AMD: + return interp_5_6_amd(c0, c1); + default: + case bc1_approx_mode::cBC1Ideal: + return interp_5_6_ideal(c0, c1); + case bc1_approx_mode::cBC1IdealRound4: + return interp_5_6_ideal_round(c0, c1); } } static inline int interp_half_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) { assert(scale5To8(v0) == c0 && scale5To8(v1) == c1); switch (mode) { - case bc1_approx_mode::cBC1NVidia: - return interp_half_5_nv(v0, v1); - case bc1_approx_mode::cBC1AMD: - return interp_half_5_6_amd(c0, c1); - case bc1_approx_mode::cBC1Ideal: - case bc1_approx_mode::cBC1IdealRound4: - default: - return interp_half_5_6_ideal(c0, c1); + case bc1_approx_mode::cBC1NVidia: + return interp_half_5_nv(v0, v1); + case bc1_approx_mode::cBC1AMD: + return interp_half_5_6_amd(c0, c1); + case bc1_approx_mode::cBC1Ideal: + case bc1_approx_mode::cBC1IdealRound4: + default: + return interp_half_5_6_ideal(c0, c1); } } @@ -209,14 +205,14 @@ static inline int interp_half_6(int v0, int v1, int c0, int c1, bc1_approx_mode (void)v1; assert(scale6To8(v0) == c0 && scale6To8(v1) == c1); switch (mode) { - case bc1_approx_mode::cBC1NVidia: - return interp_half_6_nv(c0, c1); - case bc1_approx_mode::cBC1AMD: - return interp_half_5_6_amd(c0, c1); - case bc1_approx_mode::cBC1Ideal: - case bc1_approx_mode::cBC1IdealRound4: - default: - return interp_half_5_6_ideal(c0, c1); + case bc1_approx_mode::cBC1NVidia: + return interp_half_6_nv(c0, c1); + case bc1_approx_mode::cBC1AMD: + return interp_half_5_6_amd(c0, c1); + case bc1_approx_mode::cBC1Ideal: + case bc1_approx_mode::cBC1IdealRound4: + default: + return interp_half_5_6_ideal(c0, c1); } } @@ -234,8 +230,7 @@ static void prepare_bc1_single_color_table_half(bc1_match_entry *pTable, const u int e = iabs(v - i); // We only need to factor in 3% error in BC1 ideal mode. - if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) - e += (iabs(hi_e - lo_e) * 3) / 100; + if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) e += (iabs(hi_e - lo_e) * 3) / 100; // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation. if ((e < lowest_e) || ((e == lowest_e) && (lo == hi))) { @@ -248,8 +243,8 @@ static void prepare_bc1_single_color_table_half(bc1_match_entry *pTable, const u lowest_e = e; } - } // hi - } // lo + } // hi + } // lo } } @@ -266,8 +261,7 @@ static void prepare_bc1_single_color_table(bc1_match_entry *pTable, const uint8_ int e = iabs(v - i); - if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) - e += (iabs(hi_e - lo_e) * 3) / 100; + if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) e += (iabs(hi_e - lo_e) * 3) / 100; // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation. if ((e < lowest_e) || ((e == lowest_e) && (lo == hi))) { @@ -280,8 +274,8 @@ static void prepare_bc1_single_color_table(bc1_match_entry *pTable, const uint8_ lowest_e = e; } - } // hi - } // lo + } // hi + } // lo } } @@ -294,8 +288,7 @@ static const uint32_t g_weight_vals3[3] = {0x000004, 0x040000, 0x010101}; static inline void compute_selector_factors4(const hist4 &h, float &iz00, float &iz10, float &iz11) { uint32_t weight_accum = 0; - for (uint32_t sel = 0; sel < 4; sel++) - weight_accum += g_weight_vals4[sel] * h.m_hist[sel]; + for (uint32_t sel = 0; sel < 4; sel++) weight_accum += g_weight_vals4[sel] * h.m_hist[sel]; float z00 = (float)((weight_accum >> 16) & 0xFF); float z10 = (float)((weight_accum >> 8) & 0xFF); @@ -315,8 +308,7 @@ static inline void compute_selector_factors4(const hist4 &h, float &iz00, float static inline void compute_selector_factors3(const hist3 &h, float &iz00, float &iz10, float &iz11) { uint32_t weight_accum = 0; - for (uint32_t sel = 0; sel < 3; sel++) - weight_accum += g_weight_vals3[sel] * h.m_hist[sel]; + for (uint32_t sel = 0; sel < 3; sel++) weight_accum += g_weight_vals3[sel] * h.m_hist[sel]; float z00 = (float)((weight_accum >> 16) & 0xFF); float z10 = (float)((weight_accum >> 8) & 0xFF); @@ -340,14 +332,12 @@ void init(bc1_approx_mode mode) { g_bc1_approx_mode = mode; uint8_t bc1_expand5[32]; - for (int i = 0; i < 32; i++) - bc1_expand5[i] = static_cast((i << 3) | (i >> 2)); + for (int i = 0; i < 32; i++) bc1_expand5[i] = static_cast((i << 3) | (i >> 2)); prepare_bc1_single_color_table(g_bc1_match5_equals_1, bc1_expand5, 32, mode); prepare_bc1_single_color_table_half(g_bc1_match5_half, bc1_expand5, 32, mode); uint8_t bc1_expand6[64]; - for (int i = 0; i < 64; i++) - bc1_expand6[i] = static_cast((i << 2) | (i >> 4)); + for (int i = 0; i < 64; i++) bc1_expand6[i] = static_cast((i << 2) | (i >> 4)); prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, mode); prepare_bc1_single_color_table_half(g_bc1_match6_half, bc1_expand6, 64, mode); @@ -399,8 +389,7 @@ void encode_bc1_solid_block(void *pDst, uint32_t fr, uint32_t fg, uint32_t fb, b max16 = (g_bc1_match5_half[fr].m_hi << 11) | (g_bc1_match6_half[fg].m_hi << 5) | g_bc1_match5_half[fb].m_hi; min16 = (g_bc1_match5_half[fr].m_lo << 11) | (g_bc1_match6_half[fg].m_lo << 5) | g_bc1_match5_half[fb].m_lo; - if (max16 > min16) - std::swap(max16, min16); + if (max16 > min16) std::swap(max16, min16); } } @@ -505,8 +494,7 @@ static inline bool compute_least_squares_endpoints4_rgb(const Color32 *pColors, float z01 = z10; float det = z00 * z11 - z01 * z10; - if (fabs(det) < 1e-8f) - return false; + if (fabs(det) < 1e-8f) return false; det = (3.0f / 255.0f) / det; @@ -560,14 +548,12 @@ static inline bool compute_least_squares_endpoints3_rgb(bool use_black, const Co for (uint32_t i = 0; i < 16; i++) { const uint8_t r = pColors[i].C[0], g = pColors[i].C[1], b = pColors[i].C[2]; if (use_black) { - if ((r | g | b) < 4) - continue; + if ((r | g | b) < 4) continue; } const uint8_t sel = pSelectors[i]; assert(sel <= 3); - if (sel == 3) - continue; + if (sel == 3) continue; weight_accum += g_weight_vals3[sel]; @@ -592,8 +578,7 @@ static inline bool compute_least_squares_endpoints3_rgb(bool use_black, const Co float z01 = z10; float det = z00 * z11 - z01 * z10; - if (fabs(det) < 1e-8f) - return false; + if (fabs(det) < 1e-8f) return false; det = (2.0f / 255.0f) / det; @@ -687,8 +672,7 @@ static inline void bc1_find_sels4_noerr(const Color32 *pSrc_pixels, uint32_t lr, int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; int dots[4]; - for (uint32_t i = 0; i < 4; i++) - dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + for (uint32_t i = 0; i < 4; i++) dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; @@ -719,8 +703,7 @@ static inline uint32_t bc1_find_sels4_fasterr(const Color32 *pSrc_pixels, uint32 int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; int dots[4]; - for (uint32_t i = 0; i < 4; i++) - dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + for (uint32_t i = 0; i < 4; i++) dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; @@ -757,8 +740,7 @@ static inline uint32_t bc1_find_sels4_fasterr(const Color32 *pSrc_pixels, uint32 total_err += squarei(pSrc_pixels[i + 3].R - block_r[sel3]) + squarei(pSrc_pixels[i + 3].G - block_g[sel3]) + squarei(pSrc_pixels[i + 3].B - block_b[sel3]); - if (total_err >= cur_err) - break; + if (total_err >= cur_err) break; } return total_err; @@ -790,8 +772,7 @@ static inline uint32_t bc1_find_sels4_check2_err(const Color32 *pSrc_pixels, uin uint32_t best_err = err1; if (err0 == err1) { // Prefer non-interpolation - if ((best_sel - 1) == 0) - best_sel = 0; + if ((best_sel - 1) == 0) best_sel = 0; } else if (err0 < best_err) { best_sel = sel - 1; best_err = err0; @@ -799,8 +780,7 @@ static inline uint32_t bc1_find_sels4_check2_err(const Color32 *pSrc_pixels, uin total_err += best_err; - if (total_err >= cur_err) - break; + if (total_err >= cur_err) break; sels[i] = (uint8_t)best_sel; } @@ -832,8 +812,7 @@ static inline uint32_t bc1_find_sels4_fullerr(const Color32 *pSrc_pixels, uint32 total_err += best_err; - if (total_err >= cur_err) - break; + if (total_err >= cur_err) break; sels[i] = (uint8_t)best_sel; } @@ -890,8 +869,7 @@ static inline uint32_t bc1_find_sels3_fullerr(bool use_black, const Color32 *pSr } total_err += best_err; - if (total_err >= cur_err) - return total_err; + if (total_err >= cur_err) return total_err; sels[i] = (uint8_t)best_sel; } @@ -960,7 +938,7 @@ static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int hc16 = 0; lc16 = 1; - mask = 0x55; // select hc16 + mask = 0x55; // select hc16 } assert(lc16 > hc16); @@ -984,8 +962,7 @@ static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int uint32_t packed_sels = 0; static const uint8_t s_sel_trans[4] = {0, 2, 3, 1}; - for (uint32_t i = 0; i < 16; i++) - packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); + for (uint32_t i = 0; i < 16; i++) packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); // todo: make this less silly to prevent packing and unpacking pDst_block->selectors[0] = (uint8_t)packed_sels ^ invert_mask; @@ -1015,11 +992,9 @@ static inline void bc1_encode3(BC1Block *pDst_block, int lr, int lg, int lb, int if (invert_flag) { static const uint8_t s_sel_trans_inv[4] = {1, 0, 2, 3}; - for (uint32_t i = 0; i < 16; i++) - packed_sels |= ((uint32_t)s_sel_trans_inv[sels[i]] << (i * 2)); + for (uint32_t i = 0; i < 16; i++) packed_sels |= ((uint32_t)s_sel_trans_inv[sels[i]] << (i * 2)); } else { - for (uint32_t i = 0; i < 16; i++) - packed_sels |= ((uint32_t)sels[i] << (i * 2)); + for (uint32_t i = 0; i < 16; i++) packed_sels |= ((uint32_t)sels[i] << (i * 2)); } // todo: make this less silly to prevent packing and unpacking @@ -1043,8 +1018,7 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags int total_pixels = 0; for (uint32_t i = 0; i < 16; i++) { const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B; - if ((r | g | b) < 4) - continue; + if ((r | g | b) < 4) continue; max_r = std::max(max_r, r); max_g = std::max(max_g, g); @@ -1059,8 +1033,7 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags total_pixels++; } - if (!total_pixels) - return false; + if (!total_pixels) return false; int half_total_pixels = total_pixels >> 1; int avg_r = (total_r + half_total_pixels) / total_pixels; @@ -1075,8 +1048,7 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags int g = (int)pSrc_pixels[i].G; int b = (int)pSrc_pixels[i].B; - if ((r | g | b) < 4) - continue; + if ((r | g | b) < 4) continue; r -= avg_r; g -= avg_g; @@ -1091,18 +1063,15 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags } float cov[6]; - for (uint32_t i = 0; i < 6; i++) - cov[i] = (float)(icov[i]) * (1.0f / 255.0f); + for (uint32_t i = 0; i < 6; i++) cov[i] = (float)(icov[i]) * (1.0f / 255.0f); float xr = (float)(max_r - min_r); float xg = (float)(max_g - min_g); float xb = (float)(max_b - min_b); - if (icov[2] < 0) - xr = -xr; + if (icov[2] < 0) xr = -xr; - if (icov[4] < 0) - xg = -xg; + if (icov[4] < 0) xg = -xg; for (uint32_t power_iter = 0; power_iter < 4; power_iter++) { float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; @@ -1126,8 +1095,7 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags for (uint32_t i = 0; i < 16; i++) { int r = (int)pSrc_pixels[i].R, g = (int)pSrc_pixels[i].G, b = (int)pSrc_pixels[i].B; - if ((r | g | b) < 4) - continue; + if ((r | g | b) < 4) continue; int dot = r * saxis_r + g * saxis_g + b * saxis_b; if (dot < low_dot) { @@ -1168,8 +1136,7 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags precise_round_565(xl, xh, hr2, hg2, hb2, lr2, lg2, lb2); } - if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) - break; + if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) break; uint8_t trial_sels2[16]; uint32_t trial_err2 = bc1_find_sels3_fullerr(true, pSrc_pixels, lr2, lg2, lb2, hr2, hg2, hb2, trial_sels2, trial_err); @@ -1228,8 +1195,7 @@ static bool try_3color_block(const Color32 *pSrc_pixels, uint32_t flags, uint32_ precise_round_565(xl, xh, hr2, hg2, hb2, lr2, lg2, lb2); } - if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) - break; + if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) break; uint8_t trial_sels2[16]; uint32_t trial_err2 = bc1_find_sels3_fullerr(false, pSrc_pixels, lr2, lg2, lb2, hr2, hg2, hb2, trial_sels2, trial_err); @@ -1338,7 +1304,7 @@ static bool try_3color_block(const Color32 *pSrc_pixels, uint32_t flags, uint32_ memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); } - } // s + } // s } if (trial_err < cur_err) { @@ -1366,120 +1332,120 @@ void encode_bc1(uint32_t level, void *pDst, const uint8_t *pPixels, bool allow_3 static_assert(MAX_TOTAL_ORDERINGS4 >= 32, "MAX_TOTAL_ORDERINGS4 >= 32"); switch (level) { - case 0: - // Faster/higher quality than stb_dxt default. - flags = cEncodeBC1BoundingBoxInt; - break; - case 1: - // Faster/higher quality than stb_dxt default. A bit higher average quality vs. mode 0. - flags = cEncodeBC1Use2DLS; - break; - case 2: - // On average mode 2 is a little weaker than modes 0/1, but it's stronger on outliers (very tough textures). - // Slightly stronger than stb_dxt. - flags = 0; - break; - case 3: - // Slightly stronger than stb_dxt HIGHQUAL. - flags = cEncodeBC1TwoLeastSquaresPasses; - break; - case 4: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1Use6PowerIters; - break; - default: - case 5: - // stb_dxt HIGHQUAL + permit 3 color (if it's enabled). - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - break; - case 6: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - break; - case 7: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 4; - break; - case 8: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 8; - break; - case 9: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 11; - total_orderings3 = 3; - break; - case 10: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 20; - total_orderings3 = 8; - break; - case 11: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 28; - total_orderings3 = 16; - break; - case 12: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; - case 13: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | - (20 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; - case 14: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | - (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; - case 15: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | - (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = ((((32 + MAX_TOTAL_ORDERINGS4) / 2) + 32) / 2); - total_orderings3 = 32; - break; - case 16: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | - (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = (32 + MAX_TOTAL_ORDERINGS4) / 2; - total_orderings3 = 32; - break; - case 17: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | - (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = MAX_TOTAL_ORDERINGS4; - total_orderings3 = 32; - break; - case 18: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | - cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = MAX_TOTAL_ORDERINGS4; - total_orderings3 = 32; - break; - case 19: - // This hidden mode is *extremely* slow and abuses the encoder. It's just for testing/training. - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | - cEncodeBC1Exhaustive | cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; + case 0: + // Faster/higher quality than stb_dxt default. + flags = cEncodeBC1BoundingBoxInt; + break; + case 1: + // Faster/higher quality than stb_dxt default. A bit higher average quality vs. mode 0. + flags = cEncodeBC1Use2DLS; + break; + case 2: + // On average mode 2 is a little weaker than modes 0/1, but it's stronger on outliers (very tough textures). + // Slightly stronger than stb_dxt. + flags = 0; + break; + case 3: + // Slightly stronger than stb_dxt HIGHQUAL. + flags = cEncodeBC1TwoLeastSquaresPasses; + break; + case 4: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1Use6PowerIters; + break; + default: + case 5: + // stb_dxt HIGHQUAL + permit 3 color (if it's enabled). + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + break; + case 6: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + break; + case 7: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 4; + break; + case 8: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 8; + break; + case 9: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 11; + total_orderings3 = 3; + break; + case 10: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 20; + total_orderings3 = 8; + break; + case 11: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 28; + total_orderings3 = 16; + break; + case 12: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; + case 13: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | + (20 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; + case 14: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | + (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; + case 15: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | + (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = ((((32 + MAX_TOTAL_ORDERINGS4) / 2) + 32) / 2); + total_orderings3 = 32; + break; + case 16: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | + (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = (32 + MAX_TOTAL_ORDERINGS4) / 2; + total_orderings3 = 32; + break; + case 17: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | + (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = MAX_TOTAL_ORDERINGS4; + total_orderings3 = 32; + break; + case 18: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | + cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = MAX_TOTAL_ORDERINGS4; + total_orderings3 = 32; + break; + case 19: + // This hidden mode is *extremely* slow and abuses the encoder. It's just for testing/training. + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | + cEncodeBC1Exhaustive | cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; } encode_bc1(pDst, pPixels, flags, total_orderings4, total_orderings3); @@ -1505,11 +1471,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t } else if (flags & cEncodeBC1Use2DLS) { // 2D Least Squares approach from Humus's example, with added inset and optimal rounding. int big_chan = 0, min_chan_val = min_r, max_chan_val = max_r; - if ((max_g - min_g) > (max_chan_val - min_chan_val)) - big_chan = 1, min_chan_val = min_g, max_chan_val = max_g; + if ((max_g - min_g) > (max_chan_val - min_chan_val)) big_chan = 1, min_chan_val = min_g, max_chan_val = max_g; - if ((max_b - min_b) > (max_chan_val - min_chan_val)) - big_chan = 2, min_chan_val = min_b, max_chan_val = max_b; + if ((max_b - min_b) > (max_chan_val - min_chan_val)) big_chan = 2, min_chan_val = min_b, max_chan_val = max_b; int sum_xy_r = 0, sum_xy_g = 0, sum_xy_b = 0; vec3F l, h; @@ -1672,11 +1636,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t icov_yz += g * b; } - if (icov_xz < 0) - std::swap(l.c[0], h.c[0]); + if (icov_xz < 0) std::swap(l.c[0], h.c[0]); - if (icov_yz < 0) - std::swap(l.c[1], h.c[1]); + if (icov_yz < 0) std::swap(l.c[1], h.c[1]); precise_round_565(l, h, lr, lg, lb, hr, hg, hb); } else if (flags & cEncodeBC1BoundingBoxInt) { @@ -1717,11 +1679,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t int x1 = max_r; int y1 = max_g; - if (icov_xz < 0) - std::swap(x0, x1); + if (icov_xz < 0) std::swap(x0, x1); - if (icov_yz < 0) - std::swap(y0, y1); + if (icov_yz < 0) std::swap(y0, y1); lr = scale8To5(x0); lg = scale8To6(y0); @@ -1753,15 +1713,12 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t float xg = (float)(max_g - min_g); float xb = (float)(max_b - min_b); - if (icov[2] < 0) - xr = -xr; + if (icov[2] < 0) xr = -xr; - if (icov[4] < 0) - xg = -xg; + if (icov[4] < 0) xg = -xg; float cov[6]; - for (uint32_t i = 0; i < 6; i++) - cov[i] = (float)(icov[i]) * (1.0f / 255.0f); + for (uint32_t i = 0; i < 6; i++) cov[i] = (float)(icov[i]) * (1.0f / 255.0f); const uint32_t total_power_iters = (flags & cEncodeBC1Use6PowerIters) ? 6 : 4; for (uint32_t power_iter = 0; power_iter < total_power_iters; power_iter++) { @@ -1819,22 +1776,22 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t } static const int8_t s_adjacent_voxels[16][4] = { - {1, 0, 0, 3}, // 0 - {0, 1, 0, 4}, // 1 - {0, 0, 1, 5}, // 2 - {-1, 0, 0, 0}, // 3 - {0, -1, 0, 1}, // 4 - {0, 0, -1, 2}, // 5 - {1, 1, 0, 9}, // 6 - {1, 0, 1, 10}, // 7 - {0, 1, 1, 11}, // 8 - {-1, -1, 0, 6}, // 9 - {-1, 0, -1, 7}, // 10 - {0, -1, -1, 8}, // 11 - {-1, 1, 0, 13}, // 12 - {1, -1, 0, 12}, // 13 - {0, -1, 1, 15}, // 14 - {0, 1, -1, 14}, // 15 + {1, 0, 0, 3}, // 0 + {0, 1, 0, 4}, // 1 + {0, 0, 1, 5}, // 2 + {-1, 0, 0, 0}, // 3 + {0, -1, 0, 1}, // 4 + {0, 0, -1, 2}, // 5 + {1, 1, 0, 9}, // 6 + {1, 0, 1, 10}, // 7 + {0, 1, 1, 11}, // 8 + {-1, -1, 0, 6}, // 9 + {-1, 0, -1, 7}, // 10 + {0, -1, -1, 8}, // 11 + {-1, 1, 0, 13}, // 12 + {1, -1, 0, 12}, // 13 + {0, -1, 1, 15}, // 14 + {0, 1, -1, 14}, // 15 }; // From icbc's high quality mode. @@ -1849,8 +1806,7 @@ static inline void encode_bc1_endpoint_search(const Color32 *pSrc_pixels, bool a for (int i = 0; i < endpoint_search_rounds; i++) { assert(s_adjacent_voxels[s_adjacent_voxels[i & 15][3]][3] == (i & 15)); - if (forbidden_direction == (i & 31)) - continue; + if (forbidden_direction == (i & 31)) continue; const int8_t delta[3] = {s_adjacent_voxels[i & 15][0], s_adjacent_voxels[i & 15][1], s_adjacent_voxels[i & 15][2]}; @@ -1888,8 +1844,7 @@ static inline void encode_bc1_endpoint_search(const Color32 *pSrc_pixels, bool a prev_improvement_index = i; } - if (i - prev_improvement_index > 32) - break; + if (i - prev_improvement_index > 32) break; } } @@ -1905,8 +1860,7 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot uint32_t j; for (j = 15; j >= 1; --j) - if ((pSrc_pixels[j].R != fr) || (pSrc_pixels[j].G != fg) || (pSrc_pixels[j].B != fb)) - break; + if ((pSrc_pixels[j].R != fr) || (pSrc_pixels[j].G != fg) || (pSrc_pixels[j].B != fb)) break; if (j == 0) { encode_bc1_solid_block(pDst, fr, fg, fb, (flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0); @@ -1985,8 +1939,7 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); } - if ((lr == trial_lr) && (lg == trial_lg) && (lb == trial_lb) && (hr == trial_hr) && (hg == trial_hg) && (hb == trial_hb)) - break; + if ((lr == trial_lr) && (lg == trial_lg) && (lb == trial_lb) && (hr == trial_hr) && (hg == trial_hg) && (hb == trial_hb)) break; bc1_find_sels4_noerr(pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, sels); @@ -1997,7 +1950,7 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot hg = trial_hg; hb = trial_hb; - } // ls_pass + } // ls_pass } else { const uint32_t total_rounds = (flags & cEncodeBC1TryAllInitialEndponts) ? 2 : 1; for (uint32_t round = 0; round < total_rounds; round++) { @@ -2059,7 +2012,7 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot } else break; - } // ls_pass + } // ls_pass if (round_err <= cur_err) { cur_err = round_err; @@ -2081,7 +2034,7 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot memcpy(sels, round_sels, 16); } - } // round + } // round } if ((cur_err) && (flags & cEncodeBC1UseLikelyTotalOrderings)) { @@ -2181,12 +2134,11 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot memcpy(sels, trial_sels, 16); } - } // s + } // s - if ((!cur_err) || (cur_err == orig_err)) - break; + if ((!cur_err) || (cur_err == orig_err)) break; - } // iter_index + } // iter_index } if (((flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0) && (cur_err)) { @@ -2418,37 +2370,37 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr c[0].set(r0, g0, b0, 255); c[1].set(r1, g1, b1, 255); switch (mode) { - case bc1_approx_mode::cBC1Ideal: - c[2].set((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); - c[3].set((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); - break; - case bc1_approx_mode::cBC1IdealRound4: - c[2].set((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255); - c[3].set((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255); - break; - case bc1_approx_mode::cBC1NVidia: - c[2].set(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255); - c[3].set(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255); - break; - case bc1_approx_mode::cBC1AMD: - c[2].set(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); - c[3].set(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); - break; + case bc1_approx_mode::cBC1Ideal: + c[2].set((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); + c[3].set((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); + break; + case bc1_approx_mode::cBC1IdealRound4: + c[2].set((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255); + c[3].set((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255); + break; + case bc1_approx_mode::cBC1NVidia: + c[2].set(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255); + c[3].set(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255); + break; + case bc1_approx_mode::cBC1AMD: + c[2].set(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); + c[3].set(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); + break; } } else { c[0].set(r0, g0, b0, 255); c[1].set(r1, g1, b1, 255); switch (mode) { - case bc1_approx_mode::cBC1Ideal: - case bc1_approx_mode::cBC1IdealRound4: - c[2].set((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); - break; - case bc1_approx_mode::cBC1NVidia: - c[2].set(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255); - break; - case bc1_approx_mode::cBC1AMD: - c[2].set(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); - break; + case bc1_approx_mode::cBC1Ideal: + case bc1_approx_mode::cBC1IdealRound4: + c[2].set((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); + break; + case bc1_approx_mode::cBC1NVidia: + c[2].set(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255); + break; + case bc1_approx_mode::cBC1AMD: + c[2].set(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); + break; } c[3].set(0, 0, 0, 0); @@ -2497,8 +2449,7 @@ bool unpack_bc3(const void *pBlock_bits, void *pPixels, bc1_approx_mode mode) { bool success = true; - if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(BC4Block), pDst_pixels, true, mode)) - success = false; + if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(BC4Block), pDst_pixels, true, mode)) success = false; unpack_bc4(pBlock_bits, &pDst_pixels[0].A, sizeof(Color32)); @@ -2511,7 +2462,7 @@ void unpack_bc5(const void *pBlock_bits, void *pPixels, uint32_t chan0, uint32_t unpack_bc4((const uint8_t *)pBlock_bits + sizeof(BC4Block), (uint8_t *)pPixels + chan1, stride); } -} // namespace rgbcx +} // namespace rgbcx /* ------------------------------------------------------------------------------