2 * Copyright 2016 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
27 #include <linux/slab.h>
31 #include "color_gamma.h"
33 /* When calculating LUT values the first region and at least one subsequent
34 * region are calculated with full precision. These defines are a demarcation
35 * of where the second region starts and ends.
36 * These are hardcoded values to avoid recalculating them in loops.
// Inclusive bounds, as LUT point indices, of the second full-precision region.
// translate_from_linear_space compares cal_buffer->buffer_index against them.
38 #define PRECISE_LUT_REGION_START 224
39 #define PRECISE_LUT_REGION_END 239
// X coordinates of the hardware sample points. Filled by
// setup_x_points_distribution, which also writes the two extra trailing
// entries at MAX_HW_POINTS and MAX_HW_POINTS + 1.
41 static struct hw_x_point coordinates_x[MAX_HW_POINTS + 2];
43 // these are helpers for calculations to reduce stack usage
44 // do not depend on these being preserved across calls
46 /* Helper to optimize gamma calculation, only use in translate_from_linear, in
47 * particular the dc_fixpt_pow function which is very expensive
48 * The idea is that our regions for X points are exponential and currently they all use
49 * the same number of points (NUM_PTS_IN_REGION) and in each region every point
50 * is exactly 2x the one at the same index in the previous region. In other words
51 * X[i] = 2 * X[i-NUM_PTS_IN_REGION] for i>=16
52 * The other fact is that (2x)^gamma = 2^gamma * x^gamma
53 * So we compute and save x^gamma for the first 16 points (the first region), and for every next region
54 * just multiply with 2^gamma which can be computed once, and save the result so we
55 * recursively compute all the values.
// Numerator tables for the predefined transfer-function coefficients, one
// column per curve in the order given by the header comment below.
// build_coefficients turns them into fixed-point values as follows:
//   gamma_numerator01 / 10000000 -> a0
//   gamma_numerator02 / 1000     -> a1
//   gamma_numerator03 / 1000     -> a2
//   gamma_numerator04 / 1000     -> a3
//   gamma_numerator05 / 1000     -> user_gamma
// NOTE(review): the last column is labelled P3 but is presumably the one
// selected for TRANSFER_FUNCTION_GAMMA26 (exponent 2600 / 1000) - confirm.
57 /*sRGB 709 2.2 2.4 P3*/
58 static const int32_t gamma_numerator01[] = { 31308, 180000, 0, 0, 0};
59 static const int32_t gamma_numerator02[] = { 12920, 4500, 0, 0, 0};
60 static const int32_t gamma_numerator03[] = { 55, 99, 0, 0, 0};
61 static const int32_t gamma_numerator04[] = { 55, 99, 0, 0, 0};
62 static const int32_t gamma_numerator05[] = { 2400, 2200, 2200, 2400, 2600};
// Populates the file-scope coordinates_x[] table.
// As visible here: the two trailing entries (MAX_HW_POINTS and
// MAX_HW_POINTS + 1) are set to 128. Then, for segment = 6 counting down
// while segment > 6 - NUM_REGIONS, region_size is halved, stored as the first
// X of the segment at seg_offset, and every following point of that segment
// is the previous point plus increment.
// NOTE(review): the declarations of segment, seg_offset and index, the
// divisor used for increment and the inner loop step are not visible in this
// listing - confirm against the complete file.
64 /* one-time setup of X points */
65 void setup_x_points_distribution(void)
67 struct fixed31_32 region_size = dc_fixpt_from_int(128);
71 struct fixed31_32 increment;
73 coordinates_x[MAX_HW_POINTS].x = region_size;
74 coordinates_x[MAX_HW_POINTS + 1].x = region_size;
76 for (segment = 6; segment > (6 - NUM_REGIONS); segment--) {
77 region_size = dc_fixpt_div_int(region_size, 2);
78 increment = dc_fixpt_div_int(region_size,
// segment 6 maps to slot NUM_REGIONS - 1 (the last region of the table);
// each lower segment has a smaller start X and a lower offset.
80 seg_offset = (segment + (NUM_REGIONS - 7)) * NUM_PTS_IN_REGION;
81 coordinates_x[seg_offset].x = region_size;
83 for (index = seg_offset + 1;
84 index < seg_offset + NUM_PTS_IN_REGION;
86 coordinates_x[index].x = dc_fixpt_add
87 (coordinates_x[index-1].x, increment);
// Debug helper: writes a heading and then the raw fixed-point .value of the
// first MAX_HW_POINTS entries of coordinates_x, one per line, through
// LOG_GAMMA_WRITE. The two trailing entries of the table are not printed.
// NOTE(review): logger is not referenced in the visible lines; presumably the
// logging macro consumes it - confirm. The value is printed with %llu, so if
// the field is signed a negative value would show as a large unsigned number.
92 void log_x_points_distribution(struct dal_logger *logger)
97 LOG_GAMMA_WRITE("Log X Distribution\n");
99 for (i = 0; i < MAX_HW_POINTS; i++)
100 LOG_GAMMA_WRITE("%llu\n", coordinates_x[i].x.value);
// Evaluates the PQ gamma formula for in_x and stores the result in *out_y.
// Visible steps: a negative in_x is clamped to zero, l_pow_m1 = in_x ^ m1,
// and the result is base ^ m2, where base is built from c2 * l_pow_m1 and
// 1 + c3 * l_pow_m1.
// NOTE(review): the statement assigning base is only partly visible in this
// listing; presumably base = (c1 + c2 * l_pow_m1) / (1 + c3 * l_pow_m1),
// which is the usual PQ form - confirm against the complete file.
104 static void compute_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
106 /* consts for PQ gamma formula. */
// m1 = 0.159301758, m2 = 78.84375, c1 = 0.8359375, c2 = 18.8515625,
// c3 = 18.6875 (each written as numerator / denominator).
107 const struct fixed31_32 m1 =
108 dc_fixpt_from_fraction(159301758, 1000000000);
109 const struct fixed31_32 m2 =
110 dc_fixpt_from_fraction(7884375, 100000);
111 const struct fixed31_32 c1 =
112 dc_fixpt_from_fraction(8359375, 10000000);
113 const struct fixed31_32 c2 =
114 dc_fixpt_from_fraction(188515625, 10000000);
115 const struct fixed31_32 c3 =
116 dc_fixpt_from_fraction(186875, 10000);
118 struct fixed31_32 l_pow_m1;
119 struct fixed31_32 base;
// Negative inputs are treated as zero before the power is taken.
121 if (dc_fixpt_lt(in_x, dc_fixpt_zero))
122 in_x = dc_fixpt_zero;
124 l_pow_m1 = dc_fixpt_pow(in_x, m1);
127 (dc_fixpt_mul(c2, l_pow_m1))),
128 dc_fixpt_add(dc_fixpt_one,
129 (dc_fixpt_mul(c3, l_pow_m1))));
130 *out_y = dc_fixpt_pow(base, m2);
// Evaluates the dePQ gamma formula for in_x and stores the result in *out_y.
// Steps as written:
//   l_pow_m1 = in_x ^ (1 / m2)   (despite its name, the exponent is 1 / m2)
//   base     = l_pow_m1 - c1
//   div      = c2 - c3 * l_pow_m1
//   base2    = base / div, negated if it came out negative
//   *out_y   = base2 ^ (1 / m1)
// Uses the same m1, m2, c1, c2, c3 constants as compute_pq.
133 static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
135 /* consts for dePQ gamma formula. */
136 const struct fixed31_32 m1 =
137 dc_fixpt_from_fraction(159301758, 1000000000);
138 const struct fixed31_32 m2 =
139 dc_fixpt_from_fraction(7884375, 100000);
140 const struct fixed31_32 c1 =
141 dc_fixpt_from_fraction(8359375, 10000000);
142 const struct fixed31_32 c2 =
143 dc_fixpt_from_fraction(188515625, 10000000);
144 const struct fixed31_32 c3 =
145 dc_fixpt_from_fraction(186875, 10000);
147 struct fixed31_32 l_pow_m1;
148 struct fixed31_32 base, div;
149 struct fixed31_32 base2;
// Negative inputs are treated as zero.
152 if (dc_fixpt_lt(in_x, dc_fixpt_zero))
153 in_x = dc_fixpt_zero;
155 l_pow_m1 = dc_fixpt_pow(in_x,
156 dc_fixpt_div(dc_fixpt_one, m2));
157 base = dc_fixpt_sub(l_pow_m1, c1);
159 div = dc_fixpt_sub(c2, dc_fixpt_mul(c3, l_pow_m1));
161 base2 = dc_fixpt_div(base, div);
162 // avoid complex numbers
// A negative base is replaced by its magnitude before the fractional power.
163 if (dc_fixpt_lt(base2, dc_fixpt_zero))
164 base2 = dc_fixpt_sub(dc_fixpt_zero, base2);
167 *out_y = dc_fixpt_pow(base2, dc_fixpt_div(dc_fixpt_one, m1));
// HLG de-gamma for one sample: converts non-linear in_x and stores the
// result, multiplied by max_luminance_nits / sdr_white_level, in *out_y.
// Visible piecewise form, with threshold 0.5:
//   in_x <  0.5 : x = in_x * in_x / 3
//   otherwise   : x = (in_x - c) / a, then x = (x + b) / 12
// NOTE(review): in this listing no step is visible between the division by a
// and the addition of b; the HLG inverse normally applies an exponential
// there - confirm against the complete file. The declarations of a, b, c, x
// and the else keyword are also not visible.
172 /* de gamma, non-linear to linear */
173 static void compute_hlg_eotf(struct fixed31_32 in_x,
174 struct fixed31_32 *out_y,
175 uint32_t sdr_white_level, uint32_t max_luminance_nits)
180 struct fixed31_32 threshold;
183 struct fixed31_32 scaling_factor =
184 dc_fixpt_from_fraction(max_luminance_nits, sdr_white_level);
// a = 0.17883277, b = 0.28466892, c = 0.55991073
185 a = dc_fixpt_from_fraction(17883277, 100000000);
186 b = dc_fixpt_from_fraction(28466892, 100000000);
187 c = dc_fixpt_from_fraction(55991073, 100000000);
188 threshold = dc_fixpt_from_fraction(1, 2);
190 if (dc_fixpt_lt(in_x, threshold)) {
191 x = dc_fixpt_mul(in_x, in_x);
192 x = dc_fixpt_div_int(x, 3);
194 x = dc_fixpt_sub(in_x, c);
195 x = dc_fixpt_div(x, a);
197 x = dc_fixpt_add(x, b);
198 x = dc_fixpt_div_int(x, 12);
200 *out_y = dc_fixpt_mul(x, scaling_factor);
// HLG re-gamma for one sample: scales linear in_x by
// sdr_white_level / max_luminance_nits, applies the piecewise curve and
// stores the result in *out_y.
// Visible piecewise form, with threshold 1/12 applied to the scaled x:
//   x <  1/12 : *out_y = (3 * x) ^ 0.5
//   otherwise : x = 12 * x - b, then *out_y = a * x + c
// NOTE(review): in this listing no step is visible between the subtraction
// of b and the multiplication by a; the HLG OETF normally takes a logarithm
// there - confirm against the complete file. The declarations of a, b, c, x
// and the else keyword are also not visible.
204 /* re gamma, linear to non-linear */
205 static void compute_hlg_oetf(struct fixed31_32 in_x, struct fixed31_32 *out_y,
206 uint32_t sdr_white_level, uint32_t max_luminance_nits)
211 struct fixed31_32 threshold;
214 struct fixed31_32 scaling_factor =
215 dc_fixpt_from_fraction(sdr_white_level, max_luminance_nits);
// a = 0.17883277, b = 0.28466892, c = 0.55991073
216 a = dc_fixpt_from_fraction(17883277, 100000000);
217 b = dc_fixpt_from_fraction(28466892, 100000000);
218 c = dc_fixpt_from_fraction(55991073, 100000000);
219 threshold = dc_fixpt_from_fraction(1, 12);
220 x = dc_fixpt_mul(in_x, scaling_factor);
223 if (dc_fixpt_lt(x, threshold)) {
224 x = dc_fixpt_mul(x, dc_fixpt_from_fraction(3, 1));
225 *out_y = dc_fixpt_pow(x, dc_fixpt_half);
227 x = dc_fixpt_mul(x, dc_fixpt_from_fraction(12, 1));
228 x = dc_fixpt_sub(x, b);
230 x = dc_fixpt_mul(a, x);
231 *out_y = dc_fixpt_add(x, c);
// Fills the table returned by mod_color_get_table(type_pq_table).
// Entries 0..31 are set to zero; entries 32..MAX_HW_POINTS (inclusive) are
// compute_pq applied to the X coordinate scaled by 80 / 10000.
// coord_x starts at coordinates_x + 32 so that it lines up with i = 32.
// NOTE(review): no statement advancing coord_x is visible inside the second
// loop in this listing. If it is really absent, every entry would be computed
// from the same X value - confirm against the complete file.
236 /* one-time pre-compute PQ values - only for sdr_white_level 80 */
237 void precompute_pq(void)
241 const struct hw_x_point *coord_x = coordinates_x + 32;
242 struct fixed31_32 scaling_factor =
243 dc_fixpt_from_fraction(80, 10000);
245 struct fixed31_32 *pq_table = mod_color_get_table(type_pq_table);
247 /* pow function has problems with arguments too small */
248 for (i = 0; i < 32; i++)
249 pq_table[i] = dc_fixpt_zero;
251 for (i = 32; i <= MAX_HW_POINTS; i++) {
252 x = dc_fixpt_mul(coord_x->x, scaling_factor);
253 compute_pq(x, &pq_table[i]);
// Fills the table returned by mod_color_get_table(type_de_pq_table) in three
// consecutive index ranges, sharing the loop counter i:
//   0 .. begin_index (inclusive)         -> zero
//   begin_index + 1 .. end_index         -> compute_de_pq(X) * 125
//   end_index + 1 .. MAX_HW_POINTS       -> copy of the previous entry
// begin_index is 13 regions in, end_index is 12 regions after that.
// NOTE(review): the first loop uses <=, so the entry at begin_index itself is
// zeroed rather than computed - confirm this boundary is intended.
258 /* one-time pre-compute dePQ values - only for max pixel value 125 FP16 */
259 void precompute_de_pq(void)
263 uint32_t begin_index, end_index;
265 struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
266 struct fixed31_32 *de_pq_table = mod_color_get_table(type_de_pq_table);
267 /* X points is 2^-25 to 2^7
268 * De-gamma X is 2^-12 to 2^0 – we are skipping first -12-(-25) = 13 regions
270 begin_index = 13 * NUM_PTS_IN_REGION;
271 end_index = begin_index + 12 * NUM_PTS_IN_REGION;
273 for (i = 0; i <= begin_index; i++)
274 de_pq_table[i] = dc_fixpt_zero;
276 for (; i <= end_index; i++) {
277 compute_de_pq(coordinates_x[i].x, &y);
278 de_pq_table[i] = dc_fixpt_mul(y, scaling_factor);
// Past end_index the curve is held flat at the last computed value.
281 for (; i <= MAX_HW_POINTS; i++)
282 de_pq_table[i] = de_pq_table[i-1];
// Three fixed-point factors.
// NOTE(review): the enclosing declaration is not visible in this listing;
// presumably these are the members of struct dividers, which scale_gamma,
// scale_gamma_dx and scale_user_regamma_ramp receive by value - confirm.
285 struct fixed31_32 divider1;
286 struct fixed31_32 divider2;
287 struct fixed31_32 divider3;
// Fills *coefficients for the predefined transfer function given by type.
// The type is matched against SRGB, BT709, GAMMA22, GAMMA24 and GAMMA26 to
// choose a column index into the gamma_numerator tables; then, for every
// element i of coefficients->a0, the a0, a1, a2, a3 and user_gamma entries
// are set from that column (a0 divided by 10000000, the others by 1000).
// NOTE(review): the index assignments, the handling of an unmatched type,
// the loop header and the return statements are not visible in this listing;
// callers test the boolean result, so presumably false means unsupported
// type - confirm.
291 static bool build_coefficients(struct gamma_coefficients *coefficients, enum dc_transfer_func_predefined type)
298 if (type == TRANSFER_FUNCTION_SRGB)
300 else if (type == TRANSFER_FUNCTION_BT709)
302 else if (type == TRANSFER_FUNCTION_GAMMA22)
304 else if (type == TRANSFER_FUNCTION_GAMMA24)
306 else if (type == TRANSFER_FUNCTION_GAMMA26)
314 coefficients->a0[i] = dc_fixpt_from_fraction(
315 gamma_numerator01[index], 10000000);
316 coefficients->a1[i] = dc_fixpt_from_fraction(
317 gamma_numerator02[index], 1000);
318 coefficients->a2[i] = dc_fixpt_from_fraction(
319 gamma_numerator03[index], 1000);
320 coefficients->a3[i] = dc_fixpt_from_fraction(
321 gamma_numerator04[index], 1000);
322 coefficients->user_gamma[i] = dc_fixpt_from_fraction(
323 gamma_numerator05[index], 1000);
// Every element of the a0 array gets the same coefficients.
326 } while (i != ARRAY_SIZE(coefficients->a0));
// Maps the linear value args->arg through the curve described by args->a0,
// a1, a2, a3 and args->gamma, reusing earlier power results through
// args->cal_buffer where possible.
// Branches, in the order tested:
//   1 <= arg      : early exit (the returned value is not visible here)
//   arg <= -a0    : a2 - (1 + a3) * (-arg) ^ (1 / gamma)
//   a0 <= arg     : (1 + a3) * arg ^ (1 / gamma) - a2, with caching (below)
//   otherwise     : arg * a1
// Caching in the a0 <= arg branch: buffer holds 16 power results indexed by
// buffer_index % 16. For the first 16 points and for indices inside
// PRECISE_LUT_REGION_START..END the power is computed directly; for all other
// points it is gamma_of_2 times the value stored 16 points earlier.
// A buffer_index of -1 disables storing and counting.
// NOTE(review): the return statements of the first three branches are not
// visible in this listing - confirm against the complete file.
331 static struct fixed31_32 translate_from_linear_space(
332 struct translate_from_linear_space_args *args)
334 const struct fixed31_32 one = dc_fixpt_from_int(1);
336 struct fixed31_32 scratch_1, scratch_2;
337 struct calculate_buffer *cal_buffer = args->cal_buffer;
339 if (dc_fixpt_le(one, args->arg))
342 if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0))) {
343 scratch_1 = dc_fixpt_add(one, args->a3);
344 scratch_2 = dc_fixpt_pow(
345 dc_fixpt_neg(args->arg),
346 dc_fixpt_recip(args->gamma));
347 scratch_1 = dc_fixpt_mul(scratch_1, scratch_2);
348 scratch_1 = dc_fixpt_sub(args->a2, scratch_1);
351 } else if (dc_fixpt_le(args->a0, args->arg)) {
// 2 ^ (1 / gamma) is computed once, on the first point of a run.
352 if (cal_buffer->buffer_index == 0) {
353 cal_buffer->gamma_of_2 = dc_fixpt_pow(dc_fixpt_from_int(2),
354 dc_fixpt_recip(args->gamma));
356 scratch_1 = dc_fixpt_add(one, args->a3);
357 /* In the first region (first 16 points) and in the
358 * region delimited by START/END we calculate with
359 * full precision to avoid error accumulation.
361 if ((cal_buffer->buffer_index >= PRECISE_LUT_REGION_START &&
362 cal_buffer->buffer_index <= PRECISE_LUT_REGION_END) ||
363 (cal_buffer->buffer_index < 16))
364 scratch_2 = dc_fixpt_pow(args->arg,
365 dc_fixpt_recip(args->gamma));
367 scratch_2 = dc_fixpt_mul(cal_buffer->gamma_of_2,
368 cal_buffer->buffer[cal_buffer->buffer_index%16]);
// Remember this power result for the point 16 positions later.
370 if (cal_buffer->buffer_index != -1) {
371 cal_buffer->buffer[cal_buffer->buffer_index%16] = scratch_2;
372 cal_buffer->buffer_index++;
375 scratch_1 = dc_fixpt_mul(scratch_1, scratch_2);
376 scratch_1 = dc_fixpt_sub(scratch_1, args->a2);
381 return dc_fixpt_mul(args->arg, args->a1);
// Variant of translate_from_linear_space that does not reference
// args->cal_buffer in the visible lines. It tests the same three ranges of
// args->arg: above one, at or below -a0, at or above a0. Otherwise it returns
// arg * a1.
// Unlike the cached variant, the upper check here is strict (1 < arg), so an
// argument exactly equal to one goes through the a0 <= arg branch.
// NOTE(review): most of the expressions in the first three branches are not
// visible in this listing; only the (-arg) and 1 / gamma operands of the
// power calls can be seen - confirm against the complete file.
385 static struct fixed31_32 translate_from_linear_space_long(
386 struct translate_from_linear_space_args *args)
388 const struct fixed31_32 one = dc_fixpt_from_int(1);
390 if (dc_fixpt_lt(one, args->arg))
393 if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0)))
401 dc_fixpt_neg(args->arg),
402 dc_fixpt_recip(args->gamma))));
403 else if (dc_fixpt_le(args->a0, args->arg))
411 dc_fixpt_recip(args->gamma))),
414 return dc_fixpt_mul(args->arg, args->a1);
// Applies a pure power curve with gamma = 22 / 10 to arg: a0, a1, a2 and a3
// are all zero, so only the exponent matters.
// The result comes from either translate_from_linear_space_long or
// translate_from_linear_space.
// NOTE(review): the condition choosing between the two calls is not visible
// in this listing; presumably use_eetf selects the _long variant - confirm.
417 static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg, bool use_eetf, struct calculate_buffer *cal_buffer)
419 struct fixed31_32 gamma = dc_fixpt_from_fraction(22, 10);
420 struct translate_from_linear_space_args scratch_gamma_args;
422 scratch_gamma_args.arg = arg;
423 scratch_gamma_args.a0 = dc_fixpt_zero;
424 scratch_gamma_args.a1 = dc_fixpt_zero;
425 scratch_gamma_args.a2 = dc_fixpt_zero;
426 scratch_gamma_args.a3 = dc_fixpt_zero;
427 scratch_gamma_args.cal_buffer = cal_buffer;
428 scratch_gamma_args.gamma = gamma;
431 return translate_from_linear_space_long(&scratch_gamma_args);
433 return translate_from_linear_space(&scratch_gamma_args);
// Maps a non-linear value arg to linear space using coefficients a0..a3 and
// exponent gamma.
// The local a0 is first replaced by a0 * a1 and then used as the threshold:
//   arg <= -a0           : negated power built from (a2 - arg) and (1 + a3)
//   -a0 <= arg <= a0     : arg / a1
//   otherwise            : power built from (a2 + arg) and (1 + a3)
// The first two conditions both hold at arg == -a0; the first branch wins
// because it is tested first.
// NOTE(review): the inner pow and div calls of the two power branches and
// the final return are only partly visible; presumably each computes
// ((a2 -/+ arg) / (1 + a3)) ^ gamma - confirm against the complete file.
437 static struct fixed31_32 translate_to_linear_space(
438 struct fixed31_32 arg,
439 struct fixed31_32 a0,
440 struct fixed31_32 a1,
441 struct fixed31_32 a2,
442 struct fixed31_32 a3,
443 struct fixed31_32 gamma)
445 struct fixed31_32 linear;
447 a0 = dc_fixpt_mul(a0, a1);
448 if (dc_fixpt_le(arg, dc_fixpt_neg(a0)))
450 linear = dc_fixpt_neg(
453 dc_fixpt_sub(a2, arg),
455 dc_fixpt_one, a3)), gamma));
457 else if (dc_fixpt_le(dc_fixpt_neg(a0), arg) &&
458 dc_fixpt_le(arg, a0))
459 linear = dc_fixpt_div(arg, a1);
461 linear = dc_fixpt_pow(
463 dc_fixpt_add(a2, arg),
465 dc_fixpt_one, a3)), gamma);
// Convenience wrapper: packs arg, the coefficients of channel color_index
// taken from coeff (a0, a1, a2, a3, user_gamma) and cal_buffer into a
// translate_from_linear_space_args structure and returns the result of
// translate_from_linear_space on it.
470 static struct fixed31_32 translate_from_linear_space_ex(
471 struct fixed31_32 arg,
472 struct gamma_coefficients *coeff,
473 uint32_t color_index,
474 struct calculate_buffer *cal_buffer)
476 struct translate_from_linear_space_args scratch_gamma_args;
478 scratch_gamma_args.arg = arg;
479 scratch_gamma_args.a0 = coeff->a0[color_index];
480 scratch_gamma_args.a1 = coeff->a1[color_index];
481 scratch_gamma_args.a2 = coeff->a2[color_index];
482 scratch_gamma_args.a3 = coeff->a3[color_index];
483 scratch_gamma_args.gamma = coeff->user_gamma[color_index];
484 scratch_gamma_args.cal_buffer = cal_buffer;
486 return translate_from_linear_space(&scratch_gamma_args);
// Convenience wrapper: forwards to translate_to_linear_space with the
// coefficients of channel color_index taken from coeff (a0, a1, a2, a3 and
// user_gamma as the exponent).
// NOTE(review): the first argument of the call is not visible in this
// listing; presumably it is arg - confirm.
490 static inline struct fixed31_32 translate_to_linear_space_ex(
491 struct fixed31_32 arg,
492 struct gamma_coefficients *coeff,
493 uint32_t color_index)
495 return translate_to_linear_space(
497 coeff->a0[color_index],
498 coeff->a1[color_index],
499 coeff->a2[color_index],
500 coeff->a3[color_index],
501 coeff->user_gamma[color_index]);
// Searches axis_x, starting at *index_to_start, for the pair of neighbouring
// entries that bracket hw_point on the given channel.
// The search covers indices below ramp->num_entries + 3. For each i the right
// neighbour is entry i + 1, or the last entry when i is already the last.
// Outcomes written through pos:
//   HW_POINT_POSITION_MIDDLE : left <= hw_point <= right
//   HW_POINT_POSITION_LEFT   : at the starting index and hw_point <= left
//   HW_POINT_POSITION_RIGHT  : at the last index and right <= hw_point
// In the MIDDLE case *index_right is i + 1, capped at the last index.
// NOTE(review): the assignments of left, *index_to_start and *index_left,
// the index assignments of the LEFT and RIGHT cases, the loop increment and
// the return statements are not visible in this listing; the caller treats a
// false result as failure - confirm the details against the complete file.
505 static bool find_software_points(
506 const struct dc_gamma *ramp,
507 const struct gamma_pixel *axis_x,
508 struct fixed31_32 hw_point,
509 enum channel_name channel,
510 uint32_t *index_to_start,
511 uint32_t *index_left,
512 uint32_t *index_right,
513 enum hw_point_position *pos)
515 const uint32_t max_number = ramp->num_entries + 3;
517 struct fixed31_32 left, right;
519 uint32_t i = *index_to_start;
521 while (i < max_number) {
522 if (channel == CHANNEL_NAME_RED) {
525 if (i < max_number - 1)
526 right = axis_x[i + 1].r;
528 right = axis_x[max_number - 1].r;
529 } else if (channel == CHANNEL_NAME_GREEN) {
532 if (i < max_number - 1)
533 right = axis_x[i + 1].g;
535 right = axis_x[max_number - 1].g;
// Any channel other than red or green is read from the blue component.
539 if (i < max_number - 1)
540 right = axis_x[i + 1].b;
542 right = axis_x[max_number - 1].b;
545 if (dc_fixpt_le(left, hw_point) &&
546 dc_fixpt_le(hw_point, right)) {
550 if (i < max_number - 1)
551 *index_right = i + 1;
553 *index_right = max_number - 1;
555 *pos = HW_POINT_POSITION_MIDDLE;
558 } else if ((i == *index_to_start) &&
559 dc_fixpt_le(hw_point, left)) {
564 *pos = HW_POINT_POSITION_LEFT;
567 } else if ((i == max_number - 1) &&
568 dc_fixpt_le(right, hw_point)) {
573 *pos = HW_POINT_POSITION_RIGHT;
// For one colour channel, computes an interpolation record for every hardware
// point 0..number_of_points (inclusive).
// Per point: the regamma Y of that channel is taken from coordinates_x[i],
// find_software_points locates its bracketing entries in axis_x, the returned
// indices are checked against ramp->num_entries + 3, and the record stores
// left_index, right_index and a coefficient:
//   MIDDLE -> a quotient (operands not visible here)
//   LEFT   -> 0
//   RIGHT  -> 2
// index_to_start is declared inside the loop and initialised to 0, so every
// search starts from the beginning of axis_x.
// NOTE(review): the parameter coordinates_x hides the file-scope table of the
// same name. The selection of point from coeff, the MIDDLE quotient operands
// (presumably (coord_x - left_pos) / (right_pos - left_pos)), the failure
// paths, the loop increment and the returns are not visible - confirm.
584 static bool build_custom_gamma_mapping_coefficients_worker(
585 const struct dc_gamma *ramp,
586 struct pixel_gamma_point *coeff,
587 const struct hw_x_point *coordinates_x,
588 const struct gamma_pixel *axis_x,
589 enum channel_name channel,
590 uint32_t number_of_points)
594 while (i <= number_of_points) {
595 struct fixed31_32 coord_x;
597 uint32_t index_to_start = 0;
598 uint32_t index_left = 0;
599 uint32_t index_right = 0;
601 enum hw_point_position hw_pos;
603 struct gamma_point *point;
605 struct fixed31_32 left_pos;
606 struct fixed31_32 right_pos;
608 if (channel == CHANNEL_NAME_RED)
609 coord_x = coordinates_x[i].regamma_y_red;
610 else if (channel == CHANNEL_NAME_GREEN)
611 coord_x = coordinates_x[i].regamma_y_green;
613 coord_x = coordinates_x[i].regamma_y_blue;
615 if (!find_software_points(
616 ramp, axis_x, coord_x, channel,
617 &index_to_start, &index_left, &index_right, &hw_pos)) {
// Guard against indices outside the extended ramp before using them.
622 if (index_left >= ramp->num_entries + 3) {
627 if (index_right >= ramp->num_entries + 3) {
632 if (channel == CHANNEL_NAME_RED) {
635 left_pos = axis_x[index_left].r;
636 right_pos = axis_x[index_right].r;
637 } else if (channel == CHANNEL_NAME_GREEN) {
640 left_pos = axis_x[index_left].g;
641 right_pos = axis_x[index_right].g;
645 left_pos = axis_x[index_left].b;
646 right_pos = axis_x[index_right].b;
649 if (hw_pos == HW_POINT_POSITION_MIDDLE)
650 point->coeff = dc_fixpt_div(
657 else if (hw_pos == HW_POINT_POSITION_LEFT)
658 point->coeff = dc_fixpt_zero;
659 else if (hw_pos == HW_POINT_POSITION_RIGHT)
660 point->coeff = dc_fixpt_from_int(2);
666 point->left_index = index_left;
667 point->right_index = index_right;
// Evaluates the user ramp rgb at one hardware point of the given channel,
// using the interpolation record selected from coeff.
// Both left_index and right_index must lie in 0..max_index, otherwise zero is
// returned. Depending on point->pos:
//   MIDDLE    : a sum whose last operand is rgb[left_index] of the channel
//   LEFT      : zero
//   otherwise : one
// NOTE(review): the max_index parameter declaration, the selection of point
// and the multiplication inside the MIDDLE sum are not visible in this
// listing; presumably the MIDDLE value is
// left + coeff * (right - left) - confirm against the complete file.
676 static struct fixed31_32 calculate_mapped_value(
677 struct pwl_float_data *rgb,
678 const struct pixel_gamma_point *coeff,
679 enum channel_name channel,
682 const struct gamma_point *point;
684 struct fixed31_32 result;
686 if (channel == CHANNEL_NAME_RED)
688 else if (channel == CHANNEL_NAME_GREEN)
693 if ((point->left_index < 0) || (point->left_index > max_index)) {
695 return dc_fixpt_zero;
698 if ((point->right_index < 0) || (point->right_index > max_index)) {
700 return dc_fixpt_zero;
703 if (point->pos == HW_POINT_POSITION_MIDDLE)
704 if (channel == CHANNEL_NAME_RED)
705 result = dc_fixpt_add(
709 rgb[point->right_index].r,
710 rgb[point->left_index].r)),
711 rgb[point->left_index].r);
712 else if (channel == CHANNEL_NAME_GREEN)
713 result = dc_fixpt_add(
717 rgb[point->right_index].g,
718 rgb[point->left_index].g)),
719 rgb[point->left_index].g);
721 result = dc_fixpt_add(
725 rgb[point->right_index].b,
726 rgb[point->left_index].b)),
727 rgb[point->left_index].b);
728 else if (point->pos == HW_POINT_POSITION_LEFT) {
730 result = dc_fixpt_zero;
732 result = dc_fixpt_one;
// Builds the PQ regamma curve into rgb_regamma for points
// start_index..hw_points_num (inclusive).
// When sdr_white_level is 80 the values are read from the precomputed PQ
// table; for any other level each point is computed on the fly as
// compute_pq(X * sdr_white_level / 10000). Each output is clamped to 0..1.
// The table is marked initialised the first time this runs with level 80.
// NOTE(review): the call that fills the table, the value of start_index, the
// stores into rgb and the pointer increments are not visible in this listing;
// presumably precompute_pq is called before the init flag is set - confirm.
738 static void build_pq(struct pwl_float_data_ex *rgb_regamma,
739 uint32_t hw_points_num,
740 const struct hw_x_point *coordinate_x,
741 uint32_t sdr_white_level)
743 uint32_t i, start_index;
745 struct pwl_float_data_ex *rgb = rgb_regamma;
746 const struct hw_x_point *coord_x = coordinate_x;
748 struct fixed31_32 output;
749 struct fixed31_32 scaling_factor =
750 dc_fixpt_from_fraction(sdr_white_level, 10000);
751 struct fixed31_32 *pq_table = mod_color_get_table(type_pq_table);
753 if (!mod_color_is_table_init(type_pq_table) && sdr_white_level == 80) {
755 mod_color_set_table_init_state(type_pq_table, true);
758 /* TODO: start index is from segment 2^-24, skipping first segment
759 * due to x values too small for power calculations
763 coord_x += start_index;
765 for (i = start_index; i <= hw_points_num; i++) {
766 /* Multiply 0.008 as regamma is 0-1 and FP16 input is 0-125.
769 if (sdr_white_level == 80) {
770 output = pq_table[i];
772 x = dc_fixpt_mul(coord_x->x, scaling_factor);
773 compute_pq(x, &output);
776 /* should really not happen? */
777 if (dc_fixpt_lt(output, dc_fixpt_zero))
778 output = dc_fixpt_zero;
779 else if (dc_fixpt_lt(dc_fixpt_one, output))
780 output = dc_fixpt_one;
// Builds the dePQ curve into de_pq for points 0..hw_points_num (inclusive)
// from the precomputed dePQ table, clamping every value to 0..125.
// The table is marked initialised on first use.
// NOTE(review): the call that fills the table (presumably precompute_de_pq)
// and the stores into de_pq are not visible in this listing, and
// coordinate_x is not referenced in the visible lines - confirm.
791 static void build_de_pq(struct pwl_float_data_ex *de_pq,
792 uint32_t hw_points_num,
793 const struct hw_x_point *coordinate_x)
796 struct fixed31_32 output;
797 struct fixed31_32 *de_pq_table = mod_color_get_table(type_de_pq_table);
798 struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
800 if (!mod_color_is_table_init(type_de_pq_table)) {
802 mod_color_set_table_init_state(type_de_pq_table, true);
806 for (i = 0; i <= hw_points_num; i++) {
807 output = de_pq_table[i];
808 /* should really not happen? */
809 if (dc_fixpt_lt(output, dc_fixpt_zero))
810 output = dc_fixpt_zero;
811 else if (dc_fixpt_lt(scaling_factor, output))
812 output = scaling_factor;
// Builds the regamma curve of a predefined transfer function into
// rgb_regamma for points 0..hw_points_num (inclusive).
// Coefficients are built for type into a heap-allocated structure. The
// power cache in cal_buffer is cleared and its index reset to 0 before the
// loop, and the index is set to -1 afterwards so that later calls to
// translate_from_linear_space do not touch the cache.
// Only channel 0 coefficients are used for the visible red computation.
// NOTE(review): coeff is allocated with kvzalloc, but neither the allocation
// check nor a matching free is visible in this listing - confirm that every
// exit path releases it. The green and blue stores, the pointer increments
// and the return statements are not visible either.
819 static bool build_regamma(struct pwl_float_data_ex *rgb_regamma,
820 uint32_t hw_points_num,
821 const struct hw_x_point *coordinate_x,
822 enum dc_transfer_func_predefined type,
823 struct calculate_buffer *cal_buffer)
828 struct gamma_coefficients *coeff;
829 struct pwl_float_data_ex *rgb = rgb_regamma;
830 const struct hw_x_point *coord_x = coordinate_x;
832 coeff = kvzalloc(sizeof(*coeff), GFP_KERNEL);
836 if (!build_coefficients(coeff, type))
839 memset(cal_buffer->buffer, 0, NUM_PTS_IN_REGION * sizeof(struct fixed31_32));
840 cal_buffer->buffer_index = 0; // see variable definition for more info
843 while (i <= hw_points_num) {
844 /* TODO use y vs r,g,b */
845 rgb->r = translate_from_linear_space_ex(
846 coord_x->x, coeff, 0, cal_buffer);
853 cal_buffer->buffer_index = -1;
// Tone-maps input_x with a Hermite-spline roll-off computed in PQ space and
// writes the result to *out_x.
// Outline of the visible steps:
//   - max_content == 0 yields zero.
//   - E1, min_lum_pq and max_lum_pq are the PQ encodings of input_x,
//     min_display / max_content and max_display / max_content.
//   - ks = a * max_lum_pq - b is the knee start, with b = 0.5 and
//     a = (1 + b) / PQ(1).
//   - For ks <= E1 <= 1, t = (E1 - ks) / (1 - ks) when 1 - ks exceeds
//     epsilon, and E2 is the sum of the three spline terms annotated below.
//   - E3 = E2 + min_lum_pq * (1 - E2) ^ 4 adds the black-level lift.
//   - The result is dePQ(E3) divided by max_display / max_content.
// NOTE(review): the initial value 15 / 10 of a is overwritten before it is
// read. The E2 assignments for E1 < ks and for E1 > 1, the fallback value of
// t, the declaration of t and the early return are not visible in this
// listing - confirm against the complete file.
860 static void hermite_spline_eetf(struct fixed31_32 input_x,
861 struct fixed31_32 max_display,
862 struct fixed31_32 min_display,
863 struct fixed31_32 max_content,
864 struct fixed31_32 *out_x)
866 struct fixed31_32 min_lum_pq;
867 struct fixed31_32 max_lum_pq;
868 struct fixed31_32 max_content_pq;
869 struct fixed31_32 ks;
870 struct fixed31_32 E1;
871 struct fixed31_32 E2;
872 struct fixed31_32 E3;
874 struct fixed31_32 t2;
875 struct fixed31_32 t3;
876 struct fixed31_32 two;
877 struct fixed31_32 three;
878 struct fixed31_32 temp1;
879 struct fixed31_32 temp2;
880 struct fixed31_32 a = dc_fixpt_from_fraction(15, 10);
881 struct fixed31_32 b = dc_fixpt_from_fraction(5, 10);
882 struct fixed31_32 epsilon = dc_fixpt_from_fraction(1, 1000000); // dc_fixpt_epsilon is a bit too small
// Avoid dividing by a zero max_content below.
884 if (dc_fixpt_eq(max_content, dc_fixpt_zero)) {
885 *out_x = dc_fixpt_zero;
889 compute_pq(input_x, &E1);
890 compute_pq(dc_fixpt_div(min_display, max_content), &min_lum_pq);
891 compute_pq(dc_fixpt_div(max_display, max_content), &max_lum_pq);
892 compute_pq(dc_fixpt_one, &max_content_pq); // always 1? DAL2 code is weird
893 a = dc_fixpt_div(dc_fixpt_add(dc_fixpt_one, b), max_content_pq); // (1+b)/maxContent
894 ks = dc_fixpt_sub(dc_fixpt_mul(a, max_lum_pq), b); // a * max_lum_pq - b
896 if (dc_fixpt_lt(E1, ks))
898 else if (dc_fixpt_le(ks, E1) && dc_fixpt_le(E1, dc_fixpt_one)) {
// Only divide when 1 - ks is safely larger than zero.
899 if (dc_fixpt_lt(epsilon, dc_fixpt_sub(dc_fixpt_one, ks)))
900 // t = (E1 - ks) / (1 - ks)
901 t = dc_fixpt_div(dc_fixpt_sub(E1, ks),
902 dc_fixpt_sub(dc_fixpt_one, ks));
906 two = dc_fixpt_from_int(2);
907 three = dc_fixpt_from_int(3);
909 t2 = dc_fixpt_mul(t, t);
910 t3 = dc_fixpt_mul(t2, t);
911 temp1 = dc_fixpt_mul(two, t3);
912 temp2 = dc_fixpt_mul(three, t2);
914 // (2t^3 - 3t^2 + 1) * ks
915 E2 = dc_fixpt_mul(ks, dc_fixpt_add(dc_fixpt_one,
916 dc_fixpt_sub(temp1, temp2)));
918 // (-2t^3 + 3t^2) * max_lum_pq
919 E2 = dc_fixpt_add(E2, dc_fixpt_mul(max_lum_pq,
920 dc_fixpt_sub(temp2, temp1)));
922 temp1 = dc_fixpt_mul(two, t2);
923 temp2 = dc_fixpt_sub(dc_fixpt_one, ks);
925 // (t^3 - 2t^2 + t) * (1-ks)
926 E2 = dc_fixpt_add(E2, dc_fixpt_mul(temp2,
927 dc_fixpt_add(t, dc_fixpt_sub(t3, temp1))));
// temp2 becomes (1 - E2) ^ 4 by squaring twice.
931 temp1 = dc_fixpt_sub(dc_fixpt_one, E2);
932 temp2 = dc_fixpt_mul(temp1, temp1);
933 temp2 = dc_fixpt_mul(temp2, temp2);
936 E3 = dc_fixpt_add(E2, dc_fixpt_mul(min_lum_pq, temp2));
937 compute_de_pq(E3, out_x);
939 *out_x = dc_fixpt_div(*out_x, dc_fixpt_div(max_display, max_content));
// Builds the FreeSync HDR regamma curve into rgb_regamma for points
// 32..hw_points_num (inclusive), driven by fs_params.
// Visible behaviour:
//   - A zero max_content or max_display is rejected up front.
//   - min_display is fs_params->min_display / 10000, replaced by 0.1 when
//     the raw value exceeds 1000; max_display is replaced by 100 when the
//     raw value is below 100.
//   - X is scaled either through hermite_spline_eetf (tone-mapping path) or
//     by max_display / sdr_white_level (plain path).
//   - While the scaled X is below clip (1.0) the output is gamma 2.2 of it,
//     clamped to 0..1. The first point at or past clip is interpolated as
//     described in the comment inside the loop and clamped to 0..1.07.
//   - cal_buffer->buffer_index is reset to 0 before the loop and set to -1
//     afterwards.
// NOTE(review): the statement enabling use_eetf, the branch keywords choosing
// between the two scaling paths, the handling of later clipped points, the
// stores into rgb, the pointer increments, the numerator of the interpolation
// quotient and the return statements are not visible in this listing -
// confirm against the complete file.
942 static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
943 uint32_t hw_points_num,
944 const struct hw_x_point *coordinate_x,
945 const struct hdr_tm_params *fs_params,
946 struct calculate_buffer *cal_buffer)
949 struct pwl_float_data_ex *rgb = rgb_regamma;
950 const struct hw_x_point *coord_x = coordinate_x;
951 const struct hw_x_point *prv_coord_x = coord_x;
952 struct fixed31_32 scaledX = dc_fixpt_zero;
953 struct fixed31_32 scaledX1 = dc_fixpt_zero;
954 struct fixed31_32 max_display;
955 struct fixed31_32 min_display;
956 struct fixed31_32 max_content;
957 struct fixed31_32 clip = dc_fixpt_one;
958 struct fixed31_32 output;
959 bool use_eetf = false;
960 bool is_clipped = false;
961 struct fixed31_32 sdr_white_level;
962 struct fixed31_32 coordX_diff;
963 struct fixed31_32 out_dist_max;
964 struct fixed31_32 bright_norm;
966 if (fs_params->max_content == 0 ||
967 fs_params->max_display == 0)
970 max_display = dc_fixpt_from_int(fs_params->max_display);
971 min_display = dc_fixpt_from_fraction(fs_params->min_display, 10000);
972 max_content = dc_fixpt_from_int(fs_params->max_content);
973 sdr_white_level = dc_fixpt_from_int(fs_params->sdr_white_level);
975 if (fs_params->min_display > 1000) // cap at 0.1 at the bottom
976 min_display = dc_fixpt_from_fraction(1, 10);
977 if (fs_params->max_display < 100) // cap at 100 at the top
978 max_display = dc_fixpt_from_int(100);
980 // only max used, we don't adjust min luminance
981 if (fs_params->max_content > fs_params->max_display)
984 max_content = max_display;
987 cal_buffer->buffer_index = 0; // see var definition for more info
988 rgb += 32; // first 32 points have problems with fixed point, too small
991 for (i = 32; i <= hw_points_num; i++) {
994 /* max content is equal 1 */
995 scaledX1 = dc_fixpt_div(coord_x->x,
996 dc_fixpt_div(max_content, sdr_white_level));
997 hermite_spline_eetf(scaledX1, max_display, min_display,
998 max_content, &scaledX);
1000 scaledX = dc_fixpt_div(coord_x->x,
1001 dc_fixpt_div(max_display, sdr_white_level));
1003 if (dc_fixpt_lt(scaledX, clip)) {
1004 if (dc_fixpt_lt(scaledX, dc_fixpt_zero))
1005 output = dc_fixpt_zero;
1007 output = calculate_gamma22(scaledX, use_eetf, cal_buffer);
1009 // Ensure output respects reasonable boundaries
1010 output = dc_fixpt_clamp(output, dc_fixpt_zero, dc_fixpt_one);
1016 /* Here clipping happens for the first time */
1019 /* The next few lines implement the equation
1020 * output = prev_out +
1021 * (coord_x->x - prev_coord_x->x) *
1022 * (1.0 - prev_out) /
1023 * (maxDisp/sdr_white_level - prevCoordX)
1025 * This equation interpolates the first point
1026 * after max_display/80 so that the slope from
1027 * hw_x_before_max and hw_x_after_max is such
1028 * that we hit Y=1.0 at max_display/80.
1031 coordX_diff = dc_fixpt_sub(coord_x->x, prv_coord_x->x);
1032 out_dist_max = dc_fixpt_sub(dc_fixpt_one, output);
1033 bright_norm = dc_fixpt_div(max_display, sdr_white_level);
1035 output = dc_fixpt_add(
1036 output, dc_fixpt_mul(
1037 coordX_diff, dc_fixpt_div(
1039 dc_fixpt_sub(bright_norm, prv_coord_x->x)
1044 /* Relaxing the maximum boundary to 1.07 (instead of 1.0)
1045 * because the last point in the curve must be such that
1046 * the maximum display pixel brightness interpolates to
1047 * exactly 1.0. The worst case scenario was calculated
1048 * around 1.057, so the limit of 1.07 leaves some safety
1051 output = dc_fixpt_clamp(output, dc_fixpt_zero,
1052 dc_fixpt_from_fraction(107, 100));
1059 /* Every other clipping after the first
1060 * one is dealt with here
1067 prv_coord_x = coord_x;
1071 cal_buffer->buffer_index = -1;
// Builds the degamma curve of a predefined transfer function into curve for
// points 0..hw_points_num (inclusive), in three consecutive index ranges:
//   0 .. begin_index - 1          -> zero on all channels
//   begin_index .. end_index - 1  -> translate_to_linear_space_ex of X with
//                                    channel 0 coefficients, copied to g and b
//   end_index .. hw_points_num    -> one on all channels
// begin_index is 13 regions in, end_index is 12 regions after that.
// NOTE(review): the loops terminate on != rather than <, so they rely on
// begin_index <= end_index <= hw_points_num + 1; the loop increments, the
// initialisation of i and the return statements are not visible in this
// listing - confirm against the complete file.
1076 static bool build_degamma(struct pwl_float_data_ex *curve,
1077 uint32_t hw_points_num,
1078 const struct hw_x_point *coordinate_x, enum dc_transfer_func_predefined type)
1081 struct gamma_coefficients coeff;
1082 uint32_t begin_index, end_index;
1085 if (!build_coefficients(&coeff, type))
1090 /* X points is 2^-25 to 2^7
1091 * De-gamma X is 2^-12 to 2^0 – we are skipping first -12-(-25) = 13 regions
1093 begin_index = 13 * NUM_PTS_IN_REGION;
1094 end_index = begin_index + 12 * NUM_PTS_IN_REGION;
1096 while (i != begin_index) {
1097 curve[i].r = dc_fixpt_zero;
1098 curve[i].g = dc_fixpt_zero;
1099 curve[i].b = dc_fixpt_zero;
1103 while (i != end_index) {
1104 curve[i].r = translate_to_linear_space_ex(
1105 coordinate_x[i].x, &coeff, 0);
1106 curve[i].g = curve[i].r;
1107 curve[i].b = curve[i].r;
1110 while (i != hw_points_num + 1) {
1111 curve[i].r = dc_fixpt_one;
1112 curve[i].g = dc_fixpt_one;
1113 curve[i].b = dc_fixpt_one;
// Builds the HLG degamma curve into degamma: for each hardware point up to
// and including hw_points_num, the red value is compute_hlg_eotf of the X
// coordinate with the given sdr_white_level and max_luminance_nits.
// NOTE(review): the green and blue stores, the pointer and counter
// increments and the initialisation of i are not visible in this listing;
// presumably g and b copy r - confirm against the complete file.
1125 static void build_hlg_degamma(struct pwl_float_data_ex *degamma,
1126 uint32_t hw_points_num,
1127 const struct hw_x_point *coordinate_x,
1128 uint32_t sdr_white_level, uint32_t max_luminance_nits)
1132 struct pwl_float_data_ex *rgb = degamma;
1133 const struct hw_x_point *coord_x = coordinate_x;
1136 // check when i == 434
1137 while (i != hw_points_num + 1) {
1138 compute_hlg_eotf(coord_x->x, &rgb->r, sdr_white_level, max_luminance_nits);
// Builds the HLG regamma curve into regamma: for each hardware point up to
// and including hw_points_num, the red value is compute_hlg_oetf of the X
// coordinate with the given sdr_white_level and max_luminance_nits.
// NOTE(review): the green and blue stores, the pointer and counter
// increments and the initialisation of i are not visible in this listing;
// presumably g and b copy r - confirm against the complete file.
1148 static void build_hlg_regamma(struct pwl_float_data_ex *regamma,
1149 uint32_t hw_points_num,
1150 const struct hw_x_point *coordinate_x,
1151 uint32_t sdr_white_level, uint32_t max_luminance_nits)
1155 struct pwl_float_data_ex *rgb = regamma;
1156 const struct hw_x_point *coord_x = coordinate_x;
1161 while (i != hw_points_num + 1) {
1162 compute_hlg_oetf(coord_x->x, &rgb->r, sdr_white_level, max_luminance_nits);
// Normalises the fixed-point ramp entries into pwl_rgb.
// Pass 1 picks the divisor: 0xFF00 by default, 0xFFFF as soon as any red,
// green or blue entry exceeds 0xFF00.
// Pass 2 writes entry / divisor for each of the ramp->num_entries entries.
// After that, three groups of r, g, b stores write values derived from the
// last real entry (rgb_last) by multiplication.
// NOTE(review): both passes are do-while loops, so they presumably assume
// num_entries is at least 1. The second operand of the trailing
// multiplications (presumably dividers.divider1..3), the pointer increments
// and the loop bodies around i are not visible in this listing - confirm.
1171 static void scale_gamma(struct pwl_float_data *pwl_rgb,
1172 const struct dc_gamma *ramp,
1173 struct dividers dividers)
1175 const struct fixed31_32 max_driver = dc_fixpt_from_int(0xFFFF);
1176 const struct fixed31_32 max_os = dc_fixpt_from_int(0xFF00);
1177 struct fixed31_32 scaler = max_os;
1179 struct pwl_float_data *rgb = pwl_rgb;
1180 struct pwl_float_data *rgb_last = rgb + ramp->num_entries - 1;
1185 if (dc_fixpt_lt(max_os, ramp->entries.red[i]) ||
1186 dc_fixpt_lt(max_os, ramp->entries.green[i]) ||
1187 dc_fixpt_lt(max_os, ramp->entries.blue[i])) {
1188 scaler = max_driver;
1192 } while (i != ramp->num_entries);
1197 rgb->r = dc_fixpt_div(
1198 ramp->entries.red[i], scaler);
1199 rgb->g = dc_fixpt_div(
1200 ramp->entries.green[i], scaler);
1201 rgb->b = dc_fixpt_div(
1202 ramp->entries.blue[i], scaler);
1206 } while (i != ramp->num_entries);
1208 rgb->r = dc_fixpt_mul(rgb_last->r,
1210 rgb->g = dc_fixpt_mul(rgb_last->g,
1212 rgb->b = dc_fixpt_mul(rgb_last->b,
1217 rgb->r = dc_fixpt_mul(rgb_last->r,
1219 rgb->g = dc_fixpt_mul(rgb_last->g,
1221 rgb->b = dc_fixpt_mul(rgb_last->b,
1226 rgb->r = dc_fixpt_mul(rgb_last->r,
1228 rgb->g = dc_fixpt_mul(rgb_last->g,
1230 rgb->b = dc_fixpt_mul(rgb_last->b,
// Normalises a ramp whose entries may fall outside 0..1 into pwl_rgb.
// Pass 1 finds min (starting from 0) and max (starting from 1) over all
// three channels. If min is negative, delta = -min; offset = min + max.
// Pass 2 writes a quotient with divisor offset for every entry.
// Two further entries are then extrapolated linearly from the two entries
// before them: 2 * previous - the one before that.
// NOTE(review): with a negative min the shifted values span max - min, yet
// offset is min + max - confirm that this is intended. The dividend of the
// pass 2 quotient (presumably entry + delta), the increment of i between the
// two extrapolated entries and any use of dividers are not visible in this
// listing.
1234 static void scale_gamma_dx(struct pwl_float_data *pwl_rgb,
1235 const struct dc_gamma *ramp,
1236 struct dividers dividers)
1239 struct fixed31_32 min = dc_fixpt_zero;
1240 struct fixed31_32 max = dc_fixpt_one;
1242 struct fixed31_32 delta = dc_fixpt_zero;
1243 struct fixed31_32 offset = dc_fixpt_zero;
1245 for (i = 0 ; i < ramp->num_entries; i++) {
1246 if (dc_fixpt_lt(ramp->entries.red[i], min))
1247 min = ramp->entries.red[i];
1249 if (dc_fixpt_lt(ramp->entries.green[i], min))
1250 min = ramp->entries.green[i];
1252 if (dc_fixpt_lt(ramp->entries.blue[i], min))
1253 min = ramp->entries.blue[i];
1255 if (dc_fixpt_lt(max, ramp->entries.red[i]))
1256 max = ramp->entries.red[i];
1258 if (dc_fixpt_lt(max, ramp->entries.green[i]))
1259 max = ramp->entries.green[i];
1261 if (dc_fixpt_lt(max, ramp->entries.blue[i]))
1262 max = ramp->entries.blue[i];
1265 if (dc_fixpt_lt(min, dc_fixpt_zero))
1266 delta = dc_fixpt_neg(min);
1268 offset = dc_fixpt_add(min, max);
1270 for (i = 0 ; i < ramp->num_entries; i++) {
1271 pwl_rgb[i].r = dc_fixpt_div(
1273 ramp->entries.red[i], delta), offset);
1274 pwl_rgb[i].g = dc_fixpt_div(
1276 ramp->entries.green[i], delta), offset);
1277 pwl_rgb[i].b = dc_fixpt_div(
1279 ramp->entries.blue[i], delta), offset);
// Linear extrapolation past the end of the ramp; it reads entries i-1 and i-2.
1283 pwl_rgb[i].r = dc_fixpt_sub(dc_fixpt_mul_int(
1284 pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
1285 pwl_rgb[i].g = dc_fixpt_sub(dc_fixpt_mul_int(
1286 pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
1287 pwl_rgb[i].b = dc_fixpt_sub(dc_fixpt_mul_int(
1288 pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
1290 pwl_rgb[i].r = dc_fixpt_sub(dc_fixpt_mul_int(
1291 pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
1292 pwl_rgb[i].g = dc_fixpt_sub(dc_fixpt_mul_int(
1293 pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
1294 pwl_rgb[i].b = dc_fixpt_sub(dc_fixpt_mul_int(
1295 pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
// Normalises a user regamma ramp of GAMMA_RGB_256_ENTRIES entries per channel
// into pwl_rgb. ramp->gamma is read as three consecutive planes: red at
// [i], green at [i + 256] and blue at [i + 512].
// Pass 1 picks the divisor: 0xFF00 by default, 0xFFFF as soon as any value
// exceeds 0xFF00. Pass 2 writes value / divisor for every entry.
// After that, three groups of r, g, b stores write values derived from the
// last real entry (rgb_last) by multiplication.
// NOTE(review): the plane offsets 256 and 512 are literals while the loop
// bound is GAMMA_RGB_256_ENTRIES; they agree only if that constant is 256 -
// confirm. The second operand of the trailing multiplications (presumably
// dividers.divider1..3) and the pointer increments are not visible in this
// listing.
1298 /* todo: all these scale_gamma functions are inherently the same but
1299 * take different structures as params or different format for ramp
1300 * values. We could probably implement it in a more generic fashion
1302 static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb,
1303 const struct regamma_ramp *ramp,
1304 struct dividers dividers)
1306 unsigned short max_driver = 0xFFFF;
1307 unsigned short max_os = 0xFF00;
1308 unsigned short scaler = max_os;
1310 struct pwl_float_data *rgb = pwl_rgb;
1311 struct pwl_float_data *rgb_last = rgb + GAMMA_RGB_256_ENTRIES - 1;
1315 if (ramp->gamma[i] > max_os ||
1316 ramp->gamma[i + 256] > max_os ||
1317 ramp->gamma[i + 512] > max_os) {
1318 scaler = max_driver;
1322 } while (i != GAMMA_RGB_256_ENTRIES);
1326 rgb->r = dc_fixpt_from_fraction(
1327 ramp->gamma[i], scaler);
1328 rgb->g = dc_fixpt_from_fraction(
1329 ramp->gamma[i + 256], scaler);
1330 rgb->b = dc_fixpt_from_fraction(
1331 ramp->gamma[i + 512], scaler);
1335 } while (i != GAMMA_RGB_256_ENTRIES);
1337 rgb->r = dc_fixpt_mul(rgb_last->r,
1339 rgb->g = dc_fixpt_mul(rgb_last->g,
1341 rgb->b = dc_fixpt_mul(rgb_last->b,
1346 rgb->r = dc_fixpt_mul(rgb_last->r,
1348 rgb->g = dc_fixpt_mul(rgb_last->g,
1350 rgb->b = dc_fixpt_mul(rgb_last->b,
1355 rgb->r = dc_fixpt_mul(rgb_last->r,
1357 rgb->g = dc_fixpt_mul(rgb_last->g,
1359 rgb->b = dc_fixpt_mul(rgb_last->b,
/*
 * RS3+ color transform DDI - 1D LUT adjustment is composed with regamma here.
 * Input is evenly distributed in the output color space as specified in
 * SetTimings.
 *
 * Interpolation details:
 * 1D LUT has 4096 values which give curve correction in 0-1 float range
 * for evenly spaced points in 0-1 range. lut1D[index] gives correction
 * for index/4095.
 * First we find index for which:
 *	index/4095 <= regamma_y < (index+1)/4095 =>
 *	index <= 4095*regamma_y < index + 1
 * norm_y = 4095*regamma_y, and index is norm_y rounded down (floor)
 * lut1 = lut1D[index], lut2 = lut1D[index+1]
 *
 * adjustedY is then the linear interpolation of regamma Y between lut1 and
 * lut2:
 *	adjustedY = lut1 + (norm_y - index) * (lut2 - lut1)
 *
 * Custom degamma on Linux uses the same interpolation math, so is handled here
 */
1382 static void apply_lut_1d(
1383 const struct dc_gamma *ramp,
1384 uint32_t num_hw_points,
1385 struct dc_transfer_func_distributed_points *tf_pts)
1389 struct fixed31_32 *regamma_y;
1390 struct fixed31_32 norm_y;
1391 struct fixed31_32 lut1;
1392 struct fixed31_32 lut2;
1393 const int max_lut_index = 4095;
1394 const struct fixed31_32 penult_lut_index_f =
1395 dc_fixpt_from_int(max_lut_index-1);
1396 const struct fixed31_32 max_lut_index_f =
1397 dc_fixpt_from_int(max_lut_index);
1398 int32_t index = 0, index_next = 0;
1399 struct fixed31_32 index_f;
1400 struct fixed31_32 delta_lut;
1401 struct fixed31_32 delta_index;
1403 if (ramp->type != GAMMA_CS_TFM_1D && ramp->type != GAMMA_CUSTOM)
1404 return; // this is not expected
1406 for (i = 0; i < num_hw_points; i++) {
1407 for (color = 0; color < 3; color++) {
1409 regamma_y = &tf_pts->red[i];
1410 else if (color == 1)
1411 regamma_y = &tf_pts->green[i];
1413 regamma_y = &tf_pts->blue[i];
1415 norm_y = dc_fixpt_mul(max_lut_index_f,
1417 index = dc_fixpt_floor(norm_y);
1418 index_f = dc_fixpt_from_int(index);
1423 if (index <= max_lut_index)
1424 index_next = (index == max_lut_index) ? index : index+1;
1426 /* Here we are dealing with the last point in the curve,
1427 * which in some cases might exceed the range given by
1428 * max_lut_index. So we interpolate the value using
1429 * max_lut_index and max_lut_index - 1.
1431 index = max_lut_index - 1;
1432 index_next = max_lut_index;
1433 index_f = penult_lut_index_f;
1437 lut1 = ramp->entries.red[index];
1438 lut2 = ramp->entries.red[index_next];
1439 } else if (color == 1) {
1440 lut1 = ramp->entries.green[index];
1441 lut2 = ramp->entries.green[index_next];
1443 lut1 = ramp->entries.blue[index];
1444 lut2 = ramp->entries.blue[index_next];
1447 // we have everything now, so interpolate
1448 delta_lut = dc_fixpt_sub(lut2, lut1);
1449 delta_index = dc_fixpt_sub(norm_y, index_f);
1451 *regamma_y = dc_fixpt_add(lut1,
1452 dc_fixpt_mul(delta_index, delta_lut));
1457 static void build_evenly_distributed_points(
1458 struct gamma_pixel *points,
1459 uint32_t numberof_points,
1460 struct dividers dividers)
1462 struct gamma_pixel *p = points;
1463 struct gamma_pixel *p_last;
1467 // This function should not gets called with 0 as a parameter
1468 ASSERT(numberof_points > 0);
1469 p_last = p + numberof_points - 1;
1472 struct fixed31_32 value = dc_fixpt_from_fraction(i,
1473 numberof_points - 1);
1481 } while (i < numberof_points);
1483 p->r = dc_fixpt_div(p_last->r, dividers.divider1);
1484 p->g = dc_fixpt_div(p_last->g, dividers.divider1);
1485 p->b = dc_fixpt_div(p_last->b, dividers.divider1);
1489 p->r = dc_fixpt_div(p_last->r, dividers.divider2);
1490 p->g = dc_fixpt_div(p_last->g, dividers.divider2);
1491 p->b = dc_fixpt_div(p_last->b, dividers.divider2);
1495 p->r = dc_fixpt_div(p_last->r, dividers.divider3);
1496 p->g = dc_fixpt_div(p_last->g, dividers.divider3);
1497 p->b = dc_fixpt_div(p_last->b, dividers.divider3);
1500 static inline void copy_rgb_regamma_to_coordinates_x(
1501 struct hw_x_point *coordinates_x,
1502 uint32_t hw_points_num,
1503 const struct pwl_float_data_ex *rgb_ex)
1505 struct hw_x_point *coords = coordinates_x;
1507 const struct pwl_float_data_ex *rgb_regamma = rgb_ex;
1509 while (i <= hw_points_num + 1) {
1510 coords->regamma_y_red = rgb_regamma->r;
1511 coords->regamma_y_green = rgb_regamma->g;
1512 coords->regamma_y_blue = rgb_regamma->b;
1520 static bool calculate_interpolated_hardware_curve(
1521 const struct dc_gamma *ramp,
1522 struct pixel_gamma_point *coeff128,
1523 struct pwl_float_data *rgb_user,
1524 const struct hw_x_point *coordinates_x,
1525 const struct gamma_pixel *axis_x,
1526 uint32_t number_of_points,
1527 struct dc_transfer_func_distributed_points *tf_pts)
1530 const struct pixel_gamma_point *coeff = coeff128;
1531 uint32_t max_entries = 3 - 1;
1535 for (i = 0; i < 3; i++) {
1536 if (!build_custom_gamma_mapping_coefficients_worker(
1537 ramp, coeff128, coordinates_x, axis_x, i,
1543 max_entries += ramp->num_entries;
1545 /* TODO: float point case */
1547 while (i <= number_of_points) {
1548 tf_pts->red[i] = calculate_mapped_value(
1549 rgb_user, coeff, CHANNEL_NAME_RED, max_entries);
1550 tf_pts->green[i] = calculate_mapped_value(
1551 rgb_user, coeff, CHANNEL_NAME_GREEN, max_entries);
1552 tf_pts->blue[i] = calculate_mapped_value(
1553 rgb_user, coeff, CHANNEL_NAME_BLUE, max_entries);
/* The "old" interpolation uses a complicated scheme to build an array of
 * coefficients while also using an array of 0-255 normalized to 0-1.
 * Then there's another loop using both of the above + new scaled user ramp
 * and we concatenate them. It also searches for points of interpolation and
 * uses enums for positions.
 *
 * This function uses a different approach:
 * user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255
 * To find index for hwX, we notice the following:
 * i/255 <= hwX < (i+1)/255 <=> i <= 255*hwX < i+1
 * See apply_lut_1d which is the same principle, but on 4K entry 1D LUT
 *
 * Once the index is known, combined Y is simply:
 * user_ramp(index) + (255*hwX - index)*(user_ramp(index+1) - user_ramp(index))
 *
 * We should switch to this method in all cases, it's simpler and faster.
 * ToDo one day - for now this only applies to ADL regamma to avoid regression
 * for regular use cases (sRGB and PQ)
 */
// Fill tf_pts by linearly interpolating the scaled user ramp (rgb_user) at each
// hardware X coordinate, one colour channel at a time.
//   hw_points_num: the main loop below runs while i <= hw_points_num + 1.
//   rgb_user:      user ramp, read at indices index and index_next (0..255).
//   tf_pts:        receives the interpolated red, green and blue values.
// NOTE(review): apply_degamma is tested below but its declaration is not visible
// in this listing; the caller in this file passes regamma->flags.bits.applyDegamma.
1581 static void interpolate_user_regamma(uint32_t hw_points_num,
1582 struct pwl_float_data *rgb_user,
1584 struct dc_transfer_func_distributed_points *tf_pts)
1590 struct fixed31_32 *tf_point;
1591 struct fixed31_32 hw_x;
// 255 is the highest user ramp index; hw_x is multiplied by it to get a ramp position.
1592 struct fixed31_32 norm_factor =
1593 dc_fixpt_from_int(255);
1594 struct fixed31_32 norm_x;
1595 struct fixed31_32 index_f;
1596 struct fixed31_32 lut1;
1597 struct fixed31_32 lut2;
1598 struct fixed31_32 delta_lut;
1599 struct fixed31_32 delta_index;
// Leading points are forced to zero.
// NOTE(review): the loop header that bounds this zero fill is not visible here.
1602 /* fixed_pt library has problems handling too small values */
1604 tf_pts->red[i] = dc_fixpt_zero;
1605 tf_pts->green[i] = dc_fixpt_zero;
1606 tf_pts->blue[i] = dc_fixpt_zero;
// Remaining points, up to and including index hw_points_num + 1.
1609 while (i <= hw_points_num + 1) {
1610 for (color = 0; color < 3; color++) {
// Select the output slot for the current channel (red, green, then blue).
1612 tf_point = &tf_pts->red[i];
1613 else if (color == 1)
1614 tf_point = &tf_pts->green[i];
1616 tf_point = &tf_pts->blue[i];
// With apply_degamma set, each channel takes its own regamma_y_* value as X.
// The plain coordinates_x[i].x assignment further down is presumably the
// fallback branch (the else line is not visible) - TODO confirm.
1618 if (apply_degamma) {
1620 hw_x = coordinates_x[i].regamma_y_red;
1621 else if (color == 1)
1622 hw_x = coordinates_x[i].regamma_y_green;
1624 hw_x = coordinates_x[i].regamma_y_blue;
1626 hw_x = coordinates_x[i].x;
// Position within the ramp, and the ramp index at or below it.
1628 norm_x = dc_fixpt_mul(norm_factor, hw_x);
1629 index = dc_fixpt_floor(norm_x);
// Range test for the ramp index.
// NOTE(review): the statement guarded by this test is not visible here;
// presumably it skips the channel - TODO confirm.
1630 if (index < 0 || index > 255)
1633 index_f = dc_fixpt_from_int(index);
// Index 255 has no upper neighbour, so it is paired with itself.
1634 index_next = (index == 255) ? index : index + 1;
// Fetch the two neighbouring ramp entries for this channel.
1637 lut1 = rgb_user[index].r;
1638 lut2 = rgb_user[index_next].r;
1639 } else if (color == 1) {
1640 lut1 = rgb_user[index].g;
1641 lut2 = rgb_user[index_next].g;
1643 lut1 = rgb_user[index].b;
1644 lut2 = rgb_user[index_next].b;
1647 // we have everything now, so interpolate
1648 delta_lut = dc_fixpt_sub(lut2, lut1);
1649 delta_index = dc_fixpt_sub(norm_x, index_f);
// result = lut1 + (norm_x - index) * (lut2 - lut1)
1651 *tf_point = dc_fixpt_add(lut1,
1652 dc_fixpt_mul(delta_index, delta_lut));
1658 static void build_new_custom_resulted_curve(
1659 uint32_t hw_points_num,
1660 struct dc_transfer_func_distributed_points *tf_pts)
1664 while (i != hw_points_num + 1) {
1665 tf_pts->red[i] = dc_fixpt_clamp(
1666 tf_pts->red[i], dc_fixpt_zero,
1668 tf_pts->green[i] = dc_fixpt_clamp(
1669 tf_pts->green[i], dc_fixpt_zero,
1671 tf_pts->blue[i] = dc_fixpt_clamp(
1672 tf_pts->blue[i], dc_fixpt_zero,
1679 static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma,
1680 uint32_t hw_points_num, struct calculate_buffer *cal_buffer)
1684 struct gamma_coefficients coeff;
1685 struct pwl_float_data_ex *rgb = rgb_regamma;
1686 const struct hw_x_point *coord_x = coordinates_x;
1688 build_coefficients(&coeff, TRANSFER_FUNCTION_SRGB);
1691 while (i != hw_points_num + 1) {
1692 rgb->r = translate_from_linear_space_ex(
1693 coord_x->x, &coeff, 0, cal_buffer);
// Produce the final tf_pts curve from the ideal curve in rgb_regamma, either by
// mapping it through the user ramp or by copying it unchanged.
// NOTE(review): mapUserRamp is tested below but the tail of the parameter list
// is not visible in this listing; callers in this file pass further arguments
// after tf_pts.
1702 static bool map_regamma_hw_to_x_user(
1703 const struct dc_gamma *ramp,
1704 struct pixel_gamma_point *coeff128,
1705 struct pwl_float_data *rgb_user,
1706 struct hw_x_point *coords_x,
1707 const struct gamma_pixel *axis_x,
1708 const struct pwl_float_data_ex *rgb_regamma,
1709 uint32_t hw_points_num,
1710 struct dc_transfer_func_distributed_points *tf_pts,
1714 /* setup to spare calculated ideal regamma values */
1717 struct hw_x_point *coords = coords_x;
1718 const struct pwl_float_data_ex *regamma = rgb_regamma;
// User ramp path: store the ideal curve in coords, then interpolate it
// through the user ramp into tf_pts.
1720 if (ramp && mapUserRamp) {
1721 copy_rgb_regamma_to_coordinates_x(coords,
// NOTE(review): the bool result of calculate_interpolated_hardware_curve is
// not used by this call.
1725 calculate_interpolated_hardware_curve(
1726 ramp, coeff128, rgb_user, coords, axis_x,
1727 hw_points_num, tf_pts);
// Copy path: points 0 through hw_points_num are taken from rgb_regamma as is.
1729 /* just copy current rgb_regamma into tf_pts */
1730 while (i <= hw_points_num) {
1731 tf_pts->red[i] = regamma->r;
1732 tf_pts->green[i] = regamma->g;
1733 tf_pts->blue[i] = regamma->b;
// NOTE(review): any condition guarding the clamp call below is not visible here.
1741 /* this should be named differently, all it does is clamp to 0-1 */
1742 build_new_custom_resulted_curve(hw_points_num, tf_pts);
// Number of entries added to the nominal point count in the kcalloc and
// kvcalloc calls of this file (for example GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS).
1748 #define _EXTRA_POINTS 3
// Build output_tf->tf_pts from user supplied regamma coefficients.
//   output_tf:  receives the curve; its type is set to TF_TYPE_DISTRIBUTED_POINTS.
//   regamma:    source of the integer coefficients A0..A3 and gamma per channel.
//   cal_buffer: passed through to translate_from_linear_space_ex.
//   ramp:       optional; when its type is GAMMA_CS_TFM_1D it is applied as a 1D LUT.
// NOTE(review): the return statement is not visible in this listing.
1750 bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
1751 const struct regamma_lut *regamma,
1752 struct calculate_buffer *cal_buffer,
1753 const struct dc_gamma *ramp)
1755 struct gamma_coefficients coeff;
1756 const struct hw_x_point *coord_x = coordinates_x;
// Convert the integer coefficients to fixed point: A0 is divided by 10000000,
// A1, A2, A3 and gamma by 1000.
// NOTE(review): the loop over i that encloses these lines is not visible here.
1760 coeff.a0[i] = dc_fixpt_from_fraction(
1761 regamma->coeff.A0[i], 10000000);
1762 coeff.a1[i] = dc_fixpt_from_fraction(
1763 regamma->coeff.A1[i], 1000);
1764 coeff.a2[i] = dc_fixpt_from_fraction(
1765 regamma->coeff.A2[i], 1000);
1766 coeff.a3[i] = dc_fixpt_from_fraction(
1767 regamma->coeff.A3[i], 1000);
1768 coeff.user_gamma[i] = dc_fixpt_from_fraction(
1769 regamma->coeff.gamma[i], 1000);
// Leading points are forced to zero.
// NOTE(review): the loop header that bounds this zero fill is not visible here.
1775 /* fixed_pt library has problems handling too small values */
1777 output_tf->tf_pts.red[i] = dc_fixpt_zero;
1778 output_tf->tf_pts.green[i] = dc_fixpt_zero;
1779 output_tf->tf_pts.blue[i] = dc_fixpt_zero;
// Remaining points up to index MAX_HW_POINTS: evaluate each channel with its
// own coefficient set (third argument 0, 1, 2 selects the channel).
1783 while (i != MAX_HW_POINTS + 1) {
1784 output_tf->tf_pts.red[i] = translate_from_linear_space_ex(
1785 coord_x->x, &coeff, 0, cal_buffer);
1786 output_tf->tf_pts.green[i] = translate_from_linear_space_ex(
1787 coord_x->x, &coeff, 1, cal_buffer);
1788 output_tf->tf_pts.blue[i] = translate_from_linear_space_ex(
1789 coord_x->x, &coeff, 2, cal_buffer);
// Optional 1D LUT composition on top of the computed curve.
1794 if (ramp && ramp->type == GAMMA_CS_TFM_1D)
1795 apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts);
1797 // this function just clamps output to 0-1
1798 build_new_custom_resulted_curve(MAX_HW_POINTS, &output_tf->tf_pts);
1799 output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
1804 bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
1805 const struct regamma_lut *regamma,
1806 struct calculate_buffer *cal_buffer,
1807 const struct dc_gamma *ramp)
1809 struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
1810 struct dividers dividers;
1812 struct pwl_float_data *rgb_user = NULL;
1813 struct pwl_float_data_ex *rgb_regamma = NULL;
1816 if (regamma == NULL)
1819 output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
1821 rgb_user = kcalloc(GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS,
1825 goto rgb_user_alloc_fail;
1827 rgb_regamma = kcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
1828 sizeof(*rgb_regamma),
1831 goto rgb_regamma_alloc_fail;
1833 dividers.divider1 = dc_fixpt_from_fraction(3, 2);
1834 dividers.divider2 = dc_fixpt_from_int(2);
1835 dividers.divider3 = dc_fixpt_from_fraction(5, 2);
1837 scale_user_regamma_ramp(rgb_user, ®amma->ramp, dividers);
1839 if (regamma->flags.bits.applyDegamma == 1) {
1840 apply_degamma_for_user_regamma(rgb_regamma, MAX_HW_POINTS, cal_buffer);
1841 copy_rgb_regamma_to_coordinates_x(coordinates_x,
1842 MAX_HW_POINTS, rgb_regamma);
1845 interpolate_user_regamma(MAX_HW_POINTS, rgb_user,
1846 regamma->flags.bits.applyDegamma, tf_pts);
1848 // no custom HDR curves!
1849 tf_pts->end_exponent = 0;
1850 tf_pts->x_point_at_y1_red = 1;
1851 tf_pts->x_point_at_y1_green = 1;
1852 tf_pts->x_point_at_y1_blue = 1;
1854 if (ramp && ramp->type == GAMMA_CS_TFM_1D)
1855 apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts);
1857 // this function just clamps output to 0-1
1858 build_new_custom_resulted_curve(MAX_HW_POINTS, tf_pts);
1863 rgb_regamma_alloc_fail:
1865 rgb_user_alloc_fail:
// Compute the distributed-point degamma curve for input_tf into input_tf->tf_pts.
//   dc_caps:     optional capability block; its dpp.dgam_rom_caps flags are tested
//                together with the PQ, GAMMA22 and HLG transfer functions.
//   input_tf:    transfer function to convert; its type is set to
//                TF_TYPE_DISTRIBUTED_POINTS further down.
//   ramp:        optional user ramp (GAMMA_RGB_256) or custom LUT (GAMMA_CUSTOM).
//   mapUserRamp: whether a GAMMA_RGB_256 ramp is mapped onto the curve.
// NOTE(review): return statements, several call arguments and most cleanup
// labels are not visible in this listing; comments below cover visible lines only.
1869 bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps,
1870 struct dc_transfer_func *input_tf,
1871 const struct dc_gamma *ramp, bool mapUserRamp)
1873 struct dc_transfer_func_distributed_points *tf_pts = &input_tf->tf_pts;
1874 struct dividers dividers;
1875 struct pwl_float_data *rgb_user = NULL;
1876 struct pwl_float_data_ex *curve = NULL;
1877 struct gamma_pixel *axis_x = NULL;
1878 struct pixel_gamma_point *coeff = NULL;
1879 enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
// Bypass and predefined transfer functions are tested first.
// NOTE(review): the statements taken by these tests are not visible here.
1883 if (input_tf->type == TF_TYPE_BYPASS)
1886 /* we can use hardcoded curve for plain SRGB TF
1887 * If linear, it's bypass if on user ramp
1889 if (input_tf->type == TF_TYPE_PREDEFINED) {
1890 if ((input_tf->tf == TRANSFER_FUNCTION_SRGB ||
1891 input_tf->tf == TRANSFER_FUNCTION_LINEAR) &&
// Capability tests: only when dc_caps is given and dpp.dcn_arch is 1.
1895 if (dc_caps != NULL &&
1896 dc_caps->dpp.dcn_arch == 1) {
1898 if (input_tf->tf == TRANSFER_FUNCTION_PQ &&
1899 dc_caps->dpp.dgam_rom_caps.pq == 1)
1902 if (input_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
1903 dc_caps->dpp.dgam_rom_caps.gamma2_2 == 1)
1906 // HLG OOTF not accounted for
1907 if (input_tf->tf == TRANSFER_FUNCTION_HLG &&
1908 dc_caps->dpp.dgam_rom_caps.hlg == 1)
// From here on the curve is computed point by point.
1913 input_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
// User ramp path: allocate and fill the scaled ramp and its X axis.
1915 if (mapUserRamp && ramp && ramp->type == GAMMA_RGB_256) {
1916 rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
1920 goto rgb_user_alloc_fail;
1922 axis_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axis_x),
1925 goto axis_x_alloc_fail;
// Same divider values as the other ramp scaling callers in this file.
1927 dividers.divider1 = dc_fixpt_from_fraction(3, 2);
1928 dividers.divider2 = dc_fixpt_from_int(2);
1929 dividers.divider3 = dc_fixpt_from_fraction(5, 2);
1931 build_evenly_distributed_points(
1936 scale_gamma(rgb_user, ramp, dividers);
// Work buffers sized for the hardware points plus the extra entries.
1939 curve = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*curve),
1942 goto curve_alloc_fail;
1944 coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff),
1947 goto coeff_alloc_fail;
// Select the curve builder by transfer function.
// NOTE(review): tf is initialised to TRANSFER_FUNCTION_SRGB above; any later
// assignment to it is not visible in this listing - TODO confirm.
1951 if (tf == TRANSFER_FUNCTION_PQ)
1955 else if (tf == TRANSFER_FUNCTION_SRGB ||
1956 tf == TRANSFER_FUNCTION_BT709 ||
1957 tf == TRANSFER_FUNCTION_GAMMA22 ||
1958 tf == TRANSFER_FUNCTION_GAMMA24 ||
1959 tf == TRANSFER_FUNCTION_GAMMA26)
1960 build_degamma(curve,
1964 else if (tf == TRANSFER_FUNCTION_HLG)
1965 build_hlg_degamma(curve,
1969 else if (tf == TRANSFER_FUNCTION_LINEAR) {
1970 // just copy coordinates_x into curve
1972 while (i != MAX_HW_POINTS + 1) {
1973 curve[i].r = coordinates_x[i].x;
1974 curve[i].g = curve[i].r;
1975 curve[i].b = curve[i].r;
1979 goto invalid_tf_fail;
// Degamma curves built here always end at X = 1 with exponent 0.
1981 tf_pts->end_exponent = 0;
1982 tf_pts->x_point_at_y1_red = 1;
1983 tf_pts->x_point_at_y1_green = 1;
1984 tf_pts->x_point_at_y1_blue = 1;
// PQ: the curve is copied into tf_pts directly for points 0..MAX_HW_POINTS.
1986 if (input_tf->tf == TRANSFER_FUNCTION_PQ) {
1987 /* just copy current rgb_regamma into tf_pts */
1988 struct pwl_float_data_ex *curvePt = curve;
1991 while (i <= MAX_HW_POINTS) {
1992 tf_pts->red[i] = curvePt->r;
1993 tf_pts->green[i] = curvePt->g;
1994 tf_pts->blue[i] = curvePt->b;
// Other transfer functions go through the shared mapping helper; the user
// ramp is mapped only for a GAMMA_RGB_256 ramp with mapUserRamp set.
2000 map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
2001 coordinates_x, axis_x, curve,
2002 MAX_HW_POINTS, tf_pts,
2003 mapUserRamp && ramp && ramp->type == GAMMA_RGB_256,
// A GAMMA_CUSTOM ramp is composed on top as a 1D LUT.
2009 if (ramp && ramp->type == GAMMA_CUSTOM)
2010 apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
2022 rgb_user_alloc_fail:
// Fill rgb_regamma with the regamma curve for transfer function trans and set
// the end point description (end_exponent and x_point_at_y1_*) in points.
//   fs_params:           optional; selects the freesync HDR builder for GAMMA22
//                        when non-NULL with skip_tm == 0.
//   sdr_ref_white_level: passed to build_pq.
//   cal_buffer:          scratch buffer. NOTE(review): no use of it is visible here.
// NOTE(review): trailing arguments of the build_* calls and the return
// statements are not visible in this listing.
2027 static bool calculate_curve(enum dc_transfer_func_predefined trans,
2028 struct dc_transfer_func_distributed_points *points,
2029 struct pwl_float_data_ex *rgb_regamma,
2030 const struct hdr_tm_params *fs_params,
2031 uint32_t sdr_ref_white_level,
2032 struct calculate_buffer *cal_buffer)
// Unity and linear: the curve is the identity, Y equals the hardware X.
2037 if (trans == TRANSFER_FUNCTION_UNITY ||
2038 trans == TRANSFER_FUNCTION_LINEAR) {
2039 points->end_exponent = 0;
2040 points->x_point_at_y1_red = 1;
2041 points->x_point_at_y1_green = 1;
2042 points->x_point_at_y1_blue = 1;
2044 for (i = 0; i <= MAX_HW_POINTS ; i++) {
2045 rgb_regamma[i].r = coordinates_x[i].x;
2046 rgb_regamma[i].g = coordinates_x[i].x;
2047 rgb_regamma[i].b = coordinates_x[i].x;
// PQ: end point is described with exponent 7 and x value 125.
2051 } else if (trans == TRANSFER_FUNCTION_PQ) {
2052 points->end_exponent = 7;
2053 points->x_point_at_y1_red = 125;
2054 points->x_point_at_y1_green = 125;
2055 points->x_point_at_y1_blue = 125;
2057 build_pq(rgb_regamma,
2060 sdr_ref_white_level);
// GAMMA22 with tone mapping parameters: freesync HDR builder.
// NOTE(review): no end point assignment is visible in this branch.
2063 } else if (trans == TRANSFER_FUNCTION_GAMMA22 &&
2064 fs_params != NULL && fs_params->skip_tm == 0) {
2065 build_freesync_hdr(rgb_regamma,
// HLG: end point is described with exponent 4 and x value 12.
2072 } else if (trans == TRANSFER_FUNCTION_HLG) {
2073 points->end_exponent = 4;
2074 points->x_point_at_y1_red = 12;
2075 points->x_point_at_y1_green = 12;
2076 points->x_point_at_y1_blue = 12;
2078 build_hlg_regamma(rgb_regamma,
// Remaining transfer functions, listed below, share the generic builder.
2085 // trans == TRANSFER_FUNCTION_SRGB
2086 // trans == TRANSFER_FUNCTION_BT709
2087 // trans == TRANSFER_FUNCTION_GAMMA22
2088 // trans == TRANSFER_FUNCTION_GAMMA24
2089 // trans == TRANSFER_FUNCTION_GAMMA26
2090 points->end_exponent = 0;
2091 points->x_point_at_y1_red = 1;
2092 points->x_point_at_y1_green = 1;
2093 points->x_point_at_y1_blue = 1;
2095 build_regamma(rgb_regamma,
// Compute the distributed-point regamma curve for output_tf into output_tf->tf_pts.
//   output_tf:    transfer function to convert; its type is set to
//                 TF_TYPE_DISTRIBUTED_POINTS further down.
//   ramp:         optional user ramp or 1D LUT.
//   mapUserRamp:  whether a GAMMA_RGB_256 ramp is mapped onto the curve.
//   canRomBeUsed: tested together with predefined sRGB below.
//   fs_params:    optional tone mapping parameters; also decide doClamping.
//   cal_buffer:   scratch buffer. NOTE(review): no use of it is visible here.
// NOTE(review): return statements, several call arguments and some cleanup
// statements are not visible in this listing.
2107 bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
2108 const struct dc_gamma *ramp, bool mapUserRamp, bool canRomBeUsed,
2109 const struct hdr_tm_params *fs_params,
2110 struct calculate_buffer *cal_buffer)
2112 struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
2113 struct dividers dividers;
2115 struct pwl_float_data *rgb_user = NULL;
2116 struct pwl_float_data_ex *rgb_regamma = NULL;
2117 struct gamma_pixel *axis_x = NULL;
2118 struct pixel_gamma_point *coeff = NULL;
2119 enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
2120 bool doClamping = true;
2123 if (output_tf->type == TF_TYPE_BYPASS)
2126 /* we can use hardcoded curve for plain SRGB TF */
2127 if (output_tf->type == TF_TYPE_PREDEFINED && canRomBeUsed == true &&
2128 output_tf->tf == TRANSFER_FUNCTION_SRGB) {
// NOTE(review): ramp is dereferenced in this test; any NULL check before it
// is not visible in this listing - TODO confirm.
2131 if ((ramp->is_identity && ramp->type != GAMMA_CS_TFM_1D) ||
2132 (!mapUserRamp && ramp->type == GAMMA_RGB_256))
2136 output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
// User ramp path: any ramp that is not a 1D LUT, and a GAMMA_RGB_256 ramp only
// when mapUserRamp is set.
2138 if (ramp && ramp->type != GAMMA_CS_TFM_1D &&
2139 (mapUserRamp || ramp->type != GAMMA_RGB_256)) {
2140 rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
2144 goto rgb_user_alloc_fail;
// NOTE(review): literal 3 here, while the sibling allocations use _EXTRA_POINTS
// (which is defined as 3 in this file).
2146 axis_x = kvcalloc(ramp->num_entries + 3, sizeof(*axis_x),
2149 goto axis_x_alloc_fail;
2151 dividers.divider1 = dc_fixpt_from_fraction(3, 2);
2152 dividers.divider2 = dc_fixpt_from_int(2);
2153 dividers.divider3 = dc_fixpt_from_fraction(5, 2);
2155 build_evenly_distributed_points(
// The scaling helper depends on the ramp format.
2160 if (ramp->type == GAMMA_RGB_256 && mapUserRamp)
2161 scale_gamma(rgb_user, ramp, dividers);
2162 else if (ramp->type == GAMMA_RGB_FLOAT_1024)
2163 scale_gamma_dx(rgb_user, ramp, dividers);
// Work buffers sized for the hardware points plus the extra entries.
2166 rgb_regamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
2167 sizeof(*rgb_regamma),
2170 goto rgb_regamma_alloc_fail;
2172 coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff),
2175 goto coeff_alloc_fail;
// Build the ideal curve for the selected transfer function.
2179 ret = calculate_curve(tf,
2183 output_tf->sdr_ref_white_level,
// Clamping is disabled only for GAMMA22 with tone mapping parameters present
// and skip_tm == 0, the same condition that selects the freesync HDR builder.
2187 doClamping = !(output_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
2188 fs_params != NULL && fs_params->skip_tm == 0);
// Map through the user ramp under the same conditions as the allocation above.
2190 map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
2191 coordinates_x, axis_x, rgb_regamma,
2192 MAX_HW_POINTS, tf_pts,
2193 (mapUserRamp || (ramp && ramp->type != GAMMA_RGB_256)) &&
2194 (ramp && ramp->type != GAMMA_CS_TFM_1D),
// A GAMMA_CS_TFM_1D ramp is composed on top as a 1D LUT.
2197 if (ramp && ramp->type == GAMMA_CS_TFM_1D)
2198 apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
// Cleanup labels; only the kvfree of rgb_regamma is visible in this listing.
2203 kvfree(rgb_regamma);
2204 rgb_regamma_alloc_fail:
2208 rgb_user_alloc_fail:
2212 bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
2213 struct dc_transfer_func_distributed_points *points)
2217 struct pwl_float_data_ex *rgb_degamma = NULL;
2219 if (trans == TRANSFER_FUNCTION_UNITY ||
2220 trans == TRANSFER_FUNCTION_LINEAR) {
2222 for (i = 0; i <= MAX_HW_POINTS ; i++) {
2223 points->red[i] = coordinates_x[i].x;
2224 points->green[i] = coordinates_x[i].x;
2225 points->blue[i] = coordinates_x[i].x;
2228 } else if (trans == TRANSFER_FUNCTION_PQ) {
2229 rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
2230 sizeof(*rgb_degamma),
2233 goto rgb_degamma_alloc_fail;
2236 build_de_pq(rgb_degamma,
2239 for (i = 0; i <= MAX_HW_POINTS ; i++) {
2240 points->red[i] = rgb_degamma[i].r;
2241 points->green[i] = rgb_degamma[i].g;
2242 points->blue[i] = rgb_degamma[i].b;
2246 kvfree(rgb_degamma);
2247 } else if (trans == TRANSFER_FUNCTION_SRGB ||
2248 trans == TRANSFER_FUNCTION_BT709 ||
2249 trans == TRANSFER_FUNCTION_GAMMA22 ||
2250 trans == TRANSFER_FUNCTION_GAMMA24 ||
2251 trans == TRANSFER_FUNCTION_GAMMA26) {
2252 rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
2253 sizeof(*rgb_degamma),
2256 goto rgb_degamma_alloc_fail;
2258 build_degamma(rgb_degamma,
2262 for (i = 0; i <= MAX_HW_POINTS ; i++) {
2263 points->red[i] = rgb_degamma[i].r;
2264 points->green[i] = rgb_degamma[i].g;
2265 points->blue[i] = rgb_degamma[i].b;
2269 kvfree(rgb_degamma);
2270 } else if (trans == TRANSFER_FUNCTION_HLG) {
2271 rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
2272 sizeof(*rgb_degamma),
2275 goto rgb_degamma_alloc_fail;
2277 build_hlg_degamma(rgb_degamma,
2281 for (i = 0; i <= MAX_HW_POINTS ; i++) {
2282 points->red[i] = rgb_degamma[i].r;
2283 points->green[i] = rgb_degamma[i].g;
2284 points->blue[i] = rgb_degamma[i].b;
2287 kvfree(rgb_degamma);
2289 points->end_exponent = 0;
2290 points->x_point_at_y1_red = 1;
2291 points->x_point_at_y1_green = 1;
2292 points->x_point_at_y1_blue = 1;
2294 rgb_degamma_alloc_fail: