2 * Support for Intel Camera Imaging ISP subsystem.
3 * Copyright (c) 2015, Intel Corporation.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 #ifndef _REF_VECTOR_FUNC_H_INCLUDED_
16 #define _REF_VECTOR_FUNC_H_INCLUDED_
/*
 * Select the storage class used by the declarations in this header.
 * With INLINE_VECTOR_FUNC defined, the function declarations expand to
 * "static inline"; otherwise they expand to "extern".
 * NOTE(review): the *_DATA_H variants expand to "static inline_DATA" and
 * "extern_DATA", which are not standard C specifiers; presumably these
 * names are provided elsewhere in the project - confirm.
 */
19 #ifdef INLINE_VECTOR_FUNC
20 #define STORAGE_CLASS_REF_VECTOR_FUNC_H static inline
21 #define STORAGE_CLASS_REF_VECTOR_DATA_H static inline_DATA
22 #else /* INLINE_VECTOR_FUNC */
23 #define STORAGE_CLASS_REF_VECTOR_FUNC_H extern
24 #define STORAGE_CLASS_REF_VECTOR_DATA_H extern_DATA
25 #endif /* INLINE_VECTOR_FUNC */
28 #include "ref_vector_func_types.h"
30 /* @brief Doubling multiply accumulate with saturation
32 * @param[in] acc accumulator
33 * @param[in] a multiply input
34 * @param[in] b multiply input
38 * This function will do a doubling multiply on
39 * inputs a and b, and will add the result to acc.
40 * In case of an overflow of acc, it will saturate.
42 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector2w OP_1w_maccd_sat(
47 /* @brief Doubling multiply accumulate
49 * @param[in] acc accumulator
50 * @param[in] a multiply input
51 * @param[in] b multiply input
55 * This function will do a doubling multiply on
56 * inputs a and b, and will add the result to acc.
57 * In case of overflow it will not saturate but wrap around.
59 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector2w OP_1w_maccd(
64 /* @brief Re-aligning multiply
66 * @param[in] a multiply input
67 * @param[in] b multiply input
68 * @param[in] shift shift amount
70 * @return (a*b)>>shift
72 * This function will multiply a with b, followed by a right
73 * shift with rounding. The result is saturated and cast
74 * to single precision.
76 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_mul_realigning(
81 /* @brief Leading bit index
85 * @return index of the leading bit of each element
87 * This function finds the index of leading one (set) bit of the
88 * input. The index starts with 0 for the LSB and can go up to
89 * ISP_VEC_ELEMBITS-1 for the MSB. For an input equal to zero,
90 * the returned index is -1.
92 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_lod(
95 /* @brief Config Unit Input Processing
98 * @param[in] input_scale input scaling factor
99 * @param[in] input_offset input offset factor
101 * @return scaled & offset added input clamped to MAXVALUE
103 * As part of input processing for piecewise linear estimation config unit,
104 * this function will perform scaling followed by adding offset and
105 * then clamping to the MAX InputValue
106 * It asserts -MAX_SHIFT_1W <= input_scale <= MAX_SHIFT_1W, and
107 * -MAX_SHIFT_1W <= input_offset <= MAX_SHIFT_1W
109 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_input_scaling_offset_clamping(
111 tscalar1w_5bit_signed input_scale,
112 tscalar1w_5bit_signed input_offset);
114 /* @brief Config Unit Output Processing
116 * @param[in] a output
117 * @param[in] output_scale output scaling factor
119 * @return scaled & clamped output value
121 * As part of output processing for piecewise linear estimation config unit,
122 * This function will perform scaling and then clamping to output
124 * It asserts -MAX_SHIFT_1W <= output_scale <= MAX_SHIFT_1W
126 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_output_scaling_clamping(
128 tscalar1w_5bit_signed output_scale);
130 /* @brief Config Unit Piecewiselinear estimation
133 * @param[in] config_points config parameter structure
135 * @return piecewise linear estimated output
137 * Given a set of N points {(x1,y1), (x2,y2), ..., (xn,yn)}, to find
138 * the functional value at an arbitrary point around the input set,
139 * this function will perform input processing followed by piecewise
140 * linear estimation and then output processing to yield the final value.
142 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_piecewise_estimation(
144 ref_config_points config_points);
146 /* @brief Fast Config Unit
149 * @param[in] init_vectors LUT data structure
151 * @return piecewise linear estimated output
152 * This block gets an input x and a set of input configuration points stored in a look-up
153 * table of 32 elements. First, the x input is clipped to be within the range [x1, xn+1].
154 * Then, it computes the interval in which the input lies. Finally, the output is computed
155 * by performing linear interpolation based on the interval properties (i.e. x_prev, slope,
156 * and offset). This block assumes that the points are equally spaced and that the interval
157 * size is a power of 2.
159 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_XCU(
161 xcu_ref_init_vectors init_vectors);
167 * @param[in] init_vectors LUT data structure
169 * @return logarithmic piecewise linear estimated output.
170 * This block gets an input x and a set of input configuration points stored in a look-up
171 * table of 32 elements. It computes the interval in which the input lies.
172 * Then output is computed by performing linear interpolation based on the interval
173 * properties (i.e. x_prev, slope, and offset).
174 * This BBB assumes spacing x-coordinates of "init vectors" increase exponentially as
176 * interval size : 2^0 2^1 2^2 2^3
177 * x-coordinates: x0<--->x1<---->x2<---->x3<---->
179 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_LXCU(
181 xcu_ref_init_vectors init_vectors);
185 * @param[in] coring_vec Amount of coring based on brightness level
186 * @param[in] filt_input Vector of input pixels on which Coring is applied
187 * @param[in] m_CnrCoring0 Coring Level0
189 * @return vector of filtered pixels after coring is applied
191 * This function will perform adaptive coring based on brightness level to
194 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w coring(
195 tvector1w coring_vec,
196 tvector1w filt_input,
197 tscalar1w m_CnrCoring0 );
199 /* @brief Normalised FIR with coefficients [3,4,1]
201 * @param[in] m 1x3 matrix with pixels
203 * @return filtered output
205 * This function will calculate the
206 * Normalised FIR with coefficients [3,4,1],
207 *-5dB at Fs/2, -90 degree phase shift (quarter pixel)
209 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_5dB_m90_nrm (
210 const s_1w_1x3_matrix m);
212 /* @brief Normalised FIR with coefficients [1,4,3]
214 * @param[in] m 1x3 matrix with pixels
216 * @return filtered output
218 * This function will calculate the
219 * Normalised FIR with coefficients [1,4,3],
220 *-5dB at Fs/2, +90 degree phase shift (quarter pixel)
222 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_5dB_p90_nrm (
223 const s_1w_1x3_matrix m);
225 /* @brief Normalised FIR with coefficients [1,2,1]
227 * @param[in] m 1x3 matrix with pixels
229 * @return filtered output
231 * This function will calculate the
232 * Normalised FIR with coefficients [1,2,1], -6dB at Fs/2
234 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm (
235 const s_1w_1x3_matrix m);
237 /* @brief Normalised FIR with coefficients [13,16,3]
239 * @param[in] m 1x3 matrix with pixels
241 * @return filtered output
243 * This function will calculate the
244 * Normalised FIR with coefficients [13,16,3],
246 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph0 (
247 const s_1w_1x3_matrix m);
249 /* @brief Normalised FIR with coefficients [9,16,7]
251 * @param[in] m 1x3 matrix with pixels
253 * @return filtered output
255 * This function will calculate the
256 * Normalised FIR with coefficients [9,16,7],
258 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph1 (
259 const s_1w_1x3_matrix m);
261 /* @brief Normalised FIR with coefficients [5,16,11]
263 * @param[in] m 1x3 matrix with pixels
265 * @return filtered output
267 * This function will calculate the
268 * Normalised FIR with coefficients [5,16,11],
270 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph2 (
271 const s_1w_1x3_matrix m);
273 /* @brief Normalised FIR with coefficients [1,16,15]
275 * @param[in] m 1x3 matrix with pixels
277 * @return filtered output
279 * This function will calculate the
280 * Normalised FIR with coefficients [1,16,15],
282 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_ph3 (
283 const s_1w_1x3_matrix m);
285 /* @brief Normalised FIR with programmable phase shift
287 * @param[in] m 1x3 matrix with pixels
288 * @param[in] coeff phase shift
290 * @return filtered output
292 * This function will calculate the
293 * Normalised FIR with coefficients [8-coeff,16,8+coeff],
295 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_6dB_nrm_calc_coeff (
296 const s_1w_1x3_matrix m, tscalar1w_3bit coeff);
298 /* @brief 3 tap FIR with coefficients [1,1,1]
300 * @param[in] m 1x3 matrix with pixels
302 * @return filtered output
304 * This function will calculate the
305 * FIR with coefficients [1,1,1], -9dB at Fs/2 normalized with factor 1/2
307 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x3m_9dB_nrm (
308 const s_1w_1x3_matrix m);
311 /* @brief symmetric 3 tap FIR acts as LPF or BSF
313 * @param[in] m 1x3 matrix with pixels
314 * @param[in] k filter coefficient shift
315 * @param[in] bsf_flag 1 for BSF and 0 for LPF
317 * @return filtered output
319 * This function performs variable coefficient symmetric 3 tap filter which can
320 * be either used as Low Pass Filter or Band Stop Filter.
321 * Symmetric 3 tap filter with DC gain 1 has filter coefficients [a, 1-2a, a]
322 * For LPF 'a' can be approximated as (1 - 2^(-k))/4, k = 0, 1, 2, ...
323 * and filter output can be approximated as:
324 * out_LPF = ((v00 + v02) - ((v00 + v02) >> k) + (2 * (v01 + (v01 >> k)))) >> 2
325 * For BSF 'a' can be approximated as (1 + 2^(-k))/4, k = 0, 1, 2, ...
326 * and filter output can be approximated as:
327 * out_BSF = ((v00 + v02) + ((v00 + v02) >> k) + (2 * (v01 - (v01 >> k)))) >> 2
328 * For a given filter coefficient shift 'k' and bsf_flag this function
329 * behaves either as LPF or BSF.
330 * All computation is done using 1w arithmetic and implementation does not use
331 * any multiplication.
333 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
334 sym_fir1x3m_lpf_bsf(s_1w_1x3_matrix m,
336 tscalar_bool bsf_flag);
339 /* @brief Normalised 2D FIR with coefficients [1;2;1] * [1,2,1]
341 * @param[in] m 3x3 matrix with pixels
343 * @return filtered output
345 * This function will calculate the
346 * Normalised FIR with coefficients [1;2;1] * [1,2,1]
347 * Unity gain filter through repeated scaling and rounding
348 * - 6 rotate operations per output
349 * - 8 vector operations per output
351 * 14 total operations
353 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir3x3m_6dB_nrm (
354 const s_1w_3x3_matrix m);
356 /* @brief Normalised 2D FIR with coefficients [1;1;1] * [1,1,1]
358 * @param[in] m 3x3 matrix with pixels
360 * @return filtered output
362 * This function will calculate the
363 * Normalised FIR with coefficients [1;1;1] * [1,1,1]
365 * (near) Unity gain filter through repeated scaling and rounding
366 * - 6 rotate operations per output
367 * - 8 vector operations per output
371 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir3x3m_9dB_nrm (
372 const s_1w_3x3_matrix m);
374 /* @brief Normalised dual output 2D FIR with coefficients [1;2;1] * [1,2,1]
376 * @param[in] m 4x3 matrix with pixels
378 * @return two filtered outputs (2x1 matrix)
380 * This function will calculate the
381 * Normalised FIR with coefficients [1;2;1] * [1,2,1]
382 * and produce two outputs (vertical)
383 * Unity gain filter through repeated scaling and rounding
384 * compute two outputs per call to re-use common intermediates
385 * - 4 rotate operations per output
386 * - 6 vector operations per output (alternative possible, but in this
387 * form it's not obvious to re-use variables)
389 * 10 total operations
391 STORAGE_CLASS_REF_VECTOR_FUNC_H s_1w_2x1_matrix fir3x3m_6dB_out2x1_nrm (
392 const s_1w_4x3_matrix m);
394 /* @brief Normalised dual output 2D FIR with coefficients [1;1;1] * [1,1,1]
396 * @param[in] m 4x3 matrix with pixels
398 * @return two filtered outputs (2x1 matrix)
400 * This function will calculate the
401 * Normalised FIR with coefficients [1;1;1] * [1,1,1]
402 * and produce two outputs (vertical)
403 * (near) Unity gain filter through repeated scaling and rounding
404 * compute two outputs per call to re-use common intermediates
405 * - 4 rotate operations per output
406 * - 7 vector operations per output (alternative possible, but in this
407 * form it's not obvious to re-use variables)
409 * 11 total operations
411 STORAGE_CLASS_REF_VECTOR_FUNC_H s_1w_2x1_matrix fir3x3m_9dB_out2x1_nrm (
412 const s_1w_4x3_matrix m);
414 /* @brief Normalised 2D FIR 5x5
416 * @param[in] m 5x5 matrix with pixels
418 * @return filtered output
420 * This function will calculate the
421 * Normalised FIR with coefficients [1;1;1] * [1;2;1] * [1,2,1] * [1,1,1]
422 * and produce a filtered output
423 * (near) Unity gain filter through repeated scaling and rounding
424 * - 20 rotate operations per output
425 * - 28 vector operations per output
427 * 48 total operations
429 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir5x5m_15dB_nrm (
430 const s_1w_5x5_matrix m);
432 /* @brief Normalised FIR 1x5
434 * @param[in] m 1x5 matrix with pixels
436 * @return filtered output
438 * This function will calculate the
439 * Normalised FIR with coefficients [1,2,1] * [1,2,1] = [1,4,6,4,1]
440 * and produce a filtered output
441 * (near) Unity gain filter through repeated scaling and rounding
442 * - 4 rotate operations per output
443 * - 5 vector operations per output
447 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x5m_12dB_nrm (
448 const s_1w_1x5_matrix m);
450 /* @brief Normalised 2D FIR 5x5
452 * @param[in] m 5x5 matrix with pixels
454 * @return filtered output
456 * This function will calculate the
457 * Normalised FIR with coefficients [1;2;1] * [1;2;1] * [1,2,1] * [1,2,1]
458 * and produce a filtered output
459 * (near) Unity gain filter through repeated scaling and rounding
460 * - 20 rotate operations per output
461 * - 30 vector operations per output
463 * 50 total operations
465 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir5x5m_12dB_nrm (
466 const s_1w_5x5_matrix m);
468 /* @brief Approximate averaging FIR 1x5
470 * @param[in] m 1x5 matrix with pixels
472 * @return filtered output
474 * This function will produce filtered output by
475 * applying the filter coefficients (1/8) * [1,1,1,1,1]
477 * 5 vector operations
479 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x5m_box (
482 /* @brief Approximate averaging FIR 1x9
484 * @param[in] m 1x9 matrix with pixels
486 * @return filtered output
488 * This function will produce filtered output by
489 * applying the filter coefficients (1/16) * [1,1,1,1,1,1,1,1,1]
491 * 9 vector operations
493 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x9m_box (
496 /* @brief Approximate averaging FIR 1x11
498 * @param[in] m 1x11 matrix with pixels
500 * @return filtered output
502 * This function will produce filtered output by
503 * applying the filter coefficients (1/16) * [1,1,1,1,1,1,1,1,1,1,1]
505 * 12 vector operations
507 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w fir1x11m_box (
510 /* @brief Symmetric 7 tap filter with normalization
512 * @param[in] in 1x7 matrix with pixels
513 * @param[in] coeff 1x4 matrix with coefficients
514 * @param[in] out_shift output pixel shift value for normalization
516 * @return symmetric 7 tap filter output
518 * This function performs symmetric 7 tap filter over input pixels.
519 * Filter sum is normalized by shifting out_shift bits.
520 * Filter sum: p0*c3 + p1*c2 + p2*c1 + p3*c0 + p4*c1 + p5*c2 + p6*c3
521 * is implemented as: (p0 + p6)*c3 + (p1 + p5)*c2 + (p2 + p4)*c1 + p3*c0 to
522 * reduce multiplication.
523 * Input pixels should be scaled, otherwise overflow is possible during
526 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
527 fir1x7m_sym_nrm(s_1w_1x7_matrix in,
528 s_1w_1x4_matrix coeff,
529 tvector1w out_shift);
531 /* @brief Symmetric 7 tap filter with normalization at input side
533 * @param[in] in 1x7 matrix with pixels
534 * @param[in] coeff 1x4 matrix with coefficients
536 * @return symmetric 7 tap filter output
538 * This function performs symmetric 7 tap filter over input pixels.
539 * Filter sum: p0*c3 + p1*c2 + p2*c1 + p3*c0 + p4*c1 + p5*c2 + p6*c3
540 * = (p0 + p6)*c3 + (p1 + p5)*c2 + (p2 + p4)*c1 + p3*c0
541 * Input pixels and coefficients are in Qn format, where n =
542 * ISP_VEC_ELEMBITS - 1 (ie Q15 for Broxton)
543 * To avoid double precision arithmetic input pixel sum and final sum is
544 * implemented using avgrnd and coefficient multiplication using qrmul.
545 * Final result is in Qm format where m = ISP_VEC_ELEMBITS - 2 (ie Q14 for
548 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
549 fir1x7m_sym_innrm_approx(s_1w_1x7_matrix in,
550 s_1w_1x4_matrix coeff);
552 /* @brief Symmetric 7 tap filter with normalization at output side
554 * @param[in] in 1x7 matrix with pixels
555 * @param[in] coeff 1x4 matrix with coefficients
557 * @return symmetric 7 tap filter output
559 * This function performs symmetric 7 tap filter over input pixels.
560 * Filter sum: p0*c3 + p1*c2 + p2*c1 + p3*c0 + p4*c1 + p5*c2 + p6*c3
561 * = (p0 + p6)*c3 + (p1 + p5)*c2 + (p2 + p4)*c1 + p3*c0
562 * Input pixels are in Qn and coefficients are in Qm format, where n =
563 * ISP_VEC_ELEMBITS - 2 and m = ISP_VEC_ELEMBITS - 1 (ie Q14 and Q15
564 * respectively for Broxton)
565 * To avoid double precision arithmetic input pixel sum and final sum is
566 * implemented using addsat and coefficient multiplication using qrmul.
567 * Final sum is left shifted by 2 and saturated to produce result in Qm format
568 * (ie Q15 for Broxton)
570 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
571 fir1x7m_sym_outnrm_approx(s_1w_1x7_matrix in,
572 s_1w_1x4_matrix coeff);
574 /* @brief 4 tap filter with normalization
576 * @param[in] in 1x4 matrix with pixels
577 * @param[in] coeff 1x4 matrix with coefficients
578 * @param[in] out_shift output pixel shift value for normalization
580 * @return 4 tap filter output
582 * This function performs 4 tap filter over input pixels.
583 * Filter sum is normalized by shifting out_shift bits.
584 * Filter sum: p0*c0 + p1*c1 + p2*c2 + p3*c3
586 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
587 fir1x4m_nrm(s_1w_1x4_matrix in,
588 s_1w_1x4_matrix coeff,
589 tvector1w out_shift);
591 /* @brief 4 tap filter with normalization for half pixel interpolation
593 * @param[in] in 1x4 matrix with pixels
595 * @return 4 tap filter output with filter tap [-1 9 9 -1]/16
597 * This function performs 4 tap filter over input pixels.
598 * Filter sum: -p0 + 9*p1 + 9*p2 - p3
599 * This filter implementation is completely free from multiplication and double
600 * precision arithmetic.
601 * Typical usage of this filter is for half pixel interpolation of Bezier
604 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
605 fir1x4m_bicubic_bezier_half(s_1w_1x4_matrix in);
607 /* @brief 4 tap filter with normalization for quarter pixel interpolation
609 * @param[in] in 1x4 matrix with pixels
610 * @param[in] coeff 1x4 matrix with coefficients
612 * @return 4 tap filter output
614 * This function performs 4 tap filter over input pixels.
615 * Filter sum: p0*c0 + p1*c1 + p2*c2 + p3*c3
616 * To avoid double precision arithmetic we implemented multiplication using
617 * qrmul and addition using avgrnd. Coefficients( c0 to c3) formats are assumed
618 * to be: Qm, Qn, Qo, Qm, where m = n + 2 and o = n + 1.
619 * Typical usage of this filter is for quarter pixel interpolation of Bezier
620 * surface with filter coefficients:[-9 111 29 -3]/128. For which coefficient
621 * values should be: [-9216/2^17 28416/2^15 14848/2^16 -3072/2^17] for
622 * ISP_VEC_ELEMBITS = 16.
624 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
625 fir1x4m_bicubic_bezier_quarter(s_1w_1x4_matrix in,
626 s_1w_1x4_matrix coeff);
629 /* @brief Symmetric 3 tap filter with normalization
631 * @param[in] in 1x3 matrix with pixels
632 * @param[in] coeff 1x2 matrix with coefficients
633 * @param[in] out_shift output pixel shift value for normalization
635 * @return symmetric 3 tap filter output
637 * This function performs symmetric 3 tap filter over input pixels.
638 * Filter sum is normalized by shifting out_shift bits.
639 * Filter sum: p0*c1 + p1*c0 + p2*c1
640 * is implemented as: (p0 + p2)*c1 + p1*c0 to reduce multiplication.
641 * Input pixels should be scaled, otherwise overflow is possible during
644 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
645 fir1x3m_sym_nrm(s_1w_1x3_matrix in,
646 s_1w_1x2_matrix coeff,
647 tvector1w out_shift);
649 /* @brief Symmetric 3 tap filter with normalization
651 * @param[in] in 1x3 matrix with pixels
652 * @param[in] coeff 1x2 matrix with coefficients
654 * @return symmetric 3 tap filter output
656 * This function performs symmetric 3 tap filter over input pixels.
657 * Filter sum: p0*c1 + p1*c0 + p2*c1 = (p0 + p2)*c1 + p1*c0
658 * Input pixels are in Qn and coefficient c0 is in Qm and c1 is in Qn format,
659 * where n = ISP_VEC_ELEMBITS - 1 and m = ISP_VEC_ELEMBITS - 2 ( ie Q15 and Q14
660 * respectively for Broxton)
661 * To avoid double precision arithmetic input pixel sum is implemented using
662 * avgrnd, coefficient multiplication using qrmul and final sum using addsat
663 * Final sum is Qm format (ie Q14 for Broxton)
665 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
666 fir1x3m_sym_nrm_approx(s_1w_1x3_matrix in,
667 s_1w_1x2_matrix coeff);
669 /* @brief Mean of 1x3 matrix
671 * @param[in] m 1x3 matrix with pixels
673 * @return mean of 1x3 matrix
675 * This function calculates the mean of 1x3 pixels,
676 * with a factor of 4/3.
678 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x3m(
681 /* @brief Mean of 3x3 matrix
683 * @param[in] m 3x3 matrix with pixels
685 * @return mean of 3x3 matrix
687 * This function calculates the mean of 3x3 pixels,
688 * with a factor of 16/9.
690 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean3x3m(
693 /* @brief Mean of 1x4 matrix
695 * @param[in] m 1x4 matrix with pixels
697 * @return mean of 1x4 matrix
699 * This function calculates the mean of 1x4 pixels
701 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x4m(
704 /* @brief Mean of 4x4 matrix
706 * @param[in] m 4x4 matrix with pixels
708 * @return mean of 4x4 matrix
710 * This function calculates the mean of 4x4 matrix with pixels
712 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean4x4m(
715 /* @brief Mean of 2x3 matrix
717 * @param[in] m 2x3 matrix with pixels
719 * @return mean of 2x3 matrix
721 * This function calculates the mean of 2x3 matrix with pixels
722 * with a factor of 8/6.
724 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean2x3m(
727 /* @brief Mean of 1x5 matrix
729 * @param[in] m 1x5 matrix with pixels
731 * @return mean of 1x5 matrix
733 * This function calculates the mean of 1x5 matrix with pixels
734 * with a factor of 8/5.
736 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x5m(s_1w_1x5_matrix m);
738 /* @brief Mean of 1x6 matrix
740 * @param[in] m 1x6 matrix with pixels
742 * @return mean of 1x6 matrix
744 * This function calculates the mean of 1x6 matrix with pixels
745 * with a factor of 8/6.
747 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean1x6m(
750 /* @brief Mean of 5x5 matrix
752 * @param[in] m 5x5 matrix with pixels
754 * @return mean of 5x5 matrix
756 * This function calculates the mean of 5x5 matrix with pixels
757 * with a factor of 32/25.
759 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean5x5m(
762 /* @brief Mean of 6x6 matrix
764 * @param[in] m 6x6 matrix with pixels
766 * @return mean of 6x6 matrix
768 * This function calculates the mean of 6x6 matrix with pixels
769 * with a factor of 64/36.
771 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w mean6x6m(
774 /* @brief Minimum of 4x4 matrix
776 * @param[in] m 4x4 matrix with pixels
778 * @return minimum of 4x4 matrix
780 * This function calculates the minimum of
781 * 4x4 matrix with pixels.
783 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w min4x4m(
786 /* @brief Maximum of 4x4 matrix
788 * @param[in] m 4x4 matrix with pixels
790 * @return maximum of 4x4 matrix
792 * This function calculates the maximum of
793 * 4x4 matrix with pixels.
795 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w max4x4m(
798 /* @brief SAD between two 3x3 matrices
800 * @param[in] a 3x3 matrix with pixels
802 * @param[in] b 3x3 matrix with pixels
804 * @return 3x3 matrix SAD
806 * This function calculates the sum of absolute difference between two matrices.
807 * Both input pixels and SAD are normalized by a factor of SAD3x3_IN_SHIFT and
808 * SAD3x3_OUT_SHIFT respectively.
809 * Computed SAD is 1/(2 ^ (SAD3x3_IN_SHIFT + SAD3x3_OUT_SHIFT)) ie 1/16 factor
810 * of original SAD and it's more precise than sad3x3m()
812 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad3x3m_precise(
816 /* @brief SAD between two 3x3 matrices
818 * @param[in] a 3x3 matrix with pixels
820 * @param[in] b 3x3 matrix with pixels
822 * @return 3x3 matrix SAD
824 * This function calculates the sum of absolute difference between two matrices.
825 * This version saves cycles by avoiding input normalization and wide vector
826 * operation during sum computation
827 * Input pixel differences are computed by absolute of rounded, halved
828 * subtraction. Normalized sum is computed by rounded averages.
829 * Computed SAD is (1/2)*(1/16) = 1/32 factor of original SAD. Factor 1/2 comes
830 * from input halving operation and factor 1/16 comes from mean operation
832 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad3x3m(
836 /* @brief SAD between two 5x5 matrices
838 * @param[in] a 5x5 matrix with pixels
840 * @param[in] b 5x5 matrix with pixels
842 * @return 5x5 matrix SAD
844 * Computed SAD is = 1/32 factor of original SAD.
846 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w sad5x5m(
850 /* @brief Absolute gradient between two sets of 1x5 matrices
852 * @param[in] m0 first set of 1x5 matrix with pixels
853 * @param[in] m1 second set of 1x5 matrix with pixels
855 * @return absolute gradient between two 1x5 matrices
857 * This function computes mean of two input 1x5 matrices and returns
858 * absolute difference between two mean values.
860 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w
861 absgrad1x5m(s_1w_1x5_matrix m0, s_1w_1x5_matrix m1);
863 /* @brief Bi-linear Interpolation optimized(approximate)
865 * @param[in] a input0
866 * @param[in] b input1
867 * @param[in] c cloned weight factor
869 * @return (a-b)*c + b
871 * This function will do bi-linear Interpolation on
872 * inputs a and b using constant weight factor c
874 * Inputs a,b are assumed in S1.15 format
875 * Weight factor has to be in range [0,1] and is assumed to be in S2.14 format
877 * The bilinear interpolation equation is (a*c) + b*(1-c),
878 * But this is implemented as (a-b)*c + b for optimization
880 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol_approx_c(
885 /* @brief Bi-linear Interpolation optimized(approximate)
887 * @param[in] a input0
888 * @param[in] b input1
889 * @param[in] c weight factor
891 * @return (a-b)*c + b
893 * This function will do bi-linear Interpolation on
894 * inputs a and b using weight factor c
896 * Inputs a,b are assumed in S1.15 format
897 * Weight factor has to be in range [0,1] and is assumed to be in S2.14 format
899 * The bilinear interpolation equation is (a*c) + b*(1-c),
900 * But this is implemented as (a-b)*c + b for optimization
902 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol_approx(
907 /* @brief Bi-linear Interpolation
909 * @param[in] a input0
910 * @param[in] b input1
911 * @param[in] c weight factor
913 * @return (a*c) + b*(1-c)
915 * This function will do bi-linear Interpolation on
916 * inputs a and b using weight factor c
918 * Inputs a,b are assumed in S1.15 format
919 * Weight factor has to be in range [0,1] and is assumed to be in S2.14 format
921 * The bilinear interpolation equation is (a*c) + b*(1-c),
923 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_bilinear_interpol(
928 /* @brief Generic Block Matching Algorithm
929 * @param[in] search_window pointer to input search window of 16x16 pixels
930 * @param[in] ref_block pointer to input reference block of 8x8 pixels, where N<=M
931 * @param[in] output pointer to output sads
932 * @param[in] search_sz search size for SAD computation
933 * @param[in] ref_sz block size
934 * @param[in] pixel_shift pixel shift to search the data
935 * @param[in] search_block_sz search window block size
936 * @param[in] shift shift value, with which the output is shifted right
938 * @return 0 when the computation is successful.
940 * This function compares the reference block with a block of size NxN in the search
941 * window. Sum of absolute differences for each pixel in the reference block and the
942 * corresponding pixel in the search block is computed. Whole search window is traversed with the
943 * reference block with the given pixel shift.
946 STORAGE_CLASS_REF_VECTOR_FUNC_H int generic_block_matching_algorithm(
947 tscalar1w **search_window,
948 tscalar1w **ref_block,
954 tscalar1w_4bit_bma_shift shift);
957 /* @brief OP_1w_asp_bma_16_1_32way
959 /* @brief OP_1w_asp_bma_16_1_32way_nomask
962 * @param[in] search_area input search window of 16x16 pixels
963 * @param[in] input_block input reference block of 8x8 pixels, where N<=M
964 * @param[in] shift shift value, with which the output is shifted right
966 * @return 81 SADs for all the search blocks.
968 * This function compares the reference block with a block of size 8x8 pixels in the
969 * search window of 16x16 pixels. Sum of absolute differences for each pixel in the
970 * reference block and the corresponding pixel in the search block is calculated.
971 * Whole search window is traversed with the reference block with the pixel shift of 1
972 * pixels. The output is right shifted with the given shift value. The shift value is
978 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_1 OP_1w_asp_bma_16_1_32way(
980 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_1 OP_1w_asp_bma_16_1_32way_nomask(
982 bma_16x16_search_window search_area,
983 ref_block_8x8 input_block,
984 tscalar1w_4bit_bma_shift shift);
987 /* @brief OP_1w_asp_bma_16_2_32way
989 /* @brief OP_1w_asp_bma_16_2_32way_nomask
992 * @param[in] search_area input search window of 16x16 pixels
993 * @param[in] input_block input reference block of 8x8 pixels, where N<=M
994 * @param[in] shift shift value, with which the output is shifted right
996 * @return 25 SADs for all the search blocks.
997 * This function compares the reference block with a block of size 8x8 in the search
998 * window of 16x16. Sum of absolute differences for each pixel in the reference block
999 * and the corresponding pixel in the search block is computed. Whole search window is
1000 * traversed with the reference block with the given pixel shift of 2 pixels. The output
1001 * is right shifted with the given shift value. The shift value is a 4 bit value.
1006 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_2 OP_1w_asp_bma_16_2_32way(
1008 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_16_2 OP_1w_asp_bma_16_2_32way_nomask(
1010 bma_16x16_search_window search_area,
1011 ref_block_8x8 input_block,
1012 tscalar1w_4bit_bma_shift shift);
1014 /* @brief OP_1w_asp_bma_14_1_32way
1016 /* @brief OP_1w_asp_bma_14_1_32way_nomask
1019 * @param[in] search_area input search block of 16x16 pixels with search window of 14x14 pixels
1020 * @param[in] input_block input reference block of 8x8 pixels, where N<=M
1021 * @param[in] shift shift value, with which the output is shifted right
1023 * @return 49 SADs for all the search blocks.
1024 * This function compares the reference block with a block of size 8x8 in the search
1025 * window of 14x14. Sum of absolute differences for each pixel in the reference block
1026 * and the corresponding pixel in the search block is computed. Whole search window is traversed
1027 * with the reference block with 1 pixel shift. The output is right shifted with the
1028 * given shift value. The shift value is a 4 bit value. Input is always a 16x16 block
1029 * but the search window is 14x14; the last 2 pixels of each row and column are not used
1035 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_1 OP_1w_asp_bma_14_1_32way(
1037 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_1 OP_1w_asp_bma_14_1_32way_nomask(
1039 bma_16x16_search_window search_area,
1040 ref_block_8x8 input_block,
1041 tscalar1w_4bit_bma_shift shift);
1044 /* @brief OP_1w_asp_bma_14_2_32way
1046 /* @brief OP_1w_asp_bma_14_2_32way_nomask
1049 * @param[in] search_area input search block of 16x16 pixels with search window of 14x14 pixels
1050 * @param[in] input_block input reference block of 8x8 pixels, where N<=M
1051 * @param[in] shift shift value, with which the output is shifted right
1053 * @return 16 SADs for all the search blocks.
1054 * This function compares the reference block with a block of size 8x8 in the search
1055 * window of 14x14. Sum of absolute differences for each pixel in the reference block
1056 * and the corresponding pixel in the search block is computed. Whole search window is traversed
1057 * with the reference block with 2 pixels shift. The output is right shifted with the
1058 * given shift value. The shift value is a 4 bit value.
1063 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_2 OP_1w_asp_bma_14_2_32way(
1065 STORAGE_CLASS_REF_VECTOR_FUNC_H bma_output_14_2 OP_1w_asp_bma_14_2_32way_nomask(
1067 bma_16x16_search_window search_area,
1068 ref_block_8x8 input_block,
1069 tscalar1w_4bit_bma_shift shift);
1072 /* @brief multiplex addition and passing
1074 * @param[in] _a first pixel
1075 * @param[in] _b second pixel
1076 * @param[in] _c condition flag
1078 * @return (_a + _b) if condition flag is true
1079 * _a if condition flag is false
1081 * This function does multiplex addition depending on the input condition flag
1083 STORAGE_CLASS_REF_VECTOR_FUNC_H tvector1w OP_1w_cond_add(
1090 /* @brief OP_1w_single_bfa_7x7
1092 * @param[in] weights - spatial and range weight lut
1093 * @param[in] threshold - threshold plane, for range weight scaling
1094 * @param[in] central_pix - central pixel plane
1095 * @param[in] src_plane - src pixel plane
1097 * @return Bilateral filter output
1099 * This function implements a 7x7 single bilateral filter.
1100 * Output = {sum(pixel * weight), sum(weight)}
1101 * Where sum is summation over 7x7 block set.
1102 * weight = spatial weight * range weight
1103 * spatial weights are loaded from spatial_weight_lut depending on src pixel
1104 * position in the 7x7 block
1105 * range weights are computed by table look up from range_weight_lut depending
1106 * on scaled absolute difference between src and central pixels.
1107 * threshold is used as scaling factor. range_weight_lut consists of
1108 * BFA_RW_LUT_SIZE numbers of LUT entries to model any distribution function.
1109 * Piecewise linear approximation technique is used to compute range weight
1110 * It computes absolute difference between central pixel and 61 src pixels.
1112 STORAGE_CLASS_REF_VECTOR_FUNC_H bfa_7x7_output OP_1w_single_bfa_7x7(
1113 bfa_weights weights,
1114 tvector1w threshold,
1115 tvector1w central_pix,
1116 s_1w_7x7_matrix src_plane);
1118 /* @brief OP_1w_joint_bfa_7x7
1120 * @param[in] weights - spatial and range weight lut
1121 * @param[in] threshold0 - 1st threshold plane, for range weight scaling
1122 * @param[in] central_pix0 - 1st central pixel plane
1123 * @param[in] src0_plane - 1st pixel plane
1124 * @param[in] threshold1 - 2nd threshold plane, for range weight scaling
1125 * @param[in] central_pix1 - 2nd central pixel plane
1126 * @param[in] src1_plane - 2nd pixel plane
1128 * @return Joint bilateral filter output
1130 * This function implements a 7x7 joint bilateral filter.
1131 * Output = {sum(pixel * weight), sum(weight)}
1132 * Where sum is summation over 7x7 block set.
1133 * weight = spatial weight * range weight
1134 * spatial weights are loaded from spatial_weight_lut depending on src pixel
1135 * position in the 7x7 block
1136 * range weights are computed by table look up from range_weight_lut depending
1137 * on sum of scaled absolute difference between central pixel and two src pixel
1138 * planes. threshold is used as scaling factor. range_weight_lut consists of
1139 * BFA_RW_LUT_SIZE numbers of LUT entries to model any distribution function.
1140 * Piecewise linear approximation technique is used to compute range weight
1141 * It computes absolute difference between central pixel and 61 src pixels.
1143 STORAGE_CLASS_REF_VECTOR_FUNC_H bfa_7x7_output OP_1w_joint_bfa_7x7(
1144 bfa_weights weights,
1145 tvector1w threshold0,
1146 tvector1w central_pix0,
1147 s_1w_7x7_matrix src0_plane,
1148 tvector1w threshold1,
1149 tvector1w central_pix1,
1150 s_1w_7x7_matrix src1_plane);
1152 /* @brief bbb_bfa_gen_spatial_weight_lut
1154 * @param[in] in - 7x7 matrix of spatial weights
1155 * @param[out] out - generated LUT
1159 * This function creates the spatial weight look up table used
1160 * for the bilateral filter instruction.
1162 STORAGE_CLASS_REF_VECTOR_FUNC_H void bbb_bfa_gen_spatial_weight_lut(
1164 tvector1w out[BFA_MAX_KWAY]);
1166 /* @brief bbb_bfa_gen_range_weight_lut
1168 * @param[in] in - input range weight,
1169 * @param[out] out - generated LUT
1173 * This function creates the range weight look up table used
1174 * for the bilateral filter instruction.
1175 * 8 unsigned 7b weights are represented in 7 16bits LUT
1176 * LUT formation is done as follows:
1177 * higher 8 bit: Point(N) = Point(N+1) - Point(N)
1178 * lower 8 bit: Point(N) = Point(N)
1179 * Weight function can be any monotonic decreasing function for x >= 0
1181 STORAGE_CLASS_REF_VECTOR_FUNC_H void bbb_bfa_gen_range_weight_lut(
1182 tvector1w in[BFA_RW_LUT_SIZE+1],
1183 tvector1w out[BFA_RW_LUT_SIZE]);
1187 /* @brief OP_1w_imax32
1189 * @param[in] src - structure that holds an array of 32 elements.
1191 * @return maximum element among input array.
1193 * This function gets the maximum element from an array of 32 elements.
1195 STORAGE_CLASS_REF_VECTOR_FUNC_H int OP_1w_imax32(
1196 imax32_ref_in_vector src);
1198 /* @brief OP_1w_imaxidx32
1200 * @param[in] src - structure that holds a vector of elements.
1202 * @return index of first element with maximum value among array.
1204 * This function gets index of first element with maximum value
1207 STORAGE_CLASS_REF_VECTOR_FUNC_H int OP_1w_imaxidx32(
1208 imax32_ref_in_vector src);
/*
 * Select the "_C" storage-class macros (the counterparts of the "_H" ones
 * defined at the top of this header; presumably consumed by
 * ref_vector_func.c - confirm against that file).
 *
 * Without INLINE_VECTOR_FUNC: the function storage class expands to nothing
 * and the data storage class is const.
 * With INLINE_VECTOR_FUNC: both macros alias the "_H" storage classes,
 * ref_vector_func.c is included into this header, and VECTOR_FUNC_INLINED
 * is defined after that include.
 */
1211 #ifndef INLINE_VECTOR_FUNC
1212 #define STORAGE_CLASS_REF_VECTOR_FUNC_C
1213 #define STORAGE_CLASS_REF_VECTOR_DATA_C const
1214 #else /* INLINE_VECTOR_FUNC */
1215 #define STORAGE_CLASS_REF_VECTOR_FUNC_C STORAGE_CLASS_REF_VECTOR_FUNC_H
1216 #define STORAGE_CLASS_REF_VECTOR_DATA_C STORAGE_CLASS_REF_VECTOR_DATA_H
1217 #include "ref_vector_func.c"
1218 #define VECTOR_FUNC_INLINED
1219 #endif /* INLINE_VECTOR_FUNC */
1221 #endif /*_REF_VECTOR_FUNC_H_INCLUDED_*/