libavcodec/aacpsy.c

   1 /*
   2  * AAC encoder psychoacoustic model
   3  * Copyright (C) 2008 Konstantin Shishkov
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file libavcodec/aacpsy.c
  24  * AAC encoder psychoacoustic model
  25  */
  26
  27 #include "avcodec.h"
  28 #include "aacpsy.h"
  29 #include "aactab.h"
  30
/***********************************
 *              TODOs:
 * General:
 * better audio preprocessing (add DC highpass filter?)
 * more psy models
 * maybe improve the coefficient quantization function in some way
 *
 * 3GPP-based psy model:
 * linearize thresholds after they have been modified to attain the given bitrate
 * try another bitrate control mechanism (maybe use ratecontrol.c?)
 * control quality for quality-based output
 **********************************/
  43
  44 /**
  45  * Quantize one coefficient.
  46  * @return absolute value of the quantized coefficient
  47  * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
  48  */
  49 static av_always_inline int quant(float coef, const float Q)
  50 {
  51     return av_clip((int)(pow(fabsf(coef) * Q, 0.75) + 0.4054), 0, 8191);
  52 }
  53
  54 static inline float get_approximate_quant_error(float *c, int size, int scale_idx)
  55 {
  56     int i;
  57     int q;
  58     float coef, unquant, sum = 0.0f;
  59     const float Q  = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  60     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  61     for(i = 0; i < size; i++){
  62         coef = fabs(c[i]);
  63         q = quant(c[i], Q);
  64         unquant = (q * cbrt(q)) * IQ;
  65         sum += (coef - unquant) * (coef - unquant);
  66     }
  67     return sum;
  68 }
  69
  70 /**
  71  * constants for 3GPP AAC psychoacoustic model
  72  * @{
  73  */
  74 #define PSY_3GPP_SPREAD_LOW  1.5f // spreading factor for ascending threshold spreading  (15 dB/Bark)
  75 #define PSY_3GPP_SPREAD_HI   3.0f // spreading factor for descending threshold spreading (30 dB/Bark)
  76 /**
  77  * @}
  78  */
  79
  80 /**
  81  * information for single band used by 3GPP TS26.403-inspired psychoacoustic model
  82  */
  83 typedef struct Psy3gppBand{
  84     float energy;    ///< band energy
  85     float ffac;      ///< form factor
  86 }Psy3gppBand;
  87
  88 /**
  89  * psychoacoustic model frame type-dependent coefficients
  90  */
  91 typedef struct Psy3gppCoeffs{
  92     float ath       [64]; ///< absolute threshold of hearing per bands
  93     float barks     [64]; ///< Bark value for each spectral band in long frame
  94     float spread_low[64]; ///< spreading factor for low-to-high threshold spreading in long frame
  95     float spread_hi [64]; ///< spreading factor for high-to-low threshold spreading in long frame
  96 }Psy3gppCoeffs;
  97
  98 /**
  99  * Calculate Bark value for given line.
 100  */
 101 static inline float calc_bark(float f)
 102 {
 103     return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.0f));
 104 }