Subversion Repositories DashDisplay

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /* ----------------------------------------------------------------------    
  2. * Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
  3. *    
  4. * $Date:        19. March 2015
  5. * $Revision:    V.1.4.5  
  6. *    
  7. * Project:          CMSIS DSP Library    
  8. * Title:            arm_cfft_radix8_f32.c    
  9. *    
  10. * Description:  Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function        
  11. *    
  12. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  13. *  
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions
  16. * are met:
  17. *   - Redistributions of source code must retain the above copyright
  18. *     notice, this list of conditions and the following disclaimer.
  19. *   - Redistributions in binary form must reproduce the above copyright
  20. *     notice, this list of conditions and the following disclaimer in
  21. *     the documentation and/or other materials provided with the
  22. *     distribution.
  23. *   - Neither the name of ARM LIMITED nor the names of its contributors
  24. *     may be used to endorse or promote products derived from this
  25. *     software without specific prior written permission.
  26. *
  27. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  28. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  29. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  30. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  31. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  32. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  33. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  34. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  35. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  36. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  37. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38. * POSSIBILITY OF SUCH DAMAGE.      
  39. * -------------------------------------------------------------------- */
  40.  
  41. #include "arm_math.h"
  42.  
  43. /**    
  44. * @ingroup groupTransforms    
  45. */
  46.  
  47. /**    
  48. * @defgroup Radix8_CFFT_CIFFT Radix-8 Complex FFT Functions    
  49. *    
  50. * \par    
  51. * Complex Fast Fourier Transform(CFFT) and Complex Inverse Fast Fourier Transform(CIFFT) is an efficient algorithm to compute Discrete Fourier Transform(DFT) and Inverse Discrete Fourier Transform(IDFT).    
  52. * Computational complexity of CFFT reduces drastically when compared to DFT.    
  53. * \par    
  54. * This set of functions implements CFFT/CIFFT    
  55. * for floating-point data types.  The functions operates on in-place buffer which uses same buffer for input and output.    
  56. * Complex input is stored in input buffer in an interleaved fashion.    
  57. *    
  58. * \par    
  59. * The functions operate on blocks of input and output data and each call to the function processes    
  60. * <code>2*fftLen</code> samples through the transform.  <code>pSrc</code>  points to In-place arrays containing <code>2*fftLen</code> values.    
  61. * \par  
  62. * The <code>pSrc</code> points to the array of in-place buffer of size <code>2*fftLen</code> and inputs and outputs are stored in an interleaved fashion as shown below.    
  63. * <pre> {real[0], imag[0], real[1], imag[1],..} </pre>    
  64. *    
  65. * \par Lengths supported by the transform:  
  66. * \par    
  67. * Internally, the function utilize a Radix-8 decimation in frequency(DIF) algorithm    
  68. * and the size of the FFT supported are of the lengths [ 64, 512, 4096].  
  69. *    
  70. *    
  71. * \par Algorithm:    
  72. *    
  73. * <b>Complex Fast Fourier Transform:</b>    
  74. * \par    
  75. * Input real and imaginary data:    
  76. * <pre>    
  77. * x(n) = xa + j * ya    
  78. * x(n+N/4 ) = xb + j * yb    
  79. * x(n+N/2 ) = xc + j * yc    
  80. * x(n+3N 4) = xd + j * yd    
  81. * </pre>    
  82. * where N is length of FFT    
  83. * \par    
  84. * Output real and imaginary data:    
  85. * <pre>    
  86. * X(4r) = xa'+ j * ya'    
  87. * X(4r+1) = xb'+ j * yb'    
  88. * X(4r+2) = xc'+ j * yc'    
  89. * X(4r+3) = xd'+ j * yd'    
  90. * </pre>    
  91. * \par    
  92. * Twiddle factors for Radix-8 FFT:    
  93. * <pre>    
  94. * Wn = co1 + j * (- si1)    
  95. * W2n = co2 + j * (- si2)    
  96. * W3n = co3 + j * (- si3)    
  97. * </pre>    
  98. *    
  99. * \par    
  100. * \image html CFFT.gif "Radix-8 Decimation-in Frequency Complex Fast Fourier Transform"    
  101. *    
  102. * \par    
  103. * Output from Radix-8 CFFT Results in Digit reversal order. Interchange middle two branches of every butterfly results in Bit reversed output.    
  104. * \par    
  105. * <b> Butterfly CFFT equations:</b>    
  106. * <pre>    
  107. * xa' = xa + xb + xc + xd    
  108. * ya' = ya + yb + yc + yd    
  109. * xc' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)    
  110. * yc' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)    
  111. * xb' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)    
  112. * yb' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)    
  113. * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)    
  114. * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)    
  115. * </pre>    
  116. *    
  117. * \par    
  118. * where <code>fftLen</code> length of CFFT/CIFFT; <code>ifftFlag</code> Flag for selection of CFFT or CIFFT(Set ifftFlag to calculate CIFFT otherwise calculates CFFT);    
  119. * <code>bitReverseFlag</code> Flag for selection of output order(Set bitReverseFlag to output in normal order otherwise output in bit reversed order);    
  120. * <code>pTwiddle</code>points to array of twiddle coefficients; <code>pBitRevTable</code> points to the array of bit reversal table.    
  121. * <code>twidCoefModifier</code> modifier for twiddle factor table which supports all FFT lengths with same table;    
  122. * <code>pBitRevTable</code> modifier for bit reversal table which supports all FFT lengths with same table.    
  123. * <code>onebyfftLen</code> value of 1/fftLen to calculate CIFFT;    
  124. *  
  125. * \par Fixed-Point Behavior    
  126. * Care must be taken when using the fixed-point versions of the CFFT/CIFFT function.    
  127. * Refer to the function specific documentation below for usage guidelines.    
  128. */
  129.  
  130.  
  131. /*    
  132. * @brief  Core function for the floating-point CFFT butterfly process.  
  133. * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.  
  134. * @param[in]      fftLen           length of the FFT.  
  135. * @param[in]      *pCoef           points to the twiddle coefficient buffer.  
  136. * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.  
  137. * @return none.  
  138. */
  139.  
  140. void arm_radix8_butterfly_f32(
  141. float32_t * pSrc,
  142. uint16_t fftLen,
  143. const float32_t * pCoef,
  144. uint16_t twidCoefModifier)
  145. {
  146.    uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
  147.    uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
  148.    uint32_t id;
  149.    uint32_t n1, n2, j;
  150.    
  151.    float32_t r1, r2, r3, r4, r5, r6, r7, r8;
  152.    float32_t t1, t2;
  153.    float32_t s1, s2, s3, s4, s5, s6, s7, s8;
  154.    float32_t p1, p2, p3, p4;
  155.    float32_t co2, co3, co4, co5, co6, co7, co8;
  156.    float32_t si2, si3, si4, si5, si6, si7, si8;
  157.    const float32_t C81 = 0.70710678118f;
  158.  
  159.    n2 = fftLen;
  160.    
  161.    do
  162.    {
  163.       n1 = n2;
  164.       n2 = n2 >> 3;
  165.       i1 = 0;
  166.      
  167.       do
  168.       {
  169.          i2 = i1 + n2;
  170.          i3 = i2 + n2;
  171.          i4 = i3 + n2;
  172.          i5 = i4 + n2;
  173.          i6 = i5 + n2;
  174.          i7 = i6 + n2;
  175.          i8 = i7 + n2;
  176.          r1 = pSrc[2 * i1] + pSrc[2 * i5];
  177.          r5 = pSrc[2 * i1] - pSrc[2 * i5];
  178.          r2 = pSrc[2 * i2] + pSrc[2 * i6];
  179.          r6 = pSrc[2 * i2] - pSrc[2 * i6];
  180.          r3 = pSrc[2 * i3] + pSrc[2 * i7];
  181.          r7 = pSrc[2 * i3] - pSrc[2 * i7];
  182.          r4 = pSrc[2 * i4] + pSrc[2 * i8];
  183.          r8 = pSrc[2 * i4] - pSrc[2 * i8];
  184.          t1 = r1 - r3;
  185.          r1 = r1 + r3;
  186.          r3 = r2 - r4;
  187.          r2 = r2 + r4;
  188.          pSrc[2 * i1] = r1 + r2;  
  189.          pSrc[2 * i5] = r1 - r2;
  190.          r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
  191.          s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
  192.          r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
  193.          s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
  194.          s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
  195.          s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
  196.          r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
  197.          s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
  198.          t2 = r1 - s3;
  199.          r1 = r1 + s3;
  200.          s3 = r2 - r4;
  201.          r2 = r2 + r4;
  202.          pSrc[2 * i1 + 1] = r1 + r2;
  203.          pSrc[2 * i5 + 1] = r1 - r2;
  204.          pSrc[2 * i3]     = t1 + s3;
  205.          pSrc[2 * i7]     = t1 - s3;
  206.          pSrc[2 * i3 + 1] = t2 - r3;
  207.          pSrc[2 * i7 + 1] = t2 + r3;
  208.          r1 = (r6 - r8) * C81;
  209.          r6 = (r6 + r8) * C81;
  210.          r2 = (s6 - s8) * C81;
  211.          s6 = (s6 + s8) * C81;
  212.          t1 = r5 - r1;
  213.          r5 = r5 + r1;
  214.          r8 = r7 - r6;
  215.          r7 = r7 + r6;
  216.          t2 = s5 - r2;
  217.          s5 = s5 + r2;
  218.          s8 = s7 - s6;
  219.          s7 = s7 + s6;
  220.          pSrc[2 * i2]     = r5 + s7;
  221.          pSrc[2 * i8]     = r5 - s7;
  222.          pSrc[2 * i6]     = t1 + s8;
  223.          pSrc[2 * i4]     = t1 - s8;
  224.          pSrc[2 * i2 + 1] = s5 - r7;
  225.          pSrc[2 * i8 + 1] = s5 + r7;
  226.          pSrc[2 * i6 + 1] = t2 - r8;
  227.          pSrc[2 * i4 + 1] = t2 + r8;
  228.          
  229.          i1 += n1;
  230.       } while(i1 < fftLen);
  231.      
  232.       if(n2 < 8)
  233.          break;
  234.      
  235.       ia1 = 0;
  236.       j = 1;
  237.      
  238.       do
  239.       {      
  240.          /*  index calculation for the coefficients */
  241.          id  = ia1 + twidCoefModifier;
  242.          ia1 = id;
  243.          ia2 = ia1 + id;
  244.          ia3 = ia2 + id;
  245.          ia4 = ia3 + id;
  246.          ia5 = ia4 + id;
  247.          ia6 = ia5 + id;
  248.          ia7 = ia6 + id;
  249.                  
  250.          co2 = pCoef[2 * ia1];
  251.          co3 = pCoef[2 * ia2];
  252.          co4 = pCoef[2 * ia3];
  253.          co5 = pCoef[2 * ia4];
  254.          co6 = pCoef[2 * ia5];
  255.          co7 = pCoef[2 * ia6];
  256.          co8 = pCoef[2 * ia7];
  257.          si2 = pCoef[2 * ia1 + 1];
  258.          si3 = pCoef[2 * ia2 + 1];
  259.          si4 = pCoef[2 * ia3 + 1];
  260.          si5 = pCoef[2 * ia4 + 1];
  261.          si6 = pCoef[2 * ia5 + 1];
  262.          si7 = pCoef[2 * ia6 + 1];
  263.          si8 = pCoef[2 * ia7 + 1];        
  264.          
  265.          i1 = j;
  266.          
  267.          do
  268.          {
  269.             /*  index calculation for the input */
  270.             i2 = i1 + n2;
  271.             i3 = i2 + n2;
  272.             i4 = i3 + n2;
  273.             i5 = i4 + n2;
  274.             i6 = i5 + n2;
  275.             i7 = i6 + n2;
  276.             i8 = i7 + n2;
  277.             r1 = pSrc[2 * i1] + pSrc[2 * i5];
  278.             r5 = pSrc[2 * i1] - pSrc[2 * i5];
  279.             r2 = pSrc[2 * i2] + pSrc[2 * i6];
  280.             r6 = pSrc[2 * i2] - pSrc[2 * i6];
  281.             r3 = pSrc[2 * i3] + pSrc[2 * i7];
  282.             r7 = pSrc[2 * i3] - pSrc[2 * i7];
  283.             r4 = pSrc[2 * i4] + pSrc[2 * i8];
  284.             r8 = pSrc[2 * i4] - pSrc[2 * i8];
  285.             t1 = r1 - r3;
  286.             r1 = r1 + r3;
  287.             r3 = r2 - r4;
  288.             r2 = r2 + r4;
  289.             pSrc[2 * i1] = r1 + r2;
  290.             r2 = r1 - r2;
  291.             s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
  292.             s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
  293.             s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
  294.             s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
  295.             s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
  296.             s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
  297.             s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
  298.             s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
  299.             t2 = s1 - s3;
  300.             s1 = s1 + s3;
  301.             s3 = s2 - s4;
  302.             s2 = s2 + s4;
  303.             r1 = t1 + s3;
  304.             t1 = t1 - s3;
  305.             pSrc[2 * i1 + 1] = s1 + s2;
  306.             s2 = s1 - s2;
  307.             s1 = t2 - r3;
  308.             t2 = t2 + r3;
  309.             p1 = co5 * r2;
  310.             p2 = si5 * s2;
  311.             p3 = co5 * s2;
  312.             p4 = si5 * r2;
  313.             pSrc[2 * i5]     = p1 + p2;
  314.             pSrc[2 * i5 + 1] = p3 - p4;
  315.             p1 = co3 * r1;
  316.             p2 = si3 * s1;
  317.             p3 = co3 * s1;
  318.             p4 = si3 * r1;
  319.             pSrc[2 * i3]     = p1 + p2;
  320.             pSrc[2 * i3 + 1] = p3 - p4;
  321.             p1 = co7 * t1;
  322.             p2 = si7 * t2;
  323.             p3 = co7 * t2;
  324.             p4 = si7 * t1;
  325.             pSrc[2 * i7]     = p1 + p2;
  326.             pSrc[2 * i7 + 1] = p3 - p4;
  327.             r1 = (r6 - r8) * C81;
  328.             r6 = (r6 + r8) * C81;
  329.             s1 = (s6 - s8) * C81;
  330.             s6 = (s6 + s8) * C81;
  331.             t1 = r5 - r1;
  332.             r5 = r5 + r1;
  333.             r8 = r7 - r6;
  334.             r7 = r7 + r6;
  335.             t2 = s5 - s1;
  336.             s5 = s5 + s1;
  337.             s8 = s7 - s6;
  338.             s7 = s7 + s6;
  339.             r1 = r5 + s7;
  340.             r5 = r5 - s7;
  341.             r6 = t1 + s8;
  342.             t1 = t1 - s8;
  343.             s1 = s5 - r7;
  344.             s5 = s5 + r7;
  345.             s6 = t2 - r8;
  346.             t2 = t2 + r8;
  347.             p1 = co2 * r1;
  348.             p2 = si2 * s1;
  349.             p3 = co2 * s1;
  350.             p4 = si2 * r1;
  351.             pSrc[2 * i2]     = p1 + p2;
  352.             pSrc[2 * i2 + 1] = p3 - p4;
  353.             p1 = co8 * r5;
  354.             p2 = si8 * s5;
  355.             p3 = co8 * s5;
  356.             p4 = si8 * r5;
  357.             pSrc[2 * i8]     = p1 + p2;
  358.             pSrc[2 * i8 + 1] = p3 - p4;
  359.             p1 = co6 * r6;
  360.             p2 = si6 * s6;
  361.             p3 = co6 * s6;
  362.             p4 = si6 * r6;
  363.             pSrc[2 * i6]     = p1 + p2;
  364.             pSrc[2 * i6 + 1] = p3 - p4;
  365.             p1 = co4 * t1;
  366.             p2 = si4 * t2;
  367.             p3 = co4 * t2;
  368.             p4 = si4 * t1;
  369.             pSrc[2 * i4]     = p1 + p2;
  370.             pSrc[2 * i4 + 1] = p3 - p4;
  371.            
  372.             i1 += n1;
  373.          } while(i1 < fftLen);
  374.          
  375.          j++;
  376.       } while(j < n2);
  377.      
  378.       twidCoefModifier <<= 3;
  379.    } while(n2 > 7);  
  380. }
  381.  
  382. /**    
  383. * @} end of Radix8_CFFT_CIFFT group    
  384. */
  385.