Subversion Repositories AFRtranscoder

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 mjames 1
/* ----------------------------------------------------------------------
2
 * Project:      CMSIS DSP Library
3
 * Title:        arm_cmplx_mult_cmplx_q31.c
4
 * Description:  Q31 complex-by-complex multiplication
5
 *
6
 * $Date:        27. January 2017
7
 * $Revision:    V.1.5.1
8
 *
9
 * Target Processor: Cortex-M cores
10
 * -------------------------------------------------------------------- */
11
/*
12
 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
13
 *
14
 * SPDX-License-Identifier: Apache-2.0
15
 *
16
 * Licensed under the Apache License, Version 2.0 (the License); you may
17
 * not use this file except in compliance with the License.
18
 * You may obtain a copy of the License at
19
 *
20
 * www.apache.org/licenses/LICENSE-2.0
21
 *
22
 * Unless required by applicable law or agreed to in writing, software
23
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25
 * See the License for the specific language governing permissions and
26
 * limitations under the License.
27
 */
28
 
29
#include "arm_math.h"
30
 
31
/**
32
 * @ingroup groupCmplxMath
33
 */
34
 
35
/**
36
 * @addtogroup CmplxByCmplxMult
37
 * @{
38
 */
39
 
40
 
41
/**
42
 * @brief  Q31 complex-by-complex multiplication
43
 * @param[in]  *pSrcA points to the first input vector
44
 * @param[in]  *pSrcB points to the second input vector
45
 * @param[out]  *pDst  points to the output vector
46
 * @param[in]  numSamples number of complex samples in each vector
47
 * @return none.
48
 *
49
 * <b>Scaling and Overflow Behavior:</b>
50
 * \par
51
 * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
52
 * Input down scaling is not required.
53
 */
54
 
55
void arm_cmplx_mult_cmplx_q31(
56
  q31_t * pSrcA,
57
  q31_t * pSrcB,
58
  q31_t * pDst,
59
  uint32_t numSamples)
60
{
61
  q31_t a, b, c, d;                              /* Temporary variables to store real and imaginary values */
62
  uint32_t blkCnt;                               /* loop counters */
63
  q31_t mul1, mul2, mul3, mul4;
64
  q31_t out1, out2;
65
 
66
#if defined (ARM_MATH_DSP)
67
 
68
  /* Run the below code for Cortex-M4 and Cortex-M3 */
69
 
70
  /* loop Unrolling */
71
  blkCnt = numSamples >> 2U;
72
 
73
  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
74
   ** a second loop below computes the remaining 1 to 3 samples. */
75
  while (blkCnt > 0U)
76
  {
77
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
78
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
79
    a = *pSrcA++;
80
    b = *pSrcA++;
81
    c = *pSrcB++;
82
    d = *pSrcB++;
83
 
84
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
85
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
86
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
87
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
88
 
89
    mul1 = (mul1 >> 1);
90
    mul2 = (mul2 >> 1);
91
    mul3 = (mul3 >> 1);
92
    mul4 = (mul4 >> 1);
93
 
94
    out1 = mul1 - mul2;
95
    out2 = mul3 + mul4;
96
 
97
    /* store the real result in 3.29 format in the destination buffer. */
98
    *pDst++ = out1;
99
    /* store the imag result in 3.29 format in the destination buffer. */
100
    *pDst++ = out2;
101
 
102
    a = *pSrcA++;
103
    b = *pSrcA++;
104
    c = *pSrcB++;
105
    d = *pSrcB++;
106
 
107
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
108
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
109
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
110
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
111
 
112
    mul1 = (mul1 >> 1);
113
    mul2 = (mul2 >> 1);
114
    mul3 = (mul3 >> 1);
115
    mul4 = (mul4 >> 1);
116
 
117
    out1 = mul1 - mul2;
118
    out2 = mul3 + mul4;
119
 
120
    /* store the real result in 3.29 format in the destination buffer. */
121
    *pDst++ = out1;
122
    /* store the imag result in 3.29 format in the destination buffer. */
123
    *pDst++ = out2;
124
 
125
    a = *pSrcA++;
126
    b = *pSrcA++;
127
    c = *pSrcB++;
128
    d = *pSrcB++;
129
 
130
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
131
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
132
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
133
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
134
 
135
    mul1 = (mul1 >> 1);
136
    mul2 = (mul2 >> 1);
137
    mul3 = (mul3 >> 1);
138
    mul4 = (mul4 >> 1);
139
 
140
    out1 = mul1 - mul2;
141
    out2 = mul3 + mul4;
142
 
143
    /* store the real result in 3.29 format in the destination buffer. */
144
    *pDst++ = out1;
145
    /* store the imag result in 3.29 format in the destination buffer. */
146
    *pDst++ = out2;
147
 
148
    a = *pSrcA++;
149
    b = *pSrcA++;
150
    c = *pSrcB++;
151
    d = *pSrcB++;
152
 
153
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
154
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
155
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
156
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
157
 
158
    mul1 = (mul1 >> 1);
159
    mul2 = (mul2 >> 1);
160
    mul3 = (mul3 >> 1);
161
    mul4 = (mul4 >> 1);
162
 
163
    out1 = mul1 - mul2;
164
    out2 = mul3 + mul4;
165
 
166
    /* store the real result in 3.29 format in the destination buffer. */
167
    *pDst++ = out1;
168
    /* store the imag result in 3.29 format in the destination buffer. */
169
    *pDst++ = out2;
170
 
171
    /* Decrement the blockSize loop counter */
172
    blkCnt--;
173
  }
174
 
175
  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
176
   ** No loop unrolling is used. */
177
  blkCnt = numSamples % 0x4U;
178
 
179
  while (blkCnt > 0U)
180
  {
181
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
182
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
183
    a = *pSrcA++;
184
    b = *pSrcA++;
185
    c = *pSrcB++;
186
    d = *pSrcB++;
187
 
188
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
189
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
190
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
191
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
192
 
193
    mul1 = (mul1 >> 1);
194
    mul2 = (mul2 >> 1);
195
    mul3 = (mul3 >> 1);
196
    mul4 = (mul4 >> 1);
197
 
198
    out1 = mul1 - mul2;
199
    out2 = mul3 + mul4;
200
 
201
    /* store the real result in 3.29 format in the destination buffer. */
202
    *pDst++ = out1;
203
    /* store the imag result in 3.29 format in the destination buffer. */
204
    *pDst++ = out2;
205
 
206
    /* Decrement the blockSize loop counter */
207
    blkCnt--;
208
  }
209
 
210
#else
211
 
212
  /* Run the below code for Cortex-M0 */
213
 
214
  /* loop Unrolling */
215
  blkCnt = numSamples >> 1U;
216
 
217
  /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.
218
   ** a second loop below computes the remaining 1 sample. */
219
  while (blkCnt > 0U)
220
  {
221
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
222
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
223
    a = *pSrcA++;
224
    b = *pSrcA++;
225
    c = *pSrcB++;
226
    d = *pSrcB++;
227
 
228
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
229
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
230
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
231
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
232
 
233
    mul1 = (mul1 >> 1);
234
    mul2 = (mul2 >> 1);
235
    mul3 = (mul3 >> 1);
236
    mul4 = (mul4 >> 1);
237
 
238
    out1 = mul1 - mul2;
239
    out2 = mul3 + mul4;
240
 
241
    /* store the real result in 3.29 format in the destination buffer. */
242
    *pDst++ = out1;
243
    /* store the imag result in 3.29 format in the destination buffer. */
244
    *pDst++ = out2;
245
 
246
    a = *pSrcA++;
247
    b = *pSrcA++;
248
    c = *pSrcB++;
249
    d = *pSrcB++;
250
 
251
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
252
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
253
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
254
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
255
 
256
    mul1 = (mul1 >> 1);
257
    mul2 = (mul2 >> 1);
258
    mul3 = (mul3 >> 1);
259
    mul4 = (mul4 >> 1);
260
 
261
    out1 = mul1 - mul2;
262
    out2 = mul3 + mul4;
263
 
264
    /* store the real result in 3.29 format in the destination buffer. */
265
    *pDst++ = out1;
266
    /* store the imag result in 3.29 format in the destination buffer. */
267
    *pDst++ = out2;
268
 
269
    /* Decrement the blockSize loop counter */
270
    blkCnt--;
271
  }
272
 
273
  /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
274
   ** No loop unrolling is used. */
275
  blkCnt = numSamples % 0x2U;
276
 
277
  while (blkCnt > 0U)
278
  {
279
    /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
280
    /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
281
    a = *pSrcA++;
282
    b = *pSrcA++;
283
    c = *pSrcB++;
284
    d = *pSrcB++;
285
 
286
    mul1 = (q31_t) (((q63_t) a * c) >> 32);
287
    mul2 = (q31_t) (((q63_t) b * d) >> 32);
288
    mul3 = (q31_t) (((q63_t) a * d) >> 32);
289
    mul4 = (q31_t) (((q63_t) b * c) >> 32);
290
 
291
    mul1 = (mul1 >> 1);
292
    mul2 = (mul2 >> 1);
293
    mul3 = (mul3 >> 1);
294
    mul4 = (mul4 >> 1);
295
 
296
    out1 = mul1 - mul2;
297
    out2 = mul3 + mul4;
298
 
299
    /* store the real result in 3.29 format in the destination buffer. */
300
    *pDst++ = out1;
301
    /* store the imag result in 3.29 format in the destination buffer. */
302
    *pDst++ = out2;
303
 
304
    /* Decrement the blockSize loop counter */
305
    blkCnt--;
306
  }
307
 
308
#endif /* #if defined (ARM_MATH_DSP) */
309
 
310
}
311
 
312
/**
313
 * @} end of CmplxByCmplxMult group
314
 */