WebSVN – canSerial – Blame – /trunk/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c

Rev	Author	Line No.	Line
2	mjames	1	/* ----------------------------------------------------------------------
		2	* Project: CMSIS DSP Library
		3	* Title: arm_cmplx_dot_prod_q31.c
		4	* Description: Q31 complex dot product
		5	*
		6	* $Date: 27. January 2017
		7	* $Revision: V.1.5.1
		8	*
		9	* Target Processor: Cortex-M cores
		10	* -------------------------------------------------------------------- */
		11	/*
		12	* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
		13	*
		14	* SPDX-License-Identifier: Apache-2.0
		15	*
		16	* Licensed under the Apache License, Version 2.0 (the License); you may
		17	* not use this file except in compliance with the License.
		18	* You may obtain a copy of the License at
		19	*
		20	* www.apache.org/licenses/LICENSE-2.0
		21	*
		22	* Unless required by applicable law or agreed to in writing, software
		23	* distributed under the License is distributed on an AS IS BASIS, WITHOUT
		24	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
		25	* See the License for the specific language governing permissions and
		26	* limitations under the License.
		27	*/
		28
		29	#include "arm_math.h"
		30
		31	/**
		32	* @ingroup groupCmplxMath
		33	*/
		34
		35	/**
		36	* @addtogroup cmplx_dot_prod
		37	* @{
		38	*/
		39
		40	/**
		41	* @brief Q31 complex dot product
		42	* @param *pSrcA points to the first input vector
		43	* @param *pSrcB points to the second input vector
		44	* @param numSamples number of complex samples in each vector
		45	* @param *realResult real part of the result returned here
		46	* @param *imagResult imaginary part of the result returned here
		47	* @return none.
		48	*
		49	* <b>Scaling and Overflow Behavior:</b>
		50	* \par
		51	* The function is implemented using an internal 64-bit accumulator.
		52	* The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision and then shifted to 16.48 format.
		53	* The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits.
		54	* Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768.
		55	* The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format.
		56	* Input down scaling is not required.
		57	*/
		58
		59	void arm_cmplx_dot_prod_q31(
		60	q31_t * pSrcA,
		61	q31_t * pSrcB,
		62	uint32_t numSamples,
		63	q63_t * realResult,
		64	q63_t * imagResult)
		65	{
		66	q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */
		67	q31_t a0,b0,c0,d0;
		68
		69	#if defined (ARM_MATH_DSP)
		70
		71	/* Run the below code for Cortex-M4 and Cortex-M3 */
		72	uint32_t blkCnt; /* loop counter */
		73
		74
		75	/loop Unrolling /
		76	blkCnt = numSamples >> 2U;
		77
		78	/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
		79	** a second loop below computes the remaining 1 to 3 samples. */
		80	while (blkCnt > 0U)
		81	{
		82	a0 = *pSrcA++;
		83	b0 = *pSrcA++;
		84	c0 = *pSrcB++;
		85	d0 = *pSrcB++;
		86
		87	real_sum += ((q63_t)a0 * c0) >> 14;
		88	imag_sum += ((q63_t)a0 * d0) >> 14;
		89	real_sum -= ((q63_t)b0 * d0) >> 14;
		90	imag_sum += ((q63_t)b0 * c0) >> 14;
		91
		92	a0 = *pSrcA++;
		93	b0 = *pSrcA++;
		94	c0 = *pSrcB++;
		95	d0 = *pSrcB++;
		96
		97	real_sum += ((q63_t)a0 * c0) >> 14;
		98	imag_sum += ((q63_t)a0 * d0) >> 14;
		99	real_sum -= ((q63_t)b0 * d0) >> 14;
		100	imag_sum += ((q63_t)b0 * c0) >> 14;
		101
		102	a0 = *pSrcA++;
		103	b0 = *pSrcA++;
		104	c0 = *pSrcB++;
		105	d0 = *pSrcB++;
		106
		107	real_sum += ((q63_t)a0 * c0) >> 14;
		108	imag_sum += ((q63_t)a0 * d0) >> 14;
		109	real_sum -= ((q63_t)b0 * d0) >> 14;
		110	imag_sum += ((q63_t)b0 * c0) >> 14;
		111
		112	a0 = *pSrcA++;
		113	b0 = *pSrcA++;
		114	c0 = *pSrcB++;
		115	d0 = *pSrcB++;
		116
		117	real_sum += ((q63_t)a0 * c0) >> 14;
		118	imag_sum += ((q63_t)a0 * d0) >> 14;
		119	real_sum -= ((q63_t)b0 * d0) >> 14;
		120	imag_sum += ((q63_t)b0 * c0) >> 14;
		121
		122	/* Decrement the loop counter */
		123	blkCnt--;
		124	}
		125
		126	/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
		127	** No loop unrolling is used. */
		128	blkCnt = numSamples % 0x4U;
		129
		130	while (blkCnt > 0U)
		131	{
		132	a0 = *pSrcA++;
		133	b0 = *pSrcA++;
		134	c0 = *pSrcB++;
		135	d0 = *pSrcB++;
		136
		137	real_sum += ((q63_t)a0 * c0) >> 14;
		138	imag_sum += ((q63_t)a0 * d0) >> 14;
		139	real_sum -= ((q63_t)b0 * d0) >> 14;
		140	imag_sum += ((q63_t)b0 * c0) >> 14;
		141
		142	/* Decrement the loop counter */
		143	blkCnt--;
		144	}
		145
		146	#else
		147
		148	/* Run the below code for Cortex-M0 */
		149
		150	while (numSamples > 0U)
		151	{
		152	a0 = *pSrcA++;
		153	b0 = *pSrcA++;
		154	c0 = *pSrcB++;
		155	d0 = *pSrcB++;
		156
		157	real_sum += ((q63_t)a0 * c0) >> 14;
		158	imag_sum += ((q63_t)a0 * d0) >> 14;
		159	real_sum -= ((q63_t)b0 * d0) >> 14;
		160	imag_sum += ((q63_t)b0 * c0) >> 14;
		161
		162	/* Decrement the loop counter */
		163	numSamples--;
		164	}
		165
		166	#endif /* #if defined (ARM_MATH_DSP) */
		167
		168	/* Store the real and imaginary results in 16.48 format */
		169	*realResult = real_sum;
		170	*imagResult = imag_sum;
		171	}
		172
		173	/**
		174	* @} end of cmplx_dot_prod group
		175	*/

Subversion Repositories canSerial

(root)/trunk/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c – Rev 2