WebSVN – LedShow – Blame – /trunk/Drivers/CMSIS/DSP_Lib/Source/BasicMathFunctions/arm_shift_q7.c

Rev	Author	Line No.	Line
2	mjames	1	/* ----------------------------------------------------------------------
		2	* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
		3	*
		4	* $Date: 19. March 2015
		5	* $Revision: V.1.4.5
		6	*
		7	* Project: CMSIS DSP Library
		8	* Title: arm_shift_q7.c
		9	*
		10	* Description: Processing function for the Q7 Shifting
		11	*
		12	* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
		13	*
		14	* Redistribution and use in source and binary forms, with or without
		15	* modification, are permitted provided that the following conditions
		16	* are met:
		17	* - Redistributions of source code must retain the above copyright
		18	* notice, this list of conditions and the following disclaimer.
		19	* - Redistributions in binary form must reproduce the above copyright
		20	* notice, this list of conditions and the following disclaimer in
		21	* the documentation and/or other materials provided with the
		22	* distribution.
		23	* - Neither the name of ARM LIMITED nor the names of its contributors
		24	* may be used to endorse or promote products derived from this
		25	* software without specific prior written permission.
		26	*
		27	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
		28	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
		29	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
		30	* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
		31	* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
		32	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
		33	* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
		34	* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
		35	* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
		36	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
		37	* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
		38	* POSSIBILITY OF SUCH DAMAGE.
		39	* -------------------------------------------------------------------- */
		40
		41	#include "arm_math.h"
		42
		43	/**
		44	* @ingroup groupMath
		45	*/
		46
		47	/**
		48	* @addtogroup shift
		49	* @{
		50	*/
		51
		52
		53	/**
		54	 * @brief Shifts the elements of a Q7 vector a specified number of bits.
		55	 * @param[in] *pSrc points to the input vector
		56	 * @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
		57	 * @param[out] *pDst points to the output vector
		58	 * @param[in] blockSize number of samples in the vector
		59	 * @return none.
		60	 *
		61	 * \par Conditions for optimum performance
		62	 * Input and output buffers should be aligned by 32-bit
		63	 *
		64	 *
		65	 * <b>Scaling and Overflow Behavior:</b>
		66	 * \par
		67	 * The function uses saturating arithmetic for left shifts; right shifts are stored without saturation.
		68	 * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
		69	 */
		70
		71	void arm_shift_q7(
		72	  q7_t * pSrc,
		73	  int8_t shiftBits,
		74	  q7_t * pDst,
		75	  uint32_t blockSize)
		76	{
		77	  uint32_t blkCnt; /* loop counter */
		78	  uint8_t sign; /* Sign bit of shiftBits: 0 when non-negative, 0x80 when negative */
		79
		80	#ifndef ARM_MATH_CM0_FAMILY
		81
		82	  /* Run the below code for Cortex-M4 and Cortex-M3 */
		83	  q7_t in1; /* Input value1 */
		84	  q7_t in2; /* Input value2 */
		85	  q7_t in3; /* Input value3 */
		86	  q7_t in4; /* Input value4 */
		87
		88
		89	  /* Loop unrolling: number of complete groups of 4 samples */
		90	  blkCnt = blockSize >> 2u;
		91
		92	  /* Getting the sign of shiftBits */
		93	  sign = (shiftBits & 0x80);
		94
		95	  /* If the shift value is positive (or zero) then do left shift else right shift */
		96	  if(sign == 0u)
		97	  {
		98	    /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
		99	     ** a second loop below computes the remaining 1 to 3 samples. */
		100	    while(blkCnt > 0u)
		101	    {
		102	      /* C = A << shiftBits */
		103	      /* Read 4 inputs */
		104	      in1 = *pSrc;
		105	      in2 = *(pSrc + 1);
		106	      in3 = *(pSrc + 2);
		107	      in4 = *(pSrc + 3);
		108
		109	      /* Saturate each shifted sample to 8 bits and store the four packed results through __SIMD32(pDst) */
		110	      *__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8),
		111	                                   __SSAT((in2 << shiftBits), 8),
		112	                                   __SSAT((in3 << shiftBits), 8),
		113	                                   __SSAT((in4 << shiftBits), 8));
		114	      /* Update source pointer to process next samples */
		115	      pSrc += 4u;
		116
		117	      /* Decrement the loop counter */
		118	      blkCnt--;
		119	    }
		120
		121	    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
		122	     ** No loop unrolling is used. */
		123	    blkCnt = blockSize % 0x4u;
		124
		125	    while(blkCnt > 0u)
		126	    {
		127	      /* C = A << shiftBits */
		128	      /* Shift the input, saturate to 8 bits and then store the result in the destination buffer. */
		129	      *pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8);
		130
		131	      /* Decrement the loop counter */
		132	      blkCnt--;
		133	    }
		134	  }
		135	  else
		136	  {
		137	    shiftBits = -shiftBits; /* Use the magnitude as the right-shift count */
		138	    /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
		139	     ** a second loop below computes the remaining 1 to 3 samples. */
		140	    while(blkCnt > 0u)
		141	    {
		142	      /* C = A >> shiftBits */
		143	      /* Read 4 inputs */
		144	      in1 = *pSrc;
		145	      in2 = *(pSrc + 1);
		146	      in3 = *(pSrc + 2);
		147	      in4 = *(pSrc + 3);
		148
		149	      /* Store the four right-shifted samples packed through __SIMD32(pDst); no saturation is applied */
		150	      *__SIMD32(pDst)++ = __PACKq7((in1 >> shiftBits), (in2 >> shiftBits),
		151	                                   (in3 >> shiftBits), (in4 >> shiftBits));
		152
		153
		154	      pSrc += 4u;
		155
		156	      /* Decrement the loop counter */
		157	      blkCnt--;
		158	    }
		159
		160	    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
		161	     ** No loop unrolling is used. */
		162	    blkCnt = blockSize % 0x4u;
		163
		164	    while(blkCnt > 0u)
		165	    {
		166	      /* C = A >> shiftBits */
		167	      /* Shift the input and then store the result in the destination buffer. */
		168	      in1 = *pSrc++;
		169	      *pDst++ = (in1 >> shiftBits);
		170
		171	      /* Decrement the loop counter */
		172	      blkCnt--;
		173	    }
		174	  }
		175
		176	#else
		177
		178	  /* Run the below code for Cortex-M0 */
		179
		180	  /* Getting the sign of shiftBits */
		181	  sign = (shiftBits & 0x80);
		182
		183	  /* If the shift value is positive (or zero) then do left shift else right shift */
		184	  if(sign == 0u)
		185	  {
		186	    /* Initialize blkCnt with number of samples */
		187	    blkCnt = blockSize;
		188
		189	    while(blkCnt > 0u)
		190	    {
		191	      /* C = A << shiftBits */
		192	      /* Shift the input, saturate to 8 bits and then store the result in the destination buffer. */
		193	      *pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
		194
		195	      /* Decrement the loop counter */
		196	      blkCnt--;
		197	    }
		198	  }
		199	  else
		200	  {
		201	    /* Initialize blkCnt with number of samples */
		202	    blkCnt = blockSize;
		203
		204	    while(blkCnt > 0u)
		205	    {
		206	      /* C = A >> shiftBits */
		207	      /* Shift the input by the magnitude of shiftBits and then store the result in the destination buffer. */
		208	      *pDst++ = (*pSrc++ >> -shiftBits);
		209
		210	      /* Decrement the loop counter */
		211	      blkCnt--;
		212	    }
		213	  }
		214
		215	#endif /* #ifndef ARM_MATH_CM0_FAMILY */
		216	}
		217
		218	/**
		219	* @} end of shift group
		220	*/

Subversion Repositories LedShow

(root)/trunk/Drivers/CMSIS/DSP_Lib/Source/BasicMathFunctions/arm_shift_q7.c – Rev 2