WebSVN – FuelGauge – Blame – /trunk/Drivers/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q7.c

Rev	Author	Line No.	Line
2	mjames	1	/* ----------------------------------------------------------------------
		2	* Project: CMSIS DSP Library
		3	* Title: arm_shift_q7.c
		4	* Description: Processing function for the Q7 Shifting
		5	*
		6	* $Date: 27. January 2017
		7	* $Revision: V.1.5.1
		8	*
		9	* Target Processor: Cortex-M cores
		10	* -------------------------------------------------------------------- */
		11	/*
		12	* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
		13	*
		14	* SPDX-License-Identifier: Apache-2.0
		15	*
		16	* Licensed under the Apache License, Version 2.0 (the License); you may
		17	* not use this file except in compliance with the License.
		18	* You may obtain a copy of the License at
		19	*
		20	* www.apache.org/licenses/LICENSE-2.0
		21	*
		22	* Unless required by applicable law or agreed to in writing, software
		23	* distributed under the License is distributed on an AS IS BASIS, WITHOUT
		24	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
		25	* See the License for the specific language governing permissions and
		26	* limitations under the License.
		27	*/
		28
		29	#include "arm_math.h"
		30
		31	/**
		32	* @ingroup groupMath
		33	*/
		34
		35	/**
		36	* @addtogroup shift
		37	* @{
		38	*/
		39
		40
		41	/**
		42	* @brief Shifts the elements of a Q7 vector a specified number of bits.
		43	* @param[in] *pSrc points to the input vector
		44	* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
		45	* @param[out] *pDst points to the output vector
		46	* @param[in] blockSize number of samples in the vector
		47	* @return none.
		48	*
		49	* \par Conditions for optimum performance
		50	* Input and output buffers should be aligned by 32-bit
		51	*
		52	*
		53	* <b>Scaling and Overflow Behavior:</b>
		54	* \par
		55	* The function uses saturating arithmetic.
		56	* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
		57	*/
		58
		59	void arm_shift_q7(
		60	q7_t * pSrc,
		61	int8_t shiftBits,
		62	q7_t * pDst,
		63	uint32_t blockSize)
		64	{
		65	uint32_t blkCnt; /* loop counter */
		66	uint8_t sign; /* Sign of shiftBits */
		67
		68	#if defined (ARM_MATH_DSP)
		69
		70	/* Run the below code for Cortex-M4 and Cortex-M3 */
		71	q7_t in1; /* Input value1 */
		72	q7_t in2; /* Input value2 */
		73	q7_t in3; /* Input value3 */
		74	q7_t in4; /* Input value4 */
		75
		76
		77	/* Loop unrolling */
		78	blkCnt = blockSize >> 2U;
		79
		80	/* Getting the sign of shiftBits */
		81	sign = (shiftBits & 0x80);
		82
		83	/* If the shift value is non-negative then do a left shift, else a right shift */
		84	if (sign == 0U)
		85	{
		86	/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
		87	** a second loop below computes the remaining 1 to 3 samples. */
		88	while (blkCnt > 0U)
		89	{
		90	/* C = A << shiftBits */
		91	/* Read 4 inputs */
		92	in1 = *pSrc;
		93	in2 = *(pSrc + 1);
		94	in3 = *(pSrc + 2);
		95	in4 = *(pSrc + 3);
		96
		97	/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
		98	*__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8),
		99	__SSAT((in2 << shiftBits), 8),
		100	__SSAT((in3 << shiftBits), 8),
		101	__SSAT((in4 << shiftBits), 8));
		102	/* Update source pointer to process next samples */
		103	pSrc += 4U;
		104
		105	/* Decrement the loop counter */
		106	blkCnt--;
		107	}
		108
		109	/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
		110	** No loop unrolling is used. */
		111	blkCnt = blockSize % 0x4U;
		112
		113	while (blkCnt > 0U)
		114	{
		115	/* C = A << shiftBits */
		116	/* Shift the input and then store the result in the destination buffer. */
		117	*pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8);
		118
		119	/* Decrement the loop counter */
		120	blkCnt--;
		121	}
		122	}
		123	else
		124	{
		125	shiftBits = -shiftBits;
		126	/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
		127	** a second loop below computes the remaining 1 to 3 samples. */
		128	while (blkCnt > 0U)
		129	{
		130	/* C = A >> shiftBits */
		131	/* Read 4 inputs */
		132	in1 = *pSrc;
		133	in2 = *(pSrc + 1);
		134	in3 = *(pSrc + 2);
		135	in4 = *(pSrc + 3);
		136
		137	/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
		138	*__SIMD32(pDst)++ = __PACKq7((in1 >> shiftBits), (in2 >> shiftBits),
		139	(in3 >> shiftBits), (in4 >> shiftBits));
		140
		141
		142	pSrc += 4U;
		143
		144	/* Decrement the loop counter */
		145	blkCnt--;
		146	}
		147
		148	/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
		149	** No loop unrolling is used. */
		150	blkCnt = blockSize % 0x4U;
		151
		152	while (blkCnt > 0U)
		153	{
		154	/* C = A >> shiftBits */
		155	/* Shift the input and then store the result in the destination buffer. */
		156	in1 = *pSrc++;
		157	*pDst++ = (in1 >> shiftBits);
		158
		159	/* Decrement the loop counter */
		160	blkCnt--;
		161	}
		162	}
		163
		164	#else
		165
		166	/* Run the below code for Cortex-M0 */
		167
		168	/* Getting the sign of shiftBits */
		169	sign = (shiftBits & 0x80);
		170
		171	/* If the shift value is non-negative then do a left shift, else a right shift */
		172	if (sign == 0U)
		173	{
		174	/* Initialize blkCnt with number of samples */
		175	blkCnt = blockSize;
		176
		177	while (blkCnt > 0U)
		178	{
		179	/* C = A << shiftBits */
		180	/* Shift the input and then store the result in the destination buffer. */
		181	*pDst++ = (q7_t) __SSAT(((q15_t) *pSrc++ << shiftBits), 8);
		182
		183	/* Decrement the loop counter */
		184	blkCnt--;
		185	}
		186	}
		187	else
		188	{
		189	/* Initialize blkCnt with number of samples */
		190	blkCnt = blockSize;
		191
		192	while (blkCnt > 0U)
		193	{
		194	/* C = A >> shiftBits */
		195	/* Shift the input and then store the result in the destination buffer. */
		196	*pDst++ = (*pSrc++ >> -shiftBits);
		197
		198	/* Decrement the loop counter */
		199	blkCnt--;
		200	}
		201	}
		202
		203	#endif /* #if defined (ARM_MATH_DSP) */
		204	}
		205
		206	/**
		207	* @} end of shift group
		208	*/

Subversion Repositories FuelGauge

(root)/trunk/Drivers/CMSIS/DSP/Source/BasicMathFunctions/arm_shift_q7.c – Rev 2