Subversion Repositories FuelGauge

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 mjames 1
/* ----------------------------------------------------------------------
2
 * Project:      CMSIS DSP Library
3
 * Title:        arm_shift_q7.c
4
 * Description:  Processing function for the Q7 Shifting
5
 *
6
 * $Date:        27. January 2017
7
 * $Revision:    V.1.5.1
8
 *
9
 * Target Processor: Cortex-M cores
10
 * -------------------------------------------------------------------- */
11
/*
12
 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
13
 *
14
 * SPDX-License-Identifier: Apache-2.0
15
 *
16
 * Licensed under the Apache License, Version 2.0 (the License); you may
17
 * not use this file except in compliance with the License.
18
 * You may obtain a copy of the License at
19
 *
20
 * www.apache.org/licenses/LICENSE-2.0
21
 *
22
 * Unless required by applicable law or agreed to in writing, software
23
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25
 * See the License for the specific language governing permissions and
26
 * limitations under the License.
27
 */
28
 
29
#include "arm_math.h"
30
 
31
/**
32
 * @ingroup groupMath
33
 */
34
 
35
/**
36
 * @addtogroup shift
37
 * @{
38
 */
39
 
40
 
41
/**
42
 * @brief  Shifts the elements of a Q7 vector a specified number of bits.
43
 * @param[in]  *pSrc points to the input vector
44
 * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
45
 * @param[out]  *pDst points to the output vector
46
 * @param[in]  blockSize number of samples in the vector
47
 * @return none.
48
 *
49
 * \par Conditions for optimum performance
50
 *  Input and output buffers should be aligned by 32-bit
51
 *
52
 *
53
 * <b>Scaling and Overflow Behavior:</b>
54
 * \par
55
 * The function uses saturating arithmetic.
56
 * Results outside of the allowable Q7 range [0x8 0x7F] will be saturated.
57
 */
58
 
59
void arm_shift_q7(
60
  q7_t * pSrc,
61
  int8_t shiftBits,
62
  q7_t * pDst,
63
  uint32_t blockSize)
64
{
65
  uint32_t blkCnt;                               /* loop counter */
66
  uint8_t sign;                                  /* Sign of shiftBits */
67
 
68
#if defined (ARM_MATH_DSP)
69
 
70
/* Run the below code for Cortex-M4 and Cortex-M3 */
71
  q7_t in1;                                      /* Input value1 */
72
  q7_t in2;                                      /* Input value2 */
73
  q7_t in3;                                      /* Input value3 */
74
  q7_t in4;                                      /* Input value4 */
75
 
76
 
77
  /*loop Unrolling */
78
  blkCnt = blockSize >> 2U;
79
 
80
  /* Getting the sign of shiftBits */
81
  sign = (shiftBits & 0x80);
82
 
83
  /* If the shift value is positive then do right shift else left shift */
84
  if (sign == 0U)
85
  {
86
    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
87
     ** a second loop below computes the remaining 1 to 3 samples. */
88
    while (blkCnt > 0U)
89
    {
90
      /* C = A << shiftBits */
91
      /* Read 4 inputs */
92
      in1 = *pSrc;
93
      in2 = *(pSrc + 1);
94
      in3 = *(pSrc + 2);
95
      in4 = *(pSrc + 3);
96
 
97
      /* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
98
      *__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8),
99
                                   __SSAT((in2 << shiftBits), 8),
100
                                   __SSAT((in3 << shiftBits), 8),
101
                                   __SSAT((in4 << shiftBits), 8));
102
      /* Update source pointer to process next sampels */
103
      pSrc += 4U;
104
 
105
      /* Decrement the loop counter */
106
      blkCnt--;
107
    }
108
 
109
    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
110
     ** No loop unrolling is used. */
111
    blkCnt = blockSize % 0x4U;
112
 
113
    while (blkCnt > 0U)
114
    {
115
      /* C = A << shiftBits */
116
      /* Shift the input and then store the result in the destination buffer. */
117
      *pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8);
118
 
119
      /* Decrement the loop counter */
120
      blkCnt--;
121
    }
122
  }
123
  else
124
  {
125
    shiftBits = -shiftBits;
126
    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
127
     ** a second loop below computes the remaining 1 to 3 samples. */
128
    while (blkCnt > 0U)
129
    {
130
      /* C = A >> shiftBits */
131
      /* Read 4 inputs */
132
      in1 = *pSrc;
133
      in2 = *(pSrc + 1);
134
      in3 = *(pSrc + 2);
135
      in4 = *(pSrc + 3);
136
 
137
      /* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
138
      *__SIMD32(pDst)++ = __PACKq7((in1 >> shiftBits), (in2 >> shiftBits),
139
                                   (in3 >> shiftBits), (in4 >> shiftBits));
140
 
141
 
142
      pSrc += 4U;
143
 
144
      /* Decrement the loop counter */
145
      blkCnt--;
146
    }
147
 
148
    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
149
     ** No loop unrolling is used. */
150
    blkCnt = blockSize % 0x4U;
151
 
152
    while (blkCnt > 0U)
153
    {
154
      /* C = A >> shiftBits */
155
      /* Shift the input and then store the result in the destination buffer. */
156
      in1 = *pSrc++;
157
      *pDst++ = (in1 >> shiftBits);
158
 
159
      /* Decrement the loop counter */
160
      blkCnt--;
161
    }
162
  }
163
 
164
#else
165
 
166
  /* Run the below code for Cortex-M0 */
167
 
168
  /* Getting the sign of shiftBits */
169
  sign = (shiftBits & 0x80);
170
 
171
  /* If the shift value is positive then do right shift else left shift */
172
  if (sign == 0U)
173
  {
174
    /* Initialize blkCnt with number of samples */
175
    blkCnt = blockSize;
176
 
177
    while (blkCnt > 0U)
178
    {
179
      /* C = A << shiftBits */
180
      /* Shift the input and then store the result in the destination buffer. */
181
      *pDst++ = (q7_t) __SSAT(((q15_t) * pSrc++ << shiftBits), 8);
182
 
183
      /* Decrement the loop counter */
184
      blkCnt--;
185
    }
186
  }
187
  else
188
  {
189
    /* Initialize blkCnt with number of samples */
190
    blkCnt = blockSize;
191
 
192
    while (blkCnt > 0U)
193
    {
194
      /* C = A >> shiftBits */
195
      /* Shift the input and then store the result in the destination buffer. */
196
      *pDst++ = (*pSrc++ >> -shiftBits);
197
 
198
      /* Decrement the loop counter */
199
      blkCnt--;
200
    }
201
  }
202
 
203
#endif /* #if defined (ARM_MATH_DSP) */
204
}
205
 
206
/**
207
 * @} end of shift group
208
 */