auto import from //depot/cupcake/@135843
[android/platform/external/neven.git] / Embedded / common / src / b_TensorEm / Flt16Vec.c
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* ---- includes ----------------------------------------------------------- */
18
19 #include "b_TensorEm/Flt16Vec.h"
20 #include "b_BasicEm/Memory.h"
21 #include "b_BasicEm/Math.h"
22 #include "b_BasicEm/Functions.h"
23
24 /* ------------------------------------------------------------------------- */
25
26 /* ========================================================================= */
27 /*                                                                           */
28 /* ---- \ghd{ auxiliary functions } ---------------------------------------- */
29 /*                                                                           */
30 /* ========================================================================= */
31
32 /* ------------------------------------------------------------------------- */
33
34 /* ========================================================================= */
35 /*                                                                           */
36 /* ---- \ghd{ constructor / destructor } ----------------------------------- */
37 /*                                                                           */
38 /* ========================================================================= */
39
40 /* ------------------------------------------------------------------------- */
41
42 void bts_Flt16Vec_init( struct bbs_Context* cpA,
43                                                 struct bts_Flt16Vec* ptrA )
44 {
45         bbs_Int16Arr_init( cpA, &ptrA->arrE );
46         ptrA->expE = 0;
47 }
48
49 /* ------------------------------------------------------------------------- */
50
51 void bts_Flt16Vec_exit( struct bbs_Context* cpA,
52                                                 struct bts_Flt16Vec* ptrA )
53 {
54         bbs_Int16Arr_exit( cpA, &ptrA->arrE );
55         ptrA->expE = 0;
56 }
57
58 /* ------------------------------------------------------------------------- */
59
60 /* ========================================================================= */
61 /*                                                                           */
62 /* ---- \ghd{ operators } -------------------------------------------------- */
63 /*                                                                           */
64 /* ========================================================================= */
65
66 /* ------------------------------------------------------------------------- */
67
68 void bts_Flt16Vec_copy( struct bbs_Context* cpA,
69                                                 struct bts_Flt16Vec* ptrA, 
70                                                 const struct bts_Flt16Vec* srcPtrA )
71 {
72         bbs_Int16Arr_copy( cpA, &ptrA->arrE, &srcPtrA->arrE );
73         ptrA->expE = srcPtrA->expE;
74 }
75
76 /* ------------------------------------------------------------------------- */
77
78 flag bts_Flt16Vec_equal( struct bbs_Context* cpA,
79                                                  const struct bts_Flt16Vec* ptrA, 
80                                                  const struct bts_Flt16Vec* srcPtrA )
81 {
82         if( !bbs_Int16Arr_equal( cpA, &ptrA->arrE, &srcPtrA->arrE ) ) return FALSE;
83         if( ptrA->expE != srcPtrA->expE ) return FALSE;
84         return TRUE;
85 }
86
87 /* ------------------------------------------------------------------------- */
88
89 /* ========================================================================= */
90 /*                                                                           */
91 /* ---- \ghd{ query functions } -------------------------------------------- */
92 /*                                                                           */
93 /* ========================================================================= */
94
95 /* ------------------------------------------------------------------------- */
96
97 int16 bts_Flt16Vec_avg( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA )
98 {
99         uint16 iL;
100         uint16 sizeL = ptrA->arrE.sizeE;
101         int32 sumL = 0;
102         const int16* srcL = ptrA->arrE.arrPtrE;
103         for( iL = 0; iL < sizeL; iL++ )
104         {
105                 sumL += srcL[ iL ];
106         }
107         return sumL / ( int32 )sizeL;
108 }
109
110 /* ------------------------------------------------------------------------- */
111
112 uint32 bts_Flt16Vec_norm( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA )
113 {
114         return bbs_vecNorm16( ptrA->arrE.arrPtrE, ptrA->arrE.sizeE );
115 }
116
117 /* ------------------------------------------------------------------------- */
118
119 uint16 bts_Flt16Vec_maxAbs( struct bbs_Context* cpA, const struct bts_Flt16Vec* ptrA )
120 {
121         uint16 iL;
122         uint16 sizeL = ptrA->arrE.sizeE;
123         uint16 maxL = 0;
124         const int16* srcL = ptrA->arrE.arrPtrE;
125         for( iL = 0; iL < sizeL; iL++ )
126         {
127                 uint16 vL = srcL[ iL ] > 0 ? srcL[ iL ] : -srcL[ iL ];
128                 maxL = vL > maxL ? vL : maxL;
129         }
130         return maxL;
131 }
132
133 /* ------------------------------------------------------------------------- */
134
135 /* ========================================================================= */
136 /*                                                                           */
137 /* ---- \ghd{ modify functions } ------------------------------------------- */
138 /*                                                                           */
139 /* ========================================================================= */
140
141 /* ------------------------------------------------------------------------- */
142         
143 void bts_Flt16Vec_create( struct bbs_Context* cpA,
144                                                   struct bts_Flt16Vec* ptrA, 
145                                                   uint32 sizeA,
146                                                   struct bbs_MemSeg* mspA )
147 {
148         bbs_Int16Arr_create( cpA, &ptrA->arrE, sizeA, mspA );
149 }
150
151 /* ------------------------------------------------------------------------- */
152         
153 void bts_Flt16Vec_size( struct bbs_Context* cpA,
154                                                 struct bts_Flt16Vec* ptrA, 
155                                                 uint32 sizeA )
156 {
157         bbs_Int16Arr_size( cpA, &ptrA->arrE, sizeA );
158 }
159
160 /* ------------------------------------------------------------------------- */
161         
162 /* ========================================================================= */
163 /*                                                                           */
164 /* ---- \ghd{ I/O } -------------------------------------------------------- */
165 /*                                                                           */
166 /* ========================================================================= */
167
168 /* ------------------------------------------------------------------------- */
169         
170 uint32 bts_Flt16Vec_memSize( struct bbs_Context* cpA,
171                                                           const struct bts_Flt16Vec *ptrA )
172 {
173         return  bbs_SIZEOF16( uint32 ) /* mem size */
174                 + bbs_Int16Arr_memSize( cpA, &ptrA->arrE )
175                 + bbs_SIZEOF16( ptrA->expE );
176 }
177
178 /* ------------------------------------------------------------------------- */
179         
180 uint32 bts_Flt16Vec_memWrite( struct bbs_Context* cpA,
181                                                            const struct bts_Flt16Vec* ptrA, 
182                                                            uint16* memPtrA )
183 {
184         uint32 memSizeL = bts_Flt16Vec_memSize( cpA, ptrA );
185         memPtrA += bbs_memWrite32( &memSizeL, memPtrA );
186         memPtrA += bbs_Int16Arr_memWrite( cpA, &ptrA->arrE, memPtrA );
187         memPtrA += bbs_memWrite16( &ptrA->expE, memPtrA );
188         return memSizeL;
189 }
190
191 /* ------------------------------------------------------------------------- */
192         
193 uint32 bts_Flt16Vec_memRead( struct bbs_Context* cpA,
194                                                           struct bts_Flt16Vec* ptrA, 
195                                                           const uint16* memPtrA,
196                                                           struct bbs_MemSeg* mspA )
197 {
198         uint32 memSizeL;
199         if( bbs_Context_error( cpA ) ) return 0;
200         memPtrA += bbs_memRead32( &memSizeL, memPtrA );
201         memPtrA += bbs_Int16Arr_memRead( cpA, &ptrA->arrE, memPtrA, mspA );
202         memPtrA += bbs_memRead16( &ptrA->expE, memPtrA );
203
204         if( memSizeL != bts_Flt16Vec_memSize( cpA, ptrA ) )
205         {
206                 bbs_ERR0( bbs_ERR_CORRUPT_DATA, "uint32 bts_Flt16Vec_memRead( const struct bts_Flt16Vec* ptrA, const void* memPtrA ):\n"
207                    "size mismatch" ); 
208                 return 0;
209         }
210         return memSizeL;
211 }
212
213 /* ------------------------------------------------------------------------- */
214         
215 /* ========================================================================= */
216 /*                                                                           */
217 /* ---- \ghd{ exec functions } --------------------------------------------- */
218 /*                                                                           */
219 /* ========================================================================= */
220
221 /* ------------------------------------------------------------------------- */
222
223 void bts_Flt16Vec_maximizeMantisse( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
224 {
225     uint32 maxAbsL = bts_Flt16Vec_maxAbs( cpA, ptrA );
226         int16 shlL = 0;
227
228         if( maxAbsL == 0 ) return; /* cannot maximize 0 */
229
230         while( maxAbsL < 0x4000 )
231         {
232                 shlL++;
233                 maxAbsL <<= 1;
234         }
235
236         if( shlL > 0 )
237         {
238                 uint32 iL;
239                 uint32 sizeL = ptrA->arrE.sizeE;
240                 int16* dstL = ptrA->arrE.arrPtrE;
241                 for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] <<= shlL;
242                 ptrA->expE -= shlL;
243         }
244 }
245
246 /* ------------------------------------------------------------------------- */
247
248 uint32 bts_Flt16Vec_maximizeAbsValue( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
249 {
250     int32 maxAbsL = bts_Flt16Vec_maxAbs( cpA, ptrA );
251         int32 fL;
252         if( maxAbsL == 0 ) return 0; /* vector is zero */
253
254         fL = ( int32 )0x7FFF0000 / maxAbsL;
255
256         {
257                 uint32 iL;
258                 uint32 sizeL = ptrA->arrE.sizeE;
259                 int16* dstL = ptrA->arrE.arrPtrE;
260                 for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( int32 )dstL[ iL ] * fL + 32768 ) >> 16;
261         }
262
263         return fL;
264 }
265
266 /* ------------------------------------------------------------------------- */
267
268 void bts_Flt16Vec_zeroAverage( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
269 {
270         uint16 iL;
271         uint16 sizeL = ptrA->arrE.sizeE;
272         int16* dstL = ptrA->arrE.arrPtrE;
273         int16 avgL = bts_Flt16Vec_avg( cpA, ptrA );
274         for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] -= avgL;
275 }
276
277 /* ------------------------------------------------------------------------- */
278
/* Rescales the mantissa values of *ptrA by the reciprocal of the value
 * returned by bts_Flt16Vec_norm and sets the exponent accordingly.
 *
 * A vector whose norm is 0 is left completely unchanged. Otherwise the
 * exponent is overwritten (its previous value is not consulted) with
 * expL - 16, where expL is the shift that was applied to the norm.
 */
void bts_Flt16Vec_normalize( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
{
        uint32 normL = bts_Flt16Vec_norm( cpA, ptrA );

        if( normL == 0 ) 
        {
                /* vector is zero - do nothing */
                return; 
        }
        else
        {
                int16* dstL = ptrA->arrE.arrPtrE;
                uint16 iL;
                uint16 sizeL = ptrA->arrE.sizeE;
            int16 expL = 0; /* shift applied to normL: negative = right shift, positive = left shift */
                int32 fL;

                /* let norm occupy 17 bits */
                if( ( normL & 0xFFFE0000 ) != 0 )
                {
                        /* norm is too wide: shift right until nothing above bit 16 remains */
                        while( ( ( normL >> -expL ) & 0xFFFE0000 ) != 0 ) expL--;
                        normL >>= -expL;
                }
                else
                {
                        /* norm is too narrow: shift left until a bit at position 16 or above is set */
                        while( ( ( normL <<  expL ) & 0xFFFF0000 ) == 0 ) expL++;
                        normL <<=  expL;
                }

                /* normL now lies in [0x10000, 0x1FFFF], hence fL lies in [0x8000, 0xFFFF] */
                /* fL is positive and occupies only 16 bits - a product with int16 fits in int32 */
                fL = ( uint32 )0xFFFFFFFF / normL;

                /* multiply with factor */
                /* ( x >> 15 ) + 1 ) >> 1 is a right shift by 16 bits with rounding */
                for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( ( ( int32 )dstL[ iL ] * fL ) >> 15 ) + 1 ) >> 1;

                /* set exponent */
                ptrA->expE = expL - 16;
        }
/* disabled debug check of the resulting norm:
        {
                uint32 testNormL = bts_Flt16Vec_norm( cpA, ptrA );
                printf( "test norm %f\n", ( float )testNormL / ( 1 << -ptrA->expE ) );
        }
*/
}
324
325 /* ------------------------------------------------------------------------- */
326
327 void bts_Flt16Vec_setZero( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA )
328 {
329         bbs_Int16Arr_fill( cpA, &ptrA->arrE, 0 );
330         ptrA->expE = 0;
331 }
332
333 /* ------------------------------------------------------------------------- */
334
/* Multiplies the vector *ptrA by the factor given as mantissa valA and
 * exponent expA (presumably meaning valA * 2^expA - confirm against the
 * header documentation).
 *
 * valA == 0 turns the vector into the zero vector via bts_Flt16Vec_setZero.
 * Otherwise the factor is first brought to 16 bit precision (the shift is
 * compensated in the local exponent), then every mantissa value is
 * multiplied by it and shifted right by 16 bits with rounding, and the
 * vector exponent is raised by the adjusted factor exponent plus 16.
 */
void bts_Flt16Vec_mul( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA, int32 valA, int16 expA )
{
        int32 valL = valA;
        int16 expL = expA;

        if( valL == 0 )
        {
                bts_Flt16Vec_setZero( cpA, ptrA );
                return;
        }
        else
        {
                uint32 iL;
                uint32 sizeL = ptrA->arrE.sizeE;
                int16* dstL = ptrA->arrE.arrPtrE;

                /* adjust valL to maximum 16 bit accuracy  */
                /* NOTE(review): -valL overflows when valA is the most negative int32 - confirm callers never pass it */
                uint32 absValL = valL > 0 ? valL : -valL;
                if( ( absValL & 0xFFFF8000 ) != 0 )
                {
                        /* magnitude needs more than 15 bits: count the surplus bits */
                        int32 shrL = 0;
                        while( ( absValL & 0xFFFF8000 ) != 0 )
                        {
                                absValL >>= 1;
                                shrL++;
                        }

                        /* shrL is at least 1 here because this branch is only entered with surplus bits */
                        if( shrL > 0 ) 
                        {
                                /* shift right by shrL bits with rounding; compensate in the exponent */
                                valL = ( ( valL >> ( shrL - 1 ) ) + 1 ) >> 1;
                                expL += shrL;
                                /* rounding up may have produced 0x8000, which no longer fits in 15 bits */
                                if( valL >= 0x08000 ) valL = 0x07FFF; /* saturate */
                        }
                }
                else
                {
                        /* magnitude is small: count the shifts needed to bring its top bit to position 14 */
                        int32 shlL = 0;
                        while( ( absValL & 0xFFFFC000 ) == 0 )
                        {
                                absValL <<= 1;
                                shlL++;
                        }

                        /* NOTE(review): for negative valL this left shift is formally undefined in C */
                        valL <<= shlL;
                        expL -= shlL;
                }

                for( iL = 0; iL < sizeL; iL++ )
                {
                        /* product shifted right by 16 bits with rounding */
                        dstL[ iL ] = ( ( ( ( int32 )dstL[ iL ] * valL ) >> 15 ) + 1 ) >> 1;
                }
                /* + 16 compensates the 16 bit right shift applied to every product */
                ptrA->expE += expL + 16;
        }
}
389
390 /* ------------------------------------------------------------------------- */
391
392 void bts_Flt16Vec_dotPtrd( struct bbs_Context* cpA, struct bts_Flt16Vec* vp1A, struct bts_Flt16Vec* vp2A, int32* manPtrA, int32* expPtrA )
393 {
394         bbs_DEF_fNameL( "void bts_Flt16Vec_dotPtrd( struct bbs_Context* cpA, struct bts_Flt16Vec* vp1A, struct bts_Flt16Vec* vp2A, int32* matPtrA, int32* expPtrA )" )
395         uint16 iL;
396         uint16 sizeL = vp1A->arrE.sizeE;
397         const int16* arr1L = vp1A->arrE.arrPtrE;
398         const int16* arr2L = vp2A->arrE.arrPtrE;
399         int16 shrm1L = -1; /* shift minus 1 */
400         int32 sumL;
401
402         if( vp1A->arrE.sizeE != vp2A->arrE.sizeE )
403         {
404                 bbs_ERROR1( "%s:\nVectors have different size", fNameL );
405                 return;
406         }
407
408         sumL = 0;
409         /* shrm1L == -1 */
410         for( iL = 0; iL < sizeL; iL++ )
411         {
412                 sumL += ( int32 )arr1L[ iL ] * ( int32 )arr2L[ iL ];
413                 if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) break;
414         }
415
416         if( iL < sizeL )
417         {
418                 /* danger of overflow: increase shift; adjust sum */
419                 shrm1L++;
420                 sumL = ( ( sumL >> 1 ) + 1 ) >> 1;
421
422                 /* shrm1L == 0 */
423                 for( iL = 0; iL < sizeL; iL++ )
424                 {
425                         sumL += ( int32 )( ( arr1L[ iL ] + 1 ) >> 1 ) * ( int32 )( ( arr2L[ iL ] + 1 ) >> 1 );
426                         if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 ) break;
427                 }
428
429                 for( iL = 0; iL < sizeL; iL++ )
430                 {
431                         if( ( ( ( sumL > 0 ) ? sumL : -sumL ) & 0xC0000000 ) != 0 )
432                         {
433                                 /* danger of overflow: increase shift; adjust sum */
434                                 shrm1L++;
435                                 sumL = ( ( sumL >> 1 ) + 1 ) >> 1;
436                         }
437
438                         sumL += ( int32 )( ( ( arr1L[ iL ] >> shrm1L ) + 1 ) >> 1 ) * ( int32 )( ( ( arr2L[ iL ] >> shrm1L ) + 1 ) >> 1 );
439                 }
440         }
441
442         if( manPtrA != NULL ) *manPtrA = sumL;
443         if( expPtrA != NULL ) *expPtrA = vp1A->expE + vp2A->expE + ( ( shrm1L + 1 ) << 1 );
444 }
445
446 /* ------------------------------------------------------------------------- */
447
448 void bts_Flt16Vec_append( struct bbs_Context* cpA, struct bts_Flt16Vec* ptrA, struct bts_Flt16Vec* srcPtrA )
449 {
450         if( ptrA->arrE.sizeE == 0 ) 
451         {
452                 bts_Flt16Vec_copy( cpA, ptrA, srcPtrA );
453         }
454         else
455         {
456                 uint32 idxL = ptrA->arrE.sizeE;
457                 bts_Flt16Vec_size( cpA, ptrA, idxL + srcPtrA->arrE.sizeE );
458
459                 /* copy data */
460                 bbs_memcpy16( ptrA->arrE.arrPtrE + idxL, srcPtrA->arrE.arrPtrE, srcPtrA->arrE.sizeE );
461
462                 /* equalize exponent */
463                 if( ptrA->expE > srcPtrA->expE )
464                 {
465                         uint32 iL;
466                         uint32 sizeL = srcPtrA->arrE.sizeE;
467                         uint32 shrL = ptrA->expE - srcPtrA->expE;
468                         int16* dstL = ptrA->arrE.arrPtrE + idxL;
469                         for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( dstL[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1;
470                 }
471                 else if( ptrA->expE < srcPtrA->expE )
472                 {
473                         uint32 iL;
474                         uint32 sizeL = idxL;
475                         uint32 shrL = srcPtrA->expE - ptrA->expE;
476                         int16* dstL = ptrA->arrE.arrPtrE;
477                         for( iL = 0; iL < sizeL; iL++ ) dstL[ iL ] = ( ( dstL[ iL ] >> ( shrL - 1 ) ) + 1 ) >> 1;
478                         ptrA->expE = srcPtrA->expE;
479                 }
480         }
481 }
482
483 /* ------------------------------------------------------------------------- */
484
485 /* ========================================================================= */
486