Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit 9f225e9

Browse files
committed
Introduce helper SIMD functions for small byte arrays
vector8_min - helper for emulating ">=" semantics.
vector8_highbit_mask - used to turn the result of a vector comparison into a bitmask.
Author: Masahiko Sawada. Reviewed by Nathan Bossart, with additional adjustments by me.
Discussion: https://postgr.es/m/CAFBsxsHbBm_M22gLBO%2BAZT4mfMq3L_oX3wdKZxjeNnT7fHsYMQ%40mail.gmail.com
1 parent 60c0782 commit 9f225e9

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed

src/include/port/simd.h

+47
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ static inline bool vector8_has_le(const Vector8 v, const uint8 c);
7979
static inline bool vector8_is_highbit_set(const Vector8 v);
8080
#ifndef USE_NO_SIMD
8181
static inline bool vector32_is_highbit_set(const Vector32 v);
82+
static inline uint32 vector8_highbit_mask(const Vector8 v);
8283
#endif
8384

8485
/* arithmetic operations */
@@ -96,6 +97,7 @@ static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2);
9697
*/
9798
#ifndef USE_NO_SIMD
9899
static inline Vector8 vector8_eq(const Vector8 v1, const Vector8 v2);
100+
static inline Vector8 vector8_min(const Vector8 v1, const Vector8 v2);
99101
static inline Vector32 vector32_eq(const Vector32 v1, const Vector32 v2);
100102
#endif
101103

@@ -299,6 +301,36 @@ vector32_is_highbit_set(const Vector32 v)
299301
}
300302
#endif /* ! USE_NO_SIMD */
301303

304+
/*
305+
* Return a bitmask formed from the high-bit of each element.
306+
*/
307+
#ifndef USE_NO_SIMD
308+
static inline uint32
309+
vector8_highbit_mask(const Vector8 v)
310+
{
311+
#ifdef USE_SSE2
312+
return (uint32) _mm_movemask_epi8(v);
313+
#elif defined(USE_NEON)
314+
/*
315+
* Note: It would be faster to use vget_lane_u64 and vshrn_n_u16, but that
316+
* returns a uint64, making it inconvenient to combine mask values from
317+
* multiple vectors.
318+
*/
319+
static const uint8 mask[16] = {
320+
1 << 0, 1 << 1, 1 << 2, 1 << 3,
321+
1 << 4, 1 << 5, 1 << 6, 1 << 7,
322+
1 << 0, 1 << 1, 1 << 2, 1 << 3,
323+
1 << 4, 1 << 5, 1 << 6, 1 << 7,
324+
};
325+
326+
uint8x16_t masked = vandq_u8(vld1q_u8(mask), (uint8x16_t) vshrq_n_s8(v, 7));
327+
uint8x16_t maskedhi = vextq_u8(masked, masked, 8);
328+
329+
return (uint32) vaddvq_u16((uint16x8_t) vzip1q_u8(masked, maskedhi));
330+
#endif
331+
}
332+
#endif /* ! USE_NO_SIMD */
333+
302334
/*
303335
* Return the bitwise OR of the inputs
304336
*/
@@ -372,4 +404,19 @@ vector32_eq(const Vector32 v1, const Vector32 v2)
372404
}
373405
#endif /* ! USE_NO_SIMD */
374406

407+
/*
408+
* Given two vectors, return a vector with the minimum element of each.
409+
*/
410+
#ifndef USE_NO_SIMD
411+
static inline Vector8
412+
vector8_min(const Vector8 v1, const Vector8 v2)
413+
{
414+
#ifdef USE_SSE2
415+
return _mm_min_epu8(v1, v2);
416+
#elif defined(USE_NEON)
417+
return vminq_u8(v1, v2);
418+
#endif
419+
}
420+
#endif /* ! USE_NO_SIMD */
421+
375422
#endif /* SIMD_H */

0 commit comments

Comments
 (0)