- int diff = abs(lval - rptr[d]);\r
- cbuf[d] = (uchar)diff;\r
- hsad[d] = (ushort)(hsad[d] + diff);\r
+ __m128i rv = _mm_loadu_si128((const __m128i*)(rptr + d));\r
+ __m128i hsad_l = _mm_load_si128((__m128i*)(hsad + d));\r
+ __m128i hsad_h = _mm_load_si128((__m128i*)(hsad + d + 8));\r
+ __m128i diff = _mm_adds_epu8(_mm_subs_epu8(lv, rv), _mm_subs_epu8(rv, lv));\r
+ _mm_store_si128((__m128i*)(cbuf + d), diff);\r
+ hsad_l = _mm_add_epi16(hsad_l, _mm_unpacklo_epi8(diff,z));\r
+ hsad_h = _mm_add_epi16(hsad_h, _mm_unpackhi_epi8(diff,z));\r
+ _mm_store_si128((__m128i*)(hsad + d), hsad_l);\r
+ _mm_store_si128((__m128i*)(hsad + d + 8), hsad_h);\r