Warning, /graphics/krita/3rdparty/ext_heif/0003-arm-asm-primitives.patch is written in an unsupported language. File is not indexed.

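0001 Fixes "arm assembly fail to compile on 1.8" by assigning the ARM NEON routines to the [ALIGNED] slot of the affected primitive entries (addAvg, scale1D_128to64, convert_p2s, blockfill_s, add_ps, ssd_s, calcresidual, pixelavg_pp).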
0002 
0003 Downloaded from upstream bug report:
0004 https://bitbucket.org/multicoreware/x265/issues/406
0005 
0006 Signed-off-by: Bernd Kuhls <bernd.kuhls@t-online.de>
0007 
0008 --- ./source/common/arm/asm-primitives.cpp.orig 2018-05-21 02:33:10.000000000 -0600
0009 +++ ./source/common/arm/asm-primitives.cpp      2018-05-28 20:38:37.302378303 -0600
0010 @@ -48,77 +48,77 @@ void setupAssemblyPrimitives(EncoderPrim
0011          p.ssim_4x4x2_core = PFX(ssim_4x4x2_core_neon);
0012  
0013          // addAvg
0014 -         p.pu[LUMA_4x4].addAvg   = PFX(addAvg_4x4_neon);
0015 -         p.pu[LUMA_4x8].addAvg   = PFX(addAvg_4x8_neon);
0016 -         p.pu[LUMA_4x16].addAvg  = PFX(addAvg_4x16_neon);
0017 -         p.pu[LUMA_8x4].addAvg   = PFX(addAvg_8x4_neon);
0018 -         p.pu[LUMA_8x8].addAvg   = PFX(addAvg_8x8_neon);
0019 -         p.pu[LUMA_8x16].addAvg  = PFX(addAvg_8x16_neon);
0020 -         p.pu[LUMA_8x32].addAvg  = PFX(addAvg_8x32_neon);
0021 -         p.pu[LUMA_12x16].addAvg = PFX(addAvg_12x16_neon);
0022 -         p.pu[LUMA_16x4].addAvg  = PFX(addAvg_16x4_neon);
0023 -         p.pu[LUMA_16x8].addAvg  = PFX(addAvg_16x8_neon);
0024 -         p.pu[LUMA_16x12].addAvg = PFX(addAvg_16x12_neon);
0025 -         p.pu[LUMA_16x16].addAvg = PFX(addAvg_16x16_neon);
0026 -         p.pu[LUMA_16x32].addAvg = PFX(addAvg_16x32_neon);
0027 -         p.pu[LUMA_16x64].addAvg = PFX(addAvg_16x64_neon);
0028 -         p.pu[LUMA_24x32].addAvg = PFX(addAvg_24x32_neon);
0029 -         p.pu[LUMA_32x8].addAvg  = PFX(addAvg_32x8_neon);
0030 -         p.pu[LUMA_32x16].addAvg = PFX(addAvg_32x16_neon);
0031 -         p.pu[LUMA_32x24].addAvg = PFX(addAvg_32x24_neon);
0032 -         p.pu[LUMA_32x32].addAvg = PFX(addAvg_32x32_neon);
0033 -         p.pu[LUMA_32x64].addAvg = PFX(addAvg_32x64_neon);
0034 -         p.pu[LUMA_48x64].addAvg = PFX(addAvg_48x64_neon);
0035 -         p.pu[LUMA_64x16].addAvg = PFX(addAvg_64x16_neon);
0036 -         p.pu[LUMA_64x32].addAvg = PFX(addAvg_64x32_neon);
0037 -         p.pu[LUMA_64x48].addAvg = PFX(addAvg_64x48_neon);
0038 -         p.pu[LUMA_64x64].addAvg = PFX(addAvg_64x64_neon);
0039 +         p.pu[LUMA_4x4].addAvg[ALIGNED]   = PFX(addAvg_4x4_neon);
0040 +         p.pu[LUMA_4x8].addAvg[ALIGNED]   = PFX(addAvg_4x8_neon);
0041 +         p.pu[LUMA_4x16].addAvg[ALIGNED]  = PFX(addAvg_4x16_neon);
0042 +         p.pu[LUMA_8x4].addAvg[ALIGNED]   = PFX(addAvg_8x4_neon);
0043 +         p.pu[LUMA_8x8].addAvg[ALIGNED]   = PFX(addAvg_8x8_neon);
0044 +         p.pu[LUMA_8x16].addAvg[ALIGNED]  = PFX(addAvg_8x16_neon);
0045 +         p.pu[LUMA_8x32].addAvg[ALIGNED]  = PFX(addAvg_8x32_neon);
0046 +         p.pu[LUMA_12x16].addAvg[ALIGNED] = PFX(addAvg_12x16_neon);
0047 +         p.pu[LUMA_16x4].addAvg[ALIGNED]  = PFX(addAvg_16x4_neon);
0048 +         p.pu[LUMA_16x8].addAvg[ALIGNED]  = PFX(addAvg_16x8_neon);
0049 +         p.pu[LUMA_16x12].addAvg[ALIGNED] = PFX(addAvg_16x12_neon);
0050 +         p.pu[LUMA_16x16].addAvg[ALIGNED] = PFX(addAvg_16x16_neon);
0051 +         p.pu[LUMA_16x32].addAvg[ALIGNED] = PFX(addAvg_16x32_neon);
0052 +         p.pu[LUMA_16x64].addAvg[ALIGNED] = PFX(addAvg_16x64_neon);
0053 +         p.pu[LUMA_24x32].addAvg[ALIGNED] = PFX(addAvg_24x32_neon);
0054 +         p.pu[LUMA_32x8].addAvg[ALIGNED]  = PFX(addAvg_32x8_neon);
0055 +         p.pu[LUMA_32x16].addAvg[ALIGNED] = PFX(addAvg_32x16_neon);
0056 +         p.pu[LUMA_32x24].addAvg[ALIGNED] = PFX(addAvg_32x24_neon);
0057 +         p.pu[LUMA_32x32].addAvg[ALIGNED] = PFX(addAvg_32x32_neon);
0058 +         p.pu[LUMA_32x64].addAvg[ALIGNED] = PFX(addAvg_32x64_neon);
0059 +         p.pu[LUMA_48x64].addAvg[ALIGNED] = PFX(addAvg_48x64_neon);
0060 +         p.pu[LUMA_64x16].addAvg[ALIGNED] = PFX(addAvg_64x16_neon);
0061 +         p.pu[LUMA_64x32].addAvg[ALIGNED] = PFX(addAvg_64x32_neon);
0062 +         p.pu[LUMA_64x48].addAvg[ALIGNED] = PFX(addAvg_64x48_neon);
0063 +         p.pu[LUMA_64x64].addAvg[ALIGNED] = PFX(addAvg_64x64_neon);
0064  
0065          // chroma addAvg
0066 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg   = PFX(addAvg_4x2_neon);
0067 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg   = PFX(addAvg_4x4_neon);
0068 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg   = PFX(addAvg_4x8_neon);
0069 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg  = PFX(addAvg_4x16_neon);
0070 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg   = PFX(addAvg_6x8_neon);
0071 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg   = PFX(addAvg_8x2_neon);
0072 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg   = PFX(addAvg_8x4_neon);
0073 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg   = PFX(addAvg_8x6_neon);
0074 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg   = PFX(addAvg_8x8_neon);
0075 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg  = PFX(addAvg_8x16_neon);
0076 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg  = PFX(addAvg_8x32_neon);
0077 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg = PFX(addAvg_12x16_neon);
0078 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg  = PFX(addAvg_16x4_neon);
0079 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg  = PFX(addAvg_16x8_neon);
0080 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg = PFX(addAvg_16x12_neon);
0081 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg = PFX(addAvg_16x16_neon);
0082 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg = PFX(addAvg_16x32_neon);
0083 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg = PFX(addAvg_24x32_neon);
0084 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg  = PFX(addAvg_32x8_neon);
0085 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg = PFX(addAvg_32x16_neon);
0086 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg = PFX(addAvg_32x24_neon);
0087 -        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg = PFX(addAvg_32x32_neon);
0088 -
0089 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg   = PFX(addAvg_4x8_neon);
0090 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg  = PFX(addAvg_4x16_neon);
0091 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg  = PFX(addAvg_4x32_neon);
0092 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg  = PFX(addAvg_6x16_neon);
0093 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg   = PFX(addAvg_8x4_neon);
0094 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg   = PFX(addAvg_8x8_neon);
0095 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg  = PFX(addAvg_8x12_neon);
0096 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg  = PFX(addAvg_8x16_neon);
0097 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg  = PFX(addAvg_8x32_neon);
0098 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg  = PFX(addAvg_8x64_neon);
0099 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg = PFX(addAvg_12x32_neon);
0100 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg  = PFX(addAvg_16x8_neon);
0101 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg = PFX(addAvg_16x16_neon);
0102 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg = PFX(addAvg_16x24_neon);
0103 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg = PFX(addAvg_16x32_neon);
0104 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg = PFX(addAvg_16x64_neon);
0105 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg = PFX(addAvg_24x64_neon);
0106 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg = PFX(addAvg_32x16_neon);
0107 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg = PFX(addAvg_32x32_neon);
0108 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg = PFX(addAvg_32x48_neon);
0109 -        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg = PFX(addAvg_32x64_neon);
0110 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg[ALIGNED]   = PFX(addAvg_4x2_neon);
0111 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg[ALIGNED]   = PFX(addAvg_4x4_neon);
0112 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg[ALIGNED]   = PFX(addAvg_4x8_neon);
0113 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg[ALIGNED]  = PFX(addAvg_4x16_neon);
0114 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg[ALIGNED]   = PFX(addAvg_6x8_neon);
0115 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg[ALIGNED]   = PFX(addAvg_8x2_neon);
0116 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg[ALIGNED]   = PFX(addAvg_8x4_neon);
0117 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg[ALIGNED]   = PFX(addAvg_8x6_neon);
0118 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg[ALIGNED]   = PFX(addAvg_8x8_neon);
0119 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg[ALIGNED]  = PFX(addAvg_8x16_neon);
0120 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg[ALIGNED]  = PFX(addAvg_8x32_neon);
0121 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg[ALIGNED] = PFX(addAvg_12x16_neon);
0122 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg[ALIGNED]  = PFX(addAvg_16x4_neon);
0123 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg[ALIGNED]  = PFX(addAvg_16x8_neon);
0124 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg[ALIGNED] = PFX(addAvg_16x12_neon);
0125 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg[ALIGNED] = PFX(addAvg_16x16_neon);
0126 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg[ALIGNED] = PFX(addAvg_16x32_neon);
0127 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg[ALIGNED] = PFX(addAvg_24x32_neon);
0128 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg[ALIGNED]  = PFX(addAvg_32x8_neon);
0129 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg[ALIGNED] = PFX(addAvg_32x16_neon);
0130 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg[ALIGNED] = PFX(addAvg_32x24_neon);
0131 +        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg[ALIGNED] = PFX(addAvg_32x32_neon);
0132 +
0133 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg[ALIGNED]   = PFX(addAvg_4x8_neon);
0134 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg[ALIGNED]  = PFX(addAvg_4x16_neon);
0135 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg[ALIGNED]  = PFX(addAvg_4x32_neon);
0136 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg[ALIGNED]  = PFX(addAvg_6x16_neon);
0137 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg[ALIGNED]   = PFX(addAvg_8x4_neon);
0138 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg[ALIGNED]   = PFX(addAvg_8x8_neon);
0139 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg[ALIGNED]  = PFX(addAvg_8x12_neon);
0140 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg[ALIGNED]  = PFX(addAvg_8x16_neon);
0141 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg[ALIGNED]  = PFX(addAvg_8x32_neon);
0142 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg[ALIGNED]  = PFX(addAvg_8x64_neon);
0143 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg[ALIGNED] = PFX(addAvg_12x32_neon);
0144 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg[ALIGNED]  = PFX(addAvg_16x8_neon);
0145 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg[ALIGNED] = PFX(addAvg_16x16_neon);
0146 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg[ALIGNED] = PFX(addAvg_16x24_neon);
0147 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg[ALIGNED] = PFX(addAvg_16x32_neon);
0148 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg[ALIGNED] = PFX(addAvg_16x64_neon);
0149 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg[ALIGNED] = PFX(addAvg_24x64_neon);
0150 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg[ALIGNED] = PFX(addAvg_32x16_neon);
0151 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg[ALIGNED] = PFX(addAvg_32x32_neon);
0152 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg[ALIGNED] = PFX(addAvg_32x48_neon);
0153 +        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg[ALIGNED] = PFX(addAvg_32x64_neon);
0154  
0155          // quant
0156           p.quant = PFX(quant_neon);
0157 @@ -402,7 +402,7 @@ void setupAssemblyPrimitives(EncoderPrim
0158          p.scale2D_64to32  = PFX(scale2D_64to32_neon);
0159  
0160          // scale1D_128to64
0161 -        p.scale1D_128to64 = PFX(scale1D_128to64_neon);
0162 +        p.scale1D_128to64[ALIGNED] = PFX(scale1D_128to64_neon);
0163  
0164          // copy_count
0165          p.cu[BLOCK_4x4].copy_cnt     = PFX(copy_cnt_4_neon);
0166 @@ -411,37 +411,37 @@ void setupAssemblyPrimitives(EncoderPrim
0167          p.cu[BLOCK_32x32].copy_cnt   = PFX(copy_cnt_32_neon);
0168  
0169          // filterPixelToShort
0170 -        p.pu[LUMA_4x4].convert_p2s   = PFX(filterPixelToShort_4x4_neon);
0171 -        p.pu[LUMA_4x8].convert_p2s   = PFX(filterPixelToShort_4x8_neon);
0172 -        p.pu[LUMA_4x16].convert_p2s  = PFX(filterPixelToShort_4x16_neon);
0173 -        p.pu[LUMA_8x4].convert_p2s   = PFX(filterPixelToShort_8x4_neon);
0174 -        p.pu[LUMA_8x8].convert_p2s   = PFX(filterPixelToShort_8x8_neon);
0175 -        p.pu[LUMA_8x16].convert_p2s  = PFX(filterPixelToShort_8x16_neon);
0176 -        p.pu[LUMA_8x32].convert_p2s  = PFX(filterPixelToShort_8x32_neon);
0177 -        p.pu[LUMA_12x16].convert_p2s = PFX(filterPixelToShort_12x16_neon);
0178 -        p.pu[LUMA_16x4].convert_p2s  = PFX(filterPixelToShort_16x4_neon);
0179 -        p.pu[LUMA_16x8].convert_p2s  = PFX(filterPixelToShort_16x8_neon);
0180 -        p.pu[LUMA_16x12].convert_p2s = PFX(filterPixelToShort_16x12_neon);
0181 -        p.pu[LUMA_16x16].convert_p2s = PFX(filterPixelToShort_16x16_neon);
0182 -        p.pu[LUMA_16x32].convert_p2s = PFX(filterPixelToShort_16x32_neon);
0183 -        p.pu[LUMA_16x64].convert_p2s = PFX(filterPixelToShort_16x64_neon);
0184 -        p.pu[LUMA_24x32].convert_p2s = PFX(filterPixelToShort_24x32_neon);
0185 -        p.pu[LUMA_32x8].convert_p2s  = PFX(filterPixelToShort_32x8_neon);
0186 -        p.pu[LUMA_32x16].convert_p2s = PFX(filterPixelToShort_32x16_neon);
0187 -        p.pu[LUMA_32x24].convert_p2s = PFX(filterPixelToShort_32x24_neon);
0188 -        p.pu[LUMA_32x32].convert_p2s = PFX(filterPixelToShort_32x32_neon);
0189 -        p.pu[LUMA_32x64].convert_p2s = PFX(filterPixelToShort_32x64_neon);
0190 -        p.pu[LUMA_48x64].convert_p2s = PFX(filterPixelToShort_48x64_neon);
0191 -        p.pu[LUMA_64x16].convert_p2s = PFX(filterPixelToShort_64x16_neon);
0192 -        p.pu[LUMA_64x32].convert_p2s = PFX(filterPixelToShort_64x32_neon);
0193 -        p.pu[LUMA_64x48].convert_p2s = PFX(filterPixelToShort_64x48_neon);
0194 -        p.pu[LUMA_64x64].convert_p2s = PFX(filterPixelToShort_64x64_neon);
0195 +        p.pu[LUMA_4x4].convert_p2s[ALIGNED]   = PFX(filterPixelToShort_4x4_neon);
0196 +        p.pu[LUMA_4x8].convert_p2s[ALIGNED]   = PFX(filterPixelToShort_4x8_neon);
0197 +        p.pu[LUMA_4x16].convert_p2s[ALIGNED]  = PFX(filterPixelToShort_4x16_neon);
0198 +        p.pu[LUMA_8x4].convert_p2s[ALIGNED]   = PFX(filterPixelToShort_8x4_neon);
0199 +        p.pu[LUMA_8x8].convert_p2s[ALIGNED]   = PFX(filterPixelToShort_8x8_neon);
0200 +        p.pu[LUMA_8x16].convert_p2s[ALIGNED]  = PFX(filterPixelToShort_8x16_neon);
0201 +        p.pu[LUMA_8x32].convert_p2s[ALIGNED]  = PFX(filterPixelToShort_8x32_neon);
0202 +        p.pu[LUMA_12x16].convert_p2s[ALIGNED] = PFX(filterPixelToShort_12x16_neon);
0203 +        p.pu[LUMA_16x4].convert_p2s[ALIGNED]  = PFX(filterPixelToShort_16x4_neon);
0204 +        p.pu[LUMA_16x8].convert_p2s[ALIGNED]  = PFX(filterPixelToShort_16x8_neon);
0205 +        p.pu[LUMA_16x12].convert_p2s[ALIGNED] = PFX(filterPixelToShort_16x12_neon);
0206 +        p.pu[LUMA_16x16].convert_p2s[ALIGNED] = PFX(filterPixelToShort_16x16_neon);
0207 +        p.pu[LUMA_16x32].convert_p2s[ALIGNED] = PFX(filterPixelToShort_16x32_neon);
0208 +        p.pu[LUMA_16x64].convert_p2s[ALIGNED] = PFX(filterPixelToShort_16x64_neon);
0209 +        p.pu[LUMA_24x32].convert_p2s[ALIGNED] = PFX(filterPixelToShort_24x32_neon);
0210 +        p.pu[LUMA_32x8].convert_p2s[ALIGNED]  = PFX(filterPixelToShort_32x8_neon);
0211 +        p.pu[LUMA_32x16].convert_p2s[ALIGNED] = PFX(filterPixelToShort_32x16_neon);
0212 +        p.pu[LUMA_32x24].convert_p2s[ALIGNED] = PFX(filterPixelToShort_32x24_neon);
0213 +        p.pu[LUMA_32x32].convert_p2s[ALIGNED] = PFX(filterPixelToShort_32x32_neon);
0214 +        p.pu[LUMA_32x64].convert_p2s[ALIGNED] = PFX(filterPixelToShort_32x64_neon);
0215 +        p.pu[LUMA_48x64].convert_p2s[ALIGNED] = PFX(filterPixelToShort_48x64_neon);
0216 +        p.pu[LUMA_64x16].convert_p2s[ALIGNED] = PFX(filterPixelToShort_64x16_neon);
0217 +        p.pu[LUMA_64x32].convert_p2s[ALIGNED] = PFX(filterPixelToShort_64x32_neon);
0218 +        p.pu[LUMA_64x48].convert_p2s[ALIGNED] = PFX(filterPixelToShort_64x48_neon);
0219 +        p.pu[LUMA_64x64].convert_p2s[ALIGNED] = PFX(filterPixelToShort_64x64_neon);
0220  
0221          // Block_fill
0222 -        p.cu[BLOCK_4x4].blockfill_s   = PFX(blockfill_s_4x4_neon);
0223 -        p.cu[BLOCK_8x8].blockfill_s   = PFX(blockfill_s_8x8_neon);
0224 -        p.cu[BLOCK_16x16].blockfill_s = PFX(blockfill_s_16x16_neon);
0225 -        p.cu[BLOCK_32x32].blockfill_s = PFX(blockfill_s_32x32_neon);
0226 +        p.cu[BLOCK_4x4].blockfill_s[ALIGNED]   = PFX(blockfill_s_4x4_neon);
0227 +        p.cu[BLOCK_8x8].blockfill_s[ALIGNED]   = PFX(blockfill_s_8x8_neon);
0228 +        p.cu[BLOCK_16x16].blockfill_s[ALIGNED] = PFX(blockfill_s_16x16_neon);
0229 +        p.cu[BLOCK_32x32].blockfill_s[ALIGNED] = PFX(blockfill_s_32x32_neon);
0230  
0231          // Blockcopy_ss
0232          p.cu[BLOCK_4x4].copy_ss   = PFX(blockcopy_ss_4x4_neon);
0233 @@ -495,21 +495,21 @@ void setupAssemblyPrimitives(EncoderPrim
0234          p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].copy_sp = PFX(blockcopy_sp_32x64_neon);
0235  
0236          // pixel_add_ps
0237 -        p.cu[BLOCK_4x4].add_ps   = PFX(pixel_add_ps_4x4_neon);
0238 -        p.cu[BLOCK_8x8].add_ps   = PFX(pixel_add_ps_8x8_neon);
0239 -        p.cu[BLOCK_16x16].add_ps = PFX(pixel_add_ps_16x16_neon);
0240 -        p.cu[BLOCK_32x32].add_ps = PFX(pixel_add_ps_32x32_neon);
0241 -        p.cu[BLOCK_64x64].add_ps = PFX(pixel_add_ps_64x64_neon);
0242 +        p.cu[BLOCK_4x4].add_ps[ALIGNED]   = PFX(pixel_add_ps_4x4_neon);
0243 +        p.cu[BLOCK_8x8].add_ps[ALIGNED]   = PFX(pixel_add_ps_8x8_neon);
0244 +        p.cu[BLOCK_16x16].add_ps[ALIGNED] = PFX(pixel_add_ps_16x16_neon);
0245 +        p.cu[BLOCK_32x32].add_ps[ALIGNED] = PFX(pixel_add_ps_32x32_neon);
0246 +        p.cu[BLOCK_64x64].add_ps[ALIGNED] = PFX(pixel_add_ps_64x64_neon);
0247  
0248          // chroma add_ps
0249 -        p.chroma[X265_CSP_I420].cu[BLOCK_420_4x4].add_ps   = PFX(pixel_add_ps_4x4_neon);
0250 -        p.chroma[X265_CSP_I420].cu[BLOCK_420_8x8].add_ps   = PFX(pixel_add_ps_8x8_neon);
0251 -        p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].add_ps = PFX(pixel_add_ps_16x16_neon);
0252 -        p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].add_ps = PFX(pixel_add_ps_32x32_neon);
0253 -        p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].add_ps   = PFX(pixel_add_ps_4x8_neon);
0254 -        p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].add_ps  = PFX(pixel_add_ps_8x16_neon);
0255 -        p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].add_ps = PFX(pixel_add_ps_16x32_neon);
0256 -        p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].add_ps = PFX(pixel_add_ps_32x64_neon);
0257 +        p.chroma[X265_CSP_I420].cu[BLOCK_420_4x4].add_ps[ALIGNED]   = PFX(pixel_add_ps_4x4_neon);
0258 +        p.chroma[X265_CSP_I420].cu[BLOCK_420_8x8].add_ps[ALIGNED]   = PFX(pixel_add_ps_8x8_neon);
0259 +        p.chroma[X265_CSP_I420].cu[BLOCK_420_16x16].add_ps[ALIGNED] = PFX(pixel_add_ps_16x16_neon);
0260 +        p.chroma[X265_CSP_I420].cu[BLOCK_420_32x32].add_ps[ALIGNED] = PFX(pixel_add_ps_32x32_neon);
0261 +        p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].add_ps[ALIGNED]   = PFX(pixel_add_ps_4x8_neon);
0262 +        p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].add_ps[ALIGNED]  = PFX(pixel_add_ps_8x16_neon);
0263 +        p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].add_ps[ALIGNED] = PFX(pixel_add_ps_16x32_neon);
0264 +        p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].add_ps[ALIGNED] = PFX(pixel_add_ps_32x64_neon);
0265  
0266          // cpy2Dto1D_shr
0267          p.cu[BLOCK_4x4].cpy2Dto1D_shr   = PFX(cpy2Dto1D_shr_4x4_neon);
0268 @@ -518,10 +518,10 @@ void setupAssemblyPrimitives(EncoderPrim
0269          p.cu[BLOCK_32x32].cpy2Dto1D_shr = PFX(cpy2Dto1D_shr_32x32_neon);
0270  
0271          // ssd_s
0272 -        p.cu[BLOCK_4x4].ssd_s   = PFX(pixel_ssd_s_4x4_neon);
0273 -        p.cu[BLOCK_8x8].ssd_s   = PFX(pixel_ssd_s_8x8_neon);
0274 -        p.cu[BLOCK_16x16].ssd_s = PFX(pixel_ssd_s_16x16_neon);
0275 -        p.cu[BLOCK_32x32].ssd_s = PFX(pixel_ssd_s_32x32_neon);
0276 +        p.cu[BLOCK_4x4].ssd_s[ALIGNED]   = PFX(pixel_ssd_s_4x4_neon);
0277 +        p.cu[BLOCK_8x8].ssd_s[ALIGNED]   = PFX(pixel_ssd_s_8x8_neon);
0278 +        p.cu[BLOCK_16x16].ssd_s[ALIGNED] = PFX(pixel_ssd_s_16x16_neon);
0279 +        p.cu[BLOCK_32x32].ssd_s[ALIGNED] = PFX(pixel_ssd_s_32x32_neon);
0280  
0281          // sse_ss
0282          p.cu[BLOCK_4x4].sse_ss   = PFX(pixel_sse_ss_4x4_neon);
0283 @@ -548,10 +548,10 @@ void setupAssemblyPrimitives(EncoderPrim
0284          p.chroma[X265_CSP_I422].cu[BLOCK_422_32x64].sub_ps = PFX(pixel_sub_ps_32x64_neon);
0285  
0286          // calc_Residual
0287 -        p.cu[BLOCK_4x4].calcresidual   = PFX(getResidual4_neon);
0288 -        p.cu[BLOCK_8x8].calcresidual   = PFX(getResidual8_neon);
0289 -        p.cu[BLOCK_16x16].calcresidual = PFX(getResidual16_neon);
0290 -        p.cu[BLOCK_32x32].calcresidual = PFX(getResidual32_neon);
0291 +        p.cu[BLOCK_4x4].calcresidual[ALIGNED]   = PFX(getResidual4_neon);
0292 +        p.cu[BLOCK_8x8].calcresidual[ALIGNED]   = PFX(getResidual8_neon);
0293 +        p.cu[BLOCK_16x16].calcresidual[ALIGNED] = PFX(getResidual16_neon);
0294 +        p.cu[BLOCK_32x32].calcresidual[ALIGNED] = PFX(getResidual32_neon);
0295  
0296          // sse_pp
0297          p.cu[BLOCK_4x4].sse_pp   = PFX(pixel_sse_pp_4x4_neon);
0298 @@ -722,31 +722,31 @@ void setupAssemblyPrimitives(EncoderPrim
0299          p.pu[LUMA_64x64].sad_x4 = PFX(sad_x4_64x64_neon);
0300  
0301          // pixel_avg_pp
0302 -        p.pu[LUMA_4x4].pixelavg_pp   = PFX(pixel_avg_pp_4x4_neon);
0303 -        p.pu[LUMA_4x8].pixelavg_pp   = PFX(pixel_avg_pp_4x8_neon);
0304 -        p.pu[LUMA_4x16].pixelavg_pp  = PFX(pixel_avg_pp_4x16_neon);
0305 -        p.pu[LUMA_8x4].pixelavg_pp   = PFX(pixel_avg_pp_8x4_neon);
0306 -        p.pu[LUMA_8x8].pixelavg_pp   = PFX(pixel_avg_pp_8x8_neon);
0307 -        p.pu[LUMA_8x16].pixelavg_pp  = PFX(pixel_avg_pp_8x16_neon);
0308 -        p.pu[LUMA_8x32].pixelavg_pp  = PFX(pixel_avg_pp_8x32_neon);
0309 -        p.pu[LUMA_12x16].pixelavg_pp = PFX(pixel_avg_pp_12x16_neon);
0310 -        p.pu[LUMA_16x4].pixelavg_pp  = PFX(pixel_avg_pp_16x4_neon);
0311 -        p.pu[LUMA_16x8].pixelavg_pp  = PFX(pixel_avg_pp_16x8_neon);
0312 -        p.pu[LUMA_16x12].pixelavg_pp = PFX(pixel_avg_pp_16x12_neon);
0313 -        p.pu[LUMA_16x16].pixelavg_pp = PFX(pixel_avg_pp_16x16_neon);
0314 -        p.pu[LUMA_16x32].pixelavg_pp = PFX(pixel_avg_pp_16x32_neon);
0315 -        p.pu[LUMA_16x64].pixelavg_pp = PFX(pixel_avg_pp_16x64_neon);
0316 -        p.pu[LUMA_24x32].pixelavg_pp = PFX(pixel_avg_pp_24x32_neon);
0317 -        p.pu[LUMA_32x8].pixelavg_pp  = PFX(pixel_avg_pp_32x8_neon);
0318 -        p.pu[LUMA_32x16].pixelavg_pp = PFX(pixel_avg_pp_32x16_neon);
0319 -        p.pu[LUMA_32x24].pixelavg_pp = PFX(pixel_avg_pp_32x24_neon);
0320 -        p.pu[LUMA_32x32].pixelavg_pp = PFX(pixel_avg_pp_32x32_neon);
0321 -        p.pu[LUMA_32x64].pixelavg_pp = PFX(pixel_avg_pp_32x64_neon);
0322 -        p.pu[LUMA_48x64].pixelavg_pp = PFX(pixel_avg_pp_48x64_neon);
0323 -        p.pu[LUMA_64x16].pixelavg_pp = PFX(pixel_avg_pp_64x16_neon);
0324 -        p.pu[LUMA_64x32].pixelavg_pp = PFX(pixel_avg_pp_64x32_neon);
0325 -        p.pu[LUMA_64x48].pixelavg_pp = PFX(pixel_avg_pp_64x48_neon);
0326 -        p.pu[LUMA_64x64].pixelavg_pp = PFX(pixel_avg_pp_64x64_neon);
0327 +        p.pu[LUMA_4x4].pixelavg_pp[ALIGNED]   = PFX(pixel_avg_pp_4x4_neon);
0328 +        p.pu[LUMA_4x8].pixelavg_pp[ALIGNED]   = PFX(pixel_avg_pp_4x8_neon);
0329 +        p.pu[LUMA_4x16].pixelavg_pp[ALIGNED]  = PFX(pixel_avg_pp_4x16_neon);
0330 +        p.pu[LUMA_8x4].pixelavg_pp[ALIGNED]   = PFX(pixel_avg_pp_8x4_neon);
0331 +        p.pu[LUMA_8x8].pixelavg_pp[ALIGNED]   = PFX(pixel_avg_pp_8x8_neon);
0332 +        p.pu[LUMA_8x16].pixelavg_pp[ALIGNED]  = PFX(pixel_avg_pp_8x16_neon);
0333 +        p.pu[LUMA_8x32].pixelavg_pp[ALIGNED]  = PFX(pixel_avg_pp_8x32_neon);
0334 +        p.pu[LUMA_12x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_12x16_neon);
0335 +        p.pu[LUMA_16x4].pixelavg_pp[ALIGNED]  = PFX(pixel_avg_pp_16x4_neon);
0336 +        p.pu[LUMA_16x8].pixelavg_pp[ALIGNED]  = PFX(pixel_avg_pp_16x8_neon);
0337 +        p.pu[LUMA_16x12].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_16x12_neon);
0338 +        p.pu[LUMA_16x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_16x16_neon);
0339 +        p.pu[LUMA_16x32].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_16x32_neon);
0340 +        p.pu[LUMA_16x64].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_16x64_neon);
0341 +        p.pu[LUMA_24x32].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_24x32_neon);
0342 +        p.pu[LUMA_32x8].pixelavg_pp[ALIGNED]  = PFX(pixel_avg_pp_32x8_neon);
0343 +        p.pu[LUMA_32x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_32x16_neon);
0344 +        p.pu[LUMA_32x24].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_32x24_neon);
0345 +        p.pu[LUMA_32x32].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_32x32_neon);
0346 +        p.pu[LUMA_32x64].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_32x64_neon);
0347 +        p.pu[LUMA_48x64].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_48x64_neon);
0348 +        p.pu[LUMA_64x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_64x16_neon);
0349 +        p.pu[LUMA_64x32].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_64x32_neon);
0350 +        p.pu[LUMA_64x48].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_64x48_neon);
0351 +        p.pu[LUMA_64x64].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_64x64_neon);
0352  
0353          // planecopy
0354          p.planecopy_cp = PFX(pixel_planecopy_cp_neon);