pkgsrc/multimedia/libmpeg3/patches/patch-ah
2004-06-26 18:00:21 +00:00

310 lines
16 KiB
Text

$NetBSD: patch-ah,v 1.1 2004/06/26 18:00:21 grant Exp $

Changes made to video/output.c by this patch:

  1. The six 64-bit mpeg3_MMX_* constant initializers gain an explicit
     LL suffix on their hexadecimal literals.

  2. The four asm() string literals that previously spanned many source
     lines with raw newlines embedded in the string are rewritten so that
     every line ends in "\n\" (an escaped newline followed by a
     backslash line continuation).  The assembler still receives one
     instruction per line, but the C string literal no longer contains
     unescaped newlines.

The instruction sequences, operands and asm comments are otherwise left
as they were.

--- video/output.c.orig 2001-05-20 13:05:26.000000000 +1000
+++ video/output.c 2003-07-29 00:53:45.000000000 +1000
@@ -207,50 +207,50 @@
);
}
-static unsigned long long mpeg3_MMX_U_80 = 0x0000008000800000;
-static unsigned long long mpeg3_MMX_V_80 = 0x0000000000800080;
-static long long mpeg3_MMX_U_COEF = 0x00000058ffd30000;
-static long long mpeg3_MMX_V_COEF = 0x00000000ffea006f;
-static long long mpeg3_MMX_601_Y_COEF = 0x0000004800480048;
-static long long mpeg3_MMX_601_Y_DIFF = 0x0000000000000010;
+static unsigned long long mpeg3_MMX_U_80 = 0x0000008000800000LL;
+static unsigned long long mpeg3_MMX_V_80 = 0x0000000000800080LL;
+static long long mpeg3_MMX_U_COEF = 0x00000058ffd30000LL;
+static long long mpeg3_MMX_V_COEF = 0x00000000ffea006fLL;
+static long long mpeg3_MMX_601_Y_COEF = 0x0000004800480048LL;
+static long long mpeg3_MMX_601_Y_DIFF = 0x0000000000000010LL;
inline void mpeg3_bgra32_mmx(unsigned long y,
unsigned long u,
unsigned long v,
unsigned long *output)
{
-asm("
-/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */
-/* for bgr24. */
- movd (%0), %%mm0; /* Load y 0x00000000000000yy */
- movd (%1), %%mm1; /* Load u 0x00000000000000cr */
- movq %%mm0, %%mm3; /* Copy y to temp */
- psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */
- movd (%2), %%mm2; /* Load v 0x00000000000000cb */
- psllq $16, %%mm3; /* Shift y */
- movq %%mm1, %%mm4; /* Copy u to temp */
- por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */
- psllq $16, %%mm4; /* Shift u */
- movq %%mm2, %%mm5; /* Copy v to temp */
- psllq $16, %%mm3; /* Shift y */
- por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */
- psllq $16, %%mm5; /* Shift v */
- por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */
- por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */
-
-/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */
- psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */
- pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */
- psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */
- psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */
- pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */
-
-/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */
- paddsw %%mm1, %%mm0; /* Add u to result */
- paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */
- psraw $6, %%mm0; /* Demote precision */
- packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */
- movd %%mm0, (%3); /* Store output */
+asm("\n\
+/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */\n\
+/* for bgr24. */\n\
+ movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\
+ movd (%1), %%mm1; /* Load u 0x00000000000000cr */\n\
+ movq %%mm0, %%mm3; /* Copy y to temp */\n\
+ psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */\n\
+ movd (%2), %%mm2; /* Load v 0x00000000000000cb */\n\
+ psllq $16, %%mm3; /* Shift y */\n\
+ movq %%mm1, %%mm4; /* Copy u to temp */\n\
+ por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\
+ psllq $16, %%mm4; /* Shift u */\n\
+ movq %%mm2, %%mm5; /* Copy v to temp */\n\
+ psllq $16, %%mm3; /* Shift y */\n\
+ por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */\n\
+ psllq $16, %%mm5; /* Shift v */\n\
+ por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\
+ por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */\n\
+\n\
+/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */\n\
+ psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */\n\
+ pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */\n\
+ psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */\n\
+ psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */\n\
+ pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */\n\
+\n\
+/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */\n\
+ paddsw %%mm1, %%mm0; /* Add u to result */\n\
+ paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */\n\
+ psraw $6, %%mm0; /* Demote precision */\n\
+ packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */\n\
+ movd %%mm0, (%3); /* Store output */\n\
"
:
: "r" (&y), "r" (&u), "r" (&v), "r" (output));
@@ -261,39 +261,39 @@
unsigned long v,
unsigned long *output)
{
-asm("
-/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */
-/* for bgr24. */
- movd (%0), %%mm0; /* Load y 0x00000000000000yy */
- psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */
- movd (%1), %%mm1; /* Load u 0x00000000000000cr */
- movq %%mm0, %%mm3; /* Copy y to temp */
- psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */
- movd (%2), %%mm2; /* Load v 0x00000000000000cb */
- psllq $16, %%mm3; /* Shift y */
- movq %%mm1, %%mm4; /* Copy u to temp */
- por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */
- psllq $16, %%mm4; /* Shift u */
- movq %%mm2, %%mm5; /* Copy v to temp */
- psllq $16, %%mm3; /* Shift y */
- por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */
- psllq $16, %%mm5; /* Shift v */
- por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */
- por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */
-
-/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */
- pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */
- psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */
- pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */
- psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */
- pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */
-
-/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */
- paddsw %%mm1, %%mm0; /* Add u to result */
- paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */
- psraw $6, %%mm0; /* Demote precision */
- packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */
- movd %%mm0, (%3); /* Store output */
+asm("\n\
+/* Output will be 0x00rrggbb with the 00 trailing so this can also be used */\n\
+/* for bgr24. */\n\
+ movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\
+ psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */\n\
+ movd (%1), %%mm1; /* Load u 0x00000000000000cr */\n\
+ movq %%mm0, %%mm3; /* Copy y to temp */\n\
+ psllq $16, %%mm1; /* Shift u 0x0000000000cr0000 */\n\
+ movd (%2), %%mm2; /* Load v 0x00000000000000cb */\n\
+ psllq $16, %%mm3; /* Shift y */\n\
+ movq %%mm1, %%mm4; /* Copy u to temp */\n\
+ por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\
+ psllq $16, %%mm4; /* Shift u */\n\
+ movq %%mm2, %%mm5; /* Copy v to temp */\n\
+ psllq $16, %%mm3; /* Shift y */\n\
+ por %%mm4, %%mm1; /* Overlay new u byte 0x000000cr00cr0000 */\n\
+ psllq $16, %%mm5; /* Shift v */\n\
+ por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\
+ por %%mm5, %%mm2; /* Overlay new v byte 0x0000000000cb00cb */\n\
+\n\
+/* mm0: 0x000000yy00yy00yy mm1: 0x000000uu00uu0000 mm2: 0x0000000000vv00vv */\n\
+ pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale and shift y coeffs */\n\
+ psubw mpeg3_MMX_U_80, %%mm1; /* Subtract 128 from u 0x000000uu00uu0000 */\n\
+ pmullw mpeg3_MMX_U_COEF, %%mm1; /* Multiply u coeffs 0x0000uuuuuuuu0000 */\n\
+ psubw mpeg3_MMX_V_80, %%mm2; /* Subtract 128 from v 0x0000000000cb00cb */\n\
+ pmullw mpeg3_MMX_V_COEF, %%mm2; /* Multiply v coeffs 0x0000crcrcrcrcrcr */\n\
+\n\
+/* mm0: 0x000000yy00yy00yy mm1: 0x0000uuuuuuuu0000 mm2: 0x00000000vvvvvvvv */\n\
+ paddsw %%mm1, %%mm0; /* Add u to result */\n\
+ paddsw %%mm2, %%mm0; /* Add v to result 0x0000rrrrggggbbbb */\n\
+ psraw $6, %%mm0; /* Demote precision */\n\
+ packuswb %%mm0, %%mm0; /* Pack into ARGB 0x0000000000rrggbb */\n\
+ movd %%mm0, (%3); /* Store output */\n\
"
:
: "r" (&y), "r" (&u), "r" (&v), "r" (output));
@@ -309,38 +309,38 @@
unsigned long v,
unsigned long *output)
{
-asm("
-/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */
-/* for rgb24. */
- movd (%0), %%mm0; /* Load y 0x00000000000000yy */
- movd (%1), %%mm1; /* Load v 0x00000000000000vv */
- movq %%mm0, %%mm3; /* Copy y to temp */
- psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */
- movd (%2), %%mm2; /* Load u 0x00000000000000uu */
- psllq $16, %%mm3; /* Shift y */
- movq %%mm1, %%mm4; /* Copy v to temp */
- por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */
- psllq $16, %%mm4; /* Shift v */
- movq %%mm2, %%mm5; /* Copy u to temp */
- psllq $16, %%mm3; /* Shift y */
- por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */
- psllq $16, %%mm5; /* Shift u */
- por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */
- por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */
-
-/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */
- psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */
- pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */
- psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */
- psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */
- pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */
-
-/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */
- paddsw %%mm1, %%mm0; /* Add v to result */
- paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */
- psraw $6, %%mm0; /* Demote precision */
- packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */
- movd %%mm0, (%3); /* Store output */
+asm("\n\
+/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */\n\
+/* for rgb24. */\n\
+ movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\
+ movd (%1), %%mm1; /* Load v 0x00000000000000vv */\n\
+ movq %%mm0, %%mm3; /* Copy y to temp */\n\
+ psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */\n\
+ movd (%2), %%mm2; /* Load u 0x00000000000000uu */\n\
+ psllq $16, %%mm3; /* Shift y */\n\
+ movq %%mm1, %%mm4; /* Copy v to temp */\n\
+ por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\
+ psllq $16, %%mm4; /* Shift v */\n\
+ movq %%mm2, %%mm5; /* Copy u to temp */\n\
+ psllq $16, %%mm3; /* Shift y */\n\
+ por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */\n\
+ psllq $16, %%mm5; /* Shift u */\n\
+ por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\
+ por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */\n\
+\n\
+/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */\n\
+ psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */\n\
+ pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */\n\
+ psllw $6, %%mm0; /* Shift y coeffs 0x0000yyy0yyy0yyy0 */\n\
+ psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */\n\
+ pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */\n\
+\n\
+/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */\n\
+ paddsw %%mm1, %%mm0; /* Add v to result */\n\
+ paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */\n\
+ psraw $6, %%mm0; /* Demote precision */\n\
+ packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */\n\
+ movd %%mm0, (%3); /* Store output */\n\
"
:
: "r" (&y), "r" (&v), "r" (&u), "r" (output));
@@ -351,39 +351,39 @@
unsigned long v,
unsigned long *output)
{
-asm("
-/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */
-/* for rgb24. */
- movd (%0), %%mm0; /* Load y 0x00000000000000yy */
- psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */
- movd (%1), %%mm1; /* Load v 0x00000000000000vv */
- movq %%mm0, %%mm3; /* Copy y to temp */
- psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */
- movd (%2), %%mm2; /* Load u 0x00000000000000uu */
- psllq $16, %%mm3; /* Shift y */
- movq %%mm1, %%mm4; /* Copy v to temp */
- por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */
- psllq $16, %%mm4; /* Shift v */
- movq %%mm2, %%mm5; /* Copy u to temp */
- psllq $16, %%mm3; /* Shift y */
- por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */
- psllq $16, %%mm5; /* Shift u */
- por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */
- por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */
-
-/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */
- pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale y coeffs */
- psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */
- pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */
- psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */
- pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */
-
-/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */
- paddsw %%mm1, %%mm0; /* Add v to result */
- paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */
- psraw $6, %%mm0; /* Demote precision */
- packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */
- movd %%mm0, (%3); /* Store output */
+asm("\n\
+/* Output will be 0x00bbggrr with the 00 trailing so this can also be used */\n\
+/* for rgb24. */\n\
+ movd (%0), %%mm0; /* Load y 0x00000000000000yy */\n\
+ psubsw mpeg3_MMX_601_Y_DIFF, %%mm0; /* Subtract 16 from y */\n\
+ movd (%1), %%mm1; /* Load v 0x00000000000000vv */\n\
+ movq %%mm0, %%mm3; /* Copy y to temp */\n\
+ psllq $16, %%mm1; /* Shift v 0x0000000000vv0000 */\n\
+ movd (%2), %%mm2; /* Load u 0x00000000000000uu */\n\
+ psllq $16, %%mm3; /* Shift y */\n\
+ movq %%mm1, %%mm4; /* Copy v to temp */\n\
+ por %%mm3, %%mm0; /* Overlay new y byte 0x0000000000yy00yy */\n\
+ psllq $16, %%mm4; /* Shift v */\n\
+ movq %%mm2, %%mm5; /* Copy u to temp */\n\
+ psllq $16, %%mm3; /* Shift y */\n\
+ por %%mm4, %%mm1; /* Overlay new v byte 0x000000vv00vv0000 */\n\
+ psllq $16, %%mm5; /* Shift u */\n\
+ por %%mm3, %%mm0; /* Overlay new y byte 0x000000yy00yy00yy */\n\
+ por %%mm5, %%mm2; /* Overlay new u byte 0x0000000000uu00uu */\n\
+\n\
+/* mm0: 0x000000yy00yy00yy mm1: 0x000000vv00vv0000 mm2: 0x0000000000uu00uu */\n\
+ pmullw mpeg3_MMX_601_Y_COEF, %%mm0; /* Scale y coeffs */\n\
+ psubw mpeg3_MMX_V_80_RGB, %%mm1; /* Subtract 128 from v 0x000000vv00vv0000 */\n\
+ pmullw mpeg3_MMX_V_COEF_RGB, %%mm1; /* Multiply v coeffs 0x0000vvvvvvvv0000 */\n\
+ psubw mpeg3_MMX_U_80_RGB, %%mm2; /* Subtract 128 from u 0x0000000000uu00uu */\n\
+ pmullw mpeg3_MMX_U_COEF_RGB, %%mm2; /* Multiply u coeffs 0x0000uuuuuuuuuuuu */\n\
+\n\
+/* mm0: 0x000000yy00yy00yy mm1: 0x0000vvvvvvvv0000 mm2: 0x00000000uuuuuuuu */\n\
+ paddsw %%mm1, %%mm0; /* Add v to result */\n\
+ paddsw %%mm2, %%mm0; /* Add u to result 0x0000bbbbggggrrrr */\n\
+ psraw $6, %%mm0; /* Demote precision */\n\
+ packuswb %%mm0, %%mm0; /* Pack into RGBA 0x0000000000bbggrr */\n\
+ movd %%mm0, (%3); /* Store output */\n\
"
:
: "r" (&y), "r" (&v), "r" (&u), "r" (output));