summaryrefslogtreecommitdiff
path: root/pkg/libass/patch/0001-Fix-ASM-build-with-nasm.patch
blob: 423b322e67f11988b1d59b685af13ab2b6d6277e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
From 311524f544cc648293ba20c99219f82bea3b3a35 Mon Sep 17 00:00:00 2001
From: Michael Forney <mforney@mforney.org>
Date: Sun, 9 Jul 2017 12:55:56 -0700
Subject: [PATCH] Fix ASM build with nasm

---
 libass/x86/be_blur.asm       | 6 +++---
 libass/x86/blend_bitmaps.asm | 4 ++--
 libass/x86/x86inc.asm        | 4 ----
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/libass/x86/be_blur.asm b/libass/x86/be_blur.asm
index 007d60d..3b09176 100644
--- a/libass/x86/be_blur.asm
+++ b/libass/x86/be_blur.asm
@@ -82,8 +82,8 @@ cglobal be_blur, 5,15,9
     movzx r10, byte [r7] ; temp1 = src[0];
     movzx r11, byte [r7 + 1] ; temp2 = src[1];
     add r10, r11; temp1 += temp2
-    movd xmm0, r10; __m128i old_pix_128 = temp2;
-    movd xmm1, r11; __m128i old_sum_128 = temp1;
+    movq xmm0, r10; __m128i old_pix_128 = temp2;
+    movq xmm1, r11; __m128i old_sum_128 = temp1;
 .width_loop
     movq xmm2, [r7 + r6]; __m128i new_pix = (src+x);
     punpcklbw xmm2, xmm6 ; new_pix = _mm_unpacklo_epi8(new_pix, temp3);
@@ -152,7 +152,7 @@ cglobal be_blur, 5,15,9
     lea r12, [r4 + r3 * 2] ; unsigned char *col_sum_buf = tmp + stride * 2;
     lea r14, [r1 - 2] ; tmpreg = (w-2);
     and r14, -16 ; tmpreg &= (~15);
-    vmovdqa ymm7, [low_word_zero wrt rip]
+    vmovdqa ymm7, [rel low_word_zero]
 .first_loop
     movzx r10, byte [r7 + r6] ; int temp1 = src[x];
     lea r11, [r8 + r10] ; int temp2 = old_pix + temp1;
diff --git a/libass/x86/blend_bitmaps.asm b/libass/x86/blend_bitmaps.asm
index 3a9b2dd..5843133 100644
--- a/libass/x86/blend_bitmaps.asm
+++ b/libass/x86/blend_bitmaps.asm
@@ -215,7 +215,7 @@ cglobal mul_bitmaps, 8,12
     imul r7, r3
     add r7, r2 ; last address
     pxor xmm2, xmm2
-    movdqa xmm3, [words_255 wrt rip]
+    movdqa xmm3, [rel words_255]
     mov r9, r6
     and r9, -8 ; &= (~8);
 .height_loop:
@@ -262,7 +262,7 @@ cglobal mul_bitmaps, 8,12
     imul r7, r3
     add r7, r2 ; last address
     vpxor ymm2, ymm2
-    vmovdqa ymm3, [words_255 wrt rip]
+    vmovdqa ymm3, [rel words_255]
     mov r9, r6
     and r9, -16 ; &= (~16);
 .height_loop:
diff --git a/libass/x86/x86inc.asm b/libass/x86/x86inc.asm
index 53e104d..a8c7408 100644
--- a/libass/x86/x86inc.asm
+++ b/libass/x86/x86inc.asm
@@ -82,9 +82,6 @@
     default rel
 %endif
 
-; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
-CPU amdnop
-
 ; Macros to eliminate most code duplication between x86_32 and x86_64:
 ; Currently this works only for leaf functions which load all their arguments
 ; into registers at the start, and make no other use of the stack. Luckily that
@@ -745,7 +742,6 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 ; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
 ; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
 %macro INIT_CPUFLAGS 0-2
-    CPU amdnop
     %if %0 >= 1
         %xdefine cpuname %1
         %assign cpuflags cpuflags_%1
-- 
2.13.2