1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
From 311524f544cc648293ba20c99219f82bea3b3a35 Mon Sep 17 00:00:00 2001
From: Michael Forney <mforney@mforney.org>
Date: Sun, 9 Jul 2017 12:55:56 -0700
Subject: [PATCH] Fix ASM build with nasm
---
libass/x86/be_blur.asm | 6 +++---
libass/x86/blend_bitmaps.asm | 4 ++--
libass/x86/x86inc.asm | 4 ----
3 files changed, 5 insertions(+), 9 deletions(-)
diff --git a/libass/x86/be_blur.asm b/libass/x86/be_blur.asm
index 007d60d..3b09176 100644
--- a/libass/x86/be_blur.asm
+++ b/libass/x86/be_blur.asm
@@ -82,8 +82,8 @@ cglobal be_blur, 5,15,9
movzx r10, byte [r7] ; temp1 = src[0];
movzx r11, byte [r7 + 1] ; temp2 = src[1];
add r10, r11; temp1 += temp2
- movd xmm0, r10; __m128i old_pix_128 = temp2;
- movd xmm1, r11; __m128i old_sum_128 = temp1;
+ movq xmm0, r10; __m128i old_pix_128 = temp2;
+ movq xmm1, r11; __m128i old_sum_128 = temp1;
.width_loop
movq xmm2, [r7 + r6]; __m128i new_pix = (src+x);
punpcklbw xmm2, xmm6 ; new_pix = _mm_unpacklo_epi8(new_pix, temp3);
@@ -152,7 +152,7 @@ cglobal be_blur, 5,15,9
lea r12, [r4 + r3 * 2] ; unsigned char *col_sum_buf = tmp + stride * 2;
lea r14, [r1 - 2] ; tmpreg = (w-2);
and r14, -16 ; tmpreg &= (~15);
- vmovdqa ymm7, [low_word_zero wrt rip]
+ vmovdqa ymm7, [rel low_word_zero]
.first_loop
movzx r10, byte [r7 + r6] ; int temp1 = src[x];
lea r11, [r8 + r10] ; int temp2 = old_pix + temp1;
diff --git a/libass/x86/blend_bitmaps.asm b/libass/x86/blend_bitmaps.asm
index 3a9b2dd..5843133 100644
--- a/libass/x86/blend_bitmaps.asm
+++ b/libass/x86/blend_bitmaps.asm
@@ -215,7 +215,7 @@ cglobal mul_bitmaps, 8,12
imul r7, r3
add r7, r2 ; last address
pxor xmm2, xmm2
- movdqa xmm3, [words_255 wrt rip]
+ movdqa xmm3, [rel words_255]
mov r9, r6
and r9, -8 ; &= (~8);
.height_loop:
@@ -262,7 +262,7 @@ cglobal mul_bitmaps, 8,12
imul r7, r3
add r7, r2 ; last address
vpxor ymm2, ymm2
- vmovdqa ymm3, [words_255 wrt rip]
+ vmovdqa ymm3, [rel words_255]
mov r9, r6
and r9, -16 ; &= (~16);
.height_loop:
diff --git a/libass/x86/x86inc.asm b/libass/x86/x86inc.asm
index 53e104d..a8c7408 100644
--- a/libass/x86/x86inc.asm
+++ b/libass/x86/x86inc.asm
@@ -82,9 +82,6 @@
default rel
%endif
-; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
-CPU amdnop
-
; Macros to eliminate most code duplication between x86_32 and x86_64:
; Currently this works only for leaf functions which load all their arguments
; into registers at the start, and make no other use of the stack. Luckily that
@@ -745,7 +742,6 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
%macro INIT_CPUFLAGS 0-2
- CPU amdnop
%if %0 >= 1
%xdefine cpuname %1
%assign cpuflags cpuflags_%1
--
2.13.2
|