10 files changed, 142 insertions, 159 deletions
diff --git a/pkg/qbe/patch/0001-arm64-Handle-slots.patch b/pkg/qbe/patch/0001-arm64-Handle-slots.patch
deleted file mode 100644
index 49f266e8..00000000
--- a/pkg/qbe/patch/0001-arm64-Handle-slots.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From c8849e9c7b382f92326434a6522a26829a6e20f8 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Wed, 8 May 2019 18:29:28 -0700
-Subject: [PATCH] arm64: Handle slots
-
----
- arm64/emit.c | 13 +++++++++++--
- 1 file changed, 11 insertions(+), 2 deletions(-)
-
-diff --git a/arm64/emit.c b/arm64/emit.c
-index 59e1aae..9cc4a64 100644
---- a/arm64/emit.c
-+++ b/arm64/emit.c
-@@ -220,8 +220,17 @@ emitf(char *s, Ins *i, E *e)
- 			c = *s++;
- 			assert(c == '0' || c == '1');
- 			r = i->arg[c - '0'];
--			assert(isreg(r) && "TODO emit non reg addresses");
--			fprintf(e->f, "[%s]", rname(r.val, Kl));
-+			switch (rtype(r)) {
-+			default:
-+				die("TODO emit non reg addresses");
-+			case RTmp:
-+				assert(isreg(r));
-+				fprintf(e->f, "[%s]", rname(r.val, Kl));
-+				break;
-+			case RSlot:
-+				fprintf(e->f, "[sp, %"PRIu64"]", slot(r.val, e));
-+				break;
-+			}
- 			break;
- 		}
- 	}
--- 
-2.21.0
-
diff --git a/pkg/qbe/patch/0003-arm64-Prevent-stack-clobber-when-passing-structures-.patch b/pkg/qbe/patch/0001-arm64-prevent-stack-clobber-when-passing-structures-.patch
index da919376..ca21a84f 100644
--- a/pkg/qbe/patch/0003-arm64-Prevent-stack-clobber-when-passing-structures-.patch
+++ b/pkg/qbe/patch/0001-arm64-prevent-stack-clobber-when-passing-structures-.patch
@@ -1,7 +1,7 @@
-From ffd2585ef162a6dcc42011a33bd69687048ab4a8 Mon Sep 17 00:00:00 2001
+From 6c1744026545445511f1c500653bab859bc79b50 Mon Sep 17 00:00:00 2001
 From: Michael Forney <mforney@mforney.org>
 Date: Sat, 11 May 2019 19:38:13 -0700
-Subject: [PATCH] arm64: Prevent stack clobber when passing structures < 8
+Subject: [PATCH] arm64: prevent stack clobber when passing structures < 8
  bytes
 
 ---
@@ -9,10 +9,10 @@ Subject: [PATCH] arm64: Prevent stack clobber when passing structures < 8
  1 file changed, 3 insertions(+), 1 deletion(-)
 
 diff --git a/arm64/abi.c b/arm64/abi.c
-index f5b605a..4e80db2 100644
+index 8209944..f37c892 100644
 --- a/arm64/abi.c
 +++ b/arm64/abi.c
-@@ -308,12 +308,14 @@ stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
+@@ -312,12 +312,14 @@ stkblob(Ref r, Class *c, Fn *fn, Insl **ilp)
  {
  	Insl *il;
  	int al;
@@ -29,5 +29,5 @@ index f5b605a..4e80db2 100644
  	*ilp = il;
  }
 -- 
-2.21.0
+2.32.0
 
diff --git a/pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch b/pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch
deleted file mode 100644
index 5dbd4e6e..00000000
--- a/pkg/qbe/patch/0002-arm64-Handle-slots-in-Ocopy-operands.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From d9d890583d93f1bfdc38e4aa890350d4111b848a Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Thu, 9 May 2019 23:32:15 -0700
-Subject: [PATCH] arm64: Handle slots in Ocopy operands
-
----
- arm64/emit.c | 25 +++++++++++++++++++++----
- 1 file changed, 21 insertions(+), 4 deletions(-)
-
-diff --git a/arm64/emit.c b/arm64/emit.c
-index 9ebcfcd..5a3fe55 100644
---- a/arm64/emit.c
-+++ b/arm64/emit.c
-@@ -218,8 +218,8 @@ emitf(char *s, Ins *i, E *e)
- 			break;
- 		case 'M':
- 			c = *s++;
--			assert(c == '0' || c == '1');
--			r = i->arg[c - '0'];
-+			assert(c == '0' || c == '1' || c == '=');
-+			r = c == '=' ? i->to : i->arg[c - '0'];
- 			switch (rtype(r)) {
- 			default:
- 				die("TODO emit non reg addresses");
-@@ -307,9 +307,26 @@ emitins(Ins *i, E *e)
- 	case Ocopy:
- 		if (req(i->to, i->arg[0]))
- 			break;
--		if (rtype(i->arg[0]) != RCon)
-+		if (rtype(i->to) == RSlot) {
-+			if (rtype(i->arg[0]) == RSlot) {
-+				emitf("ldr %?, %M0\n\tstr %?, %M=", i, e);
-+			} else {
-+				assert(isreg(i->arg[0]));
-+				emitf("str %0, %M=", i, e);
-+			}
-+			break;
-+		}
-+		assert(isreg(i->to));
-+		switch (rtype(i->arg[0])) {
-+		case RCon:
-+			loadcon(&e->fn->con[i->arg[0].val], i->to.val, i->cls, e->f);
-+			break;
-+		case RSlot:
-+			emitf("ldr %=, %M0", i, e);
-+			break;
-+		default:
- 			goto Table;
--		loadcon(&e->fn->con[i->arg[0].val], i->to.val, i->cls, e->f);
-+		}
- 		break;
- 	case Oaddr:
- 		assert(rtype(i->arg[0]) == RSlot);
--- 
-2.32.0
-
diff --git a/pkg/qbe/patch/0004-Increase-NString-to-96.patch b/pkg/qbe/patch/0002-increase-NString-to-72.patch
index 9759d2a7..e7c12ea2 100644
--- a/pkg/qbe/patch/0004-Increase-NString-to-96.patch
+++ b/pkg/qbe/patch/0002-increase-NString-to-72.patch
@@ -1,14 +1,14 @@
-From 948e221acc92d002662ffa609a252a3410a93001 Mon Sep 17 00:00:00 2001
+From 294fedc93dbeac68f0beec1eeea62be30227b025 Mon Sep 17 00:00:00 2001
 From: Michael Forney <mforney@mforney.org>
 Date: Fri, 31 May 2019 13:31:04 -0700
-Subject: [PATCH] Increase NString to 96
+Subject: [PATCH] increase NString to 72
 
 ---
  all.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/all.h b/all.h
-index 7f843a9..805a346 100644
+index 37980d3..f49b4ef 100644
 --- a/all.h
 +++ b/all.h
 @@ -31,7 +31,7 @@ typedef struct Dat Dat;
@@ -16,10 +16,10 @@ index 7f843a9..805a346 100644
  
  enum {
 -	NString = 64,
-+	NString = 96,
++	NString = 72,
  	NIns    = 1 << 20,
  	NAlign  = 3,
  	NField  = 32,
 -- 
-2.28.0
+2.32.0
 
diff --git a/pkg/qbe/patch/0005-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch b/pkg/qbe/patch/0003-fold-don-t-fold-invalid-addition-subtraction-rather-.patch
index bafa164e..b2bc4039 100644
--- a/pkg/qbe/patch/0005-fold-Don-t-fold-invalid-addition-subtraction-rather-.patch
+++ b/pkg/qbe/patch/0003-fold-don-t-fold-invalid-addition-subtraction-rather-.patch
@@ -1,7 +1,7 @@
-From 264b07e0cb0ce869cfcdab0a3e66c92a99de5dee Mon Sep 17 00:00:00 2001
+From b3c8dfafafd7e749a12227c951f3faebc2572710 Mon Sep 17 00:00:00 2001
 From: Michael Forney <mforney@mforney.org>
 Date: Sun, 16 Jun 2019 01:38:27 -0700
-Subject: [PATCH] fold: Don't fold invalid addition/subtraction rather than
+Subject: [PATCH] fold: don't fold invalid addition/subtraction rather than
  failing
 
 This may happen in a branch QBE doesn't realize is unreachable,
@@ -29,7 +29,7 @@ for example (simplified from real code found in ncurses)
  1 file changed, 4 insertions(+), 7 deletions(-)
 
 diff --git a/fold.c b/fold.c
-index 0a3945f..9e1a12d 100644
+index 2081a72..50a862e 100644
 --- a/fold.c
 +++ b/fold.c
 @@ -343,7 +343,7 @@ foldint(Con *res, int op, int w, Con *cl, Con *cr)
@@ -62,5 +62,5 @@ index 0a3945f..9e1a12d 100644
  	case Oadd:  x = l.u + r.u; break;
  	case Osub:  x = l.u - r.u; break;
 -- 
-2.22.0
+2.32.0
 
diff --git a/pkg/qbe/patch/0004-gas-put-zero-data-into-.bss.patch b/pkg/qbe/patch/0004-gas-put-zero-data-into-.bss.patch
new file mode 100644
index 00000000..60813a97
--- /dev/null
+++ b/pkg/qbe/patch/0004-gas-put-zero-data-into-.bss.patch
@@ -0,0 +1,102 @@
+From e81a67355f1a53739cbfd9797ac9d687efff05e8 Mon Sep 17 00:00:00 2001
+From: Michael Forney <mforney@mforney.org>
+Date: Tue, 28 Sep 2021 11:25:05 -0700
+Subject: [PATCH] gas: put zero data into .bss
+
+---
+ gas.c | 56 ++++++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 40 insertions(+), 16 deletions(-)
+
+diff --git a/gas.c b/gas.c
+index 8c31794..ce082dc 100644
+--- a/gas.c
++++ b/gas.c
+@@ -3,10 +3,26 @@
+ 
+ char *gasloc, *gassym;
+ 
++static void
++startdat(FILE *f, char *section, char *name, int align, int export, int zero)
++{
++	char *p;
++
++	if (section)
++		fprintf(f, ".section %s\n", section);
++	else
++		fprintf(f, "%s\n", zero ? ".bss" : ".data");
++	fprintf(f, ".balign %d\n", align);
++	p = name[0] == '"' ? "" : gassym;
++	if (export)
++		fprintf(f, ".globl %s%s\n", p, name);
++	fprintf(f, "%s%s:\n", p, name);
++}
++
+ void
+ gasemitdat(Dat *d, FILE *f)
+ {
+-	static int aligned;
++	static int aligned, export;
+ 	static char *dtoa[] = {
+ 		[DAlign] = ".balign",
+ 		[DB] = "\t.byte",
+@@ -14,34 +30,42 @@ gasemitdat(Dat *d, FILE *f)
+ 		[DW] = "\t.int",
+ 		[DL] = "\t.quad"
+ 	};
++	static char *name, *section;
++	static int64_t zero;
+ 	char *p;
+ 
+ 	switch (d->type) {
+ 	case DStart:
+ 		aligned = 0;
+-		if (d->u.str) {
+-			fprintf(f, ".section %s\n", d->u.str);
+-		} else {
+-			fprintf(f, ".data\n");
+-		}
++		zero = 0;
++		section = d->u.str;
+ 		break;
+ 	case DEnd:
++		if (zero != -1) {
++			startdat(f, section, name, aligned, export, 1);
++			fprintf(f, "\t.fill %"PRId64",1,0\n", zero);
++		}
++		break;
++	case DAlign:
++		aligned = d->u.num;
+ 		break;
+ 	case DName:
+-		if (!aligned)
+-			fprintf(f, ".balign 8\n");
+-		p = d->u.str[0] == '"' ? "" : gassym;
+-		if (d->export)
+-			fprintf(f, ".globl %s%s\n", p, d->u.str);
+-		fprintf(f, "%s%s:\n", p, d->u.str);
++		name = d->u.str;
++		export = d->export;
+ 		break;
+ 	case DZ:
+-		fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
++		if (zero != -1)
++			zero += d->u.num;
++		else
++			fprintf(f, "\t.fill %"PRId64",1,0\n", d->u.num);
+ 		break;
+ 	default:
+-		if (d->type == DAlign)
+-			aligned = 1;
+-
++		if (zero != -1) {
++			startdat(f, section, name, aligned, export, 0);
++			if (zero > 0)
++				fprintf(f, "\t.fill %"PRId64",1,0\n", zero);
++			zero = -1;
++		}
+ 		if (d->isstr) {
+ 			if (d->type != DB)
+ 				err("strings only supported for 'b' currently");
+-- 
+2.32.0
+
diff --git a/pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch b/pkg/qbe/patch/0005-amd64-optimize-loading-0-into-registers.patch
index af63220f..07aa7e6b 100644
--- a/pkg/qbe/patch/0006-amd64-optimize-loading-0-into-floating-point-registe.patch
+++ b/pkg/qbe/patch/0005-amd64-optimize-loading-0-into-registers.patch
@@ -1,39 +1,46 @@
-From 1e0c08a288a5f7993dd8565ace35f1ecfc614544 Mon Sep 17 00:00:00 2001
+From a11da13e22a694f8fe4a81d894d433f50ce4af6b Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?=C3=89rico=20Nogueira?= <erico.erc@gmail.com>
 Date: Sun, 11 Jul 2021 19:19:12 -0300
-Subject: [PATCH] amd64: optimize loading +0 into floating point registers
+Subject: [PATCH] amd64: optimize loading 0 into registers
 
 Loading +0 into a floating point register can be done using pxor or
 xorps instructions. Per [1], we went with pxor because it can run on all
 vector ALU ports, even if it's one byte longer.
 
-To implement it, we special case fixarg to emit Ocopy when the value is
-+0, and emitins to treat Ocopy for floating point args specially. Since
-0. == -0., we can't check if bits.d or bits.f are equal to 0. To avoid
-requiring signbit(), we inspect bits.i directly; this assumes the bits
-union is always zero-initialized.
+Similarly, an integer register can be zeroed with xor, which has a
+smaller encoding than mov with 0 immediate.
+
+To implement this, we special case fixarg to allow Ocopy when the
+value is +0 for floating point, and change emitins to emit pxor/xor
+when it encounters a copy from 0.
+
+Co-authored-by: Michael Forney <mforney@mforney.org>
 
 [1] https://stackoverflow.com/questions/39811577/does-using-mix-of-pxor-and-xorps-affect-performance/39828976
 ---
- amd64/emit.c |  8 ++++++++
+ amd64/emit.c | 12 ++++++++++++
  amd64/isel.c | 12 +++++++-----
- 2 files changed, 15 insertions(+), 5 deletions(-)
+ 2 files changed, 19 insertions(+), 5 deletions(-)
 
 diff --git a/amd64/emit.c b/amd64/emit.c
-index 09b90d5..311b8c6 100644
+index a888000..7aeeff5 100644
 --- a/amd64/emit.c
 +++ b/amd64/emit.c
-@@ -443,6 +443,14 @@ emitins(Ins i, Fn *fn, FILE *f)
+@@ -443,6 +443,18 @@ emitins(Ins i, Fn *fn, FILE *f)
  		if (req(i.to, i.arg[0]))
  			break;
  		t0 = rtype(i.arg[0]);
-+		if (isreg(i.to)
-+		&& KBASE(i.cls) == 1
-+		&& t0 == RCon
-+		&& fn->con[i.arg[0].val].type == CBits) {
-+			assert(fn->con[i.arg[0].val].bits.i == 0);
-+			emitf("pxor %D=, %D=", &i, fn, f);
-+			break;
++		if (t0 == RCon
++		&& fn->con[i.arg[0].val].type == CBits
++		&& fn->con[i.arg[0].val].bits.i == 0) {
++			if (isreg(i.to)) {
++				if (KBASE(i.cls) == 0)
++					emitf("xor%k %=, %=", &i, fn, f);
++				else
++					emitf("pxor %D=, %D=", &i, fn, f);
++				break;
++			}
++			i.cls = KWIDE(i.cls) ? Kl : Kw;
 +		}
  		if (i.cls == Kl
  		&& t0 == RCon
diff --git a/pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch b/pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch
deleted file mode 100644
index e8e73f4f..00000000
--- a/pkg/qbe/patch/0007-amd64-optimize-zeroing-of-integer-registers-as-well.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 1b61d04de8d62821eec915eec6bde6b9a0a2d1c9 Mon Sep 17 00:00:00 2001
-From: Michael Forney <mforney@mforney.org>
-Date: Mon, 30 Aug 2021 13:40:48 -0700
-Subject: [PATCH] amd64: optimize zeroing of integer registers as well
-
----
- amd64/emit.c | 10 ++++++----
- 1 file changed, 6 insertions(+), 4 deletions(-)
-
-diff --git a/amd64/emit.c b/amd64/emit.c
-index 311b8c6..015b921 100644
---- a/amd64/emit.c
-+++ b/amd64/emit.c
-@@ -444,11 +444,13 @@ emitins(Ins i, Fn *fn, FILE *f)
- 			break;
- 		t0 = rtype(i.arg[0]);
- 		if (isreg(i.to)
--		&& KBASE(i.cls) == 1
- 		&& t0 == RCon
--		&& fn->con[i.arg[0].val].type == CBits) {
--			assert(fn->con[i.arg[0].val].bits.i == 0);
--			emitf("pxor %D=, %D=", &i, fn, f);
-+		&& fn->con[i.arg[0].val].type == CBits
-+		&& fn->con[i.arg[0].val].bits.i == 0) {
-+			if (KBASE(i.cls) == 0)
-+				emitf("xor%k %=, %=", &i, fn, f);
-+			else
-+				emitf("pxor %D=, %D=", &i, fn, f);
- 			break;
- 		}
- 		if (i.cls == Kl
--- 
-2.32.0
-
diff --git a/pkg/qbe/src b/pkg/qbe/src
-Subproject 6a69210b0faf33ad4feb6adc97d094022c52097
+Subproject 900805a8fe5cfa799966c4ef221524e967c44ca
diff --git a/pkg/qbe/ver b/pkg/qbe/ver
index 0009cc0a..60ebdd5e 100644
--- a/pkg/qbe/ver
+++ b/pkg/qbe/ver
@@ -1 +1 @@
-6a69210b0f r0
+900805a8fe r0