Skip to content

Commit a595933

Browse files
committed
crypto/sha1: provide optimised assembly for riscv64
Provide an optimised assembly implementation of sha1 for riscv64.

goos: linux
goarch: riscv64
pkg: crypto/sha1
cpu: Spacemit(R) X60

                      │   oldsha1    │               newsha1               │
                      │    sec/op    │    sec/op     vs base               │
Hash8Bytes/New-8         2.136µ ± 0%    1.173µ ± 0%  -45.09% (p=0.000 n=8)
Hash8Bytes/Sum-8         2.079µ ± 0%    1.116µ ± 0%  -46.32% (p=0.000 n=8)
Hash320Bytes/New-8      10.704µ ± 0%    4.954µ ± 0%  -53.72% (p=0.000 n=8)
Hash320Bytes/Sum-8      10.645µ ± 0%    4.872µ ± 0%  -54.23% (p=0.000 n=8)
Hash1K/New-8             29.66µ ± 0%    13.38µ ± 0%  -54.90% (p=0.000 n=8)
Hash1K/Sum-8             29.63µ ± 0%    13.24µ ± 0%  -55.32% (p=0.000 n=8)
Hash8K/New-8             226.8µ ± 1%    104.7µ ± 2%  -53.84% (p=0.000 n=8)
Hash8K/Sum-8             226.7µ ± 1%    102.9µ ± 1%  -54.62% (p=0.000 n=8)
geomean                  19.72µ         9.387µ       -52.40%

                      │   oldsha1    │               newsha1                │
                      │     B/s      │     B/s       vs base                │
Hash8Bytes/New-8        3.572Mi ± 0%   6.504Mi ± 0%   +82.11% (p=0.000 n=8)
Hash8Bytes/Sum-8        3.672Mi ± 0%   6.838Mi ± 0%   +86.23% (p=0.000 n=8)
Hash320Bytes/New-8      28.51Mi ± 0%   61.60Mi ± 0%  +116.02% (p=0.000 n=8)
Hash320Bytes/Sum-8      28.67Mi ± 0%   62.64Mi ± 0%  +118.51% (p=0.000 n=8)
Hash1K/New-8            32.92Mi ± 0%   73.00Mi ± 0%  +121.74% (p=0.000 n=8)
Hash1K/Sum-8            32.96Mi ± 0%   73.76Mi ± 0%  +123.78% (p=0.000 n=8)
Hash8K/New-8            34.44Mi ± 1%   74.61Mi ± 2%  +116.61% (p=0.000 n=8)
Hash8K/Sum-8            34.46Mi ± 1%   75.93Mi ± 1%  +120.37% (p=0.000 n=8)
geomean                 18.51Mi        38.89Mi       +110.07%

Change-Id: I3d4d05fe19872412fdf77a337395e0bf84c41dd5
Reviewed-on: https://go-review.googlesource.com/c/go/+/732560
Reviewed-by: Roland Shoemaker <roland@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Joel Sing <joel@sing.id.au>
1 parent 86f7959 commit a595933

3 files changed

Lines changed: 227 additions & 2 deletions

File tree

src/crypto/sha1/sha1block_decl.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
//go:build (386 || arm || loong64) && !purego
5+
//go:build (386 || arm || loong64 || riscv64) && !purego
66

77
package sha1
88

src/crypto/sha1/sha1block_generic.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
//go:build (!386 && !amd64 && !arm && !arm64 && !loong64 && !s390x) || purego
5+
//go:build (!386 && !amd64 && !arm && !arm64 && !loong64 && !riscv64 && !s390x) || purego
66

77
package sha1
88

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build !purego
6+
7+
#include "textflag.h"
8+
9+
// LOAD(index) reads the four bytes at offset index*4 from X29 and combines
// them, first byte most significant (big-endian order), into one 32-bit word.
// The word is left in X5 and is also stored in slot index of the buffer
// addressed by X19. Clobbers X5, X6, X7 and X8.
#define LOAD(index) \
10+
MOVBU ((index*4)+0)(X29), X5; \
11+
MOVBU ((index*4)+1)(X29), X6; \
12+
MOVBU ((index*4)+2)(X29), X7; \
13+
MOVBU ((index*4)+3)(X29), X8; \
14+
SLL $24, X5; \
15+
SLL $16, X6; \
16+
OR X5, X6, X5; \
17+
SLL $8, X7; \
18+
OR X5, X7, X5; \
19+
OR X5, X8, X5; \
20+
MOVW X5, (index*4)(X19)
21+
22+
// SHUFFLE(index) derives the next message schedule word from the 16-word
// circular buffer addressed by X19:
//   w[i&15] = rotl32(w[i&15] ^ w[(i-3)&15] ^ w[(i-8)&15] ^ w[(i-14)&15], 1)
// RORW $31 rotates the 32-bit value right by 31, which is a rotate left by 1.
// The new word is left in X5 and written back to slot index&15.
// Clobbers X5, X6, X7 and X8.
#define SHUFFLE(index) \
23+
MOVWU (((index)&0xf)*4)(X19), X5; \
24+
MOVWU (((index-3)&0xf)*4)(X19), X6; \
25+
MOVWU (((index-8)&0xf)*4)(X19), X7; \
26+
MOVWU (((index-14)&0xf)*4)(X19), X8; \
27+
XOR X6, X5; \
28+
XOR X7, X5; \
29+
XOR X8, X5; \
30+
RORW $31, X5; \
31+
MOVW X5, (((index)&0xf)*4)(X19)
32+
33+
// FUNC1 computes the round function used by ROUND1 and ROUND1x and leaves
// the result in X7. Only b, c and d are read; a and e are unused.
// f = d ^ (b & (c ^ d))
34+
#define FUNC1(a, b, c, d, e) \
35+
XOR c, d, X7; \
36+
AND b, X7; \
37+
XOR d, X7
38+
39+
// FUNC2 computes the round function used by ROUND2 (and, via FUNC4, by
// ROUND4) and leaves the result in X7. a and e are unused.
// f = b ^ c ^ d
40+
#define FUNC2(a, b, c, d, e) \
41+
XOR b, c, X7; \
42+
XOR d, X7
43+
44+
// FUNC3 computes the round function used by ROUND3 and leaves the result in
// X7. X6 and X8 are used as temporaries and are clobbered; a and e are unused.
// f = (b & c) | ((b | c) & d)
45+
#define FUNC3(a, b, c, d, e) \
46+
OR b, c, X8; \
47+
AND b, c, X6; \
48+
AND d, X8; \
49+
OR X6, X8, X7
50+
51+
// FUNC4 is the round function used by ROUND4; it is the same as FUNC2.
#define FUNC4 FUNC2
52+
53+
// MIX(a, b, c, d, e, key) completes one round. It expects the round function
// value in X7 and the current message word in X5, and performs:
//   b = rotl32(b, 30)                       (RORW $2)
//   e = e + X7 + X5 + key + rotl32(a, 5)    (RORW $27 into X8)
// X8 is clobbered. c and d are not referenced.
#define MIX(a, b, c, d, e, key) \
54+
RORW $2, b; \
55+
ADD X7, e; \
56+
RORW $27, a, X8; \
57+
ADD X5, e; \
58+
ADD key, e; \
59+
ADD X8, e
60+
61+
// ROUND1 performs one round that takes its message word straight from the
// input (LOAD), applies FUNC1 and mixes with the constant held in X15.
#define ROUND1(a, b, c, d, e, index) \
62+
LOAD(index); \
63+
FUNC1(a, b, c, d, e); \
64+
MIX(a, b, c, d, e, X15)
65+
66+
// ROUND1x is ROUND1 with the message word produced by SHUFFLE instead of
// LOAD; it still uses FUNC1 and the constant held in X15.
#define ROUND1x(a, b, c, d, e, index) \
67+
SHUFFLE(index); \
68+
FUNC1(a, b, c, d, e); \
69+
MIX(a, b, c, d, e, X15)
70+
71+
// ROUND2 performs one round using a SHUFFLE-generated message word, FUNC2
// and the constant held in X16.
#define ROUND2(a, b, c, d, e, index) \
72+
SHUFFLE(index); \
73+
FUNC2(a, b, c, d, e); \
74+
MIX(a, b, c, d, e, X16)
75+
76+
// ROUND3 performs one round using a SHUFFLE-generated message word, FUNC3
// and the constant held in X17.
#define ROUND3(a, b, c, d, e, index) \
77+
SHUFFLE(index); \
78+
FUNC3(a, b, c, d, e); \
79+
MIX(a, b, c, d, e, X17)
80+
81+
// ROUND4 performs one round using a SHUFFLE-generated message word, FUNC4
// and the constant held in X18.
#define ROUND4(a, b, c, d, e, index) \
82+
SHUFFLE(index); \
83+
FUNC4(a, b, c, d, e); \
84+
MIX(a, b, c, d, e, X18)
85+
86+
// block folds every complete 64-byte chunk of p into the five 32-bit state
// words stored at the start of dig, running 80 rounds per chunk. Bytes after
// the last complete chunk are ignored. If p contains no complete chunk the
// function returns immediately and dig is left untouched.
//
// Register use:
//   X29      pointer to the current chunk
//   X28      pointer just past the last complete chunk
//   X19      16-word message schedule buffer on the stack
//   X20      dig
//   X10-X14  working variables a, b, c, d, e
//   X15-X18  round constants _K[0] .. _K[3]
//   X22-X26  copy of a-e taken at the start of each chunk
//   X5-X8    scratch registers used by the round macros
//
// func block(dig *Digest, p []byte)
87+
TEXT ·block(SB),NOSPLIT,$64-32
88+
MOV p_base+8(FP), X29
89+
MOV p_len+16(FP), X30
90+
SRL $6, X30 // round the length down to a multiple of 64
91+
SLL $6, X30
92+
93+
ADD X29, X30, X28
94+
BEQ X28, X29, end // no complete chunk: nothing has been loaded yet, just return
95+
96+
ADD $8, X2, X19 // message schedule buffer on stack
97+
98+
MOV dig+0(FP), X20
99+
MOVWU (0*4)(X20), X10 // a = H0
100+
MOVWU (1*4)(X20), X11 // b = H1
101+
MOVWU (2*4)(X20), X12 // c = H2
102+
MOVWU (3*4)(X20), X13 // d = H3
103+
MOVWU (4*4)(X20), X14 // e = H4
104+
105+
MOV $·_K(SB), X21
106+
MOVW (0*4)(X21), X15
107+
MOVW (1*4)(X21), X16
108+
MOVW (2*4)(X21), X17
109+
MOVW (3*4)(X21), X18
110+
111+
loop:
112+
MOVW X10, X22 // save a-e so they can be added back after the 80 rounds
113+
MOVW X11, X23
114+
MOVW X12, X24
115+
MOVW X13, X25
116+
MOVW X14, X26
117+
// Each round rotates the roles of the five working registers by one.
118+
ROUND1(X10, X11, X12, X13, X14, 0)
119+
ROUND1(X14, X10, X11, X12, X13, 1)
120+
ROUND1(X13, X14, X10, X11, X12, 2)
121+
ROUND1(X12, X13, X14, X10, X11, 3)
122+
ROUND1(X11, X12, X13, X14, X10, 4)
123+
ROUND1(X10, X11, X12, X13, X14, 5)
124+
ROUND1(X14, X10, X11, X12, X13, 6)
125+
ROUND1(X13, X14, X10, X11, X12, 7)
126+
ROUND1(X12, X13, X14, X10, X11, 8)
127+
ROUND1(X11, X12, X13, X14, X10, 9)
128+
ROUND1(X10, X11, X12, X13, X14, 10)
129+
ROUND1(X14, X10, X11, X12, X13, 11)
130+
ROUND1(X13, X14, X10, X11, X12, 12)
131+
ROUND1(X12, X13, X14, X10, X11, 13)
132+
ROUND1(X11, X12, X13, X14, X10, 14)
133+
ROUND1(X10, X11, X12, X13, X14, 15)
134+
135+
ROUND1x(X14, X10, X11, X12, X13, 16)
136+
ROUND1x(X13, X14, X10, X11, X12, 17)
137+
ROUND1x(X12, X13, X14, X10, X11, 18)
138+
ROUND1x(X11, X12, X13, X14, X10, 19)
139+
140+
ROUND2(X10, X11, X12, X13, X14, 20)
141+
ROUND2(X14, X10, X11, X12, X13, 21)
142+
ROUND2(X13, X14, X10, X11, X12, 22)
143+
ROUND2(X12, X13, X14, X10, X11, 23)
144+
ROUND2(X11, X12, X13, X14, X10, 24)
145+
ROUND2(X10, X11, X12, X13, X14, 25)
146+
ROUND2(X14, X10, X11, X12, X13, 26)
147+
ROUND2(X13, X14, X10, X11, X12, 27)
148+
ROUND2(X12, X13, X14, X10, X11, 28)
149+
ROUND2(X11, X12, X13, X14, X10, 29)
150+
ROUND2(X10, X11, X12, X13, X14, 30)
151+
ROUND2(X14, X10, X11, X12, X13, 31)
152+
ROUND2(X13, X14, X10, X11, X12, 32)
153+
ROUND2(X12, X13, X14, X10, X11, 33)
154+
ROUND2(X11, X12, X13, X14, X10, 34)
155+
ROUND2(X10, X11, X12, X13, X14, 35)
156+
ROUND2(X14, X10, X11, X12, X13, 36)
157+
ROUND2(X13, X14, X10, X11, X12, 37)
158+
ROUND2(X12, X13, X14, X10, X11, 38)
159+
ROUND2(X11, X12, X13, X14, X10, 39)
160+
161+
ROUND3(X10, X11, X12, X13, X14, 40)
162+
ROUND3(X14, X10, X11, X12, X13, 41)
163+
ROUND3(X13, X14, X10, X11, X12, 42)
164+
ROUND3(X12, X13, X14, X10, X11, 43)
165+
ROUND3(X11, X12, X13, X14, X10, 44)
166+
ROUND3(X10, X11, X12, X13, X14, 45)
167+
ROUND3(X14, X10, X11, X12, X13, 46)
168+
ROUND3(X13, X14, X10, X11, X12, 47)
169+
ROUND3(X12, X13, X14, X10, X11, 48)
170+
ROUND3(X11, X12, X13, X14, X10, 49)
171+
ROUND3(X10, X11, X12, X13, X14, 50)
172+
ROUND3(X14, X10, X11, X12, X13, 51)
173+
ROUND3(X13, X14, X10, X11, X12, 52)
174+
ROUND3(X12, X13, X14, X10, X11, 53)
175+
ROUND3(X11, X12, X13, X14, X10, 54)
176+
ROUND3(X10, X11, X12, X13, X14, 55)
177+
ROUND3(X14, X10, X11, X12, X13, 56)
178+
ROUND3(X13, X14, X10, X11, X12, 57)
179+
ROUND3(X12, X13, X14, X10, X11, 58)
180+
ROUND3(X11, X12, X13, X14, X10, 59)
181+
182+
ROUND4(X10, X11, X12, X13, X14, 60)
183+
ROUND4(X14, X10, X11, X12, X13, 61)
184+
ROUND4(X13, X14, X10, X11, X12, 62)
185+
ROUND4(X12, X13, X14, X10, X11, 63)
186+
ROUND4(X11, X12, X13, X14, X10, 64)
187+
ROUND4(X10, X11, X12, X13, X14, 65)
188+
ROUND4(X14, X10, X11, X12, X13, 66)
189+
ROUND4(X13, X14, X10, X11, X12, 67)
190+
ROUND4(X12, X13, X14, X10, X11, 68)
191+
ROUND4(X11, X12, X13, X14, X10, 69)
192+
ROUND4(X10, X11, X12, X13, X14, 70)
193+
ROUND4(X14, X10, X11, X12, X13, 71)
194+
ROUND4(X13, X14, X10, X11, X12, 72)
195+
ROUND4(X12, X13, X14, X10, X11, 73)
196+
ROUND4(X11, X12, X13, X14, X10, 74)
197+
ROUND4(X10, X11, X12, X13, X14, 75)
198+
ROUND4(X14, X10, X11, X12, X13, 76)
199+
ROUND4(X13, X14, X10, X11, X12, 77)
200+
ROUND4(X12, X13, X14, X10, X11, 78)
201+
ROUND4(X11, X12, X13, X14, X10, 79)
202+
203+
ADD X22, X10 // add the saved chunk-entry values back into a-e
204+
ADD X23, X11
205+
ADD X24, X12
206+
ADD X25, X13
207+
ADD X26, X14
208+
209+
ADD $64, X29
210+
BNE X28, X29, loop
211+
// Write the updated state back to dig. These stores must stay above the
// end label: the early-exit branch is taken before X20 and X10-X14 are
// loaded, so it must not reach them.
212+
MOVW X10, (0*4)(X20)
213+
MOVW X11, (1*4)(X20)
214+
MOVW X12, (2*4)(X20)
215+
MOVW X13, (3*4)(X20)
216+
MOVW X14, (4*4)(X20)
217+
218+
end:
219+
RET
220+
221+
// _K is a 16-byte read-only table holding the four 32-bit SHA-1 round
// constants. block loads _K[0] .. _K[3] into X15 .. X18, which the ROUND1/1x,
// ROUND2, ROUND3 and ROUND4 macros pass to MIX respectively.
GLOBL ·_K(SB),RODATA,$16
222+
DATA ·_K+0(SB)/4, $0x5A827999
223+
DATA ·_K+4(SB)/4, $0x6ED9EBA1
224+
DATA ·_K+8(SB)/4, $0x8F1BBCDC
225+
DATA ·_K+12(SB)/4, $0xCA62C1D6

0 commit comments

Comments
 (0)