Skip to content

Commit 64baebd

Browse files
committed
use syntax package hash
1 parent dbbeccc commit 64baebd

File tree

3 files changed: 50 additions and 81 deletions.

src/cmd/compile/internal/syntax/parser_test.go

Lines changed: 26 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ import (
1313
"path/filepath"
1414
"regexp"
1515
"runtime"
16+
"slices"
1617
"strings"
1718
"sync"
1819
"testing"
@@ -56,7 +57,6 @@ func BenchmarkParseStdLib(b *testing.B) {
5657
base *PosBase
5758
data []byte
5859
}
59-
var largestfile *file
6060
var files []file
6161
goroot := testenv.GOROOT(b)
6262
dirs := []string{
@@ -80,18 +80,35 @@ func BenchmarkParseStdLib(b *testing.B) {
8080
data: data,
8181
base: NewFileBase(filename),
8282
})
83-
f := &files[len(files)-1]
84-
if largestfile == nil || len(f.data) > len(largestfile.data) {
85-
largestfile = f
86-
}
8783
})
8884
}
85+
slices.SortStableFunc(files, func(a, b file) int {
86+
return len(a.data) - len(b.data)
87+
})
8988
b.ResetTimer()
90-
for i := 0; i < b.N; i++ {
91-
var buf bytes.Reader
92-
buf.Reset(largestfile.data)
93-
Parse(largestfile.base, &buf, nil, nil, 0)
89+
const numberOfFiles = 10
90+
if len(files) < numberOfFiles*2 {
91+
b.Error("too few files matched to run")
9492
}
93+
b.Run(fmt.Sprintf("longest %d files", numberOfFiles), func(b *testing.B) {
94+
var buf bytes.Reader
95+
for i := 0; i < b.N; i++ {
96+
for _, file := range files[len(files)-numberOfFiles:] {
97+
buf.Reset(file.data)
98+
Parse(file.base, &buf, nil, nil, 0)
99+
}
100+
}
101+
})
102+
103+
b.Run(fmt.Sprintf("shortest %d files", numberOfFiles), func(b *testing.B) {
104+
var buf bytes.Reader
105+
for i := 0; i < b.N; i++ {
106+
for _, file := range files[:numberOfFiles] {
107+
buf.Reset(file.data)
108+
Parse(file.base, &buf, nil, nil, 0)
109+
}
110+
}
111+
})
95112
}
96113

97114
func TestStdLib(t *testing.T) {

src/cmd/compile/internal/syntax/scanner.go

Lines changed: 3 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -381,10 +381,7 @@ func (s *scanner) ident() {
381381
// possibly a keyword
382382
lit := s.segment()
383383
if len(lit) >= 2 {
384-
// tok := keywordMap[hash(lit)]
385-
// tok := keywords[keywordsIndex(lit)]
386-
tok := keywordRuntimeMap[string(lit)]
387-
if tok != 0 && tokStrFast(tok) == string(lit) {
384+
if tok := keywordMap[hash(lit)]; tok != 0 && tokStrFast(tok) == string(lit) {
388385
s.nlsemi = contains(1<<_Break|1<<_Continue|1<<_Fallthrough|1<<_Return, tok)
389386
s.tok = tok
390387
return
@@ -425,47 +422,15 @@ func hash(s []byte) uint {
425422
}
426423

427424
var keywordMap [1 << 6]token // size must be power of two
428-
var keywordRuntimeMap = make(map[string]token)
429-
var keywords [256]Token
430-
431-
// keywordsIndex maps an identifier to an index in keywords array.
432-
func keywordsIndex(maybeKeyword []byte) uint8 {
433-
if len(maybeKeyword) <= 3 {
434-
return maybeKeyword[0]
435-
}
436-
// This hash was adjusted by hand. Finding the working combinations
437-
// for this hash is quite straightforward, even when restricting all
438-
// operations to power-of-two multiplications and addition/subtractions
439-
// for performance reasons since multiplication of an integer by a power-of-two
440-
// can be optimized to a bitshift which is faster on some architectures.
441-
//
442-
// Here is a list of hashes that also works for current keyword set:
443-
// h = v0 + v1*2 + v2*4 + v3*8
444-
// h = v0 + v1*4 + v2*8 + v3
445-
// h = v0 + v1*2 + (v2+v3)*2
446-
// h = v0*4 + v1*2 + v2*2 + v3*2
447-
// h = v0*4 + v1*2 + v2*v3
448-
v0 := maybeKeyword[0]
449-
v1 := maybeKeyword[1]
450-
v2 := maybeKeyword[2]
451-
v3 := maybeKeyword[3]
452-
h := v0 + v1*8 + v2 - v3
453-
return h
454-
}
455425

456426
func init() {
457427
// populate keywordMap
458428
for tok := _Break; tok <= _Var; tok++ {
459-
kws := tok.String()
460-
kw := []byte(kws)
461-
i := keywordsIndex(kw)
462-
h := hash(kw)
463-
if keywordMap[h] != 0 || keywords[i] != 0 {
429+
h := hash([]byte(tok.String()))
430+
if keywordMap[h] != 0 {
464431
panic("imperfect hash")
465432
}
466-
keywords[i] = tok
467433
keywordMap[h] = tok
468-
keywordRuntimeMap[kws] = tok
469434
}
470435
}
471436

src/go/token/token.go

Lines changed: 21 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -279,47 +279,31 @@ func (op Token) Precedence() int {
279279
return LowestPrec
280280
}
281281

282-
var keywords [256]Token
283-
284-
func init() {
285-
for i := keyword_beg + 1; i < keyword_end; i++ {
286-
keywords[keywordsIndex(i.String())] = i
287-
}
282+
// hash is a perfect hash function for keywords.
283+
// It assumes that s has at least length 2.
284+
func hash(s string) uint {
285+
return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1)
288286
}
289287

290-
// keywordsIndex maps an identifier to an index in keywords array.
291-
func keywordsIndex(maybeKeyword string) uint8 {
292-
if len(maybeKeyword) <= 3 {
293-
// If adding a 2 or 3 letter keyword that starts with `i`(if),`f`(for) or `g`(go)
294-
// you'd need to add logic to this if statement to differentiate between them.
295-
if len(maybeKeyword) == 0 {
296-
return 0
288+
var keywordMap [1 << 6]Token // size must be power of two
289+
290+
func init() {
291+
// populate keywordMap
292+
for tok := keyword_beg + 1; tok < keyword_end; tok++ {
293+
h := hash(tok.String())
294+
if keywordMap[h] != 0 {
295+
panic("imperfect hash")
297296
}
298-
return maybeKeyword[0]
297+
keywordMap[h] = tok
299298
}
300-
// This hash was adjusted by hand. Finding the working combinations
301-
// for this hash is quite straightforward, even when restricting all
302-
// operations to power-of-two multiplications and addition/subtractions
303-
// for performance reasons since multiplication of an integer by a power-of-two
304-
// can be optimized to a bitshift which is faster on some architectures.
305-
//
306-
// Here is a list of hashes that also works for current keyword set:
307-
// h = v0 + v1*2 + v2*4 + v3*8
308-
// h = v0 + v1*4 + v2*8 + v3
309-
// h = v0 + v1*2 + (v2+v3)*2
310-
// h = v0*4 + v1*2 + v2*2 + v3*2
311-
// h = v0*4 + v1*2 + v2*v3
312-
v0 := maybeKeyword[0]
313-
v1 := maybeKeyword[1]
314-
v2 := maybeKeyword[2]
315-
v3 := maybeKeyword[3]
316-
h := v0 + v1*8 + v2 - v3
317-
return h
318299
}
319300

320301
// Lookup maps an identifier to its keyword token or [IDENT] (if not a keyword).
321302
func Lookup(ident string) Token {
322-
maybeMatch := keywords[keywordsIndex(ident)]
303+
if len(ident) < 2 {
304+
return IDENT
305+
}
306+
maybeMatch := keywordMap[hash(ident)]
323307
if maybeMatch != 0 && maybeMatch.String() == ident {
324308
return maybeMatch
325309
}
@@ -350,7 +334,10 @@ func IsExported(name string) bool {
350334

351335
// IsKeyword reports whether name is a Go keyword, such as "func" or "return".
352336
func IsKeyword(ident string) bool {
353-
tok := keywords[keywordsIndex(ident)]
337+
if len(ident) < 2 {
338+
return false
339+
}
340+
tok := keywordMap[hash(ident)]
354341
return tok != 0 && tok.String() == ident
355342
}
356343

0 commit comments

Comments
 (0)