@@ -3,7 +3,6 @@ package normalize
33import (
44 "errors"
55 "fmt"
6- "net"
76 "slices"
87 "strings"
98
@@ -209,12 +208,7 @@ func initTokens(lexer *lexmachine.Lexer,
209208 addTokens := func (patterns []TokenPattern ) {
210209 for _ , p := range patterns {
211210 if p .mask == 0 || builtinPatterns & p .mask != 0 {
212- switch p .mask {
213- case pIp :
214- lexer .Add ([]byte (p .RE ), newIpToken (p .Placeholder ))
215- default :
216- lexer .Add ([]byte (p .RE ), newToken (p .Placeholder ))
217- }
211+ lexer .Add ([]byte (p .RE ), newToken (p .Placeholder ))
218212 }
219213 }
220214 }
@@ -270,39 +264,6 @@ func newToken(placeholder string) lexmachine.Action {
270264 }
271265}
272266
273- func newIpToken (placeholder string ) lexmachine.Action {
274- return func (s * lexmachine.Scanner , m * machines.Match ) (any , error ) {
275- // skip `\w<match>\w`
276- if m .TC > 0 && isWord (s .Text [m .TC - 1 ]) ||
277- m .TC + len (m .Bytes ) < len (s .Text ) && isWord (s .Text [m .TC + len (m .Bytes )]) {
278- return nil , nil
279- }
280-
281- // Fallback IP parser.
282- // Scans for IP-like patterns until end, then validates with net.ParseIP.
283- // Necessary because lexer's own pattern matching can be incomplete.
284- begin , end := m .TC , m .TC
285-
286- for end < len (s .Text ) {
287- if ! isIPChar (s .Text [end ]) {
288- break
289- }
290- end ++
291- }
292-
293- candidate := string (s .Text [begin :end ])
294- if net .ParseIP (candidate ) == nil {
295- return nil , nil
296- }
297-
298- return token {
299- placeholder : placeholder ,
300- begin : begin ,
301- end : end ,
302- }, nil
303- }
304- }
305-
306267func (n * tokenNormalizer ) normalizeByScanner (out []byte , scanner * lexmachine.Scanner ) []byte {
307268 prevEnd := 0
308269 for tokRaw , err , eos := scanner .Next (); ! eos ; tokRaw , err , eos = scanner .Next () {
@@ -496,13 +457,6 @@ func isWord(c byte) bool {
496457 c == '_'
497458}
498459
499- func isIPChar (c byte ) bool {
500- return (c >= '0' && c <= '9' ) ||
501- (c >= 'a' && c <= 'f' ) ||
502- (c >= 'A' && c <= 'F' ) ||
503- c == ':' || c == '.'
504- }
505-
506460// [lexmachine] pkg doesn't support 'exactly' re syntax (a{3}, a{3,6}),
507461// so we use [strings.Repeat] instead
508462var builtinTokenPatterns = []TokenPattern {
@@ -569,11 +523,9 @@ var builtinTokenPatterns = []TokenPattern{
569523 mask : pDatetime ,
570524 },
571525 {
526+ // IPv4 only
572527 Placeholder : placeholderByPattern [pIp ],
573- RE : fmt .Sprintf (`%s|%s` ,
574- strings .TrimSuffix (strings .Repeat (`(25[0-5]|(2[0-4]|1?[0-9])?[0-9])\.` , 4 ), `\.` ),
575- `[0-9a-fA-F:]*:[0-9a-fA-F:]*` ,
576- ),
528+ RE : strings .TrimSuffix (strings .Repeat (`(25[0-5]|(2[0-4]|1?[0-9])?[0-9])\.` , 4 ), `\.` ),
577529
578530 mask : pIp ,
579531 },