Skip to content

Commit 7225fbc

Browse files
authored
Revert "add IPv6 address normalization (#943)" (#958)
This reverts commit 7243a0d.
1 parent 246b574 commit 7225fbc

3 files changed

Lines changed: 17 additions & 65 deletions

File tree

plugin/action/hash/normalize/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ We support a set of patterns out of the box.
2626
| 11 | uuid | `<uuid>` | 7c1811ed-e98f-4c9c-a9f9-58c757ff494f |
2727
| 12 | hash | `<hash>` | 48757ec9f04efe7faacec8722f3476339b125a6b6172b8a69ff3aa329e0bd0ff<br>a94a8fe5ccb19ba61c4c0873d391e987982fbbd3<br>098f6bcd4621d373cade4e832627b4f6 |
2828
| 13 | datetime | `<datetime>` | 2025-01-13T10:20:40.999999Z<br>2025-01-13T10:20:40+04:00<br>2025-01-13 10:20:40<br>2025-01-13<br>10:20:40 |
29-
| 14 | ip | `<ip>` | **IPv4:** 1.2.3.4<br>**IPv6:** 2001:db8:3333:4444:5555:6666:1.2.3.4 |
29+
| 14 | ip | `<ip>` | 1.2.3.4<br>01.102.103.104 |
3030
| 15 | duration | `<duration>` | -1m5s<br>1w2d3h4m5s6ms7us8ns |
3131
| 16 | hex | `<hex>` | 0x13eb85e69dfbc0758b12acdaae36287d<br>0X553026A59C |
3232
| 17 | float | `<float>` | 100.23<br>-4.56 |

plugin/action/hash/normalize/token_normalizer.go

Lines changed: 3 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package normalize
33
import (
44
"errors"
55
"fmt"
6-
"net"
76
"slices"
87
"strings"
98

@@ -209,12 +208,7 @@ func initTokens(lexer *lexmachine.Lexer,
209208
addTokens := func(patterns []TokenPattern) {
210209
for _, p := range patterns {
211210
if p.mask == 0 || builtinPatterns&p.mask != 0 {
212-
switch p.mask {
213-
case pIp:
214-
lexer.Add([]byte(p.RE), newIpToken(p.Placeholder))
215-
default:
216-
lexer.Add([]byte(p.RE), newToken(p.Placeholder))
217-
}
211+
lexer.Add([]byte(p.RE), newToken(p.Placeholder))
218212
}
219213
}
220214
}
@@ -270,39 +264,6 @@ func newToken(placeholder string) lexmachine.Action {
270264
}
271265
}
272266

273-
func newIpToken(placeholder string) lexmachine.Action {
274-
return func(s *lexmachine.Scanner, m *machines.Match) (any, error) {
275-
// skip `\w<match>\w`
276-
if m.TC > 0 && isWord(s.Text[m.TC-1]) ||
277-
m.TC+len(m.Bytes) < len(s.Text) && isWord(s.Text[m.TC+len(m.Bytes)]) {
278-
return nil, nil
279-
}
280-
281-
// Fallback IP parser.
282-
// Scans for IP-like patterns until end, then validates with net.ParseIP.
283-
// Necessary because lexer's own pattern matching can be incomplete.
284-
begin, end := m.TC, m.TC
285-
286-
for end < len(s.Text) {
287-
if !isIPChar(s.Text[end]) {
288-
break
289-
}
290-
end++
291-
}
292-
293-
candidate := string(s.Text[begin:end])
294-
if net.ParseIP(candidate) == nil {
295-
return nil, nil
296-
}
297-
298-
return token{
299-
placeholder: placeholder,
300-
begin: begin,
301-
end: end,
302-
}, nil
303-
}
304-
}
305-
306267
func (n *tokenNormalizer) normalizeByScanner(out []byte, scanner *lexmachine.Scanner) []byte {
307268
prevEnd := 0
308269
for tokRaw, err, eos := scanner.Next(); !eos; tokRaw, err, eos = scanner.Next() {
@@ -496,13 +457,6 @@ func isWord(c byte) bool {
496457
c == '_'
497458
}
498459

499-
func isIPChar(c byte) bool {
500-
return (c >= '0' && c <= '9') ||
501-
(c >= 'a' && c <= 'f') ||
502-
(c >= 'A' && c <= 'F') ||
503-
c == ':' || c == '.'
504-
}
505-
506460
// [lexmachine] pkg doesn't support 'exactly' re syntax (a{3}, a{3,6}),
507461
// so we use [strings.Repeat] instead
508462
var builtinTokenPatterns = []TokenPattern{
@@ -569,11 +523,9 @@ var builtinTokenPatterns = []TokenPattern{
569523
mask: pDatetime,
570524
},
571525
{
526+
// IPv4 only
572527
Placeholder: placeholderByPattern[pIp],
573-
RE: fmt.Sprintf(`%s|%s`,
574-
strings.TrimSuffix(strings.Repeat(`(25[0-5]|(2[0-4]|1?[0-9])?[0-9])\.`, 4), `\.`),
575-
`[0-9a-fA-F:]*:[0-9a-fA-F:]*`,
576-
),
528+
RE: strings.TrimSuffix(strings.Repeat(`(25[0-5]|(2[0-4]|1?[0-9])?[0-9])\.`, 4), `\.`),
577529

578530
mask: pIp,
579531
},

plugin/action/hash/normalize/token_normalizer_test.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -280,23 +280,23 @@ func TestTokenNormalizerBuiltin(t *testing.T) {
280280
name: "ip",
281281
inputs: []string{
282282
"some 1.2.3.4 here",
283-
"some 101.102.103.104 here",
283+
"some 01.102.103.104 here",
284284

285285
// IPv6 Normal
286-
"some 2001:db8:3333:4444:5555:DDDD:EEEE:FFFF here",
287-
"some :: here",
288-
"some 2001:db8:: here",
289-
"some ::1234:5678 here",
290-
"some 2001:0db8:0001:0000:0000:0ab9:C0A8:0102 here",
291-
"some 2001:db8::1234:5678 here",
286+
//"some 2001:db8:3333:4444:5555:DDDD:EEEE:FFFF here",
287+
//"some :: here",
288+
//"some 2001:db8:: here",
289+
//"some ::1234:5678 here",
290+
//"some 2001:0db8:0001:0000:0000:0ab9:C0A8:0102 here",
291+
//"some 2001:db8::1234:5678 here",
292292

293293
// IPv6 Dual
294-
"some 2001:db8:3333:4444:5555:6666:1.2.3.4 here",
295-
"some ::11.22.33.44 here",
296-
"some 2001:db8::123.123.123.123 here",
297-
"some ::1234:5678:91.123.4.56 here",
298-
"some ::1234:5678:1.2.3.4 here",
299-
"some 2001:db8::1234:5678:5.6.7.8 here",
294+
//"some 2001:db8:3333:4444:5555:6666:1.2.3.4 here",
295+
//"some ::11.22.33.44 here",
296+
//"some 2001:db8::123.123.123.123 here",
297+
//"some ::1234:5678:91.123.4.56 here",
298+
//"some ::1234:5678:1.2.3.4 here",
299+
//"some 2001:db8::1234:5678:5.6.7.8 here",
300300
},
301301
patterns: "ip",
302302
want: "some <ip> here",

0 commit comments

Comments (0)