|  | // Copyright 2011 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | package syntax | 
|  |  | 
|  | // Note to implementers: | 
|  | // In this package, re is always a *Regexp and r is always a rune. | 
|  |  | 
|  | import ( | 
|  | "strconv" | 
|  | "strings" | 
|  | "unicode" | 
|  | ) | 
|  |  | 
|  | // A Regexp is a node in a regular expression syntax tree. | 
|  | type Regexp struct { | 
|  | Op       Op // operator | 
|  | Flags    Flags | 
|  | Sub      []*Regexp  // subexpressions, if any | 
|  | Sub0     [1]*Regexp // storage for short Sub | 
|  | Rune     []rune     // matched runes, for OpLiteral, OpCharClass | 
|  | Rune0    [2]rune    // storage for short Rune | 
|  | Min, Max int        // min, max for OpRepeat | 
|  | Cap      int        // capturing index, for OpCapture | 
|  | Name     string     // capturing name, for OpCapture | 
|  | } | 
|  |  | 
|  | //go:generate stringer -type Op -trimprefix Op | 
|  |  | 
|  | // An Op is a single regular expression operator. | 
|  | type Op uint8 | 
|  |  | 
|  | // Operators are listed in precedence order, tightest binding to weakest. | 
|  | // Character class operators are listed simplest to most complex | 
|  | // (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar). | 
|  |  | 
|  | const ( | 
|  | OpNoMatch        Op = 1 + iota // matches no strings | 
|  | OpEmptyMatch                   // matches empty string | 
|  | OpLiteral                      // matches Runes sequence | 
|  | OpCharClass                    // matches Runes interpreted as range pair list | 
|  | OpAnyCharNotNL                 // matches any character except newline | 
|  | OpAnyChar                      // matches any character | 
|  | OpBeginLine                    // matches empty string at beginning of line | 
|  | OpEndLine                      // matches empty string at end of line | 
|  | OpBeginText                    // matches empty string at beginning of text | 
|  | OpEndText                      // matches empty string at end of text | 
|  | OpWordBoundary                 // matches word boundary `\b` | 
|  | OpNoWordBoundary               // matches word non-boundary `\B` | 
|  | OpCapture                      // capturing subexpression with index Cap, optional name Name | 
|  | OpStar                         // matches Sub[0] zero or more times | 
|  | OpPlus                         // matches Sub[0] one or more times | 
|  | OpQuest                        // matches Sub[0] zero or one times | 
|  | OpRepeat                       // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit) | 
|  | OpConcat                       // matches concatenation of Subs | 
|  | OpAlternate                    // matches alternation of Subs | 
|  | ) | 
|  |  | 
|  | const opPseudo Op = 128 // where pseudo-ops start | 
|  |  | 
|  | // Equal reports whether x and y have identical structure. | 
|  | func (x *Regexp) Equal(y *Regexp) bool { | 
|  | if x == nil || y == nil { | 
|  | return x == y | 
|  | } | 
|  | if x.Op != y.Op { | 
|  | return false | 
|  | } | 
|  | switch x.Op { | 
|  | case OpEndText: | 
|  | // The parse flags remember whether this is \z or \Z. | 
|  | if x.Flags&WasDollar != y.Flags&WasDollar { | 
|  | return false | 
|  | } | 
|  |  | 
|  | case OpLiteral, OpCharClass: | 
|  | if len(x.Rune) != len(y.Rune) { | 
|  | return false | 
|  | } | 
|  | for i, r := range x.Rune { | 
|  | if r != y.Rune[i] { | 
|  | return false | 
|  | } | 
|  | } | 
|  |  | 
|  | case OpAlternate, OpConcat: | 
|  | if len(x.Sub) != len(y.Sub) { | 
|  | return false | 
|  | } | 
|  | for i, sub := range x.Sub { | 
|  | if !sub.Equal(y.Sub[i]) { | 
|  | return false | 
|  | } | 
|  | } | 
|  |  | 
|  | case OpStar, OpPlus, OpQuest: | 
|  | if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) { | 
|  | return false | 
|  | } | 
|  |  | 
|  | case OpRepeat: | 
|  | if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) { | 
|  | return false | 
|  | } | 
|  |  | 
|  | case OpCapture: | 
|  | if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) { | 
|  | return false | 
|  | } | 
|  | } | 
|  | return true | 
|  | } | 
|  |  | 
|  | // writeRegexp writes the Perl syntax for the regular expression re to b. | 
|  | func writeRegexp(b *strings.Builder, re *Regexp) { | 
|  | switch re.Op { | 
|  | default: | 
|  | b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">") | 
|  | case OpNoMatch: | 
|  | b.WriteString(`[^\x00-\x{10FFFF}]`) | 
|  | case OpEmptyMatch: | 
|  | b.WriteString(`(?:)`) | 
|  | case OpLiteral: | 
|  | if re.Flags&FoldCase != 0 { | 
|  | b.WriteString(`(?i:`) | 
|  | } | 
|  | for _, r := range re.Rune { | 
|  | escape(b, r, false) | 
|  | } | 
|  | if re.Flags&FoldCase != 0 { | 
|  | b.WriteString(`)`) | 
|  | } | 
|  | case OpCharClass: | 
|  | if len(re.Rune)%2 != 0 { | 
|  | b.WriteString(`[invalid char class]`) | 
|  | break | 
|  | } | 
|  | b.WriteRune('[') | 
|  | if len(re.Rune) == 0 { | 
|  | b.WriteString(`^\x00-\x{10FFFF}`) | 
|  | } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune && len(re.Rune) > 2 { | 
|  | // Contains 0 and MaxRune. Probably a negated class. | 
|  | // Print the gaps. | 
|  | b.WriteRune('^') | 
|  | for i := 1; i < len(re.Rune)-1; i += 2 { | 
|  | lo, hi := re.Rune[i]+1, re.Rune[i+1]-1 | 
|  | escape(b, lo, lo == '-') | 
|  | if lo != hi { | 
|  | b.WriteRune('-') | 
|  | escape(b, hi, hi == '-') | 
|  | } | 
|  | } | 
|  | } else { | 
|  | for i := 0; i < len(re.Rune); i += 2 { | 
|  | lo, hi := re.Rune[i], re.Rune[i+1] | 
|  | escape(b, lo, lo == '-') | 
|  | if lo != hi { | 
|  | b.WriteRune('-') | 
|  | escape(b, hi, hi == '-') | 
|  | } | 
|  | } | 
|  | } | 
|  | b.WriteRune(']') | 
|  | case OpAnyCharNotNL: | 
|  | b.WriteString(`(?-s:.)`) | 
|  | case OpAnyChar: | 
|  | b.WriteString(`(?s:.)`) | 
|  | case OpBeginLine: | 
|  | b.WriteString(`(?m:^)`) | 
|  | case OpEndLine: | 
|  | b.WriteString(`(?m:$)`) | 
|  | case OpBeginText: | 
|  | b.WriteString(`\A`) | 
|  | case OpEndText: | 
|  | if re.Flags&WasDollar != 0 { | 
|  | b.WriteString(`(?-m:$)`) | 
|  | } else { | 
|  | b.WriteString(`\z`) | 
|  | } | 
|  | case OpWordBoundary: | 
|  | b.WriteString(`\b`) | 
|  | case OpNoWordBoundary: | 
|  | b.WriteString(`\B`) | 
|  | case OpCapture: | 
|  | if re.Name != "" { | 
|  | b.WriteString(`(?P<`) | 
|  | b.WriteString(re.Name) | 
|  | b.WriteRune('>') | 
|  | } else { | 
|  | b.WriteRune('(') | 
|  | } | 
|  | if re.Sub[0].Op != OpEmptyMatch { | 
|  | writeRegexp(b, re.Sub[0]) | 
|  | } | 
|  | b.WriteRune(')') | 
|  | case OpStar, OpPlus, OpQuest, OpRepeat: | 
|  | if sub := re.Sub[0]; sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 { | 
|  | b.WriteString(`(?:`) | 
|  | writeRegexp(b, sub) | 
|  | b.WriteString(`)`) | 
|  | } else { | 
|  | writeRegexp(b, sub) | 
|  | } | 
|  | switch re.Op { | 
|  | case OpStar: | 
|  | b.WriteRune('*') | 
|  | case OpPlus: | 
|  | b.WriteRune('+') | 
|  | case OpQuest: | 
|  | b.WriteRune('?') | 
|  | case OpRepeat: | 
|  | b.WriteRune('{') | 
|  | b.WriteString(strconv.Itoa(re.Min)) | 
|  | if re.Max != re.Min { | 
|  | b.WriteRune(',') | 
|  | if re.Max >= 0 { | 
|  | b.WriteString(strconv.Itoa(re.Max)) | 
|  | } | 
|  | } | 
|  | b.WriteRune('}') | 
|  | } | 
|  | if re.Flags&NonGreedy != 0 { | 
|  | b.WriteRune('?') | 
|  | } | 
|  | case OpConcat: | 
|  | for _, sub := range re.Sub { | 
|  | if sub.Op == OpAlternate { | 
|  | b.WriteString(`(?:`) | 
|  | writeRegexp(b, sub) | 
|  | b.WriteString(`)`) | 
|  | } else { | 
|  | writeRegexp(b, sub) | 
|  | } | 
|  | } | 
|  | case OpAlternate: | 
|  | for i, sub := range re.Sub { | 
|  | if i > 0 { | 
|  | b.WriteRune('|') | 
|  | } | 
|  | writeRegexp(b, sub) | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | func (re *Regexp) String() string { | 
|  | var b strings.Builder | 
|  | writeRegexp(&b, re) | 
|  | return b.String() | 
|  | } | 
|  |  | 
|  | const meta = `\.+*?()|[]{}^$` | 
|  |  | 
|  | func escape(b *strings.Builder, r rune, force bool) { | 
|  | if unicode.IsPrint(r) { | 
|  | if strings.ContainsRune(meta, r) || force { | 
|  | b.WriteRune('\\') | 
|  | } | 
|  | b.WriteRune(r) | 
|  | return | 
|  | } | 
|  |  | 
|  | switch r { | 
|  | case '\a': | 
|  | b.WriteString(`\a`) | 
|  | case '\f': | 
|  | b.WriteString(`\f`) | 
|  | case '\n': | 
|  | b.WriteString(`\n`) | 
|  | case '\r': | 
|  | b.WriteString(`\r`) | 
|  | case '\t': | 
|  | b.WriteString(`\t`) | 
|  | case '\v': | 
|  | b.WriteString(`\v`) | 
|  | default: | 
|  | if r < 0x100 { | 
|  | b.WriteString(`\x`) | 
|  | s := strconv.FormatInt(int64(r), 16) | 
|  | if len(s) == 1 { | 
|  | b.WriteRune('0') | 
|  | } | 
|  | b.WriteString(s) | 
|  | break | 
|  | } | 
|  | b.WriteString(`\x{`) | 
|  | b.WriteString(strconv.FormatInt(int64(r), 16)) | 
|  | b.WriteString(`}`) | 
|  | } | 
|  | } | 
|  |  | 
|  | // MaxCap walks the regexp to find the maximum capture index. | 
|  | func (re *Regexp) MaxCap() int { | 
|  | m := 0 | 
|  | if re.Op == OpCapture { | 
|  | m = re.Cap | 
|  | } | 
|  | for _, sub := range re.Sub { | 
|  | if n := sub.MaxCap(); m < n { | 
|  | m = n | 
|  | } | 
|  | } | 
|  | return m | 
|  | } | 
|  |  | 
|  | // CapNames walks the regexp to find the names of capturing groups. | 
|  | func (re *Regexp) CapNames() []string { | 
|  | names := make([]string, re.MaxCap()+1) | 
|  | re.capNames(names) | 
|  | return names | 
|  | } | 
|  |  | 
|  | func (re *Regexp) capNames(names []string) { | 
|  | if re.Op == OpCapture { | 
|  | names[re.Cap] = re.Name | 
|  | } | 
|  | for _, sub := range re.Sub { | 
|  | sub.capNames(names) | 
|  | } | 
|  | } |