Source File
simplify.go
Belonging Package
regexp/syntax
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
// Simplify returns a regexp equivalent to re but without counted repetitions
// and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/.
// The resulting regexp will execute correctly but its string representation
// will not produce the same parse tree, because capturing parentheses
// may have been duplicated or removed. For example, the simplified form
// for /(x){1,2}/ is /(x)(x)?/ but both parentheses capture as $1.
// The returned regexp may share structure with or be the original.
func ( *Regexp) () *Regexp {
if == nil {
return nil
}
switch .Op {
case OpCapture, OpConcat, OpAlternate:
// Simplify children, building new Regexp if children change.
:=
for , := range .Sub {
:= .()
if == && != {
// Start a copy.
= new(Regexp)
* = *
.Rune = nil
.Sub = append(.Sub0[:0], .Sub[:]...)
}
if != {
.Sub = append(.Sub, )
}
}
return
case OpStar, OpPlus, OpQuest:
:= .Sub[0].()
return simplify1(.Op, .Flags, , )
case OpRepeat:
// Special special case: x{0} matches the empty string
// and doesn't even need to consider x.
if .Min == 0 && .Max == 0 {
return &Regexp{Op: OpEmptyMatch}
}
// The fun begins.
:= .Sub[0].()
// x{n,} means at least n matches of x.
if .Max == -1 {
// Special case: x{0,} is x*.
if .Min == 0 {
return simplify1(OpStar, .Flags, , nil)
}
// Special case: x{1,} is x+.
if .Min == 1 {
return simplify1(OpPlus, .Flags, , nil)
}
// General case: x{4,} is xxxx+.
:= &Regexp{Op: OpConcat}
.Sub = .Sub0[:0]
for := 0; < .Min-1; ++ {
.Sub = append(.Sub, )
}
.Sub = append(.Sub, simplify1(OpPlus, .Flags, , nil))
return
}
// Special case x{0} handled above.
// Special case: x{1} is just x.
if .Min == 1 && .Max == 1 {
return
}
// General case: x{n,m} means n copies of x and m copies of x?
// The machine will do less work if we nest the final m copies,
// so that x{2,5} = xx(x(x(x)?)?)?
// Build leading prefix: xx.
var *Regexp
if .Min > 0 {
= &Regexp{Op: OpConcat}
.Sub = .Sub0[:0]
for := 0; < .Min; ++ {
.Sub = append(.Sub, )
}
}
// Build and attach suffix: (x(x(x)?)?)?
if .Max > .Min {
:= simplify1(OpQuest, .Flags, , nil)
for := .Min + 1; < .Max; ++ {
:= &Regexp{Op: OpConcat}
.Sub = append(.Sub0[:0], , )
= simplify1(OpQuest, .Flags, , nil)
}
if == nil {
return
}
.Sub = append(.Sub, )
}
if != nil {
return
}
// Some degenerate case like min > max or min < max < 0.
// Handle as impossible match.
return &Regexp{Op: OpNoMatch}
}
return
}
// simplify1 implements Simplify for the unary OpStar,
// OpPlus, and OpQuest operators. It returns the simple regexp
// equivalent to
//
// Regexp{Op: op, Flags: flags, Sub: {sub}}
//
// under the assumption that sub is already simple, and
// without first allocating that structure. If the regexp
// to be returned turns out to be equivalent to re, simplify1
// returns re instead.
//
// simplify1 is factored out of Simplify because the implementation
// for other operators generates these unary expressions.
// Letting them call simplify1 makes sure the expressions they
// generate are simple.
func ( Op, Flags, , *Regexp) *Regexp {
// Special case: repeat the empty string as much as
// you want, but it's still the empty string.
if .Op == OpEmptyMatch {
return
}
// The operators are idempotent if the flags match.
if == .Op && &NonGreedy == .Flags&NonGreedy {
return
}
if != nil && .Op == && .Flags&NonGreedy == &NonGreedy && == .Sub[0] {
return
}
= &Regexp{Op: , Flags: }
.Sub = append(.Sub0[:0], )
return
}
The pages are generated with Golds v0.4.9. (GOOS=linux GOARCH=amd64)