package syntax
import (
	"slices"
	"strconv"
	"strings"
	"unicode"
)
// A Regexp is one node of a regular expression syntax tree.
// Op selects the node kind and thereby which of the other fields are used.
type Regexp struct {
	// Op is the operator of this node.
	Op Op
	// Flags holds parse flag bits; the code in this file tests FoldCase,
	// NonGreedy and WasDollar.
	Flags Flags
	// Sub holds the subexpressions: a single one for captures and
	// repetitions, any number for OpConcat and OpAlternate.
	Sub []*Regexp
	// NOTE(review): Sub0 is not referenced in this file; presumably inline
	// backing storage for a one-element Sub — confirm against the parser.
	Sub0 [1]*Regexp
	// Rune holds the runes of an OpLiteral, or the lo/hi range pairs of an
	// OpCharClass.
	Rune []rune
	// NOTE(review): Rune0 is not referenced in this file; presumably inline
	// backing storage for a short Rune — confirm against the parser.
	Rune0 [2]rune
	// Min is the lower repetition bound of an OpRepeat.
	Min int
	// Max is the upper repetition bound of an OpRepeat. A negative Max is
	// printed as an open upper bound, e.g. {2,}.
	Max int
	// Cap is the capture index of an OpCapture.
	Cap int
	// Name is the capture name of an OpCapture; empty for an unnamed group.
	Name string
}
// An Op identifies the operator of a Regexp node.
type Op uint8

// Operators are numbered from 1, so the zero Op is not a valid operator.
// The numeric order is significant: the printer wraps a repeated
// subexpression in a group when its Op is greater than OpCapture.
const (
	OpNoMatch       Op = iota + 1 // matches nothing; printed as [^\x00-\x{10FFFF}]
	OpEmptyMatch                  // matches the empty string; printed as (?:)
	OpLiteral                     // matches the Rune sequence
	OpCharClass                   // matches Rune read as a list of lo/hi range pairs
	OpAnyCharNotNL                // printed as . (carries "cannot have s flag")
	OpAnyChar                     // printed as . (carries "must have s flag")
	OpBeginLine                   // printed as ^
	OpEndLine                     // printed as $
	OpBeginText                   // printed as \A
	OpEndText                     // printed as $ when Flags has WasDollar, else \z
	OpWordBoundary                // printed as \b
	OpNoWordBoundary              // printed as \B
	OpCapture                     // capturing group around Sub[0], with index Cap and optional Name
	OpStar                        // Sub[0] followed by *
	OpPlus                        // Sub[0] followed by +
	OpQuest                       // Sub[0] followed by ?
	OpRepeat                      // Sub[0] followed by {Min}, {Min,} or {Min,Max}
	OpConcat                      // the Subs printed one after another
	OpAlternate                   // the Subs separated by |
)

// NOTE(review): opPseudo is not referenced in this file; presumably values
// at or above it are reserved for internal pseudo-operators — confirm at
// the use sites.
const opPseudo Op = 128
func ( *Regexp) ( *Regexp) bool {
if == nil || == nil {
return ==
}
if .Op != .Op {
return false
}
switch .Op {
case OpEndText:
if .Flags&WasDollar != .Flags&WasDollar {
return false
}
case OpLiteral, OpCharClass:
return slices.Equal(.Rune, .Rune)
case OpAlternate, OpConcat:
return slices.EqualFunc(.Sub, .Sub, (*Regexp).)
case OpStar, OpPlus, OpQuest:
if .Flags&NonGreedy != .Flags&NonGreedy || !.Sub[0].(.Sub[0]) {
return false
}
case OpRepeat:
if .Flags&NonGreedy != .Flags&NonGreedy || .Min != .Min || .Max != .Max || !.Sub[0].(.Sub[0]) {
return false
}
case OpCapture:
if .Cap != .Cap || .Name != .Name || !.Sub[0].(.Sub[0]) {
return false
}
}
return true
}
// printFlags is a bit set telling the printer what to wrap around a
// subexpression: a flag group, a closing parenthesis, and/or a plain
// non-capturing group.
type printFlags uint8

const (
	flagI    printFlags = 1 << iota // open a flag group containing i
	flagM                           // open a flag group containing m
	flagS                           // open a flag group containing s
	flagOff                         // write a closing ) after the subexpression
	flagPrec                        // wrap the subexpression in (?: ) for precedence
)

// negShift is the left shift that turns flagM or flagS into its negated
// form, which the printer writes after a '-' inside the flag group.
const negShift = 5
func (, *Regexp, printFlags, *map[*Regexp]printFlags) {
if * == nil {
* = make(map[*Regexp]printFlags)
}
(*)[] =
(*)[] |= flagOff
}
func ( *Regexp, *map[*Regexp]printFlags) (, printFlags) {
switch .Op {
default:
return 0, 0
case OpLiteral:
for , := range .Rune {
if minFold <= && <= maxFold && unicode.SimpleFold() != {
if .Flags&FoldCase != 0 {
return flagI, 0
} else {
return 0, flagI
}
}
}
return 0, 0
case OpCharClass:
for := 0; < len(.Rune); += 2 {
:= max(minFold, .Rune[])
:= min(maxFold, .Rune[+1])
for := ; <= ; ++ {
for := unicode.SimpleFold(); != ; = unicode.SimpleFold() {
if !( <= && <= ) && !inCharClass(, .Rune) {
return 0, flagI
}
}
}
}
return 0, 0
case OpAnyCharNotNL:
return 0, flagS
case OpAnyChar:
return flagS, 0
case OpBeginLine, OpEndLine:
return flagM, 0
case OpEndText:
if .Flags&WasDollar != 0 {
return 0, flagM
}
return 0, 0
case OpCapture, OpStar, OpPlus, OpQuest, OpRepeat:
return (.Sub[0], )
case OpConcat, OpAlternate:
var , , printFlags
:= 0
:= 0
:= false
for , := range .Sub {
, := (, )
if & != 0 || & != 0 {
if != 0 {
addSpan(.Sub[], .Sub[], , )
}
= 0
= 0
=
= true
}
|=
|=
|=
if != 0 {
=
}
if == 0 && == {
++
}
}
if ! {
return ,
}
if != 0 {
addSpan(.Sub[], .Sub[], , )
}
return 0,
}
}
func ( *strings.Builder, *Regexp, printFlags, map[*Regexp]printFlags) {
|= []
if &flagPrec != 0 && &^(flagOff|flagPrec) != 0 && &flagOff != 0 {
&^= flagPrec
}
if &^(flagOff|flagPrec) != 0 {
.WriteString(`(?`)
if &flagI != 0 {
.WriteString(`i`)
}
if &flagM != 0 {
.WriteString(`m`)
}
if &flagS != 0 {
.WriteString(`s`)
}
if &((flagM|flagS)<<negShift) != 0 {
.WriteString(`-`)
if &(flagM<<negShift) != 0 {
.WriteString(`m`)
}
if &(flagS<<negShift) != 0 {
.WriteString(`s`)
}
}
.WriteString(`:`)
}
if &flagOff != 0 {
defer .WriteString(`)`)
}
if &flagPrec != 0 {
.WriteString(`(?:`)
defer .WriteString(`)`)
}
switch .Op {
default:
.WriteString("<invalid op" + strconv.Itoa(int(.Op)) + ">")
case OpNoMatch:
.WriteString(`[^\x00-\x{10FFFF}]`)
case OpEmptyMatch:
.WriteString(`(?:)`)
case OpLiteral:
for , := range .Rune {
escape(, , false)
}
case OpCharClass:
if len(.Rune)%2 != 0 {
.WriteString(`[invalid char class]`)
break
}
.WriteRune('[')
if len(.Rune) == 0 {
.WriteString(`^\x00-\x{10FFFF}`)
} else if .Rune[0] == 0 && .Rune[len(.Rune)-1] == unicode.MaxRune && len(.Rune) > 2 {
.WriteRune('^')
for := 1; < len(.Rune)-1; += 2 {
, := .Rune[]+1, .Rune[+1]-1
escape(, , == '-')
if != {
if != +1 {
.WriteRune('-')
}
escape(, , == '-')
}
}
} else {
for := 0; < len(.Rune); += 2 {
, := .Rune[], .Rune[+1]
escape(, , == '-')
if != {
if != +1 {
.WriteRune('-')
}
escape(, , == '-')
}
}
}
.WriteRune(']')
case OpAnyCharNotNL, OpAnyChar:
.WriteString(`.`)
case OpBeginLine:
.WriteString(`^`)
case OpEndLine:
.WriteString(`$`)
case OpBeginText:
.WriteString(`\A`)
case OpEndText:
if .Flags&WasDollar != 0 {
.WriteString(`$`)
} else {
.WriteString(`\z`)
}
case OpWordBoundary:
.WriteString(`\b`)
case OpNoWordBoundary:
.WriteString(`\B`)
case OpCapture:
if .Name != "" {
.WriteString(`(?P<`)
.WriteString(.Name)
.WriteRune('>')
} else {
.WriteRune('(')
}
if .Sub[0].Op != OpEmptyMatch {
(, .Sub[0], [.Sub[0]], )
}
.WriteRune(')')
case OpStar, OpPlus, OpQuest, OpRepeat:
:= printFlags(0)
:= .Sub[0]
if .Op > OpCapture || .Op == OpLiteral && len(.Rune) > 1 {
= flagPrec
}
(, , , )
switch .Op {
case OpStar:
.WriteRune('*')
case OpPlus:
.WriteRune('+')
case OpQuest:
.WriteRune('?')
case OpRepeat:
.WriteRune('{')
.WriteString(strconv.Itoa(.Min))
if .Max != .Min {
.WriteRune(',')
if .Max >= 0 {
.WriteString(strconv.Itoa(.Max))
}
}
.WriteRune('}')
}
if .Flags&NonGreedy != 0 {
.WriteRune('?')
}
case OpConcat:
for , := range .Sub {
:= printFlags(0)
if .Op == OpAlternate {
= flagPrec
}
(, , , )
}
case OpAlternate:
for , := range .Sub {
if > 0 {
.WriteRune('|')
}
(, , 0, )
}
}
}
func ( *Regexp) () string {
var strings.Builder
var map[*Regexp]printFlags
, := calcFlags(, &)
|= ( &^ flagI) << negShift
if != 0 {
|= flagOff
}
writeRegexp(&, , , )
return .String()
}
// meta lists the runes that escape always prefixes with a backslash.
const meta = `\.+*?()|[]{}^$`

// escape writes r to b in a form that is safe inside a regular expression.
//
// A printable rune is written as is, preceded by a backslash when it is
// listed in meta or when force is set. A non-printable rune is written as
// one of the escapes \a \f \n \r \t \v, as \xNN (two hex digits) when it is
// below 0x100, and as \x{N...} otherwise.
func escape(b *strings.Builder, r rune, force bool) {
	if unicode.IsPrint(r) {
		if strings.ContainsRune(meta, r) || force {
			b.WriteRune('\\')
		}
		b.WriteRune(r)
		return
	}

	switch r {
	case '\a':
		b.WriteString(`\a`)
	case '\f':
		b.WriteString(`\f`)
	case '\n':
		b.WriteString(`\n`)
	case '\r':
		b.WriteString(`\r`)
	case '\t':
		b.WriteString(`\t`)
	case '\v':
		b.WriteString(`\v`)
	default:
		if r < 0x100 {
			b.WriteString(`\x`)
			s := strconv.FormatInt(int64(r), 16)
			// Pad to two digits so the escape has a fixed width.
			if len(s) == 1 {
				b.WriteRune('0')
			}
			b.WriteString(s)
			break
		}
		b.WriteString(`\x{`)
		b.WriteString(strconv.FormatInt(int64(r), 16))
		b.WriteString(`}`)
	}
}
func ( *Regexp) () int {
:= 0
if .Op == OpCapture {
= .Cap
}
for , := range .Sub {
if := .(); < {
=
}
}
return
}
func ( *Regexp) () []string {
:= make([]string, .MaxCap()+1)
.capNames()
return
}
func ( *Regexp) ( []string) {
if .Op == OpCapture {
[.Cap] = .Name
}
for , := range .Sub {
.()
}
}