// Source file: regexp.go
// Belonging package: regexp

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package regexp implements regular expression search.
//
// The syntax of the regular expressions accepted is the same
// general syntax used by Perl, Python, and other languages.
// More precisely, it is the syntax accepted by RE2 and described at
// https://golang.org/s/re2syntax, except for \C.
// For an overview of the syntax, see the [regexp/syntax] package.
//
// The regexp implementation provided by this package is
// guaranteed to run in time linear in the size of the input.
// (This is a property not guaranteed by most open source
// implementations of regular expressions.) For more information
// about this property, see https://swtch.com/~rsc/regexp/regexp1.html
// or any book about automata theory.
//
// All characters are UTF-8-encoded code points.
// Following [utf8.DecodeRune], each byte of an invalid UTF-8 sequence
// is treated as if it encoded utf8.RuneError (U+FFFD).
//
// There are 16 methods of [Regexp] that match a regular expression and identify
// the matched text. Their names are matched by this regular expression:
//
//	Find(All)?(String)?(Submatch)?(Index)?
//
// If 'All' is present, the routine matches successive non-overlapping
// matches of the entire expression. Empty matches abutting a preceding
// match are ignored. The return value is a slice containing the successive
// return values of the corresponding non-'All' routine. These routines take
// an extra integer argument, n. If n >= 0, the function returns at most n
// matches/submatches; otherwise, it returns all of them.
//
// If 'String' is present, the argument is a string; otherwise it is a slice
// of bytes; return values are adjusted as appropriate.
//
// If 'Submatch' is present, the return value is a slice identifying the
// successive submatches of the expression. Submatches are matches of
// parenthesized subexpressions (also known as capturing groups) within the
// regular expression, numbered from left to right in order of opening
// parenthesis. Submatch 0 is the match of the entire expression, submatch 1 is
// the match of the first parenthesized subexpression, and so on.
//
// If 'Index' is present, matches and submatches are identified by byte index
// pairs within the input string: result[2*n:2*n+2] identifies the indexes of
// the nth submatch. The pair for n==0 identifies the match of the entire
// expression. If 'Index' is not present, the match is identified by the text
// of the match/submatch. If an index is negative or text is nil, it means that
// subexpression did not match any string in the input. For 'String' versions
// an empty string means either no match or an empty match.
//
// There is also a subset of the methods that can be applied to text read from
// an [io.RuneReader]: [Regexp.MatchReader], [Regexp.FindReaderIndex],
// [Regexp.FindReaderSubmatchIndex].
//
// This set may grow. Note that regular expression matches may need to
// examine text beyond the text returned by a match, so the methods that
// match text from an [io.RuneReader] may read arbitrarily far into the input
// before returning.
//
// (There are a few other methods that do not match this pattern.)
package regexp
import (
	"bytes"
	"io"
	"regexp/syntax"
	"strconv"
	"strings"
	"sync"
	"unicode"
	"unicode/utf8"
)
// Regexp is the representation of a compiled regular expression.
// A Regexp is safe for concurrent use by multiple goroutines,
// except for configuration methods, such as [Regexp.Longest].
type Regexp struct {
expr string // as passed to Compile
prog *syntax.Prog // compiled program
onepass *onePassProg // onepass program or nil
// numSubexp is the number of parenthesized subexpressions,
// as reported by NumSubexp.
numSubexp int
// NOTE(review): maxBitStateLen is only assigned by compile when
// onepass is nil; its exact meaning is defined by the maxBitStateLen
// helper, which is not visible here — confirm before relying on it.
maxBitStateLen int
// subexpNames holds the subexpression names returned by SubexpNames.
subexpNames []string
prefix string // required prefix in unanchored matches
prefixBytes []byte // prefix, as a []byte
prefixRune rune // first rune in prefix
prefixEnd uint32 // pc for last rune in prefix
mpool int // pool for machines
matchcap int // size of recorded match lengths
prefixComplete bool // prefix is the entire regexp
cond syntax.EmptyOp // empty-width conditions required at start of match
minInputLen int // minimum length of the input in bytes
// This field can be modified by the Longest method,
// but it is otherwise read-only.
longest bool // whether regexp prefers leftmost-longest match
}
// String returns the source text used to compile the regular expression.
func ( *Regexp) () string {
return .expr
}
// Copy returns a new [Regexp] object copied from re.
// Calling [Regexp.Longest] on one copy does not affect another.
//
// Deprecated: In earlier releases, when using a [Regexp] in multiple goroutines,
// giving each goroutine its own copy helped to avoid lock contention.
// As of Go 1.12, using Copy is no longer necessary to avoid lock contention.
// Copy may still be appropriate if the reason for its use is to make
// two copies with different [Regexp.Longest] settings.
func ( *Regexp) () *Regexp {
:= *
return &
}
// Compile parses a regular expression and returns, if successful,
// a [Regexp] object that can be used to match against text.
//
// When matching against text, the regexp returns a match that
// begins as early as possible in the input (leftmost), and among those
// it chooses the one that a backtracking search would have found first.
// This so-called leftmost-first matching is the same semantics
// that Perl, Python, and other implementations use, although this
// package implements it without the expense of backtracking.
// For POSIX leftmost-longest matching, see [CompilePOSIX].
func ( string) (*Regexp, error) {
return compile(, syntax.Perl, false)
}
// CompilePOSIX is like [Compile] but restricts the regular expression
// to POSIX ERE (egrep) syntax and changes the match semantics to
// leftmost-longest.
//
// That is, when matching against text, the regexp returns a match that
// begins as early as possible in the input (leftmost), and among those
// it chooses a match that is as long as possible.
// This so-called leftmost-longest matching is the same semantics
// that early regular expression implementations used and that POSIX
// specifies.
//
// However, there can be multiple leftmost-longest matches, with different
// submatch choices, and here this package diverges from POSIX.
// Among the possible leftmost-longest matches, this package chooses
// the one that a backtracking search would have found first, while POSIX
// specifies that the match be chosen to maximize the length of the first
// subexpression, then the second, and so on from left to right.
// The POSIX rule is computationally prohibitive and not even well-defined.
// See https://swtch.com/~rsc/regexp/regexp2.html#posix for details.
func ( string) (*Regexp, error) {
return compile(, syntax.POSIX, true)
}
// Longest makes future searches prefer the leftmost-longest match.
// That is, when matching against text, the regexp returns a match that
// begins as early as possible in the input (leftmost), and among those
// it chooses a match that is as long as possible.
// This method modifies the [Regexp] and may not be called concurrently
// with any other methods.
func ( *Regexp) () {
.longest = true
}
func ( string, syntax.Flags, bool) (*Regexp, error) {
, := syntax.Parse(, )
if != nil {
return nil,
}
:= .MaxCap()
:= .CapNames()
= .Simplify()
, := syntax.Compile()
if != nil {
return nil,
}
:= .NumCap
if < 2 {
= 2
}
:= &Regexp{
expr: ,
prog: ,
onepass: compileOnePass(),
numSubexp: ,
subexpNames: ,
cond: .StartCond(),
longest: ,
matchcap: ,
minInputLen: minInputLen(),
}
if .onepass == nil {
.prefix, .prefixComplete = .Prefix()
.maxBitStateLen = maxBitStateLen()
} else {
.prefix, .prefixComplete, .prefixEnd = onePassPrefix()
}
if .prefix != "" {
// TODO(rsc): Remove this allocation by adding
// IndexString to package bytes.
.prefixBytes = []byte(.prefix)
.prefixRune, _ = utf8.DecodeRuneInString(.prefix)
}
:= len(.Inst)
:= 0
for matchSize[] != 0 && matchSize[] < {
++
}
.mpool =
return , nil
}
// Pools of *machine for use during (*Regexp).doExecute,
// split up by the size of the execution queues.
// matchPool[i] machines have queue size matchSize[i].
// On a 64-bit system each queue entry is 16 bytes,
// so matchPool[0] has 16*2*128 = 4kB queues, etc.
// The final matchPool is a catch-all for very large queues.
//
// The trailing 0 in matchSize is a sentinel: the pool-selection loop in
// compile stops on it, and get treats it as "size the queues from the
// program length".
var (
matchSize = [...]int{128, 512, 2048, 16384, 0}
matchPool [len(matchSize)]sync.Pool
)
// get returns a machine to use for matching re.
// It uses the re's machine cache if possible, to avoid
// unnecessary allocation.
func ( *Regexp) () *machine {
, := matchPool[.mpool].Get().(*machine)
if ! {
= new(machine)
}
.re =
.p = .prog
if cap(.matchcap) < .matchcap {
.matchcap = make([]int, .matchcap)
for , := range .pool {
.cap = make([]int, .matchcap)
}
}
// Allocate queues if needed.
// Or reallocate, for "large" match pool.
:= matchSize[.mpool]
if == 0 { // large pool
= len(.prog.Inst)
}
if len(.q0.sparse) < {
.q0 = queue{make([]uint32, ), make([]entry, 0, )}
.q1 = queue{make([]uint32, ), make([]entry, 0, )}
}
return
}
// put returns a machine to the correct machine pool.
func ( *Regexp) ( *machine) {
.re = nil
.p = nil
.inputs.clear()
matchPool[.mpool].Put()
}
// minInputLen walks the regexp to find the minimum length of any matchable input.
// The length is measured in bytes. Operators not listed below
// contribute 0.
func minInputLen(re *syntax.Regexp) int {
	switch re.Op {
	default:
		return 0
	case syntax.OpAnyChar, syntax.OpAnyCharNotNL, syntax.OpCharClass:
		return 1
	case syntax.OpLiteral:
		l := 0
		for _, r := range re.Rune {
			if r == utf8.RuneError {
				// A RuneError literal can match a single invalid
				// input byte, so count it as one byte.
				l++
			} else {
				l += utf8.RuneLen(r)
			}
		}
		return l
	case syntax.OpCapture, syntax.OpPlus:
		return minInputLen(re.Sub[0])
	case syntax.OpRepeat:
		return re.Min * minInputLen(re.Sub[0])
	case syntax.OpConcat:
		l := 0
		for _, sub := range re.Sub {
			l += minInputLen(sub)
		}
		return l
	case syntax.OpAlternate:
		// The shortest alternative bounds the whole alternation.
		l := minInputLen(re.Sub[0])
		var lnext int
		for _, sub := range re.Sub[1:] {
			lnext = minInputLen(sub)
			if lnext < l {
				l = lnext
			}
		}
		return l
	}
}
// MustCompile is like [Compile] but panics if the expression cannot be parsed.
// It simplifies safe initialization of global variables holding compiled regular
// expressions.
func ( string) *Regexp {
, := Compile()
if != nil {
panic(`regexp: Compile(` + quote() + `): ` + .Error())
}
return
}
// MustCompilePOSIX is like [CompilePOSIX] but panics if the expression cannot be parsed.
// It simplifies safe initialization of global variables holding compiled regular
// expressions.
func ( string) *Regexp {
, := CompilePOSIX()
if != nil {
panic(`regexp: CompilePOSIX(` + quote() + `): ` + .Error())
}
return
}
// quote returns s as a Go string literal for use in panic messages.
// It prefers a raw (backquoted) literal, which keeps regexp backslashes
// readable, and falls back to an interpreted literal when s cannot be
// written between backquotes.
func quote(s string) string {
	if strconv.CanBackquote(s) {
		return "`" + s + "`"
	}
	return strconv.Quote(s)
}
// NumSubexp returns the number of parenthesized subexpressions in this [Regexp].
func ( *Regexp) () int {
return .numSubexp
}
// SubexpNames returns the names of the parenthesized subexpressions
// in this [Regexp]. The name for the first sub-expression is names[1],
// so that if m is a match slice, the name for m[i] is SubexpNames()[i].
// Since the Regexp as a whole cannot be named, names[0] is always
// the empty string. The slice should not be modified.
func ( *Regexp) () []string {
return .subexpNames
}
// SubexpIndex returns the index of the first subexpression with the given name,
// or -1 if there is no subexpression with that name.
//
// Note that multiple subexpressions can be written using the same name, as in
// (?P<bob>a+)(?P<bob>b+), which declares two subexpressions named "bob".
// In this case, SubexpIndex returns the index of the leftmost such subexpression
// in the regular expression.
func ( *Regexp) ( string) int {
if != "" {
for , := range .subexpNames {
if == {
return
}
}
}
return -1
}
// endOfText is the rune value the step methods in this file return,
// together with width 0, when there is no more input to read.
const endOfText rune = -1
// input abstracts different representations of the input text. It provides
// one-character lookahead.
type input interface {
step(pos int) (r rune, width int) // advance one rune
canCheckPrefix() bool // can we look ahead without losing info?
// hasPrefix reports whether the input begins with re's literal prefix.
hasPrefix(re *Regexp) bool
// index returns the offset, relative to pos, of re's literal prefix in
// the input from pos onward; the reader-backed input always returns -1.
index(re *Regexp, pos int) int
// context builds a lazyFlag from the runes on either side of pos.
context(pos int) lazyFlag
}
// inputString scans a string.
type inputString struct {
str string
}
func ( *inputString) ( int) (rune, int) {
if < len(.str) {
:= .str[]
if < utf8.RuneSelf {
return rune(), 1
}
return utf8.DecodeRuneInString(.str[:])
}
return endOfText, 0
}
func ( *inputString) () bool {
return true
}
func ( *inputString) ( *Regexp) bool {
return strings.HasPrefix(.str, .prefix)
}
func ( *inputString) ( *Regexp, int) int {
return strings.Index(.str[:], .prefix)
}
func ( *inputString) ( int) lazyFlag {
, := endOfText, endOfText
// 0 < pos && pos <= len(i.str)
if uint(-1) < uint(len(.str)) {
= rune(.str[-1])
if >= utf8.RuneSelf {
, _ = utf8.DecodeLastRuneInString(.str[:])
}
}
// 0 <= pos && pos < len(i.str)
if uint() < uint(len(.str)) {
= rune(.str[])
if >= utf8.RuneSelf {
, _ = utf8.DecodeRuneInString(.str[:])
}
}
return newLazyFlag(, )
}
// inputBytes scans a byte slice.
type inputBytes struct {
str []byte
}
func ( *inputBytes) ( int) (rune, int) {
if < len(.str) {
:= .str[]
if < utf8.RuneSelf {
return rune(), 1
}
return utf8.DecodeRune(.str[:])
}
return endOfText, 0
}
func ( *inputBytes) () bool {
return true
}
func ( *inputBytes) ( *Regexp) bool {
return bytes.HasPrefix(.str, .prefixBytes)
}
func ( *inputBytes) ( *Regexp, int) int {
return bytes.Index(.str[:], .prefixBytes)
}
func ( *inputBytes) ( int) lazyFlag {
, := endOfText, endOfText
// 0 < pos && pos <= len(i.str)
if uint(-1) < uint(len(.str)) {
= rune(.str[-1])
if >= utf8.RuneSelf {
, _ = utf8.DecodeLastRune(.str[:])
}
}
// 0 <= pos && pos < len(i.str)
if uint() < uint(len(.str)) {
= rune(.str[])
if >= utf8.RuneSelf {
, _ = utf8.DecodeRune(.str[:])
}
}
return newLazyFlag(, )
}
// inputReader scans a RuneReader.
type inputReader struct {
r io.RuneReader
atEOT bool
pos int
}
func ( *inputReader) ( int) (rune, int) {
if !.atEOT && != .pos {
return endOfText, 0
}
, , := .r.ReadRune()
if != nil {
.atEOT = true
return endOfText, 0
}
.pos +=
return ,
}
func ( *inputReader) () bool {
return false
}
func ( *inputReader) ( *Regexp) bool {
return false
}
func ( *inputReader) ( *Regexp, int) int {
return -1
}
func ( *inputReader) ( int) lazyFlag {
return 0 // not used
}
// LiteralPrefix returns a literal string that must begin any match
// of the regular expression re. It returns the boolean true if the
// literal string comprises the entire regular expression.
func ( *Regexp) () ( string, bool) {
return .prefix, .prefixComplete
}
// MatchReader reports whether the text returned by the [io.RuneReader]
// contains any match of the regular expression re.
func ( *Regexp) ( io.RuneReader) bool {
return .doMatch(, nil, "")
}
// MatchString reports whether the string s
// contains any match of the regular expression re.
func ( *Regexp) ( string) bool {
return .doMatch(nil, nil, )
}
// Match reports whether the byte slice b
// contains any match of the regular expression re.
func ( *Regexp) ( []byte) bool {
return .doMatch(nil, , "")
}
// MatchReader reports whether the text returned by the [io.RuneReader]
// contains any match of the regular expression pattern.
// More complicated queries need to use [Compile] and the full [Regexp] interface.
func ( string, io.RuneReader) ( bool, error) {
, := Compile()
if != nil {
return false,
}
return .MatchReader(), nil
}
// MatchString reports whether the string s
// contains any match of the regular expression pattern.
// More complicated queries need to use [Compile] and the full [Regexp] interface.
func ( string, string) ( bool, error) {
, := Compile()
if != nil {
return false,
}
return .MatchString(), nil
}
// Match reports whether the byte slice b
// contains any match of the regular expression pattern.
// More complicated queries need to use [Compile] and the full [Regexp] interface.
func ( string, []byte) ( bool, error) {
, := Compile()
if != nil {
return false,
}
return .Match(), nil
}
// ReplaceAllString returns a copy of src, replacing matches of the [Regexp]
// with the replacement string repl.
// Inside repl, $ signs are interpreted as in [Regexp.Expand].
func ( *Regexp) (, string) string {
:= 2
if strings.Contains(, "$") {
= 2 * (.numSubexp + 1)
}
:= .replaceAll(nil, , , func( []byte, []int) []byte {
return .expand(, , nil, , )
})
return string()
}
// ReplaceAllLiteralString returns a copy of src, replacing matches of the [Regexp]
// with the replacement string repl. The replacement repl is substituted directly,
// without using [Regexp.Expand].
func ( *Regexp) (, string) string {
return string(.replaceAll(nil, , 2, func( []byte, []int) []byte {
return append(, ...)
}))
}
// ReplaceAllStringFunc returns a copy of src in which all matches of the
// [Regexp] have been replaced by the return value of function repl applied
// to the matched substring. The replacement returned by repl is substituted
// directly, without using [Regexp.Expand].
func ( *Regexp) ( string, func(string) string) string {
:= .replaceAll(nil, , 2, func( []byte, []int) []byte {
return append(, ([[0]:[1]])...)
})
return string()
}
func ( *Regexp) ( []byte, string, int, func( []byte, []int) []byte) []byte {
:= 0 // end position of the most recent match
:= 0 // position where we next look for a match
var []byte
var int
if != nil {
= len()
} else {
= len()
}
if > .prog.NumCap {
= .prog.NumCap
}
var [2]int
for <= {
:= .doExecute(nil, , , , , [:0])
if len() == 0 {
break // no more matches
}
// Copy the unmatched characters before this match.
if != nil {
= append(, [:[0]]...)
} else {
= append(, [:[0]]...)
}
// Now insert a copy of the replacement string, but not for a
// match of the empty string immediately after another match.
// (Otherwise, we get double replacement for patterns that
// match both empty and nonempty strings.)
if [1] > || [0] == 0 {
= (, )
}
= [1]
// Advance past this match; always advance at least one character.
var int
if != nil {
_, = utf8.DecodeRune([:])
} else {
_, = utf8.DecodeRuneInString([:])
}
if + > [1] {
+=
} else if +1 > [1] {
// This clause is only needed at the end of the input
// string. In that case, DecodeRuneInString returns width=0.
++
} else {
= [1]
}
}
// Copy the unmatched characters after the last match.
if != nil {
= append(, [:]...)
} else {
= append(, [:]...)
}
return
}
// ReplaceAll returns a copy of src, replacing matches of the [Regexp]
// with the replacement text repl.
// Inside repl, $ signs are interpreted as in [Regexp.Expand].
func ( *Regexp) (, []byte) []byte {
:= 2
if bytes.IndexByte(, '$') >= 0 {
= 2 * (.numSubexp + 1)
}
:= ""
:= .replaceAll(, "", , func( []byte, []int) []byte {
if len() != len() {
= string()
}
return .expand(, , , "", )
})
return
}
// ReplaceAllLiteral returns a copy of src, replacing matches of the [Regexp]
// with the replacement bytes repl. The replacement repl is substituted directly,
// without using [Regexp.Expand].
func ( *Regexp) (, []byte) []byte {
return .replaceAll(, "", 2, func( []byte, []int) []byte {
return append(, ...)
})
}
// ReplaceAllFunc returns a copy of src in which all matches of the
// [Regexp] have been replaced by the return value of function repl applied
// to the matched byte slice. The replacement returned by repl is substituted
// directly, without using [Regexp.Expand].
func ( *Regexp) ( []byte, func([]byte) []byte) []byte {
return .replaceAll(, "", 2, func( []byte, []int) []byte {
return append(, ([[0]:[1]])...)
})
}
// Bitmap used by func special to check whether a character needs to be escaped.
// ASCII byte b is recorded as bit b/16 of specialBytes[b%16].
var specialBytes [16]byte

// special reports whether byte b needs to be escaped by QuoteMeta.
func special(b byte) bool {
	return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0
}

// init populates specialBytes with the regular expression metacharacters.
func init() {
	for _, b := range []byte(`\.+*?()|[]{}^$`) {
		specialBytes[b%16] |= 1 << (b / 16)
	}
}
// QuoteMeta returns a string that escapes all regular expression metacharacters
// inside the argument text; the returned string is a regular expression matching
// the literal text.
func ( string) string {
// A byte loop is correct because all metacharacters are ASCII.
var int
for = 0; < len(); ++ {
if special([]) {
break
}
}
// No meta characters found, so return original string.
if >= len() {
return
}
:= make([]byte, 2*len()-)
copy(, [:])
:=
for ; < len(); ++ {
if special([]) {
[] = '\\'
++
}
[] = []
++
}
return string([:])
}
// The number of capture values in the program may correspond
// to fewer capturing expressions than are in the regexp.
// For example, "(a){0}" turns into an empty program, so the
// maximum capture in the program is 0 but we need to return
// an expression for \1. Pad appends -1s to the slice a as needed.
func ( *Regexp) ( []int) []int {
if == nil {
// No match.
return nil
}
:= (1 + .numSubexp) * 2
for len() < {
= append(, -1)
}
return
}
// allMatches calls deliver at most n times
// with the location of successive matches in the input text.
// The input text is b if non-nil, otherwise s.
func ( *Regexp) ( string, []byte, int, func([]int)) {
var int
if == nil {
= len()
} else {
= len()
}
for , , := 0, 0, -1; < && <= ; {
:= .doExecute(nil, , , , .prog.NumCap, nil)
if len() == 0 {
break
}
:= true
if [1] == {
// We've found an empty match.
if [0] == {
// We don't allow an empty match right
// after a previous match, so ignore it.
= false
}
var int
if == nil {
:= inputString{str: }
_, = .step()
} else {
:= inputBytes{str: }
_, = .step()
}
if > 0 {
+=
} else {
= + 1
}
} else {
= [1]
}
= [1]
if {
(.pad())
++
}
}
}
// Find returns a slice holding the text of the leftmost match in b of the regular expression.
// A return value of nil indicates no match.
func ( *Regexp) ( []byte) []byte {
var [2]int
:= .doExecute(nil, , "", 0, 2, [:0])
if == nil {
return nil
}
return [[0]:[1]:[1]]
}
// FindIndex returns a two-element slice of integers defining the location of
// the leftmost match in b of the regular expression. The match itself is at
// b[loc[0]:loc[1]].
// A return value of nil indicates no match.
func ( *Regexp) ( []byte) ( []int) {
:= .doExecute(nil, , "", 0, 2, nil)
if == nil {
return nil
}
return [0:2]
}
// FindString returns a string holding the text of the leftmost match in s of the regular
// expression. If there is no match, the return value is an empty string,
// but it will also be empty if the regular expression successfully matches
// an empty string. Use [Regexp.FindStringIndex] or [Regexp.FindStringSubmatch] if it is
// necessary to distinguish these cases.
func ( *Regexp) ( string) string {
var [2]int
:= .doExecute(nil, nil, , 0, 2, [:0])
if == nil {
return ""
}
return [[0]:[1]]
}
// FindStringIndex returns a two-element slice of integers defining the
// location of the leftmost match in s of the regular expression. The match
// itself is at s[loc[0]:loc[1]].
// A return value of nil indicates no match.
func ( *Regexp) ( string) ( []int) {
:= .doExecute(nil, nil, , 0, 2, nil)
if == nil {
return nil
}
return [0:2]
}
// FindReaderIndex returns a two-element slice of integers defining the
// location of the leftmost match of the regular expression in text read from
// the [io.RuneReader]. The match text was found in the input stream at
// byte offset loc[0] through loc[1]-1.
// A return value of nil indicates no match.
func ( *Regexp) ( io.RuneReader) ( []int) {
:= .doExecute(, nil, "", 0, 2, nil)
if == nil {
return nil
}
return [0:2]
}
// FindSubmatch returns a slice of slices holding the text of the leftmost
// match of the regular expression in b and the matches, if any, of its
// subexpressions, as defined by the 'Submatch' descriptions in the package
// comment.
// A return value of nil indicates no match.
func ( *Regexp) ( []byte) [][]byte {
var [4]int
:= .doExecute(nil, , "", 0, .prog.NumCap, [:0])
if == nil {
return nil
}
:= make([][]byte, 1+.numSubexp)
for := range {
if 2* < len() && [2*] >= 0 {
[] = [[2*]:[2*+1]:[2*+1]]
}
}
return
}
// Expand appends template to dst and returns the result; during the
// append, Expand replaces variables in the template with corresponding
// matches drawn from src. The match slice should have been returned by
// [Regexp.FindSubmatchIndex].
//
// In the template, a variable is denoted by a substring of the form
// $name or ${name}, where name is a non-empty sequence of letters,
// digits, and underscores. A purely numeric name like $1 refers to
// the submatch with the corresponding index; other names refer to
// capturing parentheses named with the (?P<name>...) syntax. A
// reference to an out of range or unmatched index or a name that is not
// present in the regular expression is replaced with an empty slice.
//
// In the $name form, name is taken to be as long as possible: $1x is
// equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
//
// To insert a literal $ in the output, use $$ in the template.
func ( *Regexp) ( []byte, []byte, []byte, []int) []byte {
return .expand(, string(), , "", )
}
// ExpandString is like [Regexp.Expand] but the template and source are strings.
// It appends to and returns a byte slice in order to give the calling
// code control over allocation.
func ( *Regexp) ( []byte, string, string, []int) []byte {
return .expand(, , nil, , )
}
func ( *Regexp) ( []byte, string, []byte, string, []int) []byte {
for len() > 0 {
, , := strings.Cut(, "$")
if ! {
break
}
= append(, ...)
=
if != "" && [0] == '$' {
// Treat $$ as $.
= append(, '$')
= [1:]
continue
}
, , , := extract()
if ! {
// Malformed; treat $ as raw text.
= append(, '$')
continue
}
=
if >= 0 {
if 2*+1 < len() && [2*] >= 0 {
if != nil {
= append(, [[2*]:[2*+1]]...)
} else {
= append(, [[2*]:[2*+1]]...)
}
}
} else {
for , := range .subexpNames {
if == && 2*+1 < len() && [2*] >= 0 {
if != nil {
= append(, [[2*]:[2*+1]]...)
} else {
= append(, [[2*]:[2*+1]]...)
}
break
}
}
}
}
= append(, ...)
return
}
// extract returns the name from a leading "name" or "{name}" in str.
// (The $ has already been removed by the caller.)
// If it is a number, extract returns num set to that number; otherwise num = -1.
// rest is the text following the name (and closing brace, if any).
// ok is false if str does not start with a well-formed name.
func extract(str string) (name string, num int, rest string, ok bool) {
	if str == "" {
		return
	}
	brace := false
	if str[0] == '{' {
		brace = true
		str = str[1:]
	}
	i := 0
	for i < len(str) {
		r, size := utf8.DecodeRuneInString(str[i:])
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' {
			break
		}
		i += size
	}
	if i == 0 {
		// empty name is not okay
		return
	}
	name = str[:i]
	if brace {
		if i >= len(str) || str[i] != '}' {
			// missing closing brace
			return
		}
		i++
	}

	// Parse number.
	num = 0
	for i := 0; i < len(name); i++ {
		// The 1e8 bound stops accumulating before num can overflow.
		if name[i] < '0' || '9' < name[i] || num >= 1e8 {
			num = -1
			break
		}
		num = num*10 + int(name[i]) - '0'
	}
	// Disallow leading zeros.
	if name[0] == '0' && len(name) > 1 {
		num = -1
	}

	rest = str[i:]
	ok = true
	return
}
// FindSubmatchIndex returns a slice holding the index pairs identifying the
// leftmost match of the regular expression in b and the matches, if any, of
// its subexpressions, as defined by the 'Submatch' and 'Index' descriptions
// in the package comment.
// A return value of nil indicates no match.
func ( *Regexp) ( []byte) []int {
return .pad(.doExecute(nil, , "", 0, .prog.NumCap, nil))
}
// FindStringSubmatch returns a slice of strings holding the text of the
// leftmost match of the regular expression in s and the matches, if any, of
// its subexpressions, as defined by the 'Submatch' description in the
// package comment.
// A return value of nil indicates no match.
func ( *Regexp) ( string) []string {
var [4]int
:= .doExecute(nil, nil, , 0, .prog.NumCap, [:0])
if == nil {
return nil
}
:= make([]string, 1+.numSubexp)
for := range {
if 2* < len() && [2*] >= 0 {
[] = [[2*]:[2*+1]]
}
}
return
}
// FindStringSubmatchIndex returns a slice holding the index pairs
// identifying the leftmost match of the regular expression in s and the
// matches, if any, of its subexpressions, as defined by the 'Submatch' and
// 'Index' descriptions in the package comment.
// A return value of nil indicates no match.
func ( *Regexp) ( string) []int {
return .pad(.doExecute(nil, nil, , 0, .prog.NumCap, nil))
}
// FindReaderSubmatchIndex returns a slice holding the index pairs
// identifying the leftmost match of the regular expression of text read by
// the [io.RuneReader], and the matches, if any, of its subexpressions, as defined
// by the 'Submatch' and 'Index' descriptions in the package comment. A
// return value of nil indicates no match.
func ( *Regexp) ( io.RuneReader) []int {
return .pad(.doExecute(, nil, "", 0, .prog.NumCap, nil))
}
// startSize is the initial capacity the 'All' routines give their result
// slice when the first match is delivered.
const startSize = 10 // The size at which to start a slice in the 'All' routines.
// FindAll is the 'All' version of [Regexp.Find]; it returns a slice of all successive
// matches of the expression, as defined by the 'All' description in the
// package comment.
// A return value of nil indicates no match.
func ( *Regexp) ( []byte, int) [][]byte {
if < 0 {
= len() + 1
}
var [][]byte
.allMatches("", , , func( []int) {
if == nil {
= make([][]byte, 0, startSize)
}
= append(, [[0]:[1]:[1]])
})
return
}
// FindAllIndex is the 'All' version of [Regexp.FindIndex]; it returns a slice of all
// successive matches of the expression, as defined by the 'All' description
// in the package comment.
// A return value of nil indicates no match.
func ( *Regexp) ( []byte, int) [][]int {
if < 0 {
= len() + 1
}
var [][]int
.allMatches("", , , func( []int) {
if == nil {
= make([][]int, 0, startSize)
}
= append(, [0:2])
})
return
}
// FindAllString is the 'All' version of [Regexp.FindString]; it returns a slice of all
// successive matches of the expression, as defined by the 'All' description
// in the package comment.
// A return value of nil indicates no match.
func ( *Regexp) ( string, int) []string {
if < 0 {
= len() + 1
}
var []string
.allMatches(, nil, , func( []int) {
if == nil {
= make([]string, 0, startSize)
}
= append(, [[0]:[1]])
})
return
}
// FindAllStringIndex is the 'All' version of [Regexp.FindStringIndex]; it returns a
// slice of all successive matches of the expression, as defined by the 'All'
// description in the package comment.
// A return value of nil indicates no match.
func ( *Regexp) ( string, int) [][]int {
if < 0 {
= len() + 1
}
var [][]int
.allMatches(, nil, , func( []int) {
if == nil {
= make([][]int, 0, startSize)
}
= append(, [0:2])
})
return
}
// FindAllSubmatch is the 'All' version of [Regexp.FindSubmatch]; it returns a slice
// of all successive matches of the expression, as defined by the 'All'
// description in the package comment.
// A return value of nil indicates no match.
func ( *Regexp) ( []byte, int) [][][]byte {
if < 0 {
= len() + 1
}
var [][][]byte
.allMatches("", , , func( []int) {
if == nil {
= make([][][]byte, 0, startSize)
}
:= make([][]byte, len()/2)
for := range {
if [2*] >= 0 {
[] = [[2*]:[2*+1]:[2*+1]]
}
}
= append(, )
})
return
}
// FindAllSubmatchIndex is the 'All' version of [Regexp.FindSubmatchIndex]; it returns
// a slice of all successive matches of the expression, as defined by the
// 'All' description in the package comment.
// A return value of nil indicates no match.
func ( *Regexp) ( []byte, int) [][]int {
if < 0 {
= len() + 1
}
var [][]int
.allMatches("", , , func( []int) {
if == nil {
= make([][]int, 0, startSize)
}
= append(, )
})
return
}
// FindAllStringSubmatch is the 'All' version of [Regexp.FindStringSubmatch]; it
// returns a slice of all successive matches of the expression, as defined by
// the 'All' description in the package comment.
// A return value of nil indicates no match.
func ( *Regexp) ( string, int) [][]string {
if < 0 {
= len() + 1
}
var [][]string
.allMatches(, nil, , func( []int) {
if == nil {
= make([][]string, 0, startSize)
}
:= make([]string, len()/2)
for := range {
if [2*] >= 0 {
[] = [[2*]:[2*+1]]
}
}
= append(, )
})
return
}
// FindAllStringSubmatchIndex is the 'All' version of
// [Regexp.FindStringSubmatchIndex]; it returns a slice of all successive matches of
// the expression, as defined by the 'All' description in the package
// comment.
// A return value of nil indicates no match.
func ( *Regexp) ( string, int) [][]int {
if < 0 {
= len() + 1
}
var [][]int
.allMatches(, nil, , func( []int) {
if == nil {
= make([][]int, 0, startSize)
}
= append(, )
})
return
}
// Split slices s into substrings separated by the expression and returns a slice of
// the substrings between those expression matches.
//
// The slice returned by this method consists of all the substrings of s
// not contained in the slice returned by [Regexp.FindAllString]. When called on an expression
// that contains no metacharacters, it is equivalent to [strings.SplitN].
//
// Example:
//
// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
// // s: ["", "b", "b", "c", "cadaaae"]
//
// The count determines the number of substrings to return:
// - n > 0: at most n substrings; the last substring will be the unsplit remainder;
// - n == 0: the result is nil (zero substrings);
// - n < 0: all substrings.
func ( *Regexp) ( string, int) []string {
if == 0 {
return nil
}
if len(.expr) > 0 && len() == 0 {
return []string{""}
}
:= .FindAllStringIndex(, )
:= make([]string, 0, len())
:= 0
:= 0
for , := range {
if > 0 && len() >= -1 {
break
}
= [0]
if [1] != 0 {
= append(, [:])
}
= [1]
}
if != len() {
= append(, [:])
}
return
}
// AppendText implements [encoding.TextAppender]. The output
// matches that of calling the [Regexp.String] method.
//
// Note that the output is lossy in some cases: This method does not indicate
// POSIX regular expressions (i.e. those compiled by calling [CompilePOSIX]), or
// those for which the [Regexp.Longest] method has been called.
func ( *Regexp) ( []byte) ([]byte, error) {
return append(, .String()...), nil
}
// MarshalText implements [encoding.TextMarshaler]. The output
// matches that of calling the [Regexp.AppendText] method.
//
// See [Regexp.AppendText] for more information.
func ( *Regexp) () ([]byte, error) {
return .AppendText(nil)
}
// UnmarshalText implements [encoding.TextUnmarshaler] by calling
// [Compile] on the encoded value.
func ( *Regexp) ( []byte) error {
, := Compile(string())
if != nil {
return
}
* = *
return nil
}
// The pages are generated with Golds v0.7.6. (GOOS=linux GOARCH=amd64)