Source File
difflib.go
Belonging Package
gotest.tools/v3/internal/difflib
/*Package difflib is a partial port of Python difflib module.
Original source: https://github.com/pmezard/go-difflib
This file is trimmed to only the parts used by this repository.
*/
package difflib // import "gotest.tools/v3/internal/difflib"
func (, int) int {
if < {
return
}
return
}
func (, int) int {
if > {
return
}
return
}
// Match stores line numbers of size of match
type Match struct {
A int
B int
Size int
}
// OpCode identifies the type of diff
type OpCode struct {
Tag byte
I1 int
I2 int
J1 int
J2 int
}
// SequenceMatcher compares sequence of strings. The basic
// algorithm predates, and is a little fancier than, an algorithm
// published in the late 1980's by Ratcliff and Obershelp under the
// hyperbolic name "gestalt pattern matching". The basic idea is to find
// the longest contiguous matching subsequence that contains no "junk"
// elements (R-O doesn't address junk). The same idea is then applied
// recursively to the pieces of the sequences to the left and to the right
// of the matching subsequence. This does not yield minimal edit
// sequences, but does tend to yield matches that "look right" to people.
//
// SequenceMatcher tries to compute a "human-friendly diff" between two
// sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the
// longest *contiguous* & junk-free matching subsequence. That's what
// catches peoples' eyes. The Windows(tm) windiff has another interesting
// notion, pairing up elements that appear uniquely in each sequence.
// That, and the method here, appear to yield more intuitive difference
// reports than does diff. This method appears to be the least vulnerable
// to synching up on blocks of "junk lines", though (like blank lines in
// ordinary text files, or maybe "<P>" lines in HTML files). That may be
// because this is the only method of the 3 that has a *concept* of
// "junk" <wink>.
//
// Timing: Basic R-O is cubic time worst case and quadratic time expected
// case. SequenceMatcher is quadratic time for the worst case and has
// expected-case behavior dependent in a complicated way on how many
// elements the sequences have in common; best case time is linear.
type SequenceMatcher struct {
a []string
b []string
b2j map[string][]int
IsJunk func(string) bool
autoJunk bool
bJunk map[string]struct{}
matchingBlocks []Match
fullBCount map[string]int
bPopular map[string]struct{}
opCodes []OpCode
}
// NewMatcher returns a new SequenceMatcher
func (, []string) *SequenceMatcher {
:= SequenceMatcher{autoJunk: true}
.SetSeqs(, )
return &
}
// SetSeqs sets two sequences to be compared.
func ( *SequenceMatcher) (, []string) {
.SetSeq1()
.SetSeq2()
}
// SetSeq1 sets the first sequence to be compared. The second sequence to be compared is
// not changed.
//
// SequenceMatcher computes and caches detailed information about the second
// sequence, so if you want to compare one sequence S against many sequences,
// use .SetSeq2(s) once and call .SetSeq1(x) repeatedly for each of the other
// sequences.
//
// See also SetSeqs() and SetSeq2().
func ( *SequenceMatcher) ( []string) {
if & == &.a {
return
}
.a =
.matchingBlocks = nil
.opCodes = nil
}
// SetSeq2 sets the second sequence to be compared. The first sequence to be compared is
// not changed.
func ( *SequenceMatcher) ( []string) {
if & == &.b {
return
}
.b =
.matchingBlocks = nil
.opCodes = nil
.fullBCount = nil
.chainB()
}
func ( *SequenceMatcher) () {
// Populate line -> index mapping
:= map[string][]int{}
for , := range .b {
:= []
= append(, )
[] =
}
// Purge junk elements
.bJunk = map[string]struct{}{}
if .IsJunk != nil {
:= .bJunk
for := range {
if .IsJunk() {
[] = struct{}{}
}
}
for := range {
delete(, )
}
}
// Purge remaining popular elements
:= map[string]struct{}{}
:= len(.b)
if .autoJunk && >= 200 {
:= /100 + 1
for , := range {
if len() > {
[] = struct{}{}
}
}
for := range {
delete(, )
}
}
.bPopular =
.b2j =
}
func ( *SequenceMatcher) ( string) bool {
, := .bJunk[]
return
}
// Find longest matching block in a[alo:ahi] and b[blo:bhi].
//
// If IsJunk is not defined:
//
// Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
// alo <= i <= i+k <= ahi
// blo <= j <= j+k <= bhi
// and for all (i',j',k') meeting those conditions,
// k >= k'
// i <= i'
// and if i == i', j <= j'
//
// In other words, of all maximal matching blocks, return one that
// starts earliest in a, and of all those maximal matching blocks that
// start earliest in a, return the one that starts earliest in b.
//
// If IsJunk is defined, first the longest matching block is
// determined as above, but with the additional restriction that no
// junk element appears in the block. Then that block is extended as
// far as possible by matching (only) junk elements on both sides. So
// the resulting block never matches on junk except as identical junk
// happens to be adjacent to an "interesting" match.
//
// If no blocks match, return (alo, blo, 0).
func ( *SequenceMatcher) (, , , int) Match {
// CAUTION: stripping common prefix or suffix would be incorrect.
// E.g.,
// ab
// acab
// Longest matching block is "ab", but if common prefix is
// stripped, it's "a" (tied with "b"). UNIX(tm) diff does so
// strip, so ends up claiming that ab is changed to acab by
// inserting "ca" in the middle. That's minimal but unintuitive:
// "it's obvious" that someone inserted "ac" at the front.
// Windiff ends up at the same place as diff, but by pairing up
// the unique 'b's and then matching the first two 'a's.
, , := , , 0
// find longest junk-free match
// during an iteration of the loop, j2len[j] = length of longest
// junk-free match ending with a[i-1] and b[j]
:= map[int]int{}
for := ; != ; ++ {
// look at all instances of a[i] in b; note that because
// b2j has no junk keys, the loop is skipped if a[i] is junk
:= map[int]int{}
for , := range .b2j[.a[]] {
// a[i] matches b[j]
if < {
continue
}
if >= {
break
}
:= [-1] + 1
[] =
if > {
, , = -+1, -+1,
}
}
=
}
// Extend the best by non-junk elements on each end. In particular,
// "popular" non-junk elements aren't in b2j, which greatly speeds
// the inner loop above, but also means "the best" match so far
// doesn't contain any junk *or* popular non-junk elements.
for > && > && !.isBJunk(.b[-1]) &&
.a[-1] == .b[-1] {
, , = -1, -1, +1
}
for + < && + < &&
!.isBJunk(.b[+]) &&
.a[+] == .b[+] {
+= 1
}
// Now that we have a wholly interesting match (albeit possibly
// empty!), we may as well suck up the matching junk on each
// side of it too. Can't think of a good reason not to, and it
// saves post-processing the (possibly considerable) expense of
// figuring out what to do with it. In the case of an empty
// interesting match, this is clearly the right thing to do,
// because no other kind of match is possible in the regions.
for > && > && .isBJunk(.b[-1]) &&
.a[-1] == .b[-1] {
, , = -1, -1, +1
}
for + < && + < &&
.isBJunk(.b[+]) &&
.a[+] == .b[+] {
+= 1
}
return Match{A: , B: , Size: }
}
// GetMatchingBlocks returns a list of triples describing matching subsequences.
//
// Each triple is of the form (i, j, n), and means that
// a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in
// i and in j. It's also guaranteed that if (i, j, n) and (i', j', n') are
// adjacent triples in the list, and the second is not the last triple in the
// list, then i+n != i' or j+n != j'. IOW, adjacent triples never describe
// adjacent equal blocks.
//
// The last triple is a dummy, (len(a), len(b), 0), and is the only
// triple with n==0.
func ( *SequenceMatcher) () []Match {
if .matchingBlocks != nil {
return .matchingBlocks
}
var func(, , , int, []Match) []Match
= func(, , , int, []Match) []Match {
:= .findLongestMatch(, , , )
, , := .A, .B, .Size
if .Size > 0 {
if < && < {
= (, , , , )
}
= append(, )
if + < && + < {
= (+, , +, , )
}
}
return
}
:= (0, len(.a), 0, len(.b), nil)
// It's possible that we have adjacent equal blocks in the
// matching_blocks list now.
:= []Match{}
, , := 0, 0, 0
for , := range {
// Is this block adjacent to i1, j1, k1?
, , := .A, .B, .Size
if + == && + == {
// Yes, so collapse them -- this just increases the length of
// the first block by the length of the second, and the first
// block so lengthened remains the block to compare against.
+=
} else {
// Not adjacent. Remember the first block (k1==0 means it's
// the dummy we started with), and make the second block the
// new block to compare against.
if > 0 {
= append(, Match{, , })
}
, , = , ,
}
}
if > 0 {
= append(, Match{, , })
}
= append(, Match{len(.a), len(.b), 0})
.matchingBlocks =
return .matchingBlocks
}
// GetOpCodes returns a list of 5-tuples describing how to turn a into b.
//
// Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple
// has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the
// tuple preceding it, and likewise for j1 == the previous j2.
//
// The tags are characters, with these meanings:
//
// 'r' (replace): a[i1:i2] should be replaced by b[j1:j2]
//
// 'd' (delete): a[i1:i2] should be deleted, j1==j2 in this case.
//
// 'i' (insert): b[j1:j2] should be inserted at a[i1:i1], i1==i2 in this case.
//
// 'e' (equal): a[i1:i2] == b[j1:j2]
func ( *SequenceMatcher) () []OpCode {
if .opCodes != nil {
return .opCodes
}
, := 0, 0
:= .GetMatchingBlocks()
:= make([]OpCode, 0, len())
for , := range {
// invariant: we've pumped out correct diffs to change
// a[:i] into b[:j], and the next matching block is
// a[ai:ai+size] == b[bj:bj+size]. So we need to pump
// out a diff to change a[i:ai] into b[j:bj], pump out
// the matching block, and move (i,j) beyond the match
, , := .A, .B, .Size
:= byte(0)
if < && < {
= 'r'
} else if < {
= 'd'
} else if < {
= 'i'
}
if > 0 {
= append(, OpCode{, , , , })
}
, = +, +
// the list of matching blocks is terminated by a
// sentinel with size 0
if > 0 {
= append(, OpCode{'e', , , , })
}
}
.opCodes =
return .opCodes
}
// GetGroupedOpCodes isolates change clusters by eliminating ranges with no changes.
//
// Return a generator of groups with up to n lines of context.
// Each group is in the same format as returned by GetOpCodes().
func ( *SequenceMatcher) ( int) [][]OpCode {
if < 0 {
= 3
}
:= .GetOpCodes()
if len() == 0 {
= []OpCode{{'e', 0, 1, 0, 1}}
}
// Fixup leading and trailing groups if they show no changes.
if [0].Tag == 'e' {
:= [0]
, , , := .I1, .I2, .J1, .J2
[0] = OpCode{.Tag, max(, -), , max(, -), }
}
if [len()-1].Tag == 'e' {
:= [len()-1]
, , , := .I1, .I2, .J1, .J2
[len()-1] = OpCode{.Tag, , min(, +), , min(, +)}
}
:= +
:= [][]OpCode{}
:= []OpCode{}
for , := range {
, , , := .I1, .I2, .J1, .J2
// End the current group and start a new one whenever
// there is a large range with no changes.
if .Tag == 'e' && - > {
= append(, OpCode{.Tag, , min(, +),
, min(, +)})
= append(, )
= []OpCode{}
, = max(, -), max(, -)
}
= append(, OpCode{.Tag, , , , })
}
if len() > 0 && !(len() == 1 && [0].Tag == 'e') {
= append(, )
}
return
}
The pages are generated with Golds v0.4.9. (GOOS=linux GOARCH=amd64)