Source File
difflib.go
Belonging Package
gotest.tools/v3/internal/difflib
/*Package difflib is a partial port of Python difflib module.Original source: https://github.com/pmezard/go-difflibThis file is trimmed to only the parts used by this repository.*/package difflib // import "gotest.tools/v3/internal/difflib"func (, int) int {if < {return}return}func (, int) int {if > {return}return}// Match stores line numbers of size of matchtype Match struct {A intB intSize int}// OpCode identifies the type of difftype OpCode struct {Tag byteI1 intI2 intJ1 intJ2 int}// SequenceMatcher compares sequence of strings. The basic// algorithm predates, and is a little fancier than, an algorithm// published in the late 1980's by Ratcliff and Obershelp under the// hyperbolic name "gestalt pattern matching". The basic idea is to find// the longest contiguous matching subsequence that contains no "junk"// elements (R-O doesn't address junk). The same idea is then applied// recursively to the pieces of the sequences to the left and to the right// of the matching subsequence. This does not yield minimal edit// sequences, but does tend to yield matches that "look right" to people.//// SequenceMatcher tries to compute a "human-friendly diff" between two// sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the// longest *contiguous* & junk-free matching subsequence. That's what// catches peoples' eyes. The Windows(tm) windiff has another interesting// notion, pairing up elements that appear uniquely in each sequence.// That, and the method here, appear to yield more intuitive difference// reports than does diff. This method appears to be the least vulnerable// to synching up on blocks of "junk lines", though (like blank lines in// ordinary text files, or maybe "<P>" lines in HTML files). That may be// because this is the only method of the 3 that has a *concept* of// "junk" <wink>.//// Timing: Basic R-O is cubic time worst case and quadratic time expected// case. SequenceMatcher is quadratic time for the worst case and has// expected-case behavior dependent in a complicated way on how many// elements the sequences have in common; best case time is linear.type SequenceMatcher struct {a []stringb []stringb2j map[string][]intIsJunk func(string) boolautoJunk boolbJunk map[string]struct{}matchingBlocks []MatchfullBCount map[string]intbPopular map[string]struct{}opCodes []OpCode}// NewMatcher returns a new SequenceMatcherfunc (, []string) *SequenceMatcher {:= SequenceMatcher{autoJunk: true}.SetSeqs(, )return &}// SetSeqs sets two sequences to be compared.func ( *SequenceMatcher) (, []string) {.SetSeq1().SetSeq2()}// SetSeq1 sets the first sequence to be compared. The second sequence to be compared is// not changed.//// SequenceMatcher computes and caches detailed information about the second// sequence, so if you want to compare one sequence S against many sequences,// use .SetSeq2(s) once and call .SetSeq1(x) repeatedly for each of the other// sequences.//// See also SetSeqs() and SetSeq2().func ( *SequenceMatcher) ( []string) {if & == &.a {return}.a =.matchingBlocks = nil.opCodes = nil}// SetSeq2 sets the second sequence to be compared. The first sequence to be compared is// not changed.func ( *SequenceMatcher) ( []string) {if & == &.b {return}.b =.matchingBlocks = nil.opCodes = nil.fullBCount = nil.chainB()}func ( *SequenceMatcher) () {// Populate line -> index mapping:= map[string][]int{}for , := range .b {:= []= append(, )[] =}// Purge junk elements.bJunk = map[string]struct{}{}if .IsJunk != nil {:= .bJunkfor := range {if .IsJunk() {[] = struct{}{}}}for := range {delete(, )}}// Purge remaining popular elements:= map[string]struct{}{}:= len(.b)if .autoJunk && >= 200 {:= /100 + 1for , := range {if len() > {[] = struct{}{}}}for := range {delete(, )}}.bPopular =.b2j =}func ( *SequenceMatcher) ( string) bool {, := .bJunk[]return}// Find longest matching block in a[alo:ahi] and b[blo:bhi].//// If IsJunk is not defined://// Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where//// alo <= i <= i+k <= ahi// blo <= j <= j+k <= bhi//// and for all (i',j',k') meeting those conditions,//// k >= k'// i <= i'// and if i == i', j <= j'//// In other words, of all maximal matching blocks, return one that// starts earliest in a, and of all those maximal matching blocks that// start earliest in a, return the one that starts earliest in b.//// If IsJunk is defined, first the longest matching block is// determined as above, but with the additional restriction that no// junk element appears in the block. Then that block is extended as// far as possible by matching (only) junk elements on both sides. So// the resulting block never matches on junk except as identical junk// happens to be adjacent to an "interesting" match.//// If no blocks match, return (alo, blo, 0).func ( *SequenceMatcher) (, , , int) Match {// CAUTION: stripping common prefix or suffix would be incorrect.// E.g.,// ab// acab// Longest matching block is "ab", but if common prefix is// stripped, it's "a" (tied with "b"). UNIX(tm) diff does so// strip, so ends up claiming that ab is changed to acab by// inserting "ca" in the middle. That's minimal but unintuitive:// "it's obvious" that someone inserted "ac" at the front.// Windiff ends up at the same place as diff, but by pairing up// the unique 'b's and then matching the first two 'a's., , := , , 0// find longest junk-free match// during an iteration of the loop, j2len[j] = length of longest// junk-free match ending with a[i-1] and b[j]:= map[int]int{}for := ; != ; ++ {// look at all instances of a[i] in b; note that because// b2j has no junk keys, the loop is skipped if a[i] is junk:= map[int]int{}for , := range .b2j[.a[]] {// a[i] matches b[j]if < {continue}if >= {break}:= [-1] + 1[] =if > {, , = -+1, -+1,}}=}// Extend the best by non-junk elements on each end. In particular,// "popular" non-junk elements aren't in b2j, which greatly speeds// the inner loop above, but also means "the best" match so far// doesn't contain any junk *or* popular non-junk elements.for > && > && !.isBJunk(.b[-1]) &&.a[-1] == .b[-1] {, , = -1, -1, +1}for + < && + < &&!.isBJunk(.b[+]) &&.a[+] == .b[+] {+= 1}// Now that we have a wholly interesting match (albeit possibly// empty!), we may as well suck up the matching junk on each// side of it too. Can't think of a good reason not to, and it// saves post-processing the (possibly considerable) expense of// figuring out what to do with it. In the case of an empty// interesting match, this is clearly the right thing to do,// because no other kind of match is possible in the regions.for > && > && .isBJunk(.b[-1]) &&.a[-1] == .b[-1] {, , = -1, -1, +1}for + < && + < &&.isBJunk(.b[+]) &&.a[+] == .b[+] {+= 1}return Match{A: , B: , Size: }}// GetMatchingBlocks returns a list of triples describing matching subsequences.//// Each triple is of the form (i, j, n), and means that// a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in// i and in j. It's also guaranteed that if (i, j, n) and (i', j', n') are// adjacent triples in the list, and the second is not the last triple in the// list, then i+n != i' or j+n != j'. IOW, adjacent triples never describe// adjacent equal blocks.//// The last triple is a dummy, (len(a), len(b), 0), and is the only// triple with n==0.func ( *SequenceMatcher) () []Match {if .matchingBlocks != nil {return .matchingBlocks}var func(, , , int, []Match) []Match= func(, , , int, []Match) []Match {:= .findLongestMatch(, , , ), , := .A, .B, .Sizeif .Size > 0 {if < && < {= (, , , , )}= append(, )if + < && + < {= (+, , +, , )}}return}:= (0, len(.a), 0, len(.b), nil)// It's possible that we have adjacent equal blocks in the// matching_blocks list now.:= []Match{}, , := 0, 0, 0for , := range {// Is this block adjacent to i1, j1, k1?, , := .A, .B, .Sizeif + == && + == {// Yes, so collapse them -- this just increases the length of// the first block by the length of the second, and the first// block so lengthened remains the block to compare against.+=} else {// Not adjacent. Remember the first block (k1==0 means it's// the dummy we started with), and make the second block the// new block to compare against.if > 0 {= append(, Match{, , })}, , = , ,}}if > 0 {= append(, Match{, , })}= append(, Match{len(.a), len(.b), 0}).matchingBlocks =return .matchingBlocks}// GetOpCodes returns a list of 5-tuples describing how to turn a into b.//// Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple// has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the// tuple preceding it, and likewise for j1 == the previous j2.//// The tags are characters, with these meanings://// 'r' (replace): a[i1:i2] should be replaced by b[j1:j2]//// 'd' (delete): a[i1:i2] should be deleted, j1==j2 in this case.//// 'i' (insert): b[j1:j2] should be inserted at a[i1:i1], i1==i2 in this case.//// 'e' (equal): a[i1:i2] == b[j1:j2]func ( *SequenceMatcher) () []OpCode {if .opCodes != nil {return .opCodes}, := 0, 0:= .GetMatchingBlocks():= make([]OpCode, 0, len())for , := range {// invariant: we've pumped out correct diffs to change// a[:i] into b[:j], and the next matching block is// a[ai:ai+size] == b[bj:bj+size]. So we need to pump// out a diff to change a[i:ai] into b[j:bj], pump out// the matching block, and move (i,j) beyond the match, , := .A, .B, .Size:= byte(0)if < && < {= 'r'} else if < {= 'd'} else if < {= 'i'}if > 0 {= append(, OpCode{, , , , })}, = +, +// the list of matching blocks is terminated by a// sentinel with size 0if > 0 {= append(, OpCode{'e', , , , })}}.opCodes =return .opCodes}// GetGroupedOpCodes isolates change clusters by eliminating ranges with no changes.//// Return a generator of groups with up to n lines of context.// Each group is in the same format as returned by GetOpCodes().func ( *SequenceMatcher) ( int) [][]OpCode {if < 0 {= 3}:= .GetOpCodes()if len() == 0 {= []OpCode{{'e', 0, 1, 0, 1}}}// Fixup leading and trailing groups if they show no changes.if [0].Tag == 'e' {:= [0], , , := .I1, .I2, .J1, .J2[0] = OpCode{.Tag, maxInt(, -), , maxInt(, -), }}if [len()-1].Tag == 'e' {:= [len()-1], , , := .I1, .I2, .J1, .J2[len()-1] = OpCode{.Tag, , minInt(, +), , minInt(, +)}}:= +:= [][]OpCode{}:= []OpCode{}for , := range {, , , := .I1, .I2, .J1, .J2// End the current group and start a new one whenever// there is a large range with no changes.if .Tag == 'e' && - > {= append(, OpCode{.Tag, , minInt(, +),, minInt(, +)})= append(, )= []OpCode{}, = maxInt(, -), maxInt(, -)}= append(, OpCode{.Tag, , , , })}if len() > 0 && !(len() == 1 && [0].Tag == 'e') {= append(, )}return}
The pages are generated with Golds v0.7.6. (GOOS=linux GOARCH=amd64)