Source File
diff.go
Belonging Package
github.com/google/go-cmp/cmp/internal/diff
// Copyright 2017, The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.// Package diff implements an algorithm for producing edit-scripts.// The edit-script is a sequence of operations needed to transform one list// of symbols into another (or vice-versa). The edits allowed are insertions,// deletions, and modifications. The summation of all edits is called the// Levenshtein distance as this problem is well-known in computer science.//// This package prioritizes performance over accuracy. That is, the run time// is more important than obtaining a minimal Levenshtein distance.package diffimport ()// EditType represents a single operation within an edit-script.type EditType uint8const (// Identity indicates that a symbol pair is identical in both list X and Y.Identity EditType = iota// UniqueX indicates that a symbol only exists in X and not Y.UniqueX// UniqueY indicates that a symbol only exists in Y and not X.UniqueY// Modified indicates that a symbol pair is a modification of each other.Modified)// EditScript represents the series of differences between two lists.type EditScript []EditType// String returns a human-readable string representing the edit-script where// Identity, UniqueX, UniqueY, and Modified are represented by the// '.', 'X', 'Y', and 'M' characters, respectively.func ( EditScript) () string {:= make([]byte, len())for , := range {switch {case Identity:[] = '.'case UniqueX:[] = 'X'case UniqueY:[] = 'Y'case Modified:[] = 'M'default:panic("invalid edit-type")}}return string()}// stats returns a histogram of the number of each type of edit operation.func ( EditScript) () ( struct{ , , , int }) {for , := range {switch {case Identity:.++case UniqueX:.++case UniqueY:.++case Modified:.++default:panic("invalid edit-type")}}return}// Dist is the Levenshtein distance and is guaranteed to be 0 if and only if// lists X and Y are equal.func ( EditScript) () int { return len() - .stats().NI }// LenX is the length of the X list.func ( EditScript) () int { return len() - .stats().NY }// LenY is the length of the Y list.func ( EditScript) () int { return len() - .stats().NX }// EqualFunc reports whether the symbols at indexes ix and iy are equal.// When called by Difference, the index is guaranteed to be within nx and ny.type EqualFunc func(ix int, iy int) Result// Result is the result of comparison.// NumSame is the number of sub-elements that are equal.// NumDiff is the number of sub-elements that are not equal.type Result struct{ NumSame, NumDiff int }// BoolResult returns a Result that is either Equal or not Equal.func ( bool) Result {if {return Result{NumSame: 1} // Equal, Similar} else {return Result{NumDiff: 2} // Not Equal, not Similar}}// Equal indicates whether the symbols are equal. Two symbols are equal// if and only if NumDiff == 0. If Equal, then they are also Similar.func ( Result) () bool { return .NumDiff == 0 }// Similar indicates whether two symbols are similar and may be represented// by using the Modified type. As a special case, we consider binary comparisons// (i.e., those that return Result{1, 0} or Result{0, 1}) to be similar.//// The exact ratio of NumSame to NumDiff to determine similarity may change.func ( Result) () bool {// Use NumSame+1 to offset NumSame so that binary comparisons are similar.return .NumSame+1 >= .NumDiff}var randBool = rand.New(rand.NewSource(time.Now().Unix())).Intn(2) == 0// Difference reports whether two lists of lengths nx and ny are equal// given the definition of equality provided as f.//// This function returns an edit-script, which is a sequence of operations// needed to convert one list into the other. The following invariants for// the edit-script are maintained:// - eq == (es.Dist()==0)// - nx == es.LenX()// - ny == es.LenY()//// This algorithm is not guaranteed to be an optimal solution (i.e., one that// produces an edit-script with a minimal Levenshtein distance). This algorithm// favors performance over optimality. The exact output is not guaranteed to// be stable and may change over time.func (, int, EqualFunc) ( EditScript) {// This algorithm is based on traversing what is known as an "edit-graph".// See Figure 1 from "An O(ND) Difference Algorithm and Its Variations"// by Eugene W. Myers. Since D can be as large as N itself, this is// effectively O(N^2). Unlike the algorithm from that paper, we are not// interested in the optimal path, but at least some "decent" path.//// For example, let X and Y be lists of symbols:// X = [A B C A B B A]// Y = [C B A B A C]//// The edit-graph can be drawn as the following:// A B C A B B A// ┌─────────────┐// C │_|_|\|_|_|_|_│ 0// B │_|\|_|_|\|\|_│ 1// A │\|_|_|\|_|_|\│ 2// B │_|\|_|_|\|\|_│ 3// A │\|_|_|\|_|_|\│ 4// C │ | |\| | | | │ 5// └─────────────┘ 6// 0 1 2 3 4 5 6 7//// List X is written along the horizontal axis, while list Y is written// along the vertical axis. At any point on this grid, if the symbol in// list X matches the corresponding symbol in list Y, then a '\' is drawn.// The goal of any minimal edit-script algorithm is to find a path from the// top-left corner to the bottom-right corner, while traveling through the// fewest horizontal or vertical edges.// A horizontal edge is equivalent to inserting a symbol from list X.// A vertical edge is equivalent to inserting a symbol from list Y.// A diagonal edge is equivalent to a matching symbol between both X and Y.// Invariants:// - 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx// - 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny//// In general:// - fwdFrontier.X < revFrontier.X// - fwdFrontier.Y < revFrontier.Y//// Unless, it is time for the algorithm to terminate.:= path{+1, point{0, 0}, make(EditScript, 0, (+)/2)}:= path{-1, point{, }, make(EditScript, 0)}:= .point // Forward search frontier:= .point // Reverse search frontier// Search budget bounds the cost of searching for better paths.// The longest sequence of non-matching symbols that can be tolerated is// approximately the square-root of the search budget.:= 4 * ( + ) // O(n)// Running the tests with the "cmp_debug" build tag prints a visualization// of the algorithm running in real-time. This is educational for// understanding how the algorithm works. See debug_enable.go.= debug.Begin(, , , &.es, &.es)// The algorithm below is a greedy, meet-in-the-middle algorithm for// computing sub-optimal edit-scripts between two lists.//// The algorithm is approximately as follows:// - Searching for differences switches back-and-forth between// a search that starts at the beginning (the top-left corner), and// a search that starts at the end (the bottom-right corner).// The goal of the search is connect with the search// from the opposite corner.// - As we search, we build a path in a greedy manner,// where the first match seen is added to the path (this is sub-optimal,// but provides a decent result in practice). When matches are found,// we try the next pair of symbols in the lists and follow all matches// as far as possible.// - When searching for matches, we search along a diagonal going through// through the "frontier" point. If no matches are found,// we advance the frontier towards the opposite corner.// - This algorithm terminates when either the X coordinates or the// Y coordinates of the forward and reverse frontier points ever intersect.// This algorithm is correct even if searching only in the forward direction// or in the reverse direction. We do both because it is commonly observed// that two lists commonly differ because elements were added to the front// or end of the other list.//// Non-deterministically start with either the forward or reverse direction// to introduce some deliberate instability so that we have the flexibility// to change this algorithm in the future.if flags.Deterministic || randBool {goto} else {goto}:{// Forward search from the beginning.if .X >= .X || .Y >= .Y || == 0 {goto}for , , := false, false, 0; !( && ) && > 0; ++ {// Search in a diagonal pattern for a match.:= zigzag():= point{.X + , .Y - }switch {case .X >= .X || .Y < .Y:= true // Hit top-right cornercase .Y >= .Y || .X < .X:= true // Hit bottom-left cornercase (.X, .Y).Equal():// Match found, so connect the path to this point..connect(, ).append(Identity)// Follow sequence of matches as far as possible.for .X < .X && .Y < .Y {if !(.X, .Y).Equal() {break}.append(Identity)}= .point, = true, truedefault:-- // Match not found}debug.Update()}// Advance the frontier towards reverse point.if .X-.X >= .Y-.Y {.X++} else {.Y++}goto}:{// Reverse search from the end.if .X >= .X || .Y >= .Y || == 0 {goto}for , , := false, false, 0; !( && ) && > 0; ++ {// Search in a diagonal pattern for a match.:= zigzag():= point{.X - , .Y + }switch {case .X >= .X || .Y < .Y:= true // Hit bottom-left cornercase .Y >= .Y || .X < .X:= true // Hit top-right cornercase (.X-1, .Y-1).Equal():// Match found, so connect the path to this point..connect(, ).append(Identity)// Follow sequence of matches as far as possible.for .X < .X && .Y < .Y {if !(.X-1, .Y-1).Equal() {break}.append(Identity)}= .point, = true, truedefault:-- // Match not found}debug.Update()}// Advance the frontier towards forward point.if .X-.X >= .Y-.Y {.X--} else {.Y--}goto}:// Join the forward and reverse paths and then append the reverse path..connect(.point, )for := len(.es) - 1; >= 0; -- {:= .es[].es = .es[:].append()}debug.Finish()return .es}type path struct {dir int // +1 if forward, -1 if reversepoint // Leading point of the EditScript pathes EditScript}// connect appends any necessary Identity, Modified, UniqueX, or UniqueY types// to the edit-script to connect p.point to dst.func ( *path) ( point, EqualFunc) {if .dir > 0 {// Connect in forward direction.for .X > .X && .Y > .Y {switch := (.X, .Y); {case .Equal():.append(Identity)case .Similar():.append(Modified)case .X-.X >= .Y-.Y:.append(UniqueX)default:.append(UniqueY)}}for .X > .X {.append(UniqueX)}for .Y > .Y {.append(UniqueY)}} else {// Connect in reverse direction.for .X > .X && .Y > .Y {switch := (.X-1, .Y-1); {case .Equal():.append(Identity)case .Similar():.append(Modified)case .Y-.Y >= .X-.X:.append(UniqueY)default:.append(UniqueX)}}for .X > .X {.append(UniqueX)}for .Y > .Y {.append(UniqueY)}}}func ( *path) ( EditType) {.es = append(.es, )switch {case Identity, Modified:.add(.dir, .dir)case UniqueX:.add(.dir, 0)case UniqueY:.add(0, .dir)}debug.Update()}type point struct{ X, Y int }func ( *point) (, int) { .X += ; .Y += }// zigzag maps a consecutive sequence of integers to a zig-zag sequence.//// [0 1 2 3 4 5 ...] => [0 -1 +1 -2 +2 ...]func ( int) int {if &1 != 0 {= ^}return >> 1}
The pages are generated with Golds v0.7.6. (GOOS=linux GOARCH=amd64)