cmd/jsonimports: add static analyzer for consistent "json" imports (#17669)
This migrates an internal tool to open source so that we can run it on the tailscale.com module as well. We add the "util/safediff" also as a dependency of the tool. This PR does not yet set up a CI to run this analyzer. Updates tailscale/corp#791 Signed-off-by: Joe Tsai <joetsai@digital-static.net>main
parent
09a2a1048d
commit
fcb614a53e
@ -0,0 +1,175 @@ |
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"bytes" |
||||
"go/ast" |
||||
"go/format" |
||||
"go/parser" |
||||
"go/token" |
||||
"go/types" |
||||
"path" |
||||
"slices" |
||||
"strconv" |
||||
"strings" |
||||
|
||||
"tailscale.com/util/must" |
||||
) |
||||
|
||||
// mustFormatFile formats a Go source file and adjust "json" imports.
|
||||
// It panics if there are any parsing errors.
|
||||
//
|
||||
// - "encoding/json" is imported under the name "jsonv1" or "jsonv1std"
|
||||
// - "encoding/json/v2" is rewritten to import "github.com/go-json-experiment/json" instead
|
||||
// - "encoding/json/jsontext" is rewritten to import "github.com/go-json-experiment/json/jsontext" instead
|
||||
// - "github.com/go-json-experiment/json" is imported under the name "jsonv2"
|
||||
// - "github.com/go-json-experiment/json/v1" is imported under the name "jsonv1"
|
||||
//
|
||||
// If no changes to the file is made, it returns input.
|
||||
func mustFormatFile(in []byte) (out []byte) { |
||||
fset := token.NewFileSet() |
||||
f := must.Get(parser.ParseFile(fset, "", in, parser.ParseComments)) |
||||
|
||||
// Check for the existence of "json" imports.
|
||||
jsonImports := make(map[string][]*ast.ImportSpec) |
||||
for _, imp := range f.Imports { |
||||
switch pkgPath := must.Get(strconv.Unquote(imp.Path.Value)); pkgPath { |
||||
case |
||||
"encoding/json", |
||||
"encoding/json/v2", |
||||
"encoding/json/jsontext", |
||||
"github.com/go-json-experiment/json", |
||||
"github.com/go-json-experiment/json/v1", |
||||
"github.com/go-json-experiment/json/jsontext": |
||||
jsonImports[pkgPath] = append(jsonImports[pkgPath], imp) |
||||
} |
||||
} |
||||
if len(jsonImports) == 0 { |
||||
return in |
||||
} |
||||
|
||||
// Best-effort local type-check of the file
|
||||
// to resolve local declarations to detect shadowed variables.
|
||||
typeInfo := &types.Info{Uses: make(map[*ast.Ident]types.Object)} |
||||
(&types.Config{ |
||||
Error: func(err error) {}, |
||||
}).Check("", fset, []*ast.File{f}, typeInfo) |
||||
|
||||
// Rewrite imports to instead use "github.com/go-json-experiment/json".
|
||||
// This ensures that code continues to build even if
|
||||
// goexperiment.jsonv2 is *not* specified.
|
||||
// As of https://github.com/go-json-experiment/json/pull/186,
|
||||
// imports to "github.com/go-json-experiment/json" are identical
|
||||
// to the standard library if built with goexperiment.jsonv2.
|
||||
for fromPath, toPath := range map[string]string{ |
||||
"encoding/json/v2": "github.com/go-json-experiment/json", |
||||
"encoding/json/jsontext": "github.com/go-json-experiment/json/jsontext", |
||||
} { |
||||
for _, imp := range jsonImports[fromPath] { |
||||
imp.Path.Value = strconv.Quote(toPath) |
||||
jsonImports[toPath] = append(jsonImports[toPath], imp) |
||||
} |
||||
delete(jsonImports, fromPath) |
||||
} |
||||
|
||||
// While in a transitory state, where both v1 and v2 json imports
|
||||
// may exist in our codebase, always explicitly import with
|
||||
// either jsonv1 or jsonv2 in the package name to avoid ambiguities
|
||||
// when looking at a particular Marshal or Unmarshal call site.
|
||||
renames := make(map[string]string) // mapping of old names to new names
|
||||
deletes := make(map[*ast.ImportSpec]bool) // set of imports to delete
|
||||
for pkgPath, imps := range jsonImports { |
||||
var newName string |
||||
switch pkgPath { |
||||
case "encoding/json": |
||||
newName = "jsonv1" |
||||
// If "github.com/go-json-experiment/json/v1" is also imported,
|
||||
// then use jsonv1std for "encoding/json" to avoid a conflict.
|
||||
if len(jsonImports["github.com/go-json-experiment/json/v1"]) > 0 { |
||||
newName += "std" |
||||
} |
||||
case "github.com/go-json-experiment/json": |
||||
newName = "jsonv2" |
||||
case "github.com/go-json-experiment/json/v1": |
||||
newName = "jsonv1" |
||||
} |
||||
|
||||
// Rename the import if different than expected.
|
||||
if oldName := importName(imps[0]); oldName != newName && newName != "" { |
||||
renames[oldName] = newName |
||||
pos := imps[0].Pos() // preserve original positioning
|
||||
imps[0].Name = ast.NewIdent(newName) |
||||
imps[0].Name.NamePos = pos |
||||
} |
||||
|
||||
// For all redundant imports, use the first imported name.
|
||||
for _, imp := range imps[1:] { |
||||
renames[importName(imp)] = importName(imps[0]) |
||||
deletes[imp] = true |
||||
} |
||||
} |
||||
if len(deletes) > 0 { |
||||
f.Imports = slices.DeleteFunc(f.Imports, func(imp *ast.ImportSpec) bool { |
||||
return deletes[imp] |
||||
}) |
||||
for _, decl := range f.Decls { |
||||
if genDecl, ok := decl.(*ast.GenDecl); ok && genDecl.Tok == token.IMPORT { |
||||
genDecl.Specs = slices.DeleteFunc(genDecl.Specs, func(spec ast.Spec) bool { |
||||
return deletes[spec.(*ast.ImportSpec)] |
||||
}) |
||||
} |
||||
} |
||||
} |
||||
if len(renames) > 0 { |
||||
ast.Walk(astVisitor(func(n ast.Node) bool { |
||||
if sel, ok := n.(*ast.SelectorExpr); ok { |
||||
if id, ok := sel.X.(*ast.Ident); ok { |
||||
// Just because the selector looks like "json.Marshal"
|
||||
// does not mean that it is referencing the "json" package.
|
||||
// There could be a local "json" declaration that shadows
|
||||
// the package import. Check partial type information
|
||||
// to see if there was a local declaration.
|
||||
if obj, ok := typeInfo.Uses[id]; ok { |
||||
if _, ok := obj.(*types.PkgName); !ok { |
||||
return true |
||||
} |
||||
} |
||||
|
||||
if newName, ok := renames[id.String()]; ok { |
||||
id.Name = newName |
||||
} |
||||
} |
||||
} |
||||
return true |
||||
}), f) |
||||
} |
||||
|
||||
bb := new(bytes.Buffer) |
||||
must.Do(format.Node(bb, fset, f)) |
||||
return must.Get(format.Source(bb.Bytes())) |
||||
} |
||||
|
||||
// importName is the local package name used for an import.
// If no explicit local name is used, then it uses string parsing
// to derive the package name from the path, relying on the convention
// that the package name is the base name of the package path.
//
// Trailing version directories (path elements of the form "vN", such as
// the "v2" in "encoding/json/v2") are skipped, so both "encoding/json/v2"
// and "github.com/go-json-experiment/json/v1" yield "json".
// Only whole "vN" elements are stripped; a final element that merely ends
// in 'v' or digits (e.g., "dev" or "s3") is returned intact.
func importName(imp *ast.ImportSpec) string {
	if imp.Name != nil {
		return imp.Name.String()
	}
	// A malformed path literal unquotes to "", for which path.Base reports ".".
	pkgPath, _ := strconv.Unquote(imp.Path.Value)
	pkgPath = strings.TrimRight(pkgPath, "/")
	for {
		dir, elem := path.Split(pkgPath)
		if dir == "" || !isVersionElem(elem) {
			break
		}
		pkgPath = strings.TrimRight(dir, "/") // exclude version directories
	}
	return path.Base(pkgPath)
}

// isVersionElem reports whether elem is a version directory name,
// i.e., the letter 'v' followed by one or more decimal digits.
func isVersionElem(elem string) bool {
	if len(elem) < 2 || elem[0] != 'v' {
		return false
	}
	for i := 1; i < len(elem); i++ {
		if elem[i] < '0' || elem[i] > '9' {
			return false
		}
	}
	return true
}
||||
|
||||
// astVisitor adapts a plain function to the [ast.Visitor] interface.
// The function is invoked for every visited node; its boolean result
// decides whether the walk descends into that node's children.
type astVisitor func(ast.Node) bool

// Visit calls f on node. It returns f itself to keep walking
// beneath node, or nil to prune the subtree when f reports false.
func (f astVisitor) Visit(node ast.Node) ast.Visitor {
	if descend := f(node); descend {
		return f
	}
	return nil
}
||||
@ -0,0 +1,162 @@ |
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"go/format" |
||||
"testing" |
||||
|
||||
"tailscale.com/util/must" |
||||
"tailscale.com/util/safediff" |
||||
) |
||||
|
||||
func TestFormatFile(t *testing.T) { |
||||
tests := []struct{ in, want string }{{ |
||||
in: `package foobar |
||||
|
||||
import ( |
||||
"encoding/json" |
||||
jsonv2exp "github.com/go-json-experiment/json" |
||||
) |
||||
|
||||
func main() { |
||||
json.Marshal() |
||||
jsonv2exp.Marshal() |
||||
{ |
||||
var json T // deliberately shadow "json" package name
|
||||
json.Marshal() // should not be re-written
|
||||
} |
||||
} |
||||
`, |
||||
want: `package foobar |
||||
|
||||
import ( |
||||
jsonv1 "encoding/json" |
||||
jsonv2 "github.com/go-json-experiment/json" |
||||
) |
||||
|
||||
func main() { |
||||
jsonv1.Marshal() |
||||
jsonv2.Marshal() |
||||
{ |
||||
var json T // deliberately shadow "json" package name
|
||||
json.Marshal() // should not be re-written
|
||||
} |
||||
} |
||||
`, |
||||
}, { |
||||
in: `package foobar |
||||
|
||||
import ( |
||||
"github.com/go-json-experiment/json" |
||||
jsonv2exp "github.com/go-json-experiment/json" |
||||
) |
||||
|
||||
func main() { |
||||
json.Marshal() |
||||
jsonv2exp.Marshal() |
||||
} |
||||
`, |
||||
want: `package foobar |
||||
import ( |
||||
jsonv2 "github.com/go-json-experiment/json" |
||||
) |
||||
func main() { |
||||
jsonv2.Marshal() |
||||
jsonv2.Marshal() |
||||
} |
||||
`, |
||||
}, { |
||||
in: `package foobar |
||||
import "github.com/go-json-experiment/json/v1" |
||||
func main() { |
||||
json.Marshal() |
||||
} |
||||
`, |
||||
want: `package foobar |
||||
import jsonv1 "github.com/go-json-experiment/json/v1" |
||||
func main() { |
||||
jsonv1.Marshal() |
||||
} |
||||
`, |
||||
}, { |
||||
in: `package foobar |
||||
import ( |
||||
"encoding/json" |
||||
jsonv1in2 "github.com/go-json-experiment/json/v1" |
||||
) |
||||
func main() { |
||||
json.Marshal() |
||||
jsonv1in2.Marshal() |
||||
} |
||||
`, |
||||
want: `package foobar |
||||
import ( |
||||
jsonv1std "encoding/json" |
||||
jsonv1 "github.com/go-json-experiment/json/v1" |
||||
) |
||||
func main() { |
||||
jsonv1std.Marshal() |
||||
jsonv1.Marshal() |
||||
} |
||||
`, |
||||
}, { |
||||
in: `package foobar |
||||
import ( |
||||
"encoding/json" |
||||
jsonv1in2 "github.com/go-json-experiment/json/v1" |
||||
) |
||||
func main() { |
||||
json.Marshal() |
||||
jsonv1in2.Marshal() |
||||
} |
||||
`, |
||||
want: `package foobar |
||||
import ( |
||||
jsonv1std "encoding/json" |
||||
jsonv1 "github.com/go-json-experiment/json/v1" |
||||
) |
||||
func main() { |
||||
jsonv1std.Marshal() |
||||
jsonv1.Marshal() |
||||
} |
||||
`, |
||||
}, { |
||||
in: `package foobar |
||||
import ( |
||||
"encoding/json" |
||||
j2 "encoding/json/v2" |
||||
"encoding/json/jsontext" |
||||
) |
||||
func main() { |
||||
json.Marshal() |
||||
j2.Marshal() |
||||
jsontext.NewEncoder |
||||
} |
||||
`, |
||||
want: `package foobar |
||||
import ( |
||||
jsonv1 "encoding/json" |
||||
jsonv2 "github.com/go-json-experiment/json" |
||||
"github.com/go-json-experiment/json/jsontext" |
||||
) |
||||
func main() { |
||||
jsonv1.Marshal() |
||||
jsonv2.Marshal() |
||||
jsontext.NewEncoder |
||||
} |
||||
`, |
||||
}} |
||||
for _, tt := range tests { |
||||
got := string(must.Get(format.Source([]byte(tt.in)))) |
||||
got = string(mustFormatFile([]byte(got))) |
||||
want := string(must.Get(format.Source([]byte(tt.want)))) |
||||
if got != want { |
||||
diff, _ := safediff.Lines(got, want, -1) |
||||
t.Errorf("mismatch (-got +want)\n%s", diff) |
||||
t.Error(got) |
||||
t.Error(want) |
||||
} |
||||
} |
||||
} |
||||
@ -0,0 +1,124 @@ |
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
// The jsonimports tool formats all Go source files in the repository
|
||||
// to enforce that "json" imports are consistent.
|
||||
//
|
||||
// With Go 1.25, the "encoding/json/v2" and "encoding/json/jsontext"
|
||||
// packages are now available under goexperiment.jsonv2.
|
||||
// This leads to possible confusion over the following:
|
||||
//
|
||||
// - "encoding/json"
|
||||
// - "encoding/json/v2"
|
||||
// - "encoding/json/jsontext"
|
||||
// - "github.com/go-json-experiment/json/v1"
|
||||
// - "github.com/go-json-experiment/json"
|
||||
// - "github.com/go-json-experiment/json/jsontext"
|
||||
//
|
||||
// In order to enforce consistent usage, we apply the following rules:
|
||||
//
|
||||
// - Until the Go standard library formally accepts "encoding/json/v2"
|
||||
// and "encoding/json/jsontext" into the standard library
|
||||
// (i.e., they are no longer considered experimental),
|
||||
// we forbid any code from directly importing those packages.
|
||||
// Go code should instead import "github.com/go-json-experiment/json"
|
||||
// and "github.com/go-json-experiment/json/jsontext".
|
||||
// The latter packages contain aliases to the standard library
|
||||
// if built on Go 1.25 with the goexperiment.jsonv2 tag specified.
|
||||
//
|
||||
// - Imports of "encoding/json" or "github.com/go-json-experiment/json/v1"
|
||||
// must be explicitly imported under the package name "jsonv1".
|
||||
// If both packages need to be imported, then the former should
|
||||
// be imported under the package name "jsonv1std".
|
||||
//
|
||||
// - Imports of "github.com/go-json-experiment/json"
|
||||
// must be explicitly imported under the package name "jsonv2".
|
||||
//
|
||||
// The latter two rules exist to provide clarity when reading code.
|
||||
// Without them, it is unclear whether "json.Marshal" refers to v1 or v2.
|
||||
// With them, however, it is clear that "jsonv1.Marshal" is calling v1 and
|
||||
// that "jsonv2.Marshal" is calling v2.
|
||||
//
|
||||
// TODO(@joetsai): At this present moment, there is no guidance given on
|
||||
// whether to use v1 or v2 for newly written Go source code.
|
||||
// I will write a document in the near future providing more guidance.
|
||||
// Feel free to continue using v1 "encoding/json" as you are accustomed to.
|
||||
package main |
||||
|
||||
import ( |
||||
"bytes" |
||||
"flag" |
||||
"fmt" |
||||
"os" |
||||
"os/exec" |
||||
"runtime" |
||||
"strings" |
||||
"sync" |
||||
|
||||
"tailscale.com/syncs" |
||||
"tailscale.com/util/must" |
||||
"tailscale.com/util/safediff" |
||||
) |
||||
|
||||
func main() { |
||||
update := flag.Bool("update", false, "update all Go source files") |
||||
flag.Parse() |
||||
|
||||
// Change working directory to Git repository root.
|
||||
repoRoot := strings.TrimSuffix(string(must.Get(exec.Command( |
||||
"git", "rev-parse", "--show-toplevel", |
||||
).Output())), "\n") |
||||
must.Do(os.Chdir(repoRoot)) |
||||
|
||||
// Iterate over all indexed files in the Git repository.
|
||||
var printMu sync.Mutex |
||||
var group sync.WaitGroup |
||||
sema := syncs.NewSemaphore(runtime.NumCPU()) |
||||
var numDiffs int |
||||
files := string(must.Get(exec.Command("git", "ls-files").Output())) |
||||
for file := range strings.Lines(files) { |
||||
sema.Acquire() |
||||
group.Go(func() { |
||||
defer sema.Release() |
||||
|
||||
// Ignore non-Go source files.
|
||||
file = strings.TrimSuffix(file, "\n") |
||||
if !strings.HasSuffix(file, ".go") { |
||||
return |
||||
} |
||||
|
||||
// Format all "json" imports in the Go source file.
|
||||
srcIn := must.Get(os.ReadFile(file)) |
||||
srcOut := mustFormatFile(srcIn) |
||||
|
||||
// Print differences with each formatted file.
|
||||
if !bytes.Equal(srcIn, srcOut) { |
||||
numDiffs++ |
||||
|
||||
printMu.Lock() |
||||
fmt.Println(file) |
||||
lines, _ := safediff.Lines(string(srcIn), string(srcOut), -1) |
||||
for line := range strings.Lines(lines) { |
||||
fmt.Print("\t", line) |
||||
} |
||||
fmt.Println() |
||||
printMu.Unlock() |
||||
|
||||
// If -update is specified, write out the changes.
|
||||
if *update { |
||||
mode := must.Get(os.Stat(file)).Mode() |
||||
must.Do(os.WriteFile(file, srcOut, mode)) |
||||
} |
||||
} |
||||
}) |
||||
} |
||||
group.Wait() |
||||
|
||||
// Report whether any differences were detected.
|
||||
if numDiffs > 0 && !*update { |
||||
fmt.Printf(`%d files with "json" imports that need formatting`+"\n", numDiffs) |
||||
fmt.Println("Please run:") |
||||
fmt.Println("\t./tool/go run tailscale.com/cmd/jsonimports -update") |
||||
os.Exit(1) |
||||
} |
||||
} |
||||
@ -0,0 +1,280 @@ |
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
// Package safediff computes the difference between two lists.
|
||||
//
|
||||
// It is guaranteed to run in O(n), but may not produce an optimal diff.
|
||||
// Most diffing algorithms produce optimal diffs but run in O(n²).
|
||||
// It is safe to pass in untrusted input.
|
||||
package safediff |
||||
|
||||
import ( |
||||
"bytes" |
||||
"fmt" |
||||
"math" |
||||
"strings" |
||||
"unicode" |
||||
|
||||
"github.com/google/go-cmp/cmp" |
||||
) |
||||
|
||||
var diffTest = false |
||||
|
||||
// Lines constructs a humanly readable line-by-line diff from x to y.
|
||||
// The output (if multiple lines) is guaranteed to be no larger than maxSize,
|
||||
// by truncating the output if necessary. A negative maxSize enforces no limit.
|
||||
//
|
||||
// Example diff:
|
||||
//
|
||||
// … 440 identical lines
|
||||
// "ssh": [
|
||||
// … 35 identical lines
|
||||
// {
|
||||
// - "src": ["maisem@tailscale.com"],
|
||||
// - "dst": ["tag:maisem-test"],
|
||||
// - "users": ["maisem", "root"],
|
||||
// - "action": "check",
|
||||
// - // "recorder": ["100.12.34.56:80"],
|
||||
// + "src": ["maisem@tailscale.com"],
|
||||
// + "dst": ["tag:maisem-test"],
|
||||
// + "users": ["maisem", "root"],
|
||||
// + "action": "check",
|
||||
// + "recorder": ["node:recorder-2"],
|
||||
// },
|
||||
// … 77 identical lines
|
||||
// ],
|
||||
// … 345 identical lines
|
||||
//
|
||||
// Meaning of each line prefix:
|
||||
//
|
||||
// - '…' precedes a summary statement
|
||||
// - ' ' precedes an identical line printed for context
|
||||
// - '-' precedes a line removed from x
|
||||
// - '+' precedes a line inserted from y
|
||||
//
|
||||
// The diffing algorithm runs in O(n) and is safe to use with untrusted inputs.
|
||||
func Lines(x, y string, maxSize int) (out string, truncated bool) { |
||||
// Convert x and y into a slice of lines and compute the edit-script.
|
||||
xs := strings.Split(x, "\n") |
||||
ys := strings.Split(y, "\n") |
||||
es := diffStrings(xs, ys) |
||||
|
||||
// Modify the edit-script to support printing identical lines of context.
|
||||
const identicalContext edit = '*' // special edit code to indicate printed line
|
||||
var xi, yi int // index into xs or ys
|
||||
isIdentical := func(e edit) bool { return e == identical || e == identicalContext } |
||||
indentOf := func(s string) string { return s[:len(s)-len(strings.TrimLeftFunc(s, unicode.IsSpace))] } |
||||
for i, e := range es { |
||||
if isIdentical(e) { |
||||
// Print current line if adjacent symbols are non-identical.
|
||||
switch { |
||||
case i-1 >= 0 && !isIdentical(es[i-1]): |
||||
es[i] = identicalContext |
||||
case i+1 < len(es) && !isIdentical(es[i+1]): |
||||
es[i] = identicalContext |
||||
} |
||||
} else { |
||||
// Print any preceding or succeeding lines,
|
||||
// where the leading indent is a prefix of the current indent.
|
||||
// Indentation often indicates a parent-child relationship
|
||||
// in structured source code.
|
||||
addParents := func(ss []string, si, direction int) { |
||||
childIndent := indentOf(ss[si]) |
||||
for j := direction; i+j >= 0 && i+j < len(es) && isIdentical(es[i+j]); j += direction { |
||||
parentIndent := indentOf(ss[si+j]) |
||||
if strings.HasPrefix(childIndent, parentIndent) && len(parentIndent) < len(childIndent) && parentIndent != "" { |
||||
es[i+j] = identicalContext |
||||
childIndent = parentIndent |
||||
} |
||||
} |
||||
} |
||||
switch e { |
||||
case removed, modified: // arbitrarily use the x value for modified values
|
||||
addParents(xs, xi, -1) |
||||
addParents(xs, xi, +1) |
||||
case inserted: |
||||
addParents(ys, yi, -1) |
||||
addParents(ys, yi, +1) |
||||
} |
||||
} |
||||
if e != inserted { |
||||
xi++ |
||||
} |
||||
if e != removed { |
||||
yi++ |
||||
} |
||||
} |
||||
|
||||
// Show the line for a single hidden identical line,
|
||||
// since it occupies the same vertical height.
|
||||
for i, e := range es { |
||||
if e == identical { |
||||
prevNotIdentical := i-1 < 0 || es[i-1] != identical |
||||
nextNotIdentical := i+1 >= len(es) || es[i+1] != identical |
||||
if prevNotIdentical && nextNotIdentical { |
||||
es[i] = identicalContext |
||||
} |
||||
} |
||||
} |
||||
|
||||
// Adjust the maxSize, reserving space for the final summary.
|
||||
if maxSize < 0 { |
||||
maxSize = math.MaxInt |
||||
} |
||||
maxSize -= len(stats{len(xs) + len(ys), len(xs), len(ys)}.appendText(nil)) |
||||
|
||||
// mayAppendLine appends a line if it does not exceed maxSize.
|
||||
// Otherwise, it just updates prevStats.
|
||||
var buf []byte |
||||
var prevStats stats |
||||
mayAppendLine := func(edit edit, line string) { |
||||
// Append the stats (if non-zero) and the line text.
|
||||
// The stats reports the number of preceding identical lines.
|
||||
if !truncated { |
||||
bufLen := len(buf) // original length (in case we exceed maxSize)
|
||||
if !prevStats.isZero() { |
||||
buf = prevStats.appendText(buf) |
||||
prevStats = stats{} // just printed, so clear the stats
|
||||
} |
||||
buf = fmt.Appendf(buf, "%c %s\n", edit, line) |
||||
truncated = len(buf) > maxSize |
||||
if !truncated { |
||||
return |
||||
} |
||||
buf = buf[:bufLen] // restore original buffer contents
|
||||
} |
||||
|
||||
// Output is truncated, so just update the statistics.
|
||||
switch edit { |
||||
case identical: |
||||
prevStats.numIdentical++ |
||||
case removed: |
||||
prevStats.numRemoved++ |
||||
case inserted: |
||||
prevStats.numInserted++ |
||||
} |
||||
} |
||||
|
||||
// Process the entire edit script.
|
||||
for len(es) > 0 { |
||||
num := len(es) - len(bytes.TrimLeft(es, string(es[:1]))) |
||||
switch es[0] { |
||||
case identical: |
||||
prevStats.numIdentical += num |
||||
xs, ys = xs[num:], ys[num:] |
||||
case identicalContext: |
||||
for n := len(xs) - num; len(xs) > n; xs, ys = xs[1:], ys[1:] { |
||||
mayAppendLine(identical, xs[0]) // implies xs[0] == ys[0]
|
||||
} |
||||
case modified: |
||||
for n := len(xs) - num; len(xs) > n; xs = xs[1:] { |
||||
mayAppendLine(removed, xs[0]) |
||||
} |
||||
for n := len(ys) - num; len(ys) > n; ys = ys[1:] { |
||||
mayAppendLine(inserted, ys[0]) |
||||
} |
||||
case removed: |
||||
for n := len(xs) - num; len(xs) > n; xs = xs[1:] { |
||||
mayAppendLine(removed, xs[0]) |
||||
} |
||||
case inserted: |
||||
for n := len(ys) - num; len(ys) > n; ys = ys[1:] { |
||||
mayAppendLine(inserted, ys[0]) |
||||
} |
||||
} |
||||
es = es[num:] |
||||
} |
||||
if len(xs)+len(ys)+len(es) > 0 { |
||||
panic("BUG: slices not fully consumed") |
||||
} |
||||
|
||||
if !prevStats.isZero() { |
||||
buf = prevStats.appendText(buf) // may exceed maxSize
|
||||
} |
||||
return string(buf), truncated |
||||
} |
||||
|
||||
// stats counts the lines of each kind that were summarized
// instead of being printed individually.
type stats struct{ numIdentical, numRemoved, numInserted int }

// isZero reports whether the three counters sum to zero.
func (s stats) isZero() bool {
	total := s.numIdentical + s.numRemoved + s.numInserted
	return total == 0
}

// appendText appends a one-line, '…'-prefixed summary of the counters to b
// and returns the extended slice. Only counters greater than zero are named;
// if none are, a bare "…" line is appended.
func (s stats) appendText(b []byte) []byte {
	same, del, ins := s.numIdentical > 0, s.numRemoved > 0, s.numInserted > 0
	switch {
	case same && del && ins:
		return fmt.Appendf(b, "… %d identical, %d removed, and %d inserted lines\n", s.numIdentical, s.numRemoved, s.numInserted)
	case same && del:
		return fmt.Appendf(b, "… %d identical and %d removed lines\n", s.numIdentical, s.numRemoved)
	case same && ins:
		return fmt.Appendf(b, "… %d identical and %d inserted lines\n", s.numIdentical, s.numInserted)
	case del && ins:
		return fmt.Appendf(b, "… %d removed and %d inserted lines\n", s.numRemoved, s.numInserted)
	case same:
		return fmt.Appendf(b, "… %d identical lines\n", s.numIdentical)
	case del:
		return fmt.Appendf(b, "… %d removed lines\n", s.numRemoved)
	case ins:
		return fmt.Appendf(b, "… %d inserted lines\n", s.numInserted)
	}
	return fmt.Appendf(b, "…\n")
}
||||
|
||||
// diffStrings computes an edit-script of two slices of strings.
|
||||
//
|
||||
// This calls cmp.Equal to access the "github.com/go-cmp/cmp/internal/diff"
|
||||
// implementation, which has an O(N) diffing algorithm. It is not guaranteed
|
||||
// to produce an optimal edit-script, but protects our runtime against
|
||||
// adversarial inputs that would wreck the optimal O(N²) algorithm used by
|
||||
// most diffing packages available in open-source.
|
||||
//
|
||||
// TODO(https://go.dev/issue/58893): Use "golang.org/x/tools/diff" instead?
|
||||
func diffStrings(xs, ys []string) []edit { |
||||
d := new(diffRecorder) |
||||
cmp.Equal(xs, ys, cmp.Reporter(d)) |
||||
if diffTest { |
||||
numRemoved := bytes.Count(d.script, []byte{removed}) |
||||
numInserted := bytes.Count(d.script, []byte{inserted}) |
||||
if len(xs) != len(d.script)-numInserted || len(ys) != len(d.script)-numRemoved { |
||||
panic("BUG: edit-script is inconsistent") |
||||
} |
||||
} |
||||
return d.script |
||||
} |
||||
|
||||
type edit = byte |
||||
|
||||
const ( |
||||
identical edit = ' ' // equal symbol in both x and y
|
||||
modified edit = '~' // modified symbol in both x and y
|
||||
removed edit = '-' // removed symbol from x
|
||||
inserted edit = '+' // inserted symbol from y
|
||||
) |
||||
|
||||
// diffRecorder reproduces an edit-script, essentially recording
|
||||
// the edit-script from "github.com/google/go-cmp/cmp/internal/diff".
|
||||
// This implements the cmp.Reporter interface.
|
||||
type diffRecorder struct { |
||||
last cmp.PathStep |
||||
script []edit |
||||
} |
||||
|
||||
func (d *diffRecorder) PushStep(ps cmp.PathStep) { d.last = ps } |
||||
|
||||
func (d *diffRecorder) Report(rs cmp.Result) { |
||||
if si, ok := d.last.(cmp.SliceIndex); ok { |
||||
if rs.Equal() { |
||||
d.script = append(d.script, identical) |
||||
} else { |
||||
switch xi, yi := si.SplitKeys(); { |
||||
case xi >= 0 && yi >= 0: |
||||
d.script = append(d.script, modified) |
||||
case xi >= 0: |
||||
d.script = append(d.script, removed) |
||||
case yi >= 0: |
||||
d.script = append(d.script, inserted) |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
func (d *diffRecorder) PopStep() { d.last = nil } |
||||
@ -0,0 +1,196 @@ |
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package safediff |
||||
|
||||
import ( |
||||
"strings" |
||||
"testing" |
||||
|
||||
"github.com/google/go-cmp/cmp" |
||||
) |
||||
|
||||
func init() { diffTest = true } |
||||
|
||||
func TestLines(t *testing.T) { |
||||
// The diffs shown below technically depend on the stability of cmp,
|
||||
// but that should be fine for sufficiently simple diffs like these.
|
||||
// If the output does change, that would suggest a significant regression
|
||||
// in the optimality of cmp's diffing algorithm.
|
||||
|
||||
x := `{ |
||||
"firstName": "John", |
||||
"lastName": "Smith", |
||||
"isAlive": true, |
||||
"age": 27, |
||||
"address": { |
||||
"streetAddress": "21 2nd Street", |
||||
"city": "New York", |
||||
"state": "NY", |
||||
"postalCode": "10021-3100" |
||||
}, |
||||
"phoneNumbers": [{ |
||||
"type": "home", |
||||
"number": "212 555-1234" |
||||
}, { |
||||
"type": "office", |
||||
"number": "646 555-4567" |
||||
}], |
||||
"children": [ |
||||
"Catherine", |
||||
"Thomas", |
||||
"Trevor" |
||||
], |
||||
"spouse": null |
||||
}` |
||||
y := x |
||||
y = strings.ReplaceAll(y, `"New York"`, `"Los Angeles"`) |
||||
y = strings.ReplaceAll(y, `"NY"`, `"CA"`) |
||||
y = strings.ReplaceAll(y, `"646 555-4567"`, `"315 252-8888"`) |
||||
|
||||
wantDiff := ` |
||||
… 5 identical lines |
||||
"address": { |
||||
"streetAddress": "21 2nd Street", |
||||
- "city": "New York", |
||||
- "state": "NY", |
||||
+ "city": "Los Angeles", |
||||
+ "state": "CA", |
||||
"postalCode": "10021-3100" |
||||
}, |
||||
… 3 identical lines |
||||
}, { |
||||
"type": "office", |
||||
- "number": "646 555-4567" |
||||
+ "number": "315 252-8888" |
||||
}], |
||||
… 7 identical lines |
||||
`[1:] |
||||
gotDiff, gotTrunc := Lines(x, y, -1) |
||||
if d := cmp.Diff(gotDiff, wantDiff); d != "" { |
||||
t.Errorf("Lines mismatch (-got +want):\n%s\ngot:\n%s\nwant:\n%s", d, gotDiff, wantDiff) |
||||
} else if gotTrunc == true { |
||||
t.Errorf("Lines: output unexpectedly truncated") |
||||
} |
||||
|
||||
wantDiff = ` |
||||
… 5 identical lines |
||||
"address": { |
||||
"streetAddress": "21 2nd Street", |
||||
- "city": "New York", |
||||
- "state": "NY", |
||||
+ "city": "Los Angeles", |
||||
… 15 identical, 1 removed, and 2 inserted lines |
||||
`[1:] |
||||
gotDiff, gotTrunc = Lines(x, y, 200) |
||||
if d := cmp.Diff(gotDiff, wantDiff); d != "" { |
||||
t.Errorf("Lines mismatch (-got +want):\n%s\ngot:\n%s\nwant:\n%s", d, gotDiff, wantDiff) |
||||
} else if gotTrunc == false { |
||||
t.Errorf("Lines: output unexpectedly not truncated") |
||||
} |
||||
|
||||
wantDiff = "… 17 identical, 3 removed, and 3 inserted lines\n" |
||||
gotDiff, gotTrunc = Lines(x, y, 0) |
||||
if d := cmp.Diff(gotDiff, wantDiff); d != "" { |
||||
t.Errorf("Lines mismatch (-got +want):\n%s\ngot:\n%s\nwant:\n%s", d, gotDiff, wantDiff) |
||||
} else if gotTrunc == false { |
||||
t.Errorf("Lines: output unexpectedly not truncated") |
||||
} |
||||
|
||||
x = `{ |
||||
"unrelated": [ |
||||
"unrelated", |
||||
], |
||||
"related": { |
||||
"unrelated": [ |
||||
"unrelated", |
||||
], |
||||
"related": { |
||||
"unrelated": [ |
||||
"unrelated", |
||||
], |
||||
"related": { |
||||
"related": "changed", |
||||
}, |
||||
"unrelated": [ |
||||
"unrelated", |
||||
], |
||||
}, |
||||
"unrelated": [ |
||||
"unrelated", |
||||
], |
||||
}, |
||||
"unrelated": [ |
||||
"unrelated", |
||||
], |
||||
}` |
||||
y = strings.ReplaceAll(x, "changed", "CHANGED") |
||||
|
||||
wantDiff = ` |
||||
… 4 identical lines |
||||
"related": { |
||||
… 3 identical lines |
||||
"related": { |
||||
… 3 identical lines |
||||
"related": { |
||||
- "related": "changed", |
||||
+ "related": "CHANGED", |
||||
}, |
||||
… 3 identical lines |
||||
}, |
||||
… 3 identical lines |
||||
}, |
||||
… 4 identical lines |
||||
`[1:] |
||||
gotDiff, gotTrunc = Lines(x, y, -1) |
||||
if d := cmp.Diff(gotDiff, wantDiff); d != "" { |
||||
t.Errorf("Lines mismatch (-got +want):\n%s\ngot:\n%s\nwant:\n%s", d, gotDiff, wantDiff) |
||||
} else if gotTrunc == true { |
||||
t.Errorf("Lines: output unexpectedly truncated") |
||||
} |
||||
|
||||
x = `{ |
||||
"ACLs": [ |
||||
{ |
||||
"Action": "accept", |
||||
"Users": ["group:all"], |
||||
"Ports": ["tag:tmemes:80"], |
||||
}, |
||||
], |
||||
}` |
||||
y = strings.ReplaceAll(x, "tag:tmemes:80", "tag:tmemes:80,8383") |
||||
wantDiff = ` |
||||
{ |
||||
"ACLs": [ |
||||
{ |
||||
"Action": "accept", |
||||
"Users": ["group:all"], |
||||
- "Ports": ["tag:tmemes:80"], |
||||
+ "Ports": ["tag:tmemes:80,8383"], |
||||
}, |
||||
], |
||||
} |
||||
`[1:] |
||||
gotDiff, gotTrunc = Lines(x, y, -1) |
||||
if d := cmp.Diff(gotDiff, wantDiff); d != "" { |
||||
t.Errorf("Lines mismatch (-got +want):\n%s\ngot:\n%s\nwant:\n%s", d, gotDiff, wantDiff) |
||||
} else if gotTrunc == true { |
||||
t.Errorf("Lines: output unexpectedly truncated") |
||||
} |
||||
} |
||||
|
||||
func FuzzDiff(f *testing.F) { |
||||
f.Fuzz(func(t *testing.T, x, y string, maxSize int) { |
||||
const maxInput = 1e3 |
||||
if len(x) > maxInput { |
||||
x = x[:maxInput] |
||||
} |
||||
if len(y) > maxInput { |
||||
y = y[:maxInput] |
||||
} |
||||
diff, _ := Lines(x, y, maxSize) // make sure this does not panic
|
||||
if strings.Count(diff, "\n") > 1 && maxSize >= 0 && len(diff) > maxSize { |
||||
t.Fatal("maxSize exceeded") |
||||
} |
||||
}) |
||||
} |
||||
Loading…
Reference in new issue