Skip to content

Commit

Permalink
Merge pull request #251 from vmarkovtsev/master
Browse files Browse the repository at this point in the history
Fix the problem with missing positions in --typos-dataset
  • Loading branch information
vmarkovtsev committed Mar 21, 2019
2 parents 9e3954e + 92dc2f9 commit 71f2f83
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 32 deletions.
8 changes: 8 additions & 0 deletions internal/core/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -894,3 +894,11 @@ func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Com
}
return commits, nil
}

// GetSensibleRemote extracts a remote URL of the repository to identify it.
func GetSensibleRemote(repository *git.Repository) string {
if r, err := repository.Remotes(); err == nil && len(r) > 0 {
return r[0].Config().URLs[0]
}
return "<no remote>"
}
4 changes: 1 addition & 3 deletions internal/plumbing/ticks.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,7 @@ func (ticks *TicksSinceStart) Initialize(repository *git.Repository) error {
delete(ticks.commits, key)
}
}
if r, err := repository.Remotes(); err == nil && len(r) > 0 {
ticks.remote = r[0].Config().URLs[0]
}
ticks.remote = core.GetSensibleRemote(repository)
return nil
}

Expand Down
60 changes: 37 additions & 23 deletions leaves/research/typos.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"
"fmt"
"io"
"log"
"unicode/utf8"

"github.com/gogo/protobuf/proto"
Expand Down Expand Up @@ -34,6 +35,8 @@ type TyposDatasetBuilder struct {
lcontext *levenshtein.Context
// xpather filters identifiers.
xpather uast_items.ChangesXPather
// remote carries the repository remote URL (for debugging)
remote string
}

// TyposResult is returned by TyposDatasetBuilder.Finalize() and carries the found typo-fix
Expand Down Expand Up @@ -121,6 +124,7 @@ func (tdb *TyposDatasetBuilder) Initialize(repository *git.Repository) error {
}
tdb.lcontext = &levenshtein.Context{}
tdb.xpather.XPath = "//uast:Identifier"
tdb.remote = core.GetSensibleRemote(repository)
return nil
}

Expand Down Expand Up @@ -150,33 +154,35 @@ func (tdb *TyposDatasetBuilder) Consume(deps map[string]interface{}) (map[string
linesAfter := bytes.Split(cache[change.Change.To.TreeEntry.Hash].Data, []byte{'\n'})
diff := diffs[change.Change.To.Name]
var lineNumBefore, lineNumAfter int
clear := false
var candidates []candidate
focusedLinesBefore := map[int]bool{}
focusedLinesAfter := map[int]bool{}
removedSize := 0
for _, edit := range diff.Diffs {
size := utf8.RuneCountInString(edit.Text)
switch edit.Type {
case diffmatchpatch.DiffDelete:
lineNumBefore += size
clear = size == 1
removedSize = size
case diffmatchpatch.DiffInsert:
if size == 1 && clear {
dist := tdb.lcontext.Distance(
string(linesBefore[lineNumBefore-1]),
string(linesAfter[lineNumAfter]))
if dist <= tdb.MaximumAllowedDistance {
candidates = append(candidates, candidate{lineNumBefore - 1, lineNumAfter})
focusedLinesBefore[lineNumBefore-1] = true
focusedLinesAfter[lineNumAfter] = true
if size == removedSize {
for i := 0; i < size; i++ {
lb := lineNumBefore - size + i
la := lineNumAfter + i
dist := tdb.lcontext.Distance(string(linesBefore[lb]), string(linesAfter[la]))
if dist <= tdb.MaximumAllowedDistance {
candidates = append(candidates, candidate{lb, la})
focusedLinesBefore[lb] = true
focusedLinesAfter[la] = true
}
}
}
lineNumAfter += size
clear = false
removedSize = 0
case diffmatchpatch.DiffEqual:
lineNumBefore += size
lineNumAfter += size
clear = false
removedSize = 0
}
}
if len(candidates) == 0 {
Expand All @@ -190,25 +196,33 @@ func (tdb *TyposDatasetBuilder) Consume(deps map[string]interface{}) (map[string
removedIdentifiers := map[int][]nodes.Node{}
for _, n := range nodesAdded {
pos := uast.PositionsOf(n.(nodes.Object))
if pos.Start() != nil {
line := int(pos.Start().Line) - 1
if focusedLinesAfter[line] {
addedIdentifiers[line] = append(addedIdentifiers[line], n)
}
if pos.Start() == nil {
log.Printf("repo %s commit %s file %s adds identifier %s with no position",
tdb.remote, commit.String(), change.Change.To.Name,
n.(nodes.Object)["Name"].(nodes.String))
continue
}
line := int(pos.Start().Line) - 1
if focusedLinesAfter[line] {
addedIdentifiers[line] = append(addedIdentifiers[line], n)
}
}
for _, n := range nodesRemoved {
pos := uast.PositionsOf(n.(nodes.Object))
if pos.Start() == nil {
log.Printf("repo %s commit %s file %s removes identifier %s with no position",
tdb.remote, commit.String(), change.Change.To.Name,
n.(nodes.Object)["Name"].(nodes.String))
continue
}
line := int(pos.Start().Line) - 1
if pos.Start() != nil {
if focusedLinesBefore[line] {
removedIdentifiers[line] = append(removedIdentifiers[line], n)
}
if focusedLinesBefore[line] {
removedIdentifiers[line] = append(removedIdentifiers[line], n)
}
}
for _, c := range candidates {
nodesBefore := addedIdentifiers[c.Before]
nodesAfter := removedIdentifiers[c.After]
nodesBefore := removedIdentifiers[c.Before]
nodesAfter := addedIdentifiers[c.After]
if len(nodesBefore) == 1 && len(nodesAfter) == 1 {
idBefore := string(nodesBefore[0].(nodes.Object)["Name"].(nodes.String))
idAfter := string(nodesAfter[0].(nodes.Object)["Name"].(nodes.String))
Expand Down
62 changes: 56 additions & 6 deletions leaves/research/typos_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import (

"github.com/gogo/protobuf/proto"
"github.com/stretchr/testify/assert"
"gopkg.in/bblfsh/client-go.v3/tools"
"gopkg.in/bblfsh/sdk.v2/uast"
"gopkg.in/bblfsh/sdk.v2/uast/nodes"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
"gopkg.in/src-d/hercules.v10/internal/core"
Expand Down Expand Up @@ -69,7 +72,7 @@ func AddHash(t *testing.T, cache map[plumbing.Hash]*items.CachedBlob, hash strin
cache[objhash] = cb
}

func TestTyposDatasetConsume(t *testing.T) {
func bakeTyposDeps(t *testing.T) map[string]interface{} {
deps := map[string]interface{}{}
cache := map[plumbing.Hash]*items.CachedBlob{}
AddHash(t, cache, "b9a12fd144274c99c7c9a0a32a0268f8b36d2f2c")
Expand Down Expand Up @@ -133,25 +136,72 @@ func TestTyposDatasetConsume(t *testing.T) {
diffResult, err := fd.Consume(deps)
assert.Nil(t, err)
deps[items.DependencyFileDiff] = diffResult[items.DependencyFileDiff]
return deps
}

func TestTyposDatasetConsume(t *testing.T) {
deps := bakeTyposDeps(t)
tbd := &TyposDatasetBuilder{}
assert.Nil(t, tbd.Initialize(test.Repository))
res, err := tbd.Consume(deps)
assert.Nil(t, res)
assert.Nil(t, err)
assert.Len(t, tbd.typos, 4)
assert.Equal(t, tbd.typos[0].Wrong, "TestZeroInitializeFile")
assert.Equal(t, tbd.typos[0].Correct, "TestZeroInitialize")
assert.Len(t, tbd.typos, 26)
assert.Equal(t, tbd.typos[0].Wrong, "TestInitialize")
assert.Equal(t, tbd.typos[0].Correct, "TestInitializeFile")
assert.Equal(t, tbd.typos[0].Commit, plumbing.NewHash(
"84165d3b02647fae12cc026c7a580045246e8c98"))
assert.Equal(t, tbd.typos[0].File, "file_test.go")
assert.Equal(t, tbd.typos[0].Line, 74)
assert.Equal(t, tbd.typos[0].Line, 19)

deps[core.DependencyIsMerge] = true
res, err = tbd.Consume(deps)
assert.Nil(t, res)
assert.Nil(t, err)
assert.Len(t, tbd.typos, 4)
assert.Len(t, tbd.typos, 26)
}

func dropPositions(root nodes.Node, target string) {
for element := range tools.Iterate(tools.NewIterator(root, tools.PreOrder)) {
obj, isobj := element.(nodes.Object)
if !isobj {
continue
}
nameval, exists := obj["Name"]
if !exists {
continue
}
if name, isstr := nameval.(nodes.String); !isstr || string(name) != target {
continue
}
posval, exists := obj[uast.KeyPos]
if !exists {
continue
}
posobj, isobj := posval.(nodes.Object)
if !isobj {
continue
}
for k, v := range posobj {
po, _ := v.(nodes.Object)
if uast.AsPosition(po) != nil {
posobj[k] = nil
}
}
}
}

func TestTyposDatasetConsumeMissingPosition(t *testing.T) {
deps := bakeTyposDeps(t)
uastChanges := deps[uast_items.DependencyUastChanges].([]uast_items.Change)
dropPositions(uastChanges[0].Before, "TestZeroInitialize")
dropPositions(uastChanges[0].After, "TestZeroInitializeFile")
tbd := &TyposDatasetBuilder{}
assert.Nil(t, tbd.Initialize(test.Repository))
res, err := tbd.Consume(deps)
assert.Nil(t, res)
assert.Nil(t, err)
assert.Len(t, tbd.typos, 25)
}

func fixtureTyposDataset() *TyposDatasetBuilder {
Expand Down

0 comments on commit 71f2f83

Please sign in to comment.