Skip to content

Commit

Permalink
Merge pull request #341 from vmarkovtsev/master
Browse files Browse the repository at this point in the history
Add exact signature id matching
  • Loading branch information
vmarkovtsev committed Jan 10, 2020
2 parents bd565d1 + 464dc7f commit 9c2eaa7
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 48 deletions.
97 changes: 67 additions & 30 deletions internal/plumbing/identity/identity.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ type Detector struct {
PeopleDict map[string]int
// ReversedPeopleDict maps developer id -> description
ReversedPeopleDict []string
// ExactSignatures chooses the matching algorithm: opportunistic email || name
// or exact email && name
ExactSignatures bool

l core.Logger
}
Expand All @@ -43,6 +46,10 @@ const (
// ConfigIdentityDetectorPeopleDictPath is the name of the configuration option
// (Detector.Configure()) which allows to set the external PeopleDict mapping from a file.
ConfigIdentityDetectorPeopleDictPath = "IdentityDetector.PeopleDictPath"
// ConfigIdentityDetectorExactSignatures is the name of the configuration option
// (Detector.Configure()) which changes the matching algorithm to exact signature (name + email)
// correspondence.
ConfigIdentityDetectorExactSignatures = "IdentityDetector.ExactSignatures"
// FactIdentityDetectorPeopleCount is the name of the fact which is inserted in
// Detector.Configure(). It is equal to the overall number of unique authors
// (the length of ReversedPeopleDict).
Expand Down Expand Up @@ -78,7 +85,13 @@ func (detector *Detector) ListConfigurationOptions() []core.ConfigurationOption
Description: "Path to the file with developer -> name|email associations.",
Flag: "people-dict",
Type: core.PathConfigurationOption,
Default: ""},
Default: ""}, {
Name: ConfigIdentityDetectorExactSignatures,
Description: "Disable separate name/email matching. This will lead to considerably more " +
"identities and should not be normally used.",
Flag: "exact-signatures",
Type: core.BoolConfigurationOption,
Default: false},
}
return options[:]
}
Expand All @@ -96,6 +109,9 @@ func (detector *Detector) Configure(facts map[string]interface{}) error {
if val, exists := facts[FactIdentityDetectorReversedPeopleDict].([]string); exists {
detector.ReversedPeopleDict = val
}
if val, exists := facts[ConfigIdentityDetectorExactSignatures].(bool); exists {
detector.ExactSignatures = val
}
if detector.PeopleDict == nil || detector.ReversedPeopleDict == nil {
peopleDictPath, _ := facts[ConfigIdentityDetectorPeopleDictPath].(string)
if peopleDictPath != "" {
Expand Down Expand Up @@ -133,13 +149,19 @@ func (detector *Detector) Initialize(repository *git.Repository) error {
// in Provides(). If there was an error, nil is returned.
func (detector *Detector) Consume(deps map[string]interface{}) (map[string]interface{}, error) {
commit := deps[core.DependencyCommit].(*object.Commit)
var authorID int
var exists bool
signature := commit.Author
authorID, exists := detector.PeopleDict[strings.ToLower(signature.Email)]
if !exists {
authorID, exists = detector.PeopleDict[strings.ToLower(signature.Name)]
if !detector.ExactSignatures {
authorID, exists = detector.PeopleDict[strings.ToLower(signature.Email)]
if !exists {
authorID = AuthorMissing
authorID, exists = detector.PeopleDict[strings.ToLower(signature.Name)]
}
} else {
authorID, exists = detector.PeopleDict[strings.ToLower(signature.String())]
}
if !exists {
authorID = AuthorMissing
}
return map[string]interface{}{DependencyAuthor: authorID}, nil
}
Expand Down Expand Up @@ -184,7 +206,8 @@ func (detector *Detector) GeneratePeopleDict(commits []*object.Commit) {
size := 0

mailmapFile, err := commits[len(commits)-1].File(".mailmap")
if err == nil {
// TODO(vmarkovtsev): properly handle .mailmap if ExactSignatures
if !detector.ExactSignatures && err == nil {
mailMapContents, err := mailmapFile.Contents()
if err == nil {
mailmap := ParseMailmap(mailMapContents)
Expand Down Expand Up @@ -239,34 +262,48 @@ func (detector *Detector) GeneratePeopleDict(commits []*object.Commit) {
}

for _, commit := range commits {
email := strings.ToLower(commit.Author.Email)
name := strings.ToLower(commit.Author.Name)
id, exists := dict[email]
if exists {
_, exists := dict[name]
if !exists {
dict[name] = id
names[id] = append(names[id], name)
if !detector.ExactSignatures {
email := strings.ToLower(commit.Author.Email)
name := strings.ToLower(commit.Author.Name)
id, exists := dict[email]
if exists {
_, exists := dict[name]
if !exists {
dict[name] = id
names[id] = append(names[id], name)
}
continue
}
id, exists = dict[name]
if exists {
dict[email] = id
emails[id] = append(emails[id], email)
continue
}
dict[email] = size
dict[name] = size
emails[size] = append(emails[size], email)
names[size] = append(names[size], name)
size++
} else { // detector.ExactSignatures
sig := strings.ToLower(commit.Author.String())
if _, exists := dict[sig]; !exists {
dict[sig] = size
size++
}
continue
}
id, exists = dict[name]
if exists {
dict[email] = id
emails[id] = append(emails[id], email)
continue
}
dict[email] = size
dict[name] = size
emails[size] = append(emails[size], email)
names[size] = append(names[size], name)
size++
}
reverseDict := make([]string, size)
for _, val := range dict {
sort.Strings(names[val])
sort.Strings(emails[val])
reverseDict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
if !detector.ExactSignatures {
for _, val := range dict {
sort.Strings(names[val])
sort.Strings(emails[val])
reverseDict[val] = strings.Join(names[val], "|") + "|" + strings.Join(emails[val], "|")
}
} else {
for key, val := range dict {
reverseDict[val] = key
}
}
detector.PeopleDict = dict
detector.ReversedPeopleDict = reverseDict
Expand Down
62 changes: 45 additions & 17 deletions internal/plumbing/identity/identity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ func TestIdentityDetectorMeta(t *testing.T) {
assert.Equal(t, len(id.Provides()), 1)
assert.Equal(t, id.Provides()[0], DependencyAuthor)
opts := id.ListConfigurationOptions()
assert.Len(t, opts, 1)
assert.Len(t, opts, 2)
assert.Equal(t, opts[0].Name, ConfigIdentityDetectorPeopleDictPath)
assert.Equal(t, opts[1].Name, ConfigIdentityDetectorExactSignatures)
logger := core.NewLogger()
assert.NoError(t, id.Configure(map[string]interface{}{
core.ConfigLogger: logger,
Expand Down Expand Up @@ -150,6 +151,28 @@ func TestIdentityDetectorConsume(t *testing.T) {
assert.Equal(t, res[DependencyAuthor].(int), AuthorMissing)
}

// TestIdentityDetectorConsumeExact exercises Detector.Consume() with
// ExactSignatures enabled. PeopleDict is keyed by whole lower-cased
// "name <email>" signatures; the first commit is expected to resolve to id 1
// and the second one to AuthorMissing.
func TestIdentityDetectorConsumeExact(t *testing.T) {
	knownCommit, _ := test.Repository.CommitObject(plumbing.NewHash(
		"5c0e755dd85ac74584d9988cc361eccf02ce1a48"))
	deps := map[string]interface{}{core.DependencyCommit: knownCommit}

	detector := fixtureIdentityDetector()
	detector.ExactSignatures = true
	// Two entries share the same name and differ only by email, so only an
	// exact signature lookup can tell them apart.
	detector.PeopleDict = map[string]int{
		"vadim markovtsev <gmarkhor@gmail.com>": 0,
		"vadim markovtsev <vadim@sourced.tech>": 1,
	}

	result, err := detector.Consume(deps)
	assert.Nil(t, err)
	assert.Equal(t, result[DependencyAuthor].(int), 1)

	// Swap in a commit that the test expects to be absent from PeopleDict.
	unknownCommit, _ := test.Repository.CommitObject(plumbing.NewHash(
		"8a03b5620b1caa72ec9cb847ea88332621e2950a"))
	deps[core.DependencyCommit] = unknownCommit
	result, err = detector.Consume(deps)
	assert.Nil(t, err)
	assert.Equal(t, result[DependencyAuthor].(int), AuthorMissing)
}

func TestIdentityDetectorLoadPeopleDict(t *testing.T) {
id := fixtureIdentityDetector()
err := id.LoadPeopleDict(path.Join("..", "..", "test_data", "identities"))
Expand All @@ -175,22 +198,6 @@ func TestIdentityDetectorLoadPeopleDictWrongPath(t *testing.T) {
assert.NotNil(t, err)
}

/*
// internal compiler error in 1.8
func TestGeneratePeopleDict(t *testing.T) {
id := fixtureIdentityDetector()
commits := make([]*object.Commit, 0)
iter, err := test.Repository.CommitObjects()
for ; err != io.EOF; commit, err := iter.Next() {
if err != nil {
panic(err)
}
commits = append(commits, commit)
}
id.GeneratePeopleDict(commits)
}
*/

func TestIdentityDetectorGeneratePeopleDict(t *testing.T) {
id := fixtureIdentityDetector()
commits := make([]*object.Commit, 0)
Expand Down Expand Up @@ -244,6 +251,27 @@ func TestIdentityDetectorGeneratePeopleDict(t *testing.T) {
assert.NotEqual(t, id.ReversedPeopleDict[len(id.ReversedPeopleDict)-1], AuthorMissingName)
}

// TestIdentityDetectorGeneratePeopleDictExact runs GeneratePeopleDict() with
// ExactSignatures enabled over every commit object of the test repository.
// It verifies that PeopleDict and ReversedPeopleDict have the same size, that
// at least 24 identities are produced, that two signatures with the same name
// but different emails are both present as separate keys, and that the last
// reversed entry is not the AuthorMissingName placeholder.
func TestIdentityDetectorGeneratePeopleDictExact(t *testing.T) {
	id := fixtureIdentityDetector()
	id.ExactSignatures = true

	iter, err := test.Repository.CommitObjects()
	if err != nil {
		// This error used to be overwritten by the first iter.Next() call;
		// fail here instead of dereferencing an unusable iterator.
		t.Fatalf("listing commit objects: %v", err)
	}

	commits := make([]*object.Commit, 0)
	for {
		commit, err := iter.Next()
		if err == io.EOF {
			// The iterator is exhausted.
			break
		}
		if err != nil {
			t.Fatalf("reading commit object: %v", err)
		}
		commits = append(commits, commit)
	}

	id.GeneratePeopleDict(commits)

	ass := assert.New(t)
	ass.Equal(len(id.PeopleDict), len(id.ReversedPeopleDict))
	ass.True(len(id.ReversedPeopleDict) >= 24)
	ass.Contains(id.PeopleDict, "vadim markovtsev <vadim@sourced.tech>")
	ass.Contains(id.PeopleDict, "vadim markovtsev <vadim@athenian.co>")
	ass.NotEqual(id.ReversedPeopleDict[len(id.ReversedPeopleDict)-1], AuthorMissingName)
}

func TestIdentityDetectorLoadPeopleDictInvalidPath(t *testing.T) {
id := fixtureIdentityDetector()
ipath := "/xxxyyyzzzInvalidPath!hehe"
Expand Down
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
description="Python companion for github.com/src-d/hercules to visualize the results.",
long_description=long_description,
long_description_content_type="text/markdown",
version="10.7.0",
version="10.7.1",
license="Apache-2.0",
author="source{d}",
author_email="machine-learning@sourced.tech",
Expand Down

0 comments on commit 9c2eaa7

Please sign in to comment.