From 1b86739f3bd767c69635b1ca16685dfe26def047 Mon Sep 17 00:00:00 2001 From: Vadim Markovtsev Date: Tue, 14 Jan 2020 21:19:08 +0100 Subject: [PATCH] Add --renames-timeout Signed-off-by: Vadim Markovtsev --- internal/plumbing/renames.go | 34 ++++++++++++++++++++++++++++--- internal/plumbing/renames_test.go | 14 ++++++++++++- python/setup.py | 2 +- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/internal/plumbing/renames.go b/internal/plumbing/renames.go index 9d513adc..42059609 100644 --- a/internal/plumbing/renames.go +++ b/internal/plumbing/renames.go @@ -1,6 +1,7 @@ package plumbing import ( + "fmt" "path/filepath" "sort" "strings" @@ -28,6 +29,9 @@ type RenameAnalysis struct { // set it to the default value of 80 (80%). SimilarityThreshold int + // Timeout is the maximum time allowed to spend computing renames in a single commit. + Timeout time.Duration + repository *git.Repository l core.Logger @@ -39,10 +43,18 @@ const ( // CGit's default is 50%. Ours is 80% because 50% can be too computationally expensive. RenameAnalysisDefaultThreshold = 80 + // RenameAnalysisDefaultTimeout is the default value of RenameAnalysis.Timeout (in milliseconds). + RenameAnalysisDefaultTimeout = 60000 + // ConfigRenameAnalysisSimilarityThreshold is the name of the configuration option // (RenameAnalysis.Configure()) which sets the similarity threshold. ConfigRenameAnalysisSimilarityThreshold = "RenameAnalysis.SimilarityThreshold" + // ConfigRenameAnalysisTimeout is the name of the configuration option + // (RenameAnalysis.Configure()) which sets the maximum time allowed to spend + // computing renames in a single commit. + ConfigRenameAnalysisTimeout = "RenameAnalysis.Timeout" + // RenameAnalysisMinimumSize is the minimum size of a blob to be considered. RenameAnalysisMinimumSize = 32 @@ -84,7 +96,13 @@ func (ra *RenameAnalysis) ListConfigurationOptions() []core.ConfigurationOption Description: "The threshold on the similarity index used to detect renames.", Flag: "M", Type: core.IntConfigurationOption, - Default: RenameAnalysisDefaultThreshold}, + Default: RenameAnalysisDefaultThreshold}, { + Name: ConfigRenameAnalysisTimeout, + Description: "The maximum time (milliseconds) allowed to spend computing " + + "renames in a single commit. 0 sets the default.", + Flag: "renames-timeout", + Type: core.IntConfigurationOption, + Default: RenameAnalysisDefaultTimeout}, } return options[:] } @@ -97,6 +115,12 @@ func (ra *RenameAnalysis) Configure(facts map[string]interface{}) error { if val, exists := facts[ConfigRenameAnalysisSimilarityThreshold].(int); exists { ra.SimilarityThreshold = val } + if val, exists := facts[ConfigRenameAnalysisTimeout].(int); exists { + if val < 0 { + return fmt.Errorf("negative renames detection timeout is not allowed: %d", val) + } + ra.Timeout = time.Duration(val) * time.Millisecond + } return nil } @@ -109,6 +133,9 @@ func (ra *RenameAnalysis) Initialize(repository *git.Repository) error { RenameAnalysisDefaultThreshold) ra.SimilarityThreshold = RenameAnalysisDefaultThreshold } + if ra.Timeout == 0 { + ra.Timeout = time.Duration(RenameAnalysisDefaultTimeout) * time.Millisecond + } ra.repository = repository return nil } @@ -119,6 +146,7 @@ func (ra *RenameAnalysis) Initialize(repository *git.Repository) error { // This function returns the mapping with analysis results. The keys must be the same as // in Provides(). If there was an error, nil is returned. func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]interface{}, error) { + beginTime := time.Now() changes := deps[DependencyTreeChanges].(object.Changes) cache := deps[DependencyBlobCache].(map[plumbing.Hash]*CachedBlob) @@ -225,7 +253,7 @@ func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]inter }() aStart := 0 // we will try to find a matching added blob for each deleted blob - for d := 0; d < deletedBlobsA.Len(); d++ { + for d := 0; d < deletedBlobsA.Len() && time.Now().Sub(beginTime) < ra.Timeout; d++ { myBlob := cache[deletedBlobsA[d].change.From.TreeEntry.Hash] mySize := deletedBlobsA[d].size myName := filepath.Base(deletedBlobsA[d].change.From.Name) @@ -283,7 +311,7 @@ func (ra *RenameAnalysis) Consume(deps map[string]interface{}) (map[string]inter wg.Done() }() dStart := 0 - for a := 0; a < addedBlobsB.Len(); a++ { + for a := 0; a < addedBlobsB.Len() && time.Now().Sub(beginTime) < ra.Timeout; a++ { myBlob := cache[addedBlobsB[a].change.To.TreeEntry.Hash] mySize := addedBlobsB[a].size myName := filepath.Base(addedBlobsB[a].change.To.Name) diff --git a/internal/plumbing/renames_test.go b/internal/plumbing/renames_test.go index 57718a4e..43d4bac7 100644 --- a/internal/plumbing/renames_test.go +++ b/internal/plumbing/renames_test.go @@ -8,6 +8,7 @@ import ( "path" "sync" "testing" + "time" "github.com/stretchr/testify/assert" "gopkg.in/src-d/go-git.v4/plumbing" @@ -31,14 +32,17 @@ func TestRenameAnalysisMeta(t *testing.T) { assert.Equal(t, ra.Requires()[0], DependencyBlobCache) assert.Equal(t, ra.Requires()[1], DependencyTreeChanges) opts := ra.ListConfigurationOptions() - assert.Len(t, opts, 1) + assert.Len(t, opts, 2) assert.Equal(t, opts[0].Name, ConfigRenameAnalysisSimilarityThreshold) + assert.Equal(t, opts[1].Name, ConfigRenameAnalysisTimeout) ra.SimilarityThreshold = 0 assert.NoError(t, ra.Configure(map[string]interface{}{ ConfigRenameAnalysisSimilarityThreshold: 70, + ConfigRenameAnalysisTimeout: 1000, })) assert.Equal(t, ra.SimilarityThreshold, 70) + assert.Equal(t, ra.Timeout, time.Second) logger := core.NewLogger() assert.NoError(t, ra.Configure(map[string]interface{}{ @@ -46,6 +50,7 @@ func TestRenameAnalysisMeta(t *testing.T) { })) assert.Equal(t, logger, ra.l) assert.Equal(t, ra.SimilarityThreshold, 70) + assert.Equal(t, ra.Timeout, time.Second) } func TestRenameAnalysisRegistration(t *testing.T) { @@ -138,6 +143,13 @@ func TestRenameAnalysisConsume(t *testing.T) { assert.Nil(t, err) renamed = res[DependencyTreeChanges].(object.Changes) assert.Equal(t, len(renamed), 3) + + ra.SimilarityThreshold = 37 + ra.Timeout = time.Microsecond + res, err = ra.Consume(deps) + assert.Nil(t, err) + renamed = res[DependencyTreeChanges].(object.Changes) + assert.Equal(t, len(renamed), 3) } func TestSortableChanges(t *testing.T) { diff --git a/python/setup.py b/python/setup.py index 67a58064..bb58dc6d 100644 --- a/python/setup.py +++ b/python/setup.py @@ -22,7 +22,7 @@ description="Python companion for github.com/src-d/hercules to visualize the results.", long_description=long_description, long_description_content_type="text/markdown", - version="10.7.1", + version="10.7.2", license="Apache-2.0", author="source{d}", author_email="machine-learning@sourced.tech",