From 39ac96680979d6e0907115462bab242feff6a5ff Mon Sep 17 00:00:00 2001
From: "Ali R. Vahdati"
Date: Mon, 13 May 2024 11:49:18 +0200
Subject: [PATCH] Refactor `GetDatasetDetailsPublished` and add more tests

Split the request and parsing logic into createFilter, makeRequest and
processDatasetDetails. Results are accumulated over all chunks of
datasetList, and each response body is closed as soon as its chunk has
been processed instead of being deferred to the end of the function.
---
 cmd/datasetPublishData/main.go                |   3 +
 cmd/datasetPublishDataRetrieve/main.go        |   5 +-
 datasetUtils/getDatasetDetailsPublished.go    | 114 +++++++++++-------
 .../getDatasetDetailsPublished_test.go        |  85 +++++++++++++
 4 files changed, 162 insertions(+), 45 deletions(-)

diff --git a/cmd/datasetPublishData/main.go b/cmd/datasetPublishData/main.go
index b06b1b7..0d6dcf0 100644
--- a/cmd/datasetPublishData/main.go
+++ b/cmd/datasetPublishData/main.go
@@ -141,6 +141,9 @@ func main() {
 
 	// get sourceFolder and other dataset related info for all Datasets
 	datasetDetails, urls := datasetUtils.GetDatasetDetailsPublished(client, APIServer, datasetList)
+	if len(datasetDetails) == 0 {
+		fmt.Println("No dataset details were retrieved.")
+	}
 
 	// assemble rsync commands to be submitted
 	batchCommands := assembleRsyncCommands(datasetDetails)
diff --git a/cmd/datasetPublishDataRetrieve/main.go b/cmd/datasetPublishDataRetrieve/main.go
index 49afdfa..09caac1 100644
--- a/cmd/datasetPublishDataRetrieve/main.go
+++ b/cmd/datasetPublishDataRetrieve/main.go
@@ -108,7 +108,10 @@ func main() {
 	datasetList, _, _ := datasetUtils.GetDatasetsOfPublication(client, APIServer, *publishedDataId)
 
 	// get sourceFolder and other dataset related info for all Datasets and print them
-	datasetUtils.GetDatasetDetailsPublished(client, APIServer, datasetList)
+	datasetDetails, _ := datasetUtils.GetDatasetDetailsPublished(client, APIServer, datasetList)
+	if len(datasetDetails) == 0 {
+		fmt.Println("No dataset details were retrieved.")
+	}
 
 	if !*retrieveFlag {
 		color.Set(color.FgRed)
diff --git a/datasetUtils/getDatasetDetailsPublished.go b/datasetUtils/getDatasetDetailsPublished.go
index 5367612..62ecc44 100644
--- a/datasetUtils/getDatasetDetailsPublished.go
+++ b/datasetUtils/getDatasetDetailsPublished.go
@@ -31,6 +31,4 @@ The function returns two lists:
 func GetDatasetDetailsPublished(client *http.Client, APIServer string, datasetList []string) ([]Dataset, []string) {
 	outputDatasetDetails := make([]Dataset, 0)
 	urls := make([]string, 0)
-	sizeArray := make([]int, 0)
-	numFilesArray := make([]int, 0)
 
@@ -44,58 +42,86 @@ func GetDatasetDetailsPublished(client *http.Client, APIServer string, datasetLi
 		if end > len(datasetList) {
 			end = len(datasetList)
 		}
-
-		var filter = `{"where":{"pid":{"inq":["` +
-			strings.Join(datasetList[i:end], `","`) +
-			`"]}},"fields":{"pid":true,"sourceFolder":true,"size":true,"ownerGroup":true,"numberOfFiles":true}}`
-
-		v := url.Values{}
-		v.Set("filter", filter)
-		v.Add("isPublished", "true")
-
-		var myurl = APIServer + "/Datasets?" + v.Encode()
-		// log.Println("Url:", myurl)
-
-		resp, err := client.Get(myurl)
+		chunk := datasetList[i:end]
+
+		resp, err := makeRequest(client, APIServer, createFilter(chunk))
 		if err != nil {
 			log.Fatal("Get dataset details failed:", err)
 		}
-		defer resp.Body.Close()
 
 		if resp.StatusCode == 200 {
-			body, _ := io.ReadAll(resp.Body)
+			chunkDetails, chunkURLs := processDatasetDetails(resp, chunk)
+			outputDatasetDetails = append(outputDatasetDetails, chunkDetails...)
+			urls = append(urls, chunkURLs...)
+		} else {
+			log.Printf("Querying dataset details failed with status code %v\n", resp.StatusCode)
+		}
+		// Close the body once per chunk; a deferred close would keep every
+		// response open until the whole list has been processed.
+		resp.Body.Close()
+	}
+
+	return outputDatasetDetails, urls
+}
 
-			datasetDetails := make([]Dataset, 0)
+// createFilter returns the JSON filter for the "/Datasets" query: it selects
+// the datasets whose pid is in datasetList and limits the returned fields to
+// pid, sourceFolder, size, ownerGroup and numberOfFiles.
+func createFilter(datasetList []string) string {
+	return `{"where":{"pid":{"inq":["` +
+		strings.Join(datasetList, `","`) +
+		`"]}},"fields":{"pid":true,"sourceFolder":true,"size":true,"ownerGroup":true,"numberOfFiles":true}}`
+}
 
-			_ = json.Unmarshal(body, &datasetDetails)
+// makeRequest sends a GET request to APIServer + "/Datasets" with filter and
+// isPublished=true as query parameters. On success the caller must close the
+// response body.
+func makeRequest(client *http.Client, APIServer string, filter string) (*http.Response, error) {
+	v := url.Values{}
+	v.Set("filter", filter)
+	v.Add("isPublished", "true")
+
+	return client.Get(APIServer + "/Datasets?" + v.Encode())
+}
 
-			// verify if details were actually found for all available Datasets
-			for _, datasetId := range datasetList[i:end] {
-				detailsFound := false
-				for _, datasetDetail := range datasetDetails {
-					if datasetDetail.Pid == datasetId {
-						detailsFound = true
-						outputDatasetDetails = append(outputDatasetDetails, datasetDetail)
-						color.Set(color.FgGreen)
-						log.Printf("%s %9d %v %v\n", datasetId, datasetDetail.Size/1024./1024., datasetDetail.OwnerGroup, datasetDetail.SourceFolder)
-						color.Unset()
-						//https: //doi2.psi.ch/datasets/das/work/p16/p16628/20181012_lungs/large_volume_360/R2-6/stitching/data_final_volume_fullresolution/
-						url := "https://" + PUBLISHServer + "/datasets" + datasetDetail.SourceFolder
-						urls = append(urls, url)
-						sizeArray = append(sizeArray, datasetDetail.Size)
-						numFilesArray = append(numFilesArray, datasetDetail.NumberOfFiles)
-						break
-					}
-				}
-				if !detailsFound {
-					color.Set(color.FgRed)
-					log.Printf("Dataset %s no infos found in catalog - will not be copied !\n", datasetId)
-					color.Unset()
-				}
+// processDatasetDetails decodes the JSON dataset list in resp.Body and, for
+// every ID in datasetList, either logs the dataset and collects it together
+// with its download URL, or logs that no details were found. It does not
+// close resp.Body.
+func processDatasetDetails(resp *http.Response, datasetList []string) ([]Dataset, []string) {
+	outputDatasetDetails := make([]Dataset, 0)
+	urls := make([]string, 0)
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		log.Printf("Reading dataset details failed: %v\n", err)
+	}
+
+	datasetDetails := make([]Dataset, 0)
+	if err := json.Unmarshal(body, &datasetDetails); err != nil {
+		log.Printf("Parsing dataset details failed: %v\n", err)
+	}
+
+	for _, datasetId := range datasetList {
+		detailsFound := false
+		for _, datasetDetail := range datasetDetails {
+			if datasetDetail.Pid == datasetId {
+				detailsFound = true
+				outputDatasetDetails = append(outputDatasetDetails, datasetDetail)
+				color.Set(color.FgGreen)
+				log.Printf("%s %9d %v %v\n", datasetId, datasetDetail.Size/1024./1024., datasetDetail.OwnerGroup, datasetDetail.SourceFolder)
+				color.Unset()
+				url := "https://" + PUBLISHServer + "/datasets" + datasetDetail.SourceFolder
+				urls = append(urls, url)
+				break
 			}
-		} else {
-			log.Printf("Querying dataset details failed with status code %v\n", resp.StatusCode)
+		}
+		if !detailsFound {
+			color.Set(color.FgRed)
+			log.Printf("Dataset %s no infos found in catalog - will not be copied !\n", datasetId)
+			color.Unset()
 		}
 	}
+
 	return outputDatasetDetails, urls
 }
diff --git a/datasetUtils/getDatasetDetailsPublished_test.go b/datasetUtils/getDatasetDetailsPublished_test.go
index 38836bc..2352090 100644
--- a/datasetUtils/getDatasetDetailsPublished_test.go
+++ b/datasetUtils/getDatasetDetailsPublished_test.go
@@ -4,6 +4,8 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"testing"
+	"io"
+	"strings"
 )
 
 // This test checks that the function correctly parses the response from the server.
@@ -29,3 +31,86 @@ func TestGetDatasetDetailsPublished(t *testing.T) {
 		t.Errorf("Unexpected URLs: %v", urls)
 	}
 }
+
+func TestGetDatasetDetailsPublished_MissingDatasets(t *testing.T) {
+	// Create a mock HTTP server that returns a list of datasets that does not include all the requested datasets
+	server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+		rw.Write([]byte(`[{"pid": "1", "sourceFolder": "/folder1", "size": 100, "ownerGroup": "group1", "numberOfFiles": 10}]`))
+	}))
+	defer server.Close()
+
+	// Create a new HTTP client
+	client := &http.Client{}
+
+	// Call the function with the mock server's URL and a list of dataset IDs
+	datasets, urls := GetDatasetDetailsPublished(client, server.URL, []string{"1", "2"})
+
+	// Since the server does not return details for all the requested datasets, the function should log a message for the missing datasets.
+	// We can't directly test this with the `testing` package
+	if len(datasets) != 1 || datasets[0].Pid != "1" {
+		t.Errorf("Unexpected datasets: %v", datasets)
+	}
+	if len(urls) != 1 || urls[0] != "https://doi2.psi.ch/datasets/folder1" {
+		t.Errorf("Unexpected URLs: %v", urls)
+	}
+}
+
+func TestGetDatasetDetailsPublished_EmptyList(t *testing.T) {
+	// Create a mock HTTP server that always returns an empty list of datasets
+	server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+		rw.Write([]byte(`[]`))
+	}))
+	defer server.Close()
+
+	// Create a new HTTP client
+	client := &http.Client{}
+
+	// Call the function with the mock server's URL and a list of dataset IDs
+	datasets, urls := GetDatasetDetailsPublished(client, server.URL, []string{"1"})
+
+	// Since the server returns an empty list, the function should return empty lists as well
+	if len(datasets) != 0 || len(urls) != 0 {
+		t.Errorf("Expected empty lists, got %v and %v", datasets, urls)
+	}
+}
+
+func TestCreateFilter(t *testing.T) {
+	datasetList := []string{"1", "2", "3"}
+	expected := `{"where":{"pid":{"inq":["1","2","3"]}},"fields":{"pid":true,"sourceFolder":true,"size":true,"ownerGroup":true,"numberOfFiles":true}}`
+	filter := createFilter(datasetList)
+	if filter != expected {
+		t.Errorf("Expected %s, got %s", expected, filter)
+	}
+}
+
+func TestMakeRequest(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+		rw.Write([]byte(`OK`))
+	}))
+	defer server.Close()
+
+	client := &http.Client{}
+	filter := `{"where":{"pid":{"inq":["1"]}},"fields":{"pid":true,"sourceFolder":true,"size":true,"ownerGroup":true,"numberOfFiles":true}}`
+	resp, err := makeRequest(client, server.URL, filter)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		t.Errorf("Expected status code 200, got %v", resp.StatusCode)
+	}
+}
+
+func TestProcessDatasetDetails(t *testing.T) {
+	resp := &http.Response{
+		Body: io.NopCloser(strings.NewReader(`[{"pid": "1", "sourceFolder": "/folder1", "size": 100, "ownerGroup": "group1", "numberOfFiles": 10}]`)),
+	}
+	datasetList := []string{"1"}
+	datasets, urls := processDatasetDetails(resp, datasetList)
+	if len(datasets) != 1 || datasets[0].Pid != "1" {
+		t.Errorf("Unexpected datasets: %v", datasets)
+	}
+	if len(urls) != 1 || urls[0] != "https://doi2.psi.ch/datasets/folder1" {
+		t.Errorf("Unexpected URLs: %v", urls)
+	}
+}