Skip to content

Commit

Permalink
Refactor GetDatasetDetailsPublished and add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kavir1698 committed May 13, 2024
1 parent abb143a commit 39ac966
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 49 deletions.
3 changes: 3 additions & 0 deletions cmd/datasetPublishData/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ func main() {

// get sourceFolder and other dataset related info for all Datasets
datasetDetails, urls := datasetUtils.GetDatasetDetailsPublished(client, APIServer, datasetList)
if datasetDetails == nil && urls == nil {
fmt.Println("No dataset details were retrieved.")
}

// assemble rsync commands to be submitted
batchCommands := assembleRsyncCommands(datasetDetails)
Expand Down
7 changes: 5 additions & 2 deletions cmd/datasetPublishDataRetrieve/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,11 @@ func main() {
datasetList, _, _ := datasetUtils.GetDatasetsOfPublication(client, APIServer, *publishedDataId)

// get sourceFolder and other dataset related info for all Datasets and print them
datasetUtils.GetDatasetDetailsPublished(client, APIServer, datasetList)

datasetDetails, urls := datasetUtils.GetDatasetDetailsPublished(client, APIServer, datasetList)
if datasetDetails == nil && urls == nil {
fmt.Println("No dataset details were retrieved.")
}

if !*retrieveFlag {
color.Set(color.FgRed)
log.Printf("\n\nNote: you run in 'dry' mode to simply check what would happen.")
Expand Down
103 changes: 56 additions & 47 deletions datasetUtils/getDatasetDetailsPublished.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,6 @@ The function returns two lists:
- A list of URLs for the datasets.
*/
func GetDatasetDetailsPublished(client *http.Client, APIServer string, datasetList []string) ([]Dataset, []string) {
outputDatasetDetails := make([]Dataset, 0)
urls := make([]string, 0)
sizeArray := make([]int, 0)
numFilesArray := make([]int, 0)

log.Println("Dataset ID Size[MB] Owner SourceFolder")
log.Println("====================================================================================================")

Expand All @@ -44,58 +39,72 @@ func GetDatasetDetailsPublished(client *http.Client, APIServer string, datasetLi
if end > len(datasetList) {
end = len(datasetList)
}

var filter = `{"where":{"pid":{"inq":["` +
strings.Join(datasetList[i:end], `","`) +
`"]}},"fields":{"pid":true,"sourceFolder":true,"size":true,"ownerGroup":true,"numberOfFiles":true}}`

v := url.Values{}
v.Set("filter", filter)
v.Add("isPublished", "true")

var myurl = APIServer + "/Datasets?" + v.Encode()
// log.Println("Url:", myurl)

resp, err := client.Get(myurl)

filter := createFilter(datasetList[i:end])

resp, err := makeRequest(client, APIServer, filter)
if err != nil {
log.Fatal("Get dataset details failed:", err)
}
defer resp.Body.Close()

if resp.StatusCode == 200 {
body, _ := io.ReadAll(resp.Body)
outputDatasetDetails, urls := processDatasetDetails(resp, datasetList[i:end])
return outputDatasetDetails, urls
} else {
log.Printf("Querying dataset details failed with status code %v\n", resp.StatusCode)
}
}

return nil, nil
}

datasetDetails := make([]Dataset, 0)
// createFilter builds the JSON document passed as the "filter" query
// parameter of the Datasets request.
//
// The filter selects every dataset whose pid is contained in datasetList and
// restricts the returned fields to pid, sourceFolder, size, ownerGroup and
// numberOfFiles.
//
// Each ID is JSON-encoded individually, so an ID containing a double quote or
// a backslash cannot break (or inject into) the filter. An empty or nil
// datasetList produces an empty "inq" array.
func createFilter(datasetList []string) string {
	quoted := make([]string, len(datasetList))
	for i, pid := range datasetList {
		// Marshalling a plain string never returns an error, so it is safe to drop.
		encoded, _ := json.Marshal(pid)
		quoted[i] = string(encoded)
	}

	return `{"where":{"pid":{"inq":[` +
		strings.Join(quoted, ",") +
		`]}},"fields":{"pid":true,"sourceFolder":true,"size":true,"ownerGroup":true,"numberOfFiles":true}}`
}

_ = json.Unmarshal(body, &datasetDetails)
// makeRequest issues a GET for APIServer + "/Datasets" using client.
//
// The query string carries two parameters: "filter", set to the given filter
// document, and "isPublished", set to "true". The response and error of
// client.Get are returned unchanged; on success the caller is responsible for
// closing the response body.
func makeRequest(client *http.Client, APIServer string, filter string) (*http.Response, error) {
	query := url.Values{
		"filter":      {filter},
		"isPublished": {"true"},
	}

	return client.Get(APIServer + "/Datasets?" + query.Encode())
}

// verify if details were actually found for all available Datasets
for _, datasetId := range datasetList[i:end] {
detailsFound := false
for _, datasetDetail := range datasetDetails {
if datasetDetail.Pid == datasetId {
detailsFound = true
outputDatasetDetails = append(outputDatasetDetails, datasetDetail)
color.Set(color.FgGreen)
log.Printf("%s %9d %v %v\n", datasetId, datasetDetail.Size/1024./1024., datasetDetail.OwnerGroup, datasetDetail.SourceFolder)
color.Unset()
//https: //doi2.psi.ch/datasets/das/work/p16/p16628/20181012_lungs/large_volume_360/R2-6/stitching/data_final_volume_fullresolution/
url := "https://" + PUBLISHServer + "/datasets" + datasetDetail.SourceFolder
urls = append(urls, url)
sizeArray = append(sizeArray, datasetDetail.Size)
numFilesArray = append(numFilesArray, datasetDetail.NumberOfFiles)
break
}
}
if !detailsFound {
color.Set(color.FgRed)
log.Printf("Dataset %s no infos found in catalog - will not be copied !\n", datasetId)
color.Unset()
}
func processDatasetDetails(resp *http.Response, datasetList []string) ([]Dataset, []string) {
outputDatasetDetails := make([]Dataset, 0)
urls := make([]string, 0)

body, _ := io.ReadAll(resp.Body)

datasetDetails := make([]Dataset, 0)

_ = json.Unmarshal(body, &datasetDetails)

for _, datasetId := range datasetList {
detailsFound := false
for _, datasetDetail := range datasetDetails {
if datasetDetail.Pid == datasetId {
detailsFound = true
outputDatasetDetails = append(outputDatasetDetails, datasetDetail)
color.Set(color.FgGreen)
log.Printf("%s %9d %v %v\n", datasetId, datasetDetail.Size/1024./1024., datasetDetail.OwnerGroup, datasetDetail.SourceFolder)
color.Unset()
url := "https://" + PUBLISHServer + "/datasets" + datasetDetail.SourceFolder
urls = append(urls, url)
break
}
} else {
log.Printf("Querying dataset details failed with status code %v\n", resp.StatusCode)
}
if !detailsFound {
color.Set(color.FgRed)
log.Printf("Dataset %s no infos found in catalog - will not be copied !\n", datasetId)
color.Unset()
}
}

return outputDatasetDetails, urls
}
84 changes: 84 additions & 0 deletions datasetUtils/getDatasetDetailsPublished_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"net/http"
"net/http/httptest"
"testing"
"io"
"strings"
)

// This test checks that the function correctly parses the response from the server.
Expand All @@ -29,3 +31,85 @@ func TestGetDatasetDetailsPublished(t *testing.T) {
t.Errorf("Unexpected URLs: %v", urls)
}
}

// TestGetDatasetDetailsPublished_MissingDatasets requests two datasets from a
// server that only knows one of them and checks that exactly the known
// dataset and its URL are returned.
func TestGetDatasetDetailsPublished_MissingDatasets(t *testing.T) {
	// Mock catalog: answers every request with dataset "1" only.
	catalog := func(rw http.ResponseWriter, req *http.Request) {
		rw.Write([]byte(`[{"pid": "1", "sourceFolder": "/folder1", "size": 100, "ownerGroup": "group1", "numberOfFiles": 10}]`))
	}
	server := httptest.NewServer(http.HandlerFunc(catalog))
	defer server.Close()

	// Dataset "2" is requested but absent from the response.
	requested := []string{"1", "2"}
	datasets, urls := GetDatasetDetailsPublished(&http.Client{}, server.URL, requested)

	// The log line emitted for the missing dataset is not asserted here;
	// only the returned values are checked.
	datasetsOK := len(datasets) == 1 && datasets[0].Pid == "1"
	if !datasetsOK {
		t.Errorf("Unexpected datasets: %v", datasets)
	}

	urlsOK := len(urls) == 1 && urls[0] == "https://doi2.psi.ch/datasets/folder1"
	if !urlsOK {
		t.Errorf("Unexpected URLs: %v", urls)
	}
}

// TestGetDatasetDetailsPublished_EmptyList checks that nothing is returned
// when the server answers with an empty JSON array.
func TestGetDatasetDetailsPublished_EmptyList(t *testing.T) {
	// Mock catalog that knows no datasets at all.
	emptyCatalog := func(rw http.ResponseWriter, req *http.Request) {
		rw.Write([]byte(`[]`))
	}
	server := httptest.NewServer(http.HandlerFunc(emptyCatalog))
	defer server.Close()

	datasets, urls := GetDatasetDetailsPublished(&http.Client{}, server.URL, []string{"1"})

	// Neither a dataset nor a URL may come back for an empty response.
	if len(datasets)+len(urls) > 0 {
		t.Errorf("Expected empty lists, got %v and %v", datasets, urls)
	}
}

// TestCreateFilter pins the exact filter document produced for three plain IDs.
func TestCreateFilter(t *testing.T) {
	const expected = `{"where":{"pid":{"inq":["1","2","3"]}},"fields":{"pid":true,"sourceFolder":true,"size":true,"ownerGroup":true,"numberOfFiles":true}}`

	if got := createFilter([]string{"1", "2", "3"}); got != expected {
		t.Errorf("Expected %s, got %s", expected, got)
	}
}

// TestMakeRequest checks that makeRequest reaches the /Datasets endpoint with
// the expected query parameters and hands back the server's response.
func TestMakeRequest(t *testing.T) {
	filter := `{"where":{"pid":{"inq":["1"]}},"fields":{"pid":true,"sourceFolder":true,"size":true,"ownerGroup":true,"numberOfFiles":true}}`

	server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
		// Verify what actually arrives at the server. Only t.Errorf is used
		// here because the handler runs outside the test goroutine.
		if req.URL.Path != "/Datasets" {
			t.Errorf("Expected path /Datasets, got %v", req.URL.Path)
		}
		query := req.URL.Query()
		if got := query.Get("filter"); got != filter {
			t.Errorf("Expected filter %s, got %s", filter, got)
		}
		if got := query.Get("isPublished"); got != "true" {
			t.Errorf("Expected isPublished=true, got %q", got)
		}
		rw.Write([]byte(`OK`))
	}))
	defer server.Close()

	client := &http.Client{}
	resp, err := makeRequest(client, server.URL, filter)
	if err != nil {
		// Stop here: resp is nil on error and must not be dereferenced.
		t.Fatalf("Expected no error, got %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		t.Errorf("Expected status code 200, got %v", resp.StatusCode)
	}
}

// TestProcessDatasetDetails feeds a canned one-dataset response body straight
// into processDatasetDetails and checks the extracted dataset and URL.
func TestProcessDatasetDetails(t *testing.T) {
	const payload = `[{"pid": "1", "sourceFolder": "/folder1", "size": 100, "ownerGroup": "group1", "numberOfFiles": 10}]`

	// Only the body is populated; no real HTTP exchange takes place.
	resp := &http.Response{Body: io.NopCloser(strings.NewReader(payload))}

	datasets, urls := processDatasetDetails(resp, []string{"1"})

	datasetsOK := len(datasets) == 1 && datasets[0].Pid == "1"
	if !datasetsOK {
		t.Errorf("Unexpected datasets: %v", datasets)
	}

	urlsOK := len(urls) == 1 && urls[0] == "https://doi2.psi.ch/datasets/folder1"
	if !urlsOK {
		t.Errorf("Unexpected URLs: %v", urls)
	}
}

0 comments on commit 39ac966

Please sign in to comment.