Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/v4backend #115

Open
wants to merge 25 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
3c46560
v4 compat changes
consolethinks Oct 1, 2024
f44116d
fix tests for v4 changes
consolethinks Oct 2, 2024
62389a4
add response body check to CheckMetadataValidity
consolethinks Oct 2, 2024
1114b60
change local_api_server address to match changes in scicatlive
consolethinks Oct 4, 2024
f7ca9db
v4 changes for ingestion command
consolethinks Oct 4, 2024
d864d4c
fix tests for v4 changes
consolethinks Oct 4, 2024
9d8c99b
v4 fixes for datasetArchiver
consolethinks Oct 4, 2024
2dfb02c
v4 updates for datasetGetProposal command
consolethinks Oct 4, 2024
7dfa34c
fix createJob test for v4 changes
consolethinks Oct 4, 2024
335a371
BE v4 changes
Oct 7, 2024
3831c94
general code cleanup: better error handling, preallocating some slices
consolethinks Oct 8, 2024
4757845
fix auth test
consolethinks Oct 8, 2024
fc006ee
move transferType.go to cliutils
consolethinks Oct 10, 2024
c3b8c15
createJob update to v4 BE
consolethinks Oct 10, 2024
22c6991
fix bearer token mapping to header
consolethinks Oct 10, 2024
919ea52
fix createJob_test for v4 changes
consolethinks Oct 10, 2024
f2c0564
v4 fixes for getProposal
consolethinks Oct 10, 2024
a5c70ba
cleanup and update datasetRetriever for v4
consolethinks Oct 11, 2024
5bbab98
remove unused parameter
consolethinks Oct 11, 2024
3c832ee
small fixes for datasetRetriever
consolethinks Oct 11, 2024
bbd0f58
fix ownerGroup setting when requesting archival jobs
consolethinks Oct 18, 2024
dcb57b2
write test for transfer type
consolethinks Oct 18, 2024
48b81b5
use /users/{id}/userIdentity endpoint
consolethinks Oct 18, 2024
ca8c254
changes to conform with CI checks
consolethinks Oct 18, 2024
adc4cb1
check auth token insertion
consolethinks Oct 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions cmd/commands/datasetRetriever.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,10 @@ For further help see "` + MANUAL + `"`,
Args: exactArgsWithVersionException(1),
Run: func(cmd *cobra.Command, args []string) {
//consts & vars
const PROD_API_SERVER string = "https://dacat.psi.ch/api/v3"
const TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3"
const DEV_API_SERVER string = "https://dacat-development.psi.ch/api/v3"

const PROD_RSYNC_RETRIEVE_SERVER string = "pb-retrieve.psi.ch"
const TEST_RSYNC_RETRIEVE_SERVER string = "pbt-retrieve.psi.ch"
const DEV_RSYNC_RETRIEVE_SERVER string = "arematest2in.psi.ch"
const LOCAL_RSYNC_RETRIEVE_SERVER string = "localhost"

// const PROD_RSYNC_RETRIEVE_SERVER string = "ebarema4in.psi.ch"
// const TEST_RSYNC_RETRIEVE_SERVER string = "ebaremat1in.psi.ch"
Expand Down Expand Up @@ -116,6 +113,7 @@ For further help see "` + MANUAL + `"`,
ownerGroup, _ := cmd.Flags().GetString("ownergroup")
testenvFlag, _ := cmd.Flags().GetBool("testenv")
devenvFlag, _ := cmd.Flags().GetBool("devenv")
localenvFlag, _ := cmd.Flags().GetBool("localenv")
showVersion, _ := cmd.Flags().GetBool("version")

if datasetUtils.TestFlags != nil {
Expand All @@ -141,6 +139,10 @@ For further help see "` + MANUAL + `"`,

datasetUtils.CheckForNewVersion(client, APP, VERSION)

if localenvFlag {
APIServer = LOCAL_API_SERVER
RSYNCServer = LOCAL_RSYNC_RETRIEVE_SERVER
}
if devenvFlag {
APIServer = DEV_API_SERVER
RSYNCServer = DEV_RSYNC_RETRIEVE_SERVER
Expand Down Expand Up @@ -186,10 +188,22 @@ For further help see "` + MANUAL + `"`,
}

// get sourceFolder and other dataset related info for all Datasets
datasetDetails, err := datasetUtils.GetDatasetDetails(client, APIServer, user["accessToken"], datasetList, ownerGroup)
datasetDetails, missingDatasetIds, err := datasetUtils.GetDatasetDetails(client, APIServer, user["accessToken"], datasetList, ownerGroup)
if err != nil {
log.Fatal(err)
}
fmt.Printf("\nFound datasets:\n")
fmt.Println("Dataset ID Size[MB] Owner SourceFolder")
fmt.Println("====================================================================================================")
for _, datasetDetail := range datasetDetails {
log.Printf("%s %9d %v %v\n", datasetId, datasetDetail.Size/1024./1024., datasetDetail.OwnerGroup, datasetDetail.SourceFolder)
}
if len(missingDatasetIds) > 0 {
fmt.Printf("\nThe following dataset id's were missing or had non-matching ownerGroups so they won't be copied: \n")
minottic marked this conversation as resolved.
Show resolved Hide resolved
for _, id := range missingDatasetIds {
fmt.Printf(" - \"%s\"\n", id)
}
}

// assemble rsync commands to be submitted
batchCommands, destinationFolders := assembleRsyncCommands(user["username"], datasetDetails, destinationPath)
Expand Down Expand Up @@ -218,6 +232,7 @@ func init() {
datasetRetrieverCmd.Flags().String("ownergroup", "", "Defines to fetch only datasets of the specified ownerGroup (default is to fetch all available datasets)")
datasetRetrieverCmd.Flags().Bool("testenv", false, "Use test environment (qa) (default is to use production system)")
datasetRetrieverCmd.Flags().Bool("devenv", false, "Use development environment (default is to use production system)")
datasetRetrieverCmd.Flags().Bool("localenv", false, "Use local environment instead of production environment (developers only)")

datasetRetrieverCmd.MarkFlagsMutuallyExclusive("testenv", "devenv")
}
11 changes: 6 additions & 5 deletions datasetUtils/getAvailableDatasets.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ package datasetUtils
import (
"fmt"
"os/exec"
"regexp"
"strings"

version "github.com/mcuadros/go-version"
"regexp"
)

const DatasetIdPrefix = "20.500.11935"
Expand Down Expand Up @@ -50,13 +51,13 @@ func fetchDatasetsFromServer(username string, RSYNCServer string) ([]string, err
if err != nil {
return nil, fmt.Errorf("error getting rsync version: %w", err)
}

cmd := buildRsyncCommand(username, RSYNCServer, versionNumber)
out, err := cmd.Output()
if err != nil {
return nil, err
}

return parseRsyncOutput(out), nil
}

Expand Down Expand Up @@ -87,14 +88,14 @@ var getRsyncVersion = func() (string, error) {
return "", err
}
version := string(output)

// Use a regular expression to find the version number
re := regexp.MustCompile(`\d+\.\d+\.\d+`)
versionNumber := re.FindString(version)
if versionNumber == "" {
return "", fmt.Errorf("could not find version number in rsync version string: %s", version)
}

return versionNumber, nil
}

Expand Down
39 changes: 14 additions & 25 deletions datasetUtils/getDatasetDetails.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@ import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/url"
"slices"
"strings"

"github.com/fatih/color"
)

type Dataset struct {
Expand All @@ -35,10 +33,9 @@ The function sends HTTP GET requests to the API server in chunks of 100 datasets
Returns:
- A slice of Dataset structs containing the details of the datasets that match the owner group filter.
*/
func GetDatasetDetails(client *http.Client, APIServer string, accessToken string, datasetList []string, ownerGroup string) ([]Dataset, error) {
outputDatasetDetails := make([]Dataset, 0)
log.Println("Dataset ID Size[MB] Owner SourceFolder")
log.Println("====================================================================================================")
func GetDatasetDetails(client *http.Client, APIServer string, accessToken string, datasetList []string, ownerGroup string) ([]Dataset, []string, error) {
var returnedDatasets []Dataset
var missingDatasetIds []string

// split large request into chunks
chunkSize := 100
Expand All @@ -58,31 +55,23 @@ func GetDatasetDetails(client *http.Client, APIServer string, accessToken string

datasetDetails, err := fetchDatasetDetails(client, accessToken, myurl)
if err != nil {
return nil, err
return nil, nil, err
}

for _, datasetId := range datasetList[i:end] {
detailsFound := false
for _, datasetDetail := range datasetDetails {
if datasetDetail.Pid == datasetId {
detailsFound = true
if ownerGroup == "" || ownerGroup == datasetDetail.OwnerGroup {
outputDatasetDetails = append(outputDatasetDetails, datasetDetail)
color.Set(color.FgGreen)
}
log.Printf("%s %9d %v %v\n", datasetId, datasetDetail.Size/1024./1024., datasetDetail.OwnerGroup, datasetDetail.SourceFolder)
color.Unset()
break
}
datasetHasIdAndOwnerGroup := func(dataset Dataset) bool {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do I understand correctly that this, in addition to the filter here, checks whether the ownerGroup is empty or equal to dataset.ownerGroup? If so, couldn't one achieve this by modifying the filter? Then (and also without changing the filter), to find the missing datasets, one could transform datasetDetails into a map and loop over datasetList[i:end]; the IDs that are not in the map are the missing ones. This would be O(n) (map creation) + O(m) (datasetList[i:end] loop), while the current implementation is roughly O(n*m): a linear IndexFunc search over n elements for each of the m loop iterations, if I am not mistaken.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's also a good idea to add a test for this

Copy link
Collaborator Author

@consolethinks consolethinks Oct 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I actually wanted to optimize it but I ended up just simplifying the original code. I'll try to apply your suggestion. (TODO)

return dataset.Pid == datasetId && (ownerGroup == "" || dataset.OwnerGroup == ownerGroup)
}
if !detailsFound {
color.Set(color.FgRed)
log.Printf("Dataset %s no infos found in catalog - will not be copied !\n", datasetId)
color.Unset()

i := slices.IndexFunc(datasetDetails, datasetHasIdAndOwnerGroup) // linear search!
if i >= 0 {
returnedDatasets = append(returnedDatasets, datasetDetails[i]) // found id
} else {
missingDatasetIds = append(missingDatasetIds, datasetId) // id missing
}
}
}
return outputDatasetDetails, nil
return returnedDatasets, missingDatasetIds, nil
}

func fetchDatasetDetails(client *http.Client, token string, url string) ([]Dataset, error) {
Expand Down
42 changes: 21 additions & 21 deletions datasetUtils/getDatasetDetails_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@ func TestGetDatasetDetails_EmptyList(t *testing.T) {
}))
// Close the server when test finishes
defer server.Close()

// Use the mock server's URL as the API
APIServer := server.URL
accessToken := "testToken"
datasetList := []string{}
ownerGroup := "group1"

// Create a new HTTP client
client := &http.Client{}

// Call the function to be tested
datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)
datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)

// Check the result
if len(datasets) != 0 {
t.Errorf("Expected 0 datasets, got %d", len(datasets))
Expand All @@ -41,19 +41,19 @@ func TestGetDatasetDetails_Non200StatusCode(t *testing.T) {
}))
// Close the server when test finishes
defer server.Close()

// Use the mock server's URL as the API
APIServer := server.URL
accessToken := "testToken"
datasetList := []string{"123"}
ownerGroup := "group1"

// Create a new HTTP client
client := &http.Client{}

// Call the function to be tested
datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)
datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)

// Check the result
if len(datasets) != 0 {
t.Errorf("Expected 0 datasets, got %d", len(datasets))
Expand All @@ -68,19 +68,19 @@ func TestGetDatasetDetails_DatasetNotFound(t *testing.T) {
}))
// Close the server when test finishes
defer server.Close()

// Use the mock server's URL as the API
APIServer := server.URL
accessToken := "testToken"
datasetList := []string{"123"}
ownerGroup := "group1"

// Create a new HTTP client
client := &http.Client{}

// Call the function to be tested
datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)
datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)

// Check the result
if len(datasets) != 0 {
t.Errorf("Expected 0 datasets, got %d", len(datasets))
Expand All @@ -95,19 +95,19 @@ func TestGetDatasetDetails_DatasetFound(t *testing.T) {
}))
// Close the server when test finishes
defer server.Close()

// Use the mock server's URL as the API
APIServer := server.URL
accessToken := "testToken"
datasetList := []string{"123"}
ownerGroup := "group1"

// Create a new HTTP client
client := &http.Client{}

// Call the function to be tested
datasets, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)
datasets, _, _ := GetDatasetDetails(client, APIServer, accessToken, datasetList, ownerGroup)

// Check the result
if len(datasets) != 1 {
t.Errorf("Expected 1 dataset, got %d", len(datasets))
Expand All @@ -117,4 +117,4 @@ func TestGetDatasetDetails_DatasetFound(t *testing.T) {
t.Errorf("Dataset details do not match expected values")
}
}
}
}