Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clearly defined certifier #2035

Merged
merged 20 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions cmd/guaccollect/cmd/license.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
//
// Copyright 2024 The GUAC Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"context"
"fmt"
"net/http"
"os"
"time"

"github.com/Khan/genqlient/graphql"
"github.com/guacsec/guac/pkg/certifier"
"github.com/guacsec/guac/pkg/certifier/certify"
"github.com/guacsec/guac/pkg/certifier/clearlydefined"
"github.com/guacsec/guac/pkg/cli"
"github.com/guacsec/guac/pkg/logging"
"github.com/spf13/cobra"
"github.com/spf13/viper"
)

// cdOptions holds the validated flag values used by the "cd" (clearly defined
// certifier) command. It is populated by validateCDFlags.
type cdOptions struct {
// GraphQL endpoint the certifier's GraphQL client is created against
graphqlEndpoint string
// header file handed to cli.HTTPHeaderTransport when building the HTTP client
// (presumably lists extra HTTP headers for GraphQL requests — TODO confirm)
headerFile string
// address for pubsub connection
pubsubAddr string
// address for blob store
blobAddr string
// value of the "service-poll" flag, forwarded to initializeNATsandCertifier
// (presumably keeps the certifier running as a polling service instead of a
// single pass — TODO confirm against initializeNATsandCertifier)
poll bool
// interval between certifier running again
interval time.Duration
// enable/disable message publish to queue
publishToQueue bool
// days since the last scan was run (value of the "last-scan" flag),
// forwarded to getPackageQuery.
// 0 means only run once (NOTE(review): carried over from the original
// comment; confirm against getPackageQuery)
daysSinceLastScan int
// sets artificial latency on the certifier (default to nil)
addedLatency *time.Duration
// sets the batch size for pagination query for the certifier
batchSize int
}

// cdCmd is the "cd" subcommand of guaccollect. It validates the command's
// flags, registers the clearly defined certifier, builds a GraphQL client for
// the configured endpoint and then hands control to initializeNATsandCertifier
// together with the package query produced by getPackageQuery.
//
// The process exits with status 1 when flag validation or package query
// construction fails.
var cdCmd = &cobra.Command{
	Use:   "cd [flags]",
	Short: "runs the clearly defined certifier",
	Long: `
guaccollect cd runs the clearly defined certifier, which queries clearly defined for license information
about the packages and sources that are collected in guac. Ingestion to GUAC happens via an event stream (NATS)
to allow for decoupling of the collectors from the ingestion into GUAC.

Each collector collects the "document" and stores it in the blob store for further
evaluation. The collector creates a CDEvent (https://cdevents.dev/) that is published via
the event stream. The downstream guacingest subscribes to the stream and retrieves the "document" from the blob store for
processing and ingestion.

Various blob stores can be used (such as S3, Azure Blob, Google Cloud Bucket) as documented here: https://gocloud.dev/howto/blob/
For example: "s3://my-bucket?region=us-west-1"

Specific authentication methods vary per cloud provider. Please follow the documentation per implementation to ensure
you have access to read and write to the respective blob store.`,
	Run: func(cmd *cobra.Command, args []string) {
		// All flag values are read through viper; validateCDFlags turns the
		// raw strings (durations) into typed options.
		opts, err := validateCDFlags(
			viper.GetString("gql-addr"),
			viper.GetString("header-file"),
			viper.GetString("pubsub-addr"),
			viper.GetString("blob-addr"),
			viper.GetString("interval"),
			viper.GetBool("service-poll"),
			viper.GetBool("publish-to-queue"),
			viper.GetInt("last-scan"),
			viper.GetString("certifier-latency"),
			viper.GetInt("certifier-batch-size"),
		)
		if err != nil {
			fmt.Printf("unable to validate flags: %v\n", err)
			// Best effort: show usage next to the validation error; a failure
			// to print help is not actionable here.
			_ = cmd.Help()
			os.Exit(1)
		}

		ctx := logging.WithLogger(context.Background())
		logger := logging.FromContext(ctx)

		// The certifier must be registered before it can be run.
		if err := certify.RegisterCertifier(clearlydefined.NewClearlyDefinedCertifier, certifier.CertifierClearlyDefined); err != nil {
			logger.Fatalf("unable to register certifier: %v", err)
		}

		// GraphQL client whose transport wraps http.DefaultTransport using the
		// configured header file.
		transport := cli.HTTPHeaderTransport(ctx, opts.headerFile, http.DefaultTransport)
		httpClient := http.Client{Transport: transport}
		gqlclient := graphql.NewClient(opts.graphqlEndpoint, &httpClient)

		packageQueryFunc, err := getPackageQuery(gqlclient, opts.daysSinceLastScan, opts.batchSize, opts.addedLatency)
		if err != nil {
			logger.Errorf("error: %v", err)
			os.Exit(1)
		}

		// packageQueryFunc is a constructor: it is invoked here and its result
		// is what the certifier run receives.
		initializeNATsandCertifier(ctx, opts.blobAddr, opts.pubsubAddr, opts.poll, opts.publishToQueue, opts.interval, packageQueryFunc())
	},
}

// validateCDFlags converts the raw flag values of the "cd" command into a
// cdOptions value.
//
// interval must be a string accepted by time.ParseDuration.
// certifierLatencyStr may be empty, in which case no artificial latency is
// configured (addedLatency stays nil); otherwise it must also be a valid
// duration string. All other arguments are copied into the result unchanged.
//
// On failure a zero cdOptions is returned together with an error that names
// the offending flag and wraps the underlying parse error.
func validateCDFlags(
	graphqlEndpoint,
	headerFile,
	pubsubAddr,
	blobAddr,
	interval string,
	poll bool,
	pubToQueue bool,
	daysSince int,
	certifierLatencyStr string,
	batchSize int) (cdOptions, error) {

	parsedInterval, err := time.ParseDuration(interval)
	if err != nil {
		return cdOptions{}, fmt.Errorf("failed to parse interval: %w", err)
	}

	opts := cdOptions{
		graphqlEndpoint:   graphqlEndpoint,
		headerFile:        headerFile,
		pubsubAddr:        pubsubAddr,
		blobAddr:          blobAddr,
		poll:              poll,
		interval:          parsedInterval,
		publishToQueue:    pubToQueue,
		daysSinceLastScan: daysSince,
		batchSize:         batchSize,
	}

	// An empty latency string means "no added latency": the pointer is left
	// nil so callers can distinguish "unset" from an explicit zero duration.
	if certifierLatencyStr != "" {
		addedLatency, err := time.ParseDuration(certifierLatencyStr)
		if err != nil {
			return cdOptions{}, fmt.Errorf("failed to parse certifier latency: %w", err)
		}
		opts.addedLatency = &addedLatency
	}

	return opts, nil
}

// init builds the flags specific to the cd command, attaches them to cdCmd as
// persistent flags, binds them to viper and registers cdCmd on rootCmd.
// Any failure while building or binding the flags is reported on stderr and
// terminates the process with status 1.
func init() {
	flagNames := []string{
		"interval",
		"last-scan",
		"header-file",
		"certifier-latency",
		"certifier-batch-size",
	}

	flagSet, buildErr := cli.BuildFlags(flagNames)
	if buildErr != nil {
		fmt.Fprintf(os.Stderr, "failed to setup flag: %v", buildErr)
		os.Exit(1)
	}

	persistent := cdCmd.PersistentFlags()
	persistent.AddFlagSet(flagSet)

	bindErr := viper.BindPFlags(persistent)
	if bindErr != nil {
		fmt.Fprintf(os.Stderr, "failed to bind flags: %v", bindErr)
		os.Exit(1)
	}

	rootCmd.AddCommand(cdCmd)
}
4 changes: 2 additions & 2 deletions cmd/guaccollect/cmd/osv.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,10 @@ func initializeNATsandCertifier(ctx context.Context, blobAddr, pubsubAddr string
// Collect
errHandler := func(err error) bool {
if err == nil {
logger.Info("osv certifier ended gracefully")
logger.Info("certifier ended gracefully")
return true
}
logger.Errorf("osv certifier ended with error: %v", err)
logger.Errorf("certifier ended with error: %v", err)
// Continue to emit any documents still in the docChan
return true
}
Expand Down
20 changes: 12 additions & 8 deletions cmd/guacingest/cmd/ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,13 @@ import (
)

type options struct {
pubsubAddr string
blobAddr string
csubClientOptions csub_client.CsubClientOptions
graphqlEndpoint string
headerFile string
queryVulnOnIngestion bool
pubsubAddr string
blobAddr string
csubClientOptions csub_client.CsubClientOptions
graphqlEndpoint string
headerFile string
queryVulnOnIngestion bool
queryLicenseOnIngestion bool
}

func ingest(cmd *cobra.Command, args []string) {
Expand All @@ -58,6 +59,7 @@ func ingest(cmd *cobra.Command, args []string) {
viper.GetBool("csub-tls"),
viper.GetBool("csub-tls-skip-verify"),
viper.GetBool("add-vuln-on-ingest"),
viper.GetBool("add-license-on-ingest"),
args)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand Down Expand Up @@ -97,7 +99,7 @@ func ingest(cmd *cobra.Command, args []string) {
defer csubClient.Close()

emit := func(d *processor.Document) error {
if err := ingestor.Ingest(ctx, d, opts.graphqlEndpoint, transport, csubClient, opts.queryVulnOnIngestion); err != nil {
if err := ingestor.Ingest(ctx, d, opts.graphqlEndpoint, transport, csubClient, opts.queryVulnOnIngestion, opts.queryLicenseOnIngestion); err != nil {
var urlErr *url.Error
if errors.As(err, &urlErr) {
return fmt.Errorf("unable to ingest document due to connection error with graphQL %q : %w", d.SourceInformation.Source, urlErr)
Expand Down Expand Up @@ -127,7 +129,8 @@ func ingest(cmd *cobra.Command, args []string) {
wg.Wait()
}

func validateFlags(pubsubAddr, blobAddr, csubAddr, graphqlEndpoint, headerFile string, csubTls, csubTlsSkipVerify bool, queryVulnIngestion bool, args []string) (options, error) {
func validateFlags(pubsubAddr, blobAddr, csubAddr, graphqlEndpoint, headerFile string, csubTls, csubTlsSkipVerify bool,
queryVulnIngestion bool, queryLicenseIngestion bool, args []string) (options, error) {
var opts options
opts.pubsubAddr = pubsubAddr
opts.blobAddr = blobAddr
Expand All @@ -139,6 +142,7 @@ func validateFlags(pubsubAddr, blobAddr, csubAddr, graphqlEndpoint, headerFile s
opts.graphqlEndpoint = graphqlEndpoint
opts.headerFile = headerFile
opts.queryVulnOnIngestion = queryVulnIngestion
opts.queryLicenseOnIngestion = queryLicenseIngestion

return opts, nil
}
2 changes: 1 addition & 1 deletion cmd/guacingest/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func init() {
cobra.OnInitialize(cli.InitConfig)

set, err := cli.BuildFlags([]string{"pubsub-addr", "blob-addr", "csub-addr", "gql-addr",
"header-file", "add-vuln-on-ingest"})
"header-file", "add-vuln-on-ingest", "add-license-on-ingest"})
if err != nil {
fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err)
os.Exit(1)
Expand Down
20 changes: 11 additions & 9 deletions cmd/guacone/cmd/deps_dev.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@ type depsDevOptions struct {
// query for dependencies
retrieveDependencies bool
// gql endpoint
graphqlEndpoint string
headerFile string
queryVulnOnIngestion bool
graphqlEndpoint string
headerFile string
queryVulnOnIngestion bool
queryLicenseOnIngestion bool
// sets artificial latency on the deps.dev collector (default to nil)
addedLatency *time.Duration
}
Expand Down Expand Up @@ -86,7 +87,7 @@ var depsDevCmd = &cobra.Command{
emit := func(d *processor.Document) error {
totalNum += 1

if err := ingestor.Ingest(ctx, d, opts.graphqlEndpoint, transport, csc, opts.queryVulnOnIngestion); err != nil {
if err := ingestor.Ingest(ctx, d, opts.graphqlEndpoint, transport, csc, opts.queryVulnOnIngestion, opts.queryLicenseOnIngestion); err != nil {
gotErr = true
return fmt.Errorf("unable to ingest document: %w", err)
}
Expand Down Expand Up @@ -138,11 +139,12 @@ var depsDevCmd = &cobra.Command{

func validateDepsDevFlags(args []string) (*depsDevOptions, client.Client, error) {
opts := &depsDevOptions{
poll: viper.GetBool("poll"),
retrieveDependencies: viper.GetBool("retrieve-dependencies"),
graphqlEndpoint: viper.GetString("gql-addr"),
headerFile: viper.GetString("header-file"),
queryVulnOnIngestion: viper.GetBool("add-vuln-on-ingest"),
poll: viper.GetBool("poll"),
retrieveDependencies: viper.GetBool("retrieve-dependencies"),
graphqlEndpoint: viper.GetString("gql-addr"),
headerFile: viper.GetString("header-file"),
queryVulnOnIngestion: viper.GetBool("add-vuln-on-ingest"),
queryLicenseOnIngestion: viper.GetBool("add-license-on-ingest"),
}

addedLatencyStr := viper.GetString("deps-dev-latency")
Expand Down
12 changes: 8 additions & 4 deletions cmd/guacone/cmd/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ type fileOptions struct {
graphqlEndpoint string
headerFile string
// csub client options for identifier strings
csubClientOptions csub_client.CsubClientOptions
queryVulnOnIngestion bool
csubClientOptions csub_client.CsubClientOptions
queryVulnOnIngestion bool
queryLicenseOnIngestion bool
}

var filesCmd = &cobra.Command{
Expand All @@ -67,6 +68,7 @@ var filesCmd = &cobra.Command{
viper.GetBool("csub-tls"),
viper.GetBool("csub-tls-skip-verify"),
viper.GetBool("add-vuln-on-ingest"),
viper.GetBool("add-license-on-ingest"),
args)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand Down Expand Up @@ -127,7 +129,7 @@ var filesCmd = &cobra.Command{

emit := func(d *processor.Document) error {
totalNum += 1
if err := ingestor.Ingest(ctx, d, opts.graphqlEndpoint, transport, csubClient, opts.queryVulnOnIngestion); err != nil {
if err := ingestor.Ingest(ctx, d, opts.graphqlEndpoint, transport, csubClient, opts.queryVulnOnIngestion, opts.queryLicenseOnIngestion); err != nil {
gotErr = true
filesWithErrors = append(filesWithErrors, d.SourceInformation.Source)
return fmt.Errorf("unable to ingest document: %w", err)
Expand Down Expand Up @@ -159,7 +161,8 @@ var filesCmd = &cobra.Command{
},
}

func validateFilesFlags(keyPath, keyID, graphqlEndpoint, headerFile, csubAddr string, csubTls, csubTlsSkipVerify bool, queryVulnIngestion bool, args []string) (fileOptions, error) {
func validateFilesFlags(keyPath, keyID, graphqlEndpoint, headerFile, csubAddr string, csubTls, csubTlsSkipVerify bool,
queryVulnIngestion bool, queryLicenseIngestion bool, args []string) (fileOptions, error) {
var opts fileOptions
opts.graphqlEndpoint = graphqlEndpoint
opts.headerFile = headerFile
Expand All @@ -186,6 +189,7 @@ func validateFilesFlags(keyPath, keyID, graphqlEndpoint, headerFile, csubAddr st
opts.csubClientOptions = csubOpts
opts.path = args[0]
opts.queryVulnOnIngestion = queryVulnIngestion
opts.queryLicenseOnIngestion = queryLicenseIngestion
return opts, nil
}

Expand Down
19 changes: 11 additions & 8 deletions cmd/guacone/cmd/gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@ import (
)

type gcsOptions struct {
graphqlEndpoint string
headerFile string
csubClientOptions csub_client.CsubClientOptions
bucket string
queryVulnOnIngestion bool
graphqlEndpoint string
headerFile string
csubClientOptions csub_client.CsubClientOptions
bucket string
queryVulnOnIngestion bool
queryLicenseOnIngestion bool
}

const gcsCredentialsPathFlag = "gcp-credentials-path"
Expand All @@ -59,6 +60,7 @@ var gcsCmd = &cobra.Command{
viper.GetBool("csub-tls"),
viper.GetBool("csub-tls-skip-verify"),
viper.GetBool("add-vuln-on-ingest"),
viper.GetBool("add-license-on-ingest"),
args)
if err != nil {
fmt.Printf("unable to validate flags: %v\n", err)
Expand Down Expand Up @@ -110,7 +112,7 @@ var gcsCmd = &cobra.Command{

emit := func(d *processor.Document) error {
totalNum += 1
err := ingestor.Ingest(ctx, d, opts.graphqlEndpoint, transport, csubClient, opts.queryVulnOnIngestion)
err := ingestor.Ingest(ctx, d, opts.graphqlEndpoint, transport, csubClient, opts.queryVulnOnIngestion, opts.queryLicenseOnIngestion)

if err != nil {
gotErr = true
Expand Down Expand Up @@ -140,7 +142,8 @@ var gcsCmd = &cobra.Command{
},
}

func validateGCSFlags(gqlEndpoint, headerFile, csubAddr, credentialsPath string, csubTls, csubTlsSkipVerify bool, queryVulnIngestion bool, args []string) (gcsOptions, error) {
func validateGCSFlags(gqlEndpoint, headerFile, csubAddr, credentialsPath string, csubTls, csubTlsSkipVerify bool,
queryVulnIngestion bool, queryLicenseIngestion bool, args []string) (gcsOptions, error) {
var opts gcsOptions
opts.graphqlEndpoint = gqlEndpoint
opts.headerFile = headerFile
Expand All @@ -160,7 +163,7 @@ func validateGCSFlags(gqlEndpoint, headerFile, csubAddr, credentialsPath string,
return opts, fmt.Errorf("expected either --%s flag or GOOGLE_APPLICATION_CREDENTIALS environment variable", gcsCredentialsPathFlag)
}
opts.queryVulnOnIngestion = queryVulnIngestion

opts.queryLicenseOnIngestion = queryLicenseIngestion
return opts, nil
}

Expand Down
Loading
Loading