diff --git a/CHANGELOG.md b/CHANGELOG.md index 88448d0..8ed05ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - ReleaseDate +### Added +- [PR#261](https://github.com/EmbarkStudios/cargo-about/pull/261) resolved [#246](https://github.com/EmbarkStudios/cargo-about/issues/246) by adding an `--offline` (as well as `--locked` and `--frozen`) option to the `generate` command. + ## [0.6.4] - 2024-08-12 ### Fixed - [PR#254](https://github.com/EmbarkStudios/cargo-about/pull/254) reverted unintended `id` -> `short_id` field rename. diff --git a/docs/src/cli/generate/README.md b/docs/src/cli/generate/README.md index c59b976..68f76bf 100644 --- a/docs/src/cli/generate/README.md +++ b/docs/src/cli/generate/README.md @@ -16,6 +16,25 @@ Disables the `default` feature for a crate when determining which crates to cons Scan licenses for the entire workspace, not just the active package. +### [`--locked`](https://doc.rust-lang.org/cargo/commands/cargo-fetch.html#option-cargo-fetch---locked) + +Asserts that the exact same dependencies and versions are used as when the existing Cargo.lock file was originally generated. Cargo will exit with an error when either of the following scenarios arises: + +* The lock file is missing. +* Cargo attempted to change the lock file due to a different dependency resolution. + +### [`--offline`](https://doc.rust-lang.org/cargo/commands/cargo-fetch.html#option-cargo-fetch---offline) + +Prevents Cargo and `cargo-about` from accessing the network for any reason. Without this flag, Cargo will stop with an error if it needs to access the network and the network is not available. With this flag, Cargo will attempt to proceed without the network if possible. + +Beware that this may result in different dependency resolution than online mode. 
Cargo will restrict itself to crates that are downloaded locally, even if there might be a newer version as indicated in the local copy of the index. See the cargo-fetch(1) command to download dependencies before going offline. + +`cargo-about` will also not query clearlydefined.io for license information, meaning that user-provided clarifications won't be used, and some ambiguous/complicated license files might be missed by `cargo-about`. Additionally, clarifications that use license files from the crate's source repository will not be applied, meaning that `cargo-about` will fall back to using the default license text rather than the one in the source repository, losing e.g. copyright or other unique information. + +### [`--frozen`](https://doc.rust-lang.org/cargo/commands/cargo-fetch.html#option-cargo-fetch---frozen) + +Equivalent to specifying both `--locked` and `--offline`. + ### `--fail` Exits with a non-zero exit code if any crate's license cannot be reasonably determined diff --git a/src/cargo-about/clarify.rs b/src/cargo-about/clarify.rs index 3de8a45..d82d46f 100644 --- a/src/cargo-about/clarify.rs +++ b/src/cargo-about/clarify.rs @@ -61,7 +61,7 @@ pub fn cmd(args: Args) -> anyhow::Result<()> { .with_context(|| format!("unable to read file '{full_path}'"))? } Subcommand::Repo { rev, repo } => { - let gc = GitCache::default(); + let gc = GitCache::online(); gc.retrieve_remote(repo.as_str(), &rev, &args.path) .context("failed to retrieve remote file")? 
@@ -95,7 +95,7 @@ pub fn cmd(args: Args) -> anyhow::Result<()> { let pkg: MinPkg = toml::from_str(&manifest).context("failed to deserialize Cargo.toml")?; - let gc = GitCache::default(); + let gc = GitCache::online(); let vcs_info = GitCache::parse_vcs_info(&crate_path.join(".cargo_vcs_info.json")) .context("failed to read sha1")?; @@ -216,9 +216,7 @@ pub fn cmd(args: Args) -> anyhow::Result<()> { let overall_expression = spdx::Expression::parse(&final_expression).map_err(|e| { anyhow::anyhow!( - "failed to parse '{}' as the total expression for all of the licenses: {}", - final_expression, - e, + "failed to parse '{final_expression}' as the total expression for all of the licenses: {e}", ) })?; diff --git a/src/cargo-about/generate.rs b/src/cargo-about/generate.rs index b244fc2..8c5daea 100644 --- a/src/cargo-about/generate.rs +++ b/src/cargo-about/generate.rs @@ -32,15 +32,15 @@ pub struct Args { /// Defaults to `/about.toml` if not specified #[clap(short, long)] config: Option, - /// The confidence threshold required for license files - /// to be positively identified: 0.0 - 1.0 + /// The confidence threshold required for license files to be positively identified: 0.0 - 1.0 #[clap(long, default_value = "0.8")] threshold: f32, - /// The name of the template to use when rendering. If only passing a - /// single template file to `templates` this is not used. + /// The name of the template to use when rendering. + /// + /// If only passing a single template file to `templates` this is not used. #[clap(short, long)] name: Option, - /// A file to write the generated output to. Typically an .html file. + /// A file to write the generated output to, typically an .html file. 
#[clap(short, long)] output_file: Option, /// Space-separated list of features to activate @@ -52,8 +52,27 @@ pub struct Args { /// Do not activate the `default` feature #[clap(long)] no_default_features: bool, - /// The path of the Cargo.toml for the root crate, defaults to the - /// current crate or workspace in the current working directory + /// Run without accessing the network. + /// + /// In addition to cargo not fetching crates, this will mean that only + /// local files will be crawled for license information. + /// 1. clearlydefined.io will not be used, so some more ambiguous/complicated + /// license files might be ignored + /// 2. Crates that are improperly packaged and don't contain their LICENSE + /// file(s) will fallback to the default license file, missing eg. + /// copyright information in the license that would be retrieved from + /// the original git repo for the crate in question + #[arg(long)] + pub(crate) offline: bool, + /// Assert that `Cargo.lock` will remain unchanged + #[arg(long)] + pub(crate) locked: bool, + /// Equivalent to specifying both `--locked` and `--offline` + #[arg(long)] + pub(crate) frozen: bool, + /// The path of the Cargo.toml for the root crate. + /// + /// Defaults to the current crate or workspace in the current working directory #[clap(short, long)] manifest_path: Option, /// Scan licenses for the entire workspace, not just the active package @@ -66,8 +85,11 @@ pub struct Args { /// The format of the output, defaults to `handlebars`. #[clap(long, default_value_t)] format: OutputFormat, - /// The template(s) or template directory to use. Must either be a `.hbs` - /// file, or have at least one `.hbs` file in it if it is a directory. + /// The template(s) or template directory to use. + /// + /// Must either be a `.hbs` file, or have at least one `.hbs` file in it if + /// it is a directory. 
+ /// /// Required if `--format` is not `json` templates: Option, } @@ -157,6 +179,11 @@ pub fn cmd(args: Args, color: crate::Color) -> anyhow::Result<()> { args.all_features, args.features.clone(), args.workspace, + krates::LockOptions { + frozen: args.frozen, + locked: args.locked, + offline: args.offline, + }, &cfg, )); }); @@ -222,15 +249,16 @@ pub fn cmd(args: Args, color: crate::Color) -> anyhow::Result<()> { log::info!("gathered {} crates", krates.len()); - let client = reqwest::blocking::ClientBuilder::new() - .timeout(std::time::Duration::from_secs( - cfg.clearly_defined_timeout_secs.unwrap_or(30), - )) - .build()?; - let summary = licenses::Gatherer::with_store(std::sync::Arc::new(store), client.into()) + let client = if !args.offline && !args.frozen { + Some(reqwest::blocking::ClientBuilder::new().build()?) + } else { + None + }; + + let summary = licenses::Gatherer::with_store(std::sync::Arc::new(store)) .with_confidence_threshold(args.threshold) .with_max_depth(cfg.max_depth.map(|md| md as _)) - .gather(&krates, &cfg); + .gather(&krates, &cfg, client); let (files, resolved) = licenses::resolution::resolve(&summary, &cfg.accepted, &cfg.crates, args.fail); diff --git a/src/lib.rs b/src/lib.rs index 85d7fd2..4014d2c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,10 +100,12 @@ pub fn get_all_crates( all_features: bool, features: Vec, workspace: bool, + lock_opts: krates::LockOptions, cfg: &licenses::config::Config, ) -> anyhow::Result { let mut mdc = krates::Cmd::new(); mdc.manifest_path(cargo_toml); + mdc.lock_opts(lock_opts); // The metadata command builder is weird and only allows you to specify // one of these, but really you might need to do multiple of them @@ -170,8 +172,7 @@ pub fn to_hex(bytes: &[u8]) -> String { pub fn validate_sha256(buffer: &str, expected: &str) -> anyhow::Result<()> { anyhow::ensure!( expected.len() == 64, - "checksum '{}' length is {} instead of expected 64", - expected, + "checksum '{expected}' length is {} instead of 
expected 64", expected.len() ); @@ -193,7 +194,7 @@ pub fn validate_sha256(buffer: &str, expected: &str) -> anyhow::Result<()> { b'a'..=b'f' => exp[0] - b'a' + 10, b'0'..=b'9' => exp[0] - b'0', c => { - anyhow::bail!("invalid byte in checksum '{}' @ {}: {}", expected, ind, c); + anyhow::bail!("invalid byte in checksum '{expected}' @ {ind}: {c}"); } }; @@ -204,12 +205,12 @@ pub fn validate_sha256(buffer: &str, expected: &str) -> anyhow::Result<()> { b'a'..=b'f' => exp[1] - b'a' + 10, b'0'..=b'9' => exp[1] - b'0', c => { - anyhow::bail!("invalid byte in checksum '{}' @ {}: {}", expected, ind, c); + anyhow::bail!("invalid byte in checksum '{expected}' @ {ind}: {c}"); } }; if digest[ind] != cur { - anyhow::bail!("checksum mismatch, expected {}", expected); + anyhow::bail!("checksum mismatch, expected '{expected}'"); } } diff --git a/src/licenses.rs b/src/licenses.rs index 6b4893e..b8587e1 100644 --- a/src/licenses.rs +++ b/src/licenses.rs @@ -122,16 +122,14 @@ impl<'krate> Eq for KrateLicense<'krate> {} pub struct Gatherer { store: Arc, - cd_client: cd::client::Client, threshold: f32, max_depth: Option, } impl Gatherer { - pub fn with_store(store: Arc, client: cd::client::Client) -> Self { + pub fn with_store(store: Arc) -> Self { Self { store, - cd_client: client, threshold: 0.8, max_depth: None, } @@ -151,6 +149,7 @@ impl Gatherer { self, krates: &'krate Krates, cfg: &config::Config, + client: Option, ) -> Vec> { let mut licensed_krates = Vec::with_capacity(krates.len()); @@ -167,7 +166,8 @@ impl Gatherer { .optimize(false) .max_passes(1); - let git_cache = fetch::GitCache::default(); + let is_offline = client.is_none(); + let git_cache = fetch::GitCache::maybe_offline(client); // If we're ignoring crates that are private, just add them // to the list so all of the following gathers ignore them @@ -204,7 +204,27 @@ impl Gatherer { // can get previously gathered license information + any possible // curations so that we only need to fallback to scanning local crate // 
sources if it's not already in clearly-defined - self.gather_clearly_defined(krates, cfg, &strategy, &mut licensed_krates); + if !is_offline && !cfg.no_clearly_defined { + match reqwest::blocking::ClientBuilder::new() + .timeout(std::time::Duration::from_secs( + cfg.clearly_defined_timeout_secs.unwrap_or(30), + )) + .build() + { + Ok(client) => { + self.gather_clearly_defined( + krates, + cfg, + client.into(), + &strategy, + &mut licensed_krates, + ); + } + Err(err) => { + log::error!("failed to build clearlydefined.io HTTP client: {err:#}"); + } + } + } // Finally, crawl the crate sources on disk to try and determine licenses self.gather_file_system(krates, &strategy, &mut licensed_krates); @@ -244,7 +264,7 @@ impl Gatherer { ); } Err(e) => { - log::warn!("failed to validate all files specified in clarification for crate {krate}: {e}"); + log::warn!("failed to validate all files specified in clarification for crate {krate}: {e:#}"); } } } @@ -255,6 +275,7 @@ impl Gatherer { &self, krates: &'k Krates, cfg: &config::Config, + client: cd::client::Client, strategy: &askalono::ScanStrategy<'_>, licensed_krates: &mut Vec>, ) { @@ -292,7 +313,7 @@ impl Gatherer { ); let collected: Vec<_> = reqs.par_bridge().filter_map(|req| { - match self.cd_client.execute::(req) { + match client.execute::(req) { Ok(response) => { Some(response.definitions.into_iter().filter_map(|def| { if def.described.is_none() { @@ -345,7 +366,7 @@ impl Gatherer { Some(text) } Err(err) => { - log::warn!("failed to read license from '{}' for crate '{}': {}", path, krate, err); + log::warn!("failed to read license from '{path}' for crate '{krate}': {err}"); return None; } } diff --git a/src/licenses/fetch.rs b/src/licenses/fetch.rs index b9e4086..0cc3778 100644 --- a/src/licenses/fetch.rs +++ b/src/licenses/fetch.rs @@ -98,13 +98,27 @@ pub struct VcsInfo { /// but not in the actual published package is due to it being in the root but /// not copied into each sub-crate in the repository, we can just not 
re-retrieve /// the same file multiple times -#[derive(Clone, Default)] +#[derive(Clone)] pub struct GitCache { cache: Arc>>>, - http_client: Client, + http_client: Option, } impl GitCache { + pub fn maybe_offline(http_client: Option) -> Self { + Self { + http_client, + cache: Default::default(), + } + } + + pub fn online() -> Self { + Self { + http_client: Some(Client::new()), + cache: Default::default(), + } + } + #[allow(clippy::unused_self)] fn retrieve_local( &self, @@ -159,6 +173,11 @@ impl GitCache { let repo_url = url::Url::parse(repo) .with_context(|| format!("unable to parse repository url '{repo}'"))?; + let http_client = self + .http_client + .as_ref() + .context("unable to fetch remote repository data in offline mode")?; + // Unfortunately the HTTP retrieval methods for most of the popular // providers require an API token to use, so instead we just use a // third party CDN, `raw.githack.com` for now until I can find a better @@ -169,7 +188,7 @@ impl GitCache { let flavor = GitHostFlavor::from_repo(&repo_url)?; flavor - .fetch(&self.http_client, &repo_url, rev, path) + .fetch(http_client, &repo_url, rev, path) .with_context(|| format!("failed to fetch contents of '{path}' from repo '{repo}'")) } diff --git a/src/licenses/resolution.rs b/src/licenses/resolution.rs index 1642a17..c391344 100644 --- a/src/licenses/resolution.rs +++ b/src/licenses/resolution.rs @@ -139,7 +139,7 @@ pub fn resolve( if fail_on_missing { resolved.diagnostics.push(Diagnostic::new(Severity::Error).with_message(msg)); } else { - log::warn!("{}", msg); + log::warn!("{msg}"); } return Some(resolved);