Skip to content

Commit

Permalink
fix: Remove any YAML front matter from ScanCode license files
Browse files Browse the repository at this point in the history
ScanCode 32.0.0 started to prepend its `*.LICENSE` files with
YAML-encoded metadata, see [1]. This is a hot fix to remove this header,
if present, from the license files. A better solution will be
implemented later as part of a larger refactoring of license providers.

Different ScanCode versions also differ in whether license files come
with a final newline or not. Align on not having a final newline to make
tests pass either way.

[1]: aboutcode-org/scancode-toolkit#3100

Signed-off-by: Sebastian Schuberth <sebastian@doubleopen.org>
  • Loading branch information
sschuberth committed Nov 17, 2023
1 parent 1098569 commit b9c038e
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"dataLicense" : "CC0-1.0",
"comment" : "some document comment",
"hasExtractedLicensingInfos" : [ {
"extractedText" : "ASMUS License\n\nDisclaimer and legal rights\n---------------------------\n\nThis file contains bugs. All representations to the contrary are void.\n\nSource code in this file and the accompanying headers and included \nfiles may be distributed free of charge by anyone, as long as full \ncredit is given and any and all liabilities are assumed by the \nrecipient.\n",
"extractedText" : "ASMUS License\n\nDisclaimer and legal rights\n---------------------------\n\nThis file contains bugs. All representations to the contrary are void.\n\nSource code in this file and the accompanying headers and included \nfiles may be distributed free of charge by anyone, as long as full \ncredit is given and any and all liabilities are assumed by the \nrecipient.",
"licenseId" : "LicenseRef-scancode-asmus"
}, {
"extractedText" : "To anyone who acknowledges that the file \"sRGB Color Space Profile.icm\" \nis provided \"AS IS\" WITH NO EXPRESS OR IMPLIED WARRANTY:\npermission to use, copy and distribute this file for any purpose is hereby \ngranted without fee, provided that the file is not changed including the HP \ncopyright notice tag, and that the name of Hewlett-Packard Company not be \nused in advertising or publicity pertaining to distribution of the software \nwithout specific, written prior permission. Hewlett-Packard Company makes \nno representations about the suitability of this software for any purpose.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ hasExtractedLicensingInfos:
\nThis file contains bugs. All representations to the contrary are void.\n\nSource\
\ code in this file and the accompanying headers and included \nfiles may be distributed\
\ free of charge by anyone, as long as full \ncredit is given and any and all\
\ liabilities are assumed by the \nrecipient.\n"
\ liabilities are assumed by the \nrecipient."
licenseId: "LicenseRef-scancode-asmus"
- extractedText: "To anyone who acknowledges that the file \"sRGB Color Space Profile.icm\"\
\ \nis provided \"AS IS\" WITH NO EXPRESS OR IMPLIED WARRANTY:\npermission to\
Expand Down
18 changes: 16 additions & 2 deletions utils/spdx/src/main/kotlin/Utils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,12 @@ fun getLicenseTextReader(
): (() -> String)? {
return if (id.startsWith(LICENSE_REF_PREFIX)) {
getLicenseTextResource(id)?.let { { it.readText() } }
?: addScanCodeLicenseTextsDir(licenseTextDirectories).firstNotNullOfOrNull {
getLicenseTextFile(id, it)?.let { file -> { file.readText() } }
?: addScanCodeLicenseTextsDir(licenseTextDirectories).firstNotNullOfOrNull { dir ->
getLicenseTextFile(id, dir)?.let { file ->
{
file.readText().removeYamlFrontMatter()
}
}
}
} else {
SpdxLicense.forId(id.removeSuffix("+"))?.let { { it.text } }
Expand Down Expand Up @@ -159,5 +163,15 @@ private fun getLicenseTextFile(id: String, dir: File): File? =
}
}

/**
 * Return this text with any leading YAML front matter removed, i.e. a block that starts with a "---" line as the very
 * first line and ends with the next "---" line. Empty lines at the start of the remaining text and any trailing
 * whitespace (including a final newline) are removed as well, and the remaining lines are joined with "\n".
 *
 * If the first line is "---" but no closing "---" line follows, the text is not treated as having front matter and
 * is kept instead of being discarded completely.
 */
internal fun String.removeYamlFrontMatter(): String {
    val allLines = lines()

    // Note that lines() never returns an empty list, not even for an empty string, so first() is safe here.
    val licenseLines = if (allLines.first() == "---") {
        // Search for the closing delimiter after the opening one; the index is relative to the second line.
        val closingIndex = allLines.drop(1).indexOf("---")

        // Only strip the front matter if it is properly terminated. Otherwise dropping everything up to a
        // non-existing delimiter would silently result in an empty license text.
        if (closingIndex >= 0) allLines.drop(closingIndex + 2) else allLines
    } else {
        allLines
    }

    return licenseLines.dropWhile { it.isEmpty() }.joinToString("\n").trimEnd()
}

/**
 * Return the given [licenseTextDirectories] preceded by the ScanCode license text directory, if that is not null.
 * Duplicate entries are removed, keeping the first occurrence of each directory.
 */
private fun addScanCodeLicenseTextsDir(licenseTextDirectories: List<File>): List<File> {
    val scanCodeDirs = listOfNotNull(scanCodeLicenseTextDir)
    val allDirs = scanCodeDirs + licenseTextDirectories

    return allDirs.distinct()
}
54 changes: 53 additions & 1 deletion utils/spdx/src/test/kotlin/UtilsTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ class UtilsTest : WordSpec() {
"getLicenseText provided a custom dir" should {
"return the custom license text for a license ID not known by ort but in custom dir" {
val id = "LicenseRef-ort-abc"
val text = "a\nb\nc\n"
val text = "a\nb\nc"

setupTempFile(id, text)

Expand All @@ -197,5 +197,57 @@ class UtilsTest : WordSpec() {
getLicenseText("LicenseRef-not-present", handleExceptions = true, listOf(tempDir)) should beNull()
}
}

"removeYamlFrontMatter" should {
"remove a YAML front matter" {
val text = """
---
key: alasir
short_name: Alasir Licence
name: The Alasir Licence
category: Proprietary Free
owner: Alasir
homepage_url: http://alasir.com/licence/TAL.txt
spdx_license_key: LicenseRef-scancode-alasir
---
The Alasir Licence
This is a free software. It's provided as-is and carries absolutely no
warranty or responsibility by the author and the contributors, neither in
general nor in particular. No matter if this software is able or unable to
cause any damage to your or third party's computer hardware, software, or any
other asset available, neither the author nor a separate contributor may be
found liable for any harm or its consequences resulting from either proper or
improper use of the software, even if advised of the possibility of certain
injury as such and so forth.
""".trimIndent()

text.removeYamlFrontMatter() shouldBe """
The Alasir Licence
This is a free software. It's provided as-is and carries absolutely no
warranty or responsibility by the author and the contributors, neither in
general nor in particular. No matter if this software is able or unable to
cause any damage to your or third party's computer hardware, software, or any
other asset available, neither the author nor a separate contributor may be
found liable for any harm or its consequences resulting from either proper or
improper use of the software, even if advised of the possibility of certain
injury as such and so forth.
""".trimIndent()
}

"remove trailing whitespace" {
"last sentence\n".removeYamlFrontMatter() shouldBe "last sentence"
}

"remove leading empty lines" {
"\nfirst sentence".removeYamlFrontMatter() shouldBe "first sentence"
}

"keep leading whitespace" {
" indented title".removeYamlFrontMatter() shouldBe " indented title"
}
}
}
}

0 comments on commit b9c038e

Please sign in to comment.