Skip to content

Commit

Permalink
Move remaining two top level functions into Ingestion namespace
Browse files Browse the repository at this point in the history
  • Loading branch information
finestructure committed Dec 18, 2024
1 parent 365c0af commit 8630be3
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 114 deletions.
174 changes: 89 additions & 85 deletions Sources/App/Commands/Ingestion.swift
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ enum Ingestion {
/// - mode: process a single `Package.Id` or a `limit` number of packages
/// - Returns: future
static func ingest(client: Client,
database: Database,
mode: SPICommand.Mode) async throws {
database: Database,
mode: SPICommand.Mode) async throws {
let start = DispatchTime.now().uptimeNanoseconds
defer { AppMetrics.ingestDurationSeconds?.time(since: start) }

Expand Down Expand Up @@ -197,10 +197,10 @@ enum Ingestion {
s3Readme = .error("\(error)")
}

let fork = await getFork(on: database, parent: metadata.repository?.parent)
let fork = await Ingestion.getFork(on: database, parent: metadata.repository?.parent)

try await run { () async throws(Ingestion.Error.UnderlyingError) in
try await updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme, fork: fork)
try await Ingestion.updateRepository(on: database, for: repo, metadata: metadata, licenseInfo: license, readmeInfo: readme, s3Readme: s3Readme, fork: fork)
} rethrowing: {
Ingestion.Error(packageId: package.model.id!, underlyingError: $0)
}
Expand Down Expand Up @@ -281,108 +281,111 @@ enum Ingestion {
// but let's play it safe and not risk a server crash, unlikely as it may be.
}
}
}


/// Insert or update `Repository` of given `Package` with given `Github.Metadata`.
/// - Parameters:
/// - database: `Database` object
/// - package: package to update
/// - metadata: `Github.Metadata` with data for update
/// - Returns: future
func updateRepository(on database: Database,
for repository: Repository,
metadata: Github.Metadata,
licenseInfo: Github.License?,
readmeInfo: Github.Readme?,
s3Readme: S3Readme?,
fork: Fork? = nil) async throws(Ingestion.Error.UnderlyingError) {
@Dependency(\.environment) var environment
if environment.shouldFail(failureMode: .noRepositoryMetadata) {
throw .noRepositoryMetadata(owner: repository.owner, name: repository.name)
}
if environment.shouldFail(failureMode: .repositorySaveFailed) {
throw .repositorySaveFailed(owner: repository.owner,
name: repository.name,
details: "TestError")
}
if environment.shouldFail(failureMode: .repositorySaveUniqueViolation) {
throw .repositorySaveUniqueViolation(owner: repository.owner,
name: repository.name,
details: "TestError")
}
guard let repoMetadata = metadata.repository else {
throw .noRepositoryMetadata(owner: repository.owner, name: repository.name)
}
/// Insert or update `Repository` of given `Package` with given `Github.Metadata`.
/// - Parameters:
/// - database: `Database` object
/// - package: package to update
/// - metadata: `Github.Metadata` with data for update
/// - Returns: future
static func updateRepository(on database: Database,
for repository: Repository,
metadata: Github.Metadata,
licenseInfo: Github.License?,
readmeInfo: Github.Readme?,
s3Readme: S3Readme?,
fork: Fork? = nil) async throws(Ingestion.Error.UnderlyingError) {
@Dependency(\.environment) var environment
if environment.shouldFail(failureMode: .noRepositoryMetadata) {
throw .noRepositoryMetadata(owner: repository.owner, name: repository.name)
}
if environment.shouldFail(failureMode: .repositorySaveFailed) {
throw .repositorySaveFailed(owner: repository.owner,
name: repository.name,
details: "TestError")
}
if environment.shouldFail(failureMode: .repositorySaveUniqueViolation) {
throw .repositorySaveUniqueViolation(owner: repository.owner,
name: repository.name,
details: "TestError")
}
guard let repoMetadata = metadata.repository else {
throw .noRepositoryMetadata(owner: repository.owner, name: repository.name)
}

repository.defaultBranch = repoMetadata.defaultBranch
repository.forks = repoMetadata.forkCount
repository.fundingLinks = repoMetadata.fundingLinks?.compactMap(FundingLink.init(from:)) ?? []
repository.hasSPIBadge = readmeInfo?.containsSPIBadge()
repository.homepageUrl = repoMetadata.homepageUrl?.trimmed
repository.isArchived = repoMetadata.isArchived
repository.isInOrganization = repoMetadata.isInOrganization
repository.keywords = Set(repoMetadata.topics.map { $0.lowercased() }).sorted()
repository.lastIssueClosedAt = repoMetadata.lastIssueClosedAt
repository.lastPullRequestClosedAt = repoMetadata.lastPullRequestClosedAt
repository.license = .init(from: repoMetadata.licenseInfo)
repository.licenseUrl = licenseInfo?.htmlUrl
repository.name = repoMetadata.repositoryName
repository.openIssues = repoMetadata.openIssues.totalCount
repository.openPullRequests = repoMetadata.openPullRequests.totalCount
repository.owner = repoMetadata.repositoryOwner
repository.ownerName = repoMetadata.owner.name
repository.ownerAvatarUrl = repoMetadata.owner.avatarUrl
repository.s3Readme = s3Readme
repository.readmeHtmlUrl = readmeInfo?.htmlUrl
repository.releases = repoMetadata.releases.nodes.map(Release.init(from:))
repository.stars = repoMetadata.stargazerCount
repository.summary = repoMetadata.description
repository.forkedFrom = fork

repository.defaultBranch = repoMetadata.defaultBranch
repository.forks = repoMetadata.forkCount
repository.fundingLinks = repoMetadata.fundingLinks?.compactMap(FundingLink.init(from:)) ?? []
repository.hasSPIBadge = readmeInfo?.containsSPIBadge()
repository.homepageUrl = repoMetadata.homepageUrl?.trimmed
repository.isArchived = repoMetadata.isArchived
repository.isInOrganization = repoMetadata.isInOrganization
repository.keywords = Set(repoMetadata.topics.map { $0.lowercased() }).sorted()
repository.lastIssueClosedAt = repoMetadata.lastIssueClosedAt
repository.lastPullRequestClosedAt = repoMetadata.lastPullRequestClosedAt
repository.license = .init(from: repoMetadata.licenseInfo)
repository.licenseUrl = licenseInfo?.htmlUrl
repository.name = repoMetadata.repositoryName
repository.openIssues = repoMetadata.openIssues.totalCount
repository.openPullRequests = repoMetadata.openPullRequests.totalCount
repository.owner = repoMetadata.repositoryOwner
repository.ownerName = repoMetadata.owner.name
repository.ownerAvatarUrl = repoMetadata.owner.avatarUrl
repository.s3Readme = s3Readme
repository.readmeHtmlUrl = readmeInfo?.htmlUrl
repository.releases = repoMetadata.releases.nodes.map(Release.init(from:))
repository.stars = repoMetadata.stargazerCount
repository.summary = repoMetadata.description
repository.forkedFrom = fork

do {
try await repository.save(on: database)
} catch let error as PSQLError where error.isUniqueViolation {
let details = error.serverInfo?[.message] ?? ""
throw Ingestion.Error.UnderlyingError.repositorySaveUniqueViolation(owner: repository.owner,
name: repository.name,
details: details)
} catch let error as PSQLError {
let details = error.serverInfo?[.message] ?? ""
throw Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner,
name: repository.name,
details: details)
} catch {
throw Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner,
name: repository.name,
details: "\(error)")
do {
try await repository.save(on: database)
} catch let error as PSQLError where error.isUniqueViolation {
let details = error.serverInfo?[.message] ?? ""
throw Ingestion.Error.UnderlyingError.repositorySaveUniqueViolation(owner: repository.owner,
name: repository.name,
details: details)
} catch let error as PSQLError {
let details = error.serverInfo?[.message] ?? ""
throw Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner,
name: repository.name,
details: details)
} catch {
throw Ingestion.Error.UnderlyingError.repositorySaveFailed(owner: repository.owner,
name: repository.name,
details: "\(error)")
}
}
}

func getFork(on database: Database, parent: Github.Metadata.Parent?) async -> Fork? {
guard let parentUrl = parent?.normalizedURL else { return nil }
static func getFork(on database: Database, parent: Github.Metadata.Parent?) async -> Fork? {
guard let parentUrl = parent?.normalizedURL else { return nil }

if let packageId = try? await Package.query(on: database)
.filter(\.$url, .custom("ilike"), parentUrl)
.first()?.id {
return .parentId(id: packageId, fallbackURL: parentUrl)
} else {
return .parentURL(parentUrl)
if let packageId = try? await Package.query(on: database)
.filter(\.$url, .custom("ilike"), parentUrl)
.first()?.id {
return .parentId(id: packageId, fallbackURL: parentUrl)
} else {
return .parentURL(parentUrl)
}
}
}


// Helper to ensure the canonical source for these critical fields is the same in all the places where we need them
private extension Github.Metadata {
var repositoryOwner: String? { repository?.repositoryOwner }
var repositoryName: String? { repository?.repositoryName }
}


// Helper to ensure the canonical source for these critical fields is the same in all the places where we need them
private extension Github.Metadata.Repository {
var repositoryOwner: String? { owner.login }
var repositoryName: String? { name }
}


private extension Github.Metadata.Parent {
// Returns a normalized version of the URL. Adding a `.git` if not present.
var normalizedURL: String? {
Expand All @@ -394,6 +397,7 @@ private extension Github.Metadata.Parent {
}
}


private extension Ingestion.Error {
static func invalidURL(packageId: Package.Id, url: String) -> Self {
Ingestion.Error(packageId: packageId, underlyingError: .invalidURL(url))
Expand Down
58 changes: 29 additions & 29 deletions Tests/AppTests/IngestionTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,12 @@ class IngestionTests: AppTestCase {
let repo = Repository(packageId: try pkg.requireID())

// MUT
try await updateRepository(on: app.db,
for: repo,
metadata: .mock(owner: "foo", repository: "bar"),
licenseInfo: .init(htmlUrl: ""),
readmeInfo: .init(html: "", htmlUrl: "", imagesToCache: []),
s3Readme: nil)
try await Ingestion.updateRepository(on: app.db,
for: repo,
metadata: .mock(owner: "foo", repository: "bar"),
licenseInfo: .init(htmlUrl: ""),
readmeInfo: .init(html: "", htmlUrl: "", imagesToCache: []),
s3Readme: nil)

// validate
do {
Expand Down Expand Up @@ -154,16 +154,16 @@ class IngestionTests: AppTestCase {
summary: "package desc")

// MUT
try await updateRepository(on: app.db,
for: repo,
metadata: md,
licenseInfo: .init(htmlUrl: "license url"),
readmeInfo: .init(etag: "etag",
html: "readme html https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com",
htmlUrl: "readme html url",
imagesToCache: []),
s3Readme: .cached(s3ObjectUrl: "url", githubEtag: "etag"),
fork: .parentURL("https://github.com/foo/bar.git"))
try await Ingestion.updateRepository(on: app.db,
for: repo,
metadata: md,
licenseInfo: .init(htmlUrl: "license url"),
readmeInfo: .init(etag: "etag",
html: "readme html https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com",
htmlUrl: "readme html url",
imagesToCache: []),
s3Readme: .cached(s3ObjectUrl: "url", githubEtag: "etag"),
fork: .parentURL("https://github.com/foo/bar.git"))

// validate
do {
Expand Down Expand Up @@ -229,14 +229,14 @@ class IngestionTests: AppTestCase {
summary: "package desc")

// MUT
try await updateRepository(on: app.db,
for: repo,
metadata: md,
licenseInfo: .init(htmlUrl: "license url"),
readmeInfo: .init(html: "readme html",
htmlUrl: "readme html url",
imagesToCache: []),
s3Readme: nil)
try await Ingestion.updateRepository(on: app.db,
for: repo,
metadata: md,
licenseInfo: .init(htmlUrl: "license url"),
readmeInfo: .init(html: "readme html",
htmlUrl: "readme html url",
imagesToCache: []),
s3Readme: nil)

// validate
do {
Expand Down Expand Up @@ -660,23 +660,23 @@ class IngestionTests: AppTestCase {
try await Package(url: "https://github.com/bar/forked.git", processingStage: .analysis).save(on: app.db)

// test lookup when package is in the index
let fork = await getFork(on: app.db, parent: .init(url: "https://github.com/foo/parent.git"))
let fork = await Ingestion.getFork(on: app.db, parent: .init(url: "https://github.com/foo/parent.git"))
XCTAssertEqual(fork, .parentId(id: .id0, fallbackURL: "https://github.com/foo/parent.git"))

// test lookup when package is in the index but with different case in URL
let fork2 = await getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent.git"))
let fork2 = await Ingestion.getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent.git"))
XCTAssertEqual(fork2, .parentId(id: .id0, fallbackURL: "https://github.com/Foo/Parent.git"))

// test whem metadata repo url doesn't have `.git` at end
let fork3 = await getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent"))
let fork3 = await Ingestion.getFork(on: app.db, parent: .init(url: "https://github.com/Foo/Parent"))
XCTAssertEqual(fork3, .parentId(id: .id0, fallbackURL: "https://github.com/Foo/Parent.git"))

// test lookup when package is not in the index
let fork4 = await getFork(on: app.db, parent: .init(url: "https://github.com/some/other.git"))
let fork4 = await Ingestion.getFork(on: app.db, parent: .init(url: "https://github.com/some/other.git"))
XCTAssertEqual(fork4, .parentURL("https://github.com/some/other.git"))

// test lookup when parent url is nil
let fork5 = await getFork(on: app.db, parent: nil)
let fork5 = await Ingestion.getFork(on: app.db, parent: nil)
XCTAssertEqual(fork5, nil)
}
}
Expand Down

0 comments on commit 8630be3

Please sign in to comment.