macOS 13 support (#40)
* Initial macOS 13 support

* Handle watchOS case

* Test on macOS 13 and 14 runners

* Update test script for macOS 13 simulators

* Use M1 macOS 13 runners

* Use M1 macOS 13 runners with appropriate matrix

* Reduce iOS requirement to 16

* Only test watchOS on macOS 14

* Add iOS to the AVAudioApplication test

* Add note on version choices

* Remove missing OS versions from Package.swift

* Add missing OS checks

* Remove excess OS checks causing warnings

* Test mel with cpuOnly on simulator
ZachNagengast authored Mar 7, 2024
1 parent e9a6c14 commit 880f95e
Showing 16 changed files with 82 additions and 40 deletions.
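
The recurring pattern across these diffs: compile-time @available floors drop from macOS 14 / iOS 17 to macOS 13 / iOS 16, and the one API that genuinely requires the newer OSes (AVAudioApplication) moves behind a runtime #available check. As orientation before the per-file diffs, here is a minimal sketch of that pattern; it is illustrative only, not code from this commit, and the function name is hypothetical.

import AVFoundation

// Sketch: the declaration compiles for the lowered floor (macOS 13 / iOS 16),
// while the API introduced in macOS 14 / iOS 17 is gated at runtime.
@available(macOS 13, iOS 16, *)
func requestMicrophonePermission() async -> Bool {
    if #available(macOS 14, iOS 17, *) {
        // Newer API path, available from macOS 14 / iOS 17 onward.
        return await AVAudioApplication.requestRecordPermission()
    } else {
        // Pre-macOS 14 / iOS 17 fallback via AVCaptureDevice.
        return await withCheckedContinuation { continuation in
            AVCaptureDevice.requestAccess(for: .audio) { granted in
                continuation.resume(returning: granted)
            }
        }
    }
}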
16 changes: 13 additions & 3 deletions .github/workflows/unit-tests.yml
@@ -9,7 +9,15 @@ on:
 
 jobs:
   build-and-test:
-    runs-on: macos-14
+    strategy:
+      matrix:
+        os: [macos-13-xlarge, macos-14]
+        include:
+          - os: macos-13-xlarge
+            ios-version: "16.1" # oldest available version
+          - os: macos-14
+            ios-version: "17.2" # latest available version
+    runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v4
      - uses: maxim-lobanov/setup-xcode@v1
@@ -40,14 +48,16 @@ jobs:
         run: |
           set -o pipefail
           xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=iOS | xcpretty
-          xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=iOS Simulator,OS=17.2,name=iPhone 15" | xcpretty
+          xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=iOS Simulator,OS=${{ matrix.ios-version }},name=iPhone 15" | xcpretty
       - name: Build and Test - watchOS
+        if: matrix.os == 'macos-14'
         run: |
           set -o pipefail
           xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=watchOS | xcpretty
           xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=watchOS Simulator,OS=10.2,name=Apple Watch Ultra 2 (49mm)" | xcpretty
       - name: Build and Test - visionOS
+        if: matrix.os == 'macos-14'
         run: |
           set -o pipefail
           xcodebuild clean build-for-testing -scheme whisperkit-Package -destination generic/platform=visionOS | xcpretty
-          xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=visionOS Simulator,name=Apple Vision Pro" | xcpretty
+          xcodebuild test -only-testing WhisperKitTests/UnitTests -scheme whisperkit-Package -destination "platform=visionOS Simulator,name=Apple Vision Pro" | xcpretty
15 changes: 7 additions & 8 deletions Package.swift
@@ -6,10 +6,8 @@ import PackageDescription
 let package = Package(
     name: "whisperkit",
     platforms: [
-        .iOS(.v17),
-        .macOS(.v14),
-        .watchOS(.v10),
-        .visionOS(.v1)
+        .iOS(.v16),
+        .macOS(.v13),
     ],
     products: [
         .library(
@@ -18,7 +16,8 @@ let package = Package(
         ),
         .executable(
             name: "transcribe",
-            targets: ["WhisperKitCLI"])
+            targets: ["WhisperKitCLI"]
+        ),
     ],
     dependencies: [
         .package(url: "https://github.com/huggingface/swift-transformers.git", exact: "0.1.2"),
@@ -35,7 +34,7 @@ let package = Package(
             name: "WhisperKitCLI",
             dependencies: [
                 "WhisperKit",
-                .product(name: "ArgumentParser", package: "swift-argument-parser")
+                .product(name: "ArgumentParser", package: "swift-argument-parser"),
             ]
         ),
         .testTarget(
@@ -51,11 +50,11 @@ let package = Package(
                 "Makefile",
                 "README.md",
                 "LICENSE",
-                "CONTRIBUTING.md"
+                "CONTRIBUTING.md",
             ],
             resources: [
                 .process("Tests/WhisperKitTests/Resources"),
-                .copy("Models/whisperkit-coreml")
+                .copy("Models/whisperkit-coreml"),
             ]
         ),
     ]
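
For orientation, the resulting platform declaration reads roughly as in the sketch below (a hypothetical minimal manifest, not this package's actual file). The platforms that are no longer listed, watchOS and visionOS, fall back to SwiftPM's default minimums, with newer-OS APIs gated by @available annotations in source.

// swift-tools-version:5.9
import PackageDescription

// Hypothetical minimal manifest showing the lowered floors above.
let package = Package(
    name: "example",
    platforms: [
        .iOS(.v16),   // lowered from .v17
        .macOS(.v13), // lowered from .v14
        // watchOS/visionOS omitted: SwiftPM defaults apply, and source-level
        // @available(watchOS 10, visionOS 1, *) gates the relevant APIs.
    ],
    targets: [
        .target(name: "Example"),
    ]
)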
2 changes: 1 addition & 1 deletion Sources/WhisperKit/Core/AudioEncoder.swift
@@ -14,7 +14,7 @@ public protocol AudioEncoding {
     func encodeFeatures(_ features: MLMultiArray) async throws -> MLMultiArray?
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class AudioEncoder: AudioEncoding, WhisperMLModel {
     public var model: MLModel?
 
31 changes: 28 additions & 3 deletions Sources/WhisperKit/Core/AudioProcessor.swift
@@ -143,7 +143,7 @@ public extension AudioProcessing {
     }
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class AudioProcessor: NSObject, AudioProcessing {
     public var audioEngine: AVAudioEngine?
     public var audioSamples: ContiguousArray<Float> = []
@@ -314,7 +314,32 @@ public class AudioProcessor: NSObject, AudioProcessing {
     }
 
     public static func requestRecordPermission() async -> Bool {
-        await AVAudioApplication.requestRecordPermission()
+        if #available(macOS 14, iOS 17, *) {
+            return await AVAudioApplication.requestRecordPermission()
+        } else {
+            #if os(watchOS)
+            // watchOS does not support AVCaptureDevice
+            return true
+            #else
+            let microphoneStatus = AVCaptureDevice.authorizationStatus(for: .audio)
+            switch microphoneStatus {
+            case .notDetermined:
+                return await withCheckedContinuation { continuation in
+                    AVCaptureDevice.requestAccess(for: .audio) { granted in
+                        continuation.resume(returning: granted)
+                    }
+                }
+            case .restricted, .denied:
+                Logging.error("Microphone access denied")
+                return false
+            case .authorized:
+                return true
+            @unknown default:
+                Logging.error("Unknown authorization status")
+                return false
+            }
+            #endif
+        }
     }
 
     #if os(macOS)
@@ -412,7 +437,7 @@ public class AudioProcessor: NSObject, AudioProcessing {
 
 // MARK: - Streaming
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public extension AudioProcessor {
     /// We have a new buffer, process and store it.
     /// NOTE: Assumes audio is 16khz mono
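
A hypothetical call site for the new permission path, to show the async surface is unchanged: callers behave identically whether the AVAudioApplication branch or the AVCaptureDevice fallback runs.

// Illustrative caller only; not part of the commit.
Task {
    let granted = await AudioProcessor.requestRecordPermission()
    if granted {
        // Safe to start the audio engine and begin streaming samples.
    } else {
        Logging.error("Microphone permission denied; recording disabled")
    }
}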
3 changes: 3 additions & 0 deletions Sources/WhisperKit/Core/AudioStreamTranscriber.swift
@@ -3,6 +3,7 @@
 
 import Foundation
 
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public extension AudioStreamTranscriber {
     struct State {
         public var isRecording: Bool = false
@@ -17,9 +18,11 @@ public extension AudioStreamTranscriber {
     }
 }
 
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public typealias AudioStreamTranscriberCallback = (AudioStreamTranscriber.State, AudioStreamTranscriber.State) -> Void
 
 /// Responsible for streaming audio from the microphone, processing it, and transcribing it in real-time.
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public actor AudioStreamTranscriber {
     private var state: AudioStreamTranscriber.State = .init() {
         didSet {
2 changes: 1 addition & 1 deletion Sources/WhisperKit/Core/FeatureExtractor.swift
@@ -12,7 +12,7 @@ public protocol FeatureExtracting {
     func logMelSpectrogram(fromAudio inputAudio: MLMultiArray) async throws -> MLMultiArray?
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class FeatureExtractor: FeatureExtracting, WhisperMLModel {
     public var model: MLModel?
 
6 changes: 3 additions & 3 deletions Sources/WhisperKit/Core/LogitsFilter.swift
@@ -9,7 +9,7 @@ public protocol LogitsFiltering {
     func filterLogits(_ logits: MLMultiArray, withTokens tokens: [Int]) -> MLMultiArray
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class SuppressTokensFilter: LogitsFiltering {
     let suppressTokens: [Int]
     private let suppressTokenIndexes: [[NSNumber]]
@@ -25,7 +25,7 @@ public class SuppressTokensFilter: LogitsFiltering {
     }
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class SuppressBlankFilter: LogitsFiltering {
     let suppressBlankTokens: [Int]
     let sampleBegin: Int
@@ -46,7 +46,7 @@ public class SuppressBlankFilter: LogitsFiltering {
     }
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class TimestampRulesFilter: LogitsFiltering {
     let tokenizer: Tokenizer
     let sampleBegin: Int
16 changes: 8 additions & 8 deletions Sources/WhisperKit/Core/Models.swift
@@ -7,7 +7,7 @@ import NaturalLanguage
 import Tokenizers
 
 #if os(watchOS) || arch(arm64)
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public typealias FloatType = Float16
 #else
 public typealias FloatType = Float
@@ -200,7 +200,7 @@ public struct DecodingCache {
 ///   - logProbThreshold: If the average log probability over sampled tokens is below this value, treat as failed.
 ///   - noSpeechThreshold: If the no speech probability is higher than this value AND the average log
 ///     probability over sampled tokens is below `logProbThreshold`, consider the segment as silent.
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public struct DecodingOptions {
     public var verbose: Bool
     public var task: DecodingTask
@@ -489,7 +489,7 @@ public class MelSpectrogramInput: MLFeatureProvider {
 }
 
 /// Model Prediction Output Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class MelSpectrogramOutput: MLFeatureProvider {
     /// Source provided by CoreML
     private let provider: MLFeatureProvider
@@ -526,7 +526,7 @@ public class MelSpectrogramOutput: MLFeatureProvider {
 // MARK: AudioEncoder
 
 /// Model Prediction Input Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class AudioEncoderInput: MLFeatureProvider {
     /// melspectrogram_features as 1 × {80,128} × 1 × 3000 4-dimensional array of floats
     public var melspectrogram_features: MLMultiArray
@@ -552,7 +552,7 @@ public class AudioEncoderInput: MLFeatureProvider {
 }
 
 /// Model Prediction Output Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class AudioEncoderOutput: MLFeatureProvider {
     /// Source provided by CoreML
     private let provider: MLFeatureProvider
@@ -589,7 +589,7 @@ public class AudioEncoderOutput: MLFeatureProvider {
 // MARK: TextDecoder
 
 /// Model Prediction Input Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class TextDecoderInput: MLFeatureProvider {
     /// input_ids as 1 element vector of 32-bit integers
     public var input_ids: MLMultiArray
@@ -657,7 +657,7 @@ public class TextDecoderInput: MLFeatureProvider {
 }
 
 /// Model Prediction Output Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class TextDecoderOutput: MLFeatureProvider {
     /// Source provided by CoreML
     private let provider: MLFeatureProvider
@@ -764,7 +764,7 @@ public class TextDecoderCachePrefillInput: MLFeatureProvider {
 }
 
 /// Model Prediction Output Type
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class TextDecoderCachePrefillOutput: MLFeatureProvider {
     /// Source provided by CoreML
     private let provider: MLFeatureProvider
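
The FloatType change above keeps the half-precision path on the lowered floors. The `#if os(watchOS) || arch(arm64)` guard exists because Swift's Float16 is unavailable on Intel Macs; the hypothetical snippet below (not from the commit) illustrates how code written against the alias stays portable.

// Hypothetical illustration: the same source compiles as Float16 on
// watchOS/arm64 and as Float on x86_64, with no call-site changes.
func isAboveLogProbThreshold(_ avgLogProb: FloatType, threshold: FloatType) -> Bool {
    avgLogProb > threshold
}

let passed = isAboveLogProbThreshold(-0.4, threshold: -1.0)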
4 changes: 2 additions & 2 deletions Sources/WhisperKit/Core/SegmentSeeker.swift
@@ -6,7 +6,7 @@ import CoreML
 import Foundation
 import Tokenizers
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public protocol SegmentSeeking {
     func findSeekPointAndSegments(
         decodingResult: DecodingResult,
@@ -34,7 +34,7 @@ public protocol SegmentSeeking {
     ) throws -> [TranscriptionSegment]?
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class SegmentSeeker: SegmentSeeking {
     public init() {}
 
6 changes: 3 additions & 3 deletions Sources/WhisperKit/Core/TextDecoder.swift
@@ -5,7 +5,7 @@ import Accelerate
 import CoreML
 import Tokenizers
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public protocol TextDecoding {
     var tokenizer: Tokenizer? { get set }
     var prefillData: WhisperMLModel? { get set }
@@ -43,7 +43,7 @@ public protocol TextDecoding {
     )
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public extension TextDecoding {
     func prepareDecoderInputs(withPrompt initialPrompt: [Int]) -> DecodingInputs? {
         let tokenShape = [NSNumber(value: 1), NSNumber(value: initialPrompt.count)]
@@ -234,7 +234,7 @@ public class TextDecoderContextPrefill: WhisperMLModel {
     public var model: MLModel?
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class TextDecoder: TextDecoding, WhisperMLModel {
     public var model: MLModel?
     public var tokenizer: Tokenizer?
2 changes: 1 addition & 1 deletion Sources/WhisperKit/Core/TokenSampler.swift
@@ -16,7 +16,7 @@ public struct SamplingResult {
     public var completed: Bool
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class GreedyTokenSampler: TokenSampling {
     public var temperature: FloatType
     public var eotToken: Int
1 change: 1 addition & 0 deletions Sources/WhisperKit/Core/Utils.swift
@@ -39,6 +39,7 @@ extension MLMultiArray {
     }
 }
 
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 func initMLMultiArray(shape: [NSNumber], dataType: MLMultiArrayDataType, initialValue: Any) -> MLMultiArray {
     let multiArray = try! MLMultiArray(shape: shape, dataType: dataType)
 
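
The helper's body is truncated in this view; assuming it pre-fills the array with `initialValue` (as the name suggests), usage inside the module would look roughly like this hypothetical snippet.

import CoreML

// Hypothetical usage: a 1 x 3000 float32 array pre-filled with zeros.
// Integer literals in the shape bridge to [NSNumber] automatically.
let buffer = initMLMultiArray(
    shape: [1, 3000],
    dataType: .float32,
    initialValue: Float(0)
)
assert(buffer.count == 3000)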
3 changes: 2 additions & 1 deletion Sources/WhisperKit/Core/WhisperKit.swift
@@ -9,12 +9,13 @@ import Hub
 import TensorUtils
 import Tokenizers
 
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public protocol Transcriber {
     func transcribe(audioPath: String, decodeOptions: DecodingOptions?, callback: TranscriptionCallback) async throws -> TranscriptionResult?
     func transcribe(audioArray: [Float], decodeOptions: DecodingOptions?, callback: TranscriptionCallback) async throws -> TranscriptionResult?
 }
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 public class WhisperKit: Transcriber {
     /// Models
     public var modelVariant: ModelVariant = .tiny
2 changes: 1 addition & 1 deletion Sources/WhisperKitCLI/transcribe.swift
@@ -7,7 +7,7 @@ import Foundation
 
 import WhisperKit
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 @main
 struct WhisperKitCLI: AsyncParsableCommand {
     @Option(help: "Path to audio file")
2 changes: 1 addition & 1 deletion Tests/WhisperKitTests/FunctionalTests.swift
@@ -5,7 +5,7 @@ import CoreML
 @testable import WhisperKit
 import XCTest
 
-@available(macOS 14, iOS 17, watchOS 10, visionOS 1, *)
+@available(macOS 13, iOS 16, watchOS 10, visionOS 1, *)
 final class FunctionalTests: XCTestCase {
     func testInitLarge() async {
         let modelPath = largev3ModelPath()
(Diff for the remaining changed file did not load in this view.)
