Save, Read and Delete Audio Files, and Add an Audio Visualizer for Display #38

Open · wants to merge 7 commits into base: master
8 changes: 4 additions & 4 deletions Example/OSSSpeechKit.xcodeproj/project.pbxproj
@@ -520,7 +520,7 @@
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CODE_SIGN_IDENTITY = "iPhone Developer";
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = "";
DEVELOPMENT_TEAM = JL4FNX8668;
GCC_OPTIMIZATION_LEVEL = 0;
INFOPLIST_FILE = OSSSpeechKit/Info.plist;
IPHONEOS_DEPLOYMENT_TARGET = 13.0;
@@ -544,7 +544,7 @@
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CODE_SIGN_IDENTITY = "iPhone Developer";
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = "";
DEVELOPMENT_TEAM = JL4FNX8668;
GCC_OPTIMIZATION_LEVEL = 0;
INFOPLIST_FILE = OSSSpeechKit/Info.plist;
IPHONEOS_DEPLOYMENT_TARGET = 13.0;
@@ -567,7 +567,7 @@
buildSettings = {
CODE_SIGN_IDENTITY = "iPhone Developer";
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = "";
DEVELOPMENT_TEAM = JL4FNX8668;
FRAMEWORK_SEARCH_PATHS = (
"$(SDKROOT)/Developer/Library/Frameworks",
"$(inherited)",
@@ -599,7 +599,7 @@
buildSettings = {
CODE_SIGN_IDENTITY = "iPhone Developer";
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = "";
DEVELOPMENT_TEAM = JL4FNX8668;
FRAMEWORK_SEARCH_PATHS = (
"$(SDKROOT)/Developer/Library/Frameworks",
"$(inherited)",
11 changes: 11 additions & 0 deletions Example/OSSSpeechKit/CountryLanguageListTableViewController.swift
@@ -112,6 +112,17 @@ extension CountryLanguageListTableViewController {
}

extension CountryLanguageListTableViewController: OSSSpeechDelegate {
func deleteVoiceFile(withFinish finish: Bool, withError error: Error?) {

}

func voiceFilePathTranscription(withText text: String) {

}

func didFinishListening(withAudioFileURL url: URL, withText text: String) {
print("Translation completed: \(text). And user voice file path: \(url.absoluteString)")
}

func didCompleteTranslation(withText text: String) {
print("Translation completed: \(text)")
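
The stubs above satisfy the two new protocol requirements without using them. A fuller conformance might look like the following sketch, assuming the library's OSSSpeech.shared singleton; the class name and wiring are illustrative, not part of this PR:

import UIKit
import OSSSpeechKit

final class RecordingsViewController: UIViewController, OSSSpeechDelegate {
    let speech = OSSSpeech.shared

    override func viewDidLoad() {
        super.viewDidLoad()
        speech.delegate = self
        speech.saveRecord = true // keep the recorded audio file
    }

    // New in this PR: fires alongside didFinishListening(withText:) when saveRecord is true.
    func didFinishListening(withAudioFileURL url: URL, withText text: String) {
        print("Saved \(url.lastPathComponent) with transcription: \(text)")
    }

    // New in this PR: reports the outcome of deleteVoiceFolderItem(url:).
    func deleteVoiceFile(withFinish finish: Bool, withError error: Error?) {
        print(finish ? "Recording deleted." : "Delete failed: \(String(describing: error))")
    }

    // New in this PR: receives the transcription of a saved file.
    func voiceFilePathTranscription(withText text: String) {
        print("Transcription of saved file: \(text)")
    }

    // Pre-existing requirements.
    func didFinishListening(withText text: String) {}
    func authorizationToMicrophone(withAuthentication type: OSSSpeechKitAuthorizationStatus) {}
    func didFailToCommenceSpeechRecording() {}
    func didCompleteTranslation(withText text: String) {}
    func didFailToProcessRequest(withError error: Error?) {}
}
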
4 changes: 2 additions & 2 deletions Example/Podfile.lock
@@ -11,6 +11,6 @@ EXTERNAL SOURCES:
SPEC CHECKSUMS:
OSSSpeechKit: ea0fd8151e7e338bc6ddc6bb749455fc3b33cfde

PODFILE CHECKSUM: 619c7767d93bbf8bc7a5c2d0a1d118e435561c49
PODFILE CHECKSUM: 74abb7e61e1f9880a3040420923d3dad8dfbc311

COCOAPODS: 1.11.3
COCOAPODS: 1.12.0
12 changes: 12 additions & 0 deletions Example/Tests/OSSSpeechTests.swift
@@ -313,6 +313,18 @@ class OSSSpeechTests: XCTestCase {
}

extension OSSSpeechTests: OSSSpeechDelegate {
func deleteVoiceFile(withFinish finish: Bool, withError error: Error?) {

}

func voiceFilePathTranscription(withText text: String) {

}

func didFinishListening(withAudioFileURL url: URL, withText text: String) {
print("Translation completed with text: \(text). And user voice file path: \(url.absoluteString)")
}

func didCompleteTranslation(withText text: String) {
print("Translation completed with text: \(text)")
}
196 changes: 186 additions & 10 deletions OSSSpeechKit/Classes/OSSSpeech.swift
@@ -68,6 +68,14 @@ public enum OSSSpeechKitErrorType: Int {
case invalidAudioEngine = -6
/// Voice recognition is unavailable.
case recogniserUnavailble = -7
/// The voice recording is invalid.
case invalidRecordVoice = -8
/// The voice recording file path is invalid.
case invalidVoiceFilePath = -9
/// The voice recording file could not be deleted.
case invalidDeleteVoiceFilePath = -10
/// The voice recording file could not be transcribed.
case invalidTranscriptionFilePath = -11

/// The OSSSpeechKit error message string.
///
@@ -88,6 +96,14 @@ public enum OSSSpeechKitErrorType: Int {
return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidAudioEngine", defaultValue: "The audio engine is unavailable. Please try again soon.")
case .recogniserUnavailble:
return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageRecogniserUnavailable", defaultValue: "The Speech Recognition service is currently unavailable.")
case .invalidRecordVoice:
return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidRecordVoice", defaultValue: "The user voice recorder service is not working.")
case .invalidVoiceFilePath:
return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidVoiceFilePath", defaultValue: "The user voice file path could not be created.")
case .invalidDeleteVoiceFilePath:
return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidDeleteVoiceFilePath", defaultValue: "The user voice file could not be deleted.")
case .invalidTranscriptionFilePath:
return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_messageInvalidTranscriptionFilePath", defaultValue: "The voice recording could not be transcribed.")
}
}
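
Because each new case carries a distinct negative raw value, a client can recover the case from the NSError code inside didFailToProcessRequest(withError:). A hedged sketch, assuming the .error property used elsewhere in this file encodes the enum's raw value as the error code:

func didFailToProcessRequest(withError error: Error?) {
    guard let code = (error as NSError?)?.code,
          let type = OSSSpeechKitErrorType(rawValue: code) else { return }
    switch type {
    case .invalidRecordVoice:
        print("The voice recorder failed to start.")
    case .invalidVoiceFilePath:
        print("A file could not be created for the recording.")
    case .invalidDeleteVoiceFilePath:
        print("The recording could not be deleted.")
    case .invalidTranscriptionFilePath:
        print("The recording could not be transcribed.")
    default:
        print("Speech error code: \(code)")
    }
}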

@@ -97,7 +113,8 @@ public enum OSSSpeechKitErrorType: Int {
public var errorRequestType: String {
switch self {
case .noMicrophoneAccess,
.invalidAudioEngine:
.invalidAudioEngine,
.invalidRecordVoice:
return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeNoMicAccess", defaultValue: "Recording")
case .invalidUtterance:
return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidUtterance", defaultValue: "Speech or Recording")
Expand All @@ -106,6 +123,10 @@ public enum OSSSpeechKitErrorType: Int {
.invalidSpeechRequest,
.recogniserUnavailble:
return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidSpeech", defaultValue: "Speech")
case .invalidVoiceFilePath, .invalidDeleteVoiceFilePath:
return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidFilePath", defaultValue: "File")
case .invalidTranscriptionFilePath:
return OSSSpeechUtility().getString(forLocalizedName: "OSSSpeechKitErrorType_requestTypeInvalidTranscriptionFilePath", defaultValue: "Transcription")
}
}

@@ -150,6 +171,8 @@ public enum OSSSpeechRecognitionTaskType: Int {
public protocol OSSSpeechDelegate: AnyObject {
/// When the microphone has finished accepting audio, this delegate will be called with the final best text output.
func didFinishListening(withText text: String)
/// When the microphone has finished accepting audio for a saved recording, this delegate method is called with the final best text output and the recorded audio file URL.
func didFinishListening(withAudioFileURL url: URL, withText text: String)
/// Handle returning authentication status to user - primary use is for non-authorized state.
func authorizationToMicrophone(withAuthentication type: OSSSpeechKitAuthorizationStatus)
/// If the speech recogniser and request fail to set up, this method will be called.
@@ -158,13 +181,31 @@ public protocol OSSSpeechDelegate: AnyObject {
func didCompleteTranslation(withText text: String)
/// Error handling function.
func didFailToProcessRequest(withError error: Error?)
/// When a voice file is deleted, this delegate method reports whether the deletion succeeded.
func deleteVoiceFile(withFinish finish: Bool, withError error: Error?)
/// Returns the transcribed text for a saved voice file path.
func voiceFilePathTranscription(withText text: String)
}


/// Speech is the primary interface. To use, set the voice and then call `.speak(string: "your string")`
public class OSSSpeech: NSObject {

// MARK: - Private Properties

/// Records the user's voice.
private var audioRecorder: AVAudioRecorder?
/// The URL of the most recent successful voice recording.
private var audioFileURL: URL!
/// Whether audio recordings should be saved. Defaults to true.
public var saveRecord: Bool = true
/// Captured sound-wave sample values.
private var soundSamples = [Float]()
/// Timer that drives sound-wave sampling.
private var levelTimer: Timer?
/// Callback that publishes sound-wave values for a visualizer to render.
public var onUpdate: (([Float]) -> Void)?

/// An object that produces synthesized speech from text utterances and provides controls for monitoring or controlling ongoing speech.
private var speechSynthesizer: AVSpeechSynthesizer!
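
The onUpdate closure is the visualizer hook: every timer tick appends one normalized sample and republishes the whole array (see visualizerTimer() below). A minimal consumer sketch; the waveform view is hypothetical:

let speech = OSSSpeech.shared
speech.onUpdate = { samples in
    // samples are linear amplitudes in 0...1, newest value last.
    DispatchQueue.main.async {
        waveformView.render(samples) // hypothetical custom view
    }
}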

@@ -193,7 +234,7 @@ public class OSSSpeech: NSObject {
/// The object used to enable translation of strings to synthsized voice.
public var utterance: OSSUtterance?

#if !os(macOS)
#if !os(macOS)
/// An AVAudioSession that ensure volume controls are correct in various scenarios
private var session: AVAudioSession?

@@ -209,7 +250,7 @@
session = newValue
}
}
#endif
#endif

/// This property handles permission authorization.
/// This property is intentionally named vaguely to prevent accidental overriding.
@@ -344,8 +385,9 @@ public class OSSSpeech: NSObject {
#if !os(macOS)
do {
let category: AVAudioSession.Category = isRecording ? .playAndRecord : .playback
try audioSession.setCategory(category, options: .duckOthers)
try audioSession.setCategory(category, options: isRecording ? .defaultToSpeaker : .duckOthers)
try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
isRecording ? try audioSession.setActive(true) : try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
return true
} catch {
if isRecording {
@@ -388,10 +430,10 @@ public class OSSSpeech: NSObject {
private func requestMicPermission() {
#if !os(macOS)
audioSession.requestRecordPermission {[weak self] allowed in
guard let self = self else { return }
guard let self = self else { return }
if !allowed {
self.debugLog(object: self, message: "Microphone permission was denied.")
self.delegate?.authorizationToMicrophone(withAuthentication: .denied)
self.debugLog(object: self, message: "Microphone permission was denied.")
self.delegate?.authorizationToMicrophone(withAuthentication: .denied)
return
}
self.getMicroPhoneAuthorization()
@@ -422,6 +464,10 @@ public class OSSSpeech: NSObject {
}
let node = engine.inputNode
node.removeTap(onBus: 0)

audioRecorder?.stop()
stopVisualizerTimer()

if node.inputFormat(forBus: 0).channelCount == 0 {
node.reset()
}
@@ -525,16 +571,134 @@ public class OSSSpeech: NSObject {
return
}
if let audioRequest = request {
if recogniser.supportsOnDeviceRecognition {
audioRequest.requiresOnDeviceRecognition = shouldUseOnDeviceRecognition
}
if #available(iOS 13, *) {
if recogniser.supportsOnDeviceRecognition {
audioRequest.requiresOnDeviceRecognition = shouldUseOnDeviceRecognition
}
}
recogniser.delegate = self
recogniser.defaultTaskHint = recognitionTaskType.taskType
recognitionTask = recogniser.recognitionTask(with: audioRequest, delegate: self)
} else {
delegate?.didFailToCommenceSpeechRecording()
delegate?.didFailToProcessRequest(withError: OSSSpeechKitErrorType.invalidSpeechRequest.error)
}

if self.saveRecord {
readyToRecord()
}
}

/// Starts an AVAudioRecorder alongside speech recognition so the user's voice is saved to a file.
private func readyToRecord() {
let dateFormatter = DateFormatter()
dateFormatter.dateFormat = "yyyy-MM-dd-HH:mm:ss"
let dateString = dateFormatter.string(from: Date())

audioFileURL = getDocumentsDirectory().appendingPathComponent("\(dateString)-osKit.m4a")

let audioSettings = [AVFormatIDKey: Int(kAudioFormatMPEG4AAC),
AVSampleRateKey: 12000,
AVNumberOfChannelsKey: 1,
AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue
]

do {
audioRecorder = try AVAudioRecorder(url: self.audioFileURL!, settings: audioSettings)
audioRecorder?.isMeteringEnabled = true
audioRecorder?.delegate = self
audioRecorder?.prepareToRecord()
audioRecorder?.record()
soundSamples.removeAll()
visualizerTimer()
} catch {
delegate?.didFailToProcessRequest(withError: OSSSpeechKitErrorType.invalidRecordVoice.error)
}
}

/// Samples the recorder's metering levels on a repeating timer to produce sound-wave values.
private func visualizerTimer() {
let interval: Double = 0.01
audioRecorder?.record(forDuration: interval)

levelTimer = Timer(timeInterval: interval, repeats: true, block: { [weak self] _ in
self?.audioRecorder?.updateMeters()
let decibels = self?.audioRecorder?.averagePower(forChannel: 0) ?? -160
let normalizedValue = pow(10, decibels / 20)
self?.soundSamples.append(normalizedValue)
self?.onUpdate?(self?.soundSamples ?? [])
self?.audioRecorder?.record(forDuration: interval)
})

RunLoop.current.add(levelTimer!, forMode: .default)
}

/// Stops sound-wave sampling and invalidates the timer.
private func stopVisualizerTimer() {
onUpdate?(soundSamples)
soundSamples.removeAll()
levelTimer?.invalidate()
}
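
averagePower(forChannel:) reports decibels in roughly -160...0, and the timer block above converts each reading to a linear 0...1 amplitude with pow(10, decibels / 20). The same mapping as a standalone sketch:

/// Converts a metering level in decibels (about -160...0) to a linear amplitude in 0...1.
/// 0 dB maps to 1.0, -20 dB to 0.1, -40 dB to 0.01, and silence approaches 0.
func normalizedAmplitude(fromDecibels decibels: Float) -> Float {
    pow(10, decibels / 20)
}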

/// Returns the app's documents directory.
public func getDocumentsDirectory() -> URL {
let paths = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
let documentsDirectory = paths[0]
return documentsDirectory
}
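
Recordings land in this directory with names ending in "-osKit.m4a" (see readyToRecord() above), so a caller can enumerate them. A hedged sketch; the helper function is illustrative:

func savedRecordings(for speech: OSSSpeech) -> [URL] {
    let folder = speech.getDocumentsDirectory()
    let contents = (try? FileManager.default.contentsOfDirectory(
        at: folder, includingPropertiesForKeys: nil, options: .skipsHiddenFiles)) ?? []
    // Only files created by readyToRecord() carry the "-osKit.m4a" suffix.
    return contents.filter { $0.lastPathComponent.hasSuffix("-osKit.m4a") }
}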

/// Deletes a specific voice file, or every recorded voice file when `url` is nil.
public func deleteVoiceFolderItem(url: URL?) {

let fileManager = FileManager.default
let folderURL = getDocumentsDirectory()
do {
let contents = try fileManager.contentsOfDirectory(at: folderURL, includingPropertiesForKeys: nil, options: .skipsHiddenFiles)
for fileURL in contents {
guard let pathUrl = url else {
// No URL supplied: remove every saved recording rather than stopping at the first match.
if fileURL.absoluteString.contains("-osKit.m4a") {
try fileManager.removeItem(at: fileURL)
}
continue
}
if fileURL.absoluteString == pathUrl.absoluteString {
try fileManager.removeItem(at: fileURL)
delegate?.deleteVoiceFile(withFinish: true, withError: nil)
}
}
guard url != nil else {
delegate?.deleteVoiceFile(withFinish: true, withError: nil)
return
}
} catch {
delegate?.deleteVoiceFile(withFinish: false, withError: OSSSpeechKitErrorType.invalidDeleteVoiceFilePath.error)
}
}
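
In use: passing a specific URL deletes that file and reports back through deleteVoiceFile(withFinish:withError:), while passing nil sweeps every saved recording (speech and recordingURL are assumed from the earlier sketches):

// Delete one recording; the delegate receives the result.
speech.deleteVoiceFolderItem(url: recordingURL)

// Delete every "-osKit.m4a" recording in the documents directory.
speech.deleteVoiceFolderItem(url: nil)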

/// Transcribes the voice file at the given path.
public func recognizeSpeech(filePath: URL, finalBlock: ((_ text: String) -> Void)? = nil) {
let identifier = voice?.voiceType.rawValue ?? OSSVoiceEnum.UnitedStatesEnglish.rawValue
speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: identifier))
guard let audioFile = try? AVAudioFile(forReading: filePath) else {
return
}
let request = SFSpeechURLRecognitionRequest(url: audioFile.url)
speechRecognizer?.recognitionTask(with: request, resultHandler: { (result, error) in
if let result = result {
if result.isFinal {
let transcription = result.bestTranscription.formattedString
if let finalBlock = finalBlock {
finalBlock(transcription)
} else {
self.delegate?.voiceFilePathTranscription(withText: transcription)
}
}
} else if error != nil {
self.delegate?.didFailToProcessRequest(withError: OSSSpeechKitErrorType.invalidTranscriptionFilePath.error)
}
})
}
}
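
recognizeSpeech(filePath:finalBlock:) delivers the transcription through the optional closure when one is supplied, otherwise through voiceFilePathTranscription(withText:) on the delegate. A usage sketch, assuming recordingURL points at a saved recording:

// Closure-based: the delegate callback is bypassed.
speech.recognizeSpeech(filePath: recordingURL) { text in
    print("Transcription: \(text)")
}

// Delegate-based: voiceFilePathTranscription(withText:) fires instead.
speech.recognizeSpeech(filePath: recordingURL)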

@@ -547,6 +711,9 @@ extension OSSSpeech: SFSpeechRecognitionTaskDelegate, SFSpeechRecognizerDelegate
public func speechRecognitionTask(_ task: SFSpeechRecognitionTask, didFinishSuccessfully successfully: Bool) {
recognitionTask = nil
delegate?.didFinishListening(withText: spokenText)
if saveRecord {
delegate?.didFinishListening(withAudioFileURL: audioFileURL!, withText: spokenText)
}
setSession(isRecording: false)
}

@@ -568,6 +735,15 @@ extension OSSSpeech: SFSpeechRecognitionTaskDelegate, SFSpeechRecognizerDelegate

/// Docs available by Google searching for SFSpeechRecognizerDelegate
public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {}
}

// MARK: AVAudioRecorderDelegate
extension OSSSpeech: AVAudioRecorderDelegate {
public func audioRecorderDidFinishRecording(_ recorder: AVAudioRecorder, successfully flag: Bool) {
if flag {
audioRecorder?.stop()
stopVisualizerTimer()
}
}
}
#endif
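
Taken together, the PR supports a record, visualize, transcribe, delete round trip. A condensed sketch, assuming the OSSSpeech.shared singleton and the recordVoice()/endVoiceRecording() entry points from the library's README, with a delegate conforming as in the earlier sketch:

let speech = OSSSpeech.shared
speech.delegate = self            // an OSSSpeechDelegate conformer
speech.saveRecord = true          // persist the recording to Documents
speech.onUpdate = { samples in
    // Feed the visualizer while recording.
}
speech.recordVoice()              // begin listening and recording

// ... later, after endVoiceRecording():
// didFinishListening(withAudioFileURL:withText:) supplies the file URL,
// which can be transcribed again via recognizeSpeech(filePath:) or removed
// with deleteVoiceFolderItem(url:).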