2015-10-11 27 views
8

Sto cercando di eseguire una FFT su un file audio per trovarne la frequenza, utilizzando il framework Accelerate. Ho adattato il codice (probabilmente in modo sbagliato) da questa domanda: «Spectrogram from AVAudioPCMBuffer using Accelerate framework in Swift». Titolo della domanda: Swift FFT - Problema con lo split complex

Tuttavia, le magnitudini in 'spectrum' sono '0', 'inf' oppure 'nan', e anche le componenti 'real' e 'imag' dello split complex stampano risultati simili; questo indica che la causa del problema è lì, dato che: 'magnitude = sqrt(pow(real,2)+pow(imag,2))'. Correggetemi se sbaglio, ma penso che il resto del codice sia corretto.

Perché ottengo questi risultati, come posso risolvere il problema (quali valori dovrebbero avere le componenti dello split complex) e che cosa sto sbagliando? Tenete presente che sono alle prime armi con la FFT e il campionamento e non ho idea di come impostare tutto questo per un file audio, quindi qualsiasi aiuto sarebbe molto apprezzato. Grazie

Ecco il codice che sto usando:

// Question snippet: opens "foo.mp3" from the main bundle, tries to pack its samples
// into DSPComplex pairs and runs a forward real FFT with vDSP to obtain magnitudes.
// NOTE(review): `audioMixerNode` and `channelSamples` are used below but are not
// declared in this snippet — presumably properties declared elsewhere; confirm.
// get audio file 
    let fileURL:NSURL = NSBundle.mainBundle().URLForResource("foo", withExtension: "mp3")! 
    let audioFile = try! AVAudioFile(forReading: fileURL) 
    let fileFormat = audioFile.processingFormat 
    let frameCount = UInt32(audioFile.length) 

    // Buffer sized for the whole file.
    // NOTE(review): nothing in this snippet reads `audioFile` into `buffer`, so the
    // samples accessed below never come from the file within the visible code.
    let buffer = AVAudioPCMBuffer(PCMFormat: fileFormat, frameCapacity: frameCount) 
    let audioEngine = AVAudioEngine() 
    let playerNode = AVAudioPlayerNode() 
    audioMixerNode = audioEngine.mainMixerNode 

    let bufferSize = Int(frameCount) 
    // `channels` is a one-element array holding the channel count, so
    // `channelCount` (its element count) is always 1 here.
    let channels: NSArray = [Int(buffer.format.channelCount)] 
    let channelCount = channels.count 
    // `floats1` is not used anywhere else in this snippet.
    let floats1 = [Int(buffer.frameLength)] 
    for var i=0; i<channelCount; ++i { 
     channelSamples.append([]) 
     // Start offset: the channel index when interleaved, otherwise i * bufferSize.
     let firstSample = buffer.format.interleaved ? i : i*bufferSize 
     // Steps through the first channel pointer in increments of 2 * stride, pairing
     // sample j (real part) with sample j + stride (imaginary part).
     for var j=firstSample; j<bufferSize; j+=buffer.stride*2 { 
      channelSamples[i].append(DSPComplex(real: buffer.floatChannelData.memory[j], imag: buffer.floatChannelData.memory[j+buffer.stride])) 
     } 
    } 

    // connect node 
    audioEngine.attachNode(playerNode) 
    audioEngine.connect(playerNode, to: audioMixerNode, format: playerNode.outputFormatForBus(0)) 

    // Set up the transform 
    // log2n is log2(bufferSize) rounded to the NEAREST integer.
    // NOTE(review): when this rounds up, 2^log2n is larger than bufferSize.
    let log2n = UInt(round(log2(Double(bufferSize)))) 
    let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2)) 

    // Create the complex split value to hold the output of the transform 
    // why doesn't this work? 
    // Zero-initialised storage: bufferSize/2 real parts and bufferSize/2 imaginary parts.
    var realp = [Float](count: bufferSize/2, repeatedValue: 0) 
    var imagp = [Float](count: bufferSize/2, repeatedValue: 0) 
    var output = DSPSplitComplex(realp: &realp, imagp: &imagp) 

    // Converts interleaved complex input into the split-complex `output`.
    // NOTE(review): `channelSamples` is filled above as an array of arrays, yet it is
    // passed here as the flat DSPComplex input — confirm this is the intended memory.
    vDSP_ctoz(UnsafePointer(channelSamples), 2, &output, 1, UInt(bufferSize/2)) 

    // Do the fast Fourier forward transform 
    // In-place transform: `output` is both the input and the result.
    vDSP_fft_zrip(fftSetup, &output, 1, log2n, Int32(FFT_FORWARD)) 

    // Convert the complex output to magnitude 
    // vDSP_zvmags writes the SQUARED magnitude of each complex element into `fft`.
    var fft = [Float](count:Int(bufferSize/2), repeatedValue:0.0) 
    let bufferOver2: vDSP_Length = vDSP_Length(bufferSize/2) 
    vDSP_zvmags(&output, 1, &fft, 1, bufferOver2) 

    // Second pass: recomputes the (non-squared) magnitude of every element directly
    // from the split-complex real/imaginary parts and collects it in `spectrum`.
    var spectrum = [Float]() 
    for var i=0; i<bufferSize/2; ++i { 
     let imag = output.imagp[i] 
     let real = output.realp[i] 
     let magnitude = sqrt(pow(real,2)+pow(imag,2)) 
     spectrum.append(magnitude) } 

    // Release the setup 
    vDSP_destroy_fftsetup(fftSetup) 

risposta

3

C'erano un paio di problemi con il tuo codice:

  1. non stavi leggendo i campioni del file audio nel buffer
  2. channelSamples veniva riempito in modo non corretto
  3. vDSP_fft_zrip leggeva oltre la fine dell'array: si aspetta 2^log2N campioni
  4. l'output di vDSP_fft_zrip è in formato packed, mentre i calcoli si aspettano il formato non packed (unpacked)

    // Loads "foo.mp3" from the main bundle, reads its samples into a PCM buffer and
    // computes the squared-magnitude spectrum of the first channel with a vDSP real FFT.
    // Result: `fft` holds bufferSizePOT/2 power values (bin 0 = DC).
    // A missing resource or an unreadable file is treated as a fatal error.
    let fileURL:NSURL = NSBundle.mainBundle().URLForResource("foo", withExtension: "mp3")!
    let audioFile = try! AVAudioFile(forReading: fileURL)
    let frameCount = UInt32(audioFile.length)

    let buffer = AVAudioPCMBuffer(PCMFormat: audioFile.processingFormat, frameCapacity: frameCount)
    do {
        try audioFile.readIntoBuffer(buffer, frameCount:frameCount)
    } catch {
        // Without samples everything below would transform uninitialised memory,
        // so fail loudly instead of silently continuing.
        fatalError("Could not read audio file into buffer: \(error)")
    }

    // Size the transform from the frames actually read, not from the requested count.
    let sampleCount = Int(buffer.frameLength)
    // log2(0) is -inf (the UInt conversion would trap) and a single frame would give
    // empty half-length arrays, so require at least two frames.
    precondition(sampleCount >= 2, "Need at least two audio frames to run the FFT")

    // floor(), not round(): the transform consumes 2^log2n samples, so the exponent
    // must never round up past the number of samples available in the buffer.
    let log2n = UInt(floor(log2(Double(sampleCount))))

    let bufferSizePOT = Int(1 << log2n)

    // Set up the transform
    let fftSetup = vDSP_create_fftsetup(log2n, Int32(kFFTRadix2))

    // create packed real input
    // NOTE(review): `output` keeps pointers into `realp`/`imagp`; both arrays must stay
    // alive and must not be resized or reassigned while `output` is in use.
    var realp = [Float](count: bufferSizePOT/2, repeatedValue: 0)
    var imagp = [Float](count: bufferSizePOT/2, repeatedValue: 0)
    var output = DSPSplitComplex(realp: &realp, imagp: &imagp)

    // Reinterpret the first channel's real samples as interleaved (even, odd) pairs and
    // split them: even-indexed samples go to realp, odd-indexed samples to imagp.
    vDSP_ctoz(UnsafePointer<DSPComplex>(buffer.floatChannelData.memory), 2, &output, 1, UInt(bufferSizePOT/2))

    // Do the fast Fourier forward transform, packed input to packed output
    vDSP_fft_zrip(fftSetup, &output, 1, log2n, Int32(FFT_FORWARD))

    // Squared magnitude of every bin.
    var fft = [Float](count:Int(bufferSizePOT/2), repeatedValue:0.0)
    let bufferOver2: vDSP_Length = vDSP_Length(bufferSizePOT/2)
    vDSP_zvmags(&output, 1, &fft, 1, bufferOver2)

    // Packed format: element 0 stores the DC term in realp[0] and the Nyquist term in
    // imagp[0], so vDSP_zvmags yields DC^2 + Nyquist^2 for bin 0. Replace it with the
    // DC power alone (the Nyquist power, if needed, is output.imagp[0] squared).
    fft[0] = output.realp[0] * output.realp[0]

    // Release the setup
    vDSP_destroy_fftsetup(fftSetup)