Trying to get the frequencies of a .wav file in Python

前端 未结 3 1018
[愿得一人]
[愿得一人] 2020-12-31 11:28

I know that questions about .wav files in Python have been just about beaten to death, but I am extremely frustrated as no one's answer seems to be working for me. What I'…

相关标签:
3条回答
  • 2020-12-31 11:57

    If you'd like to detect the pitch of a sound (and it seems you do), then in terms of Python libraries your best bet is aubio. Please consult the example below for an implementation.

    import sys

    import numpy as np
    from aubio import source, pitch

    # Pitch-tracking demo: read a sound file hop by hop, estimate the pitch of
    # every hop with the YIN method and print the average of the estimates.

    win_s = 4096  # analysis window size, in samples
    hop_s = 512   # number of samples read (and advanced) per step

    # `your_file` is a placeholder: set it to the path of the .wav file first.
    # Passing 0 as the sample rate asks aubio to keep the file's own rate,
    # which is then read back from the opened source.
    s = source(your_file, 0, hop_s)
    samplerate = s.samplerate

    tolerance = 0.8

    pitch_o = pitch("yin", win_s, hop_s, samplerate)
    # Report pitches in Hz so that the value matches the "hz" label printed
    # at the end (with "midi" the average would be a MIDI note number).
    pitch_o.set_unit("Hz")
    pitch_o.set_tolerance(tolerance)

    pitches = []
    confidences = []

    total_frames = 0
    while True:
        samples, read = s()
        # The detector returns a one-element array; keep the scalar estimate.
        pitches.append(pitch_o(samples)[0])
        confidences.append(pitch_o.get_confidence())
        total_frames += read
        # A short read means the end of the file has been reached.
        if read < hop_s:
            break

    # NOTE(review): every hop contributes to the mean, including hops for
    # which the detector reported low confidence -- filter on `confidences`
    # if only reliable estimates should count.
    print("Average frequency = " + str(np.array(pitches).mean()) + " hz")
    

    Be sure to check docs on pitch detection methods.

    I also thought you might be interested in estimating the mean frequency and some other audio parameters without using any special libraries. Let's just use numpy! This should give you much better insight into how such audio features can be calculated. It's based on specprop from the seewave package. Check the docs for the meaning of the computed features.

    import numpy as np
    
    def spectral_properties(y: np.ndarray, fs: int) -> dict:
        """Describe the magnitude spectrum of ``y`` as a frequency distribution.

        The magnitudes of the real FFT of ``y`` are normalised to sum to 1 and
        treated as weights over the FFT bin frequencies.

        Args:
            y: One-dimensional array of signal samples.
            fs: Sampling rate used to convert FFT bins to frequencies.

        Returns:
            A dict with the keys:
            ``mean`` / ``sd``  - amplitude-weighted mean and standard deviation
            of the bin frequencies;
            ``median``, ``Q25``, ``Q75`` - frequency of the first bin at which
            the cumulative normalised amplitude exceeds 0.5 / 0.25 / 0.75;
            ``IQR`` - ``Q75 - Q25``;
            ``mode`` - frequency of the bin with the largest amplitude;
            ``skew`` / ``kurt`` - third and fourth standardised moments of the
            normalised amplitude values themselves.
        """
        spec = np.abs(np.fft.rfft(y))
        freq = np.fft.rfftfreq(len(y), d=1 / fs)
        amp = spec / spec.sum()
        mean = (freq * amp).sum()
        sd = np.sqrt(np.sum(amp * ((freq - mean) ** 2)))
        amp_cumsum = np.cumsum(amp)

        def _quantile_freq(q):
            # The number of bins whose cumulative amplitude is still <= q is,
            # with 0-based indexing, already the index of the first bin that
            # exceeds q.  (An extra "+ 1" would select the following bin and
            # run off the end when the quantile falls in the last bin.)
            idx = np.count_nonzero(amp_cumsum <= q)
            return freq[min(idx, len(freq) - 1)]

        median = _quantile_freq(0.5)
        mode = freq[amp.argmax()]
        Q25 = _quantile_freq(0.25)
        Q75 = _quantile_freq(0.75)
        IQR = Q75 - Q25
        z = amp - amp.mean()
        w = amp.std()
        skew = ((z ** 3).sum() / (len(spec) - 1)) / w ** 3
        kurt = ((z ** 4).sum() / (len(spec) - 1)) / w ** 4

        result_d = {
            'mean': mean,
            'sd': sd,
            'median': median,
            'mode': mode,
            'Q25': Q25,
            'Q75': Q75,
            'IQR': IQR,
            'skew': skew,
            'kurt': kurt
        }

        return result_d
    
    0 讨论(0)
  • 2020-12-31 11:59

    I felt the OP's frustration - it shouldn't be so hard to find out how to get the values of the spectrogram, instead of only seeing the spectrogram image, if someone needs them:

    #!/usr/bin/env python

    import librosa
    import sys
    import numpy as np
    import matplotlib.pyplot as plt
    import librosa.display

    # Dump the short-time Fourier transform of a sound file as text: first
    # some summary figures, then one comma-separated row per STFT frame with
    # the magnitude of every frequency bin.

    # Never abbreviate printed arrays with "...".
    np.set_printoptions(threshold=sys.maxsize)

    filename = 'filename.wav'
    Fs = 44100
    clip, sample_rate = librosa.load(filename, sr=Fs)

    n_fft = 1024  # frame length
    hop_length = 512

    X = librosa.stft(clip, n_fft=n_fft, hop_length=hop_length)

    # Optional: display the spectrogram instead of (or as well as) printing it.
    #Xdb = librosa.amplitude_to_db(abs(X))
    #plt.figure(figsize=(14, 5))
    #librosa.display.specshow(Xdb, sr=Fs, x_axis='time', y_axis='hz')
    # To show the frequency axis on a log scale instead:
    #librosa.display.specshow(Xdb, sr=Fs, x_axis='time', y_axis='log')
    #plt.colorbar()

    #librosa.display.waveplot(clip, sr=Fs)
    #plt.show()

    # Time / frequency axes of the analysis.
    t_samples = np.arange(clip.shape[0]) / Fs
    t_frames = np.arange(X.shape[1]) * hop_length / Fs
    #f_hertz = np.arange(N / 2 + 1) * Fs / N       # Works only when N is even
    f_hertz = np.fft.rfftfreq(n_fft, 1 / Fs)         # Works also when N is odd

    # Summary
    print('Time (seconds) of last sample:', t_samples[-1])
    print('Time (seconds) of last frame: ', t_frames[-1])
    print('Frequency (Hz) of last bin:   ', f_hertz[-1])

    # This value is a count of samples, not a time, so label it as such.
    print('Number of samples : ', len(t_samples))

    # Array of frame times
    print('Time of frames (seconds) : ', t_frames)
    # Array of frequency bins
    print('Frequency (Hz) : ', f_hertz)

    print('Number of frames : ', len(t_frames))
    print('Number of bins : ', len(f_hertz))

    # Frame-by-frame magnitude of each frequency bin.
    # Header row: the bin frequencies.  Every bin and every frame is written,
    # including bin 0 (0 Hz) and frame 0 (t = 0).
    print('Bins,' + ''.join(str(hz) + ',' for hz in f_hertz))

    # X is indexed [bin, frame]; take the magnitude once for the whole matrix.
    # Use np.angle(X) in the same way to print the phase of each bin, or drop
    # the "%.02f" formatting below for full precision.
    magnitudes = np.abs(X)
    for frame_idx, frame_time in enumerate(t_frames):
        cells = ''.join("%.02f," % value for value in magnitudes[:, frame_idx])
        print(str(frame_time) + ',' + cells)
    
    0 讨论(0)
  • 2020-12-31 12:03

    Try something along the lines of the code below; it worked for me with a sine-wave file with a frequency of 1234 Hz that I generated from this page.

    from scipy.io import wavfile
    
    def freq(file, start_time, end_time):
        """Estimate the dominant frequency of a WAV file by counting zero crossings.

        Counts the negative-to-non-negative transitions between consecutive
        samples inside the requested window and divides by the window length.

        Args:
            file: Path (or open file object) accepted by ``wavfile.read``.
            start_time: Start of the window, in milliseconds.
            end_time: End of the window, in milliseconds.

        Returns:
            Upward zero crossings per second of requested window, as a float.

        Raises:
            ZeroDivisionError: If ``start_time == end_time``.

        Notes:
            Only the first channel of a multi-channel file is analysed.
            The samples must be signed (centred on zero) for crossings to be
            found.
        """
        sample_rate, data = wavfile.read(file)
        if data.ndim > 1:
            # Multi-channel audio: comparing whole rows has no truth value.
            data = data[:, 0]

        start_point = max(int(sample_rate * start_time / 1000), 0)
        end_point = int(sample_rate * end_time / 1000)
        length = (end_time - start_time) / 1000

        # Each candidate index i is compared with i + 1, so the last usable
        # index is len(data) - 2 even if the window reaches the end of file.
        last_point = min(end_point, len(data) - 1)
        window = data[start_point:last_point + 1]

        # ">= 0" on the right-hand side so that a crossing which lands exactly
        # on a zero-valued sample is counted (once) instead of being missed.
        rising = (window[:-1] < 0) & (window[1:] >= 0)
        counter = int(rising.sum())
        return counter / length
    
    # Example call: start and end of the analysis window are given in
    # milliseconds (here 1000 ms to 2100 ms of "sin.wav").
    freq("sin.wav", 1000 ,2100)
    # The bare number below is the result the answer's author pasted for the call above.
    1231.8181818181818
    

    edited: cleaned up for loop a bit

    0 讨论(0)
提交回复
热议问题