The following code examples, extracted from open-source Python projects, illustrate how to use librosa.stft().
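Before the project snippets, here is a minimal sketch of the basic call pattern (the file path and parameter values are illustrative only, not from any of the projects below):

import librosa
import numpy as np

# Load audio as a mono float32 time series (path is illustrative).
y, sr = librosa.load("example.wav", sr=None)

# Complex STFT matrix of shape (1 + n_fft//2, n_frames).
D = librosa.stft(y, n_fft=2048, hop_length=512)

# Magnitude and phase, as used by many of the examples below.
magnitude, phase = np.abs(D), np.angle(D)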
def stft(wav, n_fft=1024, overlap=4, dt=tf.int32, absp=False):
    assert wav.shape[0] > n_fft
    X = tf.placeholder(dtype=dt, shape=wav.shape)
    X = tf.cast(X, tf.float32)
    hop = n_fft // overlap  # integer division so range() below gets an int
    ## prepare constant variable
    Pi = tf.constant(np.pi, dtype=tf.float32)
    W = tf.constant(scipy.hanning(n_fft), dtype=tf.float32)
    # tf.pack/tf.complex_abs are legacy TF 0.x names (tf.stack/tf.abs in TF >= 1.0)
    S = tf.pack([tf.fft(tf.cast(tf.multiply(W, X[i:i + n_fft]), tf.complex64))
                 for i in range(1, wav.shape[0] - n_fft, hop)])
    abs_S = tf.complex_abs(S)
    sess = tf.Session()
    if absp:
        return sess.run(abs_S, feed_dict={X: wav})
    else:
        return sess.run(S, feed_dict={X: wav})
def griffin_lim(mag, phase_angle, n_fft, hop, num_iters):
    """Iterative algorithm for phase retrieval from a magnitude spectrogram.

    Args:
        mag: Magnitude spectrogram.
        phase_angle: Initial condition for phase.
        n_fft: Size of the FFT.
        hop: Stride of FFT. Defaults to n_fft/2.
        num_iters: Griffin-Lim iterations to perform.

    Returns:
        audio: 1-D array of float32 sound samples.
    """
    fft_config = dict(n_fft=n_fft, win_length=n_fft, hop_length=hop, center=True)
    ifft_config = dict(win_length=n_fft, hop_length=hop, center=True)
    complex_specgram = inv_magphase(mag, phase_angle)
    for i in range(num_iters):
        audio = librosa.istft(complex_specgram, **ifft_config)
        if i != num_iters - 1:
            complex_specgram = librosa.stft(audio, **fft_config)
            _, phase = librosa.magphase(complex_specgram)
            phase_angle = np.angle(phase)
            complex_specgram = inv_magphase(mag, phase_angle)
    return audio
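The helper inv_magphase is not included in this snippet; a minimal sketch consistent with how it is called here (recombining a magnitude with a phase angle into a complex spectrogram) would be:

import numpy as np

def inv_magphase(mag, phase_angle):
    # Hypothetical reconstruction of the missing helper: pair the
    # magnitude with unit-modulus phase factors exp(1j * angle).
    return mag * np.exp(1j * phase_angle)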
def make_spectrum(self, filename, use_normalize):
    sr, y = wav.read(filename)
    if sr != 16000:
        raise ValueError('Sampling rate is expected to be 16kHz!')
    if y.dtype != 'float32':
        y = np.float32(y / 32767.)
    D = librosa.stft(y, n_fft=512, hop_length=256, win_length=512,
                     window=scipy.signal.hamming)
    Sxx = np.log10(abs(D)**2)
    if use_normalize:
        mean = np.mean(Sxx, axis=1).reshape((257, 1))
        std = np.std(Sxx, axis=1).reshape((257, 1)) + 1e-12
        Sxx = (Sxx - mean) / std
    slices = []
    for i in range(0, Sxx.shape[1] - self.FRAMELENGTH, self.OVERLAP):
        slices.append(Sxx[:, i:i + self.FRAMELENGTH])
    return np.array(slices)
def test_stft_istft(self):
    try:
        import librosa
        ds = F.load_digit_wav()
        name = list(ds.keys())[0]  # dict views are not indexable in Python 3
        path = ds[name]
        y, _ = speech.read(path, pcm=True)
        hop_length = int(0.01 * 8000)
        stft = signal.stft(y, n_fft=256, hop_length=hop_length, window='hann')
        stft_ = librosa.stft(y, n_fft=256, hop_length=hop_length, window='hann')
        self.assertTrue(np.allclose(stft, stft_.T))
        y1 = signal.istft(stft, hop_length=hop_length, window='hann')
        y2 = librosa.istft(stft_, hop_length=hop_length, window='hann')
        self.assertTrue(np.allclose(y1, y2))
    except ImportError:
        print("test_stft_istft requires librosa.")
def griffinlim(spectrogram, n_iter=50, window='hann', n_fft=2048,
               win_length=2048, hop_length=-1, verbose=False):
    if hop_length == -1:
        hop_length = n_fft // 4
    angles = np.exp(2j * np.pi * np.random.rand(*spectrogram.shape))
    t = tqdm(range(n_iter), ncols=100, mininterval=2.0, disable=not verbose)
    for i in t:
        full = np.abs(spectrogram).astype(complex) * angles  # np.complex was removed in NumPy >= 1.24
        inverse = librosa.istft(full, hop_length=hop_length,
                                win_length=win_length, window=window)
        rebuilt = librosa.stft(inverse, n_fft=n_fft, hop_length=hop_length,
                               win_length=win_length, window=window)
        angles = np.exp(1j * np.angle(rebuilt))
        if verbose:
            diff = np.abs(spectrogram) - np.abs(rebuilt)
            t.set_postfix(loss=np.linalg.norm(diff, 'fro'))
    full = np.abs(spectrogram).astype(complex) * angles
    inverse = librosa.istft(full, hop_length=hop_length,
                            win_length=win_length, window=window)
    return inverse
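A hedged usage sketch (the file path and parameter values are illustrative): drop the phase of an STFT, then let griffinlim estimate it back:

import librosa
import numpy as np

y, sr = librosa.load("example.wav", sr=None)   # path is illustrative
mag = np.abs(librosa.stft(y, n_fft=2048))      # discard the phase
y_hat = griffinlim(mag, n_iter=50, n_fft=2048) # reconstruct audio from magnitude only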
def __call__(self, y):
    """Short-time Fourier transform (STFT).

    Returns a complex-valued matrix D such that
        `np.abs(D[f, t])` is the magnitude of frequency bin `f` at frame `t`
        `np.angle(D[f, t])` is the phase of frequency bin `f` at frame `t`

    Parameters
    ----------
    y : np.ndarray [shape=(n,)], real-valued
        the input signal (audio time series)

    Returns
    -------
    D : np.ndarray [shape=(1 + n_fft/2, t), dtype=dtype]
        STFT matrix
    """
    return librosa.stft(y, **self.__dict__)
def extract_features(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.array(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=8).T)
    chroma = np.array(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T)
    mel = np.array(librosa.feature.melspectrogram(X, sr=sample_rate).T)
    contrast = np.array(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T)
    tonnetz = np.array(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T)
    return mfccs, chroma, mel, contrast, tonnetz
def extract_features():
    # `duration`, `sample_rate`, and the `features` accumulator are assumed
    # to be module-level globals in the source project; without the `global`
    # declaration the vstack below would raise UnboundLocalError.
    global features
    X = sounddevice.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
    sounddevice.wait()
    X = np.squeeze(X)
    stft = np.abs(librosa.stft(X))
    mfccs = np.array(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=8).T)
    chroma = np.array(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T)
    mel = np.array(librosa.feature.melspectrogram(X, sr=sample_rate).T)
    contrast = np.array(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T)
    tonnetz = np.array(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T)
    ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
    features = np.vstack([features, ext_features])
    return features
def _complex_spectrogram(self) -> ndarray:
    return librosa.stft(y=self.get_raw_audio(), n_fft=self.fourier_window_length,
                        hop_length=self.hop_length)
def _griffin_lim(S, n_fft, win_length, hop_length, num_iters):
    angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
    S_complex = np.abs(S).astype(complex)  # np.complex was removed in NumPy >= 1.24
    for i in range(num_iters):
        if i > 0:
            angles = np.exp(1j * np.angle(librosa.stft(y=y, n_fft=n_fft,
                                                       hop_length=hop_length,
                                                       win_length=win_length)))
        y = librosa.istft(S_complex * angles, hop_length=hop_length,
                          win_length=win_length)
    return y
def create_spectrogram_from_audio(data):
    global setting
    spectrogram = librosa.stft(data, n_fft=Config.n_fft,
                               hop_length=Config.hop_length).transpose()
    # divide the real and imaginary components of each element
    # concatenate the matrix with the real components and the matrix with imaginary components
    # (DataCorruptionError when saving complex numbers in TFRecords)
    # concatenated = np.concatenate([np.real(spectrogram), np.imag(spectrogram)], axis=1)
    return spectrogram  # [num_time_frames, num_freq_bins]
def plot_log_power_specgram(sound_names, raw_sounds):
    i = 1
    fig = plt.figure(figsize=(25, 60), dpi=900)
    for n, f in zip(sound_names, raw_sounds):
        plt.subplot(10, 1, i)
        D = librosa.logamplitude(np.abs(librosa.stft(f))**2, ref_power=np.max)
        librosa.display.specshow(D, x_axis='time', y_axis='log')
        plt.title(n.title())
        i += 1
    plt.suptitle('Figure 3: Log power spectrogram', x=0.5, y=0.915, fontsize=18)
    plt.show()
def extract_feature(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz
def get_spectrograms(sound_file):
    '''Extracts melspectrogram and log magnitude from given `sound_file`.

    Args:
        sound_file: A string. Full path of a sound file.

    Returns:
        Transposed S: A 2d array. A transposed melspectrogram with shape of (T, n_mels)
        Transposed magnitude: A 2d array. Has shape of (T, 1+hp.n_fft//2)
    '''
    # Loading sound file
    y, sr = librosa.load(sound_file, sr=None)  # or set sr to hp.sr.

    # stft. D: (1+n_fft//2, T)
    D = librosa.stft(y=y, n_fft=hp.n_fft, hop_length=hp.hop_length,
                     win_length=hp.win_length)

    # magnitude spectrogram
    magnitude = np.abs(D)  # (1+n_fft//2, T)

    # power spectrogram
    power = magnitude ** 2  # (1+n_fft//2, T)

    # mel spectrogram
    S = librosa.feature.melspectrogram(S=power, n_mels=hp.n_mels)  # (n_mels, T)

    return np.transpose(S.astype(np.float32)), np.transpose(magnitude.astype(np.float32))  # (T, n_mels), (T, 1+n_fft//2)
def spectrogram2wav(spectrogram):
    '''
    spectrogram: [t, f], i.e. [t, nfft // 2 + 1]
    '''
    spectrogram = spectrogram.T  # [f, t]
    X_best = copy.deepcopy(spectrogram)  # [f, t]
    for i in range(hp.n_iter):
        X_t = invert_spectrogram(X_best)
        est = librosa.stft(X_t, hp.n_fft, hp.hop_length,
                           win_length=hp.win_length)  # [f, t]
        phase = est / np.maximum(1e-8, np.abs(est))  # [f, t]
        X_best = spectrogram * phase  # [f, t]
    X_t = invert_spectrogram(X_best)
    return np.real(X_t)
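invert_spectrogram is not defined in this snippet; a minimal sketch consistent with the analysis settings used above (the window choice is an assumption) would be:

import librosa

def invert_spectrogram(spectrogram):
    # Hypothetical helper: inverse STFT matching the hp.* analysis
    # parameters used in spectrogram2wav above.
    return librosa.istft(spectrogram, hop_length=hp.hop_length,
                         win_length=hp.win_length, window="hann")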
def make_spectrum_phase(y, FRAMESIZE, OVERLAP, FFTSIZE):
    D = librosa.stft(y, n_fft=FRAMESIZE, hop_length=OVERLAP,
                     win_length=FFTSIZE, window=scipy.signal.hamming)
    Sxx = np.log10(abs(D)**2)
    phase = np.exp(1j * np.angle(D))
    # The hard-coded 257 assumes FRAMESIZE == 512 (i.e. 1 + n_fft//2 bins).
    mean = np.mean(Sxx, axis=1).reshape((257, 1))
    std = np.std(Sxx, axis=1).reshape((257, 1)) + 1e-12
    Sxx = (Sxx - mean) / std
    return Sxx, phase, mean, std
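For completeness, a hedged sketch of the matching inverse transform (the function name is ours; it simply undoes the normalization and log-power steps above before calling librosa.istft):

import numpy as np
import librosa
import scipy.signal

def recon_wav_from_spectrum_phase(Sxx, phase, mean, std, OVERLAP, FFTSIZE):
    # Hypothetical inverse of make_spectrum_phase: de-normalize, invert
    # log10(|D|**2) back to |D|, reattach the phase, then resynthesize.
    magnitude = np.sqrt(10 ** (Sxx * std + mean))
    D = magnitude * phase
    return librosa.istft(D, hop_length=OVERLAP, win_length=FFTSIZE,
                         window=scipy.signal.hamming)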
def get_feature_aqibsaeed_1(X, sr, au_path=None):
    """ http://aqibsaeed.github.io/2016-09-03-urban-sound-classification-part-1/ """
    import librosa
    if au_path is not None:
        X, sr = librosa.load(au_path)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sr, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sr).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sr).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sr).T, axis=0)
    feature = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
    return feature
def transform_audio(self, y):
    '''Compute the STFT magnitude and phase.

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT magnitude
        data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
            STFT phase
    '''
    n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    D = stft(y, hop_length=self.hop_length, n_fft=self.n_fft)
    D = fix_length(D, n_frames)

    mag, phase = magphase(D)
    if self.log:
        mag = amplitude_to_db(mag, ref=np.max)

    return {'mag': mag.T[self.idx].astype(np.float32),
            'phase': np.angle(phase.T)[self.idx].astype(np.float32)}
def compute_spec(audio_file, spectro_file):
    # Get actual audio
    audio, sr = librosa.load(audio_file, sr=config['resample_sr'])
    # Compute spectrogram
    if config['spectrogram_type'] == 'cqt':
        spec = librosa.cqt(audio, sr=sr, hop_length=config['hop'],
                           n_bins=config['cqt_bins'], real=False)
    elif config['spectrogram_type'] == 'mel':
        spec = librosa.feature.melspectrogram(y=audio, sr=sr, hop_length=config['hop'],
                                              n_fft=config['n_fft'], n_mels=config['n_mels'])
    elif config['spectrogram_type'] == 'stft':
        spec = librosa.stft(y=audio, n_fft=config['n_fft'])
    # Write results:
    with open(spectro_file, "wb") as f:  # pickle needs a binary-mode file in Python 3
        pickle.dump(spec, f, protocol=-1)  # spec shape: MxN.
def expand(self, audio):
    ori_len = audio.shape[0]
    tmp = resample(audio, r=0.5, type='sinc_best')
    down_len = tmp.shape[0]
    tmp = resample(tmp, r=(ori_len + 1) / float(down_len), type='sinc_best')
    tmp = librosa.stft(audio, 1024)
    phase = np.divide(tmp, np.abs(tmp))
    spec_input = np.abs(librosa.stft(audio, 1024))[0:n_input, ::]
    spec_input = spec_input[::, 0:spec_input.shape[1] // n_len * n_len]
    spec_input = np.split(spec_input, spec_input.shape[1] // n_len, axis=1)
    spec_input = np.asarray(spec_input)
    spec_input = np.expand_dims(spec_input, axis=-1)
    feed_dict = {self.input_op: np.log1p(spec_input) / 12.0}
    debug = self.sess.run(self.debug_op, feed_dict=feed_dict)
    np.save('debug.npy', debug)
    S = self.sess.run(self.eva_op, feed_dict=feed_dict)
    S[S >= 5e3] = 5e3
    S[S <= 0] = 0
    print('mean', np.mean(S))
    print(np.sum(np.isinf(S)))
    S = np.squeeze(np.concatenate(np.split(S, S.shape[0]), axis=2), axis=(0, -1))
    phase = phase[..., :S.shape[1]]
    print(phase.shape)
    print(S.shape)
    print(np.sum(np.isinf(np.multiply(S, phase))))
    X = librosa.istft(np.multiply(S, phase))
    return X
def get_spectrograms(sound_file):
    '''Extracts melspectrogram and log magnitude from given `sound_file`.

    Args:
        sound_file: A string. Full path of a sound file.

    Returns:
        Transposed S: A 2d array. A transposed melspectrogram with shape of (T, n_mels)
        Transposed magnitude: A 2d array. Has shape of (T, 1+hp.n_fft//2)
    '''
    # Loading sound file
    y, sr = librosa.load(sound_file, sr=hp.sr)

    # stft. D: (1+n_fft//2, T)
    D = librosa.stft(y=y, n_fft=hp.n_fft, hop_length=hp.hop_length,
                     win_length=hp.win_length)

    # magnitude spectrogram
    magnitude = np.abs(D)  # (1+n_fft//2, T)

    # power spectrogram
    power = magnitude ** 2  # (1+n_fft//2, T)

    # mel spectrogram
    S = librosa.feature.melspectrogram(S=power, n_mels=hp.n_mels)  # (n_mels, T)

    return np.transpose(S.astype(np.float32)), np.transpose(magnitude.astype(np.float32))  # (T, n_mels), (T, 1+n_fft//2)
def source_separation(self, x):
    if not Duration()(x) > 10:
        stftx = librosa.stft(x)
        real = stftx.real
        imag = stftx.imag
        ssp = find_sparse_source_points(real, imag)  # find sparsity in the signal
        cos_dist = cosine_distance(ssp)  # cosine distance from sparse data
        sources = find_number_of_sources(cos_dist)  # find possible number of sources
        if (sources == 0) or (sources == 1):
            # x is an instrumental track and doesn't have more than one source
            print("There's only one visible source")
            return x
        else:
            print("Separating sources")
            xs = NMF(stftx, sources)
            return xs[0]  # take the bass part  # TODO: correct NMF to return noiseless reconstruction
    else:
        stftx = librosa.stft(x[:441000])  # take 10 seconds of signal data to find sources
        print("It can take some time to find any source in this signal")
        real = stftx.real
        imag = stftx.imag
        ssp = find_sparse_source_points(real, imag)  # find sparsity in the signal
        cos_dist = cosine_distance(ssp)  # cosine distance from sparse data
        sources = find_number_of_sources(cos_dist)  # find possible number of sources
        if (sources == 0) or (sources == 1):
            # x is an instrumental track and doesn't have more than one source
            print("There's only one visible source")
            return x
        else:
            print("Separating sources")
            xs = NMF(librosa.stft(x), sources)
            return xs[0]  # take the bass part  # TODO: correct NMF to return noiseless reconstruction
def sad_music_remix(self, neg_arous_dir, files, decisions, harmonic=None):
    for subdirs, dirs, sounds in os.walk(neg_arous_dir):
        fx = random.choice(sounds[::-1])
        fy = random.choice(sounds[:])
        x = MonoLoader(filename=neg_arous_dir + '/' + fx)()
        y = MonoLoader(filename=neg_arous_dir + '/' + fy)()
        fx = fx.split('.')[0]
        fy = fy.split('.')[0]
        fx = np.where(files == fx)[0][0]
        fy = np.where(files == fy)[0][0]
        if harmonic is False or harmonic is None:  # the published `harmonic is False or None` never tested the second operand
            dec_x = get_coordinate(fx, 1, decisions)
            dec_y = get_coordinate(fy, 1, decisions)
        else:
            dec_x = get_coordinate(fx, 2, decisions)
            dec_y = get_coordinate(fy, 2, decisions)
        x = self.source_separation(x)
        x = scratch_music(x, dec_x)
        x = x[np.nonzero(x)]
        y = scratch_music(y, dec_y)
        y = y[np.nonzero(y)]
        x, y = same_time(x, y)
        negative_arousal_samples = [i / i.max() for i in (x, y)]
        negative_arousal_x = np.array(negative_arousal_samples).sum(axis=0)
        negative_arousal_x = 0.5 * negative_arousal_x / negative_arousal_x.max()
        if harmonic is True:
            return librosa.decompose.hpss(librosa.stft(negative_arousal_x),
                                          margin=(1.0, 5.0))[0]
        if harmonic is False or harmonic is None:
            onsets = hfc_onsets(np.float32(negative_arousal_x))
            interv = seconds_to_indices(onsets)
            steps = overlapped_intervals(interv)
            output = librosa.effects.remix(negative_arousal_x, steps[::-1], align_zeros=False)
            output = librosa.effects.pitch_shift(output, sr=44100, n_steps=3)
            remix_filename = ('data/emotions/remixes/sad/'
                              + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg')
            MonoWriter(filename=remix_filename, format='ogg',
                       sampleRate=44100)(np.float32(output))
            subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
def happy_music_remix(self, pos_arous_dir, files, decisions, harmonic=None):
    for subdirs, dirs, sounds in os.walk(pos_arous_dir):
        fx = random.choice(sounds[::-1])
        fy = random.choice(sounds[:])
        x = MonoLoader(filename=pos_arous_dir + '/' + fx)()
        y = MonoLoader(filename=pos_arous_dir + '/' + fy)()
        fx = fx.split('.')[0]
        fy = fy.split('.')[0]
        fx = np.where(files == fx)[0][0]
        fy = np.where(files == fy)[0][0]
        if harmonic is False or harmonic is None:  # the published `harmonic is False or None` never tested the second operand
            dec_x = get_coordinate(fx, 3, decisions)
            dec_y = get_coordinate(fy, 3, decisions)
        else:
            dec_x = get_coordinate(fx, 0, decisions)
            dec_y = get_coordinate(fy, 0, decisions)
        x = self.source_separation(x)
        x = scratch_music(x, dec_x)
        y = scratch_music(y, dec_y)
        x = x[np.nonzero(x)]
        y = y[np.nonzero(y)]
        x, y = same_time(x, y)
        positive_arousal_samples = [i / i.max() for i in (x, y)]
        positive_arousal_x = np.float32(positive_arousal_samples).sum(axis=0)
        positive_arousal_x = 0.5 * positive_arousal_x / positive_arousal_x.max()
        if harmonic is True:
            return librosa.decompose.hpss(librosa.stft(positive_arousal_x),
                                          margin=(1.0, 5.0))[0]
        if harmonic is False or harmonic is None:
            interv = RhythmExtractor2013()(positive_arousal_x)[1] * 44100
            steps = overlapped_intervals(interv)
            output = librosa.effects.remix(positive_arousal_x, steps, align_zeros=False)
            output = librosa.effects.pitch_shift(output, sr=44100, n_steps=4)
            remix_filename = ('data/emotions/remixes/happy/'
                              + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg')
            MonoWriter(filename=remix_filename, format='ogg',
                       sampleRate=44100)(np.float32(output))
            subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
def not_angry_music_remix(self, neg_arous_dir, files, decisions):
    sounds = []
    for i in range(len(neg_arous_dir)):
        for subdirs, dirs, s in os.walk(neg_arous_dir[i]):
            sounds.append(subdirs + '/' + random.choice(s))
    fx = random.choice(sounds[::-1])
    fy = random.choice(sounds[:])
    x = MonoLoader(filename=fx)()
    y = MonoLoader(filename=fy)()
    fx = fx.split('/')[1].split('.')[0]
    fy = fy.split('/')[1].split('.')[0]
    fx = np.where(files == fx)[0]
    fy = np.where(files == fy)[0]
    dec_x = get_coordinate(fx, choice(range(1, 3)), decisions)
    dec_y = get_coordinate(fy, choice(range(1, 3)), decisions)
    x = self.source_separation(x)
    x = scratch_music(x, dec_x)
    y = scratch_music(y, dec_y)
    x = x[np.nonzero(x)]
    y = y[np.nonzero(y)]
    x, y = same_time(x, y)
    morph = stft.morph(x1=x, x2=y, fs=44100, w1=np.hanning(1025), N1=2048,
                       w2=np.hanning(1025), N2=2048, H1=512, smoothf=0.1, balancef=0.7)
    onsets = hfc_onsets(np.float32(morph))
    interv = seconds_to_indices(onsets)
    steps = overlapped_intervals(interv)
    output = librosa.effects.remix(morph, steps[::-1], align_zeros=False)
    output = librosa.effects.pitch_shift(output, sr=44100, n_steps=4)
    remix_filename = ('data/emotions/remixes/not angry/'
                      + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg')
    MonoWriter(filename=remix_filename, sampleRate=44100,
               format='ogg')(np.float32(output))
    subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
def not_relaxed_music_remix(self, pos_arous_dir, files, decisions):
    sounds = []
    for i in range(len(pos_arous_dir)):
        for subdirs, dirs, s in os.walk(pos_arous_dir[i]):
            sounds.append(subdirs + '/' + random.choice(s))
    fx = random.choice(sounds[::-1])
    fy = random.choice(sounds[:])
    x = MonoLoader(filename=fx)()
    y = MonoLoader(filename=fy)()
    fx = fx.split('/')[1].split('.')[0]
    fy = fy.split('/')[1].split('.')[0]
    fx = np.where(files == fx)[0]
    fy = np.where(files == fy)[0]
    dec_x = get_coordinate(fx, choice([0, 1, 3]), decisions)
    dec_y = get_coordinate(fy, choice([0, 1, 3]), decisions)
    x = self.source_separation(x)
    x = scratch_music(x, dec_x)
    y = scratch_music(y, dec_y)
    x = x[np.nonzero(x)]
    y = y[np.nonzero(y)]
    x, y = same_time(x, y)
    morph = stft.morph(x1=x, x2=y, fs=44100, w1=np.hanning(1025), N1=2048,
                       w2=np.hanning(1025), N2=2048, H1=512, smoothf=0.01, balancef=0.7)
    interv = RhythmExtractor2013()(np.float32(morph))[1] * 44100
    steps = overlapped_intervals(interv)
    output = librosa.effects.remix(morph, steps[::-1], align_zeros=False)
    output = librosa.effects.pitch_shift(output, sr=44100, n_steps=3)
    remix_filename = ('data/emotions/remixes/not relaxed/'
                      + str(time.strftime("%Y%m%d-%H:%M:%S")) + 'multitag_remix.ogg')
    MonoWriter(filename=remix_filename, sampleRate=44100,
               format='ogg')(np.float32(output))
    subprocess.call(["ffplay", "-nodisp", "-autoexit", remix_filename])
def parse_audio(self, audio_path):
    if self.augment:
        y = load_randomly_augmented_audio(audio_path, self.sample_rate)
    else:
        y = load_audio(audio_path)
    if self.noiseInjector:
        add_noise = np.random.binomial(1, self.noise_prob)
        if add_noise:
            y = self.noiseInjector.inject_noise(y)
    n_fft = int(self.sample_rate * self.window_size)
    win_length = n_fft
    hop_length = int(self.sample_rate * self.window_stride)
    # STFT
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                     win_length=win_length, window=self.window)
    spect, phase = librosa.magphase(D)
    # S = log(S+1)
    spect = np.log1p(spect)
    spect = torch.FloatTensor(spect)
    if self.normalize:
        mean = spect.mean()
        std = spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
def __init__(self, tex_wnd, fft_len=512, sr=22050):
    self.tex_wnd = tex_wnd
    self.an_wnd_len = fft_len
    self.sr = sr
    # calc signal spectrum
    self.fft_tex_wnds = np.abs(
        librosa.stft(
            y=tex_wnd,
            n_fft=fft_len,
            hop_length=fft_len,
        )
    )
def stft_mc(x, N=1024, hop=None, window='hann'):
    if hop is None:
        hop = N // 2  # integer hop so it can be used as a stride and array size
    S = x.shape
    if len(S) == 1:
        nch = 1
        nsampl = len(x)
        x = np.reshape(x, (1, nsampl))
    else:
        nch = S[0]
        nsampl = S[1]
    xdtype = x.dtype
    nfram = int(scipy.ceil(float(nsampl) / float(hop)))
    npad = int(nfram) * hop - nsampl
    pad = np.zeros((nch, npad)).astype(xdtype)
    x = np.concatenate((x, pad), axis=1)
    # pad the edges to avoid window taper effects
    pad = np.zeros((nch, N)).astype(xdtype)
    x = np.concatenate((pad, x, pad), axis=1)
    for ich in range(0, nch):
        x0 = x[ich, :]
        if not x0.flags.c_contiguous:
            x0 = x0.copy(order='C')
        X0 = librosa.core.stft(x0, n_fft=N, hop_length=hop, window=window,
                               center=False, dtype=np.complex64)
        if ich == 0:
            X = np.zeros((N // 2 + 1, X0.shape[1], nch)).astype(np.complex64)
            X[:, :, 0] = X0
        else:
            X[:, :, ich] = X0
    return X
def process_audio(fname, n_fft=2048, win_length=1200, hop_length=300, sr=16000):
    wave, sr = librosa.load(fname, mono=True, sr=sr)

    # trim initial silence
    wave, _ = librosa.effects.trim(wave)

    # first pad the audio to the maximum length
    # we ensure it is a multiple of 4r so it works with max frames
    # (parenthesized: an unbracketed `% 4*r` would bind as `(x % 4) * r`)
    assert math.ceil(maximum_audio_length / hop_length) % (4 * r) == 0
    if wave.shape[0] <= maximum_audio_length:
        wave = np.pad(wave, (0, maximum_audio_length - wave.shape[0]),
                      'constant', constant_values=0)
    else:
        return None, None

    pre_emphasis = 0.97
    wave = np.append(wave[0], wave[1:] - pre_emphasis * wave[:-1])

    stft = librosa.stft(wave, n_fft=n_fft, win_length=win_length, hop_length=hop_length)
    mel = librosa.feature.melspectrogram(S=stft, n_mels=80)

    stft = np.log(np.abs(stft) + 1e-8)
    mel = np.log(np.abs(mel) + 1e-8)

    stft = reshape_frames(stft)
    mel = reshape_frames(mel)

    return mel, stft
def stft(x, fs, framesz, hop):
    framesamp = int(framesz * fs)
    hopsamp = int(hop * fs)
    w = scipy.hanning(framesamp)
    X = scipy.array([scipy.fft(w * x[i:i + framesamp])
                     for i in range(0, len(x) - framesamp, hopsamp)])
    return X
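For comparison, a hedged sketch of roughly the same framing expressed through librosa.stft; note that librosa keeps only the 1 + n_fft//2 non-negative frequency bins and returns frames as columns, so the result is the transposed half-spectrum of the scipy version rather than a bit-identical array:

import librosa

def stft_librosa(x, fs, framesz, hop):
    framesamp = int(framesz * fs)
    hopsamp = int(hop * fs)
    # center=False mimics the manual framing above; output shape is
    # (1 + framesamp//2, n_frames) instead of (n_frames, framesamp).
    return librosa.stft(x, n_fft=framesamp, hop_length=hopsamp,
                        window='hann', center=False)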
def wave_to_complex_spectrogram(wave, fs):
    return librosa.stft(wave, n_fft=512, hop_length=128, win_length=512)
def _griffin_lim_tensorflow(S):
    '''TensorFlow implementation of Griffin-Lim
    Based on https://github.com/Kyubyong/tensorflow-exercises/blob/master/Audio_Processing.ipynb
    '''
    with tf.variable_scope('griffinlim'):
        # TensorFlow's stft and istft operate on a batch of spectrograms; create batch of size 1
        S = tf.expand_dims(S, 0)
        S_complex = tf.identity(tf.cast(S, dtype=tf.complex64))
        y = _istft_tensorflow(S_complex)
        for i in range(hparams.griffin_lim_iters):
            est = _stft_tensorflow(y)
            angles = est / tf.cast(tf.maximum(1e-8, tf.abs(est)), tf.complex64)
            y = _istft_tensorflow(S_complex * angles)
        return tf.squeeze(y, 0)
def _stft(y):
    n_fft, hop_length, win_length = _stft_parameters()
    return librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
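_stft_parameters is not shown here; in the Tacotron-style codebases this helper comes from, it typically derives the sizes from hparams along these lines (the hparams field names are assumptions):

def _stft_parameters():
    # Hypothetical reconstruction of the missing helper, following the
    # common Tacotron-style hparams convention (frame sizes given in ms).
    n_fft = (hparams.num_freq - 1) * 2
    hop_length = int(hparams.frame_shift_ms / 1000 * hparams.sample_rate)
    win_length = int(hparams.frame_length_ms / 1000 * hparams.sample_rate)
    return n_fft, hop_length, win_length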
def _stft_tensorflow(signals):
    n_fft, hop_length, win_length = _stft_parameters()
    return tf.contrib.signal.stft(signals, win_length, hop_length, n_fft, pad_end=False)
def get_spectrogram(sound_fpath):
    '''Extracts a melspectrogram from the given `sound_fpath`.

    Args:
        sound_fpath: A string. Full path of a sound file.

    Returns:
        Transposed S: A 2d array. A transposed melspectrogram with shape of (T, n_mels)
    '''
    # Loading sound file
    y, sr = librosa.load(sound_fpath, sr=None)  # or set sr to hp.sr.

    # stft. D: (1+n_fft//2, T)
    D = librosa.stft(y=y, n_fft=hp.n_fft, hop_length=hp.hop_length,
                     win_length=hp.win_length)

    # magnitude spectrogram
    magnitude = np.abs(D)  # (1+n_fft//2, T)

    # power spectrogram
    power = magnitude ** 2

    # mel spectrogram
    S = librosa.feature.melspectrogram(S=power, n_mels=hp.n_mels)  # (n_mels, T)

    return np.transpose(S.astype(np.float32))
def parse_audio(self, audio_path):
    if self.augment:
        y = load_randomly_augmented_audio(audio_path)
    else:
        y = load_audio(audio_path)
    if self.noiseInjector:
        add_noise = np.random.binomial(1, self.noise_prob)
        if add_noise:
            y = self.noiseInjector.inject_noise(y)
    n_fft = int(self.sample_rate * self.window_size)
    win_length = n_fft
    hop_length = int(self.sample_rate * self.window_stride)
    # STFT
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                     win_length=win_length, window=self.window)
    spect, phase = librosa.magphase(D)
    # S = log(S+1)
    spect = np.log1p(spect)
    spect = torch.FloatTensor(spect)
    if self.normalize:
        mean = spect.mean()
        std = spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
def process_music_data(data_in, is_fft, is_energy, n_output_bins, n_fft, is_visual):
    # length is len(data_in)/4
    data_np = np.frombuffer(data_in, dtype=np.float32)  # np.fromstring is deprecated for binary data

    # visualizer
    if is_visual:
        visualizer(data_np)

    # energy
    if is_energy:
        energy = np.abs(data_np) ** 2
        energy = energy.sum()
        energy *= 2**5
        energy_output = energy.astype(np.uint16)
    else:
        energy_output = np.zeros(2).astype(np.uint16)

    # fft
    if is_fft:
        global sample_rate
        # down-sample by 4, with filtering, energy not scaled
        data_np = librosa.resample(data_np, sample_rate, sample_rate / 4,
                                   res_type='kaiser_fast')
        # short time fft over n_fft samples
        fft_data = librosa.stft(data_np, n_fft, hop_length=n_fft, center=False)
        fft_data_mag = np.abs(fft_data[0:n_fft // 2]) ** 2
        # magnitude scaling
        fft_data_mag *= 2**3
        fft_output = get_output_fft_bins(fft_data_mag, n_output_bins)
        fft_output = fft_output.astype(np.uint8)
    else:
        fft_output = np.zeros(n_output_bins).astype(np.uint8)

    return fft_output, energy_output
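get_output_fft_bins is not shown; a minimal sketch consistent with how it is used above (collapsing the n_fft//2 magnitude bins into n_output_bins values; the grouping strategy is a guess) could be:

import numpy as np

def get_output_fft_bins(fft_mag, n_output_bins):
    # Hypothetical helper: average groups of adjacent frequency bins
    # so the output has exactly n_output_bins entries.
    groups = np.array_split(fft_mag, n_output_bins, axis=0)
    return np.array([g.mean() for g in groups])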
def feature_extraction(y=None, fs=None, statistics=True, include_mfcc0=True,
                       include_delta=True, include_acceleration=True,
                       mfcc_params=None, delta_params=None, acceleration_params=None):
    # Extract features, Mel Frequency Cepstral Coefficients
    eps = numpy.spacing(1)

    # Windowing function
    if mfcc_params['window'] == 'hamming_asymmetric':
        window = scipy.signal.hamming(mfcc_params['n_fft'], sym=False)
    elif mfcc_params['window'] == 'hamming_symmetric':
        window = scipy.signal.hamming(mfcc_params['n_fft'], sym=True)
    elif mfcc_params['window'] == 'hann_asymmetric':
        window = scipy.signal.hann(mfcc_params['n_fft'], sym=False)
    elif mfcc_params['window'] == 'hann_symmetric':
        window = scipy.signal.hann(mfcc_params['n_fft'], sym=True)
    else:
        window = None

    # Calculate Static Coefficients
    magnitude_spectrogram = numpy.abs(librosa.stft(y + eps,
                                                   n_fft=mfcc_params['n_fft'],
                                                   win_length=mfcc_params['win_length'],
                                                   hop_length=mfcc_params['hop_length'],
                                                   window=window))**2
    mel_basis = librosa.filters.mel(sr=fs,
                                    n_fft=mfcc_params['n_fft'],
                                    n_mels=mfcc_params['n_mels'],
                                    fmin=mfcc_params['fmin'],
                                    fmax=mfcc_params['fmax'],
                                    htk=mfcc_params['htk'])
    mel_spectrum = numpy.dot(mel_basis, magnitude_spectrogram)
    mfcc = librosa.feature.mfcc(S=librosa.logamplitude(mel_spectrum))

    # Collect the feature matrix
    feature_matrix = mfcc
    if include_delta:
        # Delta coefficients
        mfcc_delta = librosa.feature.delta(mfcc, **delta_params)
        # Add Delta Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta))

    if include_acceleration:
        # Acceleration coefficients (aka delta-delta)
        mfcc_delta2 = librosa.feature.delta(mfcc, order=2, **acceleration_params)
        # Add Acceleration Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta2))

    if not include_mfcc0:
        # Omit mfcc0
        feature_matrix = feature_matrix[1:, :]

    feature_matrix = feature_matrix.T

    # Collect into data structure
    if statistics:
        return {
            'feat': feature_matrix,
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix ** 2, axis=0),
            }
        }
    else:
        return {'feat': feature_matrix}
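To make the expected inputs concrete, here is a hedged example of the parameter dictionaries this function consumes (the values are illustrative, not from the source project, and `audio` stands for a previously loaded mono signal):

mfcc_params = {
    'window': 'hamming_asymmetric',  # one of the four names handled above
    'n_fft': 2048,
    'win_length': 2048,
    'hop_length': 1024,
    'n_mels': 40,
    'fmin': 0,
    'fmax': 22050,
    'htk': False,
}
delta_params = {'width': 9}
acceleration_params = {'width': 9}

result = feature_extraction(y=audio, fs=44100,
                            mfcc_params=mfcc_params,
                            delta_params=delta_params,
                            acceleration_params=acceleration_params)
features = result['feat']  # (n_frames, n_coefficients)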