我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用numpy.hanning()。
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Args:
        sig: 1-D sequence of samples.
        frameSize: Number of samples per analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable that maps a length to a window array.

    Returns:
        2-D complex array with one row per frame; each row is
        ``np.fft.rfft`` of the windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')

    # The frame count has to be a plain int: as_strided rejects float shapes.
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    cols = max(cols, 0)

    # Zero-pad so the last frame lies completely inside the buffer; without
    # the padding as_strided would read memory past the end of ``samples``.
    needed = (cols - 1) * hopSize + frameSize if cols else 0
    if needed > len(samples):
        samples = np.append(samples, np.zeros(needed - len(samples)))

    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)


# all the definition of the following variable can be found
# train_net.py
def hanning(M):
    """Returns the Hanning window.

    The window follows

    .. math::
        w(n) = 0.5 - 0.5\\cos\\left(\\frac{2\\pi{n}}{M-1}\\right)
        \\qquad 0 \\leq n \\leq M-1

    Args:
        M (:class:`~int`):
            Number of points in the output window. If zero or less, an empty
            array is returned.

    Returns:
        ~cupy.ndarray: Output ndarray.

    .. seealso:: :func:`numpy.hanning`
    """
    if M < 1:
        # Non-positive length: nothing to generate.
        result = from_data.array([])
    elif M == 1:
        # A single-point window is defined as 1.0 (avoids dividing by M - 1).
        result = basic.ones(1, float)
    else:
        sample_index = ranges.arange(0, M)
        result = 0.5 - 0.5 * trigonometric.cos(
            2.0 * numpy.pi * sample_index / (M - 1))
    return result
def ams_extractor(x, sr, win_len, shift_len, order):
    """Extract a 15-channel amplitude-modulation feature matrix.

    The Hilbert envelope of ``x`` is taken ``order`` times and scaled by 1/3.
    The envelope is then cut into Hann-windowed frames, and the FFT magnitude
    of every frame is projected through the weight matrix returned by
    ``cal_triangle_window``.

    Args:
        x: 1-D signal.
        sr: Sample rate; ``sr // 2`` is passed to ``cal_triangle_window``.
        win_len: Frame length in samples (also the FFT size).
        shift_len: Hop between consecutive frames in samples.
        order: Number of times the Hilbert envelope is applied.

    Returns:
        Array of shape ``(15, frames)`` where
        ``frames = (len(x) - win_len) // shift_len``.
    """
    from scipy.signal import hilbert

    envelope = np.abs(hilbert(x))
    for _ in range(order - 1):
        envelope = np.abs(hilbert(envelope))
    envelope = envelope * 1. / 3.

    frames = (len(envelope) - win_len) // shift_len
    hanning_window = np.hanning(win_len)
    ams_feature = np.zeros(shape=(15, frames))
    # presumably a (15, win_len) weight matrix -- confirm in cal_triangle_window
    wts = cal_triangle_window(0, sr // 2, win_len, 15, 15.6, 400)

    for i in range(frames):
        start = i * shift_len
        # Frames are cut from the envelope (not the raw signal) so that the
        # Hilbert stage above actually contributes to the feature.
        one_frame = envelope[start:start + win_len] * hanning_window
        frame_fft = np.abs(np.fft.fft(one_frame, win_len))
        ams_feature[:, i] = np.matmul(wts, frame_fft)
    return ams_feature
def cochleagram_extractor(xx, sr, win_len, shift_len, channel_number, win_type):
    """Compute a cochleagram: per-channel RMS-style energy of ERB-filtered audio.

    ``xx`` is passed through the ERB filter bank built by ``make_erb_filters``
    and ``erb_frilter_bank``. The squared output is framed, weighted, averaged
    over each frame and square-rooted. Exact zeros are replaced by machine
    epsilon.

    NOTE(review): the weighting window has ``channel_number`` points and is
    applied along the channel axis, not along time -- confirm this is intended.

    Returns:
        Array of shape ``(channel_number, frames)``.
    """
    fcoefs, f = make_erb_filters(sr, channel_number, 50)
    xf = erb_frilter_bank(xx, np.flipud(fcoefs))

    if win_type == 'hanning':
        weights = np.hanning(channel_number)
    elif win_type == 'hamming':
        weights = np.hamming(channel_number)
    elif win_type == 'triangle':
        ramp = 2 * np.arange(1, channel_number + 1, 1)
        weights = (1 - (np.abs(channel_number - 1 - ramp) / (channel_number + 1)))
    else:
        weights = np.ones(channel_number)
    # Column vector tiled to frame width: one weight per channel row.
    weight_block = np.repeat(weights.reshape((channel_number, 1)), win_len, 1)

    energy = np.power(xf, 2.0)
    frames = 1 + ((np.size(energy, 1) - win_len) // shift_len)
    cochleagram = np.zeros((channel_number, frames))
    for frame_index in range(frames):
        begin = frame_index * shift_len
        segment = np.multiply(energy[:, begin:begin + win_len], weight_block)
        cochleagram[:, frame_index] = np.sqrt(np.mean(segment, 1))

    # Avoid exact zeros so later log/power operations stay finite.
    return np.where(cochleagram == 0.0, np.finfo(float).eps, cochleagram)
def log_power_spectrum_extractor(x, win_len, shift_len, win_type, is_log=False):
    """Return the (optionally log) power spectrum of every frame of ``x``.

    Args:
        x: 1-D signal array.
        win_len: Frame length in samples (also the FFT size).
        shift_len: Hop between consecutive frames in samples.
        win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``.
        is_log: When true, the natural log of the power is returned.

    Returns:
        float64 array of shape ``(win_len // 2 + 1, frames)`` with
        ``frames = (len(x) - win_len) // shift_len``.

    Raises:
        ValueError: If ``win_type`` is not one of the supported names.
    """
    window_makers = {
        'hanning': np.hanning,
        'hamming': np.hamming,
        'rectangle': np.ones,
    }
    if win_type not in window_makers:
        # Previously an unknown name surfaced later as an UnboundLocalError.
        raise ValueError("Unsupported win_type: %r" % (win_type,))
    window = window_makers[win_type](win_len)

    samples = x.shape[0]
    frames = (samples - win_len) // shift_len
    half = win_len // 2 + 1
    # complex64 storage is kept so results match the established precision.
    stft = np.zeros((win_len, frames), dtype=np.complex64)
    spect = np.zeros((half, frames), dtype=np.float64)

    for i in range(frames):
        start = i * shift_len
        windowed_frame = np.multiply(x[start:start + win_len], window)
        stft[:, i] = np.fft.fft(windowed_frame, win_len)
        power = np.power(np.abs(stft[0:half, i]), 2.)
        spect[:, i] = np.log(power) if is_log else power
    return spect
def stft_extractor(x, win_len, shift_len, win_type):
    """Return the one-sided complex STFT of ``x``.

    Args:
        x: 1-D signal array.
        win_len: Frame length in samples (also the FFT size).
        shift_len: Hop between consecutive frames in samples.
        win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``.

    Returns:
        complex64 array of shape ``(win_len // 2 + 1, frames)`` with
        ``frames = (len(x) - win_len) // shift_len``.

    Raises:
        ValueError: If ``win_type`` is not one of the supported names.
    """
    window_makers = {
        'hanning': np.hanning,
        'hamming': np.hamming,
        'rectangle': np.ones,
    }
    if win_type not in window_makers:
        # Previously an unknown name surfaced later as an UnboundLocalError.
        raise ValueError("Unsupported win_type: %r" % (win_type,))
    window = window_makers[win_type](win_len)

    samples = x.shape[0]
    frames = (samples - win_len) // shift_len
    half = win_len // 2 + 1
    stft = np.zeros((win_len, frames), dtype=np.complex64)
    spect = np.zeros((half, frames), dtype=np.complex64)

    for i in range(frames):
        start = i * shift_len
        windowed_frame = np.multiply(x[start:start + win_len], window)
        stft[:, i] = np.fft.fft(windowed_frame, win_len)
        # Keep only the non-negative frequency bins.
        spect[:, i] = stft[:half, i]
    return spect
def ams_extractor(x, sr, win_len, shift_len, order=1, decimate_coef=1./4.):
    """Extract a 15-channel amplitude-modulation feature matrix.

    The Hilbert envelope of ``x`` is taken ``order`` times and scaled by
    ``decimate_coef``. The envelope is then cut into Hann-windowed frames, and
    the FFT magnitude of every frame is projected through the weight matrix
    returned by ``cal_triangle_window``.

    Args:
        x: 1-D signal.
        sr: Sample rate; ``sr // 2`` is passed to ``cal_triangle_window``.
        win_len: Frame length in samples (also the FFT size).
        shift_len: Hop between consecutive frames in samples.
        order: Number of times the Hilbert envelope is applied.
        decimate_coef: Scale factor applied to the envelope.

    Returns:
        Array of shape ``(15, frames)`` where
        ``frames = (len(x) - win_len) // shift_len``.
    """
    from scipy.signal import hilbert

    envelope = np.abs(hilbert(x))
    for _ in range(order - 1):
        envelope = np.abs(hilbert(envelope))
    envelope = envelope * decimate_coef

    frames = (len(envelope) - win_len) // shift_len
    hanning_window = np.hanning(win_len)
    ams_feature = np.zeros(shape=(15, frames))
    # presumably a (15, win_len) weight matrix -- confirm in cal_triangle_window
    wts = cal_triangle_window(0, sr // 2, win_len, 15, 15.6, 400)

    for i in range(frames):
        start = i * shift_len
        # Frames are cut from the envelope (not the raw signal) so that the
        # Hilbert stage above actually contributes to the feature.
        one_frame = envelope[start:start + win_len] * hanning_window
        frame_fft = np.abs(np.fft.fft(one_frame, win_len))
        ams_feature[:, i] = np.matmul(wts, frame_fft)
    return ams_feature
def unknown_feature_extractor(x, sr, win_len, shift_len, barks, inner_win, inner_shift, win_type, method_version):
    """Two-stage spectral feature: an STFT of each band's STFT trajectory.

    The STFT of ``x`` is mapped through the matrix from ``get_fft_bark_mat``,
    then each resulting band is analysed again with a Hann-windowed STFT.

    ``method_version`` selects how the second-stage STFT is summarised:
    ``'v1'`` stores ``20 * log(|.|)``, ``'v2'`` stores the cube-rooted
    magnitude times the phase wrapped to ``[0, 2*pi)``, anything else stores
    the plain magnitude.

    Returns:
        Real array of shape
        ``(barks, inner_win // 2 + 1, (band_frames - inner_win) // inner_shift)``.
    """
    outer_spectrum = stft_extractor(x, win_len, shift_len, win_type)
    coef = get_fft_bark_mat(sr, win_len, barks, 20, sr // 2)
    bark_spect = np.matmul(coef, outer_spectrum)

    bin_count = inner_win // 2 + 1
    frame_count = (bark_spect.shape[1] - inner_win) // inner_shift
    two_pi = 2. * np.pi

    ams = np.zeros((barks, bin_count, frame_count))
    for band in range(barks):
        channel_stft = stft_extractor(bark_spect[band, :], inner_win, inner_shift, 'hanning')
        if method_version == 'v1':
            cropped = channel_stft[:bin_count, :frame_count]
            ams[band, :, :] = 20 * np.log(np.abs(cropped))
        elif method_version == 'v2':
            cropped = channel_stft[:bin_count, :frame_count]
            channel_amplitude = np.abs(cropped)
            channel_angle = np.angle(cropped)
            # Shift the phase from (-pi, pi] into [0, 2*pi).
            channel_angle = channel_angle - (np.floor(channel_angle / two_pi) * two_pi)
            ams[band, :, :] = np.power(channel_amplitude, 1. / 3.) * channel_angle
        else:
            ams[band, :, :] = np.abs(channel_stft)
    return ams
def spectrum_extractor(x, win_len, shift_len, win_type, is_log):
    """Return the (optionally log) magnitude spectrum of every frame of ``x``.

    ``win_type`` may be ``'hanning'``, ``'hamming'`` or ``'triangle'``; any
    other value selects a rectangular window.

    Returns:
        float64 array of shape ``(win_len // 2 + 1, frames)`` with
        ``frames = (len(x) - win_len) // shift_len``.
    """
    if win_type == 'hanning':
        window = np.hanning(win_len)
    elif win_type == 'hamming':
        window = np.hamming(win_len)
    elif win_type == 'triangle':
        window = (1 - (np.abs(win_len - 1 - 2 * np.arange(1, win_len + 1, 1)) / (win_len + 1)))
    else:
        window = np.ones(win_len)

    frame_total = (x.shape[0] - win_len) // shift_len
    half_bins = win_len // 2 + 1
    # Full-length spectra are parked in complex64 before the magnitude is taken.
    stft = np.zeros((win_len, frame_total), dtype=np.complex64)
    spectrum = np.zeros((half_bins, frame_total), dtype=np.float64)

    for col in range(frame_total):
        first = col * shift_len
        stft[:, col] = np.fft.fft(np.multiply(x[first:first + win_len], window), win_len)
        magnitude = np.abs(stft[0:half_bins, col])
        if is_log:
            spectrum[:, col] = np.log(magnitude)
        else:
            spectrum[:, col] = magnitude
    return spectrum
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Only the leading complete frames are kept: the frame count is
    ``floor((len(sig) - frameSize) / hopSize)``.

    Args:
        sig: 1-D sequence of samples.
        frameSize: Number of samples per analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable that maps a length to a window array.

    Returns:
        2-D complex array with one row per frame; each row is
        ``np.fft.rfft`` of the windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')

    # as_strided needs an integer shape; np.floor returns a float. A signal
    # shorter than one frame yields zero frames rather than a negative count.
    cols = max(int(np.floor((len(samples) - frameSize) / float(hopSize))), 0)

    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Args:
        sig: 1-D sequence of samples.
        frameSize: Number of samples per analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable that maps a length to a window array.

    Returns:
        2-D complex array with one row per frame; each row is
        ``np.fft.rfft`` of the windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')

    # The frame count has to be a plain int: as_strided rejects float shapes.
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    cols = max(cols, 0)

    # Zero-pad so the last frame lies completely inside the buffer; without
    # the padding as_strided would read memory past the end of ``samples``.
    needed = (cols - 1) * hopSize + frameSize if cols else 0
    if needed > len(samples):
        samples = np.append(samples, np.zeros(needed - len(samples)))

    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    The signal is extended with ``frameSize`` zeros so that every frame is
    fully covered by samples.

    Args:
        sig: 1-D sequence of samples.
        frameSize: Number of samples per analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable that maps a length to a window array.

    Returns:
        2-D complex array with one row per frame; each row is
        ``np.fft.rfft`` of the windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')

    # The frame count has to be a plain int: as_strided rejects float shapes.
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    cols = max(cols, 0)

    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))

    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def __init__(self, sampling_rate=16000, frame_width=0.032, frame_shift=0.01, num_mel_filters=40, window_func="hanning", using_delta=True, using_delta_delta=True):
    """Store the front-end settings and build the mel filter bank.

    Args:
        sampling_rate: Sample rate used to size the FFT and the filter bank.
        frame_width: Multiplied by ``sampling_rate`` to obtain the FFT size.
        frame_shift: Stored as given.
        num_mel_filters: Number of filters requested from the filter bank.
        window_func: ``"hanning"`` or ``"hamming"``.
        using_delta: Stored as given.
        using_delta_delta: Stored as given.
    """
    assert window_func in ["hanning", "hamming"]
    self.sampling_rate = sampling_rate
    self.frame_width = frame_width
    self.frame_shift = frame_shift
    self.num_fft = int(sampling_rate * frame_width)
    self.num_mel_filters = num_mel_filters
    # The hamming branch used to test an undefined name (``winfunc``) and
    # raised NameError; the assert above guarantees one of the two names.
    if window_func == "hanning":
        self.window_func = lambda x: np.hanning(x)
    elif window_func == "hamming":
        self.window_func = lambda x: np.hamming(x)
    self.using_delta = using_delta
    self.using_delta_delta = using_delta_delta
    self.fbank = fft.get_filterbanks(nfft=self.num_fft, nfilt=num_mel_filters, samplerate=sampling_rate)
def periodic_hann(window_length):
    """Calculate a "periodic" Hann window.

    np.hanning() returns the "symmetric" Hann window: a raised cosine that
    starts and ends on zero, i.e. slightly more than one cycle of a
    period N-1 cosine. For Fourier analysis a full cycle of a period-N cosine
    is preferable, which stops just before the final zero. Matlab calls that
    variant "periodic"; this routine computes it.

    Args:
      window_length: The number of points in the returned window.

    Returns:
      A 1D np.array containing the periodic hann window.
    """
    # Phase advances by 2*pi/N per sample, so sample N would be the next zero.
    phase = 2 * np.pi / window_length * np.arange(window_length)
    raised_cosine = 0.5 * np.cos(phase)
    return 0.5 - raised_cosine
def stft_magnitude(signal, fft_length, hop_length=None, window_length=None):
    """Calculate the short-time Fourier transform magnitude.

    Args:
      signal: 1D np.array of the input time-domain signal.
      fft_length: Size of the FFT to apply.
      hop_length: Advance (in samples) between each frame passed to FFT.
      window_length: Length of each block of samples to pass to FFT.

    Returns:
      2D np.array where each row contains the magnitudes of the fft_length/2+1
      unique values of the FFT for the corresponding frame of input samples.
    """
    framed_signal = frame(signal, window_length, hop_length)
    # A periodic Hann (cosine of period window_length) is used rather than the
    # symmetric Hann of np.hanning (period window_length-1).
    tapered = framed_signal * periodic_hann(window_length)
    spectrum = np.fft.rfft(tapered, int(fft_length))
    return np.abs(spectrum)


# Mel spectrum constants and functions.
def compute_pairwise_shifts(imstack):
    """Estimate the shift between each pair of consecutive images in a stack.

    Args:
        imstack: Image stack indexed as [frame, x, y].

    Returns:
        Array of shape ``(2, N - 1)`` (N = number of frames) holding the shift
        vector [y, x] for every consecutive pair, where y is the shift of the
        imaging location with respect to the positive y axis, similarly for x.
    """
    stack_shape = imstack.shape
    pair_count = stack_shape[0] - 1
    print('Correcting ' + str(pair_count) + ' frames...')

    # 2-D Hann taper, built as the outer product of two 1-D windows.
    taper = np.outer(np.hanning(stack_shape[1]), np.hanning(stack_shape[2]))

    shift = np.zeros((2, pair_count))
    for pair_index in range(pair_count):
        reference = imstack[pair_index] * taper
        moved = imstack[pair_index + 1] * taper
        # Only the shift is kept; the two other return values are discarded.
        shift[:, pair_index], _error, _diffphase = register_translation_hybrid(
            reference, moved, exponent = 0.3, upsample_factor = 100)
    return shift
def __init__(self, window_size, segments_buf=None):
    """Set up the buffers used for windowing and flux tracking.

    Args:
        window_size: Length of the Hann window and of the zero padding block.
        segments_buf: Length of the flux history; defaults to
            ``int(SAMPLE_RATE / window_size)``.
    """
    if segments_buf is None:
        segments_buf = int(SAMPLE_RATE / window_size)

    self._window_size = window_size
    self._segments_buf = segments_buf

    self._thresholding_window_size = THRESHOLD_WINDOW_SIZE
    assert self._thresholding_window_size <= segments_buf

    self._last_spectrum = np.zeros(window_size, dtype=np.int16)
    # Bounded history: the deque holds at most segments_buf entries.
    initial_flux = np.zeros(segments_buf, dtype=np.int16)
    self._last_flux = deque(initial_flux, segments_buf)
    self._last_prunned_flux = 0

    self._hanning_window = np.hanning(window_size)
    # The zeros which will be used to double each segment size
    self._inner_pad = np.zeros(window_size)

    # To ignore the first peak just after starting the application
    self._first_peak = True
def managed_window(self, axismanager, data, windowaxis):
    """Apply a Hann window along one axis of ``data``.

    Args:
        axismanager: Object whose ``current_axis_number`` maps ``windowaxis``
            to the index of the axis to window.
        data: Array to window.
        windowaxis: Axis identifier understood by ``axismanager``.

    Returns:
        Tuple ``(windoweddata, wss)`` where ``wss`` is the "window summed and
        squared" factor (see Numerical Recipes): ``sum(window**2) / N``.
    """
    axis_index = axismanager.current_axis_number(windowaxis)
    point_count = data.shape[axis_index]
    taper = np.hanning(point_count)

    # Save "window summed and squared" (see Numerical Recipes)
    wss = np.sum(taper**2.0) / float(point_count)

    # Multiply every 1-D slice taken along the chosen axis by the taper.
    windoweddata = np.apply_along_axis(
        lambda lane, weights: lane * weights, axis_index, data, taper)
    return windoweddata, wss
def measureLoop(self):
    """Measure 100 values, append them to the buffer and drop the 100 oldest.

    If a stop was requested the flag is cleared, ``unlock`` is called and
    nothing is measured. Otherwise the refreshed buffer is smoothed per
    channel with a normalized Hann window of ``self.window_len`` points into
    ``self.smooth``, and ``sigRepeat`` is emitted.
    """
    if self.stopRequest:
        self.stopRequest = False
        self.unlock()
        return

    block_size = 100
    channel_count = self._data_logic.getChannels()

    # NOTE(review): only column 0 receives measurements; the other channels
    # stay zero -- confirm this is intended.
    data = np.zeros((block_size, channel_count))
    data[:, 0] = np.array([self._data_logic.getData() for _ in range(block_size)])

    # Rolling by -block_size wraps the oldest rows to the tail, which is then
    # overwritten in full. Writing to [-101:-1] instead would leave the last
    # row stale and clobber one row that is still valid.
    self.buf = np.roll(self.buf, -block_size, axis=0)
    self.buf[-block_size:] = data

    w = np.hanning(self.window_len)
    # Mirror window_len - 1 rows at both ends so 'valid' convolution has
    # support at the buffer edges.
    s = np.r_[self.buf[self.window_len-1:0:-1], self.buf, self.buf[-1:-self.window_len:-1]]
    kernel = w / w.sum()
    for channel in range(channel_count):
        self.smooth[:, channel] = np.convolve(kernel, s[:, channel], mode='valid')
    self.sigRepeat.emit()
def undo_stft(spect, hop_size, frame_len=None, unwindow='auto'):
    """
    Invert an STFT by overlap-add and return the samples as a numpy array.

    Each row of ``spect`` is brought back to the time domain with ``irfft``,
    weighted by a Hann window and summed at its hop offset. With ``unwindow``
    enabled the sum is divided by the accumulated squared window, floored at
    10% of its maximum. ``'auto'`` enables this when
    ``hop_size <= frame_len // 2``.
    """
    # transform into time domain
    time_frames = np.fft.irfft(spect, n=frame_len, axis=1)
    num_frames, frame_len = time_frames.shape
    win = np.hanning(frame_len)

    if unwindow == 'auto':
        unwindow = (hop_size <= frame_len // 2)

    # overlap-and-add
    total_len = (num_frames - 1) * hop_size + frame_len
    samples = np.zeros(total_len)
    factors = np.zeros_like(samples) if unwindow else None
    for position, segment in enumerate(time_frames):
        begin = int(position * hop_size)
        end = begin + frame_len
        samples[begin:end] += segment * win
        if unwindow:
            factors[begin:end] += win**2

    if unwindow:
        # Floor the normalizer so weakly covered edges are not blown up.
        np.maximum(factors, .1 * factors.max(), factors)
        samples /= factors
    return samples
def stft(sig, frameSize, overlapFac=0.75, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    The frame count is ``ceil((len(sig) - frameSize) / hopSize)``, so every
    frame lies inside the signal and no padding is applied.

    Args:
        sig: 1-D sequence of samples.
        frameSize: Number of samples per analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable that maps a length to a window array.

    Returns:
        2-D complex array with one row per frame; each row is
        ``np.fft.rfft`` of the windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    samples = np.array(sig, dtype='float64')

    # as_strided needs an integer shape; np.ceil returns a float. A signal
    # shorter than one frame yields zero frames rather than a negative count.
    cols = max(int(np.ceil((len(samples) - frameSize) / float(hopSize))), 0)

    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def test_high_frequency_completion(self):
    """Check that high-frequency completion raises the upper-band level.

    The same signal is transformed with and without completion; for several
    start offsets the mean log-amplitude from bin ``N // 4`` upwards must be
    lower without completion than with it.
    """
    path = dirpath + '/data/test16000.wav'
    fs, x = wavfile.read(path)
    f0rate = 0.5
    shifter = Shifter(fs, f0rate=f0rate)
    mod_x = shifter.f0transform(x, completion=False)
    mod_xc = shifter.f0transform(x, completion=True)
    assert len(mod_x) == len(mod_xc)

    N = 512
    fl = int(fs * 25 / 1000)
    win = np.hanning(fl)
    sts = [1000, 5000, 10000, 20000]
    for st in sts:
        # confirm w/o completion
        f_mod_x = fft(mod_x[st: st + fl] / 2**16 * win)
        amp_mod_x = 20.0 * np.log10(np.abs(f_mod_x))

        # confirm w/ completion
        f_mod_xc = fft(mod_xc[st: st + fl] / 2**16 * win)
        amp_mod_xc = 20.0 * np.log10(np.abs(f_mod_xc))

        # Compare the two means. With the closing parenthesis in the wrong
        # place the comparison ran element-wise inside np.mean, so the assert
        # passed whenever a single bin satisfied it.
        assert np.mean(amp_mod_x[N // 4:]) < np.mean(amp_mod_xc[N // 4:])
def analysis_with_del_comp_from_est_file(v_in_sig, est_file, fs, nFFT=None, win_func=np.hanning, b_ph_unv_zero=False, nwin_per_pitch_period=0.5):
    """Run the pitch-mark based analysis using marks read from an est file.

    Pitch marks and voicing flags are read with ``la.read_reaper_est_file``,
    the marks are converted from seconds to samples, and the work is delegated
    to ``analysis_with_del_comp_from_pm``.

    When ``nFFT`` is None it defaults to 4096 for ``fs == 48000`` and 2048 for
    ``fs == 16000``; for any other rate it stays None.
    When ``b_ph_unv_zero`` is true the phase matrix is multiplied row-wise by
    the voicing vector.

    Returns:
        ``(m_sp, m_ph, v_shift, v_voi, m_frms, m_fft)``.
    """
    # If fft length is not provided, some standard values are assumed.
    if nFFT is None and fs == 48000:
        nFFT = 4096
    elif nFFT is None and fs == 16000:
        nFFT = 2048

    # Pitch Marks:-------------------------------------------------------------
    v_pm_sec, v_voi = la.read_reaper_est_file(est_file, check_len_smpls=len(v_in_sig), fs=fs)
    pitch_marks_in_samples = v_pm_sec * fs

    analysis = analysis_with_del_comp_from_pm(
        v_in_sig, pitch_marks_in_samples, nFFT,
        win_func=win_func, nwin_per_pitch_period=nwin_per_pitch_period)
    m_sp, m_ph, v_shift, m_frms, m_fft = analysis

    if b_ph_unv_zero:
        m_ph = m_ph * v_voi[:, None]

    return m_sp, m_ph, v_shift, v_voi, m_frms, m_fft


#==============================================================================
# From (after) 'analysis_with_del_comp':
# new: returns voi/unv decision.
def stft(self, samples, window_size, overlap_factor=0.5, window_function=np.hanning):
    """
    Perform Short-time Fourier transform to get the spectrogram for the given samples

    :param samples: Complex samples
    :param window_size: Size of DFT window
    :param overlap_factor: Value between 0 (= No Overlapping) and 1 (= Full overlapping) of windows
    :param window_function: Function for DFT window
    :return: short-time Fourier transform of the given signal
    """
    window = window_function(window_size)

    # hop size determines by how many samples the window is advanced
    hop_size = window_size - int(overlap_factor * window_size)

    # Pad with zeros so the last window covers the end of the signal. The pad
    # is the distance to the next hop boundary; padding by the remainder
    # itself could still leave trailing samples outside every window.
    sample_count = len(samples)
    if sample_count < window_size:
        # Too short for even one window: extend to exactly one frame.
        pad = window_size - sample_count
    else:
        pad = (-(sample_count - window_size)) % hop_size
    padded_samples = np.append(samples, np.zeros(pad))

    num_frames = ((len(padded_samples) - window_size) // hop_size) + 1
    frames = [padded_samples[i*hop_size:i*hop_size+window_size] * window for i in range(num_frames)]
    return np.fft.fft(frames)
def stft(sig, frame_size, overlap_fac=0.5, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Half a frame of zeros is prepended so the first window is centered on
    sample 0, and a full frame of zeros is appended so every frame is covered.

    Args:
        sig: 1-D sequence of samples.
        frame_size: Number of samples per analysis frame.
        overlap_fac: Fraction of ``frame_size`` shared by consecutive frames.
        window: Callable that maps a length to a window array.

    Returns:
        2-D complex array with one row per frame; each row is
        ``np.fft.rfft`` of the windowed frame.
    """
    win = window(frame_size)
    hop_size = int(frame_size - np.floor(overlap_fac * frame_size))

    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    # np.zeros needs an integer length, so the half frame is cast explicitly.
    samples = np.append(np.zeros(int(np.floor(frame_size / 2.0))), sig)

    # cols for windowing; as_strided rejects float shapes
    cols = int(np.ceil((len(samples) - frame_size) / float(hop_size))) + 1

    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frame_size))

    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frame_size),
        strides=(samples.strides[0] * hop_size, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def __init__(self, configs, chunks, chunksize, channels, rate):
    """Prepare the window, frequency axis and FFTW plan for real-input FFTs.

    Args:
        configs: Configuration object; ``configs.settings`` provides the
            ``fft`` section values ``endpoint_notes`` and ``sensitivity``.
        chunks: Number of chunks per channel buffer.
        chunksize: Number of samples per chunk.
        channels: Stored as given.
        rate: Sample rate used for the frequency axis.
    """
    self.configs = configs
    self.channels = channels
    self.chunksize = chunksize

    samples_per_channel = chunks * chunksize
    self.channel_len = samples_per_channel
    self.fft_len = samples_per_channel // 2 + 1  # See numpy.fft.rfft
    self.fft_freqs_in_hertz = np.fft.rfftfreq(samples_per_channel, d=1.0/rate)

    settings = configs.settings
    endpoint_notes = settings.getmultistr('fft', 'endpoint_notes')
    self.notespace = process.generic.notespace(
        endpoint_notes[0], endpoint_notes[1], step=1.0/6)  # XXX

    self.window = np.hanning(self.channel_len)
    self.sensitivity = settings.getfloat('fft', 'sensitivity')
    self.compute_weights(self.sensitivity)
    logger.debug("FFT length: {}".format(self.fft_len))

    # Create a pyfftw.FFTW object
    aligned_input = pyfftw.empty_aligned(
        self.channel_len, dtype='int16', n=pyfftw.simd_alignment)
    self.fft = pyfftw.builders.rfft(
        aligned_input, overwrite_input=True,
        threads=multiprocessing.cpu_count())
def hann(args):
    """
    window = hann(args)

    Build a Hann window by delegating to NumPy's hanning(args).
    """
    window = np.hanning(args)
    return window


# FUNCTIONS FOR TIME-FREQUENCY REPRESENTATION
def iFFT(Y, output_length=None, window=False):
    """ Inverse real-valued Fourier Transform

    Parameters
    ----------
    Y : array_like
       Frequency domain data [Nsignals x Nbins]
    output_length : int, optional
       Length of returned time-domain signal (Default: 2 x (Nbins - 1))
    window : bool or str, optional
       Weights the resulting time-domain signal with a window. ``True`` or
       ``'hann'`` selects a Hann window; ``'hamming'``, ``'blackman'`` and
       ``'kaiser'`` (beta = 3) are also accepted. Default: no window.

    Returns
    -------
    y : array_like
       Reconstructed time-domain signal

    Raises
    ------
    ValueError
       If ``window`` is truthy but not one of the supported selections.
    """
    Y = _np.atleast_2d(Y)
    y = _np.fft.irfft(Y, n=output_length)

    # A plain boolean is documented as "apply a Hann window"; it used to be
    # rejected because only the string names were recognised.
    if window is True:
        window = 'hann'

    if window:
        if window not in {'hann', 'hamming', 'blackman', 'kaiser'}:
            raise ValueError('Selected window must be one of hann, hamming, blackman or kaiser')
        no_of_samples = y.shape[-1]
        if window == 'hann':
            window_array = _np.hanning(no_of_samples)
        elif window == 'hamming':
            window_array = _np.hamming(no_of_samples)
        elif window == 'blackman':
            window_array = _np.blackman(no_of_samples)
        else:
            window_array = _np.kaiser(no_of_samples, 3)
        y = window_array * y
    return y
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Half a frame of zeros is prepended so the first window is centered on
    sample 0, and a full frame of zeros is appended so every frame is covered.

    Args:
        sig: 1-D sequence of samples.
        frameSize: Number of samples per analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable that maps a length to a window array.

    Returns:
        2-D complex array with one row per frame; each row is
        ``np.fft.rfft`` of the windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))

    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    # np.zeros needs an integer length, so the half frame is cast explicitly.
    samples = np.append(np.zeros(int(np.floor(frameSize / 2.0))), sig)

    # cols for windowing; as_strided rejects float shapes
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1

    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))

    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
def make_filterbank(self):
    """Build one Hann-windowed complex exponential filter per ERB-spaced bin.

    Sets ``self.hz_freqs`` (center frequencies from ``erb2hz``),
    ``self.widths`` (rounded half-widths in samples) and ``self.filters``
    (list of complex arrays of length ``2 * width + 1``).
    """
    erb_max = hz2erb(self.sr/2.0)
    erb_freqs = np.arange(0, self.n_bins) * erb_max / float(self.n_bins - 1)
    self.hz_freqs = erb2hz(erb_freqs)
    self.widths = np.round(0.5 * (self.n_bins - 1) / erb_max * 9.26 * 0.00437 * self.sr * np.exp(-erb_freqs / 9.26) - 0.5)

    self.filters = []
    for b in range(self.n_bins):
        w = self.widths[b]
        f = self.hz_freqs[b]
        # 1j replaces np.complex(0, 1): the np.complex alias was removed from
        # NumPy, and the builtin literal is equivalent.
        exponential = np.exp(
            1j * 2 * np.pi * f / self.sr * np.arange(-w, w + 1))
        self.filters.append(np.hanning(2 * w + 1) * exponential)
def __init__(self, window = None, fft_size = 1024, hop_size = 512):
    """Store the analysis window and transform sizes.

    When ``window`` is None a Hann window of ``fft_size`` points is created.
    ``window_size`` is the window's length and ``half_window`` half of it,
    rounded down.
    """
    self.window = np.hanning(fft_size) if window is None else window
    self.fft_size = fft_size
    self.hop_size = hop_size

    window_length = len(self.window)
    self.window_size = window_length
    self.half_window = int(np.floor(window_length / 2.0))
def __init__(self, window = None, fft_size = 1024, hop_size = 512, sample_rate = 44100):
    """Store the analysis window, transform sizes and sample rate.

    When ``window`` is None a Hann window of ``fft_size`` points is created.
    ``window_size`` is the window's length and ``half_window`` half of it,
    rounded down.
    """
    self.window = np.hanning(fft_size) if window is None else window
    self.fft_size = fft_size
    self.hop_size = hop_size
    self.sample_rate = sample_rate

    window_length = len(self.window)
    self.window_size = window_length
    self.half_window = int(np.floor(window_length / 2.0))
def __init__(self, img, start_pos, HOG_flag=0, dataformat=1, resize=1):
    """Initialise the tracker on the first frame.

    Args:
        img: First image; only ``img.shape[0]`` and ``img.shape[1]`` are read
            before it is handed to ``cv2.resize`` / ``tracker.get_window``.
        start_pos: Initial box. With ``dataformat`` truthy it is read as two
            corner points and converted to ``(x, y, w, h)``; otherwise it is
            used as given.
        HOG_flag: Selects the feature settings (fixed 32x32 target when set).
        dataformat: See ``start_pos``.
        resize: When truthy the image and the box are halved.
    """
    self.HOG_flag = HOG_flag
    self.padding = 2
    self.dataformat = dataformat
    self.resize = resize
    self.img_size = img.shape[0], img.shape[1]

    if self.dataformat:
        w, h = start_pos[2] - start_pos[0], start_pos[3] - start_pos[1]
        self.pos = start_pos[0], start_pos[1], w, h
    else:
        self.pos = start_pos

    if self.resize:
        # NOTE(review): under Python 3 this yields float coordinates; confirm
        # tracker.get_window accepts them.
        self.pos = tuple([ele / 2 for ele in self.pos])
        # cv2.resize only accepts integer sizes, so halve with floor division
        # (true division produced floats under Python 3).
        self.img_size = img.shape[0] // 2, img.shape[1] // 2
        img = cv2.resize(img, self.img_size[::-1])

    object_size = self.pos[2:]
    if self.HOG_flag:
        self.target_size = 32, 32
        self.l = 0.0001
        self.sigma = 0.6
        self.f = 0.012
    else:
        self.target_size = object_size[0] * self.padding, object_size[1] * self.padding
        self.l = 0.0001
        self.sigma = 0.2
        self.f = 0.02

    output_sigma_factor = 1 / float(8)
    output_sigma = np.sqrt(np.prod(self.target_size)) * output_sigma_factor
    # 2-D cosine (Hann) window as the outer product of two 1-D windows.
    self.cos_window = np.outer(np.hanning(self.target_size[0]),
                               np.hanning(self.target_size[1]))
    self.y = tracker.generate_gaussian(self.target_size, output_sigma)

    x = tracker.get_window(img, self.pos, self.padding)
    x = tracker.getFeature(x, self.cos_window, self.HOG_flag)
    self.alpha = tracker.train(x, self.y, self.sigma, self.l)
    self.z = x
def ideal_binary_mask(noisy_speech, clean_speech, snr, sr):
    """Compute the ideal binary mask from noisy and clean speech.

    Both signals are analysed with a Hann-windowed STFT using a 20 ms window
    and a 10 ms shift. A bin is 1 where the clean-to-noise power ratio exceeds
    ``10 ** (0.1 * snr)`` and 0 otherwise.

    Args:
        noisy_speech: Noisy signal.
        clean_speech: Clean reference signal.
        snr: Threshold in dB.
        sr: Sample rate in Hz.

    Returns:
        Integer array of zeros and ones with the STFT's shape.
    """
    # Frame sizes must be integers: they are used as array shapes and slice
    # bounds. round() guards against products such as 319.99999999999994.
    win_len = int(round(sr * 0.02))
    shift_len = int(round(sr * 0.01))
    noisy_spect = stft_extractor(noisy_speech, win_len, shift_len, 'hanning')
    clean_spect = stft_extractor(clean_speech, win_len, shift_len, 'hanning')
    # The noise spectrum is taken as the difference noisy - clean.
    spect_snr = np.power(np.abs(clean_spect), 2.0) / np.power(np.abs(noisy_spect - clean_spect), 2.0)
    ibm = np.where(spect_snr > 10**(0.1*snr), 1, 0)
    return ibm
def ideal_ratio_mask(noisy_speech, clean_speech, sr):
    """Compute the ratio mask ``|clean| / |noisy|`` per STFT bin.

    Both signals are analysed with a Hann-windowed STFT using a 20 ms window
    and a 10 ms shift.

    Args:
        noisy_speech: Noisy signal.
        clean_speech: Clean reference signal.
        sr: Sample rate in Hz.

    Returns:
        Real array with the STFT's shape.
    """
    # Frame sizes must be integers: they are used as array shapes and slice
    # bounds. round() guards against products such as 319.99999999999994.
    win_len = int(round(sr * 0.02))
    shift_len = int(round(sr * 0.01))
    noisy_spect = stft_extractor(noisy_speech, win_len, shift_len, 'hanning')
    clean_spect = stft_extractor(clean_speech, win_len, shift_len, 'hanning')
    irm = np.abs(clean_spect) / np.abs(noisy_spect)
    return irm
def synthesis_speech(noisy_speech, ideal_mask, win_type, win_len, shift_len, syn_method='A&R'):
    """Resynthesise speech from a noisy signal and a time-frequency mask.

    Every frame of ``noisy_speech`` is windowed and transformed, its one-sided
    magnitude is multiplied by the matching mask column while the noisy phase
    is kept, and the frame is transformed back and overlap-added.

    Args:
        noisy_speech: 1-D noisy signal.
        ideal_mask: Mask indexed as ``[bin, frame]`` with
            ``win_len // 2 + 1`` bins.
        win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``.
        win_len: Frame length in samples (also the FFT size).
        shift_len: Hop between consecutive frames in samples.
        syn_method: ``'A&R'`` / ``'ALLEN & RABINER'`` normalises by the summed
            window; ``'G&L'`` / ``'GRIFFIN & LIM'`` re-windows each frame and
            normalises by the summed squared window.

    Returns:
        1-D array of length ``(frames - 1) * shift_len + win_len``.

    Raises:
        ValueError: If ``win_type`` is not one of the supported names.
    """
    window_makers = {
        'hanning': np.hanning,
        'hamming': np.hamming,
        'rectangle': np.ones,
    }
    if win_type not in window_makers:
        # Previously an unknown name surfaced later as an UnboundLocalError.
        raise ValueError("Unsupported win_type: %r" % (win_type,))
    window = window_makers[win_type](win_len)

    samples = noisy_speech.shape[0]
    frames = (samples - win_len) // shift_len
    half = win_len // 2 + 1
    out_len = (frames - 1) * shift_len + win_len

    to_ifft = np.zeros(win_len, dtype=np.complex64)
    clean_speech = np.zeros(out_len, dtype=np.float32)
    window_sum = np.zeros(out_len, dtype=np.float32)

    use_allen_rabiner = syn_method == 'A&R' or syn_method == 'ALLEN & RABINER'
    use_griffin_lim = syn_method == 'G&L' or syn_method == 'GRIFFIN & LIM'

    for i in range(frames):
        start = i * shift_len
        stop = start + win_len
        windowed_frame = np.multiply(noisy_speech[start:stop], window)
        stft = np.fft.fft(windowed_frame, win_len)
        masked_abs = np.abs(stft[:half]) * ideal_mask[:, i]
        to_ifft[:half] = masked_abs * np.exp(1j * np.angle(stft[:half]))
        # Rebuild the negative-frequency half by Hermitian symmetry. Starting
        # at (win_len - 1) // 2 is correct for even and odd win_len alike.
        to_ifft[half:] = np.conj(to_ifft[(win_len - 1) // 2:0:-1])
        speech_seg = np.real(np.fft.ifft(to_ifft, win_len))

        if use_allen_rabiner:
            clean_speech[start:stop] += speech_seg
            window_sum[start:stop] += window
        elif use_griffin_lim:
            clean_speech[start:stop] += np.multiply(speech_seg, window)
            window_sum[start:stop] += np.power(window, 2.)

    # Floor the normaliser so barely covered samples are not amplified.
    window_sum = np.where(window_sum < 1e-2, 1e-2, window_sum)
    return clean_speech / window_sum
def cochleagram_extractor(xx, sr, win_len, shift_len, channel_number, win_type):
    """Compute a cube-root compressed cochleagram of ``xx``.

    ``xx`` is passed through the ERB filter bank built by ``make_erb_filters``
    and ``erb_frilter_bank``. The squared output is framed, weighted, averaged
    over each frame and square-rooted. Exact zeros are replaced by machine
    epsilon before the cube root is taken.

    NOTE(review): the weighting window has ``channel_number`` points and is
    applied along the channel axis, not along time -- confirm this is intended.

    Returns:
        Array of shape ``(channel_number, frames)``.
    """
    fcoefs, f = make_erb_filters(sr, channel_number, 50)
    xf = erb_frilter_bank(xx, np.flipud(fcoefs))

    if win_type == 'hanning':
        weights = np.hanning(channel_number)
    elif win_type == 'hamming':
        weights = np.hamming(channel_number)
    elif win_type == 'triangle':
        ramp = 2 * np.arange(1, channel_number + 1, 1)
        weights = (1 - (np.abs(channel_number - 1 - ramp) / (channel_number + 1)))
    else:
        weights = np.ones(channel_number)
    # Column vector tiled to frame width: one weight per channel row.
    weight_block = np.repeat(weights.reshape((channel_number, 1)), win_len, 1)

    energy = np.power(xf, 2.0)
    frames = 1 + ((np.size(energy, 1) - win_len) // shift_len)
    cochleagram = np.zeros((channel_number, frames))
    for frame_index in range(frames):
        begin = frame_index * shift_len
        segment = np.multiply(energy[:, begin:begin + win_len], weight_block)
        cochleagram[:, frame_index] = np.sqrt(np.mean(segment, 1))

    # Avoid exact zeros, then compress the dynamic range with a cube root.
    floored = np.where(cochleagram == 0.0, np.finfo(float).eps, cochleagram)
    return np.power(floored, 1./3)
def ideal_binary_mask(noisy_speech, clean_speech, snr):
    """Compute the ideal binary mask from noisy and clean speech.

    Both signals are analysed with a Hann-windowed STFT (window 320 samples,
    shift 160 samples). A bin is 1 where the clean-to-noise power ratio
    exceeds ``10 ** (0.1 * snr)`` and 0 otherwise; the noise spectrum is taken
    as ``noisy - clean``.
    """
    noisy_spect = stft_extractor(noisy_speech, 320, 160, 'hanning')
    clean_spect = stft_extractor(clean_speech, 320, 160, 'hanning')

    clean_power = np.power(np.abs(clean_spect), 2.0)
    noise_power = np.power(np.abs(noisy_spect - clean_spect), 2.0)
    local_snr = clean_power / noise_power

    # Convert the dB threshold to a linear power ratio.
    threshold = 10**(0.1*snr)
    return np.where(local_snr > threshold, 1, 0)
def synthesis_speech(ns, mk, win_type, win_len, shift_len, syn_method='A&R'):
    """Resynthesise speech from a noisy signal and a time-frequency mask.

    Every frame of ``ns`` is windowed and transformed, its one-sided magnitude
    is multiplied by the matching mask column while the noisy phase is kept,
    and the frame is transformed back and overlap-added.

    Args:
        ns: 1-D noisy signal.
        mk: Mask indexed as ``[bin, frame]`` with ``win_len // 2 + 1`` bins.
        win_type: ``'hanning'``, ``'hamming'`` or ``'rectangle'``.
        win_len: Frame length in samples (also the FFT size).
        shift_len: Hop between consecutive frames in samples.
        syn_method: ``'A&R'`` / ``'ALLEN & RABINER'`` normalises by the summed
            window; ``'G&L'`` / ``'GRIFFIN & LIM'`` re-windows each frame and
            normalises by the summed squared window.

    Returns:
        1-D array of length ``(frames - 1) * shift_len + win_len``.

    Raises:
        ValueError: If ``win_type`` is not one of the supported names.
    """
    window_makers = {
        'hanning': np.hanning,
        'hamming': np.hamming,
        'rectangle': np.ones,
    }
    if win_type not in window_makers:
        # Previously an unknown name surfaced later as an UnboundLocalError.
        raise ValueError("Unsupported win_type: %r" % (win_type,))
    window = window_makers[win_type](win_len)

    samples = ns.shape[0]
    frames = (samples - win_len) // shift_len
    half = win_len // 2 + 1
    out_len = (frames - 1) * shift_len + win_len

    to_ifft = np.zeros(win_len, dtype=np.complex64)
    clean_speech = np.zeros(out_len, dtype=np.float32)
    window_sum = np.zeros(out_len, dtype=np.float32)

    use_allen_rabiner = syn_method == 'A&R' or syn_method == 'ALLEN & RABINER'
    use_griffin_lim = syn_method == 'G&L' or syn_method == 'GRIFFIN & LIM'

    for i in range(frames):
        start = i * shift_len
        stop = start + win_len
        windowed_frame = np.multiply(ns[start:stop], window)
        stft = np.fft.fft(windowed_frame, win_len)
        masked_abs = np.abs(stft[:half]) * mk[:, i]
        to_ifft[:half] = masked_abs * np.exp(1j * np.angle(stft[:half]))
        # Rebuild the negative-frequency half by Hermitian symmetry. Starting
        # at (win_len - 1) // 2 is correct for even and odd win_len alike.
        to_ifft[half:] = np.conj(to_ifft[(win_len - 1) // 2:0:-1])
        # The inverse transform length follows win_len; a fixed 320 only
        # worked for that one frame size.
        speech_seg = np.real(np.fft.ifft(to_ifft, win_len))

        if use_allen_rabiner:
            clean_speech[start:stop] += speech_seg
            window_sum[start:stop] += window
        elif use_griffin_lim:
            clean_speech[start:stop] += np.multiply(speech_seg, window)
            window_sum[start:stop] += np.power(window, 2.)

    # Floor the normaliser so barely covered samples are not amplified.
    window_sum = np.where(window_sum < 1e-2, 1e-2, window_sum)
    return clean_speech / window_sum
def getCutlerDavisFrequencyScore(I, doPlot = False):
    """
    Compute a frequency score in the style of Cutler and Davis from the
    self-similarity matrix of a video.

    Each column of the SSM is linearly detrended, Hann windowed and its power
    spectrum is accumulated. The summed spectrum is normalized, its first two
    bins and last bin are zeroed, and the score is how far its peak stands
    above its mean in units of its standard deviation.

    :param I: An Nxd matrix representing a video with N frames
        at a resolution of d pixels
    :param doPlot: If true, show the SSM and average power spectrum
        across all columns (the plot title reports the spectrum's kurtosis)
    :returns: (max(F) - mean(F)) / std(F) of the normalized spectrum F
    """
    N = I.shape[0]
    (D, _) = getSSM(I, N)
    F = np.zeros(N)

    # Design matrix [1, n] for linearly detrending
    A = np.ones((N, 2))
    A[:, 1] = np.arange(N)
    hann = np.hanning(N)

    # Compute the power spectrum column by column
    for i in range(N):
        x = D[:, i]
        # Linearly detrend
        mb = np.linalg.lstsq(A, x)[0]
        y = x - A.dot(mb)
        # Apply Hann Window
        y = y*hann
        # Add on power spectrum
        F += np.abs(np.fft.fft(y))**2

    F = F/np.sum(F)
    F[0:2] = 0 # Ignore DC component
    F[-1] = 0
    M = np.mean(F)
    S = np.std(F)

    if doPlot:
        # Kurtosis is only reported in the plot title.
        kurt = scipy.stats.kurtosis(F, fisher = False)
        plt.subplot(121)
        plt.imshow(D, cmap='afmhot', interpolation = 'none')
        plt.subplot(122)
        # plt.hold() no longer exists in Matplotlib; successive plot calls
        # draw onto the same axes by default.
        plt.plot(F)
        plt.plot([0, N], [M, M], 'b')
        plt.plot([0, N], [M+2*S, M+2*S])
        plt.title("Kurtosis = %.3g"%kurt)
    return (np.max(F) - M)/S
def hanningWindow(nPix, percentage):
    """
    Return a Hanning window in 2D

    The 1-D profile is flat (1.0) in the middle and tapered at both ends with
    the rising and falling halves of a Hann window; the 2-D mask is the outer
    product of that profile with itself.

    Args:
        nPix (int): size of the final image
        percentage (float): percentage of the image that is apodized

    Returns:
        real: 2D apodization mask of shape (nPix, nPix)
    """
    # Lengths and slice bounds must be integers; np.ceil returns a float.
    M = int(np.ceil(nPix * percentage / 100.0))
    half = M // 2
    win = np.hanning(M)
    winOut = np.ones(nPix)
    # With half == 0 the slice [-0:] would address the whole array, so the
    # taper is only written when it has at least one sample per side.
    if half > 0:
        winOut[:half] = win[:half]
        winOut[-half:] = win[-half:]
    return np.outer(winOut, winOut)


# @jit
# def conv(spec, psf, nPixBorder):
#     nx, ny, nlambda = spec.shape
#     nxPSF, nyPSF, nPSF = psf.shape
#     out = np.zeros_like(spec)
#     for i in range(nx-2*nPixBorder):
#         for j in range(ny-2*nPixBorder):
#             for k in range(nxPSF):
#                 for l in range(nyPSF):
#                     out[i,j,0] += spec[i+k-nxPSF/2+nPixBorder,j+l-nyPSF/2+nPixBorder,0] * psf[k,l,i]
#     return out
def fft(data, fs):
    """Return the one-sided, Hann-windowed amplitude spectrum of ``data``.

    Args:
        data: Array whose last axis holds the time samples.
        fs: Sample rate.

    Returns:
        Tuple ``(freq_half, magnitude)``: the first ``ceil(n / 2)`` frequency
        bins and the magnitude of the spectrum scaled by ``2 / n``.
    """
    n = data.shape[-1]
    window = np.hanning(n)
    windowed = data * window
    spectrum = np.fft.fft(windowed)
    # Float literals keep the divisions exact under Python 2 as well.
    freq = np.fft.fftfreq(n, 1.0 / fs)
    # ceil(n / 2) as an integer, since slice bounds cannot be floats.
    half_n = (n + 1) // 2
    spectrum_half = (2.0 / n) * spectrum[..., :half_n]
    freq_half = freq[:half_n]
    return freq_half, np.abs(spectrum_half)
def smooth1d(x, window_len):
    """Smooth a 1-D array with a normalized Hann window.

    The signal is extended at both ends by point reflection about its first
    and last samples so the output has the same length as the input.

    Args:
        x: 1-D array with at least ``window_len`` samples.
        window_len: Number of points in the smoothing window.

    Returns:
        Smoothed array with the same length as ``x``. For ``window_len < 3``
        the input is returned unchanged, because such windows are degenerate
        (np.hanning(2) sums to zero).
    """
    if window_len < 3:
        return x

    # Reflect window_len - 1 samples on each side, excluding the edge sample
    # itself, so the extension continues the signal without skipping or
    # repeating a value.
    left = 2 * x[0] - x[window_len - 1:0:-1]
    right = 2 * x[-1] - x[-2:-window_len - 1:-1]
    s = np.r_[left, x, right]

    w = np.hanning(window_len)
    y = np.convolve(w / w.sum(), s, mode='same')
    # Drop the reflected margins again.
    return y[window_len - 1:-window_len + 1]
def _init_stretching(self):
    """Reset the state used for time stretching.

    Sets the chunk and phase indices to zero, derives the frame length and
    the hop (a quarter of the chunk size), allocates the Hann window, the
    phase vector and the output buffer, and gives ``stretch_factor`` its
    default of 1.0 when ``_sf`` has not been set yet.
    """
    # Resp. index of current audio chunk and computed phase
    self._i1 = 0
    self._i2 = 0

    frame_length = self.chunk_size
    hop_length = int(self.chunk_size / 4)
    self._N, self._H = frame_length, hop_length

    self._win = numpy.hanning(self._N)
    sample_dtype = self.y.dtype
    self._phi = numpy.zeros(self._N, dtype=sample_dtype)
    self._sy = numpy.zeros(len(self.y), dtype=self.y.dtype)

    if not hasattr(self, '_sf'):
        self.stretch_factor = 1.0

    self._zero_padding()
def get_spectral_features(audio, fs, lf_limit=20):
    """
    Calculate the spectral centroid and spectral spread of an audio array.

    :param audio: Audio array
    :param fs: Sample rate of audio file
    :param lf_limit: Low frequency limit, in Hz, to be analysed. Defaults to 20Hz.
    :return: Returns the spectral centroid and spectral spread
    """
    # Hann-window the whole signal and zero-pad the FFT to a power of two.
    window = np.hanning(len(audio))
    fft_size = int(pow(2, np.ceil(np.log2(len(window)))))

    # One-sided magnitude spectrum.
    full_spectrum = np.fft.fft((window * audio), fft_size)
    magnitude = np.absolute(full_spectrum[0:int(len(full_spectrum) / 2) + 1])
    bin_width = fs / (2.0 * (len(magnitude) - 1))
    freq = np.arange(0, len(magnitude), 1) * bin_width

    # Discard everything below the low-frequency limit; [0][0] unpacks the
    # first matching index from np.where's result.
    first_bin = np.where(freq >= lf_limit)[0][0]
    magnitude = magnitude[first_bin:]
    freq = freq[first_bin:]

    # Magnitude-weighted mean and standard deviation of frequency.
    centroid = sum(magnitude * freq) / float(sum(magnitude))
    variance = sum(((freq - centroid) ** 2) * magnitude) / sum(magnitude)
    spread = np.sqrt(variance)
    return centroid, spread
def spectrogram(samples, sample_rate, frame_len, fps, batch=50):
    """
    Computes a magnitude spectrogram for a given vector of samples at a given
    sample rate (in Hz), frame length (in samples) and frame rate (in Hz).
    Allows to transform multiple frames at once for improved performance (with
    a default value of 50, more is not always better).
    Returns a numpy array.
    """
    if len(samples) < frame_len:
        return np.empty((0, frame_len // 2 + 1), dtype=samples.dtype)
    win = np.hanning(frame_len)
    # The hop is used as a stride, a slice bound and a range step, so it must
    # be an integer even when fps or sample_rate is passed as a float.
    hopsize = int(sample_rate // fps)
    num_frames = max(0, (len(samples) - frame_len) // hopsize + 1)
    batch = min(batch, num_frames)
    if batch <= 1 or not samples.flags.c_contiguous:
        rfft = rfft_builder(samples[:frame_len], n=frame_len)
        # np.vstack needs a real sequence; passing a generator is deprecated
        # since NumPy 1.16.
        spect = np.vstack([np.abs(rfft(samples[pos:pos + frame_len] * win))
                           for pos in range(0, len(samples) - frame_len + 1,
                                            hopsize)])
    else:
        rfft = rfft_builder(np.empty((batch, frame_len), samples.dtype),
                            n=frame_len, threads=1)
        # Overlapping frames as a zero-copy strided view of the sample buffer.
        frames = np.lib.stride_tricks.as_strided(
            samples, shape=(num_frames, frame_len),
            strides=(samples.strides[0] * hopsize, samples.strides[0]))
        spect = [np.abs(rfft(frames[pos:pos + batch] * win))
                 for pos in range(0, num_frames - batch + 1, batch)]
        # Frames that do not fill a whole batch are processed one by one.
        if num_frames % batch:
            spect.extend(spectrogram(
                samples[(num_frames // batch * batch) * hopsize:],
                sample_rate, frame_len, fps, batch=1))
        spect = np.vstack(spect)
    return spect
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Half a frame of zeros is prepended so the first window is centered on
    sample 0, and a full frame of zeros is appended so every frame is covered.

    Args:
        sig: 1-D sequence of samples.
        frameSize: Number of samples per analysis frame.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable that maps a length to a window array.

    Returns:
        2-D complex array with one row per frame; each row is
        ``np.fft.rfft`` of the windowed frame.
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))

    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    # Floor division keeps the length an integer under Python 3 as well.
    samples = np.append(np.zeros(frameSize // 2), sig)

    # cols for windowing
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1

    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))

    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)