Python librosa 模块,logamplitude() 实例源码


项目:magenta    作者:tensorflow    | 项目源码 | 文件源码
def wav_to_spec(wav_audio, hparams):
  """Transforms the contents of a wav file into a series of spectrograms.

  Args:
    wav_audio: Wav data, forwarded unchanged to the `_wav_to_*` helper that
      matches `hparams.spec_type`.
    hparams: Object exposing `spec_type` ('raw', 'cqt' or 'mel') and, for the
      'cqt' and 'mel' types, a `spec_log_amplitude` flag.

  Returns:
    The output of the selected helper. For 'cqt' and 'mel' it is additionally
    passed through `librosa.logamplitude` when `hparams.spec_log_amplitude`
    is truthy.

  Raises:
    ValueError: If `hparams.spec_type` is not one of the supported values.
  """
  spec_type = hparams.spec_type

  if spec_type == 'raw':
    # Framed raw samples are returned as-is; log scaling is only applied to
    # the spectral representations handled below.
    return _wav_to_framed_samples(wav_audio, hparams)

  if spec_type == 'cqt':
    spec = _wav_to_cqt(wav_audio, hparams)
  elif spec_type == 'mel':
    spec = _wav_to_mel(wav_audio, hparams)
  else:
    raise ValueError('Invalid spec_type: {}'.format(hparams.spec_type))

  if hparams.spec_log_amplitude:
    spec = librosa.logamplitude(spec)

  return spec
项目:EUSIPCO2017    作者:Veleslavia    | 项目源码 | 文件源码
def compute_spectrograms(filename):
    """Yield fixed-width log-mel spectrogram segments for an audio file.

    The file is loaded as mono audio resampled to 12 kHz, converted to a mel
    spectrogram (512-point FFT, hop of 256 samples, `N_MEL_BANDS` bands),
    squared and passed through `librosa.logamplitude`. The result is cut
    along its second axis into consecutive, non-overlapping slices of
    `SEGMENT_DUR` columns; a trailing partial slice is dropped.

    Args:
        filename: path of the audio file, handed to `librosa.load`.

    Yields:
        Slices `x[:, start:start + SEGMENT_DUR]` of the log-mel spectrogram.

    Raises:
        Exception: if the clip holds fewer than three seconds of samples.
            As this is a generator, the check runs on first iteration.
    """
    out_rate = 12000
    N_FFT = 512
    HOP_LEN = 256

    frames, rate = librosa.load(filename, sr=out_rate, mono=True)
    if len(frames) < out_rate * 3:
        # less than 3 seconds of audio cannot be processed
        raise Exception("Audio duration is too short")

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    # The original call was left unterminated; `ref_power=1.0` is spelled out
    # here for readability.
    # NOTE(review): 1.0 is believed to be logamplitude's default - confirm.
    x = logam(melgram(y=frames, sr=out_rate, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MEL_BANDS) ** 2,
              ref_power=1.0)

    # walk the spectrogram with a stride equal to the segment duration
    for start_idx in range(0, x.shape[1] - SEGMENT_DUR + 1, SEGMENT_DUR):
        yield x[:, start_idx:start_idx + SEGMENT_DUR]
项目:gcForest    作者:kingfengji    | 项目源码 | 文件源码
def get_feature_aqibsaeed_conv(X, sr, au_path=None):
    """Compute flattened log-mel spectrograms over half-overlapping windows.

    Args:
        X: 1-D sequence of audio samples; it is sliced window by window.
        sr: unused in the visible code.
        au_path: unused in the visible code.

    NOTE(review): each `logspec` is computed and then dropped - nothing in
    this excerpt collects or returns it, so the function returns None. The
    remainder of the original function appears to be missing; confirm
    against the upstream project.
    """
    import librosa

    def windows(data, window_size):
        # Windows advance by half their width. Floor division keeps the
        # bounds integral so they remain valid slice indices on Python 3.
        step = window_size // 2
        start = 0
        while start < len(data):
            yield start, start + window_size
            start += step

    bands = 60
    frames = 41
    window_size = 512 * (frames - 1)
    for (start, end) in windows(X, window_size):
        # Only full-length windows are processed; the short tail is skipped.
        if len(X[start:end]) == window_size:
            signal = X[start:end]
            melspec = librosa.feature.melspectrogram(signal, n_mels=bands)
            logspec = librosa.logamplitude(melspec)
            # Flatten to a single row vector.
            logspec = logspec.T.flatten()[:, np.newaxis].T
项目:tartarus    作者:sergiooramas    | 项目源码 | 文件源码
def prepare_testset(dataset_name):
    """Write log-power versions of a dataset's test-set spectrograms.

    Reads the track ids listed in
    `common.DATASETS_DIR/items_index_test_<dataset_name>.tsv`, loads each
    track's pickled spectrogram, converts it with `librosa.logamplitude`
    (power = |spec|**2, referenced to the maximum), transposes it and pickles
    the result into `common.DATA_DIR/spectro_<dataset_name>_testset/`.

    Args:
        dataset_name: name used to build both the index file name and the
            output folder name.

    NOTE(review): several lines were missing from this excerpt (the body of
    the folder check, the `else:` of the MSD branch and the guard around the
    pickle load). They are reconstructed here from the indentation and the
    "no exist" message - confirm against the upstream project.
    NOTE(review): `spec_folder` and `MSD` are not defined in the visible
    code; presumably module-level names.
    """
    test_folder = common.DATA_DIR + '/spectro_%s_testset/' % dataset_name
    if not os.path.exists(test_folder):
        os.makedirs(test_folder)

    index_path = common.DATASETS_DIR + '/items_index_test_%s.tsv' % dataset_name
    with open(index_path) as index_file:
        items = index_file.read().splitlines()

    for t, track_id in enumerate(items):
        if MSD:
            # Sub-folder built from the 3rd, 4th and 5th characters of the id.
            msd_folder = track_id[2] + "/" + track_id[3] + "/" + track_id[4] + "/"
        else:
            msd_folder = ""
        spec_path = spec_folder + msd_folder + track_id + ".pk"

        try:
            # NOTE: pickle must only be used on trusted, locally produced files.
            with open(spec_path, 'rb') as spec_in:
                spec = pickle.load(spec_in)
        except IOError:
            print("no exist %s" % spec_path)
            continue

        spec = librosa.logamplitude(np.abs(spec) ** 2, ref_power=np.max).T
        with open(test_folder + track_id + ".pk", "wb") as spec_out:
            pickle.dump(spec, spec_out)
        if t % 1000 == 0:
            print(t)
项目:speechT    作者:timediv    | 项目源码 | 文件源码
def calc_power_spectrogram(audio_data, samplerate, n_mels=128, n_fft=512, hop_length=160):
  """
  Calculate power spectrogram from the given raw audio data

  Args:
    audio_data: numpyarray of raw audio wave
    samplerate: the sample rate of the `audio_data`
    n_mels: the number of mels to generate
    n_fft: the window size of the fft
    hop_length: the hop length for the window

  Returns: the spectrogram in the form [time, n_mels]

  """
  spectrogram = librosa.feature.melspectrogram(audio_data, sr=samplerate, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)

  # convert to log scale (dB), referenced to the spectrogram's maximum
  log_spectrogram = librosa.logamplitude(spectrogram, ref_power=np.max)

  # normalize
  normalized_spectrogram = normalize(log_spectrogram)

  # transpose so that time is the leading axis
  return normalized_spectrogram.T
项目:echonet    作者:karoldvl    | 项目源码 | 文件源码
def _generate_spectrograms(self):
    """Compute and store mel spectrograms for every row of `self.meta`.

    For each row, `<work_dir><filename>.mel.spec.npy` is written with the
    perceptually weighted mel spectrogram, and
    `<work_dir><filename>.mel.spec.ds.npy` with a copy reduced by averaging
    over 3x2 blocks. Both are stored as float16. Rows whose spectrogram
    file already exists are skipped.

    NOTE(review): the `continue` and the `np.save(` prefixes were missing
    from this excerpt and are reconstructed from the surviving call tails.
    """
    for row in tqdm(self.meta.itertuples(), total=len(self.meta)):
        specfile = self.work_dir + row.filename + '.mel.spec.npy'

        if os.path.exists(specfile):
            # Already generated by an earlier run.
            continue

        audio = load_audio(self.data_dir + 'audio/' + row.filename, 44100)

        spec = librosa.feature.melspectrogram(audio, sr=44100, n_fft=self.FFT, fmax=self.FMAX,
                                              hop_length=self.HOP, n_mels=self.BANDS)
        freqs = librosa.core.mel_frequencies(n_mels=self.BANDS, fmax=self.FMAX)
        spec = librosa.core.perceptual_weighting(spec, freqs, ref_power=np.max)

        reduced_spec = skim.measure.block_reduce(spec, block_size=(3, 2), func=np.mean)
        np.save(specfile, spec.astype('float16'), allow_pickle=False)
        # specfile[:-4] strips the trailing '.npy' before the new suffix.
        np.save(specfile[:-4] + '.ds.npy', reduced_spec.astype('float16'), allow_pickle=False)
项目:gcforest    作者:w821881341    | 项目源码 | 文件源码
def get_feature_aqibsaeed_conv(X, sr, au_path=None):
    """Compute flattened log-mel spectrograms over half-overlapping windows.

    Args:
        X: 1-D sequence of audio samples; it is sliced window by window.
        sr: unused in the visible code.
        au_path: unused in the visible code.

    NOTE(review): each `logspec` is computed and then dropped - nothing in
    this excerpt collects or returns it, so the function returns None. The
    remainder of the original function appears to be missing; confirm
    against the upstream project.
    """
    import librosa

    def windows(data, window_size):
        # Windows advance by half their width. Floor division keeps the
        # bounds integral so they remain valid slice indices on Python 3.
        step = window_size // 2
        start = 0
        while start < len(data):
            yield start, start + window_size
            start += step

    bands = 60
    frames = 41
    window_size = 512 * (frames - 1)
    for (start, end) in windows(X, window_size):
        # Only full-length windows are processed; the short tail is skipped.
        if len(X[start:end]) == window_size:
            signal = X[start:end]
            melspec = librosa.feature.melspectrogram(signal, n_mels=bands)
            logspec = librosa.logamplitude(melspec)
            # Flatten to a single row vector.
            logspec = logspec.T.flatten()[:, np.newaxis].T
项目:crnn-music-genre-classification    作者:meetshah1995    | 项目源码 | 文件源码
def log_scale_melspectrogram(path, plot=False):
    """Load an audio file and return its log-amplitude mel spectrogram.

    The signal is loaded at the module-level rate `Fs` and forced to exactly
    `int(DURA * Fs)` samples: shorter signals are zero-padded at the end,
    longer ones are cropped around their centre.

    Args:
        path: audio file path, handed to `lb.load`.
        plot: when true, a leading singleton axis is added to the result.

    Returns:
        Output of `lb.logamplitude` applied to the squared mel spectrogram
        (`ref_power=1.0`), optionally with the extra leading axis.
    """
    signal, sr = lb.load(path, sr=Fs)
    n_sample = signal.shape[0]
    n_sample_fit = int(DURA * Fs)

    if n_sample < n_sample_fit:
        signal = np.hstack((signal, np.zeros((n_sample_fit - n_sample,))))
    elif n_sample > n_sample_fit:
        # Floor division: slice bounds must be integers on Python 3.
        signal = signal[(n_sample - n_sample_fit) // 2:(n_sample + n_sample_fit) // 2]

    melspect = lb.logamplitude(lb.feature.melspectrogram(y=signal, sr=Fs, hop_length=N_OVERLAP, n_fft=N_FFT, n_mels=N_MELS)**2, ref_power=1.0)

    if plot:
        melspect = melspect[np.newaxis, :]

    return melspect
项目:TensorFlow_AudioSet_Example    作者:DantesLegacy    | 项目源码 | 文件源码
def plot_log_power_specgram(sound_names, raw_sounds):
    """Draw the log-power spectrogram of each sound in its own subplot row.

    Args:
        sound_names: names of the sounds, paired with `raw_sounds` by `zip`.
        raw_sounds: audio signals, each passed to `librosa.stft`.

    NOTE(review): in the excerpt the counter `i` and the name `n` were never
    used, so every spectrogram was drawn onto the same axes. The per-sound
    subplot and title are restored here on that basis - confirm against the
    upstream project.
    """
    pairs = list(zip(sound_names, raw_sounds))
    fig = plt.figure(figsize=(25, 60), dpi=900)
    for i, (n, f) in enumerate(pairs, start=1):
        # One row per sound, stacked vertically in the tall figure.
        plt.subplot(len(pairs), 1, i)
        D = librosa.logamplitude(np.abs(librosa.stft(f))**2, ref_power=np.max)
        librosa.display.specshow(D, x_axis='time', y_axis='log')
        plt.title(n)
    plt.suptitle('Figure 3: Log power spectrogram', x=0.5, y=0.915, fontsize=18)
项目:EUSIPCO2017    作者:Veleslavia    | 项目源码 | 文件源码
def compute_spectrograms(filename):
    """Yield fixed-width log-mel spectrogram segments for an audio file.

    The file is loaded as mono audio resampled to 22050 Hz, normalized with
    `_normalize`, turned into a mel spectrogram with `N_MEL_BANDS` bands,
    squared and passed through `librosa.logamplitude`. Consecutive,
    non-overlapping slices of `SEGMENT_DUR` columns are yielded; a trailing
    partial slice is dropped.

    Raises:
        Exception: if the clip holds fewer than one second of samples. As
            this is a generator, the check runs on first iteration.
    """
    target_sr = 22050

    samples, _ = librosa.load(filename, sr=target_sr, mono=True)
    too_short = len(samples) < target_sr
    if too_short:
        # under one second of audio cannot be processed
        raise Exception("Audio duration is too short")

    mel = librosa.feature.melspectrogram(
        y=_normalize(samples), sr=target_sr, n_mels=N_MEL_BANDS, fmax=target_sr/2)
    log_mel = librosa.logamplitude(mel**2, ref_power=1.0)

    # step through the spectrogram one whole segment at a time
    n_columns = log_mel.shape[1]
    for offset in range(0, n_columns - SEGMENT_DUR + 1, SEGMENT_DUR):
        segment_end = offset + SEGMENT_DUR
        yield log_mel[:, offset:segment_end]
项目:laughter    作者:ganesh-srinivas    | 项目源码 | 文件源码
def extract_features(filename):
    """Return the flattened 13-coefficient MFCC matrix of an audio file.

    The clip is loaded with `librosa.load`, passed through
    `shape_sound_clip`, converted to a 128-band mel spectrogram and
    log-scaled relative to its maximum before the MFCCs are taken.
    """
    signal, rate = librosa.load(filename)
    clip = shape_sound_clip(signal)
    mel_power = librosa.feature.melspectrogram(clip, sr=rate, n_mels=128)
    log_mel = librosa.logamplitude(mel_power, ref_power=np.max)
    coefficients = librosa.feature.mfcc(S=log_mel, n_mfcc=13)
    return coefficients.flatten()
项目:panotti    作者:drscotthawley    | 项目源码 | 文件源码
def make_melgram(mono_sig, sr):
    """Return the log-amplitude 96-band mel spectrogram of `mono_sig`.

    Two singleton axes are prepended to the spectrogram before it is
    returned.
    """
    mel = librosa.feature.melspectrogram(mono_sig, sr=sr, n_mels=96)
    log_mel = librosa.logamplitude(mel, ref_power=1.0)
    return log_mel[np.newaxis, np.newaxis, :, :]

# turn multichannel audio into multiple melgram layers
项目:MusicGenreClassification    作者:mlachmish    | 项目源码 | 文件源码
def prepossessingAudio(audioPath, ppFilePath):
    """Extract per-window spectral features from `audioPath` for `ppFilePath`.

    NOTE(review): this excerpt is incomplete. The loop header that defines
    `i`, the statement that appends to `featuresArray`, the body of the
    final `if`, and whatever writes to and closes `f` are all missing, so
    the block does not parse as shown. Restore them from the upstream
    project before relying on this function.
    """
    print 'Prepossessing ' + audioPath

    featuresArray = []
            # Load one window: `i` and HAMMING_SIZE are divided by 1000.0,
            # so they are presumably milliseconds - TODO confirm.
            y, sr = librosa.load(audioPath, offset=i / 1000.0, duration=HAMMING_SIZE / 1000.0)

            # Let's make and display a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

            # Convert to log scale (dB). We'll use the peak power as reference.
            log_S = librosa.logamplitude(S, ref_power=np.max)

            mfcc = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=13)

            # featuresArray.append(S)

            # NOTE(review): body missing; presumably stops after 599 windows.
            if len(featuresArray) == 599:

    print 'storing pp file: ' + ppFilePath

    # NOTE(review): the handle is opened but never written or closed here.
    f = open(ppFilePath, 'w')
项目:MusicGenreClassification    作者:mlachmish    | 项目源码 | 文件源码
def prepossessingAudio(audioPath, ppFilePath):
    """Extract per-window spectral features from `audioPath` for `ppFilePath`.

    NOTE(review): this excerpt is incomplete. The loop header that defines
    `i`, the statement that appends to `featuresArray`, the body of the
    final `if`, and whatever writes to and closes `f` are all missing, so
    the block does not parse as shown. Restore them from the upstream
    project before relying on this function.
    """
    print 'Prepossessing ' + audioPath

    featuresArray = []
            # Load one window: `i` and HAMMING_SIZE are divided by 1000.0,
            # so they are presumably milliseconds - TODO confirm.
            y, sr = librosa.load(audioPath, offset=i / 1000.0, duration=HAMMING_SIZE / 1000.0)

            # Let's make and display a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

            # Convert to log scale (dB). We'll use the peak power as reference.
            log_S = librosa.logamplitude(S, ref_power=np.max)

            mfcc = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=13)
            # featuresArray.append(mfcc)


            # NOTE(review): body missing; presumably stops after 599 windows.
            if len(featuresArray) == 599:

    print 'storing pp file: ' + ppFilePath

    # NOTE(review): the handle is opened but never written or closed here.
    f = open(ppFilePath, 'w')
项目:echonet    作者:karoldvl    | 项目源码 | 文件源码
def _generate_spectrograms(self):
    """Compute and store log-mel spectrograms for every row of `self.meta`.

    For each row, the audio is loaded at 22050 Hz, scaled by the reciprocal
    of its peak absolute value, converted to a mel spectrogram (1024-point
    FFT, hop of 512 samples, `self.bands` bands), log-scaled and saved to
    `<work_dir><filename>.orig.spec.npy`. Rows whose spectrogram file
    already exists are skipped.

    NOTE(review): the `continue` and the `np.save(specfile` prefix were
    missing from this excerpt and are reconstructed from the surviving call
    tail.
    """
    for row in tqdm(self.meta.itertuples(), total=len(self.meta)):
        specfile = self.work_dir + row.filename + '.orig.spec.npy'

        if os.path.exists(specfile):
            # Already generated by an earlier run.
            continue

        audio = load_audio(self.data_dir + 'audio/' + row.filename, 22050)
        # Peak-normalize. NOTE(review): an all-zero clip divides by zero here.
        audio *= 1.0 / np.max(np.abs(audio))

        spec = librosa.feature.melspectrogram(audio, sr=22050, n_fft=1024,
                                              hop_length=512, n_mels=self.bands)
        spec = librosa.logamplitude(spec)
        np.save(specfile, spec, allow_pickle=False)
项目:coversongs-dual-convnet    作者:markostam    | 项目源码 | 文件源码
def feature_extract(songfile_name):
    """
    takes: filename
    outputs: audio feature representation from that file (currently cqt)
    **assumes working directory contains raw song files**
    returns a tuple containing songfile name and numpy array of song features
    """
    song_loc = os.path.abspath(songfile_name)
    y, sr = librosa.load(song_loc)
    desire_spect_len = 2580
    C = librosa.cqt(y=y, sr=sr, hop_length=512, fmin=None,
                    n_bins=84, bins_per_octave=12, tuning=None,
                    filter_scale=1, norm=1, sparsity=0.01, real=False)
    # get log-power spectrogram with noise floor of -80dB
    C = librosa.logamplitude(C**2, ref_power=np.max)
    # scale log-power spectrogram to positive integer values for a smaller
    # footprint: shift by the noise floor, then stretch towards the uint16 range
    noise_floor_db = 80
    scaling_factor = (2**16 - 1)/noise_floor_db
    C += noise_floor_db
    C *= scaling_factor
    C = C.astype('uint16')
    # if the spectral representation is too long, crop it; otherwise zero-pad
    # along the second axis (the missing `else:` left short inputs unpadded)
    if C.shape[1] >= desire_spect_len:
        C = C[:, 0:desire_spect_len]
    else:
        C = np.pad(C, ((0, 0), (0, desire_spect_len - C.shape[1])), 'constant')
    return songfile_name, C
项目:coversongs-dual-convnet    作者:markostam    | 项目源码 | 文件源码
def create_feature_matrix_spark(song_files):
    """Extract padded log-CQT features for many songs through Spark.

    Args:
        song_files: audio file paths, distributed with `sc.parallelize`.

    Returns:
        The collected list of `(basename, features)` pairs, where `features`
        is a uint16 array cropped or zero-padded to 2580 columns. Files that
        fail to load are dropped.

    NOTE(review): the `.map(lambda` prefixes, the try/except in `try_load`
    and the `else:` in `padding` were missing from this excerpt. They are
    reconstructed from the surviving lambda bodies, the `!= None` filter and
    the "try-catch wrapper" comment - confirm against the upstream project.
    NOTE(review): `sc` is not defined in the visible code; presumably a
    module-level SparkContext.
    """
    # cqt wrapper
    def log_cqt(y, sr):
        C = librosa.cqt(y=y, sr=sr, hop_length=512, fmin=None,
                        n_bins=84, bins_per_octave=12, tuning=None,
                        filter_scale=1, norm=1, sparsity=0.01, real=True)
        # get log-power spectrogram with noise floor of -80dB
        C = librosa.logamplitude(C**2, ref_power=np.max)
        # scale log-power spectrogram to positive integer values for a
        # smaller footprint
        noise_floor_db = 80
        scaling_factor = (2**16 - 1)/noise_floor_db
        C += noise_floor_db
        C *= scaling_factor
        C = C.astype('uint16')
        return C

    # padding wrapper: crop when too long, otherwise zero-pad the second axis
    def padding(C, desired_spect_len):
        if C.shape[1] >= desired_spect_len:
            C = C[:, 0:desired_spect_len]
        else:
            C = np.pad(C, ((0, 0), (0, desired_spect_len - C.shape[1])), 'constant')
        return C

    # load wrapper: a file that cannot be loaded yields None so that the
    # filter below can discard it instead of failing the whole job
    def try_load(filename):
        try:
            sys.stdout.write('Processing: %s \r' % os.path.basename(filename))
            return librosa.load(filename)
        except Exception:
            return None

    # transformations
    filesRDD = sc.parallelize(song_files)
    rawAudioRDD = filesRDD.map(
        lambda x: (os.path.basename(x), try_load(x))).filter(lambda x: x[1] is not None)
    # x[1] is the (samples, sample_rate) pair returned by librosa.load
    rawCQT = rawAudioRDD.map(lambda x: (x[0], log_cqt(x[1][0], x[1][1])))
    paddedCQT = rawCQT.map(lambda x: (x[0], padding(x[1], 2580)))
    return paddedCQT.collect()
项目:audio-classifier-keras-cnn    作者:drscotthawley    | 项目源码 | 文件源码
def preprocess_dataset(inpath="Samples/", outpath="Preproc/"):
    """Convert every audio file under `inpath` into a saved mel spectrogram.

    `inpath` is expected to hold one sub-directory per class (as listed by
    `get_class_names`). The layout is mirrored under `outpath`, and each
    audio file gets a `<name>.npy` file holding its log-amplitude 96-band
    mel spectrogram with two leading singleton axes.

    Args:
        inpath: root directory of the class sub-directories (with trailing
            separator, since paths are built by concatenation).
        outpath: root directory for the generated `.npy` files.

    NOTE(review): the excerpt ended right after `outfile` was computed; the
    `np.save` call is reconstructed on the assumption that the melgram is
    what gets written there - confirm against the upstream project.
    NOTE: the `print(..., sep="")` calls need Python 3 or
    `from __future__ import print_function`.
    """
    if not os.path.exists(outpath):
        # 0o755 is valid on Python 2.6+ and 3; the bare 0755 literal is not.
        os.mkdir(outpath, 0o755)   # make a new directory for preproc'd files

    class_names = get_class_names(path=inpath)   # names of the subdirectories
    nb_classes = len(class_names)
    print("class_names = ", class_names)
    for idx, classname in enumerate(class_names):   # go through the subdirs

        if not os.path.exists(outpath + classname):
            os.mkdir(outpath + classname, 0o755)   # subdirectory for this class

        class_files = os.listdir(inpath + classname)
        n_files = len(class_files)
        n_load = n_files
        print(' class name = {:14s} - {:3d}'.format(classname, idx),
              ", ", n_files, " files in this class", sep="")

        printevery = 20
        for idx2, infilename in enumerate(class_files):
            audio_path = inpath + classname + '/' + infilename
            if 0 == idx2 % printevery:
                print('\r Loading class: {:14s} ({:2d} of {:2d} classes)'.format(classname, idx+1, nb_classes),
                      ", file ", idx2+1, " of ", n_load, ": ", audio_path, sep="")
            # sr=None keeps the file's native sample rate
            aud, sr = librosa.load(audio_path, sr=None)
            melgram = librosa.logamplitude(librosa.feature.melspectrogram(aud, sr=sr, n_mels=96), ref_power=1.0)[np.newaxis, np.newaxis, :, :]
            outfile = outpath + classname + '/' + infilename + '.npy'
            np.save(outfile, melgram)
项目:audio    作者:willfrey    | 项目源码 | 文件源码
def __call__(self, S):
    """Apply `librosa.logamplitude` to `S`.

    Every instance attribute is forwarded to the call as a keyword argument.
    """
    keyword_args = self.__dict__
    return librosa.logamplitude(S, **keyword_args)
项目:product-color-classifier    作者:two-tap    | 项目源码 | 文件源码
def preprocess_input(audio_path, dim_ordering='default'):
    '''Reads an audio file and outputs a Mel-spectrogram.

    # Arguments
        audio_path: path of the audio file, handed to `librosa.load`.
        dim_ordering: 'tf', 'th', or 'default' (resolved through
            `K.image_dim_ordering()`).

    # Returns
        Log-amplitude Mel-spectrogram with one extra singleton axis:
        inserted first for 'th', appended last for 'tf'.

    # Raises
        AssertionError: if `dim_ordering` is neither 'tf' nor 'th' after
            'default' has been resolved.
        RuntimeError: if `librosa_exists()` reports librosa as unavailable.
    '''
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa_exists():
        import librosa
    else:
        # The URL had been stripped from this message; the librosa home page
        # is restored in its place.
        raise RuntimeError('Librosa is required to process audio files.\n' +
                           'Install it via `pip install librosa` \nor visit ' +
                           'https://librosa.org/ for details.')

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12

    src, sr = librosa.load(audio_path, sr=SR)
    n_sample = src.shape[0]
    n_sample_wanted = int(DURA * SR)

    # force the signal to exactly n_sample_wanted samples
    if n_sample < n_sample_wanted:  # too short: zero-pad at the end
        src = np.hstack((src, np.zeros((n_sample_wanted - n_sample,))))
    elif n_sample > n_sample_wanted:  # too long: keep the centre
        # Floor division: slice bounds must be integers on Python 3.
        src = src[(n_sample - n_sample_wanted) // 2:
                  (n_sample + n_sample_wanted) // 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    # The call had been left unterminated; `ref_power=1.0` is spelled out.
    # NOTE(review): 1.0 is believed to be logamplitude's default - confirm.
    x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MELS) ** 2,
              ref_power=1.0)

    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        # Assumes x is 2-D here (mono signal) - TODO confirm. `axis=3` is
        # then out of range and rejected by current NumPy; -1 appends the
        # channel axis as intended.
        x = np.expand_dims(x, axis=-1)
    return x
项目:deep-learning-models    作者:fchollet    | 项目源码 | 文件源码
def preprocess_input(audio_path, dim_ordering='default'):
    '''Reads an audio file and outputs a Mel-spectrogram.

    # Arguments
        audio_path: path of the audio file, handed to `librosa.load`.
        dim_ordering: 'tf', 'th', or 'default' (resolved through
            `K.image_dim_ordering()`).

    # Returns
        Log-amplitude Mel-spectrogram with one extra singleton axis:
        inserted first for 'th', appended last for 'tf'.

    # Raises
        AssertionError: if `dim_ordering` is neither 'tf' nor 'th' after
            'default' has been resolved.
        RuntimeError: if `librosa_exists()` reports librosa as unavailable.
    '''
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa_exists():
        import librosa
    else:
        # The URL had been stripped from this message; the librosa home page
        # is restored in its place.
        raise RuntimeError('Librosa is required to process audio files.\n' +
                           'Install it via `pip install librosa` \nor visit ' +
                           'https://librosa.org/ for details.')

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12

    src, sr = librosa.load(audio_path, sr=SR)
    n_sample = src.shape[0]
    n_sample_wanted = int(DURA * SR)

    # force the signal to exactly n_sample_wanted samples
    if n_sample < n_sample_wanted:  # too short: zero-pad at the end
        src = np.hstack((src, np.zeros((n_sample_wanted - n_sample,))))
    elif n_sample > n_sample_wanted:  # too long: keep the centre
        # Floor division: slice bounds must be integers on Python 3.
        src = src[(n_sample - n_sample_wanted) // 2:
                  (n_sample + n_sample_wanted) // 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    # The call had been left unterminated; `ref_power=1.0` is spelled out.
    # NOTE(review): 1.0 is believed to be logamplitude's default - confirm.
    x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MELS) ** 2,
              ref_power=1.0)

    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        # Assumes x is 2-D here (mono signal) - TODO confirm. `axis=3` is
        # then out of range and rejected by current NumPy; -1 appends the
        # channel axis as intended.
        x = np.expand_dims(x, axis=-1)
    return x
项目:keras    作者:GeekLiB    | 项目源码 | 文件源码
def preprocess_input(audio_path, dim_ordering='default'):
    '''Reads an audio file and outputs a Mel-spectrogram.

    # Arguments
        audio_path: path of the audio file, handed to `librosa.load`.
        dim_ordering: 'tf', 'th', or 'default' (resolved through
            `K.image_dim_ordering()`).

    # Returns
        Log-amplitude Mel-spectrogram with one extra singleton axis:
        inserted first for 'th', appended last for 'tf'.

    # Raises
        AssertionError: if `dim_ordering` is neither 'tf' nor 'th' after
            'default' has been resolved.
        RuntimeError: if `librosa_exists()` reports librosa as unavailable.
    '''
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa_exists():
        import librosa
    else:
        # The URL had been stripped from this message; the librosa home page
        # is restored in its place.
        raise RuntimeError('Librosa is required to process audio files.\n' +
                           'Install it via `pip install librosa` \nor visit ' +
                           'https://librosa.org/ for details.')

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12

    src, sr = librosa.load(audio_path, sr=SR)
    n_sample = src.shape[0]
    n_sample_wanted = int(DURA * SR)

    # force the signal to exactly n_sample_wanted samples
    if n_sample < n_sample_wanted:  # too short: zero-pad at the end
        src = np.hstack((src, np.zeros((n_sample_wanted - n_sample,))))
    elif n_sample > n_sample_wanted:  # too long: keep the centre
        # Floor division: slice bounds must be integers on Python 3.
        src = src[(n_sample - n_sample_wanted) // 2:
                  (n_sample + n_sample_wanted) // 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    # The call had been left unterminated; `ref_power=1.0` is spelled out.
    # NOTE(review): 1.0 is believed to be logamplitude's default - confirm.
    x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MELS) ** 2,
              ref_power=1.0)

    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        # Assumes x is 2-D here (mono signal) - TODO confirm. `axis=3` is
        # then out of range and rejected by current NumPy; -1 appends the
        # channel axis as intended.
        x = np.expand_dims(x, axis=-1)
    return x
项目:magenta    作者:tensorflow    | 项目源码 | 文件源码
def ispecgram(spec,
              n_fft=512,
              hop_length=None,
              mask=True,
              log_mag=True,
              re_im=False,
              dphase=True,
              mag_only=True,
              num_iters=1000):
  """Inverse Spectrogram using librosa.

  NOTE(review): the parameter list after `spec` was missing from this
  excerpt. Names and order follow the argument list documented below; the
  default values are reconstructed and must be confirmed against the
  upstream project.

  Args:
    spec: 3-D specgram array [freqs, time, (mag_db, dphase)].
    n_fft: Size of the FFT.
    hop_length: Stride of FFT. Defaults to n_fft/2.
    mask: Reverse the mask of the phase derivative by the magnitude.
    log_mag: Use the logamplitude.
    re_im: Output Real and Imag. instead of logMag and dPhase.
    dphase: Use derivative of phase instead of phase.
    mag_only: Specgram contains no phase.
    num_iters: Number of griffin-lim iterations for mag_only.

  Returns:
    audio: 1-D array of sound samples. Peak normalized to 1.
  """
  if not hop_length:
    hop_length = n_fft // 2

  ifft_config = dict(win_length=n_fft, hop_length=hop_length, center=True)

  if mag_only:
    # No phase available: start from random phase.
    mag = spec[:, :, 0]
    phase_angle = np.pi * np.random.rand(*mag.shape)
  elif re_im:
    # The two channels already hold the real and imaginary parts.
    spec_real = spec[:, :, 0] + 1.j * spec[:, :, 1]
  else:
    mag, p = spec[:, :, 0], spec[:, :, 1]
    if mask and log_mag:
      # Out-of-place division: `p` is a view of `spec`, so `p /= ...` would
      # silently modify the caller's array.
      p = p / (mag + 1e-13 * np.random.randn(*mag.shape))
    if dphase:
      # Roll up phase
      phase_angle = np.cumsum(p * np.pi, axis=1)
    else:
      phase_angle = p * np.pi

  # Magnitude/phase recombination. Skipped on the pure re_im path, where
  # `mag` and `phase_angle` were never defined and `spec_real` is already
  # complete.
  if mag_only or not re_im:
    if log_mag:
      mag = (mag - 1.0) * 120.0
      mag = 10**(mag / 20.0)
    phase = np.cos(phase_angle) + 1.j * np.sin(phase_angle)
    spec_real = mag * phase

  if mag_only:
    audio = griffin_lim(
        mag, phase_angle, n_fft, hop_length, num_iters=num_iters)
  else:
    audio = librosa.core.istft(spec_real, **ifft_config)
  return np.squeeze(audio / audio.max())
项目:mr-gan    作者:Healthcare-Robotics    | 项目源码 | 文件源码
def dataset(modalities=0, forcetempTime=4, contactmicTime=0.2, leaveObjectOut=False, verbose=False):
    """Load the processed per-material pickles and build feature vectors.

    Args:
        modalities: selects the signals concatenated into each sample:
            0 force, 1 temperature, 2 temperature + force, 3 contact-mic
            spectrogram, 4 temperature + spectrogram, 5 temperature + force
            + spectrogram, 6 force + spectrogram.
        forcetempTime: used to format the pickle file name.
        contactmicTime: used to format the pickle file name.
        leaveObjectOut: when true, samples are grouped per object name.
        verbose: print progress information.

    Returns:
        When `leaveObjectOut` is true, a dict mapping each object name to
        `{'x': [...], 'y': [...]}`; otherwise a tuple `(X, y)` of arrays.

    NOTE(review): the bodies for modalities 1 and 3, the label append and the
    final `else:` were missing from this excerpt. They are reconstructed from
    the pattern of the surviving branches, with the material index `m` as the
    label - confirm against the upstream project.
    """
    materials = ['plastic', 'glass', 'fabric', 'metal', 'wood', 'ceramic']
    X = []
    y = []
    objects = dict()
    for m, material in enumerate(materials):
        if verbose:
            print('Processing %s' % material)
        # NOTE: pickle must only be used on trusted, locally produced files.
        with open('data_processed/processed_0.1sbefore_%s_times_%.2f_%.2f.pkl' % (material, forcetempTime, contactmicTime), 'rb') as f:
            allData = pickle.load(f)
        for objName, objData in allData.items():
            if leaveObjectOut:
                # Redirect X / y to this object's own sample lists.
                objects[objName] = {'x': [], 'y': []}
                X = objects[objName]['x']
                y = objects[objName]['y']
            for i in range(len(objData['temperature'])):
                force = objData['force0'][i] + objData['force1'][i]
                temperature = objData['temperature'][i]

                if modalities > 2:
                    # Mel-scaled power (energy-squared) spectrogram
                    sr = 48000
                    S = librosa.feature.melspectrogram(np.array(objData['contact'][i]), sr=sr, n_mels=128)
                    # Convert to log scale (dB)
                    log_S = librosa.logamplitude(S, ref_power=np.max)
                    contact = log_S.flatten().tolist()

                if modalities == 0:
                    X.append(force)
                elif modalities == 1:
                    X.append(temperature)
                elif modalities == 2:
                    X.append(temperature + force)
                elif modalities == 3:
                    X.append(contact)
                elif modalities == 4:
                    X.append(temperature + contact)
                elif modalities == 5:
                    X.append(temperature + force + contact)
                elif modalities == 6:
                    X.append(force + contact)
                y.append(m)

    if leaveObjectOut:
        return objects
    else:
        X = np.array(X)
        y = np.array(y)
        if verbose:
            print('X: %s y: %s' % (np.shape(X), np.shape(y)))
        return X, y
项目:mr-gan    作者:Healthcare-Robotics    | 项目源码 | 文件源码
def dataset(modalities=0, forcetempTime=4, contactmicTime=0.2, leaveObjectOut=False, verbose=False):
    """Load the processed per-material pickles and build feature vectors.

    Args:
        modalities: selects the signals concatenated into each sample:
            0 force, 1 temperature, 2 temperature + force, 3 contact-mic
            spectrogram, 4 temperature + spectrogram, 5 temperature + force
            + spectrogram, 6 force + spectrogram.
        forcetempTime: used to format the pickle file name.
        contactmicTime: used to format the pickle file name.
        leaveObjectOut: when true, samples are grouped per object name.
        verbose: print progress information.

    Returns:
        When `leaveObjectOut` is true, a dict mapping each object name to
        `{'x': [...], 'y': [...]}`; otherwise a tuple `(X, y)` of arrays.

    NOTE(review): the bodies for modalities 1 and 3, the label append and the
    final `else:` were missing from this excerpt. They are reconstructed from
    the pattern of the surviving branches, with the material index `m` as the
    label - confirm against the upstream project.
    """
    materials = ['plastic', 'glass', 'fabric', 'metal', 'wood', 'ceramic']
    X = []
    y = []
    objects = dict()
    for m, material in enumerate(materials):
        if verbose:
            print('Processing %s' % material)
        # NOTE: pickle must only be used on trusted, locally produced files.
        with open('data_processed/processed_0.1sbefore_%s_times_%.2f_%.2f.pkl' % (material, forcetempTime, contactmicTime), 'rb') as f:
            allData = pickle.load(f)
        for objName, objData in allData.items():
            if leaveObjectOut:
                # Redirect X / y to this object's own sample lists.
                objects[objName] = {'x': [], 'y': []}
                X = objects[objName]['x']
                y = objects[objName]['y']
            for i in range(len(objData['temperature'])):
                force = objData['force0'][i] + objData['force1'][i]
                temperature = objData['temperature'][i]

                if modalities > 2:
                    # Mel-scaled power (energy-squared) spectrogram
                    sr = 48000
                    S = librosa.feature.melspectrogram(np.array(objData['contact'][i]), sr=sr, n_mels=128)
                    # Convert to log scale (dB)
                    log_S = librosa.logamplitude(S, ref_power=np.max)
                    contact = log_S.flatten().tolist()

                if modalities == 0:
                    X.append(force)
                elif modalities == 1:
                    X.append(temperature)
                elif modalities == 2:
                    X.append(temperature + force)
                elif modalities == 3:
                    X.append(contact)
                elif modalities == 4:
                    X.append(temperature + contact)
                elif modalities == 5:
                    X.append(temperature + force + contact)
                elif modalities == 6:
                    X.append(force + contact)
                y.append(m)

    if leaveObjectOut:
        return objects
    else:
        X = np.array(X)
        y = np.array(y)
        if verbose:
            print('X: %s y: %s' % (np.shape(X), np.shape(y)))
        return X, y
项目:mr-gan    作者:Healthcare-Robotics    | 项目源码 | 文件源码
def dataset(modalities=0, forcetempTime=4, contactmicTime=0.2, leaveObjectOut=False, verbose=False, deriv=False):
    """Load the processed per-material pickles and build feature vectors.

    Args:
        modalities: selects the signals concatenated into each sample:
            0 force, 1 temperature, 2 temperature + force, 3 contact-mic
            spectrogram, 4 temperature + spectrogram, 5 temperature + force
            + spectrogram, 6 force + spectrogram.
        forcetempTime: used to format the pickle file name.
        contactmicTime: used to format the pickle file name.
        leaveObjectOut: when true, samples are grouped per object name.
        verbose: print progress information.
        deriv: when true, the force and temperature series are replaced by
            the output of `firstDeriv` before features are assembled.

    Returns:
        When `leaveObjectOut` is true, a dict mapping each object name to
        `{'x': [...], 'y': [...]}`; otherwise a tuple `(X, y)` of arrays.

    NOTE(review): the bodies for modalities 1 and 3, the label append and the
    final `else:` were missing from this excerpt. They are reconstructed from
    the pattern of the surviving branches, with the material index `m` as the
    label - confirm against the upstream project.
    """
    materials = ['plastic', 'glass', 'fabric', 'metal', 'wood', 'ceramic']
    X = []
    y = []
    objects = dict()
    for m, material in enumerate(materials):
        if verbose:
            print('Processing %s' % material)
        # NOTE: pickle must only be used on trusted, locally produced files.
        with open('data_processed/processed_0.1sbefore_%s_times_%.2f_%.2f.pkl' % (material, forcetempTime, contactmicTime), 'rb') as f:
            allData = pickle.load(f)
        for objName, objData in allData.items():
            if leaveObjectOut:
                # Redirect X / y to this object's own sample lists.
                objects[objName] = {'x': [], 'y': []}
                X = objects[objName]['x']
                y = objects[objName]['y']
            for i in range(len(objData['temperature'])):

                if deriv:
                    # Overwrite the loaded series with their derivatives.
                    objData['force0'][i] = firstDeriv(objData['force0'][i], objData['forceTime'][i])
                    objData['force1'][i] = firstDeriv(objData['force1'][i], objData['forceTime'][i])
                    objData['temperature'][i] = firstDeriv(objData['temperature'][i], objData['temperatureTime'][i])

                force = objData['force0'][i] + objData['force1'][i]
                temperature = objData['temperature'][i]

                if modalities > 2:
                    # Mel-scaled power (energy-squared) spectrogram
                    sr = 48000
                    S = librosa.feature.melspectrogram(np.array(objData['contact'][i]), sr=sr, n_mels=128)
                    # Convert to log scale (dB)
                    log_S = librosa.logamplitude(S, ref_power=np.max)
                    contact = log_S.flatten().tolist()

                if modalities == 0:
                    X.append(force)
                elif modalities == 1:
                    X.append(temperature)
                elif modalities == 2:
                    X.append(temperature + force)
                elif modalities == 3:
                    X.append(contact)
                elif modalities == 4:
                    X.append(temperature + contact)
                elif modalities == 5:
                    X.append(temperature + force + contact)
                elif modalities == 6:
                    X.append(force + contact)
                y.append(m)

    if leaveObjectOut:
        return objects
    else:
        X = np.array(X)
        y = np.array(y)
        if verbose:
            print('X: %s y: %s' % (np.shape(X), np.shape(y)))
        return X, y
项目:deep-learning-keras-projects    作者:jasmeetsb    | 项目源码 | 文件源码
def preprocess_input(audio_path, dim_ordering='default'):
    """Reads an audio file and outputs a Mel-spectrogram.

    # Arguments
        audio_path: path to the target audio file.
        dim_ordering: data format for the output spectrogram image.

    # Returns
        3D Numpy tensor encoding the Mel-spectrogram.

    # Raises
        ImportError: if librosa is not available.
        AssertionError: if `dim_ordering` is neither 'tf' nor 'th' after
            'default' has been resolved.
    """
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa is None:
        # The URL had been stripped from this message; the librosa home page
        # is restored in its place.
        raise ImportError('Librosa is required to process audio files. '
                          'Install it via `pip install librosa` or visit '
                          'https://librosa.org/ for details.')

    # mel-spectrogram parameters
    sr = 12000
    n_fft = 512
    n_mels = 96
    hop_length = 256
    duration = 29.12

    src, sr = librosa.load(audio_path, sr=sr)
    n_sample = src.shape[0]
    n_sample_wanted = int(duration * sr)

    # force the signal to exactly n_sample_wanted samples
    if n_sample < n_sample_wanted:  # too short: zero-pad at the end
        src = np.hstack((src, np.zeros((n_sample_wanted - n_sample,))))
    elif n_sample > n_sample_wanted:  # too long: keep the centre
        src = src[(n_sample - n_sample_wanted) // 2:
                  (n_sample + n_sample_wanted) // 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    # The call had been left unterminated; `ref_power=1.0` is spelled out.
    # NOTE(review): 1.0 is believed to be logamplitude's default - confirm.
    x = logam(melgram(y=src, sr=sr, hop_length=hop_length,
                      n_fft=n_fft, n_mels=n_mels) ** 2,
              ref_power=1.0)

    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        # Assumes x is 2-D here (mono signal) - TODO confirm. `axis=3` is
        # then out of range and rejected by current NumPy; -1 appends the
        # channel axis as intended.
        x = np.expand_dims(x, axis=-1)
    return x
项目:vgg16-vgg19-resnet-inception-xception-example    作者:yong-ho    | 项目源码 | 文件源码
def preprocess_input(audio_path, dim_ordering='default'):
    '''Reads an audio file and outputs a Mel-spectrogram.

    # Arguments
        audio_path: path of the audio file, handed to `librosa.load`.
        dim_ordering: 'tf', 'th', or 'default' (resolved through
            `K.image_dim_ordering()`).

    # Returns
        Log-amplitude Mel-spectrogram with one extra singleton axis:
        inserted first for 'th', appended last for 'tf'.

    # Raises
        AssertionError: if `dim_ordering` is neither 'tf' nor 'th' after
            'default' has been resolved.
        RuntimeError: if `librosa_exists()` reports librosa as unavailable.
    '''
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa_exists():
        import librosa
    else:
        # The URL had been stripped from this message; the librosa home page
        # is restored in its place.
        raise RuntimeError('Librosa is required to process audio files.\n' +
                           'Install it via `pip install librosa` \nor visit ' +
                           'https://librosa.org/ for details.')

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12

    src, sr = librosa.load(audio_path, sr=SR)
    n_sample = src.shape[0]
    n_sample_wanted = int(DURA * SR)

    # force the signal to exactly n_sample_wanted samples
    if n_sample < n_sample_wanted:  # too short: zero-pad at the end
        src = np.hstack((src, np.zeros((n_sample_wanted - n_sample,))))
    elif n_sample > n_sample_wanted:  # too long: keep the centre
        # Floor division: slice bounds must be integers on Python 3.
        src = src[(n_sample - n_sample_wanted) // 2:
                  (n_sample + n_sample_wanted) // 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    # The call had been left unterminated; `ref_power=1.0` is spelled out.
    # NOTE(review): 1.0 is believed to be logamplitude's default - confirm.
    x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MELS) ** 2,
              ref_power=1.0)

    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        # Assumes x is 2-D here (mono signal) - TODO confirm. `axis=3` is
        # then out of range and rejected by current NumPy; -1 appends the
        # channel axis as intended.
        x = np.expand_dims(x, axis=-1)
    return x
项目:product-category-classifier    作者:two-tap    | 项目源码 | 文件源码
def preprocess_input(audio_path, dim_ordering='default'):
    '''Reads an audio file and outputs a Mel-spectrogram.

    # Arguments
        audio_path: path of the audio file, handed to `librosa.load`.
        dim_ordering: 'tf', 'th', or 'default' (resolved through
            `K.image_dim_ordering()`).

    # Returns
        Log-amplitude Mel-spectrogram with one extra singleton axis:
        inserted first for 'th', appended last for 'tf'.

    # Raises
        AssertionError: if `dim_ordering` is neither 'tf' nor 'th' after
            'default' has been resolved.
        RuntimeError: if `librosa_exists()` reports librosa as unavailable.
    '''
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa_exists():
        import librosa
    else:
        # The URL had been stripped from this message; the librosa home page
        # is restored in its place.
        raise RuntimeError('Librosa is required to process audio files.\n' +
                           'Install it via `pip install librosa` \nor visit ' +
                           'https://librosa.org/ for details.')

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12

    src, sr = librosa.load(audio_path, sr=SR)
    n_sample = src.shape[0]
    n_sample_wanted = int(DURA * SR)

    # force the signal to exactly n_sample_wanted samples
    if n_sample < n_sample_wanted:  # too short: zero-pad at the end
        src = np.hstack((src, np.zeros((n_sample_wanted - n_sample,))))
    elif n_sample > n_sample_wanted:  # too long: keep the centre
        # Floor division: slice bounds must be integers on Python 3.
        src = src[(n_sample - n_sample_wanted) // 2:
                  (n_sample + n_sample_wanted) // 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    # The call had been left unterminated; `ref_power=1.0` is spelled out.
    # NOTE(review): 1.0 is believed to be logamplitude's default - confirm.
    x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MELS) ** 2,
              ref_power=1.0)

    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        # Assumes x is 2-D here (mono signal) - TODO confirm. `axis=3` is
        # then out of range and rejected by current NumPy; -1 appends the
        # channel axis as intended.
        x = np.expand_dims(x, axis=-1)
    return x
项目:keras-customized    作者:ambrite    | 项目源码 | 文件源码
def preprocess_input(audio_path, dim_ordering='default'):
    '''Reads an audio file and outputs a log-amplitude Mel-spectrogram.

    # Arguments
        audio_path: path to the audio file; passed to `librosa.load`.
        dim_ordering: 'tf', 'th' or 'default'. With 'default' the value
            of `K.image_dim_ordering()` is used.

    # Returns
        Numpy array holding the spectrogram with one singleton axis
        added: in front for 'th', at the end for 'tf'.

    # Raises
        RuntimeError: if librosa is not available.
    '''
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa_exists():
        import librosa
    else:
        raise RuntimeError('Librosa is required to process audio files.\n' +
                           'Install it via `pip install librosa` \nor visit ' +
                           'http://librosa.github.io/librosa/ for details.')

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12

    src, sr = librosa.load(audio_path, sr=SR)
    n_sample = src.shape[0]
    n_sample_wanted = int(DURA * SR)

    # Force the signal to exactly n_sample_wanted samples.
    if n_sample < n_sample_wanted:  # too short: zero-pad at the end
        src = np.hstack((src, np.zeros((n_sample_wanted - n_sample,))))
    elif n_sample > n_sample_wanted:  # too long: keep the centre part
        # Floor division keeps the slice bounds integral on Python 3 too.
        src = src[(n_sample - n_sample_wanted) // 2:
                  (n_sample + n_sample_wanted) // 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MELS) ** 2,
              ref_power=1.0)

    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        # The spectrogram is 2-D, so the new trailing axis is axis 2.
        # (axis=3 is out of range and rejected by NumPy >= 1.18.)
        x = np.expand_dims(x, axis=2)
    return x
项目:dcase2016_task4    作者:pafoster    | 项目源码 | 文件源码
def feature_extraction(y=None, fs=None, statistics=True, include_mfcc0=True, include_delta=True, include_acceleration=True, mfcc_params=None, delta_params=None, acceleration_params=None):
    """Extract MFCC features, optionally with delta and acceleration rows.

    Parameters
    ----------
    y : audio signal handed to ``librosa.stft`` (a small epsilon is added).
    fs : sampling rate handed to ``librosa.filters.mel``.
    statistics : if True, also return summary statistics of the features.
    include_mfcc0 : if False, the first row of the stacked matrix is dropped.
    include_delta : if True, append ``librosa.feature.delta(mfcc)``.
    include_acceleration : if True, append the order-2 delta of the MFCCs.
    mfcc_params : dict read for the keys 'window', 'n_fft', 'win_length',
        'hop_length', 'n_mels', 'fmin', 'fmax' and 'htk'.
    delta_params : extra keyword arguments for the delta computation;
        ``None`` is treated as no extra arguments.
    acceleration_params : extra keyword arguments for the order-2 delta;
        ``None`` is treated as no extra arguments.

    Returns
    -------
    dict with key 'feat' (the stacked feature matrix, transposed) and, when
    ``statistics`` is True, key 'stat' holding 'mean', 'std', 'N', 'S1'
    (sum) and 'S2' (sum of squares), all computed along axis 0 of 'feat'.
    """
    # Extract features, Mel Frequency Cepstral Coefficients
    eps = numpy.spacing(1)

    # Windowing function
    if mfcc_params['window'] == 'hamming_asymmetric':
        window = scipy.signal.hamming(mfcc_params['n_fft'], sym=False)
    elif mfcc_params['window'] == 'hamming_symmetric':
        window = scipy.signal.hamming(mfcc_params['n_fft'], sym=True)
    elif mfcc_params['window'] == 'hann_asymmetric':
        window = scipy.signal.hann(mfcc_params['n_fft'], sym=False)
    elif mfcc_params['window'] == 'hann_symmetric':
        window = scipy.signal.hann(mfcc_params['n_fft'], sym=True)
    else:
        # Unknown name: pass None and let librosa.stft pick its own window.
        window = None

    # Calculate Static Coefficients
    # NOTE: despite the name, this holds the squared magnitude (power).
    magnitude_spectrogram = numpy.abs(librosa.stft(
        y + eps,
        n_fft=mfcc_params['n_fft'],
        win_length=mfcc_params['win_length'],
        hop_length=mfcc_params['hop_length'],
        window=window)) ** 2
    mel_basis = librosa.filters.mel(
        sr=fs,
        n_fft=mfcc_params['n_fft'],
        n_mels=mfcc_params['n_mels'],
        fmin=mfcc_params['fmin'],
        fmax=mfcc_params['fmax'],
        htk=mfcc_params['htk'])
    # Project the spectrogram onto the mel filter bank.
    mel_spectrum = numpy.dot(mel_basis, magnitude_spectrogram)
    mfcc = librosa.feature.mfcc(S=librosa.logamplitude(mel_spectrum))

    # Collect the feature matrix
    feature_matrix = mfcc
    if include_delta:
        # Delta coefficients
        mfcc_delta = librosa.feature.delta(mfcc, **(delta_params or {}))

        # Add Delta Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta))

    if include_acceleration:
        # Acceleration coefficients (aka delta-delta)
        mfcc_delta2 = librosa.feature.delta(
            mfcc, order=2, **(acceleration_params or {}))

        # Add Acceleration Coefficients to feature matrix
        feature_matrix = numpy.vstack((feature_matrix, mfcc_delta2))

    if not include_mfcc0:
        # Omit mfcc0
        feature_matrix = feature_matrix[1:, :]

    # Transpose so that statistics below are taken per feature column.
    feature_matrix = feature_matrix.T

    # Collect into data structure
    if statistics:
        return {
            'feat': feature_matrix,
            'stat': {
                'mean': numpy.mean(feature_matrix, axis=0),
                'std': numpy.std(feature_matrix, axis=0),
                'N': feature_matrix.shape[0],
                'S1': numpy.sum(feature_matrix, axis=0),
                'S2': numpy.sum(feature_matrix ** 2, axis=0),
            }
        }
    else:
        return {
            'feat': feature_matrix}
项目:keras    作者:NVIDIA    | 项目源码 | 文件源码
def preprocess_input(audio_path, dim_ordering='default'):
    """Reads an audio file and outputs a Mel-spectrogram.

    # Arguments
        audio_path: path to the target audio file.
        dim_ordering: data format for the output spectrogram image:
            'tf', 'th' or 'default' (resolved via
            `K.image_dim_ordering()`).

    # Returns
        3D Numpy tensor encoding the Mel-spectrogram.

    # Raises
        ImportError: if librosa is not available.
    """
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa is None:
        raise ImportError('Librosa is required to process audio files. '
                          'Install it via `pip install librosa` or visit '
                          'http://librosa.github.io/librosa/ for details.')

    # mel-spectrogram parameters
    sr = 12000
    n_fft = 512
    n_mels = 96
    hop_length = 256
    duration = 29.12

    src, sr = librosa.load(audio_path, sr=sr)
    n_sample = src.shape[0]
    n_sample_wanted = int(duration * sr)

    # Force the signal to exactly n_sample_wanted samples.
    if n_sample < n_sample_wanted:  # too short: zero-pad at the end
        src = np.hstack((src, np.zeros((n_sample_wanted - n_sample,))))
    elif n_sample > n_sample_wanted:  # too long: keep the centre part
        src = src[(n_sample - n_sample_wanted) // 2:
                  (n_sample + n_sample_wanted) // 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    x = logam(melgram(y=src, sr=sr, hop_length=hop_length,
                      n_fft=n_fft, n_mels=n_mels) ** 2,
              ref_power=1.0)

    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        # The spectrogram is 2-D, so the new trailing axis is axis 2.
        # (axis=3 is out of range and rejected by NumPy >= 1.18.)
        x = np.expand_dims(x, axis=2)
    return x