项目:PySAT    作者:USGS-Astrogeology    | 项目源码 | 文件源码
def dim_red(self, col, method, params, kws, load_fit=None):
    """Reduce the dimensionality of ``self.df[col]`` and store the components.

    The reducer is kept on ``self.do_dim_red`` and every output component is
    written back into ``self.df`` under the key ``(method, '<n>')`` with
    ``n`` counted from 1.

    Parameters
    ----------
    col : key selecting the input data from ``self.df``.
    method : str
        'PCA', 'FastICA', 't-SNE', 'LLE' or 'JADE-ICA' select the reducer
        class to construct; the string is also used in the output keys.
    params, kws : positional and keyword arguments for the reducer class.
    load_fit : optional, previously fitted reducer. When given it replaces
        the freshly constructed one and is used as-is (no re-fitting).

    Returns
    -------
    The reducer stored on ``self.do_dim_red``.
    """
    if method == 'PCA':
        self.do_dim_red = PCA(*params, **kws)
    if method == 'FastICA':
        self.do_dim_red = FastICA(*params, **kws)
    if method == 't-SNE':
        self.do_dim_red = TSNE(*params, **kws)
    if method == 'LLE':
        self.do_dim_red = LocallyLinearEmbedding(*params, **kws)
    if method == 'JADE-ICA':
        self.do_dim_red = JADE(*params, **kws)
    if load_fit:
        self.do_dim_red = load_fit

    data = self.df[col]
    if method == 't-SNE':
        # t-SNE has no separate transform step, so the embedding can only be
        # obtained by fitting on the data at hand.
        dim_red_result = self.do_dim_red.fit_transform(data)
    else:
        if not load_fit:
            # Only a freshly constructed reducer needs fitting; a loaded fit
            # must be applied unchanged.
            self.do_dim_red.fit(data)
        dim_red_result = self.do_dim_red.transform(data)

    # Column names follow the number of columns actually produced.
    # TODO: revisit for methods that do not use n_components to make sure
    # the column names still make sense.
    for i in range(1, dim_red_result.shape[1] + 1):
        self.df[(method, str(i))] = dim_red_result[:, i - 1]

    return self.do_dim_red
项目:thunder-factorization    作者:thunder-project    | 项目源码 | 文件源码
def test_ica(eng):
    """Compare the project's ICA with scikit-learn's FastICA on a toy mixture."""
    # Two source signals observed through three noisy channels.
    t = linspace(0, 10, 100)
    sine_wave = sin(t)
    square_wave = square(sin(2*t))
    mixed = c_[sine_wave, square_wave, sine_wave+square_wave]
    mixed += 0.001*random.randn(*mixed.shape)
    mixed = fromarray(mixed, engine=eng)

    def normalize_ICA(s, aT):
        # Rescale signals and mixing matrix by the column sums of the
        # (untransposed) mixing matrix so both solutions share one scale.
        mixing = aT.T
        column_sums = mixing.sum(axis=0)
        return s*column_sums, (mixing/column_sums).T

    from sklearn.decomposition import FastICA
    reference = FastICA(n_components=2, fun='cube', random_state=0)
    ref_signals = reference.fit_transform(mixed.toarray())
    ref_mixing_T = reference.mixing_.T
    ref_signals, ref_mixing_T = normalize_ICA(ref_signals, ref_mixing_T)

    signals, mixing_T = ICA(k=2, svd_method='direct', max_iter=200, seed=0).fit(mixed)
    signals, mixing_T = normalize_ICA(signals, mixing_T)
    assert allclose_sign_permute(ref_signals, signals, atol=tol)
    assert allclose_sign_permute(ref_mixing_T, mixing_T, atol=tol)
项目:PySAT    作者:USGS-Astrogeology    | 项目源码 | 文件源码
def fit(self, x, y, i=0):
    """Train ``self.model`` on ``x``/``y`` and record whether it succeeded.

    Parameters
    ----------
    x, y : training inputs and targets, passed to ``self.model.fit``.
    i : index into ``self.method`` naming the method being trained.

    Side effects
    ------------
    Sets ``self.goodfit`` to True on success and False when training raises.
    For the 'GP' method the fitted reducer is stored on
    ``self.do_reduce_dim`` and ``x`` is transformed before training.
    When ``self.ransac`` is truthy, ``self.outliers`` is set from the
    model's ``inlier_mask_``.
    """
    # If Gaussian processes are being used, the data dimensionality needs to
    # be reduced before fitting.
    if self.method[i] == 'GP':
        if self.reduce_dim == 'FastICA':
            print('Reducing dimensionality with ICA')
            do_ica = FastICA(n_components=self.n_components)
            self.do_reduce_dim = do_ica.fit(x)
        if self.reduce_dim == 'PCA':
            print('Reducing dimensionality with PCA')
            do_pca = PCA(n_components=self.n_components)
            self.do_reduce_dim = do_pca.fit(x)

        x = self.do_reduce_dim.transform(x)

    print('Training model...')
    try:
        self.model.fit(x, y)
        self.goodfit = True
    except Exception:
        # Training failures are deliberately non-fatal: the caller inspects
        # ``goodfit`` instead of handling an exception.
        self.goodfit = False
        if self.method[i] == 'GP':
            print('Model failed to train! (For GP this does not always indicate a problem, especially for low numbers of components.)')
        else:
            print('Model failed to train!')

    if self.ransac:
        self.outliers = np.logical_not(self.model.inlier_mask_)
        print(str(np.sum(self.outliers)) + ' outliers removed with RANSAC')
项目:PySAT    作者:USGS-Astrogeology    | 项目源码 | 文件源码
def ica(self, col, nc=None, load_fit=None):
    """Project ``self.df[col]`` onto ICA components and store them.

    Parameters
    ----------
    col : key selecting the input data from ``self.df``.
    nc : number of components for a new FastICA model fitted on the data.
    load_fit : optional, previously fitted model used instead of fitting the
        current data; it takes precedence over ``nc``.

    Side effects
    ------------
    Stores the model on ``self.do_ica`` and writes each component into
    ``self.df`` under the key ``('ICA', n)`` with ``n`` counted from 1.
    """
    data = self.df[col]
    if load_fit:  # use a previous fit rather than fit the current data
        self.do_ica = load_fit
    elif nc:
        self.do_ica = FastICA(n_components=nc)
        # A new model has to be fitted before it can transform anything.
        self.do_ica.fit(data)

    ica_result = self.do_ica.transform(data)
    # Count the columns actually returned: a model's n_components may be
    # None, in which case it cannot drive the loop.
    for i in range(1, ica_result.shape[1] + 1):
        self.df[('ICA', i)] = ica_result[:, i - 1]
def generate_icamodel(train_vocabulary='./vocabulary/vocabulary_nv_4w.txt', model_path='./model/ICA/ica_ourword2vec.model'):
    """Build the training matrix for an ICA model from a vocabulary file.

    Each line of ``train_vocabulary`` contributes one row: the text before
    the first space is looked up in ``word2vec_model`` and copied into a
    300-column matrix. Words that cannot be looked up keep an all-zero row
    and are printed.

    NOTE(review): the visible code stops after constructing the FastICA
    object; the fitting step and any use of ``model_path`` are not present
    in this excerpt and have not been invented here.
    """
    # Close the vocabulary file deterministically instead of leaking it.
    with open(train_vocabulary, 'r') as vocab_file:
        train_vocab = [line.strip() for line in vocab_file]

    train_sample = np.zeros([len(train_vocab), 300])
    for i, v in enumerate(train_vocab):
        word = v.split(' ')[0]
        try:
            train_sample[i] = word2vec_model[word]
        except KeyError:
            # presumably word2vec_model raises KeyError for unknown words;
            # verify against the model type in use
            print(word)
    ica = FastICA(300, max_iter=800)

项目:MENGEL    作者:CodeSpaceHQ    | 项目源码 | 文件源码
# NOTE(review): only the signature of this test is present in this excerpt;
# the body is missing, so the definition is incomplete as it stands.
def test_independent_component_analyzer(self):
项目:eezzy    作者:3Blades    | 项目源码 | 文件源码
def ICA_results(data, n_comps=None):
    """Fit an ICA model on ``data`` and return it with its components.

    Parameters
    ----------
    data : input passed to the model's ``fit``.
    n_comps : forwarded as ``n_components`` to the ICA constructor.

    Returns
    -------
    tuple
        ``('ICA', out_data)`` where ``out_data`` holds the fitted model
        under 'model' and ``ica.components_`` under 'reconstruction error'.
    """
    ica = ICA(n_components=n_comps)
    model = ica.fit(data)
    # NOTE(review): the key says 'reconstruction error' but the value is the
    # components_ attribute; the key is kept because callers may read it.
    out_data = {'model': model, 'reconstruction error': ica.components_}
    return 'ICA', out_data
项目:iris-Clustering-python-PTVS    作者:mjbahmani    | 项目源码 | 文件源码
def ReduceDimension(X):
    """Yield one ``('ICA', first, second)`` tuple from a 2-component FastICA of X."""
    from sklearn.decomposition import FastICA

    embedding = FastICA(n_components=2).fit_transform(X)
    first_component = embedding[:, 0]
    second_component = embedding[:, 1]
    yield 'ICA', first_component, second_component

项目:iris-Clustering-python-PTVS    作者:mjbahmani    | 项目源码 | 文件源码
def ReduceDimension(X):
    """Generator producing a single ``('ICA', col0, col1)`` result for X.

    X is reduced to two columns with FastICA; the columns are yielded
    separately after the label.
    """
    from sklearn.decomposition import FastICA

    ica_model = FastICA(n_components=2)
    reduced = ica_model.fit_transform(X)
    col0, col1 = reduced[:, 0], reduced[:, 1]
    yield 'ICA', col0, col1

项目:CryptoCurrencyTrader    作者:llens    | 项目源码 | 文件源码
def preprocessing_inputs(strategy_dictionary, fitting_inputs_scaled):
    """Apply the preprocessing named in ``strategy_dictionary['preprocessing']``.

    'PCA' routes the inputs through ``pca_transform``; 'FastICA' routes them
    through ``fast_ica_transform``, which also returns the strategy
    dictionary. Any other value leaves both arguments untouched.

    Returns the (possibly transformed) inputs and the strategy dictionary.
    """
    mode = strategy_dictionary['preprocessing']

    if mode == 'PCA':
        fitting_inputs_scaled = pca_transform(fitting_inputs_scaled)
    elif mode == 'FastICA':
        transformed = fast_ica_transform(strategy_dictionary, fitting_inputs_scaled)
        fitting_inputs_scaled, strategy_dictionary = transformed

    return fitting_inputs_scaled, strategy_dictionary
项目:CryptoCurrencyTrader    作者:llens    | 项目源码 | 文件源码
def fast_ica_transform(strategy_dictionary, fitting_inputs_scaled):
    """Transform the inputs with FastICA, falling back to no preprocessing.

    The model is fitted on ``fitting_inputs_scaled`` and the transformed
    data is returned. If fitting or transforming raises, the inputs are
    returned unchanged and ``strategy_dictionary['preprocessing']`` is set
    to 'None' so the caller can see that no preprocessing was applied.

    Returns
    -------
    tuple
        ``(fitting_inputs_scaled, strategy_dictionary)``.
    """
    try:
        ica = FastICA()
        # The model must be fitted before it can transform; fit_transform
        # does both on the same data.
        fitting_inputs_scaled = ica.fit_transform(fitting_inputs_scaled)
    except Exception:
        # Best-effort preprocessing: record the fallback instead of raising.
        strategy_dictionary['preprocessing'] = 'None'

    return fitting_inputs_scaled, strategy_dictionary
项目:thunder-factorization    作者:thunder-project    | 项目源码 | 文件源码
def _fit_local(self, data):
    """Run scikit-learn's FastICA on ``data`` using this object's settings.

    Reads ``self.k``, ``self.max_iter``, ``self.tol`` and ``self.seed``.

    Returns the transformed signals and the transposed ``mixing_`` matrix.
    """
    from sklearn.decomposition import FastICA
    from numpy import random

    estimator = FastICA(
        n_components=self.k,
        fun="cube",
        max_iter=self.max_iter,
        tol=self.tol,
        random_state=self.seed,
    )
    sources = estimator.fit_transform(data)
    mixing_transposed = estimator.mixing_.T
    return sources, mixing_transposed
项目:brainiak    作者:brainiak    | 项目源码 | 文件源码
def __init__(
        self, n_iter=50, rank=None,
        auto_nuisance=True, n_nureg=None, nureg_zscore=True,
        nureg_method='PCA',
        baseline_single=False, logS_range=1.0, SNR_prior='exp',
        SNR_bins=21, rho_bins=20, tol=1e-4, optimizer='BFGS',
        minimize_options={'gtol': 1e-4, 'disp': False,
                          'maxiter': 20}, random_state=None,
        anneal_speed=10):
    """Store the estimator settings and validate the option values.

    Every argument is stored on the instance under its own name, except
    that ``nureg_method`` is replaced by a factory taking the number of
    components and returning the matching decomposition object
    ('FA', 'PCA', 'SPCA' or 'ICA'), and an integer ``logS_range`` is
    converted to float.

    Raises
    ------
    AssertionError
        If ``auto_nuisance`` is set and ``n_nureg`` is neither None nor a
        positive int, or if ``SNR_prior`` is not 'unif', 'lognorm' or 'exp'.
    ValueError
        If ``nureg_method`` is not one of the supported names.

    NOTE(review): this excerpt had lost the end of the signature. The
    ``nureg_method`` and ``anneal_speed`` parameters are used by the body
    and were restored with the position and defaults of the upstream
    brainiak GBRSA constructor; the ``whiten=True`` argument of the ICA
    factory was restored the same way. Confirm against the real file.
    """
    self.n_iter = n_iter
    self.rank = rank
    self.auto_nuisance = auto_nuisance
    self.n_nureg = n_nureg
    self.nureg_zscore = nureg_zscore
    if auto_nuisance:
        assert (n_nureg is None) \
            or (isinstance(n_nureg, int) and n_nureg > 0), \
            'n_nureg should be a positive integer or None'\
            ' if auto_nuisance is True.'
    if self.nureg_zscore:
        self.preprocess_residual = lambda x: _zscore(x)
    else:
        # Identity: residuals are used as they are.
        self.preprocess_residual = lambda x: x
    if nureg_method == 'FA':
        self.nureg_method = lambda x: FactorAnalysis(n_components=x)
    elif nureg_method == 'PCA':
        self.nureg_method = lambda x: PCA(n_components=x, whiten=True)
    elif nureg_method == 'SPCA':
        self.nureg_method = lambda x: SparsePCA(n_components=x,
                                                max_iter=20, tol=tol)
    elif nureg_method == 'ICA':
        self.nureg_method = lambda x: FastICA(n_components=x,
                                              whiten=True)
    else:
        raise ValueError('nureg_method can only be FA, PCA, '
                         'SPCA(for sparse PCA) or ICA')
    self.baseline_single = baseline_single
    if type(logS_range) is int:
        logS_range = float(logS_range)
    self.logS_range = logS_range
    assert SNR_prior in ['unif', 'lognorm', 'exp'], \
        'SNR_prior can only be chosen from ''unif'', ''lognorm''' \
        ' and ''exp'''
    self.SNR_prior = SNR_prior
    self.SNR_bins = SNR_bins
    self.rho_bins = rho_bins
    self.tol = tol
    self.optimizer = optimizer
    # The default dict is shared between instances; it is only stored here,
    # so it must not be mutated in place by users of this attribute.
    self.minimize_options = minimize_options
    self.random_state = random_state
    self.anneal_speed = anneal_speed
项目:Kaggle-Mercedes-Benz-Greener-Manufacturing-33th-Solution    作者:arvidzt    | 项目源码 | 文件源码
def get_additional_features(train, test, magic=False, ID=False):
    """Append decomposition features (and optionally a 'magic' feature).

    Five reducers (truncated SVD, PCA, FastICA, Gaussian and sparse random
    projection) with 12 components each are fitted on ``train`` and applied
    to ``test``, using the columns of ``test`` as inputs. Their outputs are
    added to both frames as ``<prefix>_<n>`` columns with ``n`` from 1.

    Parameters
    ----------
    train, test : DataFrames; the new columns are added to them in place.
    magic : when True, the mean of ``y`` per ``X0`` value in ``train`` is
        merged into both frames as the column 'magic'; test rows without a
        match receive the mean of those per-group means.
    ID : when not True, the 'ID' column is left out of the reducer inputs.

    Returns
    -------
    tuple
        ``(train, test)``; new objects when ``magic`` is True, because the
        merge returns new frames.
    """
    col = list(test.columns)
    # Comparison kept as ``!= True`` (not ``not ID``) so values other than
    # True behave exactly as before.
    if ID != True and 'ID' in col:  # noqa: E712
        col.remove('ID')
    n_comp = 12

    reducers = [
        ('tsvd', TruncatedSVD(n_components=n_comp, random_state=420)),
        ('pca', PCA(n_components=n_comp, random_state=420)),
        ('ica', FastICA(n_components=n_comp, random_state=420)),
        ('grp', GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=420)),
        ('srp', SparseRandomProjection(n_components=n_comp, dense_output=True, random_state=420)),
    ]
    # Compute every projection before adding columns so all reducers see
    # the same input columns.
    projections = [
        (prefix, reducer.fit_transform(train[col]), reducer.transform(test[col]))
        for prefix, reducer in reducers
    ]

    for i in range(1, n_comp + 1):
        for prefix, train_proj, test_proj in projections:
            name = prefix + '_' + str(i)
            train[name] = train_proj[:, i - 1]
            test[name] = test_proj[:, i - 1]

    if magic == True:  # noqa: E712
        magic_mat = train[['ID', 'X0', 'y']]
        magic_mat = magic_mat.groupby(['X0'])['y'].mean()
        magic_mat = pd.DataFrame({'X0': magic_mat.index, 'magic': list(magic_mat)})
        mean_magic = magic_mat['magic'].mean()
        train = train.merge(magic_mat, on='X0', how='left')
        test = test.merge(magic_mat, on='X0', how='left')
        # X0 values that only occur in test have no group mean to merge.
        test['magic'] = test['magic'].fillna(mean_magic)
    return train, test

## Stacking helper functions. Each one takes the out-of-bag values as its input.

## xgb is not used in this case, but it is kept here for reference.
项目:scikit-discovery    作者:MITHaystack    | 项目源码 | 文件源码
def process(self, obj_data):
    """
    Perform component analysis on data.

    Results are added to the data wrapper as a dictionary with
    results['CA'] = fitted decomposition object (None when no data)
    results['Projection'] = projection of the data onto the components
    results['start_date'] / results['end_date'] = the time range used

    @param obj_data: Data wrapper containing the data
    """
    num_components = self.ap_paramList[0]()
    component_type = self.ap_paramList[1]()
    start_time = self.ap_paramList[2]()
    end_time = self.ap_paramList[3]()

    results = dict()
    results['start_date'] = start_time
    results['end_date'] = end_time

    # An optional fifth parameter restricts the analysis to given labels.
    if len(self.ap_paramList) >= 5:
        label_names = self.ap_paramList[4]()
    else:
        label_names = None

    cut_data = []
    for label, data, err in obj_data.getIterator():
        if label_names is None or label in label_names:
            # NOTE(review): the loop body was missing from this excerpt; the
            # time-range slice is assumed from the otherwise unused
            # start/end times. Confirm against the real file.
            cut_data.append(data[start_time:end_time])

    cut_data = np.array(cut_data)

    if len(cut_data) > 0:
        if component_type == 'ICA':
            ca = FastICA(n_components=num_components)
        else:
            ca = PCA(n_components=num_components)

        time_projection = ca.fit_transform(cut_data.T)
        results['CA'] = ca
        results['Projection'] = time_projection
    else:
        results['CA'] = None
        results['Projection'] = None

    obj_data.addResult(self.str_description, results)
项目:scikit-discovery    作者:MITHaystack    | 项目源码 | 文件源码
def process(self, obj_data):
    """
    Perform component analysis on data.

    Results are added to the data wrapper as a dictionary with
    results['CA'] = fitted decomposition object (None when no data)
    results['Projection'] = projection of the data onto the components
    results['labels'] = the label list built by this method
    results['start_date'] / results['end_date'] = the time range used

    @param obj_data: Data wrapper
    """
    component_type = self.ap_paramList[0]()
    start_time = self.ap_paramList[1]()
    end_time = self.ap_paramList[2]()

    num_components = self.n_components

    results = dict()
    results['start_date'] = start_time
    results['end_date'] = end_time

    cut_data = []
    # NOTE(review): nothing visible in this excerpt ever appends to
    # label_list, so results['labels'] is always empty; confirm whether an
    # append was lost from the loop below.
    label_list = []
    for label, data in obj_data.getIterator():
        for column in self.column_names:
            cut_data.append(data.loc[start_time:end_time, column])

    cut_data = np.array(cut_data)

    if len(cut_data) > 0:
        if component_type == 'ICA':
            ca = FastICA(n_components=num_components)
        else:
            ca = PCA(n_components=num_components)

        # Fit and store for both component types, not only one branch.
        time_projection = ca.fit_transform(cut_data.T)
        results['CA'] = ca
        results['Projection'] = time_projection
    else:
        results['CA'] = None
        results['Projection'] = None

    results['labels'] = label_list

    obj_data.addResult(self.str_description, results)