The following 50 code examples, extracted from open source Python projects, illustrate how to use numpy.sum().
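Before the project examples, here is a minimal, self-contained sketch of the numpy.sum() call patterns that recur below: summing every element, summing along an axis, and forcing a wider accumulator dtype. The array values are made up purely for illustration.

import numpy as np

a = np.array([[1, 2, 3],
              [4, 5, 6]], dtype=np.uint8)

total = np.sum(a)                     # sum over all elements -> 21
col_sums = np.sum(a, axis=0)          # sum down each column -> array([5, 7, 9])
row_sums = np.sum(a, axis=1)          # sum across each row  -> array([ 6, 15])
wide_sum = np.sum(a, dtype=np.int64)  # accumulate in a wider dtype to avoid overflow

print(total, col_sums, row_sums, wide_sum)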
def remove_artifacts(self, image):
    """
    Remove the connected components that are not within the parameters
    Operates in place
    :param image: sudoku's thresholded image w/o grid
    :return: None
    """
    labeled, features = label(image, structure=CROSS)
    lbls = np.arange(1, features + 1)
    areas = extract_feature(image, labeled, lbls, np.sum, np.uint32, 0)
    sides = extract_feature(image, labeled, lbls, min_side, np.float32, 0, True)
    diags = extract_feature(image, labeled, lbls, diagonal, np.float32, 0, True)

    for index in lbls:
        area = areas[index - 1] / 255
        side = sides[index - 1]
        diag = diags[index - 1]
        if side < 5 or side > 20 \
                or diag < 15 or diag > 25 \
                or area < 40:
            image[labeled == index] = 0
    return None
def evaluate(self, dataset):
    predictions = self.predict(dataset[:, 0])
    confusion_matrix = sklearn_confusion_matrix(dataset[:, 1], predictions, labels=self.__classes)

    precisions = []
    recalls = []
    accuracies = []

    for gender in self.__classes:
        idx = self.__classes_indexes[gender]
        precision = 1
        recall = 1
        if np.sum(confusion_matrix[idx, :]) > 0:
            precision = confusion_matrix[idx][idx] / np.sum(confusion_matrix[idx, :])
        if np.sum(confusion_matrix[:, idx]) > 0:
            recall = confusion_matrix[idx][idx] / np.sum(confusion_matrix[:, idx])
        precisions.append(precision)
        recalls.append(recall)

    precision = np.mean(precisions)
    recall = np.mean(recalls)
    f1 = (2 * (precision * recall)) / float(precision + recall)
    accuracy = np.sum(confusion_matrix.diagonal()) / float(np.sum(confusion_matrix))

    return precision, recall, accuracy, f1
def reshape_array(array, newsize, pixcombine='sum'):
    """
    Reshape an array to a given size using either the sum, mean or median of the pixels binned

    Note that the old array dimensions have to be multiples of the new array dimensions

    --- INPUT ---
    array           Array to reshape (combine pixels)
    newsize         New size of array
    pixcombine      The method to combine the pixels with. Choices are sum, mean and median
    """
    sh = newsize[0], array.shape[0] // newsize[0], newsize[1], array.shape[1] // newsize[1]

    if pixcombine == 'sum':
        reshapedarray = array.reshape(sh).sum(-1).sum(1)
    elif pixcombine == 'mean':
        reshapedarray = array.reshape(sh).mean(-1).mean(1)
    elif pixcombine == 'median':
        # ndarrays have no .median() method; take the median along each binned axis with np.median
        reshapedarray = np.median(np.median(array.reshape(sh), axis=-1), axis=1)

    return reshapedarray
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
def do_work_pso(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
#    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
def do_work_ga(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].genes)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    return (1 / np.sum(f1))

# Main
def xloads(self):
    # Xloadings
    A = self.data_.transpose().values
    B = self.fscores.transpose().values
    A_mA = A - A.mean(1)[:, None]
    B_mB = B - B.mean(1)[:, None]

    ssA = (A_mA**2).sum(1)
    ssB = (B_mB**2).sum(1)

    xloads_ = (np.dot(A_mA, B_mB.T) / np.sqrt(np.dot(ssA[:, None], ssB[None])))
    xloads = pd.DataFrame(xloads_, index=self.manifests, columns=self.latent)

    return xloads
def do_work_pso(data, LVcsv, Mcsv, scheme, reg, h, maximo):
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
def do_work_pso(self, item):
    output = pd.DataFrame(self.population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([self.data, output], axis=1)
    f1 = []
    results = []
    for i in range(self.nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                  self.reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
def do_work_tabu(self, item):
    output = pd.DataFrame(self.population[item])
    output.columns = ['Split']
    dataSplit = pd.concat([self.data, output], axis=1)
    f1 = []
    results = []
    for i in range(self.nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                  self.reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    cost = (np.sum(f1))
    print(1 / cost)
    return [self.population[item], cost]
def KMO(data):
    cor_ = pd.DataFrame.corr(data)
    invCor = np.linalg.inv(cor_)
    rows = cor_.shape[0]
    cols = cor_.shape[1]
    A = np.ones((rows, cols))

    for i in range(rows):
        for j in range(i, cols):
            A[i, j] = - (invCor[i, j]) / (np.sqrt(invCor[i, i] * invCor[j, j]))
            A[j, i] = A[i, j]

    num = np.sum(np.sum((cor_)**2)) - np.sum(np.sum(np.diag(cor_**2)))
    den = num + (np.sum(np.sum(A**2)) - np.sum(np.sum(np.diag(A**2))))
    kmo = num / den
    return kmo
def xavier_initializer(shape):
    dim_sum = np.sum(shape)
    if len(shape) == 1:
        dim_sum += 1
    bound = np.sqrt(2.0 / dim_sum)
    return tf.random_uniform(shape, minval=-bound, maxval=bound)

# # Assigning network variables to target network variables
# def assign_network_to_target():
#     update_wfc = tf.assign(w_fc_target, w_fc)
#     update_bfc = tf.assign(b_fc_target, b_fc)
#     sess.run(update_wfc)
#     sess.run(update_bfc)
#     cell_target = cell

# Input
def forward(self, x):
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    # A different (control flow based) way to control dropout
    if self.training:
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    else:
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
    x = x.view(-1, 320)
    x = F.relu(self.fc1(x))
    if self.training:
        x = F.dropout(x, training=True)
    x = self.fc2(x)

    # Check for NaNs and infinites
    nans = np.sum(np.isnan(x.data.numpy()))
    infs = np.sum(np.isinf(x.data.numpy()))
    if nans > 0:
        print("There is {} NaN at the output layer".format(nans))
    if infs > 0:
        print("There is {} infinite values at the output layer".format(infs))

    return F.log_softmax(x)
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0]  # sum up batch loss
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
def score_samples(self, X):
    """Return the log-likelihood of each sample

    See. "Pattern Recognition and Machine Learning"
    by C. Bishop, 12.2.1 p. 574
    or http://www.miketipping.com/papers/met-mppca.pdf

    Parameters
    ----------
    X : array, shape(n_samples, n_features)
        The data.

    Returns
    -------
    ll : array, shape (n_samples,)
        Log-likelihood of each sample under the current model
    """
    check_is_fitted(self, 'mean_')

    X = check_array(X)
    Xr = X - self.mean_
    n_features = X.shape[1]
    log_like = np.zeros(X.shape[0])
    precision = self.get_precision()
    log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
    log_like -= .5 * (n_features * log(2. * np.pi) -
                      fast_logdet(precision))
    return log_like
def main():
    files = tf.gfile.Glob(flags.FLAGS.src_path_1)
    labels_uni = np.zeros([4716, 1])
    labels_matrix = np.zeros([4716, 4716])
    for file in files:
        labels_all = get_video_input_feature(file)
        print(len(labels_all[0][2]), len(labels_all[0][3]), len(labels_all[0][4]), len(labels_all[0][5]))
    """
    for labels in labels_all:
        for i in range(len(labels)):
            labels_uni[labels[i]] += 1
            for j in range(len(labels)):
                labels_matrix[labels[i], labels[j]] += 1
    labels_matrix = labels_matrix/labels_uni
    labels_matrix = labels_matrix/(np.sum(labels_matrix, axis=0)-1.0)
    for i in range(4716):
        labels_matrix[i, i] = 1.0
    np.savetxt('labels_uni.out', labels_uni, delimiter=',')
    np.savetxt('labels_matrix.out', labels_matrix, delimiter=',')"""
def calculate_gap(predictions, actuals, top_k=20):
    """Performs a local (numpy) calculation of the global average precision.

    Only the top_k predictions are taken for each of the videos.

    Args:
      predictions: Matrix containing the outputs of the model.
        Dimensions are 'batch' x 'num_classes'.
      actuals: Matrix containing the ground truth labels.
        Dimensions are 'batch' x 'num_classes'.
      top_k: How many predictions to use per video.

    Returns:
      float: The global average precision.
    """
    gap_calculator = ap_calculator.AveragePrecisionCalculator()
    sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k)
    gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
    return gap_calculator.peek_ap_at_n()
def format_lines(video_ids, predictions, labels, top_k):
    batch_size = len(video_ids)
    for video_index in range(batch_size):
        n_recall = max(int(numpy.sum(labels[video_index])), 1)
        # labels
        label_indices = numpy.argpartition(labels[video_index], -n_recall)[-n_recall:]
        label_predictions = [(class_index, predictions[video_index][class_index])
                             for class_index in label_indices]
        label_predictions = sorted(label_predictions, key=lambda p: -p[1])
        label_str = "\t".join(["%d\t%f" % (x, y) for x, y in label_predictions])

        # predictions
        top_k_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
        top_k_predictions = [(class_index, predictions[video_index][class_index])
                             for class_index in top_k_indices]
        top_k_predictions = sorted(top_k_predictions, key=lambda p: -p[1])
        top_k_str = "\t".join(["%d\t%f" % (x, y) for x, y in top_k_predictions])

        # compute PERR
        top_n_indices = numpy.argpartition(predictions[video_index], -n_recall)[-n_recall:]
        positives = [labels[video_index][class_index] for class_index in top_n_indices]
        perr = sum(positives) / float(n_recall)

        # URL
        url = "https://www.youtube.com/watch?v=" + video_ids[video_index].decode('utf-8')
        yield url + "\t" + str(1 - perr) + "\t" + top_k_str + "\t" + label_str + "\n"
def getTrainKernel(self, params):
    self.checkParams(params)
    if (self.sameParams(params)):
        return self.cache['getTrainKernel']

    ell = np.exp(params[0])
    if (self.K_sq is None):
        K = sq_dist(self.X_scaled.T / ell)  # precompute squared distances
    else:
        K = self.K_sq / ell**2
    self.cache['K_sq_scaled'] = K

    # # # # manual computation (just for sanity checks)
    # # # K1 = np.exp(-K / 2.0)
    # # # K2 = np.zeros((self.X_scaled.shape[0], self.X_scaled.shape[0]))
    # # # for i1 in xrange(self.X_scaled.shape[0]):
    # # #     for i2 in xrange(i1, self.X_scaled.shape[0]):
    # # #         diff = self.X_scaled[i1,:] - self.X_scaled[i2,:]
    # # #         K2[i1, i2] = np.exp(-np.sum(diff**2) / (2*ell))
    # # #         K2[i2, i1] = K2[i1, i2]
    # # # print np.max((K1-K2)**2)
    # # # sys.exit(0)

    K_exp = np.exp(-K / 2.0)
    self.cache['getTrainKernel'] = K_exp
    self.saveParams(params)
    return K_exp
def getTrainTestKernel(self, params, Xtest):
    self.checkParams(params)
    ell2 = np.exp(2*params[0])

    z = Xtest / np.sqrt(Xtest.shape[1])
    S = 1 + self.X_scaled.dot(z.T)
    sz = 1 + np.sum(z**2, axis=1)
    sqrtEll2Psx = np.sqrt(ell2 + self.sx)
    sqrtEll2Psz = np.sqrt(ell2 + sz)
    K = S / np.outer(sqrtEll2Psx, sqrtEll2Psz)
    return np.arcsin(K)
def match_matrix(event: Event):
    """Returns a numpy participation matrix for the qualification matches in this event, used for calculating OPR.

    Each row in the matrix corresponds to a single alliance in a match, meaning that there will be two rows (one for
    red, one for blue) per match. Each column represents a single team, ordered by team number. If a team participated
    on a certain alliance, the value at that row and column would be 1, otherwise, it would be 0. For example, an
    event with teams 1-7 that featured a match that pitted teams 1, 3, and 5 against 2, 4, and 6 would have a match
    matrix that looks like this (sans labels):

             #1  #2  #3  #4  #5  #6  #7
    qm1_red   1   0   1   0   1   0   0
    qm1_blue  0   1   0   1   0   1   0
    """
    match_list = []
    for match in filter(lambda match: match['comp_level'] == 'qm', event.matches):
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['red']['teams'] else 0)
        match_list.append(matchRow)
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['blue']['teams'] else 0)
        match_list.append(matchRow)

    mat = numpy.array(match_list)
    sum_matches = numpy.sum(mat, axis=0)
    avg_team_matches = sum(sum_matches) / float(len(sum_matches))
    return mat[:, numpy.apply_along_axis(numpy.count_nonzero, 0, mat) > avg_team_matches - 2]
def compute_angle(pt0, pt1, pt2):
    """
    Given 3 points, compute the cosine of the angle from pt0
    :type pt0: numpy.array
    :type pt1: numpy.array
    :type pt2: numpy.array
    :return: cosine of angle
    """
    a = pt0 - pt1
    b = pt0 - pt2
    return (np.sum(a * b)) / (np.linalg.norm(a) * np.linalg.norm(b))
def _zoning(image):
    """
    It works better with DSIZE = 28
    ~0.9967 precision and recall
    :param image:
    :return: #pixels/area ratio of each zone (7x7) as feature vector
    """
    zones = []
    for i in range(0, 28, 7):
        for j in range(0, 28, 7):
            roi = image[i:i+7, j:j+7]
            val = (np.sum(roi)/255) / 49.
            zones.append(val)
    return np.array(zones, np.float32)
def getTypeProblem(self, solution_filename):
    ''' Get the type of problem directly from the solution file (in case we do not have an info file)'''
    if 'task' not in self.info.keys():
        solution = np.array(data_converter.file_to_array(solution_filename))
        target_num = solution.shape[1]
        self.info['target_num'] = target_num
        if target_num == 1:  # if we have only one column
            solution = np.ravel(solution)  # flatten
            nbr_unique_values = len(np.unique(solution))
            if nbr_unique_values < len(solution)/8:
                # Classification
                self.info['label_num'] = nbr_unique_values
                if nbr_unique_values == 2:
                    self.info['task'] = 'binary.classification'
                    self.info['target_type'] = 'Binary'
                else:
                    self.info['task'] = 'multiclass.classification'
                    self.info['target_type'] = 'Categorical'
            else:
                # Regression
                self.info['label_num'] = 0
                self.info['task'] = 'regression'
                self.info['target_type'] = 'Numerical'
        else:
            # Multilabel or multiclass
            self.info['label_num'] = target_num
            self.info['target_type'] = 'Binary'
            if any(item > 1 for item in map(np.sum, solution.astype(int))):
                self.info['task'] = 'multilabel.classification'
            else:
                self.info['task'] = 'multiclass.classification'
    return self.info['task']
def binarize_predictions(array, task='binary.classification'):
    ''' Turn predictions into decisions {0,1} by selecting the class with largest
    score for multiclass problems and thresholding at 0.5 for other cases.'''
    # add a very small random value as tie breaker (a bit bad because this changes the score every time)
    # so to make sure we get the same result every time, we seed it
    # eps = 1e-15
    # np.random.seed(sum(array.shape))
    # array = array + eps*np.random.rand(array.shape[0],array.shape[1])
    bin_array = np.zeros(array.shape)
    if (task != 'multiclass.classification') or (array.shape[1] == 1):
        bin_array[array >= 0.5] = 1
    else:
        sample_num = array.shape[0]
        for i in range(sample_num):
            j = np.argmax(array[i, :])
            bin_array[i, j] = 1
    return bin_array
def acc_stat(solution, prediction):
    ''' Return accuracy statistics TN, FP, TP, FN
    Assumes that solution and prediction are binary 0/1 vectors.'''
    # This uses floats so the results are floats
    TN = sum(np.multiply((1 - solution), (1 - prediction)))
    FN = sum(np.multiply(solution, (1 - prediction)))
    TP = sum(np.multiply(solution, prediction))
    FP = sum(np.multiply((1 - solution), prediction))
    # print "TN =", TN
    # print "FP =", FP
    # print "TP =", TP
    # print "FN =", FN
    return (TN, FP, TP, FN)
def pac_metric(solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric.
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.'''
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = 'binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if (diff) > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss))
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
def auc_metric(solution, prediction, task='binary.classification'):
    ''' Normalized Area under ROC curve (AUC).
    Return Gini index = 2*AUC-1 for binary classification problems.
    Should work for a vector of binary 0/1 (or -1/1) "solution" and any discriminant values
    for the predictions. If solution and prediction are not vectors, the AUC
    of the columns of the matrices are computed and averaged (with no weight).
    The same for all classification problems (in fact it treats well only the
    binary and multilabel classification problems).'''
    # auc = metrics.roc_auc_score(solution, prediction, average=None)
    # There is a bug in metrics.roc_auc_score: auc([1,0,0],[1e-10,0,0]) incorrect
    label_num = solution.shape[1]
    auc = np.empty(label_num)
    for k in range(label_num):
        r_ = tiedrank(prediction[:, k])
        s_ = solution[:, k]
        if sum(s_) == 0:
            print('WARNING: no positive class example in class {}'.format(k + 1))
        npos = sum(s_ == 1)
        nneg = sum(s_ < 1)
        auc[k] = (sum(r_[s_ == 1]) - npos * (npos + 1) / 2) / (nneg * npos)
    return 2 * mvmean(auc) - 1

### END CLASSIFICATION METRICS

# ======= Specialized scores ========
# We run all of them for all tasks even though they don't make sense for some tasks
def prior_log_loss(frac_pos, task='binary.classification'):
    ''' Baseline log loss. For multiple classes or labels, return the values for each column'''
    eps = 1e-15
    frac_pos_ = sp.maximum(eps, frac_pos)
    if (task != 'multiclass.classification'):  # binary case
        frac_neg = 1 - frac_pos
        frac_neg_ = sp.maximum(eps, frac_neg)
        pos_class_log_loss_ = - frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = - frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multilabel case, the right thing is to AVERAGE not sum
        # We return all the scores so we can normalize correctly later on
    else:  # multiclass case
        fp = frac_pos_ / sum(frac_pos_)  # Need to renormalize the lines in multiclass case
        # Only ONE label is active (equal to 1) for each line in the multiclass case
        pos_class_log_loss_ = - frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_)
    return base_log_loss

# sklearn implementations for comparison
def num_lines(filename):
    ''' Count the number of lines of file'''
    return sum(1 for line in open(filename))
def tp_filter(X, Y, feat_num=1000, verbose=True):
    ''' TP feature selection in the spirit of the winners of the KDD cup 2001
    Only for binary classification and sparse matrices'''
    if issparse(X) and len(Y.shape) == 1 and len(set(Y)) == 2 and (sum(Y) / Y.shape[0]) < 0.1:
        if verbose:
            print("========= Filtering features...")
        Posidx = Y > 0
        # npos = sum(Posidx)
        # Negidx = Y <= 0
        # nneg = sum(Negidx)

        nz = X.nonzero()
        mx = X[nz].max()
        if X[nz].min() == mx:  # sparse binary
            if mx != 1:
                X[nz] = 1
            tp = csr_matrix.sum(X[Posidx, :], axis=0)
            # fn = npos - tp
            # fp = csr_matrix.sum(X[Negidx, :], axis=0)
            # tn = nneg - fp
        else:
            tp = np.sum(X[Posidx, :] > 0, axis=0)
            # tn = np.sum(X[Negidx, :] <= 0, axis=0)
            # fn = np.sum(X[Posidx, :] <= 0, axis=0)
            # fp = np.sum(X[Negidx, :] > 0, axis=0)

        tp = np.ravel(tp)
        idx = sorted(range(len(tp)), key=tp.__getitem__, reverse=True)
        return idx[0:feat_num]
    else:
        feat_num = X.shape[1]
        return range(feat_num)
def predict(self, X):
    prediction = self.predict_method(X)
    # Calibrate proba
    if self.task != 'regression' and self.postprocessor != None:
        prediction = self.postprocessor.predict_proba(prediction)
    # Keep only 2nd column because the second one is 1-first
    if self.target_num == 1 and len(prediction.shape) > 1 and prediction.shape[1] > 1:
        prediction = prediction[:, 1]
    # Make sure the normalization is correct
    if self.task == 'multiclass.classification':
        eps = 1e-15
        norma = np.sum(prediction, axis=1)
        for k in range(prediction.shape[0]):
            prediction[k, :] /= sp.maximum(norma[k], eps)
    return prediction
def fit(self, X, Y):
    if len(Y.shape) == 1:
        Y = np.array([Y]).transpose()  # Transform vector into column matrix
        # This is NOT what we want: Y = Y.reshape( -1, 1 ), because Y.shape[1] out of range
    self.n_target = Y.shape[1]          # Num target values = num col of Y
    self.n_label = len(set(Y.ravel()))  # Num labels = num classes (categories of categorical var if n_target=1 or n_target if labels are binary)
    # Create the right number of copies of the predictor instance
    if len(self.predictors) != self.n_target:
        predictorInstance = self.predictors[0]
        self.predictors = [predictorInstance]
        for i in range(1, self.n_target):
            self.predictors.append(copy.copy(predictorInstance))
    # Fit all predictors
    for i in range(self.n_target):
        # Update the number of desired predictors
        if hasattr(self.predictors[i], 'n_estimators'):
            self.predictors[i].n_estimators = self.n_estimators
        # Subsample if desired
        if self.balance:
            pos = Y[:, i] > 0
            neg = Y[:, i] <= 0
            if sum(pos) < sum(neg):
                chosen = pos
                not_chosen = neg
            else:
                chosen = neg
                not_chosen = pos
            num = sum(chosen)
            idx = filter(lambda(x): x[1] == True, enumerate(not_chosen))
            idx = np.array(zip(*idx)[0])
            np.random.shuffle(idx)
            chosen[idx[0:min(num, len(idx))]] = True
            # Train with chosen samples
            self.predictors[i].fit(X[chosen, :], Y[chosen, i])
        else:
            self.predictors[i].fit(X, Y[:, i])
    return
def get_batch_loss(self, input_batch, output_batch):
    dynet.renew_cg()

    # Dimension: maxSentLength * minibatch_size
    wids = []
    wids_reversed = []

    # List of lists to store whether an input is
    # present(1)/absent(0) for an example at a time step
    # masks = []  # Dimension: maxSentLength * minibatch_size
    # tot_words = 0

    maxSentLength = max([len(sent) for sent in input_batch])

    for j in range(maxSentLength):
        wids.append([(self.src_vocab[sent[j]].i if len(sent) > j else self.src_vocab.END_TOK.i)
                     for sent in input_batch])
        wids_reversed.append([(self.src_vocab[sent[len(sent) - j - 1]].i if len(sent) > j else self.src_vocab.END_TOK.i)
                              for sent in input_batch])
        # mask = [(1 if len(sent) > j else 0) for sent in input_batch]
        # masks.append(mask)
        # tot_words += sum(mask)

    embedded_batch = self.embed_batch_seq(wids)
    embedded_batch_reverse = self.embed_batch_seq(wids_reversed)
    encoded_batch = self.encode_batch_seq(embedded_batch, embedded_batch_reverse)

    # pass last hidden state of encoder to decoder
    return self.decode_batch(encoded_batch, output_batch)
def plotFields(layer, fieldShape=None, channel=None, figOffset=1, cmap=None, padding=0.01):
    # Receptive Fields Summary
    try:
        W = layer.W
    except:
        W = layer
    wp = W.eval().transpose()
    if len(np.shape(wp)) < 4:  # Fully connected layer, has no shape
        fields = np.reshape(wp, list(wp.shape[0:-1]) + fieldShape)
    else:  # Convolutional layer already has shape
        features, channels, iy, ix = np.shape(wp)
        if channel is not None:
            fields = wp[:, channel, :, :]
        else:
            fields = np.reshape(wp, [features*channels, iy, ix])

    perRow = int(math.floor(math.sqrt(fields.shape[0])))
    perColumn = int(math.ceil(fields.shape[0]/float(perRow)))

    fig = mpl.figure(figOffset); mpl.clf()

    # Using image grid
    from mpl_toolkits.axes_grid1 import ImageGrid
    grid = ImageGrid(fig, 111, nrows_ncols=(perRow, perColumn), axes_pad=padding, cbar_mode='single')
    for i in range(0, np.shape(fields)[0]):
        im = grid[i].imshow(fields[i], cmap=cmap); grid.cbar_axes[0].colorbar(im)
    mpl.title('%s Receptive Fields' % layer.name)

    # old way
    # fields2 = np.vstack([fields,np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
    # tiled = []
    # for i in range(0,perColumn*perRow,perColumn):
    #     tiled.append(np.hstack(fields2[i:i+perColumn]))
    #
    # tiled = np.vstack(tiled)
    # mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();
    mpl.figure(figOffset+1); mpl.clf(); mpl.imshow(np.sum(np.abs(fields), 0), cmap=cmap); mpl.title('%s Total Absolute Input Dependency' % layer.name); mpl.colorbar()
def plotFields(layer, fieldShape=None, channel=None, maxFields=25, figName='ReceptiveFields', cmap=None, padding=0.01):
    # Receptive Fields Summary
    W = layer.W
    wp = W.eval().transpose()
    if len(np.shape(wp)) < 4:  # Fully connected layer, has no shape
        fields = np.reshape(wp, list(wp.shape[0:-1]) + fieldShape)
    else:  # Convolutional layer already has shape
        features, channels, iy, ix = np.shape(wp)
        if channel is not None:
            fields = wp[:, channel, :, :]
        else:
            fields = np.reshape(wp, [features*channels, iy, ix])

    fieldsN = min(fields.shape[0], maxFields)
    perRow = int(math.floor(math.sqrt(fieldsN)))
    perColumn = int(math.ceil(fieldsN/float(perRow)))

    fig = mpl.figure(figName); mpl.clf()

    # Using image grid
    from mpl_toolkits.axes_grid1 import ImageGrid
    grid = ImageGrid(fig, 111, nrows_ncols=(perRow, perColumn), axes_pad=padding, cbar_mode='single')
    for i in range(0, fieldsN):
        im = grid[i].imshow(fields[i], cmap=cmap); grid.cbar_axes[0].colorbar(im)
    mpl.title('%s Receptive Fields' % layer.name)

    # old way
    # fields2 = np.vstack([fields,np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
    # tiled = []
    # for i in range(0,perColumn*perRow,perColumn):
    #     tiled.append(np.hstack(fields2[i:i+perColumn]))
    #
    # tiled = np.vstack(tiled)
    # mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();
    mpl.figure(figName+' Total'); mpl.clf(); mpl.imshow(np.sum(np.abs(fields), 0), cmap=cmap); mpl.title('%s Total Absolute Input Dependency' % layer.name); mpl.colorbar()
def analytic_convolution_gaussian(mu1, covar1, mu2, covar2):
    """
    The analytic convolution of two Gaussians is simply the sum of the two mean vectors
    and the sum of the two covariance matrices

    --- INPUT ---
    mu1         The mean of the first gaussian
    covar1      The covariance matrix of the first gaussian
    mu2         The mean of the second gaussian
    covar2      The covariance matrix of the second gaussian
    """
    muconv    = mu1 + mu2
    covarconv = covar1 + covar2
    return muconv, covarconv
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
def inner_product_to_infty(self, gf1, gf2):
    "Inner product on non-compact domain"
    factors = [s.get_scale_factor() for s in self.stencils]
    factor = np.prod(factors)
    integrand = (factor*gf1*self.weights2D*gf2*self.dRdX)
    integrand[-1] = 0
    integral = np.sum(integrand)
    return integral
def get_integration_weights(order, nodes=None):
    """
    Returns the integration weights for Gauss-Lobatto quadrature
    as a function of the order of the polynomial we want to
    represent.
    See: https://en.wikipedia.org/wiki/Gaussian_quadrature
    See: arXiv:gr-qc/0609020v1
    """
    if np.all(nodes == False):
        nodes = get_quadrature_points(order)
    if poly == polynomial.chebyshev.Chebyshev:
        weights = np.empty((order+1))
        weights[1:-1] = np.pi/order
        weights[0] = np.pi/(2*order)
        weights[-1] = weights[0]
        return weights
    elif poly == polynomial.legendre.Legendre:
        interior_weights = 2/((order+1)*order*poly.basis(order)(nodes[1:-1])**2)
        boundary_weights = np.array([1 - 0.5*np.sum(interior_weights)])
        weights = np.concatenate((boundary_weights,
                                  interior_weights,
                                  boundary_weights))
        return weights
    else:
        raise ValueError("Not a known polynomial type.")
        return False
def inner_product(self, gf1, gf2):
    """Calculates the 2D inner product between grid functions
    gf1 and gf2 using the appropriate quadrature rule
    """
    factors = [s.get_scale_factor() for s in self.stencils]
    factor = np.prod(factors)
    integrand = gf1*self.weights2D*gf2
    integral_unit_cell = np.sum(integrand)
    integral_physical = integral_unit_cell*factor
    return integral_physical
def compute_rhs(rhs):
    U_dealiased = work[((3,) + FFT.work_shape(dealias), float, 0)]
    curl_dealiased = work[((3,) + FFT.work_shape(dealias), float, 1)]
    for i in range(3):
        U_dealiased[i] = FFT.ifftn(U_hat[i], U_dealiased[i], dealias)

    curl_dealiased = curl(U_hat, curl_dealiased)
    rhs = cross(U_dealiased, curl_dealiased, rhs)
    P_hat[:] = sum(rhs*K_over_K2, 0, out=P_hat)
    rhs -= P_hat*K
    rhs -= nu*K2*U_hat
    return rhs

# Initialize a Taylor Green vortex
def gof(self):
    r2mean = np.mean(self.r2.T[self.endoexo()[0]].values)
    AVEmean = self.AVE().copy()

    totalblock = 0
    for i in range(self.lenlatent):
        block = self.data_[self.Variables['measurement']
                           [self.Variables['latent'] == self.latent[i]]]
        block = len(block.columns.values)
        totalblock += block
        AVEmean[self.latent[i]] = AVEmean[self.latent[i]] * block

    AVEmean = np.sum(AVEmean) / totalblock
    return np.sqrt(AVEmean * r2mean)
def cr(self):
    # Composite Reliability
    composite = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

    for i in range(self.lenlatent):
        block = self.data_[self.Variables['measurement']
                           [self.Variables['latent'] == self.latent[i]]]
        p = len(block.columns)

        if(p != 1):
            cor_mat = np.cov(block.T)
            evals, evecs = np.linalg.eig(cor_mat)
            U, S, V = np.linalg.svd(cor_mat, full_matrices=False)

            indices = np.argsort(evals)
            indices = indices[::-1]
            evecs = evecs[:, indices]
            evals = evals[indices]

            loadings = V[0, :] * np.sqrt(evals[0])

            numerador = np.sum(abs(loadings))**2
            denominador = numerador + (p - np.sum(loadings ** 2))
            cr = numerador / denominador
            composite[self.latent[i]] = cr
        else:
            composite[self.latent[i]] = 1

    composite = composite.T
    return(composite)
def r2adjusted(self):
    n = len(self.data_)
    r2 = self.r2.values

    r2adjusted = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

    for i in range(self.lenlatent):
        p = sum(self.LVariables['target'] == self.latent[i])
        r2adjusted[self.latent[i]] = r2[i] - \
            (p * (1 - r2[i])) / (n - p - 1)

    return r2adjusted.T
def AVE(self):
    # AVE
    return self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum())
def fornell(self):
    cor_ = pd.DataFrame.corr(self.fscores)**2
    AVE = self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum())

    for i in range(len(cor_)):
        cor_.ix[i, i] = AVE[i]

    return(cor_)
def fitness(self, data_, n_clusters, lvmodel, mvmodel, scheme, regression):
    output = pd.DataFrame(self.genes)
    output.columns = ['Split']
    dataSplit = pd.concat([data_, output], axis=1)
    f1 = []
    results = []
    for i in range(n_clusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))

        try:
            results.append(PyLSpm(dataSplited, lvmodel, mvmodel, scheme,
                                  regression, 0, 50, HOC='true'))

            sumOuterResid = pd.DataFrame.sum(
                pd.DataFrame.sum(results[i].residuals()[1]**2))
            sumInnerResid = pd.DataFrame.sum(
                pd.DataFrame.sum(results[i].residuals()[2]**2))

            f1.append(sumOuterResid + sumInnerResid)
        except:
            f1.append(10000)

    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
def roulettewheel(pop, fit):
    fit = fit - min(fit)
    sumf = sum(fit)
    if(sumf == 0):
        return pop[0]
    prob = [(item + sum(fit[:index])) / sumf for index, item in enumerate(fit)]
    prob_ = uniform(0, 1)
#    print(prob)
    individuo = (int(BinSearch(prob, prob_, 0, len(prob) - 1)))
    return pop[individuo]