def function_2(text):
    """Summarize ``text`` by returning its highest-scoring paragraphs.

    The text is split into paragraphs on blank lines, the paragraphs are
    turned into TF-IDF vectors, a paragraph-to-paragraph similarity graph
    is built from those vectors, and PageRank is run on the graph
    (TextRank).  The top 10% of paragraphs by score are kept.

    Args:
        text: The document to summarize.  Per the original author's note,
            this is text taken from the user's uploaded PDF or URL, already
            cleaned and formatted.

    Returns:
        The selected paragraphs joined by blank lines, ordered from the
        highest score to the lowest.  At least one paragraph is returned,
        even when 10% of the paragraph count rounds down to zero.
    """
    paragraphs = text.split('\n\n')

    count_vect = CountVectorizer()
    bow_matrix = count_vect.fit_transform(paragraphs)
    normalized_matrix = TfidfTransformer().fit_transform(bow_matrix)

    # Entry (i, j) is the dot product of the TF-IDF vectors of paragraphs
    # i and j; it is used as the edge weight between them.
    similarity_graph = normalized_matrix * normalized_matrix.T

    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
    scores = nx.pagerank(nx_graph)  # TextRank applied

    # Sort all paragraphs from the highest score to the lowest.
    ranked = sorted(
        ((scores[i], s) for i, s in enumerate(paragraphs)),
        reverse=True,
    )

    # Keep the top 10%.  Without the lower bound, a text with only a few
    # paragraphs would round to zero and produce an empty summary.
    ten_percent = max(1, int(round(10.00 / 100.00 * len(ranked))))
    summary = [paragraph for _, paragraph in ranked[:ten_percent]]

    return "\n\n".join(summary)
def textrank_text_summarizer(documents, num_sentences=2, feature_type='frequency'):
    """Print the top-ranked sentences of a document using TextRank.

    A TF-IDF document-term matrix is built from ``documents``, multiplied
    by its transpose to get a sentence similarity matrix, and PageRank is
    run on the resulting graph.  The best-scoring sentences are printed in
    their original document order.

    Args:
        documents: The sentences to rank, one entry per sentence.
            NOTE(review): presumably the normalized form of, and
            index-aligned with, the module-level ``sentences`` sequence
            that is used for printing -- confirm against the caller.
        num_sentences: Maximum number of sentences to print.  Values larger
            than the number of ranked sentences print all of them; values
            below one print nothing.
        feature_type: Accepted for interface compatibility only; the
            feature matrix is always built with ``feature_type='tfidf'``.

    Returns:
        None.  The selected sentences are written to standard output.
    """
    # Rank the sentences that were passed in rather than relying on a
    # name from an enclosing scope.
    _, dt_matrix = build_feature_matrix(documents, feature_type='tfidf')
    similarity_matrix = dt_matrix * dt_matrix.T

    similarity_graph = networkx.from_scipy_sparse_matrix(similarity_matrix)
    scores = networkx.pagerank(similarity_graph)

    ranked_sentences = sorted(
        ((score, index) for index, score in scores.items()),
        reverse=True,
    )

    # Slicing instead of indexing range(num_sentences) avoids an IndexError
    # when fewer sentences are available than were requested.
    limit = max(num_sentences, 0)
    top_sentence_indices = sorted(index for _, index in ranked_sentences[:limit])

    for index in top_sentence_indices:
        # Single-argument call form prints identically on Python 2 and 3.
        print(sentences[index])
def plot2d(self, title=None, domain=[-1, 1], codomain=[-1, 1], predict=True):
    """Plot the stored samples and, optionally, the prediction regions.

    The rows of ``self.X_`` are drawn as graph nodes, coloured by
    ``self.y_``, on a fresh matplotlib figure.  When ``predict`` is true,
    ``self.predict`` is evaluated on a 100 x 100 grid spanning ``domain``
    x ``codomain`` and drawn as translucent filled contours.  The edges of
    the graph built from ``self.A_`` are drawn only in that case, and only
    if ``self.params['gamma_i']`` is non-zero.

    Args:
        title: Optional axes title; no title is set when ``None``.
        domain: Two-element x-axis range, used both as the axis limits and
            as the extent of the prediction grid.  Never mutated here.
        codomain: Same as ``domain`` for the y-axis.
        predict: Whether to overlay the prediction contours.

    Returns:
        The ``(figure, axes)`` pair created by ``plt.subplots()``.
    """
    grid_size = 100

    fig, axes = plt.subplots()

    grid_x = np.linspace(*domain, grid_size)
    grid_y = np.linspace(*codomain, grid_size)

    sample_count, _ = self.X_.shape
    graph = nx.from_scipy_sparse_matrix(self.A_)
    # Node i is placed at the coordinates stored in row i of X_.
    layout = {node: self.X_[node] for node in range(sample_count)}
    node_palette = ListedColormap(['#AAAAAA', '#FF0000', '#0000FF'])

    if title is not None:
        axes.set_title(title)

    axes.set_xlabel('$x_1$')
    axes.set_ylabel('$x_2$')
    axes.set_xlim(domain)
    axes.set_ylim(codomain)

    nx.draw_networkx_nodes(
        graph,
        layout,
        ax=axes,
        node_size=25,
        node_color=self.y_,
        cmap=node_palette,
    )

    if not predict:
        return (fig, axes)

    # Evaluate the model on every point of the grid.
    mesh_x, mesh_y = np.meshgrid(grid_x, grid_y)
    grid_points = np.c_[mesh_x.ravel(), mesh_y.ravel()]
    decision = self.predict(grid_points).reshape(grid_size, grid_size)

    contour_levels = np.array([-1, 0, 1])
    region_cmap = plt.cm.RdYlBu

    if self.params['gamma_i'] != 0.0:
        nx.draw_networkx_edges(graph, layout, ax=axes, edge_color='#AAAAAA')

    axes.contourf(
        mesh_x,
        mesh_y,
        decision,
        contour_levels,
        cmap=region_cmap,
        alpha=0.25,
    )

    return (fig, axes)
def network_layout(matrix, k=30):
    """Compute a 2-D layout and community labels for a k-NN graph of ``matrix``.

    A brute-force cosine nearest-neighbour graph is built over the rows of
    ``matrix``, communities are assigned with ``label_propagation``, and
    node positions are computed with Graphviz's ``sfdp`` layout.

    Args:
        matrix: Feature matrix with one row per item; ``matrix.shape[0]``
            is taken as the number of nodes.
        k: Number of nearest neighbours used to build the graph.

    Returns:
        A ``(coords, communities_labelprop)`` tuple of NumPy arrays: the
        layout position of each node and the label assigned to each node,
        both in node-index order.

    Side effects:
        Prints the shape of the coordinate array.
    """
    # Pass n_neighbors by keyword: recent scikit-learn releases make the
    # constructor arguments keyword-only, and the keyword form also works
    # on older releases.
    nbrs = NearestNeighbors(
        n_neighbors=k, algorithm='brute', metric='cosine'
    ).fit(matrix)
    G = networkx.from_scipy_sparse_matrix(nbrs.kneighbors_graph(matrix))

    # NOTE(review): label_propagation is indexed by node id below, so it
    # presumably returns a node -> label mapping -- confirm.
    node_labels = label_propagation(G, verbose=True)
    communities_labelprop = np.array(
        [node_labels[i] for i in range(matrix.shape[0])]
    )

    pos = graphviz_layout(G, prog="sfdp")
    # Nodes are the integers 0..n-1, so look them up in index order.
    coords = np.array([pos[i] for i in range(len(pos))])
    print(coords.shape)

    return coords, communities_labelprop
def load_graph_from_mat(filename, edge_attribute='type', graph_type=nx.DiGraph()):
    """Build a networkx graph from the sparse matrix stored in ``filename``.

    Args:
        filename: Passed unchanged to ``load_mat_file``, whose result is
            handed to ``nx.from_scipy_sparse_matrix``.
        edge_attribute: Name of the edge attribute that receives the matrix
            entries.  When it is ``'type'``, the values are converted to
            ``int`` after loading.
        graph_type: Graph instance passed as ``create_using``.  The default
            instance is created once, when the function is defined, and is
            therefore shared between calls that rely on the default.

    Returns:
        A copy of the populated graph, so the caller never holds the
        ``graph_type`` instance itself.
    """
    sparse_adjacency = load_mat_file(filename)
    loaded = nx.from_scipy_sparse_matrix(
        sparse_adjacency,
        create_using=graph_type,
        edge_attribute=edge_attribute,
    )

    if edge_attribute != 'type':
        return loaded.copy()

    # Intify 'type' when that is the property used (it defaults to float).
    for source, target, data in loaded.edges(data=True):
        loaded[source][target]['type'] = int(data['type'])

    return loaded.copy()
def draw_graph(mst, mst_a, mol_names, dir_names, method):
    """Decorate the graph of ``mst`` for Graphviz and write it to disk.

    Every non-zero entry ``(i, j)`` of ``mst`` gets an edge label taken
    from ``mst_a[i][j]`` (reduced by 1 when ``method`` is ``'mcs'``) and a
    fixed ``len`` of ``'3.0'``.  Every node becomes a box whose HTML-like
    label shows the molecule's SVG image above its name.  The graph is then
    written to ``GPICKLE_FILE`` as a pickle and to ``DOT_FILE`` as DOT.

    Args:
        mst: Sparse matrix turned into a graph with
            ``nx.from_scipy_sparse_matrix``.
        mst_a: Two-level indexable container of the values shown on edges.
        mol_names: Molecule name for each node index.
        dir_names: Directory containing each node's ``<name>.svg`` image.
        method: ``'mcs'`` subtracts 1 from the edge values; any other value
            leaves them unchanged.

    Returns:
        None.  Progress messages are printed and two files are written.
    """
    import networkx as nx

    graph = nx.from_scipy_sparse_matrix(mst)

    offset = 1 if method == 'mcs' else 0

    nonzero_index = mst.nonzero()
    for src, dst in zip(nonzero_index[0], nonzero_index[1]):
        edge_attrs = graph.edge[src][dst]
        edge_attrs['label'] = '%.1f' % (mst_a[src][dst] - offset)
        edge_attrs['len'] = '3.0'

    # Graphviz HTML-like label: image cell on top, name cell underneath.
    node_label_template = (
        '<'
        '<table border="0" cellspacing="-20" cellborder="0">'
        '<tr><td><img src="%s"/></td></tr>'
        '<tr><td bgcolor="#F0F0F0">%s</td></tr>'
        '</table>>'
    )
    for node in graph.nodes():
        image_path = os.path.join(
            dir_names[node], mol_names[node] + os.extsep + 'svg'
        )
        node_attrs = graph.node[node]
        node_attrs['shape'] = 'box'
        node_attrs['label'] = node_label_template % (image_path, mol_names[node])

    print('Writing networkx graph pickle file %s...' % GPICKLE_FILE)
    nx.write_gpickle(graph, GPICKLE_FILE)

    print('Writing DOT file %s...' % DOT_FILE)
    nx.write_dot(graph, DOT_FILE)
def getG(self):
    """Return the networkx graph for ``self.data``, building it on first use.

    The graph is cached on the instance as ``self.G``.  It is undirected
    (``nx.Graph``) when ``self.is_symmetric()`` is true and directed
    (``nx.DiGraph``) otherwise.

    Returns:
        The cached graph built with ``nx.from_numpy_matrix``.
    """
    if hasattr(self, 'G'):
        return self.G

    # A symmetric matrix describes an undirected graph; anything else is
    # treated as directed.
    graph_class = nx.Graph if self.is_symmetric() else nx.DiGraph

    # Alternative for sparse data:
    #   nx.from_scipy_sparse_matrix(self.data, <graph instance>)
    self.G = nx.from_numpy_matrix(self.data, create_using=graph_class())
    return self.G