我们从Python开源项目中，提取了以下50个代码示例，用于说明如何使用wordcloud.WordCloud()。
def showData(self):
    """Display ``self.data`` as a word cloud shaped and coloured by an image.

    The image read from ``self.picfile`` serves both as the layout mask and
    as the source for ``ImageColorGenerator``. ``self.data`` is passed to
    ``generate_from_frequencies`` and the result is shown with matplotlib.
    """
    print('???,????···')
    shape_img = imread(self.picfile)
    cloud_builder = WordCloud(
        font_path='./msyhl.ttc',
        mask=shape_img,
        background_color='white',
        max_font_size=200,
        max_words=300,
        color_func=ImageColorGenerator(shape_img),
    )
    rendered = cloud_builder.generate_from_frequencies(self.data)

    plt.figure()
    plt.imshow(rendered)
    plt.axis('off')
    print('?????')
    plt.show()
def create_wordcloud(corpus, output, stopword_dict):
    """Build a word cloud from ``corpus`` and save the figure to ``output``.

    Frequencies come from the helpers ``build_lex_dic``, ``get_total_words``
    and ``build_freq_list``; only the first 100 entries of the resulting
    list are rendered. The cloud is recoloured with ``my_color_func`` and
    drawn on a borderless axes that fills the whole 10x8 inch figure.
    """
    lexicon = build_lex_dic(corpus, stopword_dict=stopword_dict)
    word_total = get_total_words(lexicon)
    freq_list = build_freq_list(lexicon, word_total)

    # One axes spanning the full figure, with ticks and frame hidden.
    fig = plt.figure(figsize=(10, 8), frameon=False)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)

    cloud_builder = WordCloud(
        width=1000,
        height=800,
        max_words=100,
        background_color='white',
        relative_scaling=0.7,
        random_state=15,
        prefer_horizontal=0.5,
    )
    wordcloud = cloud_builder.generate_from_frequencies(freq_list[0:100])
    wordcloud.recolor(random_state=42, color_func=my_color_func)

    ax.imshow(wordcloud)
    fig.savefig(output, facecolor='white')
def get_plot(limit, txt, wc_mask=wc_mask, stop=english_stopwords):
    """Render a word cloud of ``txt`` and return it as a base64 PNG string.

    Args:
        limit: Maximum number of words (``max_words``) in the cloud.
        txt: Text passed to ``WordCloud.generate``.
        wc_mask: Mask passed to ``WordCloud``; defaults to the module-level
            ``wc_mask``.
        stop: Stopwords passed to ``WordCloud``; defaults to the
            module-level ``english_stopwords``.

    Returns:
        The PNG image encoded as base64 text (``str``).
    """
    wordcloud = WordCloud(
        max_words=limit,
        stopwords=stop,
        mask=wc_mask,
    ).generate(txt)

    fig = plt.figure()
    try:
        fig.set_figwidth(8)
        fig.set_figheight(8)
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis('off')
        figfile = BytesIO()
        plt.savefig(figfile, format='png')
    finally:
        # pyplot keeps a reference to every figure until it is closed;
        # without this each call would leave one more figure in memory.
        plt.close(fig)

    # getvalue() returns the whole buffer regardless of the stream position.
    return base64.b64encode(figfile.getvalue()).decode()
def create_cloud(self):
    """Draw a word cloud over its mask image and return the ``plt`` module.

    Text comes from ``self.return_txt()`` and the mask from
    ``self.return_img()`` (the original comments describe both as Bing
    search results). The mask is drawn in grayscale first and the cloud,
    recoloured from the mask's colours, is drawn on top at 80% opacity.
    """
    snippet_text = self.return_txt()
    mask_pixels = np.array(self.return_img())

    # Copy so the library-wide STOPWORDS set is never mutated here.
    stop_terms = set(STOPWORDS)

    wordcloud = WordCloud(
        background_color="white",
        mask=mask_pixels,
        stopwords=stop_terms,
    )
    wordcloud.generate(snippet_text)

    palette = ImageColorGenerator(mask_pixels)
    plt.imshow(mask_pixels, cmap=plt.cm.gray, interpolation="None")
    recoloured = wordcloud.recolor(color_func=palette)
    plt.imshow(recoloured, alpha=.8, interpolation='None')
    plt.axis("off")
    return plt
def full_wordcloud():
    """Generate the site-wide word cloud image.

    Concatenates the ``content`` of every ``Post``, filters it through
    ``words_wo_stopwords`` and saves a transparent 350x600 cloud to
    ``<STATIC_ROOT>/images/wordcloud.png``. Any exception is printed
    instead of being raised.
    """
    try:
        rows = Post.objects.filter().values("content")
        # Each content is followed by a single space, including the last one.
        combined = "".join(row["content"] + " " for row in rows)
        combined = words_wo_stopwords(text=combined)

        cloud_builder = WordCloud(
            max_font_size=40,
            background_color="rgba(255, 255, 255, 0)",
            width=350,
            height=600,
            mode="RGBA",
        )
        word_cloud = cloud_builder.generate(combined)

        # Frameless figure with an invisible patch keeps the PNG transparent.
        fig = plt.figure(frameon=False)
        fig.patch.set_visible(False)
        ax = fig.add_axes([0, 0, 1, 1])
        ax.axis('off')
        ax.imshow(word_cloud, interpolation='bilinear')

        target = join(settings.STATIC_ROOT, 'images', 'wordcloud.png')
        plt.savefig(target)
        plt.close()
    except Exception as err:
        print(err)
def posts_wordcloud():
    """Generate a word cloud image for each post that does not have one yet.

    Posts with empty content are excluded. A post is skipped when its image
    file already exists or when its stopword-filtered text is 100 characters
    or shorter. Errors for one post are printed and do not stop the loop.
    """
    for post in Post.objects.filter().exclude(content=""):
        try:
            image_file = join(settings.STATIC_ROOT, "wordcloud",
                              "{0}.png".format(post.slug))
            if isfile(image_file):
                continue

            cleaned = words_wo_stopwords(text=post.content)
            if not len(cleaned) > 100:
                continue

            cloud_builder = WordCloud(
                max_font_size=40,
                background_color="rgba(255, 255, 255, 0)",
                width=800,
                height=350,
                mode="RGBA",
            )
            word_cloud = cloud_builder.generate(cleaned)

            # Frameless figure with an invisible patch keeps the PNG transparent.
            fig = plt.figure(frameon=False)
            fig.patch.set_visible(False)
            ax = fig.add_axes([0, 0, 1, 1])
            ax.axis('off')
            ax.imshow(word_cloud, interpolation='bilinear')
            plt.savefig(image_file)
            plt.close()

            post.wordcloud = "static/wordcloud/{0}.png".format(post.slug)
            post.save()
        except Exception as err:
            print(err)
def make_wordcloud(entry):
    """Create the word cloud image for a single post and return the post.

    When the stopword-filtered content is longer than 100 characters, a
    transparent cloud is saved to ``<STATIC_ROOT>/wordcloud/<slug>.png`` and
    ``entry.wordcloud`` is set to the matching static path. ``entry`` is
    returned in every case; it is not saved here.
    """
    cleaned = words_wo_stopwords(text=entry.content)
    if len(cleaned) > 100:
        cloud_builder = WordCloud(
            max_font_size=60,
            background_color="rgba(255, 255, 255, 0)",
            mode="RGBA",
        )
        word_cloud = cloud_builder.generate(cleaned)

        # Frameless figure with an invisible patch keeps the PNG transparent.
        fig = plt.figure(frameon=False)
        fig.patch.set_visible(False)
        ax = fig.add_axes([0, 0, 1, 1])
        ax.axis('off')
        ax.imshow(word_cloud, interpolation='bilinear')

        image_name = "{0}.png".format(entry.slug)
        plt.savefig(join(settings.STATIC_ROOT, "wordcloud", image_name))
        plt.close()
        entry.wordcloud = "static/wordcloud/{0}.png".format(entry.slug)
    return entry
def populateCaches(self):
    """Refresh ``self.serverCache`` from the database and redraw the cloud.

    Every ``msgs`` value of ``self.tablename`` is loaded (no LIMIT). If the
    table is empty or the query fails, ``self.backupArr`` is used instead.
    The cached messages are then joined and rendered to ``self.serverImage``.
    """
    try:
        cursor = self.bot.conn_wc.cursor()
        # NOTE(review): the table name is concatenated into the SQL text;
        # it must never come from untrusted input.
        cursor.execute("SELECT msgs FROM "+self.tablename)
        messages = [row[0] for row in cursor.fetchall()]
        self.serverCache = messages if len(messages) >= 1 else self.backupArr
    except Exception as e:
        print("server cache retrieval error: \n", e)
        self.serverCache = self.backupArr

    joined = " ".join(self.serverCache)
    print("generating word cloud")
    cloud_builder = WordCloud(width=1024, height=1024, max_words=200000,
                              stopwords=self.STOPWORDS)
    wc = cloud_builder.generate(joined)
    wc.to_file(self.serverImage)
def get_result(url_set):
    """Scrape ``span.ctt`` text from each URL and show it as a word cloud.

    Args:
        url_set: Iterable of page URLs, fetched with the module-level
            ``headers``.

    Each matched element's text has tags and non-word characters stripped,
    is segmented with ``jieba`` and joined into one string that is rendered
    on a black background and displayed with matplotlib.
    """
    line_set = []
    for url in url_set:
        wb_data = requests.get(url, headers=headers)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        # Iterate the matched elements directly instead of indexing by position.
        for element in soup.select('span.ctt'):
            text = re.sub('<[^>]*>', '', element.text)
            # NOTE(review): this pattern is mis-encoded in this copy of the
            # source; '??' is not a valid regular expression ("nothing to
            # repeat"). Restore the original characters before running.
            text = re.sub('??', ' ', text)
            # Raw string: '\W' in a plain literal is an invalid escape sequence.
            text = re.sub(r'[\W]+', ' ', text)
            line_set.append(text)

    word_list = [" ".join(jieba.cut(sentence)) for sentence in line_set]
    new_text = ' '.join(word_list)
    wordcloud = WordCloud(
        font_path="C:/Python34/Lib/site-packages/wordcloud/simhei.ttf",
        background_color="black",
    ).generate(new_text)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
def _get_wordcloud(img, patch, words, word_to_frequency=None, **wordcloud_kwargs):
    """Return a ``WordCloud`` laid out inside the area covered by ``patch``.

    A boolean grid marking which of ``img.pixel_coordinates`` fall inside the
    patch's path is turned into a uint8 mask (0 inside, 255 outside) and
    flipped vertically. If ``word_to_frequency`` is truthy the cloud is built
    from those frequencies for ``words``; otherwise from the joined words.
    Extra keyword arguments are forwarded to ``WordCloud``.
    """
    outline = patch.get_path()
    inside = outline.contains_points(img.pixel_coordinates)
    inside = inside.reshape((img.y_resolution, img.x_resolution))

    # Invert so pixels inside the patch become 0 and everything else 255.
    mask = (~inside * 255).astype(np.uint8)
    # Flip rows: the original notes the image origin is in the upper left.
    mask = np.flipud(mask)

    wc = WordCloud(mask=mask, background_color=None, mode="RGBA",
                   **wordcloud_kwargs)

    if word_to_frequency:
        selected = {word: word_to_frequency[word] for word in words}
        wc.generate_from_frequencies(selected)
    else:
        wc.generate(" ".join(words))
    return wc
def world_cloud():
    """Render the word counts in ``data/post_pre_desc_counter.csv``.

    Column 0 of each row is the word and column 1 its integer count; when a
    word appears more than once, the first count is kept. The cloud is
    shown with matplotlib and then written to ``images/wc.jpg``.
    """
    counter = {}
    csv_path = os.path.join("data", "post_pre_desc_counter.csv")
    with open(csv_path, "r", encoding="utf-8") as f:
        for row in csv.reader(f):
            # setdefault never overwrites an existing entry.
            counter.setdefault(row[0], int(row[1]))
    pprint(counter)

    font_file = os.path.join("font", "msyh.ttf")
    cloud_builder = WordCloud(font_path=font_file, max_words=100,
                              height=600, width=1200)
    wc = cloud_builder.generate_from_frequencies(counter)

    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    wc.to_file(os.path.join("images", "wc.jpg"))
def plot_cloud(text):
    """Render ``text`` inside the ``mav_mask.png`` shape and return PNG bytes.

    Stopwords are the library ``STOPWORDS`` plus the module-level
    ``common_words``. The cloud is drawn on the current pyplot figure,
    resized to 18.5 x 10.5 inches, and serialised through ``FigureCanvas``.
    """
    mask = np.array(Image.open(path.join(d, "mav_mask.png")))
    max_words = 300
    stop_terms = STOPWORDS.union(common_words)

    cloud_builder = WordCloud(
        background_color="white",
        width=2400,
        height=2400,
        mask=mask,
        stopwords=stop_terms,
        max_words=max_words,
    )
    wordcloud = cloud_builder.generate(text)

    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")

    fig = plt.gcf()
    fig.set_size_inches(18.5, 10.5)
    canvas = FigureCanvas(fig)
    buffer = BytesIO()
    canvas.print_png(buffer)
    return buffer.getvalue()
def wcloud(text):
    """Render ``text`` inside ``face_mask.png`` and save it under a prompted name.

    Args:
        text: Raw text; it is passed through ``clean_text`` before rendering.

    The user is asked on stdin for a file name; ``.png`` is appended and the
    cloud is written to that path. Returns ``None``.
    """
    mask = np.array(Image.open("face_mask.png"))
    wc = WordCloud(
        background_color="white",
        mask=mask,
        max_words=80,
        stopwords=set(STOPWORDS),
        width=800,
        height=400,
        mode="RGB",
        relative_scaling=0.5,
    )
    wc.generate(clean_text(text))

    # ``raw_input`` only exists on Python 2; fall back to ``input`` on 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    file_name = read_line("Enter any name for the Word Cloud image:") + '.png'
    wc.to_file(file_name)
def lyrics():
    """Show a word cloud of the lyrics stored in ``lyrics.json``.

    Every value in the JSON object is segmented with ``jieba``; segments
    that are whitespace-only or a single character long are dropped. The
    ten most common tokens are printed before the cloud is displayed.
    """
    with open('lyrics.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    tokens = []
    for lyric in data.values():
        for seg in jieba.cut(lyric):
            # seg.split() is empty for whitespace-only segments.
            if seg.split() and len(seg) > 1:
                tokens.append(seg)

    counter = Counter(tokens)
    print(counter.most_common(10))

    cloud_builder = WordCloud(font_path='NotoSansMonoCJKtc-Regular.otf')
    wcloud = cloud_builder.generate(' '.join(tokens))
    plt.imshow(wcloud)
    plt.axis('off')
    plt.show()
def generateWordCloud(text):
    """Display a word cloud of ``text`` shaped by ``us-mask-white.png``.

    Args:
        text: Text passed to ``WordCloud.generate``.

    The mask image is loaded from the directory of this file. Stopwords are
    the library defaults plus "said". The cloud is shown with its default
    colours; the previously computed but never applied
    ``ImageColorGenerator`` has been removed.
    """
    d = path.dirname(__file__)
    cloud_coloring = np.array(Image.open(path.join(d, "us-mask-white.png")))

    stopwords = set(STOPWORDS)
    stopwords.add("said")

    wc = WordCloud(
        background_color="black",
        max_words=2000,
        mask=cloud_coloring,
        stopwords=stopwords,
        max_font_size=40,
        random_state=42,  # fixed seed so the layout is reproducible
    )
    wc.generate(text)

    plt.imshow(wc)
    plt.axis("off")
    plt.show()
def generateTable(text, n=5):
    """Print the ``n`` most frequent terms of ``text`` as a table.

    Args:
        text: Text to analyse.
        n: Number of rows to print (default 5).

    Frequencies come from ``WordCloud.process_text``, using the same
    configuration as the cloud itself (default stopwords plus "said").
    """
    d = path.dirname(__file__)
    cloud_coloring = np.array(Image.open(path.join(d, "us-mask-white.png")))

    stopwords = set(STOPWORDS)
    stopwords.add("said")

    wc = WordCloud(
        background_color="black",
        max_words=2000,
        mask=cloud_coloring,
        stopwords=stopwords,
        max_font_size=40,
        random_state=42,
    )
    frequencies_by_word = wc.process_text(text)

    # Materialise the dict views: on Python 3 they are not plain sequences.
    frequencies = pd.DataFrame({
        'words': list(frequencies_by_word.keys()),
        'frequencies': list(frequencies_by_word.values()),
    })
    frequencies.sort_values('frequencies', ascending=False, inplace=True)

    # Single-argument print() calls behave the same on Python 2 and 3.
    # The header follows ``n`` instead of always claiming five rows.
    print('\nTop {} Terms\n'.format(n))
    print(frequencies.head(n=n).to_string(index=False))
    print('\n')
def generate_image():
    """Build a word cloud from ``weibo1.txt`` and save it as ``heart2.jpg``.

    For every line, up to 20 keywords are extracted with
    ``jieba.analyse.extract_tags`` (stop words from ``./stopwords.txt``).
    The cloud is laid out on the flattened mask image, recoloured with
    ``grey_color_func`` and saved at 1600 dpi.
    """
    jieba.analyse.set_stop_words("./stopwords.txt")

    keywords = []
    with codecs.open("weibo1.txt", 'r', encoding="utf-8") as f:
        for line in f:
            keywords.extend(jieba.analyse.extract_tags(line, topK=20))
    joined = " ".join(keywords)

    mask_img = imread('./52f90c9a5131c.jpg', flatten=True)
    cloud_builder = WordCloud(
        font_path='msyh.ttc',
        background_color='white',
        mask=mask_img,
    )
    wordcloud = cloud_builder.generate(joined)

    recoloured = wordcloud.recolor(color_func=grey_color_func, random_state=3)
    plt.imshow(recoloured, interpolation="bilinear")
    plt.axis('off')
    plt.savefig('./heart2.jpg', dpi=1600)
def save_cloud(frequencies, output, options=None, color_func=None, canvas_width=0, canvas_height=0):
    """Render ``frequencies`` as a word cloud image and save it to ``output``.

    Args:
        frequencies: Word frequencies for ``generate_from_frequencies``.
        output: Destination accepted by ``PIL.Image.save``.
        options: ``WordCloud`` keyword overrides applied on top of
            ``WORD_CLOUD_DEFAULTS``; entries whose value is ``None`` are
            dropped. Defaults to no overrides.
        color_func: Optional recolouring function.
        canvas_width, canvas_height: Size of an enclosing canvas on which
            the cloud is centred. When both are zero and the configured
            width and height differ, those configured values are used.

    Returns:
        Whatever ``Image.save`` returns.
    """
    base_options = copy(WORD_CLOUD_DEFAULTS)
    # ``None`` replaces the former shared ``{}`` default.
    base_options.update(options or {})
    clean_options = {
        key: value for key, value in base_options.items() if value is not None
    }

    wordcloud = WordCloud(**clean_options).generate_from_frequencies(frequencies)
    if color_func:
        wordcloud = wordcloud.recolor(color_func=color_func)
    image = wordcloud.to_image()

    if (clean_options.get("height") != clean_options.get("width")
            and not canvas_width and not canvas_height):
        canvas_height = clean_options.get("height")
        canvas_width = clean_options.get("width")

    if canvas_width and canvas_height:
        final_image = Image.new(image.mode, (canvas_width, canvas_height),
                                clean_options.get("background_color"))
        # Floor division: Image.paste needs integer coordinates, and true
        # division yields floats on Python 3.
        offset = ((final_image.size[0] - image.size[0]) // 2,
                  (final_image.size[1] - image.size[1]) // 2)
        final_image.paste(image, offset)
        return final_image.save(output)
    return image.save(output)
def main():
    """Start the title and image worker threads and wait for both.

    A shared ``Queue`` connects ``getTitle`` (given the ``articles``
    collection of the local ``cnblogs`` MongoDB database) with ``getImg``
    (given a preconfigured ``WordCloud``). The ``wordcloudimgs`` directory
    is created if it does not exist.
    """
    client = pymongo.MongoClient(host='127.0.0.1', port=27017)
    table = client['cnblogs']['articles']

    wc = WordCloud(
        font_path='msyh.ttc',
        background_color='#ccc',
        width=600,
        height=600,
    )

    if not os.path.exists('wordcloudimgs'):
        os.mkdir('wordcloudimgs')

    queue = Queue()
    titleThread = MyThread(getTitle, (queue, table))
    imgThread = MyThread(getImg, (queue, wc))

    # The image worker is started before the title worker.
    threads = [imgThread, titleThread]
    for worker in threads:
        worker.start()
    for worker in threads:
        worker.join()
def generate_wordcloud(words_list, mask_path):
    """Show a word cloud of ``words_list`` shaped by the image at ``mask_path``.

    The words are joined with spaces and rendered on a white background
    using the Source Han Serif font, with at most 2000 words.
    """
    joined = ' '.join(words_list)
    shape = np.array(Image.open(mask_path))

    wc = WordCloud(
        font_path='data/SourceHanSerifCN-Regular.otf',
        background_color='white',
        max_words=2000,
        mask=shape,
    )
    wc.generate(joined)

    plt.imshow(wc, interpolation='bilinear')
    plt.axis("off")
    plt.show()
def generate_ciyun_pic():
    """Show a word cloud of the lyric file for the module-level ``singer``.

    Reads ``./<singer>lyric.txt``, removes two fixed substrings, segments
    the text with ``jieba`` in full mode and renders it inside the shape of
    ``./mask.jpg``.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud
    import jieba
    from cv2 import imread

    # Context manager so the file handle is closed once the text is read.
    with open('./{}lyric.txt'.format(singer), 'r') as lyric_file:
        raw_text = lyric_file.read()
    # NOTE(review): both substrings are mis-encoded in this copy of the
    # source; restore the original characters before relying on them.
    cleaned_text = raw_text.replace('??', '').replace('??', '')

    wordlist_after_jieba = jieba.cut(cleaned_text, cut_all=True)
    wl_space_split = " ".join(wordlist_after_jieba)

    mask_img = imread('./mask.jpg')
    my_wordcloud = WordCloud(
        font_path='msyh.ttc',
        background_color='white',
        mask=mask_img,
    ).generate(wl_space_split)

    plt.imshow(my_wordcloud)
    plt.axis("off")
    plt.show()
def title_word_cloud():
    """Show a word cloud of the ``title`` column of ``rent_ave.csv``.

    All titles are concatenated, passed through ``jieba_clear_text``, and
    the cleaned text is printed and then rendered.
    """
    wc = WordCloud(
        background_color='white',
        stopwords=STOPWORDS,
        max_words=1000,
        # Font file used for rendering (a SimHei font shipped with matplotlib).
        font_path='C:/Python27/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/simhei.ttf',
        max_font_size=50,
        random_state=30,  # fixed seed so the layout is reproducible
    )

    with open('rent_ave.csv') as csvfile:
        # Each title is followed by one space, matching the former ``+=`` loop.
        text = ''.join(row[u'title'] + ' ' for row in csv.DictReader(csvfile))

    # Clean once and reuse the result for both printing and rendering.
    cleaned = jieba_clear_text(text)
    # Single-argument print() behaves the same on Python 2 and 3.
    print(cleaned)
    wc.generate(cleaned)

    plt.imshow(wc)
    plt.axis('off')
    plt.show()
def save_cloud(frequencies, output, options={}, color_func=None,canvas_width=0, canvas_height=0):
    """Render ``frequencies`` as a word cloud image and save it to ``output``.

    ``options`` override ``WORD_CLOUD_DEFAULTS``; entries whose value is
    ``None`` are not passed to ``WordCloud``. If a canvas size is given, or
    the configured width and height differ, the cloud is pasted centred on
    a new canvas filled with the configured background colour. Returns the
    result of ``Image.save``.
    """
    merged = copy(WORD_CLOUD_DEFAULTS)
    merged.update(options)
    clean_options = {}
    for key in merged:
        if merged[key] is not None:
            clean_options[key] = merged[key]

    wordcloud = WordCloud(**clean_options).generate_from_frequencies(frequencies)
    if color_func:
        wordcloud = wordcloud.recolor(color_func=color_func)
    image = wordcloud.to_image()

    no_canvas_given = not canvas_width and not canvas_height
    if clean_options.get("height") != clean_options.get("width") and no_canvas_given:
        canvas_height = clean_options.get("height")
        canvas_width = clean_options.get("width")

    if not (canvas_width and canvas_height):
        return image.save(output)

    final_image = Image.new(image.mode, (canvas_width, canvas_height),
                            clean_options.get("background_color"))
    # Centre the cloud on the canvas, truncating to integer coordinates.
    left = int((final_image.size[0] - image.size[0]) / 2)
    top = int((final_image.size[1] - image.size[1]) / 2)
    final_image.paste(image, (left, top))
    return final_image.save(output)
def drawWordCloud(word_text, filename):
    """Show a word cloud of ``word_text`` shaped by ``hello.jpg`` and save it.

    The cloud is recoloured from the mask image's own colours, displayed
    with matplotlib and then written to ``filename``.
    """
    shape_img = imread('hello.jpg')
    cloud_builder = WordCloud(
        background_color='white',
        mask=shape_img,
        max_words=2000,
        stopwords=STOPWORDS,
        font_path='/System/Library/Fonts/Hiragino Sans GB W6.ttc',
        max_font_size=50,
        random_state=30,  # fixed seed so the layout is reproducible
        scale=1,
    )
    my_wordcloud = cloud_builder.generate(word_text)

    palette = ImageColorGenerator(shape_img)
    my_wordcloud.recolor(color_func=palette)

    plt.imshow(my_wordcloud)
    plt.axis("off")
    plt.show()

    my_wordcloud.to_file(filename=filename)
    print()
def drawWordCloud(word_text, filename):
    """Show a word cloud of ``word_text`` shaped by ``bike.jpg`` and save it.

    The cloud is rendered at scale 1.3, recoloured from the mask image's
    own colours, displayed with matplotlib and then written to ``filename``.
    """
    shape_img = imread('bike.jpg')
    cloud_builder = WordCloud(
        background_color='white',
        mask=shape_img,
        max_words=2000,
        stopwords=STOPWORDS,
        font_path='/System/Library/Fonts/Hiragino Sans GB W6.ttc',
        max_font_size=50,
        random_state=30,  # fixed seed so the layout is reproducible
        scale=1.3,
    )
    my_wordcloud = cloud_builder.generate(word_text)

    palette = ImageColorGenerator(shape_img)
    my_wordcloud.recolor(color_func=palette)

    plt.imshow(my_wordcloud)
    plt.axis("off")
    plt.show()

    my_wordcloud.to_file(filename=filename)
    print()
def analyze(content):
    """Show a word cloud of the 100 most common nouns in ``content``.

    Args:
        content: Any object; it is converted with ``str`` before the
            module-level tagger ``t`` extracts nouns from it.

    Nouns listed in ``trash`` are discarded before counting.
    """
    nouns = t.nouns(str(content))

    # NOTE(review): these literals are mis-encoded in this copy of the source.
    trash = ["??","????","??","??","??","??","?????"]
    # Build a new list instead of calling remove() while iterating, which
    # skipped elements and could leave unwanted nouns behind.
    nouns = [noun for noun in nouns if noun not in trash]

    ko = nltk.Text(nouns, name="??")
    ranking = ko.vocab().most_common(100)
    tmpData = dict(ranking)

    wordcloud = WordCloud(
        font_path="/Library/Fonts/AppleGothic.ttf",
        relative_scaling=0.2,
        background_color="white",
    ).generate_from_frequencies(tmpData)

    plt.figure(figsize=(16, 8))
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
def wordcloud_visualization(corpus, topics, num_docs=None, min_df=0.1, ngrams=1,
                            weighting='tf', max_df=0.7, mds='pcoa', *args, **kwargs):
    """Render one word cloud per topic category and return the saved images.

    A model is built with ``build_model`` and summarised with ``prepare``.
    For each ``Category`` in the resulting ``topic_info`` table, the 20
    terms with the highest ``Total`` are drawn and saved into a fresh
    ``StringIO`` buffer.

    Returns:
        A list of ``(category_label, buffer_contents)`` tuples.
    """
    font = pkg_resources.resource_filename(__name__, "fonts/ZillaSlab-Medium.ttf")
    print(font)

    model, doc_term_matrix, vectorizer = build_model(
        corpus, topics, num_docs, ngrams, weighting, min_df, max_df
    )
    prep_data = prepare(model.model, doc_term_matrix, vectorizer, mds=mds)
    ti = prep_data.topic_info

    # Sample rows of ``topic_info`` from the original source:
    #
    #      Category        Freq    Term       Total  loglift  logprob
    # term
    # 478   Default  738.000000  specie  738.000000   1.0000   1.0000
    # ...       ...         ...     ...         ...      ...      ...
    # 191   Topic10   25.344278   space  145.983738   1.8935  -5.0376
    # 190   Topic10   32.076070   green  193.201661   1.8488  -4.8020
    # 319   Topic10   12.129367  aspect   73.063725   1.8488  -5.7745
    topic_labels = ti.groupby(['Category']).groups.keys()

    plt.clf()
    rendered = []
    for label in topic_labels:
        out = StringIO()
        top_terms = ti[ti.Category == label]
        top_terms = top_terms.sort_values(by='Total', ascending=False)[:20]
        # 'split' orientation yields [term, total] rows, i.e. dict() input.
        tf = dict(top_terms[['Term', 'Total']].to_dict('split')['data'])

        wc = wordcloud.WordCloud(font_path=font, width=600, height=300,
                                 background_color='white')
        wc.fit_words(tf)
        plt.imshow(wc)
        plt.axis('off')
        plt.savefig(out)
        out.seek(0)
        rendered.append((label, out.read()))
    return rendered
def plot_topic(self, topic_idx):
    """Plot and save a word cloud for one topic.

    Args:
        topic_idx: Index of the topic (the original docstring describes it
            as coming from NMF clustering).

    The user is prompted on stdin for a plot title. The figure is saved as
    ``topic_<idx>.png`` under ``images/positive`` when ``self.pro_or_con``
    is ``'pro'`` and under ``images/negative`` otherwise, then displayed.
    """
    # ``raw_input`` only exists on Python 2; fall back to ``input`` on 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    title = read_line('Enter a title for this plot: ')

    num_reviews = self.labels[:, topic_idx].sum()
    word_freq = self.topic_word_frequency(topic_idx)

    wc = WordCloud(width=2000, height=1000, max_words=150,
                   background_color='white')
    wc.fit_words(word_freq)

    fig = plt.figure(figsize=(16, 8))
    ax = fig.add_subplot(111)
    ax.set_title('Topic {}: {}\nNumber of Reviews in Topic: {}'.format(
        topic_idx, title, num_reviews), fontsize=24)
    ax.axis('off')
    ax.imshow(wc)

    name = 'topic_' + str(topic_idx) + '.png'
    if self.pro_or_con == 'pro':
        img_path = os.path.join('images', 'positive')
    else:
        img_path = os.path.join('images', 'negative')
    plt.savefig(os.path.join(img_path, name))
    plt.show()
def get_wc(word_dic,fontname,savename,photoname):
    """Render ``word_dic`` inside the shape of an image and save the result.

    Takes four arguments: the word-frequency mapping, the font file path,
    the output file path, and the path of the image used as the mask.
    """
    shape_img = imread(photoname)
    cloud = WordCloud(
        background_color='white',
        mask=shape_img,
        font_path=fontname,
        max_font_size=150,
    )
    cloud.generate_from_frequencies(word_dic)

    plt.imshow(cloud)
    plt.axis('off')
    cloud.to_file(savename)
    print('get the photo {} !'.format(savename))
def tag_cloud(link=22656, lim_num_tags=200, image_dims=(400, 200), out_filepath="TagCloud.png"):
    """Generate a tag cloud and save it as an image.

    Parameters
    ----------
    link :
        Passed through to ``taginfo``.
    lim_num_tags :
        Passed through to ``taginfo``.
    image_dims : tuple of two elements
        ``(width, height)`` of the saved image.
    out_filepath : string
        Output image file path.

    Returns
    -------
    None. An error message is printed when ``taginfo`` returns ``None`` or
    an empty result.
    """
    width, height = image_dims
    font_file = "fonts/ShortStack-Regular.ttf"

    info = taginfo(link=link, lim_num_tags=lim_num_tags)
    if info is None:
        print("Error : No webpage found!")
        return
    if len(info) == 0:
        print("Error : No tags found!")
        return

    cloud_builder = WordCloud(font_path=font_file, width=width, height=height,
                              max_words=len(info))
    cloud = cloud_builder.generate_from_frequencies(info)
    cloud.to_image().save(out_filepath)
    print("Tag Cloud Saved as " + out_filepath)
def set_wordcloud_image(words):
    """Return a PIL image of the word cloud for ``words``.

    All rendering options are read from ``settings.WORDCLOUD_*``. Returns
    ``None`` when ``words`` is falsy.
    """
    if not words:
        return None

    cloud_builder = WordCloud(
        background_color=settings.WORDCLOUD_BACKGROUND_COLOR,
        width=settings.WORDCLOUD_WIDTH,
        height=settings.WORDCLOUD_HEIGHT,
        max_words=settings.WORDCLOUD_MAX_WORDS,
        max_font_size=settings.WORDCLOUD_MAX_FRONT_SIZE,
        scale=settings.WORDCLOUD_SCALE,
    )
    return cloud_builder.generate(words).to_image()
def word_cloud(f):
    """Render the frequencies ``f`` and save them to ``out/word_cloud.png``.

    The cloud uses the default ``WordCloud`` settings and is saved through
    matplotlib at 300 dpi.
    """
    cloud = WordCloud()
    cloud = cloud.generate_from_frequencies(f)

    plt.imshow(cloud)
    plt.axis("off")
    plt.savefig('out/word_cloud.png', dpi=300, format='png')
def generate_wordcloud():
    """Render the module-level ``label_counts`` and save it as ``photo_tags``.

    The counts are sorted by value, highest first, and handed to
    ``generate_from_frequencies`` as a list of ``(label, count)`` pairs.
    """
    from wordcloud import WordCloud
    from operator import itemgetter
    import matplotlib.pyplot as plt

    cloud = WordCloud(background_color="white")
    by_count = itemgetter(1)
    ranked = sorted(label_counts.items(), key=by_count, reverse=True)
    cloud.generate_from_frequencies(ranked)

    plt.imshow(cloud)
    plt.axis("off")
    plt.savefig('photo_tags')
def main():
    """Command-line entry point: build a word cloud image from a file or URL.

    The text is read with ``get_txt(--input)``; if that or the rendering
    fails, the text of ``--url`` from ``get_html_text`` is used instead. An
    optional ``--model`` image is used as the mask; if it cannot be loaded
    the cloud is rendered without one. The result is saved to ``--output``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--url', metavar='URL', default=None, help='input the url')
    parser.add_argument('--output', metavar='OUTPUT', default='./wordcloud.jpg', help='input the output_file')
    parser.add_argument('--input', metavar='INPUT_FIEL', default=None, help='input the input_file')
    parser.add_argument('--model', metavar='INPUT_IMAGE_MODEL', default=None, help='input the input_image_model')
    parser.add_argument('--ttf', metavar='INPUT_TTF', default='./font/simhei.ttf', help='input the typeface')
    parser.add_argument('--width', metavar='INPUT_WIDTH', default=1800, type=int, help='input the image width')
    parser.add_argument('--height', metavar='INPUT_HEIGHT', default=1000, type=int, help='input the image height')
    parser.add_argument('--bg', metavar='INPUT_BACKGROUND_COLOR', default='black', help='input the image background_color')
    parser.add_argument('--margin', metavar='INPUT_MARGIN', default=5, type=int, help='input the image margin')
    parser.add_argument('--max_font_size', metavar='INPUT_max_font_size', default=60, type=int, help='input the max_font_size')
    args = parser.parse_args()

    url = args.url
    output_file = args.output
    input_file = args.input
    model_path = args.model
    typeface = args.ttf
    max_font_size = args.max_font_size
    width = args.width
    height = args.height
    background_color = args.bg
    margin = args.margin

    # Best effort: a missing or unreadable mask means "no mask".
    # ``except Exception`` (not a bare ``except``) lets Ctrl-C and
    # SystemExit propagate.
    try:
        image_mask = np.array(PIL.Image.open(model_path))
    except Exception:
        image_mask = None

    wordcloud = WordCloud(font_path=typeface, mask=image_mask,
                          max_font_size=max_font_size,
                          background_color=background_color,
                          margin=margin, width=width, height=height)

    # Prefer the input file; fall back to the URL when it cannot be used.
    try:
        txt_join = get_txt(input_file)
        wordcloud_ = wordcloud.generate(txt_join)
    except Exception:
        html_text = get_html_text(url)
        wordcloud_ = wordcloud.generate(html_text)

    image = wordcloud_.to_image()
    image.save(output_file)
def show(self):
    """Display a word cloud of ``self.seg_text`` (at most 40 words)."""
    cloud_builder = WordCloud(
        font_path=u'./static/simheittf/simhei.ttf',
        background_color="black",
        max_words=40,
        margin=5,
        width=1000,
        height=800,
    )
    rendered = cloud_builder.generate(self.seg_text)

    plt.figure()
    plt.imshow(rendered)
    plt.axis("off")
    plt.show()
def save_wordcloud_image(frequencies, filename):
    """Render ``frequencies`` as a word cloud and save the plot to ``filename``.

    The cloud is drawn on a 12 x 16 inch figure and saved with a black
    figure background and a tight bounding box.
    """
    cloud_builder = WordCloud(width=1024, height=786, min_font_size=1)
    cloud = cloud_builder.fit_words(frequencies)

    fig = plt.figure()
    fig.set_figwidth(12)
    fig.set_figheight(16)

    plt.imshow(cloud)
    plt.axis("off")
    plt.savefig(filename, facecolor='k', bbox_inches='tight')
    print('imaged created')
def generate_image(words, image):
    """Build a word cloud of ``words`` shaped by ``image``.

    Returns:
        A ``(wordcloud, color_generator)`` tuple; the generator is an
        ``ImageColorGenerator`` built from the same image and is not
        applied to the cloud here.
    """
    pixels = np.array(image)
    font_file = os.path.join(CUR_DIR, 'fonts/simhei.ttf')

    cloud = WordCloud(font_path=font_file, background_color='white',
                      max_words=MAX_WORDS, mask=pixels)
    cloud.generate_from_frequencies(words)

    return cloud, ImageColorGenerator(pixels)
def wordcloudplot(txt,name):
    """Render ``txt`` inside the shape of ``jay.jpg``, save it and show it.

    The image is written to ``../songs/<name>/<name>.jpg`` before being
    displayed. This block relies on the Python 2 ``unicode`` builtin.
    """
    # The font path is decoded from UTF-8 and re-encoded as GB18030 bytes.
    font_file = unicode('msyh.ttf', 'utf8').encode('gb18030')
    shape = np.array(PIL.Image.open('jay.jpg'))

    cloud_builder = WordCloud(
        font_path=font_file,
        background_color="white",
        margin=5,
        width=1800,
        height=800,
        mask=shape,
        max_words=2000,
        max_font_size=60,
        random_state=42,
    )
    cloud = cloud_builder.generate(txt)
    cloud.to_file('../songs/'+name+'/'+name+'.jpg')

    plt.imshow(cloud)
    plt.axis("off")
    plt.show()
def get_wordcloud(file_path):
    """Display a word cloud of the text file at ``file_path``.

    Words are drawn horizontally only, bigrams are disabled
    (``collocations=False``) and the ``copper`` colormap is used.
    """
    with open(file_path, 'r') as f:
        contents = f.read()

    cloud_builder = WordCloud(
        max_font_size=200,
        min_font_size=25,
        prefer_horizontal=1,
        background_color='white',
        margin=0,
        relative_scaling=0.5,
        colormap='copper',
        collocations=False,
        width=1600,
        height=800,
    )
    cloud = cloud_builder.generate(contents)

    plt.figure()
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()
def createImage(self, arr, saveName):
    """Render the strings in ``arr`` as a word cloud and return its file path.

    The image is written to ``<self.d>/<self.e>/<saveName>``.
    """
    joined = " ".join(arr)
    # NOTE(review): the original author asked whether overwriting the same
    # local image on each call could break under many requests.
    target = path.join(self.d, self.e, saveName)

    cloud_builder = WordCloud(max_words=20000, stopwords=self.STOPWORDS)
    cloud_builder.generate(joined).to_file(target)
    return target
# idk how it subscribes to the event... but it works!
def generate(self, title, text):
    """Render ``text`` as a word cloud and save it as ``<title>.png``."""
    cloud = WordCloud(max_font_size=40)
    cloud = cloud.generate(text)

    plt.figure()
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")

    target = title + '.png'
    plt.savefig(target, bbox_inches='tight')
def draw_wordcloud(file_name):
    """Render the UTF-8 text in ``file_name`` and save it as ``pjl_cloud.jpg``.

    The cloud is shaped by ``template.png`` and excludes a fixed list of
    stop words (HTML/URL fragments plus further entries that are
    mis-encoded in this copy of the source).
    """
    with codecs.open(file_name,encoding='utf-8') as f:
        comment_text = f.read()

    shape_img = imread('template.png')
    stopwords = ['png','douban','com','href','https','img','img3','class','source','icon','shire',u'??',u'??',u'??',u'??',u'??',u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??']
    font_file = r'C:\Windows\Fonts\simfang.ttf'

    cloud_builder = WordCloud(
        font_path=font_file,
        background_color='white',
        max_words=20000,
        max_font_size=200,
        min_font_size=4,
        mask=shape_img,
        stopwords=stopwords,
    )
    rendered = cloud_builder.generate(comment_text)
    rendered.to_file('pjl_cloud.jpg')
def draw_wordcloud(file_name):
    """Render the UTF-8 text in ``file_name`` and save it as ``pjl_cloud.jpg``.

    The cloud is shaped by ``template.png`` and excludes a fixed list of
    stop words (mis-encoded in this copy of the source).
    """
    with codecs.open(file_name,encoding='utf-8') as f:
        comment_text = f.read()

    shape_img = imread('template.png')
    stopwords = [u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??', u'??']
    font_file = r'C:\Windows\Fonts\simfang.ttf'

    cloud_builder = WordCloud(
        font_path=font_file,
        background_color='white',
        max_words=20000,
        max_font_size=200,
        min_font_size=4,
        mask=shape_img,
        stopwords=stopwords,
    )
    rendered = cloud_builder.generate(comment_text)
    rendered.to_file('pjl_cloud.jpg')
def show(self):
    """Display an 1800x800 word cloud of ``self.seg_text``."""
    cloud_builder = WordCloud(
        font_path=u'./static/simheittf/simhei.ttf',
        background_color="black",
        margin=5,
        width=1800,
        height=800,
    )
    rendered = cloud_builder.generate(self.seg_text)

    plt.figure()
    plt.imshow(rendered)
    plt.axis("off")
    plt.show()
def get_wordclud(file_set):
    """Show a word cloud built from column 1 of up to ten CSV files.

    Args:
        file_set: Sequence of CSV file paths; only the first ten are read.
            Fewer than ten paths is accepted (previously an ``IndexError``).

    Each collected cell is segmented with ``jieba`` and the result is
    rendered on a black background.
    """
    line_set = []
    for csv_path in file_set[:10]:
        # Context manager so each file handle is closed after reading.
        with open(csv_path, 'r') as handle:
            line_set.extend(line[1] for line in csv.reader(handle))

    word_list = [" ".join(jieba.cut(sentence)) for sentence in line_set]
    new_text = ' '.join(word_list)

    wordcloud = WordCloud(
        font_path="C:/Python34/Lib/site-packages/wordcloud/simhei.ttf",
        background_color="black",
    ).generate(new_text)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
def gen_wordcloud():
    """Write a word cloud of recent mail text to a fixed PNG path.

    The text is fetched with ``common_functions.pull_mailtext_24hrs`` using
    a range query whose lower bound is ``common_functions.queryrange(1)``,
    then lower-cased. If rendering or saving fails, the output file is
    truncated to zero bytes instead. Returns ``None``.
    """
    then = common_functions.queryrange(1)
    body = '''{ "size" : 10000, "query": { "constant_score": { "filter": { "range": { "epoch": { "from": '''+then+''' } } } } } }'''
    text = common_functions.pull_mailtext_24hrs(
        es, es_collection_name, body, keywords_list).lower()

    # Single-argument print() behaves the same on Python 2 and 3;
    # print('') emits the blank line the bare ``print`` statement did.
    print(text)
    print('')

    wc = WordCloud(background_color="white", max_words=40)
    fileloc = "/home/pierre/es_email_intel/wordcloud.png"
    try:
        wc.generate(text)
        wc.to_file(fileloc)
        print('Finished!')
    except Exception:
        # ``except Exception`` lets Ctrl-C and SystemExit propagate.
        # Opening in 'w' mode already empties the file.
        with open(fileloc, 'w') as target:
            target.truncate()
        print('Except!')
def drawPic(text,Pic):
    """Render ``text`` as a word cloud and save it to a fixed JPEG path.

    ``Pic`` is accepted but not used; the only code that read it is
    commented out in the original.
    """
    cloud_builder = WordCloud(font_path="C:/Windows/Fonts/simhei.ttf",
                              background_color='white')
    cloud = cloud_builder.generate(text)

    plt.imshow(cloud)
    plt.axis("off")
    plt.savefig("F:/EduSpider/edubug.jpg",dpi=600)
def get_word_to_cloud(self):
    """Write one word cloud PNG per file in ``self.file_list``.

    Each file under ``../spider/`` is read as UTF-8 (undecodable bytes
    ignored), upper-cased and segmented with ``jieba``. Only tokens present
    in ``self.dic_list`` are counted. The cloud is shaped by ``test.jpeg``
    and saved as ``<file name>.png``.
    """
    # assumes self.dic_list is a collection of hashable tokens, not a
    # single string - TODO confirm. A set makes each lookup O(1).
    allowed = set(self.dic_list)

    for file_name in self.file_list:
        with codecs.open('../spider/' + file_name, "r",
                         encoding='utf-8', errors='ignore') as handle:
            content = handle.read().upper()

        word_counts = {}
        for token in jieba.cut(content, HMM=False):
            if token in allowed:
                word_counts[token] = word_counts.get(token, 0) + 1

        coloring = imread('test.jpeg')
        wc = WordCloud(font_path='msyh.ttc', mask=coloring,
                       background_color="white", max_words=50,
                       max_font_size=40, random_state=42)
        wc.generate_from_frequencies(word_counts)
        wc.to_file("%s.png" % (file_name))
def __init__(self, fontPath):
    """Connect to MongoDB and prepare the shared ``WordCloud`` instance.

    ``fontPath`` is stored and used as the cloud's font. The collection is
    looked up through the module-level ``dbName`` and ``collName``, and
    ``self.imgDir`` is created if it does not exist.
    """
    self.client = MongoClient()
    database = self.client[dbName]
    self.coll = database[collName]

    self.fontPath = fontPath
    self.wordCloud = WordCloud(
        font_path=self.fontPath,
        width=400,
        height=400,
        max_words=100,
    )

    image_dir_missing = not os.path.exists(self.imgDir)
    if image_dir_missing:
        os.mkdir(self.imgDir)
    logging.info('GroupTagCloud connected to MongoDB.')
def generate_img(data):
    """Render ``data`` inside ``heart-mask.jpg`` and save it as ``heart.jpg``.

    The figure is saved through matplotlib at 600 dpi; it is not displayed.
    """
    shape_img = imread('./heart-mask.jpg')
    cloud_builder = WordCloud(
        font_path='/Library/Fonts/Songti.ttc',
        background_color='white',
        mask=shape_img,
    )
    cloud = cloud_builder.generate(data)

    plt.imshow(cloud)
    plt.axis('off')
    plt.savefig('./heart.jpg', dpi=600)