def __init__(self, *a, **kw):
    """Initialise the spider's log directory, SQL helper and header set.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser. After the attributes are set, the log directory
    is passed to ``utils.make_dir`` and ``self.init()`` is called.
    """
    super(Movie, self).__init__(*a, **kw)

    # Header values stored on the instance; Host is movie.douban.com.
    browser_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Host': 'movie.douban.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0',
    }

    self.log_dir = 'log/%s' % self.name
    self.sql = SqlHelper()
    self.headers = browser_headers

    utils.make_dir(self.log_dir)
    self.init()
def __init__(self, *a, **kw):
    """Initialise the spider's log directory, SQL helper and header set.

    Arguments are forwarded unchanged to the parent initialiser. The log
    directory is passed to ``utils.make_dir`` before the SQL helper is
    built, and ``self.init()`` is called last.
    """
    super(Movieurls, self).__init__(*a, **kw)

    # Header values stored on the instance; Host is movie.douban.com.
    browser_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Host': 'movie.douban.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0',
    }

    self.log_dir = 'log/%s' % self.name
    utils.make_dir(self.log_dir)

    self.sql = SqlHelper()
    self.headers = browser_headers

    self.init()
def __init__(self, *a, **kw):
    """Initialise the spider's log directory, SQL helper and header set.

    Arguments are forwarded unchanged to the parent initialiser. The log
    directory is passed to ``utils.make_dir`` before the SQL helper is
    built, and ``self.init()`` is called last.
    """
    super(Bookurls, self).__init__(*a, **kw)

    # Header values stored on the instance; Host is book.douban.com.
    browser_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Host': 'book.douban.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0',
    }

    self.log_dir = 'log/%s' % self.name
    utils.make_dir(self.log_dir)

    self.sql = SqlHelper()
    self.headers = browser_headers

    self.init()
def __init__(self, *a, **kw):
    """Initialise the spider's log directory, SQL helper and header set.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser. After the attributes are set, the log directory
    is passed to ``utils.make_dir`` and ``self.init()`` is called.
    """
    super(Book, self).__init__(*a, **kw)

    # Header values stored on the instance; Host is book.douban.com.
    browser_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Host': 'book.douban.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0',
    }

    self.log_dir = 'log/%s' % self.name
    self.sql = SqlHelper()
    self.headers = browser_headers

    utils.make_dir(self.log_dir)
    self.init()
def __init__(self, name = None, **kwargs):
    """Configure the spider from keyword arguments.

    Keys read from ``kwargs``: ``url``, ``guid`` (defaults to ``'guid'``),
    ``product_id``, ``sql`` and ``red``. Table, key and log-directory
    names are derived from ``product_id``. The ``red`` object is also
    assigned to ``proxymng.red``, and ``self.init()`` is called last.
    """
    super(JDCommentSpider, self).__init__(name, **kwargs)

    self.url = kwargs.get("url")
    self.guid = kwargs.get('guid', 'guid')
    self.product_id = kwargs.get('product_id')

    # Sample values that were previously hard-coded here for manual runs:
    #   https://item.jd.com/11478178241.html
    #   https://item.jd.com/4142680.html
    #   https://item.jd.com/3133859.html
    #   https://item.jd.com/3995645.html  (product_id 3995645)

    self.log('product_id:%s' % self.product_id)

    product = self.product_id
    self.item_table = 'item_%s' % product
    self.urls_key = '%s_urls' % product
    self.log_dir = 'log/%s' % product

    # Hard-wired off, so the log directory is not created below.
    self.is_record_page = False

    self.sql = kwargs.get('sql')
    self.red = kwargs.get('red')
    proxymng.red = self.red

    if self.is_record_page:
        utils.make_dir(self.log_dir)

    self.init()
def __init__(self, name = None, **kwargs):
    """Configure the spider from keyword arguments.

    Keys read from ``kwargs``: ``url``, ``guid`` (defaults to ``'guid'``),
    ``product_id``, ``sql`` and ``red``. Table, key and log-directory
    names are derived from ``product_id``.
    """
    super(JDItemInfoSpider, self).__init__(name, **kwargs)

    self.url = kwargs.get("url")
    self.guid = kwargs.get('guid', 'guid')
    self.product_id = kwargs.get('product_id')

    # Sample values that were previously hard-coded here for manual runs:
    #   https://item.jd.com/11478178241.html
    #   https://item.jd.com/4142680.html
    #   https://item.jd.com/3133859.html
    #   https://item.jd.com/3995645.html  (product_id 3995645)

    self.log('product_id:%s' % self.product_id)

    product = self.product_id
    self.item_table = 'item_%s' % product
    self.urls_key = '%s_urls' % product
    self.log_dir = 'log/%s' % product

    # Hard-wired off, so the log directory is not created below.
    self.is_record_page = False

    self.sql = kwargs.get('sql')
    self.red = kwargs.get('red')

    if self.is_record_page:
        utils.make_dir(self.log_dir)
def init(self):
    """Set the request meta, the per-spider log directory and the proxy table.

    The meta dict carries ``self.timeout`` as ``download_timeout``. The
    log directory ``log/proxy/<name>`` is passed to ``utils.make_dir``,
    and the SQL helper is asked to initialise the table named by
    ``config.free_ipproxy_table``.
    """
    self.meta = dict(download_timeout=self.timeout)

    log_path = 'log/proxy/%s' % self.name
    self.dir_log = log_path
    utils.make_dir(log_path)

    self.sql.init_proxy_table(config.free_ipproxy_table)
def init(self):
    """Set the per-validator log directory and initialise its proxy table.

    The log directory ``log/validator/<name>`` is passed to
    ``utils.make_dir``, and the SQL helper is asked to initialise a table
    named after ``self.name``.
    """
    log_path = 'log/validator/%s' % self.name
    self.dir_log = log_path
    utils.make_dir(log_path)

    self.sql.init_proxy_table(self.name)
def __init__(self, name = None, **kwargs):
    """Configure the spider for one product and open its SQL/Redis handles.

    ``product_id`` is read from ``kwargs`` (default ``-1``), and the item
    table, page key and log-directory names are derived from it. A
    ``SqlHelper`` and a ``redis.StrictRedis`` client built from the
    ``config`` module are stored on the instance.
    """
    super(JDSpider, self).__init__(name, **kwargs)

    self.product_id = kwargs.get('product_id', -1)
    self.log('product_id:%s' % self.product_id)

    product = self.product_id
    self.item_table = 'item_%s' % product
    self.product_page = '%s_page' % product
    self.log_dir = 'log/%s' % product

    # Hard-wired off, so the log directory is not created below.
    self.is_record_page = False
    if self.is_record_page:
        utils.make_dir(self.log_dir)

    self.sql = SqlHelper()

    # NOTE(review): the port is read from `config.redis_part` (sic);
    # confirm the attribute name against the config module.
    redis_options = {
        'host': config.redis_host,
        'port': config.redis_part,
        'db': config.redis_db,
        'password': config.redis_pass,
    }
    self.red = redis.StrictRedis(**redis_options)
def make_dir(fol):
    """Ensure the directory ``fol`` exists and return ``fol``.

    Missing parent directories are created as well. The directory is
    created first and the "already exists" case is handled afterwards,
    so a directory created concurrently by another process or thread
    between a check and the creation no longer makes this call fail.

    Args:
        fol: path of the directory to create.

    Returns:
        ``fol``, unchanged, so the call can be used inline.

    Raises:
        OSError: if the directory cannot be created and does not already
            exist as a directory (e.g. a regular file is in the way).
    """
    try:
        os.makedirs(fol)
    except OSError:
        # Only swallow the error when the directory is really there;
        # anything else (permissions, a file in the way, ...) propagates.
        if not os.path.isdir(fol):
            raise
    return fol
def call_script(script_fname, args, log_name=''):
    """Run ``script_fname`` through Blender in background mode, once per subject.

    ``args.blender_fol`` is filled from ``get_blender_dir()`` when empty.
    If it is not a directory, a message is printed and the function
    returns without running anything. When ``args.subjects`` is empty,
    ``args.subject`` is used as the only subject. ``args.subject`` and
    ``args.subjects`` are overwritten while iterating, and the working
    directory is changed before each run.

    Args:
        script_fname: path of the script passed to Blender via ``--python``.
        args: argument object; ``blender_fol``, ``subject`` and ``subjects``
            are read and modified here.
        log_name: base name of the log file; defaults to the value of
            ``utils.namebase(script_fname)``.
    """
    if args.blender_fol == '':
        args.blender_fol = get_blender_dir()
    if not op.isdir(args.blender_fol):
        print('No Blender folder!')
        return

    logs_dir = utils.make_dir(op.join(utils.get_parent_fol(__file__, 4), 'logs'))
    if log_name == '':
        log_name = utils.namebase(script_fname)
    if len(args.subjects) == 0:
        args.subjects = [args.subject]

    # The loop iterates the list bound at this point; rebinding
    # args.subjects inside the body does not affect the iteration.
    for current_subject in args.subjects:
        args.subject = current_subject
        args.subjects = ''
        print('*********** {} ***********'.format(current_subject))

        script_args = create_call_args(args)
        blend_file = get_subject_fname(args)
        log_fname = op.join(logs_dir, '{}.log'.format(log_name))

        # `log_fname` is passed to format() but not referenced by the
        # template (output redirection "> {log_fname}" is disabled).
        command = '{blender_exe} {blend_fname} --background --python {script_fname} {call_args}'.format(
            blender_exe=op.join(args.blender_fol, 'blender'),
            blend_fname=blend_file,
            script_fname=script_fname,
            call_args=script_args,
            log_fname=log_fname)

        addon_dir = utils.get_parent_fol(__file__, 2)
        os.chdir(addon_dir)
        print(command)
        utils.run_script(command)

    print('Finish! For more details look in {}'.format(log_fname))
def get_figures_dir(args):
    """Return ``<mmvt dir>/<args.subject>/figures`` after passing it to ``make_dir``."""
    mmvt_root = get_mmvt_dir()
    subject_figures = op.join(mmvt_root, args.subject, 'figures')
    make_dir(subject_figures)
    return subject_figures
def build_vocab(words, vocab_size):
    """ Build vocabulary of VOCAB_SIZE most frequent words

    'UNK' takes index 0, followed by the ``vocab_size - 1`` most common
    entries of ``words`` in frequency order. The words at indices below
    1000 are also written, one per line, to 'processed/vocab_1000.tsv'.

    Returns a ``(word -> index, index -> word)`` pair of dictionaries.
    """
    ranked = [('UNK', -1)]
    ranked += Counter(words).most_common(vocab_size - 1)

    utils.make_dir('processed')

    dictionary = {}
    with open('processed/vocab_1000.tsv', "w") as vocab_file:
        for position, (word, _) in enumerate(ranked):
            dictionary[word] = position
            if position < 1000:
                vocab_file.write(word + "\n")

    index_dictionary = {position: word for word, position in dictionary.items()}
    return dictionary, index_dictionary
def __init__(self, *a, **kwargs):
    """Initialise output directories, state, SQL helper and header set.

    Arguments are forwarded unchanged to the parent initialiser. The two
    output directories are passed to ``utils.make_dir``, and
    ``self.init()`` is called last.
    """
    super(AssetStoreSpider, self).__init__(*a, **kwargs)

    # Output directories: the root folder and its 'all' sub-folder.
    plugins_root = 'Plugins/'
    self.dir_plugins = plugins_root
    self.dir_all = plugins_root + 'all'
    utils.make_dir(self.dir_plugins)
    utils.make_dir(self.dir_all)

    # Accumulator for plugin/category records; starts empty.
    self.plugin_list = []

    self.sql = SqlHelper()
    self.table_name = config.assetstore_table_name

    self.priority_adjust = 2

    # Unity version string; empty at construction time.
    self.unity_version = ''

    # Header values stored on the instance. 'X-Kharma-Version' captures
    # the value of self.unity_version at this point (an empty string).
    request_headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Connection': 'keep-alive',
        'Host': 'www.assetstore.unity3d.com',
        'Referer': 'https://www.assetstore.unity3d.com/en/',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:50.0) Gecko/20100101 '
                      'Firefox/50.0',
        'X-Kharma-Version': self.unity_version,
        'X-Requested-With': 'UnityAssetStore',
        'X-Unity-Session': '26c4202eb475d02864b40827dfff11a14657aa41',
    }
    self.headers = request_headers

    self.init()
def train_model(model, batch_gen, num_train_steps, weights_fld):
    """Train ``model`` for ``num_train_steps`` steps with periodic checkpoints.

    Variables are initialised and then, if a checkpoint exists under
    'checkpoints', restored from it. Training resumes from the model's
    current ``global_step``. Every SKIP_STEP steps the average loss is
    printed and a checkpoint is saved as 'checkpoints/skip-gram'.
    Summaries are written under 'improved_graph/lr<LEARNING_RATE>'.

    The summary writer is now closed when training ends (or fails), so
    buffered events are flushed to disk and the event file is released.

    Args:
        model: object exposing ``global_step``, ``center_words``,
            ``target_words``, ``loss``, ``optimizer`` and ``summary_op``.
        batch_gen: iterator yielding ``(centers, targets)`` pairs.
        num_train_steps: number of training steps to run in this call.
        weights_fld: not used by this function; kept for the callers'
            signature.
    """
    # defaults to saving all variables - in this case embed_matrix,
    # nce_weight, nce_bias
    saver = tf.train.Saver()

    utils.make_dir('checkpoints')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))
        # if that checkpoint exists, restore from checkpoint
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        # accumulates the loss over the last SKIP_STEP steps so the
        # average can be reported
        total_loss = 0.0
        writer = tf.summary.FileWriter('improved_graph/lr' + str(LEARNING_RATE), sess.graph)
        try:
            initial_step = model.global_step.eval()
            for index in range(initial_step, initial_step + num_train_steps):
                centers, targets = next(batch_gen)
                feed_dict = {model.center_words: centers, model.target_words: targets}
                loss_batch, _, summary = sess.run(
                    [model.loss, model.optimizer, model.summary_op],
                    feed_dict=feed_dict)
                writer.add_summary(summary, global_step=index)
                total_loss += loss_batch
                if (index + 1) % SKIP_STEP == 0:
                    print('Average loss at step {}: {:5.1f}'.format(index, total_loss / SKIP_STEP))
                    total_loss = 0.0
                    saver.save(sess, 'checkpoints/skip-gram', index)
        finally:
            # Flush pending summaries and release the event file.
            writer.close()

        ####################
        # code to visualize the embeddings. uncomment the below to visualize embeddings
        # run "'tensorboard --logdir='processed'" to see the embeddings
        # final_embed_matrix = sess.run(model.embed_matrix)

        # # it has to be a variable. constants don't work here. you can't reuse model.embed_matrix
        # embedding_var = tf.Variable(final_embed_matrix[:1000], name='embedding')
        # sess.run(embedding_var.initializer)

        # config = projector.ProjectorConfig()
        # summary_writer = tf.summary.FileWriter('processed')

        # # add embedding to the config file
        # embedding = config.embeddings.add()
        # embedding.tensor_name = embedding_var.name

        # # link this tensor to its metadata file, in this case the first 1000 words of vocab
        # embedding.metadata_path = 'processed/vocab_1000.tsv'

        # # saves a configuration file that TensorBoard will read during startup.
        # projector.visualize_embeddings(summary_writer, config)
        # saver_embed = tf.train.Saver([embedding_var])
        # saver_embed.save(sess, 'processed/model3.ckpt', 1)
def get_categories(self, response):
    """Store the categories JSON, record leaf categories and request their listings.

    The raw response body is written to '<dir_plugins>categories.json'
    and parsed as JSON. For every entry under 'categories' a directory
    named after the category is passed to ``utils.make_dir``. Entries
    with a 'subs' value are handed to ``self.get_all_subs``; entries
    without one are appended to ``self.plugin_list``. Finally one search
    request is yielded for every item in ``self.plugin_list``.

    Changes from the previous version:
      * 'subs' is compared with ``!=`` instead of ``is not``; identity
        comparison against a string literal is implementation-dependent.
      * the category id and count are converted with ``str()`` when the
        URL is built, because the defaults used in this method (``0``)
        are integers and cannot be concatenated to a string.

    Args:
        response: response object whose ``body`` holds the categories JSON.

    Yields:
        ``Request`` objects handled by ``self.get_plugin_list``, with
        errors routed to ``self.error_parse``.
    """
    self.write_file(self.dir_plugins + 'categories.json', response.body)

    # Parse the categories JSON payload.
    categories = json.loads(response.body)

    for category in categories.get('categories'):
        name = category.get('name', '')
        subs = category.get('subs', '')
        dir_name = self.dir_plugins + name
        utils.make_dir(dir_name)

        if subs != '':
            self.get_all_subs(subs, dir_name)
        else:
            # No sub-categories: record this category itself.
            self.plugin_list.append({
                'name': name,
                'count': category.get('count', 0),
                'id': category.get('id', 0),
                'dir_name': dir_name,
                # 'subs' is '' in this branch, so the old
                # `child_subs == ''` test always chose 'yes'.
                'child': 'yes',
            })

    for plugin in self.plugin_list:
        category_id = plugin.get('id', '')
        count = plugin.get('count')
        dir_name = plugin.get('dir_name')
        name = plugin.get('name')

        yield Request(
            url = 'https://www.assetstore.unity3d.com/api/en-US/search/results.json?q=' + 'category:'
                  + str(category_id) + '&rows=' + str(count) + '&page=' + str(1)
                  + '&order_by=popularity' + '&engine=solr',
            method = 'GET',
            dont_filter = True,
            headers = self.headers,
            meta = {
                'dir_name': dir_name,
                'name': name,
                'id': category_id,
                'download_timeout': 60,
                'is_proxy': False,
            },
            callback = self.get_plugin_list,
            errback = self.error_parse,
        )
def train_model(model, batch_gen, num_train_steps, weights_fld):
    """Train ``model``, checkpoint periodically, then export embeddings.

    Variables are initialised and then, if a checkpoint exists under
    'checkpoints', restored from it. Training resumes from the model's
    current ``global_step``. Every SKIP_STEP steps the average loss is
    printed and a checkpoint is saved as 'checkpoints/skip-gram'.
    Summaries are written under 'improved_graph/lr<LEARNING_RATE>'.

    After training, the first 1000 rows of ``model.embed_matrix`` are
    copied into a new variable, a projector configuration pointing at
    'processed/vocab_1000.tsv' is written for TensorBoard
    (``tensorboard --logdir=processed``), and the variable is saved to
    'processed/model3.ckpt'.

    Both summary writers are now closed once they are no longer needed,
    so buffered events are flushed and the event files are released.

    Args:
        model: object exposing ``global_step``, ``center_words``,
            ``target_words``, ``loss``, ``optimizer``, ``summary_op`` and
            ``embed_matrix``.
        batch_gen: iterator yielding ``(centers, targets)`` pairs.
        num_train_steps: number of training steps to run in this call.
        weights_fld: not used by this function; kept for the callers'
            signature.
    """
    # defaults to saving all variables - in this case embed_matrix,
    # nce_weight, nce_bias
    saver = tf.train.Saver()

    utils.make_dir('checkpoints')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(
            os.path.dirname('checkpoints/checkpoint'))
        # if that checkpoint exists, restore from checkpoint
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        # accumulates the loss over the last SKIP_STEP steps so the
        # average can be reported
        total_loss = 0.0
        writer = tf.summary.FileWriter(
            'improved_graph/lr' + str(LEARNING_RATE), sess.graph)
        try:
            initial_step = model.global_step.eval()
            for index in range(initial_step, initial_step + num_train_steps):
                centers, targets = next(batch_gen)
                feed_dict = {
                    model.center_words: centers,
                    model.target_words: targets
                }
                loss_batch, _, summary = sess.run(
                    [model.loss, model.optimizer, model.summary_op],
                    feed_dict=feed_dict)
                writer.add_summary(summary, global_step=index)
                total_loss += loss_batch
                if (index + 1) % SKIP_STEP == 0:
                    print('Average loss at step {}: {:5.1f}'.format(
                        index, total_loss / SKIP_STEP))
                    total_loss = 0.0
                    saver.save(sess, 'checkpoints/skip-gram', index)
        finally:
            # Flush pending summaries and release the event file.
            writer.close()

        ####################
        # code to visualize the embeddings
        # run "'tensorboard --logdir='processed'" to see the embeddings
        final_embed_matrix = sess.run(model.embed_matrix)

        # it has to be a variable. constants don't work here. you can't
        # reuse model.embed_matrix
        embedding_var = tf.Variable(
            final_embed_matrix[:1000], name='embedding')
        sess.run(embedding_var.initializer)

        # Named projector_config so it cannot be confused with a
        # module-level `config`.
        projector_config = projector.ProjectorConfig()
        summary_writer = tf.summary.FileWriter('processed')
        try:
            # add embedding to the config file
            embedding = projector_config.embeddings.add()
            embedding.tensor_name = embedding_var.name

            # link this tensor to its metadata file, in this case the
            # first 1000 words of vocab
            embedding.metadata_path = 'processed/vocab_1000.tsv'

            # saves a configuration file that TensorBoard will read
            # during startup.
            projector.visualize_embeddings(summary_writer, projector_config)
        finally:
            summary_writer.close()

        saver_embed = tf.train.Saver([embedding_var])
        saver_embed.save(sess, 'processed/model3.ckpt', 1)