The following 6 code examples, extracted from open-source Python projects, illustrate usage of the `json` standard-library module (documented at https://docs.python.org/3/library/json.html).
def get_random_dois(n):
    """Fetch ``n`` random DOIs from the Crossref REST API.

    Queries the Crossref ``/works`` endpoint for a random sample of works
    published since 2006-01-01 and extracts their DOI strings.

    Args:
        n (int): number of sample works to request.

    Returns:
        list: DOI strings of the sampled works.
    """
    url = u"http://api.crossref.org/works?filter=from-pub-date:2006-01-01&sample={}".format(n)
    r = requests.get(url)
    items = r.json()["message"]["items"]
    dois = [item["DOI"] for item in items]
    print(dois)
    # NOTE: if these records are later indexed into Elasticsearch under
    # Python 2, a custom serializer with ensure_ascii=True may be needed to
    # work around a unicode problem; see
    # https://github.com/elastic/elasticsearch-py/issues/374 and
    # https://docs.python.org/2/library/json.html#basic-usage
    return dois
def get_tree(page):
    """Parse an HTML page string into an lxml element tree.

    The ``&nbsp;`` entity is replaced with a plain space first, because
    lxml's XPath ``starts-with`` does not match text containing the
    entity. (The extracted source had degraded this to a no-op
    ``replace(" ", " ")``.)

    Args:
        page (str): raw HTML source.

    Returns:
        lxml.html.HtmlElement or None: the parsed tree, or None when lxml
        raises a syntax/parser error on the markup.
    """
    page = page.replace("&nbsp;", " ")  # otherwise starts-with for lxml doesn't work
    try:
        tree = html.fromstring(page)
    except (etree.XMLSyntaxError, etree.ParserError) as e:
        # fixed typo in the log message: "beause" -> "because"
        print(u"not parsing, because etree error in get_tree: {}".format(e))
        tree = None
    return tree
def elasticsearch_pages(context, sort, page):
    """Build and execute the Elasticsearch query for one page of results.

    Filters pages by parent-domain flags taken from ``context`` (banned,
    up/down, crap, subdomain, reputation), matches ``context['search']``
    against the stripped body (as a phrase when ``context['phrase']`` is
    set), highlights hits, and applies pagination plus the requested sort.

    Args:
        context (dict): search flags and the query string.
        sort: unused; the sort key is read from ``context["sort"]``.
        page (int): 1-based page number.

    Returns:
        The executed search response.
    """
    per_page = int(os.environ['RESULT_LIMIT'])
    per_page_max = int(os.environ['MAX_RESULT_LIMIT'])
    offset = (page - 1) * per_page
    upper = offset + per_page

    # Parent-domain filter: start from "not banned" and AND in each flag.
    domain_query = Q("term", is_banned=False)
    if context["is_up"]:
        domain_query &= Q("term", is_up=True)
    if not context["show_fh_default"]:
        domain_query &= Q("term", is_crap=False)
    if not context["show_subdomains"]:
        domain_query &= Q("term", is_subdomain=False)
    if context["rep"] == "genuine":
        domain_query &= Q("term", is_genuine=True)
    if context["rep"] == "fake":
        domain_query &= Q("term", is_fake=True)

    limit = per_page_max if context["more"] else per_page
    parent_filter = Q("has_parent", type="domain", query=domain_query)

    # Phrase search when requested, plain match otherwise.
    match_kind = "match_phrase" if context['phrase'] else "match"
    query = Search().filter(parent_filter).query(
        Q(match_kind, body_stripped=context['search']))

    query = query.highlight_options(
        order='score', encoder='html').highlight('body_stripped')[offset:upper]
    query = query.source(
        ['title', 'domain_id', 'created_at', 'visited_at']).params(request_cache=True)

    # Map the UI sort key to an Elasticsearch sort clause.
    sort_clauses = {
        "onion": "_parent",
        "visited_at": "-visited_at",
        "created_at": "-created_at",
        "last_seen": "-visited_at",
    }
    clause = sort_clauses.get(context["sort"])
    if clause is not None:
        query = query.sort(clause)
    return query.execute()
def main():
    """Build a pickled training corpus from exported Slack JSON dumps.

    Loads users plus public and private channel exports, regroups messages
    per user, filters and balances them, and writes two parallel pickles:
    ``messages.pkl`` (message texts) and ``authors.pkl`` (user indices).
    """
    # load files
    # TODO: json loading is different every time, use object_pairs_hook?
    # https://docs.python.org/3/library/json.html#json.load
    with open('../slack-data/users.json', 'r', encoding='utf-8') as users_json:
        users = json.load(users_json)
    with open('../slack-data/channels.json', 'r', encoding='utf-8') as channels_json:
        channels = json.load(channels_json)
    with open('../slack-data/privateChannels.json', 'r', encoding='utf-8') as private_channels_json:
        private_channels = json.load(private_channels_json)

    # merge channels with private channels
    channels = channels + private_channels

    # merge from "per-channel" to "per-user" messages collection
    users_messages = flatten_messages(channels)

    # remove users with not enough messages as over-sampling their messages
    # can lead to overfitting
    users_messages = discard_insufficient_data_users(users_messages, users)

    # stem words in messages
    users_messages = stem_messages(users_messages)

    # make all remained users have equal number of messages
    users_messages = balance_messages(users_messages)

    messages_output = []
    authors_output = []
    for user_id, messages in users_messages.items():
        for message in messages:
            authors_output.append(user_index_by_id(user_id, users))
            messages_output.append(message)

    # Use context managers so the pickle files are flushed and closed
    # deterministically (the original passed unclosed open() handles to
    # pickle.dump, leaking the file objects).
    with open('messages.pkl', 'wb') as messages_file:
        pickle.dump(messages_output, messages_file)
    with open('authors.pkl', 'wb') as authors_file:
        pickle.dump(authors_output, authors_file)
    print('Saved a total of ' + str(len(messages_output)) + ' processed messages')
def json_config(jfile, jobj_hook=None, jwrite_obj=None, jappend=None):
    """ Simple interface to json library functions.

    Reads JSON data into an object dictionary, writes a new json file, or
    appends json data to an existing file. See the json library
    documentation for more info.
    `json <https://docs.python.org/3/library/json.html>`_

    Parameters
    ----------
    jfile : str
        json file path.
    jobj_hook : function (default: None)
        Decoder. If None, decodes to dict.
    jwrite_obj : obj (default: None)
        Obj to write to json file ``jfile`` (only when ``jfile`` does not
        already exist). Evaluated before ``jappend``.
    jappend : obj (default: None)
        New data to append to existing json file ``jfile``.

    Returns
    -------
    obj or None
        Decoded JSON contents in read mode; None in write/append modes.
    """
    if jwrite_obj is not None:
        # Write `jwrite_obj` only if the file does not exist yet.
        # BUGFIX: the original OR-ed the truthiness of `jwrite_obj` into the
        # existence check (`any([isfile(...), isfile(...), jwrite_obj])`),
        # which made this branch unreachable for any truthy payload.
        if not os.path.isfile(jfile):
            print('writing `jwrite_obj` to new json `jfile`.')
            with open(jfile, 'w') as f:
                json.dump(jwrite_obj, f, sort_keys=True, ensure_ascii=False)
        else:
            print('No json in path provided.')
        return

    if jappend is not None:
        # Read-modify-write: load existing data, merge, rewrite in place.
        with open(jfile, 'r+') as f:
            json_dict = json.load(f)
            json_dict.update(jappend)
            f.seek(0)
            f.truncate()  # todo: Improve to only truncate if needed.
            json.dump(json_dict, f, sort_keys=True, indent=4)
            # no explicit f.close(): the `with` block already closes it
        return

    # Default: read mode. json.load(..., object_hook=None) is the same as
    # a plain json.load, so one call covers both cases.
    with open(jfile) as f:
        return json.load(f, object_hook=jobj_hook)
def to_cjson(self, buf=None, **kwargs):
    """Write a cjson file or return dictionary.

    The cjson format is specified
    `here <https://github.com/OpenChemistry/chemicaljson>`_.

    Args:
        buf (str): If it is a filepath, the data is written to
            filepath. If it is None, a dictionary with the cjson
            information is returned.
        kwargs: The keyword arguments are passed into the
            ``dump`` function of the
            `json library <https://docs.python.org/3/library/json.html>`_.

    Returns:
        dict:
    """
    output = {'chemical json': 0, 'atoms': {'elements': {}}}

    # Map each element symbol to its atomic number.
    number_of = constants.elements['atomic_number'].to_dict()
    output['atoms']['elements']['number'] = [
        int(number_of[symbol]) for symbol in self['atom']]

    # Flatten the coordinate table into one [x1, y1, z1, x2, ...] list.
    flat_coords = self.loc[:, ['x', 'y', 'z']].values.reshape(len(self) * 3)
    output['atoms']['coords'] = {'3d': [float(c) for c in flat_coords]}

    # Emit each bond once as an (i, j) index pair; removing the reverse
    # entry from the adjacency sets avoids duplicating the pair as (j, i).
    connections = []
    adjacency = self.get_bonds()
    for origin in adjacency:
        for partner in adjacency[origin]:
            connections += [int(origin), int(partner)]
            adjacency[partner].remove(origin)
    output['bonds'] = {'connections': {'index': connections}}

    if buf is None:
        return output
    with open(buf, mode='w') as f:
        f.write(json.dumps(output, **kwargs))