The following 50 code examples, extracted from open-source Python projects, illustrate how to use rdflib.Graph().
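Before the extracted examples, here is a minimal orientation sketch of the core rdflib.Graph API that recurs throughout them: constructing a graph, adding triples, parsing serialized RDF, running a SPARQL query, and serializing. This is not from any of the projects below; every URI and label in it is a made-up placeholder.

import rdflib
from rdflib import Graph, URIRef, Literal
from rdflib.namespace import RDF, RDFS

# Build a graph in memory and add triples as (subject, predicate, object).
g = Graph()
thing = URIRef("http://example.org/thing/1")  # placeholder URI
g.add((thing, RDF.type, RDFS.Resource))
g.add((thing, RDFS.label, Literal("Example thing", lang="en")))

# Merge in triples parsed from a string; file paths and URLs work the same way.
g.parse(
    data='<http://example.org/thing/2> '
         '<http://www.w3.org/2000/01/rdf-schema#label> "Another thing" .',
    format="turtle")

# Run a SPARQL query; result rows support attribute access by variable name.
query = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?s ?label WHERE { ?s rdfs:label ?label }
"""
for row in g.query(query):
    print(row.s, row.label)

# Serialize the whole graph (a str in rdflib 6+, bytes in older releases).
print(g.serialize(format="turtle"))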
def get_do_metadata():
    # from the do owl file, get do labels, descriptions
    g = Graph()
    g.parse(DO_OWL_PATH)
    disease_ontology = Literal('disease_ontology', datatype=URIRef('http://www.w3.org/2001/XMLSchema#string'))
    query = """
        SELECT * WHERE {
            ?id oboInOwl:hasOBONamespace ?disease_ontology .
            ?id rdfs:label ?label .
            OPTIONAL {?id obo:IAO_0000115 ?descr}
            FILTER NOT EXISTS {?id owl:deprecated ?dep}
        }
        """
    rows = g.query(query, initBindings={'disease_ontology': disease_ontology})
    res = [{str(k): str(v) for k, v in binding.items()} for binding in rows.bindings]
    df = pd.DataFrame(res)
    df.drop_duplicates(subset=['id'], inplace=True)
    df.fillna("", inplace=True)
    do = df.to_dict("records")
    do = {purl_to_curie(x['id']): x for x in do}
    return do
def test_graph_parse(self):
    # collect graphs
    graphs = []
    # loop through Content-Types, save parsed graphs
    content_types = [
        'application/ld+json',
        'application/n-triples',
        'application/rdf+xml',
        'text/n3',
        'text/plain',
        'text/turtle'
    ]
    for content_type in content_types:
        logger.debug("testing parsing of Content-Type: %s" % content_type)
        foo = repo.get_resource('%s/foo' % testing_container_uri, response_format=content_type)
        # test that graph was parsed correctly
        assert type(foo.rdf.graph) == rdflib.graph.Graph
    # create child container foo/bar (basic container)
def __readGraphIriFile(self, graphfile):
    """Search for a graph uri in graph file and return it.

    Args:
        graphfile: String containing the path of a graph file

    Returns:
        graphuri: String with the graph URI
    """
    try:
        with open(graphfile, 'r') as f:
            graphuri = f.readline().strip()
    except FileNotFoundError:
        logger.debug("File not found {}".format(graphfile))
        return

    try:
        urlparse(graphuri)
        logger.debug("Graph URI {} found in {}".format(graphuri, graphfile))
    except Exception:
        graphuri = None
        logger.debug("No graph URI found in {}".format(graphfile))

    return graphuri
def getgraphfromfile(self):
    """Return a Conjunctive Graph generated from the referenced file.

    Returns:
        A ConjunctiveGraph
    """
    graph = ConjunctiveGraph()

    try:
        graph.parse(self.path, format='nquads', publicID='http://localhost:5000/')
        logger.debug('Success: File %s parsed', self.path)
    except KeyError as e:
        # Given file contains non valid rdf data
        # logger.debug('Error: File %s not parsed', self.path)
        # self.__setcontent([[None][None][None][None]])
        pass

    return graph
def __init__(self, namespace={}, prefixes='', newqueries={}):
    self.graph = rdflib.Graph()
    self.namespace.update(namespace)
    self.prefixes += prefixes

    # run all given "SELECT" queries through prepareQuery function.
    for (id, query) in newqueries.iteritems():
        leader = query.strip()[0:6]
        if leader == 'SELECT':  # prepareQuery only works on SELECT ...
            self.queries[id] = rdflib.plugins.sparql.prepareQuery(query, initNs=self.namespace)
            print "Adding SELECT query"
        elif leader == 'DELETE' or leader == 'INSERT':
            self.queries[id] = query
            print "Adding DEL/INS query"

    #self.queries.update(queries)
    #print "Done query prep."
def notification(self, iri, **kwargs):
    """Retrieve a single LDN notification and decode into a Python object."""
    headers = kwargs.pop("headers", dict())
    if 'accept' not in headers:
        headers['accept'] = kwargs.pop("accept", self.accept_headers)

    r = requests.get(iri, headers=headers, **kwargs)
    r.raise_for_status()

    mime_type = self.content_type_to_mime_type(r.headers['content-type'])
    if mime_type == self.JSON_LD:
        return r.json()
    else:
        g = Graph().parse(data=r.text, format=mime_type)
        return json.loads(str(g.serialize(format="json-ld"), 'utf-8'))
def __init__(self, filename='sc.config'):
    """Initialize

    Parameters
    ----------
    :param filename: string
        For renaming the configuration file name. Default value.

    Returns
    -------
    :returns: none
    """
    self.graph = rdflib.Graph()
    self.filename = filename
    if os.environ.get('SC_HOME'):
        self.config_path = os.getenv('SC_HOME')
    else:
        os.environ['SC_HOME'] = os.environ['HOME'] + '/.sc/'
        self.config_path = os.getenv('SC_HOME')
def __init__(self, graph, compatibility_mode=False):
    '''Class constructor

    Graph is an rdflib.Graph instance.

    In compatibility mode, some fields are modified to maintain
    compatibility with previous versions of the ckanext-dcat parsers
    (eg adding the `dcat_` prefix or storing comma separated lists
    instead of JSON dumps).
    '''
    self.g = graph
    self.compatibility_mode = compatibility_mode

    # Cache for mappings of licenses URL/title to ID built when needed in
    # _license().
    self._licenceregister_cache = None
def test_publisher_ref(self):
    data = '''<?xml version="1.0" encoding="utf-8" ?>
    <rdf:RDF
     xmlns:dct="http://purl.org/dc/terms/"
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
     <rdfs:SomeClass rdf:about="http://example.org">
       <dct:publisher rdf:resource="http://orgs.vocab.org/some-org" />
     </rdfs:SomeClass>
    </rdf:RDF>
    '''
    g = Graph()
    g.parse(data=data)

    p = RDFProfile(g)

    publisher = p._publisher(URIRef('http://example.org'), DCT.publisher)

    eq_(publisher['uri'], 'http://orgs.vocab.org/some-org')
def test_dataset_license_from_distribution_by_uri(self):
    # license_id retrieved from the URI of dcat:license object
    g = Graph()

    dataset = URIRef("http://example.org/datasets/1")
    g.add((dataset, RDF.type, DCAT.Dataset))

    distribution = URIRef("http://example.org/datasets/1/ds/1")
    g.add((dataset, DCAT.distribution, distribution))
    g.add((distribution, RDF.type, DCAT.Distribution))
    g.add((distribution, DCT.license,
           URIRef("http://www.opendefinition.org/licenses/cc-by")))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    dataset = [d for d in p.datasets()][0]
    eq_(dataset['license_id'], 'cc-by')
def test_dataset_license_from_distribution_by_title(self):
    # license_id retrieved from dct:title of dcat:license object
    g = Graph()

    dataset = URIRef("http://example.org/datasets/1")
    g.add((dataset, RDF.type, DCAT.Dataset))

    distribution = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution, RDF.type, DCAT.Distribution))
    g.add((dataset, DCAT.distribution, distribution))

    license = BNode()
    g.add((distribution, DCT.license, license))
    g.add((license, DCT.title, Literal("Creative Commons Attribution")))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    dataset = [d for d in p.datasets()][0]
    eq_(dataset['license_id'], 'cc-by')
def test_distribution_access_url(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.accessURL, Literal('http://access.url.org')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['url'], u'http://access.url.org')
    assert 'download_url' not in resource
def test_distribution_download_url(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.downloadURL, Literal('http://download.url.org')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['url'], u'http://download.url.org')
    eq_(resource['download_url'], u'http://download.url.org')
def test_distribution_format_imt_and_format(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.mediaType, Literal('text/csv')))
    g.add((distribution1_1, DCT['format'], Literal('CSV')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['format'], u'CSV')
    eq_(resource['mimetype'], u'text/csv')
def test_distribution_format_format_only(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCT['format'], Literal('CSV')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['format'], u'CSV')
def test_distribution_format_imt_only(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.mediaType, Literal('text/csv')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    if toolkit.check_ckan_version(min_version='2.3'):
        eq_(resource['format'], u'CSV')
        eq_(resource['mimetype'], u'text/csv')
    else:
        eq_(resource['format'], u'text/csv')
def test_distribution_format_imt_only_normalize_false(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.mediaType, Literal('text/csv')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['format'], u'text/csv')
    eq_(resource['mimetype'], u'text/csv')
def test_distribution_format_format_only_normalize_false(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCT['format'], Literal('CSV')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['format'], u'CSV')
    assert 'mimetype' not in resource
def test_distribution_format_imt_normalized(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.mediaType, Literal('text/unknown-imt')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    eq_(resource['format'], u'text/unknown-imt')
    eq_(resource['mimetype'], u'text/unknown-imt')
def test_distribution_format_format_normalized(self):
    g = Graph()

    dataset1 = URIRef("http://example.org/datasets/1")
    g.add((dataset1, RDF.type, DCAT.Dataset))

    distribution1_1 = URIRef("http://example.org/datasets/1/ds/1")
    g.add((distribution1_1, RDF.type, DCAT.Distribution))
    g.add((distribution1_1, DCAT.mediaType, Literal('text/csv')))
    g.add((distribution1_1, DCT['format'], Literal('Comma Separated Values')))
    g.add((dataset1, DCAT.distribution, distribution1_1))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    resource = datasets[0]['resources'][0]

    if toolkit.check_ckan_version(min_version='2.3'):
        eq_(resource['format'], u'CSV')
        eq_(resource['mimetype'], u'text/csv')
    else:
        eq_(resource['format'], u'Comma Separated Values')
def test_spatial_rdfs_label(self):
    g = Graph()

    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))

    spatial_uri = URIRef('http://geonames/Newark')
    g.add((dataset, DCT.spatial, spatial_uri))

    g.add((spatial_uri, RDF.type, DCT.Location))
    g.add((spatial_uri, RDFS.label, Literal('Newark')))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    extras = self._extras(datasets[0])

    eq_(extras['spatial_text'], 'Newark')
def test_spatial_wkt_only(self):
    g = Graph()

    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))

    spatial_uri = URIRef('http://geonames/Newark')
    g.add((dataset, DCT.spatial, spatial_uri))

    g.add((spatial_uri, RDF.type, DCT.Location))
    g.add((spatial_uri,
           LOCN.geometry,
           Literal('POINT (67 89)', datatype=GSP.wktLiteral)))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    extras = self._extras(datasets[0])

    # NOTE: geomet returns floats for coordinates on WKT -> GeoJSON
    eq_(extras['spatial'], '{"type": "Point", "coordinates": [67.0, 89.0]}')
def test_spatial_literal_only(self):
    g = Graph()

    dataset = URIRef('http://example.org/datasets/1')
    g.add((dataset, RDF.type, DCAT.Dataset))

    g.add((dataset, DCT.spatial, Literal('Newark')))

    p = RDFParser(profiles=['euro_dcat_ap'])
    p.g = g

    datasets = [d for d in p.datasets()]

    extras = self._extras(datasets[0])

    eq_(extras['spatial_text'], 'Newark')
    assert_true('spatial_uri' not in extras)
    assert_true('spatial' not in extras)
def sentics_api(self, concept, parsed_graph=None):
    """Return sentics of a concept.

    If a parsed graph is passed, the method does not load the RDF again.
    """
    concept_sentics_uri = self.concept_base_uri + concept + "/sentics"
    sentics = {
        "pleasantness": 0,
        "attention": 0,
        "sensitivity": 0,
        "aptitude": 0
    }

    if parsed_graph is None:
        graph = rdflib.Graph()
        parsed_graph = graph.parse(concept_sentics_uri, format="xml")

    result, _ = self._output(concept_sentics_uri)

    sentics["pleasantness"] = result[3]
    sentics["attention"] = result[0]
    sentics["sensitivity"] = result[1]
    sentics["aptitude"] = result[2]

    return sentics
def polarity(self, concept, parsed_graph=None):
    """Return the polarity of a concept.

    If a parsed graph is passed, the method does not load the RDF again.
    """
    concept_polarity_uri = self.concept_base_uri + concept + "/polarity"

    if parsed_graph is None:
        try:
            graph = rdflib.Graph()
            parsed_graph = graph.parse(concept_polarity_uri, format="xml")
            result, _ = self._output(concept_polarity_uri)
            return result[0]
        except Exception:
            return 0
def _output(self, url):
    """Downloads and returns the output avoiding w3.org error"""
    response = requests.get(url)
    html = response.text
    html = html.replace('w3.org', 'www.w3.org')

    graph = rdflib.Graph()
    parsed_graph = graph.parse(data=html, format="xml")

    result = []
    stresult = []
    for s, p, o in parsed_graph:
        if type(o) == rdflib.term.Literal:
            result.append(o.toPython())
        else:
            stresult.append(o.toPython())

    return result, stresult
def setUp(self): self.graph = rdflib.Graph() self.entity = rdflib.URIRef("https://bibcat.org/test-entity") self.simple_title_bnode = rdflib.BNode() self.graph.add((self.entity, rdflib.RDF.type, BF.Title)) self.graph.add((self.entity, BF.title, self.simple_title_bnode)) self.graph.add((self.simple_title_bnode, BF.mainTitle, rdflib.Literal("This is a test"))) self.top_title_bnode = rdflib.BNode() self.graph.add((self.entity, BF.title, self.top_title_bnode)) secondary_title_bnode = rdflib.BNode() self.graph.add((self.top_title_bnode, rdflib.RDF.type, BF.Topic)) self.graph.add((self.top_title_bnode, rdflib.RDFS.label, rdflib.Literal("This is a title and a name"))) self.graph.add((self.top_title_bnode, SCHEMA.name, secondary_title_bnode)) self.graph.add((secondary_title_bnode, rdflib.RDF.value, rdflib.Literal("This is a name")))
def setUp(self):
    self.graph = rdflib.Graph()
    self.entity_one = rdflib.URIRef("https://bibcat.org/test-entity")
    self.graph.add((self.entity_one, rdflib.RDF.type, rdflib.RDFS.Resource))
    self.graph.add((self.entity_one,
                    rdflib.RDFS.label,
                    rdflib.Literal("Test Entity One", lang="en")))
    self.entity_two = rdflib.URIRef("https://bibcat.org/test-entity-two")
    self.graph.add((self.entity_two, rdflib.RDF.type, rdflib.RDFS.Resource))
    self.graph.add((self.entity_two,
                    rdflib.RDFS.label,
                    rdflib.Literal("Test Entity Two", lang="en")))
    title_bnode = rdflib.BNode()
    self.graph.add((self.entity_two, BF.title, title_bnode))
    self.graph.add((title_bnode, rdflib.RDF.type, BF.Title))
    self.graph.add((title_bnode, BF.subTitle, rdflib.Literal("Subtitle ")))
def create_rdf_list(graph, nodes):
    """Creates an RDF List with the ordering based on the nodes.
    Returns a blank node that functions in the object role for
    adding a triple.

    Args:
        graph(rdflib.Graph|rdflib.ConjunctiveGraph): Source graph
        nodes(list): Python list of nodes
    """
    if len(nodes) < 1:
        return rdflib.RDF.nil
    ordered_bnode = rdflib.BNode()
    graph.add((ordered_bnode, rdflib.RDF.first, nodes[0]))
    graph.add((ordered_bnode,
               rdflib.RDF.rest,
               create_rdf_list(graph, nodes[1:])))
    return ordered_bnode
def replace_iri(graph, old_iri, new_iri):
    """Replaces old IRI with a new IRI in the graph

    Args:
    ----
        graph: rdflib.Graph
        old_iri: rdflib.URIRef, Old IRI
        new_iri: rdflib.URIRef, New IRI
    """
    if old_iri == new_iri:
        # Otherwise deletes all occurrences of the iri in the graph
        return
    for pred, obj in graph.predicate_objects(subject=old_iri):
        graph.add((new_iri, pred, obj))
        graph.remove((old_iri, pred, obj))
    for subj, pred in graph.subject_predicates(object=old_iri):
        graph.add((subj, pred, new_iri))
        graph.remove((subj, pred, old_iri))
def __add_creators__(self, work_graph, work_uri, instance_uri):
    """Method takes a new work graph and instance uri, queries for
    relators:creators of instance uri and adds values to work graph

    Args:
        work_graph(rdflib.Graph): RDF Graph of new BF Work
        instance_uri(rdflib.URIRef): URI of BF Instance
    """
    instance_key = str(instance_uri)
    if instance_key in self.processed:
        for code in self.creator_codes:
            if not code in self.processed[instance_key]:
                continue
            relator = getattr(NS_MGR.relators, code)
            for agent_uri in self.processed[instance_key][code]:
                work_graph.add((work_uri, relator, agent_uri))
def __add_work_title__(self, work_graph, work_uri, instance_uri):
    """Method takes a new work graph and instance uri, queries for
    bf:InstanceTitle of instance uri and adds values to work graph

    Args:
        work_graph(rdflib.Graph): RDF Graph of new BF Work
        instance_uri(rdflib.URIRef): URI of BF Instance
    """
    instance_key = str(instance_uri)
    if instance_key in self.processed and \
       "title" in self.processed[instance_key]:
        work_title_bnode = rdflib.BNode()
        work_graph.add((work_uri, NS_MGR.bf.title, work_title_bnode))
        work_graph.add((work_title_bnode,
                        NS_MGR.rdf.type,
                        NS_MGR.bf.WorkTitle))
        for row in self.processed[instance_key]["title"]:
            main_title, subtitle = row["mainTitle"], row["subtitle"]
            work_graph.add((work_title_bnode,
                            NS_MGR.bf.mainTitle,
                            rdflib.Literal(main_title)))
            if subtitle:
                work_graph.add((work_title_bnode,
                                NS_MGR.bf.subtitle,
                                rdflib.Literal(subtitle)))
def graph_member(self, ldp_root, c_id, obj=None):
    if not obj:
        obj = self.member()
    node = URIRef(ldp_root + encoder.encode(c_id) + "/member/" + encoder.encode(obj.id))
    mappings = URIRef(node + "#mappings")

    g = Graph(identifier=node)
    g.add((node, RDF.type, RDA.Member))
    g.add((node, DCTERMS.identifier, Literal(obj.id)))
    g.add((node, RDA.location, Literal(obj.location)))
    if hasattr(obj, 'datatype'):
        g.add((node, RDA.datatype, Literal(obj.datatype)))
    if hasattr(obj, 'ontology'):
        g.add((node, RDA.ontology, Literal(obj.ontology)))
    if hasattr(obj, 'mappings'):
        g.add((node, RDA.mappings, mappings))
        mp = obj.mappings
        if hasattr(mp, 'role'):
            g.add((mappings, RDA.role, URIRef(obj.mappings.role)))
        if hasattr(mp, 'index'):
            g.add((mappings, RDA.itemIndex, Literal(obj.mappings.index)))
        if hasattr(mp, 'dateAdded'):
            g.add((mappings, RDA.dateAdded, Literal(obj.mappings.dateAdded)))
    return g
def html_table_parser(self, res_name):
    """
    Method to instantiate HtmlTableParser, analyze tables and then
    give in output a list of tables.

    :param res_name: resource that has to be analyzed

    :return: list of tables found
    """
    html_doc_tree = self.html_object_getter(res_name)
    # if html doc is defined
    if html_doc_tree:
        graph = rdflib.Graph()
        # instantiate html table parser
        html_table_parser = HtmlTableParser.HtmlTableParser(html_doc_tree, self.chapter, graph,
                                                            self.topic, res_name, self.utils, False)
        # if there are tables to analyze
        if html_table_parser:
            # analyze and parse tables
            html_table_parser.analyze_tables()
            return html_table_parser.all_tables
        # if there aren't tables to analyze result will be empty
        else:
            return ""
    # if html doc is not defined result will be empty
    else:
        return ""
def __init__(self, edam_url):
    """
    :param edam_url: path to EDAM.owl file
    :type edam_url: STRING

    All the EDAM ontology will be contained in a dictionary
    (self.edam_ontology).
    """
    if edam_url is None:
        LOGGER.info("Loading EDAM info from http://edamontology.org/EDAM.owl")
        self.edam_ontology = rdflib.Graph()
        self.edam_ontology.parse("http://edamontology.org/EDAM.owl")
        # Get version of EDAM ontology
        version_query = """SELECT ?version WHERE {
                           <http://edamontology.org> doap:Version ?version}"""
        for row in self.edam_ontology.query(version_query):
            self.version = row[0]
            break
    else:
        pass
def rdf(request):
    uri = request.GET['uri']
    g = Graph()
    annotations = Annotation.objects.filter(uri=uri)
    for annotation in annotations:
        if annotation.title:
            g.add((URIRef(annotation.uri),
                   URIRef("http://localhost/metawiki/index.php/Special:URIResolver/Property-3ATitle"),
                   Literal(annotation.title)))
        if annotation.notes:
            g.add((URIRef(annotation.uri),
                   URIRef("http://localhost/metawiki/index.php/Special:URIResolver/Property-3ANotes"),
                   Literal(annotation.notes)))
        for tag in annotation.tags.all():
            g.add((URIRef(annotation.uri),
                   URIRef("http://localhost/metawiki/index.php/Special:URIResolver/Property-3ATag"),
                   Literal(tag.prefLabel)))
    status = HttpResponse(g.serialize(format='xml'))
    status["Content-Type"] = "application/rdf+xml"
    return status
def get_triples(org, out_file, weeks=1, span=None, format="turtle"): g = Graph() if span is not None: records = get_publications_for_org(org, span=span) else: records = get_publications_for_org(org, weeks=int(weeks)) num = 0 for num, rec in enumerate(records): g += rec.to_rdf() trips = len(g) console("{} records found. {} triples created.".format(num or 0, trips)) if trips > 0: if out_file is not None: output_graph(g, destination=out_file, format=format) else: print output_graph(g, format=format)
def add_vcard(self, position, name):
    """
    :param position: number in author order
    :param name: name as string - last, first, middle
    :return: rdflib.Graph
    """
    g = Graph()

    # vcard individual
    vci_uri = D['vcard-individual-' + position + '-' + self.localid]
    g.add((vci_uri, RDF.type, VCARD.Individual))

    # vcard name
    vcn_uri = D['vcard-name-' + position + '-' + self.localid]
    g.add((vcn_uri, RDF.type, VCARD.Name))
    g.add((vcn_uri, RDFS.label, Literal(name)))
    # Parse name into first, last, middle
    name = HumanName(name)
    g.add((vcn_uri, VCARD.givenName, Literal(name.first)))
    g.add((vcn_uri, VCARD.familyName, Literal(name.last)))
    if name.middle != "":
        g.add((vcn_uri, VIVO.middleName, Literal(name.middle)))
    # Relate vcard individual to vcard name
    g.add((vci_uri, VCARD.hasName, vcn_uri))
    return vci_uri, g
def authorship(self):
    """
    Add authorship statements and vcards for authors.

    :return: rdflib.Graph
    """
    g = Graph()
    for num, au in enumerate(self.authors()):
        position = str(num + 1)

        vcard_individual_uri, vcard_stmts = self.add_vcard(position, au)
        g += vcard_stmts

        # Authorship
        aship_uri = D['authorship-' + position + '-' + self.localid]
        g.add((aship_uri, RDF.type, VIVO.Authorship))
        g.add((aship_uri, VIVO.rank, Literal(int(position))))
        # Relate pub and authorship
        g.add((aship_uri, VIVO.relates, self.pub_uri))
        # Relate vcard and authorship
        g.add((aship_uri, VIVO.relates, vcard_individual_uri))
    return g
def _vcard_email(self):
    g = Graph()
    try:
        emails = [e for e in self.profile["emails"].split("|")]
    except KeyError:
        try:
            emails = [self.profile['email']]
        except KeyError:
            emails = []
    for email in emails:
        vt = Resource(g, self.vcard_email_uri)
        vt.set(RDF.type, VCARD.Work)
        # Label probably not necessary
        vt.set(RDFS.label, Literal(email))
        vt.set(VCARD.email, Literal(email))
    return g
def org_total_counts(orgs):
    #pcounts = incites_api.get_total_pubs(name)
    g = Graph()
    for org_name in orgs:
        org_uri = waan_uri(org_name)
        ln = local_name(org_uri)
        pcounts = load_incites_json_file(org_name, 'total')
        for item in pcounts:
            curi = D['pubcount-' + ln + '-' + str(item['year'])]
            g.add((curi, RDF.type, WOS.InCitesPubPerYear))
            g.add((curi, RDFS.label, Literal("{} - {}".format(item['year'], item['count']))))
            g.add((curi, WOS.number, Literal(item['count'])))
            g.add((curi, WOS.year, Literal(item['year'])))
            g.add((org_uri, VIVO.relates, curi))
    ng = "http://localhost/data/incites-pub-year-counts"
    backend.sync_updates(ng, g)
    return True
def org_total_cites(orgs):
    g = Graph()
    for org_name in orgs:
        org_uri = waan_uri(org_name)
        #print>>sys.stderr, "Processing", org_name, "total cites"
        ln = local_name(org_uri)
        tc = load_incites_json_file(org_name, 'cites')
        for item in tc:
            curi = D['citecount-' + ln + '-' + str(item['year'])]
            g.add((curi, RDF.type, WOS.InCitesCitesPerYear))
            g.add((curi, RDFS.label, Literal("{} - {}".format(item['year'], item['count']))))
            g.add((curi, WOS.number, Literal(item['count'])))
            g.add((curi, WOS.year, Literal(item['year'])))
            g.add((org_uri, VIVO.relates, curi))
    #print g.serialize(format="turtle")
    ng = "http://localhost/data/incites-total-cites-year-counts"
    backend.sync_updates(ng, g)
    return True
def org_top_categories(orgs):
    g = Graph()
    for org_name in orgs:
        #print>>sys.stderr, "Processing", org_name, "top categories"
        org_uri = waan_uri(org_name)
        ln = local_name(org_uri)
        top_cat = load_incites_json_file(org_name, 'categories')
        for item in top_cat:
            cat = item['category']
            category_uri = get_category_uri(cat)
            curi = D['topcategory-'] + ln + slugify(cat)
            g.add((curi, RDF.type, WOS.InCitesTopCategory))
            g.add((curi, RDFS.label, Literal("{} - {}".format(org_name, cat))))
            g.add((curi, WOS.number, Literal(item['count'])))
            g.add((curi, VIVO.relates, category_uri))
            g.add((curi, VIVO.relates, org_uri))
    #print g.serialize(format="turtle")
    ng = "http://localhost/data/incites-top-categories"
    backend.sync_updates(ng, g)
    return True
def run(self):
    g = Graph()
    wos_top = D['wos-topics']
    g.add((wos_top, RDF.type, WOS.TopTopic))
    g.add((wos_top, RDFS.label, Literal("Web of Science Subject Schemas")))
    with open(self.input_file) as inf:
        for row in csv.DictReader(inf):
            ra = row['Research Area (eASCA)']
            category = row['WoS Category (tASCA)']
            broad, ra1, ra2 = self.chunk_ras(ra)
            broad_uri, cg = self.do_term(broad, clz=WOS.BroadDiscipline)
            g.add((broad_uri, SKOS.broader, wos_top))
            g += cg
            ra1_uri, cg = self.do_term(ra1, broader=broad_uri, clz=WOS.ResearchArea, uri_prefix="wosra")
            g += cg
            ra2_uri = None
            if ra2 is not None:
                ra2_uri, cg = self.do_term(ra2, broader=ra1_uri, clz=WOS.ResearchArea, uri_prefix="wosra")
                g += cg
            cat_uri, cg = self.do_term(category, broader=ra2_uri or ra1_uri, clz=WOS.Category)
            g += cg
    self.serialize(g)
def add_grant(grant, pub_uri):
    """Create a funder and grant(s)."""
    g = Graph()
    if grant.get("agency") is None:
        logger.info("No agency found for {} with ids {}.".format(pub_uri, ";".join(grant.get("ids", []))))
        return g
    slug = slugify(grant["agency"])
    uri = D['funder-' + slug]
    g.add((uri, RDF.type, WOS.Funder))
    g.add((uri, RDFS.label, Literal(grant["agency"])))
    for gid in grant["ids"]:
        label = "{} - {}".format(grant["agency"], gid)
        guri = D['grant-'] + slugify(label)
        g.add((guri, RDF.type, WOS.Grant))
        g.add((guri, RDFS.label, Literal(label)))
        g.add((guri, WOS.grantId, Literal(gid)))
        g.add((guri, VIVO.relates, uri))
        g.add((guri, VIVO.relates, pub_uri))
    return g
def addressships(self):
    g = Graph()
    addresses = self.addresses()
    for addr in addresses:
        addr_uri = self.addr_uri(addr["full_address"], addr["number"])
        org = addr["organization"]
        r = Resource(g, addr_uri)
        r.set(RDF.type, WOS.Address)
        r.set(RDFS.label, Literal(addr['full_address']))
        r.set(WOS.organizationName, Literal(org))
        r.set(WOS.sequenceNumber, Literal(addr['number']))
        # relation to author set by authorship
        # relate to pub
        r.set(VIVO.relates, self.uri)
        # sub orgs
        for idx, suborg in enumerate(addr["sub_organizations"]):
            label = "{}, {}".format(suborg, org)
            so_uri = self.sub_org_uri(label)
            r.add(VIVO.relates, so_uri)
        # relate unified orgs
        for uorg in addr["unified_orgs"]:
            uo_uri = waan_uri(uorg)
            r.add(VIVO.relates, uo_uri)
    return g
def get_existing_address(uri):
    vstore = backend.get_store()
    rq = rq_prefixes + """
    SELECT ?address
    WHERE {
        ?uri vivo:relatedBy ?address .
        ?address a wos:Address .
    }
    """
    rmg = Graph()
    addr_uris = []
    for row in vstore.query(rq, initBindings={'uri': uri}):
        addr_uris.append(row.address)
        rmg.add((row.address, VIVO.relates, uri))
        rmg.add((uri, VIVO.relatedBy, row.address))
    return addr_uris, rmg
def ingest_graph(graph):
    sparql = """SELECT DISTINCT ?subject
    WHERE {
        ?subject ?pred ?obj .
    }"""
    for row in graph.query(sparql):
        subject = row[0]
        fedora_result = requests.post(FEDORA_URL)
        fedora_subject = rdflib.URIRef(fedora_result.text)
        subject_graph = rdflib.Graph()
        subject_graph.parse(str(fedora_subject))
        subject_graph.namespace_manager.bind(
            'schema', 'http://schema.org/')
        subject_graph.namespace_manager.bind(
            'owl', str(rdflib.OWL))
        subject_graph.add((fedora_subject, rdflib.OWL.sameAs, subject))
        for pred, obj in graph.predicate_objects(subject=subject):
            subject_graph.add((fedora_subject, pred, obj))
        print(subject_graph.serialize(format='turtle').decode())
        update_result = requests.put(str(fedora_subject),
                                     data=subject_graph.serialize(format='turtle'),
                                     headers={"Content-Type": "text/turtle"})
def xquery_socket(raw_xml):
    """Function takes raw_xml and converts to BIBFRAME RDF

    Args:
        raw_xml -- Raw XML
    """
    xquery_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    xquery_server.connect(('localhost', 8089))
    xquery_server.sendall(raw_xml + b'\n')
    rdf_xml = b''
    while 1:
        data = xquery_server.recv(1024)
        if not data:
            break
        rdf_xml += data
    xquery_server.close()
    bf_graph = rdflib.Graph()
    for namespace in [("bf", "http://bibframe.org/vocab/"),
                      ("schema", "http://schema.org/")]:
        bf_graph.namespace_manager.bind(namespace[0], namespace[1])
    bf_graph.parse(data=rdf_xml.decode(), format='xml')
    return bf_graph
def extract_genome_acc(prot_rdf):
    """Extracts and returns the assembly accession from the proteome rdf
    which is provided as input. Returns -1 if not available.

    prot_rdf: A Uniprot's proteome rdf url or file path
    """
    g = Graph()
    response = requests.get(prot_rdf).status_code
    if response == httplib.OK:
        g.load(prot_rdf)
        for s, p, o in g:
            if string.find(o, "GCA") != -1:
                return os.path.split(o)[1]
    return -1

# -----------------------------------------------------------------------------