Project:SPARQL/examples: Difference between revisions

From Protist-Prokaryote Symbiosis Database
Jump to navigation Jump to search
Line 318: Line 318:
==Statistics==
==Statistics==


===Count interactions per host taxon, report in descending order===
===Summary: Counts of hosts, symbionts, interactions, references===
 
<sparql tryit="1">
# Summary statistics: a single result row with the number of hosts,
# 'interacts with' statements, named symbionts, unknown symbionts,
# and reference items.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>

SELECT ?hostCount ?statementCount ?symbiontCount ?unknownCount ?refCount
WHERE {
  # Hosts and their 'interacts with' (P19) statement nodes; statements that
  # point to an unknown symbiont are counted here as well.
  {
    SELECT (COUNT(DISTINCT ?subject) AS ?hostCount)
           (COUNT(DISTINCT ?stmt) AS ?statementCount)
    WHERE { ?subject pps:P19 ?stmt . }
  }
  # Named symbionts: distinct P19 values that are not blank nodes.
  {
    SELECT (COUNT(DISTINCT ?partner) AS ?symbiontCount)
    WHERE {
      ?anyHost ppt:P19 ?partner .
      FILTER (!ISBLANK(?partner))
    }
  }
  # Unknown symbionts: distinct P19 values that are blank nodes.
  {
    SELECT (COUNT(DISTINCT ?partner) AS ?unknownCount)
    WHERE {
      ?anyHost ppt:P19 ?partner .
      FILTER (ISBLANK(?partner))
    }
  }
  # Reference items: instances (P18) of Q3.
  {
    SELECT (COUNT(DISTINCT ?ref) AS ?refCount)
    WHERE { ?ref ppt:P18 pp:Q3 . }
  }
}
</sparql>
 
===Host taxa sorted by number of interactions===


Treat these counts with a pinch of salt, because species concepts differ.
Treat these counts with a pinch of salt, because species concepts differ.
Line 331: Line 350:
</sparql>
</sparql>


===Count interactions per symbiont taxon, report in descending order===
===Symbiont taxa sorted by number of interactions===


<sparql tryit="1">
<sparql tryit="1">

Revision as of 13:52, 20 March 2024

List all taxon items

# Every item that is an instance (P18) of the taxon class (Q2), with its label.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?itemLabel
WHERE {
  ?item ppt:P18 pp:Q2 .
  # Label service fills ?itemLabel in the UI language, falling back to English.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}

Try it!


List all interaction types and RO mapping if available

# Every instance (P18) of Q1 (the interaction-type class, per this section's
# title), together with its P16 value (the RO mapping) when one is present.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?itemLabel ?roterm
WHERE {
  ?item ppt:P18 pp:Q1 .
  # Items without a mapping are still listed, with ?roterm left unbound.
  OPTIONAL { ?item ppt:P16 ?roterm . }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}

Try it!


List all body parts and GO mapping if available

# Every instance (P18) of Q5 (the body-part class, per this section's title),
# together with its P17 value (the GO mapping) when one is present.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?itemLabel ?goterm
WHERE {
  ?item ppt:P18 pp:Q5 .
  # Items without a mapping are still listed, with ?goterm left unbound.
  OPTIONAL { ?item ppt:P17 ?goterm . }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}

Try it!


List all analytical techniques and Wikidata mapping if available

# Every instance (P18) of Q6 (the analytical-technique class, per this
# section's title), together with its P2 value (the Wikidata mapping) when
# one is present.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?itemLabel ?wikidata
WHERE {
  ?item ppt:P18 pp:Q6 .
  # Items without a mapping are still listed, with ?wikidata left unbound.
  OPTIONAL { ?item ppt:P2 ?wikidata . }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}

Try it!


List all interactions, optionally the localization, interaction type, and references

# Every 'interacts with' (P19) statement with its host and symbiont, plus,
# where present, the P20 qualifier (?localization), the P26 qualifier (?type),
# and the DOI (P27) / stated-in (P23) values of each attached reference node.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT DISTINCT
  ?host ?hostLabel ?typeLabel ?symbiontLabel ?localizationLabel ?doi ?statedin
WHERE {
  # Walk host -> statement node -> symbiont so qualifiers and references
  # hanging off the statement node can be reached.
  ?host pps:P19 ?stmt .
  ?stmt ppss:P19 ?symbiont .

  # Qualifiers are optional: a statement without them is still reported.
  OPTIONAL { ?stmt ppsq:P20 ?localization . }
  OPTIONAL { ?stmt ppsq:P26 ?type . }

  # A reference node may carry a DOI, a stated-in item, both, or neither.
  OPTIONAL {
    ?stmt prov:wasDerivedFrom ?reference .
    OPTIONAL { ?reference ppsr:P27 ?doi . }
    OPTIONAL { ?reference ppsr:P23 ?statedin . }
  }

  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}

Try it!


Draw graph of all hosts connected to Megaira polyxenophila and their other interactions

Megaira polyxenophila (Q141) is an intracellular bacterium found in a wide diversity of host taxa. This query also includes subtaxa (e.g. subspecies) of Megaira polyxenophila.

#defaultView:Graph
# Hosts that have an 'interacts with' (P19) statement whose value is Q141 or
# has Q141 somewhere up its parent-taxon (P29) chain, plus each host's other
# P19 values (?linkto), its P33 value (?pic), and the P20 qualifier of the
# matching statement (?edge).
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>

SELECT DISTINCT ?host ?hostLabel ?pic ?linkto ?linktoLabel ?edgeLabel
WHERE {
  ?host pps:P19 ?stmt .
  # P29* matches zero or more parent-taxon steps, so Q141 itself and every
  # item whose parent-taxon chain reaches Q141 are included.
  ?stmt ppss:P19/ppt:P29* pp:Q141 .
  OPTIONAL { ?stmt ppsq:P20 ?edge . }
  OPTIONAL { ?host ppt:P19 ?linkto . }
  OPTIONAL { ?host ppt:P33 ?pic . }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}

Try it!


Draw taxonomy tree from Euglenozoa

#defaultView:Graph
# Every item below Q775 (Euglenozoa, per this section's title) in the
# parent-taxon (P29) hierarchy, paired with its direct parent(s) so the
# result can be drawn as a tree.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>

SELECT DISTINCT ?host ?hostLabel ?parent ?parentLabel
WHERE {
  # One or more parent-taxon steps up must reach Q775.
  ?host ppt:P29+ pp:Q775 ;
        ppt:P29 ?parent .
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}

Try it!


Maintenance queries

Generate QuickStatements to add "instance of: taxon" to items with NCBI taxon IDs

# QuickStatements rows (?qid, ?P18) that add "instance of (P18): Q2 (taxon)"
# to items that
#   - have at least one 'interacts with' (P19) statement,
#   - have a P11 value (the NCBI taxon ID, per this section's title), and
#   - are not yet an instance of Q2 or of a class that reaches Q2 through
#     zero or more P24 links.
# The label service is not called: no ?...Label variable is selected, so it
# would contribute nothing to the result.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>

SELECT DISTINCT ?qid ?P18 WHERE {
  # pps:P19 leads to the statement node, not to the symbiont item itself.
  ?host pps:P19 ?statement ;
        ppt:P11 ?ncbi .
  FILTER NOT EXISTS {
    ?host ppt:P18 ?instanceof .
    ?instanceof ppt:P24* pp:Q2
  }
  # Strip the entity URI down to the bare Q-id of the item to be edited.
  BIND (ENCODE_FOR_URI(REPLACE(STR(?host), ".*Q", "Q")) AS ?qid)
  # Value to assign: Q2 (taxon).
  BIND ("Q2" AS ?P18)
}

Try it!


Generate QuickStatements to add "instance of: placeholder taxon" to items with P28 statements

P28 "representative sequence for placeholder taxon" is intended for placeholder taxa, i.e. items that are described in the literature like taxa or provisional taxa and have a published phylogenetic marker sequence, but where a matching taxon ID in NCBI Taxonomy is not yet assigned.

# QuickStatements rows (?qid, ?P18) that add "instance of (P18): Q56
# (placeholder taxon)" to every item that has a P28 statement but is not yet
# an instance of Q56.
# The label service is not called: no ?...Label variable is selected, so it
# would contribute nothing to the result.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>

SELECT DISTINCT ?qid ?P18 WHERE {
  ?item ppt:P28 ?repseq .
  FILTER NOT EXISTS {
    ?item ppt:P18 pp:Q56
  }
  # Strip the entity URI down to the bare Q-id of the item to be edited.
  BIND (ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
  # Value to assign: Q56 (placeholder taxon).
  BIND ("Q56" AS ?P18)
}

Try it!


Generate QuickStatements to link interaction hosts to their genera

Split labels, e.g. "Paramecium caudatum", on whitespace, take the first word as a putative genus name, and look for items with taxon rank "genus" whose label matches that name. To avoid false matches, do not give items labels like "Cryptotermes symbiont"; use "symbiont of Cryptotermes" instead.

# QuickStatements rows (?qid, ?P29) that give an interaction host a parent
# taxon (P29): the genus item whose English label equals the first word of
# the host's label.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bd: <http://www.bigdata.com/rdf#>
SELECT DISTINCT ?qid ?P29 WHERE {
  # Hosts (items with an 'interacts with' P19 value) that have no parent
  # taxon yet, with their bare Q-id and the first word of their label.
  {
    SELECT DISTINCT ?item ?qid ?itemLabel ?maybeGenus WHERE {
      ?item ppt:P19 ?symbiont.
      FILTER(NOT EXISTS { ?item ppt:P29 ?parent. })
      BIND(ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
      # Manual label-service mode: the label is bound inside the service so
      # that STRBEFORE below can operate on it.
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
        ?item rdfs:label ?itemLabel.
      }
      # Text before the first space; empty when the label has no space.
      BIND(STRBEFORE(?itemLabel, " ") AS ?maybeGenus)
    }
  }
  # Items with taxon rank (P32) genus (Q601) and their English labels.
  {
    SELECT DISTINCT ?item2 ?item2Label WHERE {
      ?item2 ppt:P32 pp:Q601;
        rdfs:label ?item2Label.
      FILTER((LANG(?item2Label)) = "en")
    }
  }
  FILTER(?item2Label = ?maybeGenus)
  # ?qid is already bound by the first subquery. It must not be assigned
  # again here: SPARQL 1.1 forbids BIND on a variable that is already in
  # scope in the same group.
  BIND(ENCODE_FOR_URI(REPLACE(STR(?item2), ".*Q", "Q")) AS ?P29)
}

Try it!


Generate QuickStatements to create items for DOIs found in references without corresponding items

This checks for items in Wikidata with the same DOI. If the references are not yet in Wikidata, consider adding them there first with Scholia or SourceMD.

# QuickStatements rows that create a reference item (instance of Q3) for each
# DOI that is cited in a reference on an 'interacts with' (P19) statement but
# has no reference item yet. Label and Wikidata link come from the Wikidata
# item carrying the same DOI; DOIs unknown to Wikidata produce no row.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT DISTINCT ?qid ?Len ?Aen ?P18 ?P13 ?P2
WITH {
  SELECT DISTINCT ?host ?DOI 
  WHERE {
    ?host pps:P19 ?interaction.
    ?interaction prov:wasDerivedFrom ?refnode.
    ?refnode ppsr:P27 ?doi.
    # check that no reference item already linked in this statement
    FILTER NOT EXISTS { ?refnode ppsr:P23 ?statedin }
    # check that no reference item with this DOI already exists.
    # This has to be NOT EXISTS: joining against all reference items and
    # filtering on "?doi NOT IN (?refdoi)" keeps a DOI as soon as ANY
    # reference item has a different DOI, so existing DOIs slip through.
    # DOIs are compared case-insensitively because the items created by this
    # query store the upper-cased form.
    FILTER NOT EXISTS {
      ?ref ppt:P18 pp:Q3;
           ppt:P13 ?refdoi.
      FILTER (UCASE(STR(?refdoi)) = UCASE(STR(?doi)))
    }
    # wikidata standardizes DOI in uppercase
    BIND (UCASE(STR(?doi)) AS ?DOI)
  }
} AS %dois
WHERE {
  INCLUDE %dois
  SERVICE <https://query.wikidata.org/sparql> {
    # find wikidata items with this DOI
    ?wdref wdt:P356 ?DOI;
           rdfs:label ?wdlabel.
    FILTER ( LANG(?wdlabel) = "en" )
  }
  # Empty ?qid column, as in the original export layout for new items.
  BIND ("" AS ?qid)
  # Labels and aliases do not need to be surrounded by quotes
  # unless they contain double quotes or commas
  # Replace double quotes and commas in label because lazy
  BIND (REPLACE(REPLACE(STR(?wdlabel), '\"', ""), ",", "") AS ?Len) 
  BIND (STR(?DOI) AS ?Aen)
  # Strings, external identifiers, and URLs must be surrounded by three double quotes
  # The Query Service CSV export automagically converts single quotes to triple
  BIND (CONCAT('\"', STR(?DOI), '\"') AS ?P13)
  BIND (CONCAT('\"', STR(?wdref), '\"') AS ?P2)
  BIND ("Q3" as ?P18) # instance of reference
}

Try it!


List all classes

# Every distinct value used as an "instance of" (P18) target, with its label.
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?class ?classLabel
WHERE {
  ?instance ppt:P18 ?class .
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}

Try it!


Taxon items without parent taxon

# Taxon items (instance of (P18) Q2) that have no parent taxon (P29)
# statement, with their bare Q-id and label.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?qid ?itemLabel
WHERE {
  ?item ppt:P18 pp:Q2 .
  FILTER NOT EXISTS { ?item ppt:P29 ?anyParent . }
  # Reduce the entity URI to everything from the last "Q" onwards.
  BIND (ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}

Try it!


Items that are in a maintenance class

# Items that are an instance (P18) of Q657 (the maintenance class, per this
# section's title) or of any class that reaches Q657 through zero or more
# P24 links.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?itemLabel ?classLabel
WHERE {
  ?item ppt:P18 ?class .
  ?class ppt:P24* pp:Q657 .
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}

Try it!


DOIs used in reference qualifiers but not in a reference item

# DOIs (P27) that appear in references on 'interacts with' (P19) statements
# where
#   - the reference has no stated-in (P23) value, and
#   - no reference item (instance of Q3) carries that DOI in P13.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT DISTINCT ?doi 
WHERE {
  ?host pps:P19 ?interaction.
  ?interaction prov:wasDerivedFrom ?refnode.
  ?refnode ppsr:P27 ?doi.
  FILTER NOT EXISTS { ?refnode ppsr:P23 ?statedin }
  # This has to be NOT EXISTS: joining against all reference items and
  # filtering on "?doi NOT IN (?refdoi)" keeps a DOI as soon as ANY reference
  # item has a different DOI, and returns nothing at all when there are no
  # reference items. DOIs are compared case-insensitively.
  FILTER NOT EXISTS {
    ?ref ppt:P18 pp:Q3;
         ppt:P13 ?refdoi.
    FILTER (UCASE(STR(?refdoi)) = UCASE(STR(?doi)))
  }
}

Try it!


Statistics

Summary: Counts of hosts, symbionts, interactions, references

# Summary statistics: a single result row with the number of hosts,
# 'interacts with' statements, named symbionts, unknown symbionts,
# and reference items.
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>

SELECT ?hostCount ?statementCount ?symbiontCount ?unknownCount ?refCount
WHERE {
  # Hosts and their 'interacts with' (P19) statement nodes; statements that
  # point to an unknown symbiont are counted here as well.
  {
    SELECT (COUNT(DISTINCT ?subject) AS ?hostCount)
           (COUNT(DISTINCT ?stmt) AS ?statementCount)
    WHERE { ?subject pps:P19 ?stmt . }
  }
  # Named symbionts: distinct P19 values that are not blank nodes.
  {
    SELECT (COUNT(DISTINCT ?partner) AS ?symbiontCount)
    WHERE {
      ?anyHost ppt:P19 ?partner .
      FILTER (!ISBLANK(?partner))
    }
  }
  # Unknown symbionts: distinct P19 values that are blank nodes.
  {
    SELECT (COUNT(DISTINCT ?partner) AS ?unknownCount)
    WHERE {
      ?anyHost ppt:P19 ?partner .
      FILTER (ISBLANK(?partner))
    }
  }
  # Reference items: instances (P18) of Q3.
  {
    SELECT (COUNT(DISTINCT ?ref) AS ?refCount)
    WHERE { ?ref ppt:P18 pp:Q3 . }
  }
}

Try it!


Host taxa sorted by number of interactions

Treat these counts with a pinch of salt, because species concepts differ.

# For each host, the number of distinct 'interacts with' (P19) values,
# largest count first.
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT ?host ?hostLabel (COUNT(DISTINCT ?symbiont) AS ?count)
WHERE {
  ?host ppt:P19 ?symbiont .
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
GROUP BY ?host ?hostLabel
ORDER BY DESC(?count)

Try it!


Symbiont taxa sorted by number of interactions

# For each 'interacts with' (P19) value, the number of distinct hosts that
# point to it, largest count first.
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT ?symbiont ?symbiontLabel (COUNT(DISTINCT ?host) AS ?count)
WHERE {
  ?host ppt:P19 ?symbiont .
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
  }
}
GROUP BY ?symbiont ?symbiontLabel
ORDER BY DESC(?count)

Try it!