Project:SPARQL/examples: Difference between revisions

From Protist-Prokaryote Symbiosis Database
Jump to navigation Jump to search
 
(44 intermediate revisions by the same user not shown)
Line 1: Line 1:
==Showcase visualizations==
===Graph (network) of all hosts connected to Megaira polyxenophila and their other interactions===
Megaira polyxenophila [[Item:Q141|Q141]] is an intracellular bacterium in a wide diversity of host taxa. This query includes sub-species of Megaira polyxenophila.
<sparql tryit="1">
#defaultView:Graph
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
SELECT DISTINCT ?host ?hostLabel ?pic ?linkto ?linktoLabel ?edgeLabel WHERE {
  ?host pps:P19 ?s.
  ?s ppss:P19/ppt:P29* pp:Q141 # include items with parent taxon Q141
  OPTIONAL {
    ?host pps:P19 ?statement.
    ?statement ppss:P19 ?linkto.
    OPTIONAL { ?statement ppsq:P20 ?edge }
  }
  OPTIONAL { ?host ppt:P33 ?pic }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
</sparql>
===Bubble chart of numbers of interactions by symbiont localization===
<sparql tryit="1">
#defaultView:BubbleChart
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>
SELECT DISTINCT ?localization ?localizationLabel (COUNT (DISTINCT ?interaction) AS ?count)  WHERE {
  ?host pps:P19 ?interaction.
  ?interaction ppss:P19 ?symbiont.
  ?interaction ppsq:P20 ?localization.
  FILTER (!ISBLANK (?localization))
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} GROUP BY ?localization ?localizationLabel
</sparql>
===List interactions where host is a member of Blattodea (cockroaches and termites), according to Wikidata===
<sparql tryit="1">
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
SELECT DISTINCT ?host ?hostLabel ?symb ?symbLabel ?wditem
WITH {
  SELECT DISTINCT ?wditem WHERE {
    SERVICE <https://query.wikidata.org/sparql> {
      ?wditem wdt:P171* wd:Q25309.
    }
  }
} AS %wd
WHERE {
  INCLUDE %wd
  ?host ppt:P19 ?symb.
  ?host ppt:P29 ?parent.
  ?parent ppt:P2 ?wditem.
  ?host rdfs:label ?hostLabel.
  ?symb rdfs:label ?symbLabel.
} ORDER BY ?symbLabel ?hostLabel
</sparql>
===List interactions where symbiont is member of Alphaproteobacteria according to Wikidata===
Get items with a parent taxon that is mapped to a taxon in Wikidata, relying on the taxonomy that is given there.
We rely on parent taxon being mapped to Wikidata, rather than the taxon itself being mapped to Wikidata, because not all taxon items represented in this Wikibase are suitable for being added to Wikidata, e.g. informally named taxonomic units, even though there is some information on their taxonomic position.
Query works without timeout even with large taxa like Bacteria.
<sparql tryit="1">
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
SELECT DISTINCT ?host ?hostLabel ?symb ?symbLabel ?wditem
WITH {
  SELECT DISTINCT ?wditem WHERE {
    SERVICE <https://query.wikidata.org/sparql> {
      ?wditem wdt:P171* wd:Q306579.
    }
  }
} AS %wd
WHERE {
  INCLUDE %wd
  ?host ppt:P19 ?symb.
  ?symb ppt:P29 ?parent.
  ?parent ppt:P2 ?wditem.
  ?host rdfs:label ?hostLabel.
  ?symb rdfs:label ?symbLabel.
} ORDER BY ?symbLabel ?hostLabel
</sparql>
===List symbionts localized in host nucleus and their taxonomic class===
Placing Wikidata search statement in the WITH or WHERE clause appears to depend on how large the expected results are. Want to avoid time-outs on the Wikidata query.
<sparql tryit="1">
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>
SELECT DISTINCT ?host ?hostLabel ?symbiont ?symbiontLabel ?parentClass ?parentClassLabel ?localizationLabel
WITH {
  SELECT DISTINCT ?host ?hostLabel ?symbiont ?symbiontLabel ?wditem ?localizationLabel WHERE {
  ?host pps:P19 ?interaction.
  ?interaction ppss:P19 ?symbiont.
  ?symbiont ppt:P29 ?parent.
  ?parent ppt:P2 ?wditem.
  ?interaction ppsq:P20 ?localization.
  # nucleus or subclass of or part of nucleus
  ?localization ppt:P24*/ppt:P47* pp:Q78
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
    ?host rdfs:label ?hostLabel.
    ?symbiont rdfs:label ?symbiontLabel.
    ?localization rdfs:label ?localizationLabel.
  }
  }
} AS %wd
WHERE {
  INCLUDE %wd
  SERVICE <https://query.wikidata.org/sparql> {
    OPTIONAL {
      ?wditem wdt:P171* ?parentClass.
      ?parentClass wdt:P105 wd:Q37517.
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
        ?parentClass rdfs:label ?parentClassLabel.
      }
    }
  }
} ORDER BY ?parentClassLabel ?hostLabel
</sparql>
===List interactions where publication co-authored by Vittorio Boscaro is cited===
Example of using a federated query with Wikidata items.
<sparql tryit="1">
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>
SELECT DISTINCT ?host ?hostLabel ?symb ?symbLabel ?statedin ?wdref
WITH {
  SELECT DISTINCT ?host ?hostLabel ?symb ?symbLabel ?statedin ?wdref
  WHERE {
    ?host pps:P19 ?interaction.
    ?host rdfs:label ?hostLabel.
    ?interaction ppss:P19 ?symb.
    ?symb rdfs:label ?symbLabel.
    ?interaction prov:wasDerivedFrom ?refnode.
    ?refnode ppsr:P23 ?statedin.
    # Wikidata URI
    ?statedin ppt:P2 ?wdref.
  }
} AS %refs
WHERE {
  INCLUDE %refs
  SERVICE <https://query.wikidata.org/sparql> {
    # Wikidata items with author Q30513710
    ?wdref wdt:P50 wd:Q30513710.
  }
}
</sparql>
===List interactions characterized by FISH but not sequencing===
<sparql tryit="1">
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
SELECT DISTINCT ?host ?hostLabel ?symb ?symbLabel WHERE {
  ?host pps:P19 ?s.
  ?s ppsq:P22 pp:Q10. # FISH
  ?s ppss:P19 ?symb.
  # Use this form because an interaction may be documented in multiple statements
  FILTER NOT EXISTS {
    ?host pps:P19 ?t.
    ?t ppss:P19 ?symb.
    ?t ppsq:P22 pp:Q13. # phylogenetic marker sequencing
  }
  FILTER NOT EXISTS {
    ?host pps:P19 ?u.
    ?u ppss:P19 ?symb.
    ?u ppsq:P22 pp:Q12. # metagenome sequencing
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
</sparql>
==Lists==
===List all taxon items===
===List all taxon items===
<sparql tryit="1">
<sparql tryit="1">
Line 5: Line 216:


SELECT DISTINCT ?item ?itemLabel WHERE {
SELECT DISTINCT ?item ?itemLabel WHERE {
   ?item ppt:P18 pp:Q2.
   ?item ppt:P18/ppt:P24* pp:Q2.
  # FILTER NOT EXISTS { ?item ppt:P18 pp:Q1488. } # optionally exclude higher taxa
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
}
</sparql>
===List all symbiotic hosts and their environmental terms===
Note that some entries have multiple claims for a single environment-related property, so the number of results will be inflated by the distinct combinations.
<sparql tryit="1">
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
SELECT DISTINCT ?item ?itemLabel ?envmat ?envmatLabel ?envloc ?envlocLabel ?envsys ?envsysLabel WHERE {
  ?item ppt:P19 ?symbiont.
  OPTIONAL { ?item ppt:P36 ?envmat }
  OPTIONAL { ?item ppt:P40 ?envloc }
  OPTIONAL { ?item ppt:P38 ?envsys }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} ORDER BY ?itemLabel
</sparql>
</sparql>


Line 77: Line 306:
</sparql>
</sparql>


===Draw graph of all hosts connected to Megaira polyxenophila and their other interactions===
===Count references and qualifier statements for each interaction===
 
Megaira polyxenophila [[Item:Q141|Q141]] is an intracellular bacterium in a wide diversity of host taxa. This query includes sub-species of Megaira polyxenophila.


<sparql tryit="1">
<sparql tryit="1">
#defaultView:Graph
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
 
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>
SELECT DISTINCT ?host ?hostLabel ?pic ?linkto ?linktoLabel ?edgeLabel WHERE {
PREFIX wikibase: <http://wikiba.se/ontology#>
   ?host pps:P19 ?statement.
PREFIX prov: <http://www.w3.org/ns/prov#>
   ?statement ppss:P19/ppt:P29* pp:Q141 # include items with parent taxon Q141
PREFIX bd: <http://www.bigdata.com/rdf#>
   OPTIONAL { ?statement ppsq:P20 ?edge }
SELECT ?host ?hostLabel ?symbiont ?symbiontLabel (COUNT(DISTINCT ?refnode) AS ?refnodeCount) (COUNT(DISTINCT ?localization) AS ?localizationCount) (COUNT(DISTINCT ?type) AS ?typeCount) (COUNT(DISTINCT ?methobj) AS ?methobjCount) (COUNT(DISTINCT ?methsub) AS ?methsubCount) WHERE {
   OPTIONAL { ?host ppt:P19 ?linkto }
   ?host pps:P19 ?interaction.
   OPTIONAL { ?host ppt:P33 ?pic }
   ?interaction ppss:P19 ?symbiont.
  OPTIONAL { ?interaction ppsq:P22 ?methobj. }
  OPTIONAL { ?interaction ppsq:P42 ?methsub. }
   OPTIONAL { ?interaction ppsq:P20 ?localization. }
   OPTIONAL { ?interaction ppsq:P26 ?type. }
   OPTIONAL {
    ?interaction prov:wasDerivedFrom ?refnode.
    OPTIONAL { ?refnode ppsr:P27 ?doi. }
    OPTIONAL { ?refnode ppsr:P23 ?statedin. }
  }
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
}
GROUP BY ?host ?hostLabel ?symbiont ?symbiontLabel
ORDER BY ?hostLabel
</sparql>
</sparql>


===Draw taxonomy tree from Euglenozoa===
 
==Maintenance queries==
 
===Generate QuickStatements to create new parent taxon items for symbionts based on Wikidata mapping===


<sparql tryit="1">
<sparql tryit="1">
#defaultView:Graph
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>


SELECT DISTINCT ?host ?hostLabel ?parent ?parentLabel WHERE {
SELECT DISTINCT ?qid ?Len ?P18 ?P2
  ?host ppt:P29/ppt:P29* pp:Q775.
WITH {
  ?host ppt:P29 ?parent.
  SELECT DISTINCT ?symb ?symbLabel ?wditem
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  WHERE {
}
    ?host ppt:P19 ?symb.
    FILTER NOT EXISTS { ?symb ppt:P29 ?parent. }
    ?symb ppt:P2 ?wditem.
    ?symb rdfs:label ?symbLabel.
  }
} AS %in
WHERE {
  INCLUDE %in
   SERVICE <https://query.wikidata.org/sparql> {
    ?wditem wdt:P171 ?wdparent.
    ?wdparent rdfs:label ?wdparentlabel.
    FILTER ( LANG(?wdparentlabel) = "en" )
  }
  # Check that a parent taxon item with the same wikidata mapping
  # does not already exist
  FILTER NOT EXISTS { ?qid ppt:P2 ?wdparent }
  BIND (CONCAT('"', STR(?wdparent), '"') as ?P2)
  BIND (STR("Q1488") AS ?P18) # instance of higher taxon
  BIND (?wdparentlabel as ?Len)
} ORDER BY ?Len
</sparql>
</sparql>


==Maintenance queries==
===Generate QuickStatements to link symbiont items to their parent taxa based on Wikidata mappings===
 
===Generate QuickStatements to add "instance of: taxon" to items with NCBI taxon IDs===


<sparql tryit="1">
<sparql tryit="1">
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>


SELECT DISTINCT ?qid ?P18 WHERE {
SELECT DISTINCT ?qid ?P29
  ?host pps:P19 ?symbiont.
WITH {
  FILTER NOT EXISTS {
  SELECT DISTINCT ?symb ?symbLabel ?wditem ?qid
     ?host ppt:P18 ?instanceof.
  WHERE {
     ?instanceof ppt:P24* pp:Q2
    ?host ppt:P19 ?symb.
    FILTER NOT EXISTS { ?symb ppt:P29 ?parent. }
     ?symb ppt:P2 ?wditem.
    ?symb rdfs:label ?symbLabel.
    BIND (ENCODE_FOR_URI(REPLACE(STR(?symb), ".*Q", "Q")) AS ?qid)
  }
} AS %in
WHERE {
  INCLUDE %in
  SERVICE <https://query.wikidata.org/sparql> {
    ?wditem wdt:P171 ?wdparent.
    ?wdparent rdfs:label ?wdparentlabel.
     FILTER ( LANG(?wdparentlabel) = "en" )
  }
  OPTIONAL {
    ?parent ppt:P2 ?wdparent.
    ?parent rdfs:label ?parentLabel.
    BIND (ENCODE_FOR_URI(REPLACE(STR(?parent), ".*Q", "Q")) AS ?P29)
   }
   }
  ?host ppt:P11 ?ncbi
} ORDER BY ?Len
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  BIND (ENCODE_FOR_URI(REPLACE(STR(?host), ".*Q", "Q")) AS ?qid) # article item
  BIND ("Q2" AS ?P18)
}
</sparql>
</sparql>


===Generate QuickStatements to add "instance of: placeholder taxon" to items with P28 statements===
===Graph of entity subclasses up to 2nd order===
 
P28 "representative sequence for placeholder taxon" is intended for placeholder taxa, i.e. items that are described in the literature like taxa or provisional taxa and have a published phylogenetic marker sequence, but where a matching taxon ID in NCBI Taxonomy is not yet assigned.


<sparql tryit="1">
<sparql tryit="1">
#defaultView:Graph
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
Line 150: Line 410:
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX gas: <http://www.bigdata.com/rdf/gas#>


SELECT DISTINCT ?qid ?P18 WHERE {
SELECT DISTINCT ?item ?itemLabel ?linkTo ?linkToLabel WHERE {
   ?item ppt:P28 ?repseq
   SERVICE gas:service {
  FILTER NOT EXISTS {
    gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.SSSP" ;
    ?item ppt:P18 pp:Q56
                gas:in pp:Q610;
                gas:traversalDirection "Reverse" ;
                gas:out ?item ;
                gas:out1 ?depth ;
                gas:maxIterations 2 ;
                gas:linkType ppt:P24 .
   }
   }
  OPTIONAL { ?item ppt:P24 ?linkTo }
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  BIND (ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid) # article item
  BIND ("Q56" AS ?P18)
}
}
</sparql>
</sparql>


===Generate QuickStatements to link interaction hosts to their genera===
===Taxon items without parent taxon===


Split labels, e.g. "Paramecium caudatum" on whitespace, take the first word as a putative genus name, and look for items with taxon rank "genus" that match the putative genus name. Have to avoid assigning labels like "Cryptotermes symbiont", instead use "symbiont of Cryptotermes" to avoid false matches.
Results should be empty.


<sparql tryit="1">
<sparql tryit="1">
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
 
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?item ?qid ?itemLabel WHERE {
PREFIX bd: <http://www.bigdata.com/rdf#>
  ?item ppt:P18/ppt:P24* pp:Q2.
SELECT DISTINCT ?qid ?P29 WHERE {
  FILTER NOT EXISTS { ?item ppt:P18 pp:Q1488. } # not a higher taxon
  {
  FILTER NOT EXISTS {
    SELECT DISTINCT ?item ?qid ?itemLabel ?maybeGenus WHERE {
    ?item ppt:P29 ?parent.
      ?item ppt:P19 ?symbiont.
      FILTER(NOT EXISTS { ?item ppt:P29 ?parent. })
      BIND(ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
        ?item rdfs:label ?itemLabel.
      }
      BIND(STRBEFORE(?itemLabel, " ") AS ?maybeGenus)
    }
   }
   }
  {
   BIND (ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
    SELECT DISTINCT ?item2 ?item2Label WHERE {
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
      ?item2 ppt:P32 pp:Q601;
        rdfs:label ?item2Label.
      FILTER((LANG(?item2Label)) = "en")
    }
  }
  FILTER(?item2Label = ?maybeGenus)
   BIND(ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
   BIND(ENCODE_FOR_URI(REPLACE(STR(?item2), ".*Q", "Q")) AS ?P29)
}
}
</sparql>
</sparql>


===Generate QuickStatements to create items for DOIs found in references without corresponding items===
===Formally named taxon items not mapped to Wikidata===


This checks for items in Wikidata with the same DOI. If references are not in Wikidata, consider adding them first with Scholia or Sourcemd.
Should map to Wikidata URI with [[Property:P2]]; create taxon item on Wikidata if necessary.
 
Need to fix this, the lookup of DOIs is not working properly.


<sparql tryit="1">
<sparql tryit="1">
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>


SELECT DISTINCT ?qid ?Len ?Aen ?P18 ?P13 ?P2
SELECT DISTINCT ?item ?qid ?itemLabel WHERE {
WITH {
   ?item ppt:P18 pp:Q2206.
   SELECT DISTINCT ?host ?DOI
  #FILTER NOT EXISTS { ?item ppt:P18 pp:Q1488. } # not a higher taxon
  WHERE {
  FILTER NOT EXISTS {
    ?host pps:P19 ?interaction.
     ?item ppt:P2 ?parent.
    ?interaction prov:wasDerivedFrom ?refnode.
    ?refnode ppsr:P27 ?doi.
    # check that no reference item already linked in this statement
    FILTER NOT EXISTS { ?refnode ppsr:P23 ?statedin }
    # check that no reference with this DOI already exists
    ?ref ppt:P18 pp:Q3;
        ppt:P13 ?refdoi.  
    FILTER (?doi NOT IN (?refdoi))
     # wikidata standardizes DOI in uppercase
    BIND (UCASE(STR(?doi)) AS ?DOI)
   }
   }
} AS %dois
   BIND (ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
WHERE {
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  INCLUDE %dois
  SERVICE <https://query.wikidata.org/sparql> {
    # find wikidata items with this DOI
    ?wdref wdt:P356 ?DOI;
          rdfs:label ?wdlabel.
    FILTER ( LANG(?wdlabel) = "en" )
  }
   BIND ("" AS ?qid)
  # Labels and aliases do not need to be surrounded by quotes
  # unless they contain double quotes or commas
  # Replace double quotes and commas in label because lazy
  BIND (REPLACE(REPLACE(STR(?wdlabel), '\"', ""), ",", "") AS ?Len)
  BIND (STR(?DOI) AS ?Aen)
   # Strings, external identifiers, and URLs must be surrounded by three double quotes
  # The Query Service CSV export automagically converts single quotes to triple
  BIND (CONCAT('\"', STR(?DOI), '\"') AS ?P13)
  BIND (CONCAT('\"', STR(?wdref), '\"') AS ?P2)
  BIND ("Q3" as ?P18) # instance of reference
}
}
</sparql>
</sparql>


===List all classes===
===Items with representative sequence record without reference===


<sparql tryit="1">
Representative sequence claims for a taxon should be referenced to ensure that the taxon concept is traceable.  
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
 
SELECT DISTINCT ?class ?classLabel  WHERE {
  ?item ppt:P18 ?class
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
</sparql>
 
===Taxon items without parent taxon===


<sparql tryit="1">
<sparql tryit="1">
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>


SELECT DISTINCT ?item ?qid ?itemLabel WHERE {
SELECT DISTINCT ?item ?itemLabel WHERE {
   ?item ppt:P18 pp:Q2.
   ?item pps:P34 ?repseq. # SSU rRNA sequence record
   FILTER NOT EXISTS {
  # or P46 for genome sequence
    ?item ppt:P29 ?parent.
   FILTER NOT EXISTS { ?repseq prov:wasDerivedFrom ?refnode. }
  }
   ?item rdfs:label ?itemLabel.
   BIND (ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
}
</sparql>
</sparql>
Line 301: Line 505:
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>


SELECT DISTINCT ?doi
SELECT DISTINCT ?host ?DOI
WITH {
  SELECT DISTINCT ?ref ?refdoi WHERE {
    ?ref ppt:P18 pp:Q3;
        ppt:P13 ?refdoi.
  }
} AS %refs
WHERE {
WHERE {
  INCLUDE %refs
   ?host pps:P19 ?interaction.
   ?host pps:P19 ?interaction.
   ?interaction prov:wasDerivedFrom ?refnode.
   ?interaction prov:wasDerivedFrom ?refnode.
   ?refnode ppsr:P27 ?doi.
   ?refnode ppsr:P27 ?doi.
  # check that no reference item already linked in this statement
   FILTER NOT EXISTS { ?refnode ppsr:P23 ?statedin }
   FILTER NOT EXISTS { ?refnode ppsr:P23 ?statedin }
   FILTER (?doi NOT IN (?refdoi))
   # wikidata standardizes DOI in uppercase
  BIND (UCASE(STR(?doi)) AS ?DOI)
  # check that no reference with this DOI already exists
  FILTER NOT EXISTS {
    ?ref ppt:P18 pp:Q3;
        ppt:P13 ?DOI.
  }
}
}
</sparql>
</sparql>
Line 329: Line 533:
SELECT ?hostCount ?statementCount ?symbiontCount ?unknownCount ?refCount WHERE {
SELECT ?hostCount ?statementCount ?symbiontCount ?unknownCount ?refCount WHERE {
   # distinct host taxa and 'interacts with' statements, including to unknown symbionts
   # distinct host taxa and 'interacts with' statements, including to unknown symbionts
   { SELECT (COUNT (DISTINCT ?host) AS ?hostCount) (COUNT (DISTINCT ?s) AS ?statementCount) WHERE { ?host pps:P19 ?s. } }
  # alternative paths to capture both "interacts with" (P19) and "interacts experimentally with" (P41)
   { SELECT (COUNT (DISTINCT ?host) AS ?hostCount) (COUNT (DISTINCT ?s) AS ?statementCount) WHERE { ?host pps:P19|pps:P41 ?s. } }
   # distinct named symbiont items
   # distinct named symbiont items
   { SELECT (COUNT (DISTINCT ?symbiont) AS ?symbiontCount) WHERE { ?h ppt:P19 ?symbiont . FILTER (!ISBLANK (?symbiont)) } }
   { SELECT (COUNT (DISTINCT ?symbiont) AS ?symbiontCount) WHERE { ?h ppt:P19|ppt:P41 ?symbiont . FILTER (!ISBLANK (?symbiont)) } }
   # statements where symbiont is unknown
   # statements where symbiont is unknown
   { SELECT (COUNT (DISTINCT ?symbiont) AS ?unknownCount) WHERE { ?h ppt:P19 ?symbiont . FILTER (ISBLANK (?symbiont)) } }
   { SELECT (COUNT (DISTINCT ?symbiont) AS ?unknownCount) WHERE { ?h ppt:P19|ppt:P41 ?symbiont . FILTER (ISBLANK (?symbiont)) } }
   # reference items
   # reference items
   { SELECT (COUNT (DISTINCT ?ref) AS ?refCount) WHERE { ?ref ppt:P18 pp:Q3 } }
   { SELECT (COUNT (DISTINCT ?ref) AS ?refCount) WHERE { ?ref ppt:P18 pp:Q3 } }
}
}
</sparql>
===Summary: Symbiont taxa by phylum, according to Wikidata===
NB: Wikidata taxonomy is sometimes conflicting/incomplete, especially for eukaryote higher taxonomy.
<sparql tryit="1">
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>
SELECT DISTINCT ?parentPhylum ?parentPhylumLabel (COUNT (DISTINCT (?symb)) AS ?count )
WITH {
  SELECT DISTINCT ?symb ?symbLabel ?wditem WHERE {
    ?host ppt:P19|ppt:P41 ?symb.
    ?symb ppt:P29 ?parent.
    ?parent ppt:P2 ?wditem.
    SERVICE wikibase:label {
      bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
      ?symb rdfs:label ?symbLabel.
    }
  }
} AS %wd
WHERE {
  INCLUDE %wd
  SERVICE <https://query.wikidata.org/sparql> {
    OPTIONAL {
      VALUES ?phylumOrDivision { wd:Q334460 wd:Q38348 }
      ?wditem wdt:P171* ?parentPhylum.
      ?parentPhylum wdt:P105 ?phylumOrDivision.
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
        ?parentPhylum rdfs:label ?parentPhylumLabel.
      }
    }
  }
} GROUP BY ?parentPhylum ?parentPhylumLabel ORDER BY DESC(?count)
</sparql>
</sparql>


===Host taxa sorted by number of interactions===
===Host taxa sorted by number of interactions===


Treat these counts with a pinch of salt, because species concepts differ.
Counting natural interactions only. Treat these counts with a pinch of salt, because species concepts differ.


<sparql tryit="1">
<sparql tryit="1">
Line 353: Line 598:


===Symbiont taxa sorted by number of interactions===
===Symbiont taxa sorted by number of interactions===
Counting natural interactions only.


<sparql tryit="1">
<sparql tryit="1">
Line 361: Line 608:
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} GROUP BY ?symbiont ?symbiontLabel ORDER BY DESC(?count)
} GROUP BY ?symbiont ?symbiontLabel ORDER BY DESC(?count)
</sparql>
===Top 20 references cited in interaction statements===
<sparql tryit="1">
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>
SELECT ?statedin ?statedinLabel (COUNT(DISTINCT ?interaction) AS ?count) WHERE {
  ?host pps:P19 ?interaction.
  ?interaction ppss:P19 ?symbiont.
  ?interaction prov:wasDerivedFrom ?refnode.
  ?refnode ppsr:P23 ?statedin.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?statedin ?statedinLabel
ORDER BY DESC(?count)
LIMIT 20
</sparql>
</sparql>

Latest revision as of 11:26, 22 July 2024

Showcase visualizations

Graph (network) of all hosts connected to Megaira polyxenophila and their other interactions

Megaira polyxenophila Q141 is an intracellular bacterium in a wide diversity of host taxa. This query includes sub-species of Megaira polyxenophila.

#defaultView:Graph
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>

SELECT DISTINCT ?host ?hostLabel ?pic ?linkto ?linktoLabel ?edgeLabel WHERE {
  ?host pps:P19 ?s.
  ?s ppss:P19/ppt:P29* pp:Q141 # include items with parent taxon Q141
  OPTIONAL { 
    ?host pps:P19 ?statement.
    ?statement ppss:P19 ?linkto.
    OPTIONAL { ?statement ppsq:P20 ?edge }
  }
  OPTIONAL { ?host ppt:P33 ?pic }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Try it!


Bubble chart of numbers of interactions by symbiont localization

#defaultView:BubbleChart
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT DISTINCT ?localization ?localizationLabel (COUNT (DISTINCT ?interaction) AS ?count)  WHERE {
  ?host pps:P19 ?interaction.
  ?interaction ppss:P19 ?symbiont.
  ?interaction ppsq:P20 ?localization.
  FILTER (!ISBLANK (?localization))
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} GROUP BY ?localization ?localizationLabel

Try it!


List interactions where host is a member of Blattodea (cockroaches and termites), according to Wikidata

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?host ?hostLabel ?symb ?symbLabel ?wditem 
WITH {
  SELECT DISTINCT ?wditem WHERE {
    SERVICE <https://query.wikidata.org/sparql> {
      ?wditem wdt:P171* wd:Q25309.
    }
  }
} AS %wd
WHERE {
  INCLUDE %wd
  ?host ppt:P19 ?symb.
  ?host ppt:P29 ?parent.
  ?parent ppt:P2 ?wditem. 
  ?host rdfs:label ?hostLabel.
  ?symb rdfs:label ?symbLabel.
} ORDER BY ?symbLabel ?hostLabel

Try it!


List interactions where symbiont is member of Alphaproteobacteria according to Wikidata

Get items with a parent taxon that is mapped to a taxon in Wikidata, relying on the taxonomy that is given there.

We rely on parent taxon being mapped to Wikidata, rather than the taxon itself being mapped to Wikidata, because not all taxon items represented in this Wikibase are suitable for being added to Wikidata, e.g. informally named taxonomic units, even though there is some information on their taxonomic position.

Query works without timeout even with large taxa like Bacteria.

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?host ?hostLabel ?symb ?symbLabel ?wditem 
WITH {
  SELECT DISTINCT ?wditem WHERE {
    SERVICE <https://query.wikidata.org/sparql> {
      ?wditem wdt:P171* wd:Q306579.
    }
  }
} AS %wd
WHERE {
  INCLUDE %wd
  ?host ppt:P19 ?symb.
  ?symb ppt:P29 ?parent.
  ?parent ppt:P2 ?wditem. 
  ?host rdfs:label ?hostLabel.
  ?symb rdfs:label ?symbLabel.
} ORDER BY ?symbLabel ?hostLabel

Try it!


List symbionts localized in host nucleus and their taxonomic class

Placing Wikidata search statement in the WITH or WHERE clause appears to depend on how large the expected results are. Want to avoid time-outs on the Wikidata query.

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT DISTINCT ?host ?hostLabel ?symbiont ?symbiontLabel ?parentClass ?parentClassLabel ?localizationLabel
WITH {
  SELECT DISTINCT ?host ?hostLabel ?symbiont ?symbiontLabel ?wditem ?localizationLabel WHERE { 
  ?host pps:P19 ?interaction.
  ?interaction ppss:P19 ?symbiont.
  ?symbiont ppt:P29 ?parent.
  ?parent ppt:P2 ?wditem.
  ?interaction ppsq:P20 ?localization.
  # nucleus or subclass of or part of nucleus
  ?localization ppt:P24*/ppt:P47* pp:Q78
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
    ?host rdfs:label ?hostLabel.
    ?symbiont rdfs:label ?symbiontLabel.
    ?localization rdfs:label ?localizationLabel.
  }
  }
} AS %wd
WHERE {
  INCLUDE %wd
  SERVICE <https://query.wikidata.org/sparql> {
    OPTIONAL {
      ?wditem wdt:P171* ?parentClass.
      ?parentClass wdt:P105 wd:Q37517.
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
        ?parentClass rdfs:label ?parentClassLabel. 
      }
    }
  }
} ORDER BY ?parentClassLabel ?hostLabel

Try it!


List interactions where publication co-authored by Vittorio Boscaro is cited

Example of using a federated query with Wikidata items.

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT DISTINCT ?host ?hostLabel ?symb ?symbLabel ?statedin ?wdref
WITH {
  SELECT DISTINCT ?host ?hostLabel ?symb ?symbLabel ?statedin ?wdref 
  WHERE {
    ?host pps:P19 ?interaction.
    ?host rdfs:label ?hostLabel.
    ?interaction ppss:P19 ?symb. 
    ?symb rdfs:label ?symbLabel. 
    ?interaction prov:wasDerivedFrom ?refnode.
    ?refnode ppsr:P23 ?statedin.
    # Wikidata URI
    ?statedin ppt:P2 ?wdref.
  }
} AS %refs
WHERE {
  INCLUDE %refs
  SERVICE <https://query.wikidata.org/sparql> {
    # Wikidata items with author Q30513710
    ?wdref wdt:P50 wd:Q30513710.
  }
}

Try it!


List interactions characterized by FISH but not sequencing

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>

SELECT DISTINCT ?host ?hostLabel ?symb ?symbLabel WHERE {
  ?host pps:P19 ?s.
  ?s ppsq:P22 pp:Q10. # FISH
  ?s ppss:P19 ?symb.
  # Use this form because an interaction may be documented in multiple statements
  FILTER NOT EXISTS {
    ?host pps:P19 ?t.
    ?t ppss:P19 ?symb.
    ?t ppsq:P22 pp:Q13. # phylogenetic marker sequencing
  }
  FILTER NOT EXISTS {
    ?host pps:P19 ?u.
    ?u ppss:P19 ?symb.
    ?u ppsq:P22 pp:Q12. # metagenome sequencing
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Try it!


Lists

List all taxon items

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?itemLabel WHERE {
  ?item ppt:P18/ppt:P24* pp:Q2.
  # FILTER NOT EXISTS { ?item ppt:P18 pp:Q1488. } # optionally exclude higher taxa
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Try it!


List all symbiotic hosts and their environmental terms

Note that some entries have multiple claims for a single environment-related property, so the number of results will be inflated by the distinct combinations.

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?itemLabel ?envmat ?envmatLabel ?envloc ?envlocLabel ?envsys ?envsysLabel WHERE {
  ?item ppt:P19 ?symbiont.
  OPTIONAL { ?item ppt:P36 ?envmat }
  OPTIONAL { ?item ppt:P40 ?envloc } 
  OPTIONAL { ?item ppt:P38 ?envsys } 
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} ORDER BY ?itemLabel

Try it!


List all interaction types and RO mapping if available

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?itemLabel ?roterm WHERE {
  ?item ppt:P18 pp:Q1.
  OPTIONAL {
    ?item ppt:P16 ?roterm
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Try it!


List all body parts and GO mapping if available

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?itemLabel ?goterm WHERE {
  ?item ppt:P18 pp:Q5.
  OPTIONAL {
    ?item ppt:P17 ?goterm
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Try it!


List all analytical techniques and Wikidata mapping if available

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?itemLabel ?wikidata WHERE {
  ?item ppt:P18 pp:Q6.
  OPTIONAL {
    ?item ppt:P2 ?wikidata
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Try it!


List all interactions, optionally the localization, interaction type, and references

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT DISTINCT ?host ?hostLabel ?typeLabel ?symbiontLabel ?localizationLabel ?doi ?statedin WHERE {
  ?host pps:P19 ?interaction.
  ?interaction ppss:P19 ?symbiont.
  OPTIONAL { ?interaction ppsq:P20 ?localization. }
  OPTIONAL { ?interaction ppsq:P26 ?type. }
  OPTIONAL {
    ?interaction prov:wasDerivedFrom ?refnode.
    OPTIONAL { ?refnode ppsr:P27 ?doi }
    OPTIONAL { ?refnode ppsr:P23 ?statedin }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Try it!


Count references and qualifier statements for each interaction

PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX prov: <http://www.w3.org/ns/prov#>
PREFIX bd: <http://www.bigdata.com/rdf#>
SELECT ?host ?hostLabel ?symbiont ?symbiontLabel (COUNT(DISTINCT ?refnode) AS ?refnodeCount) (COUNT(DISTINCT ?localization) AS ?localizationCount) (COUNT(DISTINCT ?type) AS ?typeCount) (COUNT(DISTINCT ?methobj) AS ?methobjCount) (COUNT(DISTINCT ?methsub) AS ?methsubCount) WHERE {
  ?host pps:P19 ?interaction.
  ?interaction ppss:P19 ?symbiont.
  OPTIONAL { ?interaction ppsq:P22 ?methobj. }
  OPTIONAL { ?interaction ppsq:P42 ?methsub. }
  OPTIONAL { ?interaction ppsq:P20 ?localization. }
  OPTIONAL { ?interaction ppsq:P26 ?type. }
  OPTIONAL {
    ?interaction prov:wasDerivedFrom ?refnode.
    OPTIONAL { ?refnode ppsr:P27 ?doi. }
    OPTIONAL { ?refnode ppsr:P23 ?statedin. }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?host ?hostLabel ?symbiont ?symbiontLabel
ORDER BY ?hostLabel

Try it!


Maintenance queries

Generate QuickStatements to create new parent taxon items for symbionts based on Wikidata mapping

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?qid ?Len ?P18 ?P2
WITH {
  SELECT DISTINCT ?symb ?symbLabel ?wditem 
  WHERE {
    ?host ppt:P19 ?symb.
    FILTER NOT EXISTS { ?symb ppt:P29 ?parent. }
    ?symb ppt:P2 ?wditem.
    ?symb rdfs:label ?symbLabel.
  }
} AS %in
WHERE {
  INCLUDE %in
  SERVICE <https://query.wikidata.org/sparql> {
    ?wditem wdt:P171 ?wdparent.
    ?wdparent rdfs:label ?wdparentlabel.
    FILTER ( LANG(?wdparentlabel) = "en" )
  }
  # Check that a parent taxon item with the same wikidata mapping
  # does not already exist
  FILTER NOT EXISTS { ?qid ppt:P2 ?wdparent }
  BIND (CONCAT('"', STR(?wdparent), '"') as ?P2)
  BIND (STR("Q1488") AS ?P18) # instance of higher taxon 
  BIND (?wdparentlabel as ?Len)
} ORDER BY ?Len

Try it!


Generate QuickStatements to link symbiont items to their parent taxa based on Wikidata mappings

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?qid ?P29
WITH {
  SELECT DISTINCT ?symb ?symbLabel ?wditem ?qid
  WHERE {
    ?host ppt:P19 ?symb.
    FILTER NOT EXISTS { ?symb ppt:P29 ?parent. }
    ?symb ppt:P2 ?wditem.
    ?symb rdfs:label ?symbLabel.
    BIND (ENCODE_FOR_URI(REPLACE(STR(?symb), ".*Q", "Q")) AS ?qid)
  }
} AS %in
WHERE {
  INCLUDE %in
  SERVICE <https://query.wikidata.org/sparql> {
    ?wditem wdt:P171 ?wdparent.
    ?wdparent rdfs:label ?wdparentlabel.
    FILTER ( LANG(?wdparentlabel) = "en" )
  }
  OPTIONAL { 
    ?parent ppt:P2 ?wdparent.
    ?parent rdfs:label ?parentLabel.
    BIND (ENCODE_FOR_URI(REPLACE(STR(?parent), ".*Q", "Q")) AS ?P29)
  }
} ORDER BY ?Len

Try it!


Graph of entity subclasses up to 2nd order

#defaultView:Graph
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX gas: <http://www.bigdata.com/rdf/gas#>

SELECT DISTINCT ?item ?itemLabel ?linkTo ?linkToLabel WHERE {
  SERVICE gas:service {
    gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.SSSP" ;
                gas:in pp:Q610;
                gas:traversalDirection "Reverse" ;
                gas:out ?item ;
                gas:out1 ?depth ;
                gas:maxIterations 2 ;
                gas:linkType ppt:P24 .
  }
  OPTIONAL { ?item ppt:P24 ?linkTo }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Try it!


Taxon items without parent taxon

Results should be empty.

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?qid ?itemLabel WHERE {
  ?item ppt:P18/ppt:P24* pp:Q2.
  FILTER NOT EXISTS { ?item ppt:P18 pp:Q1488. } # not a higher taxon
  FILTER NOT EXISTS {
    ?item ppt:P29 ?parent.
  }
  BIND (ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Try it!


Formally named taxon items not mapped to Wikidata

Should map to Wikidata URI with Property:P2; create taxon item on Wikidata if necessary.

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?qid ?itemLabel WHERE {
  ?item ppt:P18 pp:Q2206.
  #FILTER NOT EXISTS { ?item ppt:P18 pp:Q1488. } # not a higher taxon
  FILTER NOT EXISTS {
    ?item ppt:P2 ?parent.
  }
  BIND (ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Try it!


Items with representative sequence record without reference

Representative sequence claims for a taxon should be referenced to ensure that the taxon concept is traceable.

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>

SELECT DISTINCT ?item ?itemLabel WHERE {
  ?item pps:P34 ?repseq. # SSU rRNA sequence record
  # or P46 for genome sequence
  FILTER NOT EXISTS { ?repseq prov:wasDerivedFrom ?refnode. }
  ?item rdfs:label ?itemLabel.
}

Try it!


Items that are in a maintenance class

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT DISTINCT ?item ?itemLabel ?classLabel  WHERE {
  ?item ppt:P18 ?class.
  ?class ppt:P24* pp:Q657
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

Try it!


DOIs used in reference qualifiers but not in a reference item

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT DISTINCT ?host ?DOI 
WHERE {
  ?host pps:P19 ?interaction.
  ?interaction prov:wasDerivedFrom ?refnode.
  ?refnode ppsr:P27 ?doi.
  # check that no reference item already linked in this statement
  FILTER NOT EXISTS { ?refnode ppsr:P23 ?statedin }
  # wikidata standardizes DOI in uppercase
  BIND (UCASE(STR(?doi)) AS ?DOI)
  # check that no reference with this DOI already exists
  FILTER NOT EXISTS {
    ?ref ppt:P18 pp:Q3;
         ppt:P13 ?DOI. 
  }
}

Try it!


Statistics

Summary: Counts of hosts, symbionts, interactions, references

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>

SELECT ?hostCount ?statementCount ?symbiontCount ?unknownCount ?refCount WHERE {
  # distinct host taxa and 'interacts with' statements, including to unknown symbionts
  # alternative paths to capture both "interacts with" (P19) and "interacts experimentally with" (P41)
  { SELECT (COUNT (DISTINCT ?host) AS ?hostCount) (COUNT (DISTINCT ?s) AS ?statementCount) WHERE { ?host pps:P19|pps:P41 ?s. } }
  # distinct named symbiont items
  { SELECT (COUNT (DISTINCT ?symbiont) AS ?symbiontCount) WHERE { ?h ppt:P19|ppt:P41 ?symbiont . FILTER (!ISBLANK (?symbiont)) } }
  # statements where symbiont is unknown
  { SELECT (COUNT (DISTINCT ?symbiont) AS ?unknownCount) WHERE { ?h ppt:P19|ppt:P41 ?symbiont . FILTER (ISBLANK (?symbiont)) } }
  # reference items
  { SELECT (COUNT (DISTINCT ?ref) AS ?refCount) WHERE { ?ref ppt:P18 pp:Q3 } }
}

Try it!


Summary: Symbiont taxa by phylum, according to Wikidata

NB: Wikidata taxonomy is sometimes conflicting/incomplete, especially for eukaryote higher taxonomy.

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT DISTINCT ?parentPhylum ?parentPhylumLabel (COUNT (DISTINCT (?symb)) AS ?count )
WITH {
  SELECT DISTINCT ?symb ?symbLabel ?wditem WHERE { 
    ?host ppt:P19|ppt:P41 ?symb.
    ?symb ppt:P29 ?parent.
    ?parent ppt:P2 ?wditem.
    SERVICE wikibase:label {
      bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
      ?symb rdfs:label ?symbLabel.
    }
  }
} AS %wd
WHERE {
  INCLUDE %wd
  SERVICE <https://query.wikidata.org/sparql> {
    OPTIONAL {
      VALUES ?phylumOrDivision { wd:Q334460 wd:Q38348 }
      ?wditem wdt:P171* ?parentPhylum.
      ?parentPhylum wdt:P105 ?phylumOrDivision.
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
        ?parentPhylum rdfs:label ?parentPhylumLabel. 
      }
    }
  }
} GROUP BY ?parentPhylum ?parentPhylumLabel ORDER BY DESC(?count)

Try it!


Host taxa sorted by number of interactions

Counting natural interactions only. Treat these counts with a pinch of salt, because species concepts differ.

PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT  ?host ?hostLabel (COUNT (DISTINCT ?symbiont) AS ?count) WHERE {
  ?host ppt:P19 ?symbiont.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} GROUP BY ?host ?hostLabel ORDER BY DESC(?count)

Try it!


Symbiont taxa sorted by number of interactions

Counting natural interactions only.

PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT  ?symbiont ?symbiontLabel (COUNT (DISTINCT ?host) AS ?count) WHERE {
  ?host ppt:P19 ?symbiont.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} GROUP BY ?symbiont ?symbiontLabel ORDER BY DESC(?count)

Try it!


Top 20 references cited in interaction statements

PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT ?statedin ?statedinLabel (COUNT(DISTINCT ?interaction) AS ?count) WHERE {
  ?host pps:P19 ?interaction.
  ?interaction ppss:P19 ?symbiont.
  ?interaction prov:wasDerivedFrom ?refnode.
  ?refnode ppsr:P23 ?statedin.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
GROUP BY ?statedin ?statedinLabel
ORDER BY DESC(?count)
LIMIT 20

Try it!