Project:SPARQL/other: Difference between revisions

From Protist-Prokaryote Symbiosis Database
Jump to navigation Jump to search
No edit summary
No edit summary
 
Line 1: Line 1:
===Export for GloBI===
SPARQL query to export table with fields used by Globi. The output may need to be processed further.
<sparql tryit="1">
#List all interactions, optionally the localization, interaction type, and references
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>
#SELECT DISTINCT ?argumentTypeName ?sourceTaxon ?sourceTaxonName ?sourceWdmap ?sourceTaxonId ?interactionTypeName ?interactionTypeId ?targetTaxon ?targetTaxonName ?targetWdmap ?targetTaxonId ?sourceBodyPartName ?sourceBodyPartId ?referenceDoi ?referenceCitation WHERE {
SELECT DISTINCT ?argumentTypeName ?sourceTaxonName ?sourceTaxonId ?interactionTypeName ?interactionTypeId ?targetTaxonName ?targetTaxonId ?sourceBodyPartName ?sourceBodyPartId ?referenceDoi ?referenceCitation WHERE {
  ?sourceTaxon pps:P19 ?interaction.
  ?interaction ppss:P19 ?targetTaxon.
  OPTIONAL {
    ?interaction ppsq:P20 ?sourceBodyPart.
    ?sourceBodyPart rdfs:label ?sourceBodyPartName.
    OPTIONAL { ?sourceBodyPart ppt:P17 ?sourceBodyPartId. }
    OPTIONAL { ?sourceBodyPart ppt:P44 ?sourceBodyPartId. }
  }
  OPTIONAL {
    ?interaction ppsq:P26 ?type.
    ?type rdfs:label ?typeLabel.
    OPTIONAL { ?type ppt:P16 ?interactionTypeId. }
  }
  # if no interaction type is given, then default to "host of"
  BIND (EXISTS { ?interaction ppsq:P26 ?type. } AS ?existsType )
  BIND (IF(?existsType, ?typeLabel, "host of") AS ?interactionTypeName)
  OPTIONAL {
    ?interaction prov:wasDerivedFrom ?refnode.
    # OPTIONAL { ?refnode ppsr:P27 ?doi }
    OPTIONAL {
      ?refnode ppsr:P23 ?statedIn.
      OPTIONAL { ?statedIn ppt:P13 ?referenceDoi. }
      OPTIONAL { ?statedIn ppt:P14 ?referenceCitation. }
      BIND (STR("support") AS ?argumentTypeName)
    }
    OPTIONAL {
      ?refnode ppsr:P43 ?statedIn.
      OPTIONAL { ?statedIn ppt:P13 ?referenceDoi. }
      OPTIONAL { ?statedIn ppt:P14 ?referenceCitation. }
      BIND (STR("refute") AS ?argumentTypeName)
    }
  }
  OPTIONAL {
    ?sourceTaxon ppt:P11 ?sourceTaxon_ncbi.
    BIND ( CONCAT("NCBI:txid", STR(?sourceTaxon_ncbi)) as ?sourceTaxonId )
  }
  OPTIONAL {
    ?targetTaxon ppt:P11 ?targetTaxon_ncbi.
    BIND ( CONCAT("NCBI:txid", STR(?targetTaxon_ncbi)) as ?targetTaxonId )
  }
  ?sourceTaxon rdfs:label ?sourceTaxonName .
  OPTIONAL { ?targetTaxon rdfs:label ?targetTaxonName. }
  OPTIONAL { ?sourceTaxon ppt:P2 ?sourceWdmap . }
  OPTIONAL { ?targetTaxon ppt:P2 ?targetWdmap . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} ORDER BY ?sourceTaxonName ?targetTaxonName
</sparql>


===Draw taxonomy tree for Eukaryota (9-rank PR2 v5.0.0 taxonomy)===
===Draw taxonomy tree for Eukaryota (9-rank PR2 v5.0.0 taxonomy)===
Line 42: Line 107:
</sparql>
</sparql>


==Deprecated queries==


===Generate QuickStatements to link interaction hosts to their genera (DEPRECATED)===
===Generate QuickStatements to link interaction hosts to their genera (DEPRECATED)===

Latest revision as of 09:13, 18 July 2024

Export for GloBI

SPARQL query to export table with fields used by Globi. The output may need to be processed further.

#List all interactions, optionally the localization, interaction type, and references
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>


#SELECT DISTINCT ?argumentTypeName ?sourceTaxon ?sourceTaxonName ?sourceWdmap ?sourceTaxonId ?interactionTypeName ?interactionTypeId ?targetTaxon ?targetTaxonName ?targetWdmap ?targetTaxonId ?sourceBodyPartName ?sourceBodyPartId ?referenceDoi ?referenceCitation WHERE {
SELECT DISTINCT ?argumentTypeName ?sourceTaxonName ?sourceTaxonId ?interactionTypeName ?interactionTypeId ?targetTaxonName ?targetTaxonId ?sourceBodyPartName ?sourceBodyPartId ?referenceDoi ?referenceCitation WHERE {
  ?sourceTaxon pps:P19 ?interaction.
  ?interaction ppss:P19 ?targetTaxon.
  OPTIONAL {
    ?interaction ppsq:P20 ?sourceBodyPart. 
    ?sourceBodyPart rdfs:label ?sourceBodyPartName.
    OPTIONAL { ?sourceBodyPart ppt:P17 ?sourceBodyPartId. }
    OPTIONAL { ?sourceBodyPart ppt:P44 ?sourceBodyPartId. }
  }
  OPTIONAL {
    ?interaction ppsq:P26 ?type. 
    ?type rdfs:label ?typeLabel. 
    OPTIONAL { ?type ppt:P16 ?interactionTypeId. }
  }
  # if no interaction type is given, then default to "host of"
  BIND (EXISTS { ?interaction ppsq:P26 ?type. } AS ?existsType )
  BIND (IF(?existsType, ?typeLabel, "host of") AS ?interactionTypeName)
  OPTIONAL {
    ?interaction prov:wasDerivedFrom ?refnode.
    # OPTIONAL { ?refnode ppsr:P27 ?doi }
    OPTIONAL {
      ?refnode ppsr:P23 ?statedIn.
      OPTIONAL { ?statedIn ppt:P13 ?referenceDoi. }
      OPTIONAL { ?statedIn ppt:P14 ?referenceCitation. }
      BIND (STR("support") AS ?argumentTypeName)
    }
    OPTIONAL {
      ?refnode ppsr:P43 ?statedIn.
      OPTIONAL { ?statedIn ppt:P13 ?referenceDoi. }
      OPTIONAL { ?statedIn ppt:P14 ?referenceCitation. }
      BIND (STR("refute") AS ?argumentTypeName)
    }
  }
  OPTIONAL {
    ?sourceTaxon ppt:P11 ?sourceTaxon_ncbi. 
    BIND ( CONCAT("NCBI:txid", STR(?sourceTaxon_ncbi)) as ?sourceTaxonId )
  }
  OPTIONAL {
    ?targetTaxon ppt:P11 ?targetTaxon_ncbi. 
    BIND ( CONCAT("NCBI:txid", STR(?targetTaxon_ncbi)) as ?targetTaxonId )
  }
  ?sourceTaxon rdfs:label ?sourceTaxonName .
  OPTIONAL { ?targetTaxon rdfs:label ?targetTaxonName. }
  OPTIONAL { ?sourceTaxon ppt:P2 ?sourceWdmap . }
  OPTIONAL { ?targetTaxon ppt:P2 ?targetWdmap . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} ORDER BY ?sourceTaxonName ?targetTaxonName

Try it!



Draw taxonomy tree for Eukaryota (9-rank PR2 v5.0.0 taxonomy)

#defaultView:Tree
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>

SELECT ?root ?rootLabel ?item ?itemLabel ?item2 ?item2Label ?item3 ?item3Label ?item4 ?item4Label ?item5 ?item5Label ?item6 ?item6Label ?item7 ?item7Label ?item8 ?item8Label ?item9 ?item9Label WHERE {
  BIND(pp:Q609 AS ?root)
  BIND(ppt:P29 AS ?property)
  ?item ?property ?root.
  OPTIONAL {
    ?item2 ?property ?item.
    OPTIONAL {
      ?item3 ?property ?item2.
      OPTIONAL {
        ?item4 ?property ?item3.
        OPTIONAL {
          ?item5 ?property ?item4.
          OPTIONAL {
            ?item6 ?property ?item5.
            OPTIONAL {
              ?item7 ?property ?item6.
              OPTIONAL {
                ?item8 ?property ?item7.
                OPTIONAL {
                  ?item9 ?property ?item8.
                }
              }
            }
          }
        }
      }
    }
  }
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
  }
}
ORDER BY (?itemLabel) (?item2Label) (?item3Label) (?item4Label) (?item5Label) (?item6Label) (?item7Label) (?item8Label) (?item9Label)

Try it!


Deprecated queries

Generate QuickStatements to link interaction hosts to their genera (DEPRECATED)

Split labels, e.g. "Paramecium caudatum" on whitespace, take the first word as a putative genus name, and look for items with taxon rank "genus" that match the putative genus name. Have to avoid assigning labels like "Cryptotermes symbiont", instead use "symbiont of Cryptotermes" to avoid false matches.

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bd: <http://www.bigdata.com/rdf#>
SELECT DISTINCT ?qid ?P29 WHERE {
  {
    SELECT DISTINCT ?item ?qid ?itemLabel ?maybeGenus WHERE {
      ?item ppt:P19 ?symbiont.
      FILTER(NOT EXISTS { ?item ppt:P29 ?parent. })
      BIND(ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
        ?item rdfs:label ?itemLabel.
      }
      BIND(STRBEFORE(?itemLabel, " ") AS ?maybeGenus)
    }
  }
  {
    SELECT DISTINCT ?item2 ?item2Label WHERE {
      ?item2 ppt:P32 pp:Q601;
        rdfs:label ?item2Label.
      FILTER((LANG(?item2Label)) = "en")
    }
  }
  FILTER(?item2Label = ?maybeGenus)
  BIND(ENCODE_FOR_URI(REPLACE(STR(?item), ".*Q", "Q")) AS ?qid)
  BIND(ENCODE_FOR_URI(REPLACE(STR(?item2), ".*Q", "Q")) AS ?P29)
}

Try it!


Generate QuickStatements to create items for DOIs found in references without corresponding items (DEPRECATED)

This checks for items in Wikidata with the same DOI. If references are not in Wikidata, consider adding them first with Scholia or Sourcemd.

Use ppsdb-utils instead.

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT DISTINCT ?qid ?Len ?Aen ?P18 ?P13 ?P2
WITH {
  SELECT DISTINCT ?host ?DOI 
  WHERE {
    ?host pps:P19 ?interaction.
    ?interaction prov:wasDerivedFrom ?refnode.
    ?refnode ppsr:P27 ?doi.
    # check that no reference item already linked in this statement
    FILTER NOT EXISTS { ?refnode ppsr:P23 ?statedin }
    # wikidata standardizes DOI in uppercase
    BIND (UCASE(STR(?doi)) AS ?DOI)
    # check that no reference with this DOI already exists
    FILTER NOT EXISTS {
      ?ref ppt:P18 pp:Q3;
           ppt:P13 ?DOI. 
    }
  }
} AS %dois
WHERE {
  INCLUDE %dois
  SERVICE <https://query.wikidata.org/sparql> {
    # find wikidata items with this DOI
    ?wdref wdt:P356 ?DOI;
           rdfs:label ?wdlabel.
    FILTER ( LANG(?wdlabel) = "en" )
  }
  BIND ("" AS ?qid)
  # Labels and aliases do not need to be surrounded by quotes
  # unless they contain double quotes or commas
  # Replace double quotes and commas in label because lazy
  BIND (REPLACE(REPLACE(STR(?wdlabel), '\"', ""), ",", "") AS ?Len) 
  BIND (STR(?DOI) AS ?Aen)
  # Strings, external identifiers, and URLs must be surrounded by three double quotes
  # The Query Service CSV export automagically converts single quotes to triple
  BIND (CONCAT('\"', STR(?DOI), '\"') AS ?P13)
  BIND (CONCAT('\"', STR(?wdref), '\"') AS ?P2)
  BIND ("Q3" as ?P18) # instance of reference
}

Try it!


Generate QuickStatements to link taxa to Wikidata items by matching NCBI taxon ID (DEPRECATED)

PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>

SELECT DISTINCT ?qid ?P2
WITH {
  SELECT DISTINCT ?ppitem ?qid ?ncbi ?pplabel
  WHERE {
    ?ppitem ppt:P11 ?ncbi.
    ?ppitem rdfs:label ?pplabel.
    FILTER NOT EXISTS { ?ppitem ppt:P2 ?wikidata_mapping. }
    BIND (ENCODE_FOR_URI(REPLACE(STR(?ppitem), ".*Q", "Q")) AS ?qid)
  }
} AS %ncbis
WHERE {
  INCLUDE %ncbis
  SERVICE <https://query.wikidata.org/sparql> {
    ?wditem wdt:P685 ?ncbi;
           rdfs:label ?wdlabel.
    FILTER ( LANG(?wdlabel) = "en" )
    BIND (CONCAT('\"', STR(?wditem), '\"') AS ?P2)
  }
}

Try it!