ITIS
The Plant List (TPL)
Catalogue of Life (COL)
By default src_* functions use a path to the cached database file. You can alternatively pass in your own path if you’ve put it somewhere else.
ITIS
TPL
COL
sql_collect(src_itis, "select * from hierarchy limit 5")
#> # A tibble: 5 x 5
#> hierarchy_string tsn parent_tsn level childrencount
#> * <chr> <int> <int> <int> <int>
#> 1 202422 202422 0 0 154282
#> 2 202422-846491 846491 202422 1 2666
#> 3 202422-846491-660046 660046 846491 2 2654
#> 4 202422-846491-660046-846497 846497 660046 3 7
#> 5 202422-846491-660046-846497-846508 846508 846497 4 6
get a tbl
hiers <- src_itis %>% tbl("hierarchy")
#> # Source: table<hierarchy> [?? x 5]
#> # Database: postgres 9.6.0 [sacmac@localhost:5432/ITIS]
#> hierarchy_string tsn parent_tsn level childrencount
#> <chr> <int> <int> <int> <int>
#> 1 202422 202422 0 0 154282
#> 2 202422-846491 846491 202422 1 2666
#> 3 202422-846491-660046 660046 846491 2 2654
#> 4 202422-846491-660046-846497 846497 660046 3 7
#> 5 202422-846491-660046-846497-846508 846508 846497 4 6
#> 6 202422-846491-660046-846497-846508-846553 846553 846508 5 5
#> 7 202422-846491-660046-846497-846508-846553-954935 954935 846553 6 3
#> 8 202422-846491-660046-846497-846508-846553-954935-5549 5549 954935 7 2
#> 9 202422-846491-660046-846497-846508-846553-954935-5549-5550 5550 5549 8 0
#> 10 202422-846491-660046-846497-846508-846553-954936 954936 846553 6 0
#> # ... with more rows
select certain fields
hiers %>% select(TSN, level)
#> # Source: lazy query [?? x 2]
#> # Database: postgres 9.6.0 [sacmac@localhost:5432/ITIS]
#> tsn level
#> <int> <int>
#> 1 202422 0
#> 2 846491 1
#> 3 660046 2
#> 4 846497 3
#> 5 846508 4
#> 6 846553 5
#> 7 954935 6
#> 8 5549 7
#> 9 5550 8
#> 10 954936 6
#> # ... with more rows
taxize functions
A few of the key functions from taxize have been ported to taxizedb. Support is currently limited to the NCBI taxonomy database.
children accesses the nodes immediately descending from a given taxon
children(3701, db='ncbi')
#> $`3701`
#> childtaxa_id childtaxa_name childtaxa_rank
#> 1 1837063 Arabidopsis thaliana x Arabidopsis halleri species
#> 2 1547872 Arabidopsis umezawana species
#> 3 1328956 (Arabidopsis thaliana x Arabidopsis arenosa) x Arabidopsis suecica species
#> 4 1240361 Arabidopsis thaliana x Arabidopsis arenosa species
#> 5 869750 Arabidopsis thaliana x Arabidopsis lyrata species
#> 6 412662 Arabidopsis pedemontana species
#> 7 378006 Arabidopsis arenosa x Arabidopsis thaliana species
#> 8 347883 Arabidopsis arenicola species
#> 9 302551 Arabidopsis petrogena species
#> 10 97980 Arabidopsis croatica species
#> 11 97979 Arabidopsis cebennensis species
#> 12 81970 Arabidopsis halleri species
#> 13 59690 Arabidopsis kamchatica species
#> 14 59689 Arabidopsis lyrata species
#> 15 45251 Arabidopsis neglecta species
#> 16 45249 Arabidopsis suecica species
#> 17 38785 Arabidopsis arenosa species
#> 18 3702 Arabidopsis thaliana species
#>
#> attr(,"class")
#> [1] "children"
#> attr(,"db")
#> [1] "ncbi"
classification finds the lineage of a taxon
classification(3702, db='ncbi')
#> $`3702`
#> name rank id
#> 1 cellular organisms no rank 131567
#> 2 Eukaryota superkingdom 2759
#> 3 Viridiplantae kingdom 33090
#> 4 Streptophyta phylum 35493
#> 5 Streptophytina subphylum 131221
#> 6 Embryophyta no rank 3193
#> 7 Tracheophyta no rank 58023
#> 8 Euphyllophyta no rank 78536
#> 9 Spermatophyta no rank 58024
#> 10 Magnoliophyta no rank 3398
#> 11 Mesangiospermae no rank 1437183
#> 12 eudicotyledons no rank 71240
#> 13 Gunneridae no rank 91827
#> 14 Pentapetalae no rank 1437201
#> 15 rosids subclass 71275
#> 16 malvids no rank 91836
#> 17 Brassicales order 3699
#> 18 Brassicaceae family 3700
#> 19 Camelineae tribe 980083
#> 20 Arabidopsis genus 3701
#> 21 Arabidopsis thaliana species 3702
#>
#> attr(,"class")
#> [1] "classification"
#> attr(,"db")
#> [1] "ncbi"
downstream finds all taxa descending from a taxon
downstream(3700, db='ncbi')
#> $`3700`
#> childtaxa_id childtaxa_name rank
#> 1 2071891 Draba taylorii species
#> 2 2071524 Rorippa tenerrima species
#> 3 2071523 Rorippa crystallina species
#> 4 2071509 Physaria calderi species
#> 5 2071468 Erysimum arenicola species
#> 6 2071452 Draba yukonensis species
#> 7 2071451 Draba thompsonii species
#> ...
#> 326 1492251 Erysimum lilacinum species
#> 327 1492250 Erysimum leucanthemum species
#> 328 1492249 Erysimum leptostylum species
#> 329 1492248 Erysimum leptophyllum species
#> 330 1492247 Erysimum leptocarpum species
#> 331 1492246 Erysimum ledebourii species
#> 332 1492245 Erysimum laxiflorum species
#> 333 1492244 Erysimum kurdicum species
#> [ reached getOption("max.print") -- omitted 2880 rows ]
#>
#> attr(,"class")
#> [1] "downstream"
#> attr(,"db")
#> [1] "ncbi"
All of these functions run very fast. It only takes a few seconds to find all bacterial taxa and count them:
downstream(2, db='ncbi')[[1]] %>%
dplyr::group_by(rank) %>%
dplyr::count()
#> #> [1] 138695
#> # A tibble: 18 x 2
#> # Groups: rank [18]
#> rank n
#> <chr> <int>
#> 1 class 83
#> 2 family 483
#> 3 forma 4
#> 4 genus 3497
#> 5 no rank 37140
#> 6 order 198
#> 7 phylum 134
#> 8 species 97031
#> 9 species group 68
#> 10 species subgroup 10
#> 11 subclass 3
#> 12 subfamily 1
#> 13 subgenus 1
#> 14 suborder 8
#> 15 subphylum 1
#> 16 subspecies 10
#> 17 tribe 2
#> 18 varietas 21
Several mapping functions are available for the NCBI taxonomy database: