R 程辑包 rgbif
可以非常方便地从 GBIF (Global Biodiversity Information Facility) 数据库中以物种、属、科等信息导出想要的数据。参见rgbif tutorial。
library(rgbif)
library(dplyr)
library(sf)
library(spData)
library(tmap)
library(purrr)
library(readr)
library(magrittr) # for %T>% pipe
library(rgbif) # for occ_download
library(taxize) # for get_gbifid
搜索某一物种的数据
根据物种学名搜索数据:
# Search GBIF occurrence records for one species by its scientific name.
sq <- occ_search(scientificName = "Sinotaia quadrata")
names(sq)
# [1] "meta" "hierarchy" "data" "media" "facets"
# For geographic-distribution work, keep the record key, the scientific name
# and the coordinates. Columns are selected by name, not by position, so the
# result does not depend on the column order of `sq$data`.
sq_location <- sq$data %>%
  select(key, scientificName, decimalLatitude, decimalLongitude) %>%
  # Keep only records that have BOTH coordinates. Missing values must be
  # tested with is.na(); comparing against the string "NA" does not do that.
  filter(!is.na(decimalLongitude), !is.na(decimalLatitude)) %>%
  data.frame()
# key scientificName decimalLatitude decimalLongitude
# 1 2242953256 Sinotaia quadrata (Benson, 1842) 36.38850 126.56625
# 2 2557790953 Sinotaia quadrata (Benson, 1842) -31.32859 -64.46254
# 3 2005260509 Sinotaia quadrata (Benson, 1842) 25.10170 121.51041
# 4 1880618814 Sinotaia quadrata (Benson, 1842) 24.72333 121.72417
# 5 2005272605 Sinotaia quadrata (Benson, 1842) 24.65829 121.82159
# 6 2306370637 Sinotaia quadrata (Benson, 1842) 43.81000 11.03000
也可以直接在 occ_search() 函数中通过 fields 参数指定需要的字段,从而只输出所需的列:
# Ask occ_search() to return only the listed columns via `fields`.
wanted_fields <- c("key", "scientificName", "decimalLongitude", "decimalLatitude")
sq2 <- occ_search(
  scientificName = "Sinotaia quadrata",
  fields = wanted_fields
)
# Show the first rows of the returned table.
head(sq2$data)
# A tibble: 6 x 4
# key scientificName decimalLongitude decimalLatitude
# <chr> <chr> <dbl> <dbl>
# 1 2242953256 Sinotaia quadrata (Benson, 1842) 127. 36.4
# 2 2557790953 Sinotaia quadrata (Benson, 1842) -64.5 -31.3
# 3 2005260509 Sinotaia quadrata (Benson, 1842) 122. 25.1
# 4 1880618814 Sinotaia quadrata (Benson, 1842) 122. 24.7
# 5 2005272605 Sinotaia quadrata (Benson, 1842) 122. 24.7
# 6 2244501338 Sinotaia quadrata (Benson, 1842) NA NA
搜索多个物种的数据
当有很多物种需要搜索时,无需逐个搜索,只需准备一个物种列表即可一次性搜索:
# Species to look up.
splist <- c("Sinotaia quadrata", "Sinotaia aeruginosa", "Sinotaia purificata")

# Return the first GBIF key suggested for one name, or NA when nothing matches.
# name_suggest() returns either a data frame or a list whose `data` element is
# the data frame, depending on the rgbif version; both shapes are accepted.
first_suggested_key <- function(sp_name) {
  res <- name_suggest(sp_name)
  hits <- if (is.data.frame(res)) res else res$data
  if (is.null(hits) || NROW(hits) == 0) {
    return(NA_integer_)
  }
  as.integer(hits$key[1])
}

# vapply() guarantees an integer vector whatever the lookups return.
keys <- vapply(splist, first_suggested_key, integer(1), USE.NAMES = FALSE)
# Drop names without any suggestion so no NA key is sent to occ_search().
keys <- keys[!is.na(keys)]
sps <- occ_search(taxonKey = keys)
# Note: results obtained this way sometimes lack the desired coordinates.
此外,还可以使用你的 GBIF 账户和密码,在 R 中直接将多个物种的数据下载下来。
# GBIF account credentials used by the download requests below.
# Each value is read from an environment variable so that real credentials do
# not have to be written into the script; when the variable is not set, the
# placeholder is used and must be replaced by hand.
user <- Sys.getenv("GBIF_USER", unset = "user_name") # user name of your GBIF account
pwd <- Sys.getenv("GBIF_PWD", unset = "password") # password of your GBIF account
email <- Sys.getenv("GBIF_EMAIL", unset = "Email") # e-mail address linked to the account
# List species names that you want to search
# Make a species list by hand; the file is read as a semicolon-separated
# table with a header row, and its first column holds the species names.
file_url <- read.csv("species_list.csv", header = TRUE, sep = ";")

# Get taxon keys from gbif based on species names
gbif_taxon_keys <- file_url[, 1] %>% # the first column is species names
  taxize::get_gbifid_(method = "backbone") %>% # match names to the GBIF backbone to get taxonkeys
  imap(~ .x %>% mutate(original_sciname = .y)) %>% # add original name back into data.frame
  bind_rows() %T>% # combine all data.frames into one; %T>% forwards it past the write
  # Save all matches as a side effect for inspection. The output file is passed
  # positionally because the `path` argument name is deprecated in readr.
  readr::write_tsv("all_matches.tsv") %>%
  filter(matchtype == "EXACT", status == "ACCEPTED") %>% # get only accepted and matched names
  # NOTE(review): this keeps plants only -- adjust or drop this filter when the
  # species list contains other kingdoms.
  filter(kingdom == "Plantae") %>%
  pull(usagekey) # get the gbif taxonkeys
# Request a GBIF download restricted to the taxon keys found above and to
# records that have coordinates.
# Optional predicates that can be added to the call to narrow the request:
#   pred_in("basisOfRecord", c('PRESERVED_SPECIMEN','HUMAN_OBSERVATION','OBSERVATION','MACHINE_OBSERVATION'))
#   pred_gt("elevation", 5000)
#   pred("country", "US")
#   pred("hasGeospatialIssue", FALSE)
#   pred_gte("year", 1990)
taxon_filter <- pred_in("taxonKey", gbif_taxon_keys)
coordinate_filter <- pred("hasCoordinate", TRUE)
occ_download(
  taxon_filter,
  coordinate_filter,
  format = "SIMPLE_CSV",
  user = user,
  pwd = pwd,
  email = email
)
这时,登录你的 GBIF 账户,会发现在 Download 目录下已有当前正在下载的任务,任务完成后点击 download,即可下载 .csv 文件。
上述方法还可以有更简便的方式,即直接根据物种名称,将其 GBIF 中的 taxon Key 输出出来,根据 Key 下载数据。
# List species/genus/family names
# Kept as a plain character vector; the name avoids shadowing base::list().
genus_names <- c(
  "Sinotaia", "Heterogen", "Mekongia", "Filopaludina",
  "Margarya", "Neothauma", "Viviparus", "Taia", "Torotaia", "Idiopoma",
  "Bellamya", "Campeloma", "Angulyagra", "Trochopaludina",
  "Cipangopaludina", "Rivularia", "Anularya", "Lioplacodes"
)

# Get GBIF Keys
# Return one row per key suggested for a genus name (columns: key, genus), or
# NULL when there is no suggestion. name_suggest() returns either a data frame
# or a list whose `data` element is the data frame, depending on the rgbif
# version; both shapes are accepted.
suggest_genus_keys <- function(genus) {
  res <- name_suggest(q = genus, rank = "genus")
  hits <- if (is.data.frame(res)) res else res$data
  if (is.null(hits) || NROW(hits) == 0) {
    return(NULL)
  }
  data.frame(key = hits$key, genus = genus, stringsAsFactors = FALSE)
}

# Look up every genus and stack the per-genus tables in one step instead of
# growing a data frame inside a loop. The key stays in the first column.
list.keys <- genus_names %>%
  map(suggest_genus_keys) %>%
  bind_rows()
list.keys
# Download data
# The request is limited to the keys in the first column of `list.keys` and to
# records that have coordinates.
# Optional predicates that can be added to the call to narrow the request:
#   pred_in("basisOfRecord", c('PRESERVED_SPECIMEN','HUMAN_OBSERVATION','OBSERVATION','MACHINE_OBSERVATION'))
#   pred_gt("elevation", 5000)
#   pred("country", "US")
#   pred("hasGeospatialIssue", FALSE)
#   pred_gte("year", 1990)
genus_key_filter <- pred_in("taxonKey", list.keys[, 1])
has_coordinates <- pred("hasCoordinate", TRUE)
occ_download(
  genus_key_filter,
  has_coordinates,
  format = "SIMPLE_CSV",
  user = user,
  pwd = pwd,
  email = email
)
数据使用
最常见的数据使用就是自定义绘制物种分布地图。根据每个个体的经纬度数据,制成 GIS 点向量,即可在各种矢量和栅格地图中绘制分布点。
# Make geometry vector of points
# The whole table is converted in one vectorised call instead of building one
# point per row and rbind-ing inside a loop. The remaining columns of
# `sq_location` are kept as attributes of the points.
sq_location_df <- sq_location %>%
  # st_as_sf() needs complete coordinates, so drop rows missing either one.
  filter(!is.na(decimalLongitude), !is.na(decimalLatitude)) %>%
  st_as_sf(
    coords = c("decimalLongitude", "decimalLatitude"), # x = longitude, y = latitude
    crs = 4326 # WGS84 longitude/latitude, replacing the legacy proj4 string
  )
sq_location_df
# Plot the map
# Base layer: the `world` polygons drawn in white.
base_layer <- tm_shape(world) + tm_polygons(col = "white")
# Overlay: the occurrence points drawn as small red symbols.
point_layer <- tm_shape(sq_location_df) +
  tm_symbols(size = 0.05, col = "red", border.col = "red")
base_layer + point_layer