Pokemon Go 讓神奇寶貝的世界在現實中實現了。曾經身為訓練師的我,對神奇寶貝一直有許多疑問;如今科技的便利讓我們能夠透過資料來解答這些問題。本網頁的原始檔和資料可在 GitHub repo 取得。
library(dplyr)
library(magrittr)
library(ggplot2)
library(png)
library(knitr)
library(data.table)
資料來自 這份 gist
# Load the per-species reference table (151 rows according to the glimpse
# output) and add the Pokedex height in centimetres, derived from the metre
# column.
# stringsAsFactors is spelled FALSE because the shorthand F is an ordinary
# variable that can be reassigned.
pokemon_df <- read.csv("data/pokemons.csv", stringsAsFactors = FALSE) %>%
  # as_tibble() is the replacement for tbl_df(), which dplyr has deprecated.
  as_tibble() %>%
  mutate(PokedexHeightCM = PokedexHeightM * 100)
# Print a compact column-by-column overview of the loaded table.
pokemon_df %>% glimpse()
## Observations: 151
## Variables: 32
## $ PkMn <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...
## $ Identifier <chr> "Bulbasaur", "Ivysaur", "Venusaur", "Char...
## $ EvolvesFrom <chr> "0", "Bulbasaur", "Ivysaur", "0", "Charma...
## $ EvolvesTo <chr> "Ivysaur", "Venusaur", "0", "Charmeleon",...
## $ EvoChainID <int> 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5,...
## $ EvoStage <int> 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2,...
## $ EvolutionPips <chr> "NORMAL", "NORMAL", "NORMAL", "NORMAL", "...
## $ BaseStamina <int> 90, 120, 160, 78, 116, 156, 88, 118, 158,...
## $ BaseAttack <int> 126, 156, 198, 128, 160, 212, 112, 144, 1...
## $ BaseDefense <int> 126, 158, 200, 108, 140, 182, 142, 176, 2...
## $ Type1 <chr> "GRASS", "GRASS", "GRASS", "FIRE", "FIRE"...
## $ Type2 <chr> "POISON", "POISON", "POISON", "NONE", "NO...
## $ BaseCaptureRate <dbl> 0.16, 0.08, 0.04, 0.16, 0.08, 0.04, 0.16,...
## $ BaseFleeRate <dbl> 0.10, 0.07, 0.05, 0.10, 0.07, 0.05, 0.10,...
## $ CollisionRadiusM <dbl> 0.38, 0.32, 0.76, 0.16, 0.26, 0.41, 0.23,...
## $ CollisionHeightM <dbl> 0.65, 0.64, 1.03, 0.47, 0.77, 1.01, 0.38,...
## $ CollisionHeadRadiusM <dbl> 0.27, 0.25, 0.38, 0.16, 0.23, 0.20, 0.19,...
## $ MovementType <chr> "JUMP", "JUMP", "JUMP", "JUMP", "JUMP", "...
## $ MovementTimerS <int> 10, 23, 11, 29, 23, 11, 10, 23, 14, 10, 3...
## $ JumpTimeS <dbl> 1.15, 1.50, 1.25, 1.25, 1.00, 1.00, 1.00,...
## $ AttackTimerS <int> 29, 8, 4, 10, 8, 4, 29, 8, 5, 29, 3600, 1...
## $ QuickMoves <chr> "Vine Whip, Tackle", "Razor Leaf, Vine Wh...
## $ CinematicMoves <chr> "Sludge Bomb, Seed Bomb, Power Whip", "Sl...
## $ PokemonClass <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ PokedexHeightM <dbl> 0.7, 1.0, 2.0, 0.6, 1.1, 1.7, 0.5, 1.0, 1...
## $ PokedexWeightKg <dbl> 6.9, 13.0, 100.0, 8.5, 19.0, 90.5, 9.0, 2...
## $ HeightStdDev <dbl> 0.09, 0.13, 0.25, 0.08, 0.14, 0.21, 0.06,...
## $ WeightStdDev <dbl> 0.86, 1.63, 12.50, 1.06, 2.38, 11.31, 1.1...
## $ CandyFamily <chr> "001_BULBASAUR", "001_BULBASAUR", "001_BU...
## $ CandyToEvolve <int> 25, 100, 0, 25, 100, 0, 25, 100, 0, 12, 5...
## $ AnimTime <chr> "6d56d53fdaac2a3f6d56d53f93a9ea3f00000000...
## $ PokedexHeightCM <dbl> 70, 100, 200, 60, 110, 170, 50, 100, 160,...
# Build a raster annotation for `img`, positioned by its centre point.
#   img:                image data handed straight to annotation_raster().
#   x_center, y_center: centre of the image in plot coordinates.
#   width, height:      full extent of the image along x and y.
# Returns whatever annotation_raster() returns, so the result can be added to
# a ggplot with `+`.
annotate_img <- function(img, x_center, y_center, width, height) {
  half_width <- width / 2
  half_height <- height / 2
  annotation_raster(
    img,
    xmin = x_center - half_width,
    xmax = x_center + half_width,
    ymin = y_center - half_height,
    ymax = y_center + half_height
  )
}
# Build a raster annotation showing the icon of one Pokemon.
#   pokemonId:          id used to build the icon path "icons/<pokemonId>.png".
#   x_center, y_center: centre of the icon in plot coordinates.
#   width, height:      full extent of the icon along x and y.
# The PNG is read from disk on every call and passed on to annotate_img().
annotate_icon <- function(pokemonId, x_center, y_center, width, height) {
  icon_path <- paste0("icons/", pokemonId, ".png")
  annotate_img(readPNG(icon_path), x_center, y_center, width, height)
}
# Build one icon annotation per Pokemon at the given coordinates.
#   x_coords, y_coords: icon centres, matched element by element with
#                       `pokemon_ids`.
#   icon_size:          c(width, height) applied to every icon.
#   pokemon_ids:        ids whose icons are drawn. Defaults to every row of
#                       the global `pokemon_df`, which was the only behaviour
#                       before this parameter existed; pass another id vector
#                       to annotate a different data set.
# Returns a list with one element per id, each being the value returned by
# annotate_icon(). A list of layers can be added to a ggplot with `+`.
plot_pokemons <- function(x_coords, y_coords, icon_size = c(10, 10),
                          pokemon_ids = pokemon_df$PkMn) {
  # Both a width and a height are read from icon_size below.
  stopifnot(length(icon_size) >= 2)
  mapply(
    annotate_icon,
    pokemon_ids,
    x_coords,
    y_coords,
    icon_size[1],
    icon_size[2],
    # Always return a plain list instead of depending on mapply's attempt to
    # simplify the results into a vector or matrix.
    SIMPLIFY = FALSE
  )
}
# Scatter plot of two columns of the global `pokemon_df`, with each Pokemon's
# icon drawn at its point.
#   x_axis, y_axis: column names of pokemon_df, given as strings.
#   icon_size:      c(width, height) of every icon, forwarded unchanged to
#                   plot_pokemons().
# Returns the assembled ggplot object.
plot_2_axis <- function(x_axis, y_axis, icon_size = c(10, 10)) {
  scatter <- ggplot(pokemon_df, aes_string(x = x_axis, y = y_axis)) +
    geom_point()
  icon_layers <- plot_pokemons(
    pokemon_df[[x_axis]],
    pokemon_df[[y_axis]],
    icon_size
  )
  scatter + icon_layers
}
# One scatter plot per pair of base stats. The first call uses the default
# icon size; the two plots with BaseStamina on the y axis pass a larger icon
# height.
plot_2_axis(x_axis = "BaseAttack", y_axis = "BaseDefense")
plot_2_axis(x_axis = "BaseAttack", y_axis = "BaseStamina", icon_size = c(10, 20))
plot_2_axis(x_axis = "BaseDefense", y_axis = "BaseStamina", icon_size = c(10, 25))
在 Pokemon Go 裡,神奇寶貝物種間的強度受三項基礎值支配:攻擊、防禦、耐力。各挑兩軸做成三張圖:越往右上角代表越強,位在最右上角的若非神獸,就是常常待在道館塔上的那些。
# Reference images used for real-world comparison on the size plot.
ash <- readPNG("images/Ash_Ketchum_DP.png")
banana <- readPNG("images/banana.png")
giraffe <- readPNG("images/giraffe.png")

# Pokedex weight (kg) against height (cm) on log-log axes, with Ash, a banana
# and a giraffe drawn in for comparison.
ggplot(pokemon_df, aes(x = PokedexWeightKg, y = PokedexHeightCM)) +
  geom_point() +
  scale_x_log10(limits = c(NA, 1400)) +
  scale_y_log10() +
  # Crosshair through Ash's weight and height.
  geom_vline(xintercept = 43.1) +
  geom_hline(yintercept = 165) +
  # Image centres are passed as log10() values and image sizes are small
  # fractions, while the text labels further down use raw values.
  # NOTE(review): presumably raster annotations are placed in the already
  # transformed coordinate space - confirm against the ggplot2 documentation.
  annotate_img(
    ash,
    x_center = log10(43.1), y_center = log10(165),
    width = 0.2, height = 0.2
  ) +
  annotate_img(
    banana,
    x_center = log10(0.15), y_center = log10(20),
    width = 0.2, height = 0.1
  ) +
  annotate_img(
    giraffe,
    x_center = log10(828), y_center = log10(517),
    width = 0.2, height = 0.2
  ) +
  # One icon per Pokemon, centred on its point.
  plot_pokemons(
    log10(pokemon_df$PokedexWeightKg),
    log10(pokemon_df$PokedexHeightCM),
    icon_size = c(0.14, 0.1)
  ) +
  # Text labels for the reference objects and two notable Pokemon.
  annotate("text", x = 43.1, y = 250, label = "Ash 165cm, 43Kg") +
  annotate("text", x = 210, y = 1100, label = "Onix 880cm, 210Kg") +
  annotate("text", x = 850, y = 800, label = "Giraffe ♀ \n517cm, 828Kg") +
  annotate("text", x = 0.15, y = 220, label = "Haunter\n160cm, 0.1Kg") +
  annotate("text", x = 0.15, y = 25, label = "Banana\n20cm, 0.15Kg")
同種族的神奇寶貝,是否會有如下圖所示般離譜的身高體重分布?
或是身高與體重是否有某種關係?
為回答這些問題,我們使用 OpenIntro 上的一份資料集。這份資料集蒐集了 4 種、共 75 隻神奇寶貝進化前與進化後的資料,其中的身高與體重欄位可供我們了解兩者之間的關係。
# Load the field-survey table (75 rows according to the glimpse output) and
# attach each row's Pokedex number by matching its `species` against
# `Identifier` in pokemon_df.
# stringsAsFactors is spelled FALSE because the shorthand F is an ordinary
# variable that can be reassigned.
pokemon75_df <- read.csv("data/pokemon75.csv", stringsAsFactors = FALSE) %>%
  # as_tibble() is the replacement for tbl_df(), which dplyr has deprecated.
  as_tibble() %>%
  left_join(
    pokemon_df %>% select(PkMn, Identifier),
    by = c("species" = "Identifier")
  )
# Print a compact column-by-column overview of the joined table.
pokemon75_df %>% glimpse()
## Observations: 75
## Variables: 28
## $ name <chr> "Pidgey1", "Pidgey2", "Pidgey3", "Pidg...
## $ species <chr> "Pidgey", "Pidgey", "Pidgey", "Pidgey"...
## $ cp <int> 384, 366, 353, 338, 242, 129, 10, 25, ...
## $ hp <int> 56, 54, 55, 51, 45, 35, 10, 14, 13, 35...
## $ weight <dbl> 2.31, 1.67, 1.94, 1.73, 1.44, 2.07, 0....
## $ height <dbl> 0.34, 0.29, 0.30, 0.31, 0.27, 0.35, 0....
## $ power_up_stardust <int> 2500, 2500, 3000, 3000, 1900, 800, 200...
## $ power_up_candy <int> 2, 2, 3, 3, 2, 1, 1, 1, 1, 1, 1, 3, 1,...
## $ attack_weak <chr> "Tackle", "Quick Attack", "Quick Attac...
## $ attack_weak_type <chr> "Normal", "Normal", "Normal", "Normal"...
## $ attack_weak_value <int> 12, 10, 10, 12, 10, 10, 12, 12, 10, 12...
## $ attack_strong <chr> "Aerial Ace", "Twister", "Aerial Ace",...
## $ attack_strong_type <chr> "Flying", "Dragon", "Flying", "Flying"...
## $ attack_strong_value <int> 30, 25, 30, 30, 30, 30, 30, 25, 25, 25...
## $ cp_new <int> 694, 669, 659, 640, 457, 243, 15, 47, ...
## $ hp_new <int> 84, 81, 83, 79, 69, 52, 13, 21, 21, 54...
## $ weight_new <dbl> 2.60, 1.93, 3.51, 30.00, 1.42, 30.00, ...
## $ height_new <dbl> 1.24, 1.05, 1.11, 1.12, 0.98, 1.27, 0....
## $ power_up_stardust_new <int> 2500, 2500, 3000, 3000, 1900, 800, 200...
## $ power_up_candy_new <int> 2, 2, 3, 3, 2, 1, 1, 1, 1, 1, 1, 3, 1,...
## $ attack_weak_new <chr> "Steel Wing", "Wing Attack", "Wing Att...
## $ attack_weak_type_new <chr> "Steel", "Flying", "Flying", "Steel", ...
## $ attack_weak_value_new <int> 15, 9, 9, 15, 9, 9, 9, 15, 9, 15, 9, 1...
## $ attack_strong_new <chr> "Air Cutter", "Air Cutter", "Air Cutte...
## $ attack_strong_type_new <chr> "Flying", "Flying", "Flying", "Flying"...
## $ attack_strong_value_new <int> 30, 30, 30, 30, 25, 30, 30, 30, 25, 30...
## $ notes <chr> "", "", "", "", "", "", "", "", "", ""...
## $ PkMn <int> 16, 16, 16, 16, 16, 16, 16, 16, 16, 16...
# Weight against height for the surveyed Pokemon, coloured by species, with a
# straight-line fit per species (no confidence band) and each individual's
# species icon drawn at its point.
ggplot(
  select(pokemon75_df, PkMn, species, weight, height),
  aes(x = weight, y = height, color = species)
) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  # One icon per surveyed individual: 0.5 wide and 0.01 tall in axis units.
  mapply(
    annotate_icon,
    pokemon75_df$PkMn,
    pokemon75_df$weight,
    pokemon75_df$height,
    0.5,
    0.01
  )
身高與體重似乎呈現明顯正相關!
接著看看這些田野調查的敘述統計
# Per-species descriptive statistics of the field survey: sample size, mean
# and standard deviation of weight and height, and the weight/height
# correlation.
summary_df <- pokemon75_df %>%
  group_by(PkMn, species) %>%
  summarise(
    count = n(),
    # The backticked names are exactly the ones dplyr derives from the unnamed
    # expressions, so the rendered table header stays the same.
    `mean(weight)` = mean(weight),
    `mean(height)` = mean(height),
    `sd(weight)` = sd(weight),
    `sd(height)` = sd(height),
    `cor(weight, height)` = cor(weight, height)
  ) %>%
  # summarise() only drops the last grouping level; remove the remaining
  # PkMn grouping so later operations on summary_df are not grouped.
  ungroup()
# Render the summary as a table.
summary_df %>% kable()
PkMn | species | count | mean(weight) | mean(height) | sd(weight) | sd(height) | cor(weight, height) |
---|---|---|---|---|---|---|---|
10 | Caterpie | 10 | 2.899000 | 0.2950000 | 0.9122067 | 0.0295334 | 0.7900104 |
13 | Weedle | 20 | 3.374500 | 0.3065000 | 0.8949829 | 0.0391051 | 0.8886342 |
16 | Pidgey | 39 | 1.806154 | 0.3023077 | 0.4781963 | 0.0317442 | 0.9024133 |
133 | Eevee | 6 | 6.555000 | 0.2933333 | 2.5090217 | 0.0605530 | 0.9870412 |
而官方的資料
# Official Pokedex weight/height figures and their standard deviations for
# the species that appear in summary_df, for side-by-side comparison with the
# field-survey statistics.
pokedex_df <- pokemon_df %>%
  filter(PkMn %in% summary_df$PkMn) %>%
  select(
    PkMn, Identifier,
    PokedexWeightKg, PokedexHeightM,
    WeightStdDev, HeightStdDev
  )
# Render the selection as a table.
pokedex_df %>% kable()
PkMn | Identifier | PokedexWeightKg | PokedexHeightM | WeightStdDev | HeightStdDev |
---|---|---|---|---|---|
10 | Caterpie | 2.9 | 0.3 | 0.36 | 0.04 |
13 | Weedle | 3.2 | 0.3 | 0.40 | 0.04 |
16 | Pidgey | 1.8 | 0.3 | 0.22 | 0.04 |
133 | Eevee | 6.5 | 0.3 | 0.81 | 0.04 |
就平均數而言,圖鑑資料與田野調查非常吻合。田野的標準差可能受到樣本太少的影響,和圖鑑資料差距有點大。
蛤?你非得要做檢定和迴歸嗎?請到 GitHub 上看 Rmd 原始檔,裡面有做,但沒有在此列出。