Pokemon Go 讓神奇寶貝的世界在現實中實現了。曾經身為一個訓練師,我對神奇寶貝有許多疑問;如今科技的便利讓我們能夠透過資料解決這些問題。本網頁原始檔和資料可在 GitHub repo 取得。

library(dplyr)
library(magrittr)
library(ggplot2)
library(png)
library(knitr)
library(data.table)

神奇寶貝的能力值分佈如何?

資料來自 這份 gist

# Load the per-species table and add the Pokedex height in centimetres
# (PokedexHeightM * 100) so it can be plotted on a cm axis later on.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable;
# `as_tibble()` replaces the deprecated `tbl_df()`.
pokemon_df <- read.csv("data/pokemons.csv", stringsAsFactors = FALSE) %>%
  as_tibble() %>%
  mutate(PokedexHeightCM = PokedexHeightM * 100)
pokemon_df %>% glimpse()
## Observations: 151
## Variables: 32
## $ PkMn                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13...
## $ Identifier           <chr> "Bulbasaur", "Ivysaur", "Venusaur", "Char...
## $ EvolvesFrom          <chr> "0", "Bulbasaur", "Ivysaur", "0", "Charma...
## $ EvolvesTo            <chr> "Ivysaur", "Venusaur", "0", "Charmeleon",...
## $ EvoChainID           <int> 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5,...
## $ EvoStage             <int> 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2,...
## $ EvolutionPips        <chr> "NORMAL", "NORMAL", "NORMAL", "NORMAL", "...
## $ BaseStamina          <int> 90, 120, 160, 78, 116, 156, 88, 118, 158,...
## $ BaseAttack           <int> 126, 156, 198, 128, 160, 212, 112, 144, 1...
## $ BaseDefense          <int> 126, 158, 200, 108, 140, 182, 142, 176, 2...
## $ Type1                <chr> "GRASS", "GRASS", "GRASS", "FIRE", "FIRE"...
## $ Type2                <chr> "POISON", "POISON", "POISON", "NONE", "NO...
## $ BaseCaptureRate      <dbl> 0.16, 0.08, 0.04, 0.16, 0.08, 0.04, 0.16,...
## $ BaseFleeRate         <dbl> 0.10, 0.07, 0.05, 0.10, 0.07, 0.05, 0.10,...
## $ CollisionRadiusM     <dbl> 0.38, 0.32, 0.76, 0.16, 0.26, 0.41, 0.23,...
## $ CollisionHeightM     <dbl> 0.65, 0.64, 1.03, 0.47, 0.77, 1.01, 0.38,...
## $ CollisionHeadRadiusM <dbl> 0.27, 0.25, 0.38, 0.16, 0.23, 0.20, 0.19,...
## $ MovementType         <chr> "JUMP", "JUMP", "JUMP", "JUMP", "JUMP", "...
## $ MovementTimerS       <int> 10, 23, 11, 29, 23, 11, 10, 23, 14, 10, 3...
## $ JumpTimeS            <dbl> 1.15, 1.50, 1.25, 1.25, 1.00, 1.00, 1.00,...
## $ AttackTimerS         <int> 29, 8, 4, 10, 8, 4, 29, 8, 5, 29, 3600, 1...
## $ QuickMoves           <chr> "Vine Whip, Tackle", "Razor Leaf, Vine Wh...
## $ CinematicMoves       <chr> "Sludge Bomb, Seed Bomb, Power Whip", "Sl...
## $ PokemonClass         <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ PokedexHeightM       <dbl> 0.7, 1.0, 2.0, 0.6, 1.1, 1.7, 0.5, 1.0, 1...
## $ PokedexWeightKg      <dbl> 6.9, 13.0, 100.0, 8.5, 19.0, 90.5, 9.0, 2...
## $ HeightStdDev         <dbl> 0.09, 0.13, 0.25, 0.08, 0.14, 0.21, 0.06,...
## $ WeightStdDev         <dbl> 0.86, 1.63, 12.50, 1.06, 2.38, 11.31, 1.1...
## $ CandyFamily          <chr> "001_BULBASAUR", "001_BULBASAUR", "001_BU...
## $ CandyToEvolve        <int> 25, 100, 0, 25, 100, 0, 25, 100, 0, 12, 5...
## $ AnimTime             <chr> "6d56d53fdaac2a3f6d56d53f93a9ea3f00000000...
## $ PokedexHeightCM      <dbl> 70, 100, 200, 60, 110, 170, 50, 100, 160,...
# Build a raster annotation for `img`, centred on (x_center, y_center).
#
# img:      raster image handed unchanged to annotation_raster().
# x_center: horizontal centre of the image.
# y_center: vertical centre of the image.
# width:    full horizontal extent; half is applied on each side of x_center.
# height:   full vertical extent; half is applied on each side of y_center.
#
# Returns whatever annotation_raster() returns for the computed bounding box.
annotate_img <- function(img, x_center, y_center, width, height) {
  half_width <- width / 2
  half_height <- height / 2
  annotation_raster(
    img,
    xmin = x_center - half_width,
    xmax = x_center + half_width,
    ymin = y_center - half_height,
    ymax = y_center + half_height
  )
}
# Build a raster annotation showing the icon of one Pokemon.
#
# pokemonId: identifier used to locate the icon file "icons/<pokemonId>.png".
# x_center, y_center, width, height: forwarded unchanged to annotate_img().
#
# The PNG is read from disk on every call.
annotate_icon <- function(pokemonId, x_center, y_center, width, height) {
  icon_path <- paste0("icons/", pokemonId, ".png")
  annotate_img(readPNG(icon_path), x_center, y_center, width, height)
}

# Build one icon annotation per row of the global `pokemon_df`.
#
# x_coords:  x positions, paired element-wise with pokemon_df$PkMn.
# y_coords:  y positions, paired element-wise with pokemon_df$PkMn.
# icon_size: c(width, height) applied to every icon.
#
# Returns a list with one annotate_icon() result per Pokemon, suitable for
# adding to a ggplot with `+`.
plot_pokemons <- function(x_coords, y_coords, icon_size = c(10, 10)) {
  # A shorter icon_size would silently yield an NA height.
  stopifnot(is.numeric(icon_size), length(icon_size) >= 2L)
  mapply(
    annotate_icon,
    pokemon_df$PkMn,
    x_coords,
    y_coords,
    # The size is constant across icons, so pass it once instead of recycling.
    MoreArgs = list(width = icon_size[1], height = icon_size[2]),
    # Always return a list; the default simplification makes the return type
    # depend on what annotate_icon() happens to return.
    SIMPLIFY = FALSE
  )
}
# Scatter plot of two columns of the global `pokemon_df`, with every Pokemon's
# icon drawn on top of its point.
#
# x_axis:    name (string) of the column mapped to x.
# y_axis:    name (string) of the column mapped to y.
# icon_size: c(width, height) of each icon, forwarded to plot_pokemons().
#
# Returns the ggplot object.
plot_2_axis <- function(x_axis, y_axis, icon_size = c(10, 10)) {
  # Fail early with a readable message; the icon positions below are looked up
  # with `[[`, so both arguments must be plain column names.
  missing_cols <- setdiff(c(x_axis, y_axis), names(pokemon_df))
  if (length(missing_cols) > 0) {
    stop(
      "Unknown column(s) in pokemon_df: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # `.data[[name]]` replaces the deprecated aes_string(); labs() pins the axis
  # titles to the column names regardless of how the mapping is labelled.
  ggplot(pokemon_df, aes(x = .data[[x_axis]], y = .data[[y_axis]])) +
    geom_point() +
    labs(x = x_axis, y = y_axis) +
    plot_pokemons(
      pokemon_df[[x_axis]],
      pokemon_df[[y_axis]],
      icon_size
    )
}
# One scatter plot per pair of base stats; icon_size is c(width, height).
plot_2_axis(x_axis = "BaseAttack", y_axis = "BaseDefense")

plot_2_axis(x_axis = "BaseAttack", y_axis = "BaseStamina", icon_size = c(10, 20))

plot_2_axis(x_axis = "BaseDefense", y_axis = "BaseStamina", icon_size = c(10, 25))

在 Pokemon Go 裡,神奇寶貝物種間的強度受三樣基礎值支配:攻擊、防禦、耐力。各挑兩軸做成三張圖。越往右上角的代表越強,在最右上角的若非神獸,就是常常待在道館塔上的那些。

神奇寶貝種族間的體重、身高分布如何?

# Reference images drawn next to the Pokemon icons for a sense of scale.
ash <- readPNG("images/Ash_Ketchum_DP.png")
banana <- readPNG("images/banana.png")
giraffe <- readPNG("images/giraffe.png")

# Pokedex weight (kg) against height (cm), both on log10 axes.
# Raster positions and sizes are given as log10 values, while the reference
# lines and text labels use raw values.
# NOTE(review): presumably raster annotations bypass the scale transformation;
# confirm against the ggplot2 documentation.
ggplot(pokemon_df, aes(x = PokedexWeightKg, y = PokedexHeightCM)) +
  geom_point() +
  scale_x_log10(limits = c(NA, 1400)) +
  scale_y_log10() +
  # Cross-hairs at the reference trainer's weight and height.
  list(
    geom_vline(xintercept = 43.1),
    geom_hline(yintercept = 165)
  ) +
  # Real-world reference images.
  list(
    annotate_img(ash, log10(43.1), log10(165), 0.2, 0.2),
    annotate_img(banana, log10(0.15), log10(20), 0.2, 0.1),
    annotate_img(giraffe, log10(828), log10(517), 0.2, 0.2)
  ) +
  # One icon per Pokemon at its (weight, height) position.
  plot_pokemons(
    log10(pokemon_df$PokedexWeightKg),
    log10(pokemon_df$PokedexHeightCM),
    icon_size = c(0.14, 0.1)
  ) +
  # Text labels for the reference objects and two notable Pokemon.
  list(
    annotate("text", x = 43.1, y = 250, label = "Ash 165cm, 43Kg"),
    annotate("text", x = 210, y = 1100, label = "Onix 880cm, 210Kg"),
    annotate("text", x = 850, y = 800, label = "Giraffe ♀ \n517cm, 828Kg"),
    annotate("text", x = 0.15, y = 220, label = "Haunter\n160cm, 0.1Kg"),
    annotate("text", x = 0.15, y = 25, label = "Banana\n20cm, 0.15Kg")
  )

  1. 注意座標已取過 log,好處是左下角的點不會擠成一團,但右上角大量級神奇寶貝之間的身高、體重差距就比較不容易直觀感受。
  2. 鬼系那兩隻居然進化成耿鬼後會從沒重量變成 40 公斤!(耿鬼在小智腳邊,迷唇姐背後)。一個傳說是耿鬼是鬼斯通附身在皮可西身上,因此得到重量。
  3. 大岩蛇全身都是石頭,可是比長頸鹿高卻又比長頸鹿輕
  4. 卡比獸雖然比較難追,但是一不小心人可能會比隆隆岩重哦

神奇寶貝個體間的體重身高分佈為何?

同種族的神奇寶貝,是否會有如下圖所示、離譜的體重身高分佈?

或是身高與體重是否有某種關係?

為回答這些問題,使用 openintro 上的一份 資料集。這份資料集搜集 4 種共 75 隻神奇寶貝進化前與進化後的資料,上面有身高體重欄位供我們了解之間的關係。

# Load the per-individual measurements and attach each row's Pokedex number
# (PkMn) by matching `species` against `Identifier` in pokemon_df.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable;
# `as_tibble()` replaces the deprecated `tbl_df()`.
pokemon75_df <- read.csv("data/pokemon75.csv", stringsAsFactors = FALSE) %>%
  as_tibble() %>%
  left_join(
    pokemon_df %>% select(PkMn, Identifier),
    by = c("species" = "Identifier")
  )

pokemon75_df %>% glimpse()
## Observations: 75
## Variables: 28
## $ name                    <chr> "Pidgey1", "Pidgey2", "Pidgey3", "Pidg...
## $ species                 <chr> "Pidgey", "Pidgey", "Pidgey", "Pidgey"...
## $ cp                      <int> 384, 366, 353, 338, 242, 129, 10, 25, ...
## $ hp                      <int> 56, 54, 55, 51, 45, 35, 10, 14, 13, 35...
## $ weight                  <dbl> 2.31, 1.67, 1.94, 1.73, 1.44, 2.07, 0....
## $ height                  <dbl> 0.34, 0.29, 0.30, 0.31, 0.27, 0.35, 0....
## $ power_up_stardust       <int> 2500, 2500, 3000, 3000, 1900, 800, 200...
## $ power_up_candy          <int> 2, 2, 3, 3, 2, 1, 1, 1, 1, 1, 1, 3, 1,...
## $ attack_weak             <chr> "Tackle", "Quick Attack", "Quick Attac...
## $ attack_weak_type        <chr> "Normal", "Normal", "Normal", "Normal"...
## $ attack_weak_value       <int> 12, 10, 10, 12, 10, 10, 12, 12, 10, 12...
## $ attack_strong           <chr> "Aerial Ace", "Twister", "Aerial Ace",...
## $ attack_strong_type      <chr> "Flying", "Dragon", "Flying", "Flying"...
## $ attack_strong_value     <int> 30, 25, 30, 30, 30, 30, 30, 25, 25, 25...
## $ cp_new                  <int> 694, 669, 659, 640, 457, 243, 15, 47, ...
## $ hp_new                  <int> 84, 81, 83, 79, 69, 52, 13, 21, 21, 54...
## $ weight_new              <dbl> 2.60, 1.93, 3.51, 30.00, 1.42, 30.00, ...
## $ height_new              <dbl> 1.24, 1.05, 1.11, 1.12, 0.98, 1.27, 0....
## $ power_up_stardust_new   <int> 2500, 2500, 3000, 3000, 1900, 800, 200...
## $ power_up_candy_new      <int> 2, 2, 3, 3, 2, 1, 1, 1, 1, 1, 1, 3, 1,...
## $ attack_weak_new         <chr> "Steel Wing", "Wing Attack", "Wing Att...
## $ attack_weak_type_new    <chr> "Steel", "Flying", "Flying", "Steel", ...
## $ attack_weak_value_new   <int> 15, 9, 9, 15, 9, 9, 9, 15, 9, 15, 9, 1...
## $ attack_strong_new       <chr> "Air Cutter", "Air Cutter", "Air Cutte...
## $ attack_strong_type_new  <chr> "Flying", "Flying", "Flying", "Flying"...
## $ attack_strong_value_new <int> 30, 30, 30, 30, 25, 30, 30, 30, 25, 30...
## $ notes                   <chr> "", "", "", "", "", "", "", "", "", ""...
## $ PkMn                    <int> 16, 16, 16, 16, 16, 16, 16, 16, 16, 16...
# Weight against height for every measured individual, coloured by species,
# with a per-species linear fit and the species icon drawn on each point.
pokemon75_df %>%
  select(PkMn, species, weight, height) %>%
  ggplot(aes(x = weight, y = height, color = species)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE) +
  mapply(
    annotate_icon,
    pokemon75_df$PkMn,
    pokemon75_df$weight,
    pokemon75_df$height,
    # Icon width/height are constant, so pass them once instead of recycling.
    MoreArgs = list(width = 0.5, height = 0.01),
    # Always hand ggplot a list of layers; the default simplification makes
    # the return type depend on what annotate_icon() returns.
    SIMPLIFY = FALSE
  )

身高與體重似乎呈現明顯正相關!

接著看看這些田野調查的敘述統計

# Per-species descriptive statistics of the measured individuals.
# The summaries are deliberately left unnamed: the resulting column names
# (e.g. `mean(weight)`) are what the rendered table header shows.
summary_df <- pokemon75_df %>%
  group_by(PkMn, species) %>%
  summarise(
    count = n(),
    mean(weight),
    mean(height),
    sd(weight),
    sd(height),
    cor(weight, height)
  ) %>%
  # summarise() only drops the innermost grouping (species); remove the
  # remaining PkMn grouping so later operations are not silently per-group.
  ungroup()
summary_df %>% kable()
PkMn species count mean(weight) mean(height) sd(weight) sd(height) cor(weight, height)
10 Caterpie 10 2.899000 0.2950000 0.9122067 0.0295334 0.7900104
13 Weedle 20 3.374500 0.3065000 0.8949829 0.0391051 0.8886342
16 Pidgey 39 1.806154 0.3023077 0.4781963 0.0317442 0.9024133
133 Eevee 6 6.555000 0.2933333 2.5090217 0.0605530 0.9870412

而官方的資料

# Official Pokedex weight/height figures, restricted to the species that
# appear in summary_df, for side-by-side comparison.
pokedex_df <- pokemon_df %>%
  filter(PkMn %in% summary_df$PkMn) %>%
  select(
    PkMn, Identifier,
    PokedexWeightKg, PokedexHeightM,
    WeightStdDev, HeightStdDev
  )
kable(pokedex_df)
PkMn Identifier PokedexWeightKg PokedexHeightM WeightStdDev HeightStdDev
10 Caterpie 2.9 0.3 0.36 0.04
13 Weedle 3.2 0.3 0.40 0.04
16 Pidgey 1.8 0.3 0.22 0.04
133 Eevee 6.5 0.3 0.81 0.04

就平均數而言,圖鑑資料與田野調查非常吻合。田野的標準差可能受到樣本太少的影響,和圖鑑資料差距有點大。

蛤?你非得要做檢定和迴歸嗎?請到 Github 上看 Rmd 原始檔,裡面有做但沒有在此列出。