## Preprocessing (this belongs in the setup chunk)
# Load the parliament data. The first 15 lines of the CSV are skipped; per the
# original note they hold metadata (header comments), not data.
df_parline <- read_csv(file = "data/parliaments.csv", skip = 15)
# Load the population workbook and keep only country-level rows of the most
# recent year. The file is large (220.3MB), so this step may be slow.
df_population <-
  read_excel(
    path = "data/population.xlsx",
    sheet = 1, # Excel files let us pick the sheet to read
    skip = 16 # ignore the first 16 rows of the sheet
  ) |>
  # Keep country/area rows first ...
  filter(Type == "Country/Area") |>
  # ... then take the latest year found among those remaining rows. The two
  # filters are deliberately sequential so max() only sees country rows.
  filter(Year == max(Year, na.rm = TRUE)) |>
  # rename() gives the unwieldy source column a short name.
  rename(country = `Region, subregion, country or area *`)
# Two-letter ISO codes of the OECD members, used to filter the parliament
# data on its `ISO Code` column. One entry per line with the country noted.
oecd_ISOcode <- c(
  "AU", # Australia
  "AT", # Austria
  "BE", # Belgium
  "CA", # Canada
  "CL", # Chile
  "CO", # Colombia
  "CR", # Costa Rica
  "CZ", # Czech Republic
  "DK", # Denmark
  "EE", # Estonia
  "FI", # Finland
  "FR", # France
  "DE", # Germany
  "GR", # Greece
  "HU", # Hungary
  "IS", # Iceland
  "IE", # Ireland
  "IL", # Israel
  "IT", # Italy
  "JP", # Japan
  "KR", # Republic of Korea
  "LV", # Latvia
  "LT", # Lithuania
  "LU", # Luxembourg
  "MX", # Mexico
  "NL", # Netherlands
  "NZ", # New Zealand
  "NO", # Norway
  "PL", # Poland
  "PT", # Portugal
  "SK", # Slovak Republic
  "SI", # Slovenia
  "ES", # Spain
  "SE", # Sweden
  "CH", # Switzerland
  "TR", # Turkey
  "GB", # United Kingdom
  "US"  # United States of America
)
# Country names of the OECD members, used to filter the population data on
# its `country` column. This list must cover the same members as the ISO code
# list; "Costa Rica" (ISO code "CR") was previously missing here, which
# silently dropped it from the population side and from the final join.
# Both "Korea" and "Republic of Korea" are listed so either spelling matches.
# NOTE(review): other members may also be spelled differently in the source
# data (e.g. "Czechia", "Slovakia", "Türkiye") — confirm against the actual
# `country` values, since unmatched names are dropped without any warning.
oecd_countries <- c(
  "Australia", "Austria", "Belgium", "Canada", "Chile", "Colombia",
  "Costa Rica", "Czech Republic", "Denmark", "Estonia", "Finland", "France",
  "Germany", "Greece", "Hungary", "Iceland", "Ireland", "Israel", "Italy",
  "Japan", "Korea", "Latvia", "Lithuania", "Luxembourg", "Mexico",
  "Netherlands", "New Zealand", "Norway", "Poland", "Portugal",
  "Slovak Republic", "Slovenia", "Spain", "Sweden", "Switzerland", "Turkey",
  "United Kingdom", "United States of America", "Republic of Korea"
)
# Restrict the parliament data to OECD members (matched on ISO code) and keep
# only the two columns used downstream, renaming them on the way.
df_parline_oecd <- df_parline |>
  # %in% tests membership in a set of values (use == for a single value).
  filter(`ISO Code` %in% oecd_ISOcode) |>
  # select() can rename while selecting: write `new_name = old_name`.
  select(
    country = Country,
    statutory_number = `Statutory number of members`
  )
# Reduce the population data to one total per OECD country.
df_population_oecd <- df_population |>
  filter(country %in% oecd_countries) |>
  # Per the original note the age columns are read in as character, so every
  # column from `0` through `100+` is converted with as.numeric() in one go.
  mutate(across(`0`:`100+`, as.numeric)) |>
  # Sum all age columns row by row (missing values are ignored) and round the
  # total to a whole number, stored in the new column TotalPopulation.
  mutate(
    TotalPopulation = round(rowSums(across(`0`:`100+`), na.rm = TRUE))
  ) |>
  select(country, TotalPopulation)
# Combine both sources: every row of df_population_oecd is kept and the
# matching df_parline_oecd columns are attached, using `country` as the key.
# NOTE(review): a country whose name differs between the two sources gets NA
# for statutory_number — verify that the spellings agree.
df_pop_par <- left_join(df_population_oecd, df_parline_oecd, by = "country")
## Individual chunk
# Horizontal bar chart of the statutory number of members per million
# inhabitants, one bar per country, largest value at the top.
df_pop_par |>
  mutate(
    statutory_number = as.numeric(statutory_number),
    # TotalPopulation / 1000 is used as "millions of inhabitants".
    # NOTE(review): this presumes TotalPopulation is in thousands — confirm.
    member_per_million = round(statutory_number / (TotalPopulation / 1000), 2),
    # Order the bars by value exactly once. The former arrange() + factor()
    # pre-ordering was overridden by fct_reorder() anyway and would raise an
    # error if a country name ever appeared twice, so it has been removed.
    country = fct_reorder(country, member_per_million)
  ) |>
  ggplot(aes(x = country, y = member_per_million)) +
  geom_col(fill = "#a0564d") +
  geom_text( # print each value next to the end of its bar
    aes(label = glue("{member_per_million}人")), # append the unit to the value
    hjust = -0.1,
    size = 3.0
  ) +
  # Stretch the value axis to 180 so the label of the longest bar (Iceland,
  # per the original note) is not cut off.
  expand_limits(y = 180) +
  scale_y_continuous(
    breaks = seq(0, 300, 30),
    expand = c(0, 0) # no padding, so the bars start right at the axis
  ) +
  coord_flip() + # swap the axes: countries run vertically, values horizontally
  labs(
    x = "",
    y = "人口100万人あたりの下院議員定数"
  ) +
  theme(
    axis.text.y = element_text(hjust = 0) # left-align the country labels
  )