This analysis explores global health and economic indicators sourced from the World Health Organization’s Global Health Observatory. The dataset covers 222 entries (countries and territories, plus at least one regional aggregate, North America) with the following variables:
| Variable | Description |
|---|---|
| country | Country name |
| code | Country code |
| gni | Gross National Income per capita (USD) |
| skulspend | Education expenditures per capita |
| death | Death rate per 1,000 |
| healthexp | Health expenditures per capita |
| unemp | Unemployment rate |
| water | % population with access to clean water |
| skul | % enrolled in secondary school |
| maternal | Maternal death rate per 100,000 live births |
# Read the global indicators CSV; the first row supplies the column names.
ind <- read_csv("data/globalind.csv", col_names = TRUE)

# Preview the first ten rows as a formatted table.
ind %>%
  head(10) %>%
  kable(caption = "First 10 rows of the Global Health Indicators dataset")
| country | code | gni | skulspend | death | healthexp | unemp | water | skul | maternal |
|---|---|---|---|---|---|---|---|---|---|
| Afghanistan | AFG | 500 | 2.86 | 7.11 | 80.29 | 11.71 | 75.09 | NA | 620 |
| Albania | ALB | 5270 | 3.10 | 10.79 | NA | 13.07 | 95.07 | 94.28 | 8 |
| Algeria | DZA | 3610 | 7.04 | 5.40 | 214.85 | 12.25 | 94.44 | NA | 78 |
| American Samoa | ASM | NA | NA | 5.60 | NA | NA | 99.77 | NA | NA |
| Andorra | AND | NA | NA | NA | 3336.92 | NA | 100.00 | NA | NA |
| Angola | AGO | 1690 | 2.42 | 7.82 | 50.74 | 10.35 | 57.17 | NA | 222 |
| Antigua and Barbuda | ATG | 15010 | 3.45 | 5.96 | 830.34 | NA | NA | NA | 21 |
| Argentina | ARG | 9010 | 5.02 | 8.51 | 863.71 | 11.46 | NA | 110.13 | 45 |
| Armenia | ARM | 4470 | 2.71 | 12.79 | 551.54 | 12.18 | 99.97 | 87.25 | 27 |
| Aruba | ABW | 24840 | NA | 9.69 | NA | NA | NA | NA | NA |
## spc_tbl_ [222 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ country : chr [1:222] "Afghanistan" "Albania" "Algeria" "American Samoa" ...
## $ code : chr [1:222] "AFG" "ALB" "DZA" "ASM" ...
## $ gni : num [1:222] 500 5270 3610 NA NA ...
## $ skulspend: num [1:222] 2.86 3.1 7.04 NA NA 2.42 3.45 5.02 2.71 NA ...
## $ death : num [1:222] 7.11 10.79 5.4 5.6 NA ...
## $ healthexp: num [1:222] 80.3 NA 214.9 NA 3336.9 ...
## $ unemp : num [1:222] 11.7 13.1 12.2 NA NA ...
## $ water : num [1:222] 75.1 95.1 94.4 99.8 100 ...
## $ skul : num [1:222] NA 94.3 NA NA NA ...
## $ maternal : num [1:222] 620 8 78 NA NA 222 21 45 27 NA ...
## - attr(*, "spec")=
## .. cols(
## .. country = col_character(),
## .. code = col_character(),
## .. gni = col_number(),
## .. skulspend = col_double(),
## .. death = col_double(),
## .. healthexp = col_number(),
## .. unemp = col_double(),
## .. water = col_double(),
## .. skul = col_double(),
## .. maternal = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
The dataset contains 222 observations across 10 variables — 2 character fields (country, code) and 8 numeric indicators.
Countries with GNI per capita of at least $50,000 (the filter is `gni >= 50000`), sorted by education spending from highest to lowest:
# Keep rows with GNI per capita of at least 50,000 USD, reduce to the four
# columns of interest, and order by education spending (highest first;
# missing values sort last).
ind %>%
  filter(gni >= 50000) %>%
  select(country, code, gni, skulspend) %>%
  arrange(desc(skulspend)) %>%
  kable(caption = "High-income countries ranked by education spending")
| country | code | gni | skulspend |
|---|---|---|---|
| Iceland | ISL | 62110 | 7.72 |
| Sweden | SWE | 54830 | 7.17 |
| Denmark | DNK | 62710 | 6.38 |
| Australia | AUS | 53620 | 6.10 |
| United States | USA | 64650 | 6.05 |
| Norway | NOR | 78610 | 5.90 |
| Finland | FIN | 50060 | 5.88 |
| North America | NAC | 62515 | 5.61 |
| Netherlands | NLD | 50170 | 5.30 |
| Switzerland | CHE | 82490 | 5.22 |
| Luxembourg | LUX | 79580 | 4.97 |
| Qatar | QAT | 58420 | 3.23 |
| Ireland | IRL | 65330 | 3.10 |
| Singapore | SGP | 55260 | 2.51 |
| Bermuda | BMU | 111800 | 1.87 |
| Cayman Islands | CYM | 62420 | NA |
| Faroe Islands | FRO | 64250 | NA |
| Isle of Man | IMN | 79300 | NA |
Which country has the highest health expenditure per capita?
# Rank all rows by health expenditure per capita (highest first) and show
# the ten largest values with their country names.
ind %>%
  arrange(desc(healthexp)) %>%
  select(country, healthexp) %>%
  head(n = 10) %>%
  kable(caption = "Top 10 countries by health expenditure per capita")
| country | healthexp |
|---|---|
| United States | 11702.41 |
| North America | 11076.74 |
| Switzerland | 10309.76 |
| Norway | 7704.44 |
| Luxembourg | 6757.00 |
| Denmark | 6438.39 |
| Ireland | 6092.18 |
| Sweden | 6027.97 |
| Germany | 5930.33 |
| Australia | 5901.11 |
How many countries have unemployment rates above 10%?
# Rows whose unemployment rate is strictly above 10%; filter() also drops
# rows where the rate is missing.
high_unemp <- filter(ind, unemp > 10)

# Report how many rows matched.
cat("Number of countries with unemployment > 10%:", nrow(high_unemp))
## Number of countries with unemployment > 10%: 56
Among countries with 100% clean water access, which has the lowest health expenditure?
# Among rows reporting exactly 100% clean-water access, list the ten with
# the lowest health expenditure per capita (ascending order).
ind %>%
  filter(water == 100) %>%
  arrange(healthexp) %>%
  select(country, water, healthexp) %>%
  head(n = 10) %>%
  kable(caption = "Countries with 100% clean water access, ranked by health expenditure")
| country | water | healthexp |
|---|---|---|
| Thailand | 100 | 305.09 |
| Turkmenistan | 100 | 483.74 |
| Romania | 100 | 809.59 |
| Tuvalu | 100 | 1071.31 |
| Bahrain | 100 | 1110.00 |
| Nauru | 100 | 1143.74 |
| Hungary | 100 | 1163.25 |
| Chile | 100 | 1278.18 |
| Kuwait | 100 | 1532.56 |
| Greece | 100 | 1675.12 |
# Descriptive statistics from psych::describe() for the eight numeric
# indicators held in columns 3-10 (gni through maternal), rounded to two
# decimals for display.
ind[3:10] %>%
  describe() %>%
  kable(digits = 2, caption = "Descriptive statistics for all numeric indicators")
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| gni | 1 | 200 | 15296.93 | 20321.32 | 6055.00 | 10949.01 | 7168.37 | 220.00 | 111800.00 | 111580.00 | 1.92 | 3.47 | 1436.93 |
| skulspend | 2 | 161 | 4.65 | 1.94 | 4.55 | 4.50 | 1.81 | 0.00 | 13.63 | 13.63 | 1.21 | 3.54 | 0.15 |
| death | 3 | 218 | 8.11 | 2.87 | 7.52 | 7.88 | 2.31 | 1.22 | 18.00 | 16.78 | 0.80 | 0.98 | 0.19 |
| healthexp | 4 | 189 | 1253.79 | 2073.63 | 388.55 | 760.57 | 495.74 | 16.42 | 11702.41 | 11685.99 | 2.59 | 7.33 | 150.83 |
| unemp | 5 | 191 | 8.27 | 5.64 | 6.58 | 7.54 | 4.17 | 0.14 | 28.05 | 27.91 | 1.17 | 0.93 | 0.41 |
| water | 6 | 201 | 89.36 | 15.32 | 97.01 | 92.49 | 4.43 | 37.20 | 100.00 | 62.80 | -1.55 | 1.34 | 1.08 |
| skul | 7 | 121 | 93.90 | 25.25 | 100.37 | 95.05 | 18.13 | 31.42 | 157.70 | 126.28 | -0.39 | 0.23 | 2.30 |
| maternal | 8 | 189 | 143.00 | 207.71 | 60.00 | 98.84 | 78.58 | 1.00 | 1223.00 | 1222.00 | 2.43 | 6.99 | 15.11 |
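Notable observations:
- Mean GNI (15,296.93) is far above median GNI (6,055.00), indicating a right-skewed distribution (skew 1.92) and large income disparities across countries.
- Missing data is present across multiple indicators (n ranges from 121 to 218 of 222 rows). `describe()` drops missing values per variable (its default `na.rm = TRUE`), and the correlations below use pairwise-complete observations.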
# Pairwise correlations between the eight numeric indicators (columns 3-10).
# Each coefficient uses only the rows where both variables are present,
# and the result is rounded to two decimals.
cor_matrix <- ind[3:10] %>%
  cor(use = "pairwise.complete.obs") %>%
  round(2)

kable(cor_matrix, caption = "Correlation matrix of global health indicators")
| gni | skulspend | death | healthexp | unemp | water | skul | maternal | |
|---|---|---|---|---|---|---|---|---|
| gni | 1.00 | 0.11 | 0.01 | 0.93 | -0.19 | 0.46 | 0.54 | -0.41 |
| skulspend | 0.11 | 1.00 | 0.00 | 0.18 | 0.14 | 0.20 | 0.31 | -0.25 |
| death | 0.01 | 0.00 | 1.00 | 0.12 | 0.03 | 0.00 | 0.15 | 0.08 |
| healthexp | 0.93 | 0.18 | 0.12 | 1.00 | -0.12 | 0.41 | 0.49 | -0.34 |
| unemp | -0.19 | 0.14 | 0.03 | -0.12 | 1.00 | 0.05 | 0.10 | -0.02 |
| water | 0.46 | 0.20 | 0.00 | 0.41 | 0.05 | 1.00 | 0.73 | -0.78 |
| skul | 0.54 | 0.31 | 0.15 | 0.49 | 0.10 | 0.73 | 1.00 | -0.74 |
| maternal | -0.41 | -0.25 | 0.08 | -0.34 | -0.02 | -0.78 | -0.74 | 1.00 |
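Key findings from the correlation matrix:
- GNI and health expenditure show the strongest positive correlation (r = 0.93).
- Education spending and school enrollment are positively correlated (r = 0.31).
- Maternal death rate tends to be negatively correlated with income (r = -0.41) and health spending (r = -0.34); its strongest negative correlations are with clean-water access (r = -0.78) and school enrollment (r = -0.74).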
# Two histograms side by side (1 row x 2 columns): GNI and health
# expenditure on their original scales.
par(mfrow = c(1, 2))
hist(
  ind[["gni"]],
  main = "Distribution of GNI per Capita",
  xlab = "GNI (USD)",
  col = "steelblue",
  border = "white"
)
hist(
  ind[["healthexp"]],
  main = "Distribution of Health Expenditure",
  xlab = "Health Expenditure (USD)",
  col = "coral",
  border = "white"
)
Both distributions exhibit a strong positive skew — most countries cluster at lower values with a long tail toward wealthier nations. A log transformation can normalize these distributions:
# The same two variables after a base-10 log transform, side by side.
par(mfrow = c(1, 2))
hist(
  log10(ind[["gni"]]),
  main = "Log-transformed GNI",
  xlab = "log10(GNI)",
  col = "steelblue",
  border = "white"
)
hist(
  log10(ind[["healthexp"]]),
  main = "Log-transformed Health Expenditure",
  xlab = "log10(Health Exp)",
  col = "coral",
  border = "white"
)

# Scatter plot of health expenditure against GNI, drawn with filled,
# semi-transparent points (alpha 0.6) and a background grid.
plot(
  ind[["gni"]], ind[["healthexp"]],
  main = "GNI vs. Health Expenditure per Capita",
  xlab = "Gross National Income per Capita (USD)",
  ylab = "Health Expenditure per Capita (USD)",
  pch = 19,
  col = rgb(0.2, 0.4, 0.6, 0.6),
  cex = 1.2
)
grid()
The scatter plot confirms the strong positive relationship between national income and health spending.
The California schools dataset provides school-level data including poverty rates, student demographics, and academic performance.
# Descriptive statistics from psych::describe() for the fifteen school-level
# measures in columns 5-19 of `schools`, rounded to two decimals for display.
schools[5:19] %>%
  describe() %>%
  kable(digits = 2, caption = "Descriptive statistics for schools dataset")
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Students | 1 | 503 | 564.11 | 219.24 | 550.00 | 551.95 | 223.87 | 137.00 | 1602.00 | 1465.00 | 0.70 | 1.06 | 9.78 |
| p_minority | 2 | 503 | 48.25 | 32.06 | 42.18 | 47.20 | 37.92 | 0.18 | 100.00 | 99.82 | 0.35 | -1.24 | 1.43 |
| Diversity | 3 | 498 | 2.97 | 1.71 | 3.37 | 3.07 | 2.11 | 0.04 | 5.00 | 4.96 | -0.39 | -1.32 | 0.08 |
| teachers | 4 | 503 | 33.55 | 10.79 | 33.00 | 32.91 | 10.38 | 11.00 | 76.80 | 65.80 | 0.60 | 0.45 | 0.48 |
| spendrat | 5 | 503 | 3305.50 | 705.92 | 3190.00 | 3230.12 | 510.01 | 199.00 | 6802.00 | 6603.00 | 1.25 | 4.67 | 31.48 |
| avgsal | 6 | 503 | 31698.44 | 2849.83 | 31681.00 | 31748.12 | 3208.35 | 21009.00 | 38630.00 | 17621.00 | -0.19 | -0.28 | 127.07 |
| teachrat | 7 | 503 | 16.56 | 2.41 | 16.80 | 16.61 | 2.22 | 9.60 | 28.50 | 18.90 | 0.01 | 1.44 | 0.11 |
| p_5yearsexp | 8 | 503 | 21.97 | 7.81 | 21.60 | 21.64 | 8.01 | 3.50 | 47.20 | 43.70 | 0.44 | 0.11 | 0.35 |
| poorpct | 9 | 503 | 47.23 | 30.31 | 42.75 | 46.77 | 36.40 | 0.00 | 99.54 | 99.54 | 0.19 | -1.19 | 1.35 |
| Language limit | 10 | 503 | 13.42 | 17.99 | 5.26 | 9.50 | 7.25 | 0.00 | 80.80 | 80.80 | 1.81 | 2.59 | 0.80 |
| p_gifted | 11 | 503 | 8.26 | 7.36 | 7.01 | 7.43 | 5.93 | 0.00 | 77.25 | 77.25 | 4.09 | 30.82 | 0.33 |
| p_special | 12 | 503 | 11.66 | 4.65 | 11.15 | 11.27 | 4.18 | 1.96 | 28.43 | 26.47 | 0.84 | 0.91 | 0.21 |
| p_moved | 13 | 503 | 20.33 | 9.95 | 19.55 | 19.74 | 8.66 | 0.00 | 69.14 | 69.14 | 0.78 | 1.77 | 0.44 |
| p_passed | 14 | 503 | 69.61 | 17.17 | 72.92 | 70.78 | 18.22 | 19.79 | 99.02 | 79.23 | -0.56 | -0.49 | 0.77 |
| attendance | 15 | 503 | 93.73 | 15.90 | 96.60 | 96.45 | 0.74 | 0.00 | 98.40 | 98.40 | -5.70 | 30.63 | 0.71 |
# Pairwise-complete correlations between the school-level measures in
# columns 5-19, rounded to two decimals and rendered as a table.
schools[5:19] %>%
  cor(use = "pairwise.complete.obs") %>%
  round(2) %>%
  kable(caption = "Correlation matrix for schools variables")
| Students | p_minority | Diversity | teachers | spendrat | avgsal | teachrat | p_5yearsexp | poorpct | Language limit | p_gifted | p_special | p_moved | p_passed | attendance | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Students | 1.00 | 0.31 | -0.03 | 0.94 | -0.36 | 0.26 | 0.61 | 0.78 | 0.18 | 0.46 | 0.00 | -0.40 | 0.10 | -0.17 | 0.01 |
| p_minority | 0.31 | 1.00 | -0.27 | 0.29 | 0.16 | 0.39 | 0.19 | 0.27 | 0.84 | 0.61 | 0.22 | -0.41 | 0.54 | -0.66 | 0.10 |
| Diversity | -0.03 | -0.27 | 1.00 | -0.04 | -0.22 | -0.08 | 0.07 | -0.04 | -0.36 | -0.19 | -0.24 | 0.24 | -0.10 | 0.38 | 0.01 |
| teachers | 0.94 | 0.29 | -0.04 | 1.00 | -0.21 | 0.24 | 0.34 | 0.84 | 0.18 | 0.39 | -0.02 | -0.30 | 0.12 | -0.17 | 0.01 |
| spendrat | -0.36 | 0.16 | -0.22 | -0.21 | 1.00 | 0.17 | -0.56 | -0.12 | 0.23 | -0.05 | 0.07 | 0.22 | 0.22 | -0.19 | 0.14 |
| avgsal | 0.26 | 0.39 | -0.08 | 0.24 | 0.17 | 1.00 | 0.26 | 0.53 | 0.16 | 0.24 | 0.18 | -0.35 | 0.11 | -0.10 | 0.12 |
| teachrat | 0.61 | 0.19 | 0.07 | 0.34 | -0.56 | 0.26 | 1.00 | 0.29 | 0.04 | 0.31 | 0.07 | -0.47 | -0.05 | -0.05 | -0.02 |
| p_5yearsexp | 0.78 | 0.27 | -0.04 | 0.84 | -0.12 | 0.53 | 0.29 | 1.00 | 0.15 | 0.32 | 0.07 | -0.28 | 0.07 | -0.12 | 0.11 |
| poorpct | 0.18 | 0.84 | -0.36 | 0.18 | 0.23 | 0.16 | 0.04 | 0.15 | 1.00 | 0.59 | 0.18 | -0.18 | 0.61 | -0.72 | 0.13 |
| Language limit | 0.46 | 0.61 | -0.19 | 0.39 | -0.05 | 0.24 | 0.31 | 0.32 | 0.59 | 1.00 | 0.09 | -0.37 | 0.30 | -0.37 | 0.05 |
| p_gifted | 0.00 | 0.22 | -0.24 | -0.02 | 0.07 | 0.18 | 0.07 | 0.07 | 0.18 | 0.09 | 1.00 | -0.18 | 0.03 | -0.15 | 0.05 |
| p_special | -0.40 | -0.41 | 0.24 | -0.30 | 0.22 | -0.35 | -0.47 | -0.28 | -0.18 | -0.37 | -0.18 | 1.00 | -0.05 | 0.17 | 0.03 |
| p_moved | 0.10 | 0.54 | -0.10 | 0.12 | 0.22 | 0.11 | -0.05 | 0.07 | 0.61 | 0.30 | 0.03 | -0.05 | 1.00 | -0.49 | 0.32 |
| p_passed | -0.17 | -0.66 | 0.38 | -0.17 | -0.19 | -0.10 | -0.05 | -0.12 | -0.72 | -0.37 | -0.15 | 0.17 | -0.49 | 1.00 | -0.06 |
| attendance | 0.01 | 0.10 | 0.01 | 0.01 | 0.14 | 0.12 | -0.02 | 0.11 | 0.13 | 0.05 | 0.05 | 0.03 | 0.32 | -0.06 | 1.00 |
# Side-by-side histograms (1 row x 2 columns): poverty rate on its original
# scale and attendance on a log10 scale.
par(mfrow = c(1, 2))
hist(schools$poorpct, main = "Distribution of Poverty Rate",
     xlab = "% Students in Poverty", col = "tomato", border = "white")
# NOTE: attendance has a minimum of 0 in this dataset, so log10() yields -Inf
# for those schools. hist() discards non-finite values, which means they are
# silently left out of this panel.
hist(log10(schools$attendance), main = "Distribution of Attendance (log)",
     xlab = "log10(Attendance)", col = "mediumpurple", border = "white")

# Student mobility against attendance. The column name is written in full:
# an abbreviated name only works through `$` partial matching on a plain
# data.frame. On a tibble it returns NULL, and plot() would then draw
# p_moved against its index instead of against attendance.
plot(schools$p_moved, schools$attendance,
     main = "Student Mobility vs. Attendance",
     xlab = "% Students Moved",
     ylab = "Attendance Rate",
     pch = 19,
     col = rgb(0.6, 0.2, 0.4, 0.5))
grid()

# Scatter-plot matrix of poverty rate, pass rate, and mobility.
pairs(~ poorpct + p_passed + p_moved, data = schools,
      main = "Scatter Matrix: Poverty, Pass Rate, Mobility")
Data sources: WHO Global Health Observatory, California Schools Dataset