ggplot2
We will use an example created by the Institute for Quantitative Social Science at Harvard University. The goal is to recreate a very nice graph from The Economist:
image
# Attach the plotting packages. library() stops with an error when a package
# is missing, whereas require() only returns FALSE and lets the script run on
# until the first ggplot2/ggrepel call fails.
library(ggplot2)
library(ggrepel)
# Download the data set used throughout this walkthrough. The columns used
# below are CPI, HDI, Region and Country.
dat <- read.csv("https://drive.google.com/uc?export=download&id=0B8CsRLdwqzbzUDJLa1owSVduLTA")
We will use the ggplot2 package to create this graph. We start by plotting CPI
(Corruption Perceptions Index) vs HDI
(Human Development Index):
# Calls data, then the aesthetics are x, y and how coloring works
# Base plot object: the data frame plus the aesthetic mappings
# (CPI on x, HDI on y, point colour by Region). Nothing is drawn yet.
pc1 <- ggplot(
  data = dat,
  mapping = aes(x = CPI, y = HDI, colour = Region)
)
# Adding geom_point() draws one point per row of the data.
pc1 + geom_point()
# Single trend line across all regions: aes(group = 1) overrides the
# per-Region grouping implied by the colour mapping, and the linear model is
# fitted on log(x).
trend_line <- geom_smooth(
  mapping = aes(group = 1),
  method = "lm",
  formula = y ~ log(x),
  se = FALSE,
  colour = "red"
)
# pc2 stores the plot with the trend line only; the points are added just for
# this preview and are not part of pc2.
pc2 <- pc1 + trend_line
pc2 + geom_point()
geom_smooth() adds the line to the graph, then on top of that geom_point()
adds the points. To choose a different point symbol, see ?shape.
The shape help page gives us:
## A look at all 25 symbols
# Reference chart of plotting symbols 1 to 25.
# data.frame() recycles x = 1:5 to match the 25 values of y and z; z is used
# verbatim as the shape code through scale_shape_identity().
df2 <- data.frame(x = 1:5, y = 1:25, z = 1:25)
# The previous version stacked three geom_point() layers at identical
# positions, each followed by its own scale_shape_identity(). The repeated
# scales only triggered "Scale for shape is already present" messages, and the
# last layer hid the two beneath it. Only that visible layer is kept here
# (red outline; black fill for the fillable symbols).
s <- ggplot(df2, aes(x = x, y = y)) +
  geom_point(aes(shape = z), size = 4, colour = "Red", fill = "Black") +
  scale_shape_identity()
# Display the chart; previously it was built but never shown.
s
# Preview: one open circle (shape 1) per observation on top of the trend line.
pc2 + geom_point(size = 4, shape = 1)
# Stack several open circles of slightly different sizes on every point,
# presumably to make the ring outline look thicker. Adding a list to a ggplot
# appends its layers in list order, so the layer order matches the size order.
ring_sizes <- c(4.2, 4.3, 4.1, 4, 3.9, 3.8, 3.5)
ring_layers <- lapply(
  ring_sizes,
  function(ring_size) geom_point(size = ring_size, shape = 1)
)
pc3 <- pc2 + ring_layers
pc3
# geom_text() is used to add labels to values.
# Countries to annotate on the plot. Entries are compared against dat$Country
# with %in%, so the spelling has to match the data exactly.
# Fixed: "Spane" was a typo for "Spain", so that entry could not select Spain.
pointsToLabel <- c(
  "Russia", "Venezuela", "Iraq", "Myanmar", "Sudan",
  "Afghanistan", "Congo", "Greece", "Argentina", "Brazil",
  "India", "Italy", "China", "South Africa", "Spain",
  "Botswana", "Cape Verde", "Bhutan", "Rwanda", "France",
  "United States", "Germany", "Britain", "Barbados", "Norway", "Japan",
  "New Zealand", "Singapore"
)
# Label only the selected countries: the text layer gets its own data, i.e.
# the rows of dat whose Country appears in pointsToLabel.
pc3 + geom_text(
  data = dat[dat$Country %in% pointsToLabel, ],
  mapping = aes(label = Country),
  colour = "gray20"
)
# Use the ggrepel package:
library("ggrepel")
# Same labels as before, drawn with geom_text_repel() instead of geom_text();
# force = 10 is passed through to the repel layer.
repelled_labels <- geom_text_repel(
  data = subset(dat, Country %in% pointsToLabel),
  mapping = aes(label = Country),
  colour = "gray20",
  force = 10
)
pc4 <- pc3 + repelled_labels
pc4
# Recode Region using explicit factor levels and labels.
# Names are the values as they appear in the data, values are the display
# labels; the order of this vector fixes the order of the factor levels.
region_display <- c(
  "EU W. Europe"      = "OECD",
  "Americas"          = "Americas",
  "Asia Pacific"      = "Asia &\nOceania",
  "East EU Cemt Asia" = "Central &\nEastern Europe",
  "MENA"              = "Middle East &\nNorth Africa",
  "SSA"               = "Sub-Saharan\nAfrica"
)
dat$Region <- factor(
  dat$Region,
  levels = names(region_display),
  labels = unname(region_display)
)
# Update pc4 above to use the new data that we have adjusted: the plot object
# keeps its own copy of the data, so it has to be replaced explicitly.
pc4$data <- dat
pc4
# Axes and the colour legend are configured through scales in ggplot2; the
# plot title is set with ggtitle().
library(grid)
# One colour per Region level, in level order.
region_palette <- c("#24576D", "#099DD7", "#28AADC",
                    "#248E84", "#F2583F", "#96503F")
cpi_axis <- scale_x_continuous(
  name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
  breaks = 1:10,
  limits = c(.9, 10.5)
)
hdi_axis <- scale_y_continuous(
  name = "Human Development Index, 2011 (1=Best)",
  breaks = seq(0.2, 1.0, by = 0.1),
  limits = c(0.2, 1.0)
)
# An empty name removes the legend title text.
region_colours <- scale_color_manual(name = "", values = region_palette)
pc5 <- pc4 +
  cpi_axis +
  hdi_axis +
  region_colours +
  ggtitle("Corruption and Human development")
# Theme overrides applied on top of theme_minimal().
# element_blank() removes an element entirely.
economist_overrides <- theme(
  text = element_text(color = "gray20"),
  # Legend: above the panel, laid out in one row, anchored towards the left.
  legend.position = "top",
  legend.direction = "horizontal",
  legend.justification = 0.1,
  legend.text = element_text(size = 11, color = "gray10"),
  # Axis text and titles; vjust moves the titles away from the axes.
  axis.text = element_text(face = "italic"),
  axis.title.x = element_text(vjust = -10),
  axis.title.y = element_text(vjust = 2),
  # Keep only the x axis line and drop the y ticks and y axis line.
  axis.ticks.y = element_blank(),
  axis.line = element_line(color = "gray40", size = 0.5),
  axis.line.y = element_blank(),
  # Horizontal major grid lines only.
  panel.grid.major = element_line(color = "gray50", size = 0.5),
  panel.grid.major.x = element_blank()
)
pc6 <- pc5 + theme_minimal() + economist_overrides
# R-squared of the same log-linear model that the red trend line shows.
mR2 <- summary(lm(HDI ~ log(CPI), data = dat))$r.squared
library(grid)
#png(file = "images/econScatter10.png", width = 800, height = 600)
# Draw the finished plot, then annotate the page with grid. The x/y values
# below are plain numbers, which grid interprets in its default units.
pc6
# Source note in the bottom-left corner.
grid.text("Sources: Transparency International; UN Human Development Report",
          x = .01, y = .01,
          just = "left",
          draw = TRUE, gp = gpar(fontsize = 7, col = "grey"))
# Short red segment that serves as the legend key for the trend line.
grid.segments(x0 = 0.81, x1 = 0.825,
              y0 = 0.90, y1 = 0.90,
              gp = gpar(col = "red"),
              draw = TRUE)
# Fixed: the label used to read "R? = " because the superscript two had been
# lost to an encoding error. The "\u00b2" escape yields the superscript while
# keeping this source file ASCII-only.
# as.integer() truncates the percentage rather than rounding it.
grid.text(paste0("R\u00b2 = ",
                 as.integer(mR2 * 100),
                 "%"),
          x = 0.835, y = 0.90,
          gp = gpar(col = "gray20"),
          draw = TRUE,
          just = "left")
#dev.off()