geom_histogram(mapping = NULL, data = NULL, stat = "bin", position = "stack", ...)
mapping: the aesthetic mapping, usually constructed with aes or aes_string. Only needs to be set at the layer level if you are overriding the plot defaults.
...: other arguments passed on to layer. This can include aesthetics whose values you want to set, not map. See layer
for more details.

geom_histogram is an alias for geom_bar plus stat_bin, so you will need to look at the documentation for those objects to get more information about the parameters.
By default, stat_bin uses 30 bins - this is not a good default, but the idea is to get you experimenting with different binwidths. You may need to look at a few to uncover the full story behind your data.
geom_histogram understands the following aesthetics (required aesthetics are in bold):
- **x**
- alpha
- colour
- fill
- linetype
- size
- weight
set.seed(5689)
movies <- movies[sample(nrow(movies), 1000), ]

# Simple examples
qplot(rating, data=movies, geom="histogram")
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
#> Warning message: position_stack requires constant width: output may be incorrect
qplot(rating, data=movies, weight=votes, geom="histogram")
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
#> Warning message: position_stack requires constant width: output may be incorrect
qplot(rating, data=movies, weight=votes, geom="histogram", binwidth=1)
qplot(rating, data=movies, weight=votes, geom="histogram", binwidth=0.1)
#> Warning message: position_stack requires constant width: output may be incorrect

# More complex
m <- ggplot(movies, aes(x=rating))
m + geom_histogram()
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
#> Warning message: position_stack requires constant width: output may be incorrect
m + geom_histogram(aes(y = ..density..)) + geom_density()
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
#> Warning message: position_stack requires constant width: output may be incorrect
m + geom_histogram(binwidth = 1)
m + geom_histogram(binwidth = 0.5)
m + geom_histogram(binwidth = 0.1)
#> Warning message: position_stack requires constant width: output may be incorrect

# Add aesthetic mappings
m + geom_histogram(aes(weight = votes))
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
#> Warning message: position_stack requires constant width: output may be incorrect
m + geom_histogram(aes(y = ..count..))
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
#> Warning message: position_stack requires constant width: output may be incorrect
m + geom_histogram(aes(fill = ..count..))
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
#> Warning message: position_stack requires constant width: output may be incorrect

# Change scales
m + geom_histogram(aes(fill = ..count..)) + scale_fill_gradient("Count", low = "green", high = "red")
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
#> Warning message: position_stack requires constant width: output may be incorrect

# Often we don't want the height of the bar to represent the
# count of observations, but the sum of some other variable.
# For example, the following plot shows the number of movies
# in each rating.
qplot(rating, data=movies, geom="bar", binwidth = 0.1)
#> Warning message: position_stack requires constant width: output may be incorrect
# If, however, we want to see the number of votes cast in each
# category, we need to weight by the votes variable
qplot(rating, data=movies, geom="bar", binwidth = 0.1, weight=votes, ylab = "votes")
#> Warning message: position_stack requires constant width: output may be incorrect

m <- ggplot(movies, aes(x = votes))
# For transformed scales, binwidth applies to the transformed data.
# The bins have constant width on the transformed scale.
m + geom_histogram() + scale_x_log10()
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
m + geom_histogram(binwidth = 1) + scale_x_log10()
m + geom_histogram() + scale_x_sqrt()
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
m + geom_histogram(binwidth = 10) + scale_x_sqrt()

# For transformed coordinate systems, the binwidth applies to the
# raw data. The bins have constant width on the original scale.

# Using log scales does not work here, because the first
# bar is anchored at zero, and so when transformed becomes negative
# infinity. This is not a problem when transforming the scales, because
# no observations have 0 ratings.
m + geom_histogram(origin = 0) + coord_trans(x = "log10")
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
# Use origin = 0, to make sure we don't take sqrt of negative values
m + geom_histogram(origin = 0) + coord_trans(x = "sqrt")
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
m + geom_histogram(origin = 0, binwidth = 1000) + coord_trans(x = "sqrt")

# You can also transform the y axis. Remember that the base of the bars
# has value 0, so log transformations are not appropriate
m <- ggplot(movies, aes(x = rating))
m + geom_histogram(binwidth = 0.5) + scale_y_sqrt()
m + geom_histogram(binwidth = 0.5) + scale_y_reverse()
#> Warning message: Stacking not well defined when ymin != 0

# Set aesthetics to fixed value
m + geom_histogram(colour = "darkgreen", fill = "white", binwidth = 0.5)

# Use facets
m <- m + geom_histogram(binwidth = 0.5)
m + facet_grid(Action ~ Comedy)

# Often more useful to use density on the y axis when facetting
m <- m + aes(y = ..density..)
m + facet_grid(Action ~ Comedy)
m + facet_wrap(~ mpaa)

# Multiple histograms on the same graph
# see ?position, ?position_fill, etc for more details.
set.seed(6298)
diamonds_small <- diamonds[sample(nrow(diamonds), 1000), ]
ggplot(diamonds_small, aes(x=price)) + geom_bar()
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
hist_cut <- ggplot(diamonds_small, aes(x=price, fill=cut))
hist_cut + geom_bar() # defaults to stacking
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
hist_cut + geom_bar(position="fill")
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
hist_cut + geom_bar(position="dodge")
#> stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# This is easy in ggplot2, but not visually effective. It's better
# to use a frequency polygon or density plot. Like this:
ggplot(diamonds_small, aes(price, ..density.., colour = cut)) + geom_freqpoly(binwidth = 1000)
# Or this:
ggplot(diamonds_small, aes(price, colour = cut)) + geom_density()
# Or if you want to be fancy, maybe even this:
ggplot(diamonds_small, aes(price, fill = cut)) + geom_density(alpha = 0.2)
# Which looks better when the distributions are more distinct
ggplot(diamonds_small, aes(depth, fill = cut)) + geom_density(alpha = 0.2) + xlim(55, 70)
#> Warning message: Removed 2 rows containing non-finite values (stat_density).