#install.packages("tidyverse")
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
Class | Chirality | x | y |
---|---|---|---|
Y | L | 0.881 | 0.216 |
Y | R | 0.435 | 0.586 |
Y | R | 0.463 | 0.337 |
Y | R | 0.526 | 0.498 |
Y | L | 0.821 | 0.021 |
N | R | 0.585 | 0.970 |
N | R | 0.795 | 0.735 |
N | R | 0.585 | 0.311 |
N | L | 0.156 | 0.889 |
N | R | 0.262 | 0.304 |
ggplot(data = <DATA>, mapping = aes(<Mapping>)) +
<GEOM_FUNCTION>()
# Inspect the categorical columns and the overall structure of yeast_features.
# The lines starting with "##" are the console output recorded for each call.
levels(yeast_features[["feature"]])
## [1] "CDS" "chromosome" "exon" "gene" "mRNA"
## [6] "ncRNA_gene" "pseudogene" "rRNA" "rRNA_gene" "snoRNA"
## [11] "snoRNA_gene" "snRNA" "snRNA_gene" "transcript" "tRNA_gene"
levels(yeast_features[["chromosome"]])
## [1] "I" "II" "III" "IV" "IX" "Mito" "V" "VI" "VII" "VIII"
## [11] "X" "XI" "XII" "XIII" "XIV" "XV" "XVI"
str(object = yeast_features)
## Classes 'tbl_df', 'tbl' and 'data.frame': 7443 obs. of 6 variables:
## $ chromosome: Factor w/ 17 levels "I","II","III",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ feature : Factor w/ 15 levels "CDS","chromosome",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ start : int 10091 11565 12046 13363 21566 22395 24000 31567 33448 35155 ...
## $ stop : int 10399 11951 12426 13743 21850 22685 27968 32940 34701 36303 ...
## $ strand : Factor w/ 3 levels "-",".","+": 3 1 3 1 3 1 1 3 3 3 ...
## $ length : int 308 386 380 380 284 290 3968 1373 1253 1148 ...
# Histogram of feature lengths using the default binning.
ggplot(yeast_features, aes(x = length)) +
  geom_histogram()

# The same distribution drawn as a frequency polygon.
ggplot(yeast_features, aes(x = length)) +
  geom_freqpoly()

# One frequency polygon per feature type; colour is mapped on the layer only.
ggplot(yeast_features, aes(x = length)) +
  geom_freqpoly(aes(color = feature))

# Per-feature polygons with the y axis mapped to the computed density
# (`..density..`) instead of the raw count; the mappings live on the plot.
ggplot(
  yeast_features,
  aes(x = length, y = ..density.., color = feature)
) +
  geom_freqpoly()
# Small example data set: ten observations with two categorical columns
# (Class, Chirality) and two numeric columns (x, y).
# The x and y values are written out explicitly (they are the values shown in
# the accompanying table) instead of being drawn with an unseeded runif(), so
# the data and every plot built from it are identical on each run.
a <- tibble(
  Class = rep(c("Y", "N"), each = 5),
  Chirality = c("L", "R", "R", "R", "L", "R", "R", "R", "L", "R"),
  x = c(0.881, 0.435, 0.463, 0.526, 0.821, 0.585, 0.795, 0.585, 0.156, 0.262),
  y = c(0.216, 0.586, 0.337, 0.498, 0.021, 0.970, 0.735, 0.311, 0.889, 0.304)
)
# Feature length per chromosome as points, coloured by feature type.
ggplot(yeast_features, aes(x = chromosome, y = length)) +
  geom_point(mapping = aes(color = feature))

# Same points, jittered so that overlapping observations separate.
ggplot(yeast_features, aes(x = chromosome, y = length)) +
  geom_point(mapping = aes(color = feature), position = "jitter")

# Jittered points drawn mostly transparent (alpha of one tenth).
ggplot(yeast_features, aes(x = chromosome, y = length)) +
  geom_jitter(mapping = aes(color = feature), alpha = 1 / 10)

# Box plot of feature length for each chromosome.
ggplot(yeast_features, aes(x = chromosome, y = length)) +
  geom_boxplot()

# Box plots split by feature type through the colour mapping on the plot.
ggplot(yeast_features, aes(x = chromosome, y = length, color = feature)) +
  geom_boxplot()

# Box plots with the individual observations overlaid as jittered "." marks.
ggplot(yeast_features, aes(x = chromosome, y = length)) +
  geom_boxplot() +
  geom_point(mapping = aes(color = feature), shape = ".", position = "jitter")

# Same overlay with a log10 y axis.
ggplot(yeast_features, aes(x = chromosome, y = length)) +
  scale_y_log10() +
  geom_boxplot() +
  geom_point(mapping = aes(color = feature), shape = ".", position = "jitter")

# Chromosomes ordered along the x axis by their median feature length.
ggplot(
  yeast_features,
  aes(x = reorder(chromosome, length, FUN = median), y = length)
) +
  scale_y_log10() +
  geom_boxplot() +
  geom_point(mapping = aes(color = feature), shape = ".", position = "jitter")

# Same ordered plot with the axes flipped.
ggplot(
  yeast_features,
  aes(x = reorder(chromosome, length, FUN = median), y = length)
) +
  scale_y_log10() +
  coord_flip() +
  geom_boxplot() +
  geom_point(mapping = aes(color = feature), shape = ".", position = "jitter")
One categorical variable (factor): chromosome
# Number of features per chromosome, one bar (and one fill colour) each.
ggplot(yeast_features, aes(x = chromosome, fill = chromosome)) +
  geom_bar()
Two categorical variables (factors): chromosome and feature
# Feature counts per chromosome, stacked by feature type (default position).
ggplot(yeast_features, aes(x = chromosome, fill = feature)) +
  geom_bar()

# Same bars with position "fill".
ggplot(yeast_features, aes(x = chromosome, fill = feature)) +
  geom_bar(position = "fill")

# Same counts with position "dodge".
ggplot(yeast_features, aes(x = chromosome, fill = feature)) +
  geom_bar(position = "dodge")

# Summary of length per feature type computed by mean_sdl.
ggplot(yeast_features, aes(x = feature, y = length)) +
  stat_summary(fun.data = mean_sdl)

# Length histograms, one panel per chromosome.
ggplot(yeast_features, aes(x = length, fill = chromosome)) +
  facet_wrap(~chromosome) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Density-scaled length histograms, one panel per feature/strand combination,
# with the panels laid out in two rows.
ggplot(yeast_features, aes(x = length, y = ..density..)) +
  facet_wrap(feature ~ strand, nrow = 2) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Start against stop coordinate on log10 axes; feature type is encoded three
# times over (size, colour and shape) and points are drawn semi-transparent.
ggplot(yeast_features, aes(x = start, y = stop)) +
  scale_x_log10() +
  scale_y_log10() +
  geom_point(
    mapping = aes(size = feature, color = feature, shape = feature),
    alpha = 1 / 3
  )

# Same scatter plot with a smoother (default method) drawn over the points.
ggplot(yeast_features, aes(x = start, y = stop)) +
  scale_x_log10() +
  scale_y_log10() +
  geom_point(
    mapping = aes(size = feature, color = feature, shape = feature),
    alpha = 1 / 3
  ) +
  geom_smooth()
## `geom_smooth()` using method = 'gam'
# Start/stop scatter plot on log10 axes with a linear-model ("lm") smoother.
ggplot(yeast_features, aes(x = start, y = stop)) +
  scale_x_log10() +
  scale_y_log10() +
  geom_point(
    mapping = aes(size = feature, color = feature, shape = feature),
    alpha = 1 / 3
  ) +
  geom_smooth(method = "lm")

# Per-chromosome length histograms on a log10 x axis. The histogram layer sets
# the constant fill "red" even though the plot maps fill to chromosome.
ggplot(yeast_features, aes(x = length, fill = chromosome)) +
  scale_x_log10() +
  facet_wrap(~chromosome) +
  geom_histogram(fill = "red")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Start/stop scatter plot with a default smoother; coord_cartesian() restricts
# the visible region to 10-5000 on both axes.
ggplot(yeast_features, aes(x = start, y = stop)) +
  scale_x_log10() +
  scale_y_log10() +
  coord_cartesian(xlim = c(10, 5000), ylim = c(10, 5000)) +
  geom_point(
    mapping = aes(size = feature, color = feature, shape = feature),
    alpha = 1 / 3
  ) +
  geom_smooth()
## `geom_smooth()` using method = 'gam'
# Density of feature length per feature type on a log10 x axis, with a title,
# explicit axis labels and the legend placed below the panel.
ggplot(yeast_features, aes(x = length, y = ..density..)) +
  scale_x_log10() +
  geom_freqpoly(aes(color = feature)) +
  theme(legend.position = "bottom") +
  labs(
    x = "length (base pairs)",
    y = "probability density",
    title = "Distribution of feature sizes"
  )
# Density of feature length per feature type on a log10 x axis, labelled and
# drawn with the light theme, with the legend placed below the panel.
ggplot(data = yeast_features, mapping = aes(x = length, y = ..density..)) +
  geom_freqpoly(mapping = aes(color = feature)) +
  scale_x_log10() +
  labs(
    title = "Distribution of feature sizes",
    x = "length (base pairs)",
    y = "probability density"
  ) +
  # The complete theme has to come first: theme_light() replaces every theme
  # element, so a legend.position set before it would be discarded.
  theme_light() +
  theme(legend.position = "bottom")
More themes are available in the add-on package ggthemes.
#install.packages("ggthemes")
library(ggthemes)
# Per-feature length density on a log10 x axis, styled with theme_tufte().
ggplot(yeast_features, aes(x = length, y = ..density..)) +
  scale_x_log10() +
  geom_freqpoly(aes(color = feature)) +
  theme_tufte()
Don’t do this!
# The same per-feature density plot styled with theme_excel().
ggplot(yeast_features, aes(x = length, y = ..density..)) +
  scale_x_log10() +
  geom_freqpoly(aes(color = feature)) +
  theme_excel()
ggplot(data = <DATA>, mapping = aes(<Mapping>)) +
<GEOM_FUNCTION>() +
<STAT_FUNCTION>() +
<FACET_FUNCTION>() +
<SCALE_FUNCTION>() +
<THEME_FUNCTION>()
# Compact spelling of the per-feature density plot (positional data, x and y),
# using the light theme with the legend placed below the panel.
ggplot(yeast_features, aes(length, ..density..)) +
  geom_freqpoly(aes(color = feature)) +
  scale_x_log10() +
  # theme_light() is a complete theme and resets all theme elements, so the
  # legend position must be set after it to take effect.
  theme_light() +
  theme(legend.position = "bottom")
## Warning: Transformation introduced infinite values in continuous x-axis
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# Store the labelled per-feature density plot in my_plot so that further
# layers can be added to it later.
my_plot <- ggplot(yeast_features, aes(length, ..density..)) +
  geom_freqpoly(aes(color = feature)) +
  scale_x_log10() +
  labs(
    title = "Distribution of feature sizes",
    x = "length (base pairs)",
    y = "probability density"
  ) +
  # theme_light() is a complete theme and resets all theme elements, so the
  # legend position must be set after it to take effect.
  theme_light() +
  theme(legend.position = "bottom")

# Extend the stored plot with a red horizontal reference line at y = 2.
my_plot + geom_hline(yintercept = 2, color = "red")
## Warning: Transformation introduced infinite values in continuous x-axis
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 4 rows containing non-finite values (stat_bin).
Add: 1. new axis labels, 2. a title, 3. a trend line, and 4. change the theme to theme_tufte()
# Scatter plot of the example data `a`: y against x, coloured by Class, with
# every point drawn at size 3.
ggplot(a, aes(x, y, color = Class)) +
  geom_point(size = 3)
# Read the tab-delimited GFF3 annotation file into `gff`. The file is read
# without a header row, the first 24 lines are skipped, text after "#" is
# treated as a comment, and surrounding whitespace is trimmed from fields.
gff <- read_delim(
  file = "Saccharomyces_cerevisiae.R64-1-1.34.gff3",
  delim = "\t",
  skip = 24,
  comment = "#",
  col_names = FALSE,
  trim_ws = TRUE,
  escape_double = FALSE
)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_character(),
## X3 = col_character(),
## X4 = col_integer(),
## X5 = col_integer(),
## X6 = col_character(),
## X7 = col_character(),
## X8 = col_character(),
## X9 = col_character()
## )
# Give the nine positional columns of `gff` descriptive names.
# NOTE(review): "unknown1" and "unknown2" are presumably the GFF3 score and
# phase columns - confirm before relying on them; they are dropped below.
names(gff) <- c(
  "chromosome",
  "source",
  "feature",
  "start",
  "stop",
  "unknown1",
  "strand",
  "unknown2",
  "info"
)

#correct data types
# The three categorical columns are stored as factors.
gff$feature <- as.factor(gff$feature)
gff$chromosome <- as.factor(gff$chromosome)
gff$strand <- as.factor(gff$strand)

# Build the plotting table: keep the position columns, add a length column and
# retain only the five feature types of interest.
# NOTE(review): abs(start - stop) is 0 when start == stop and is one less than
# an inclusive base count (stop - start + 1); zero lengths become non-finite on
# a log10 axis. Confirm which definition of length is intended.
yeast_features <- gff %>%
  select(chromosome, feature, start, stop, strand) %>%
  mutate(length = abs(start - stop)) %>%
  filter(feature %in% c("CDS", "rRNA", "snoRNA", "snRNA", "tRNA_gene"))
Plot as barplots with error bars
# Summary of length for each feature type, computed by mean_sdl.
ggplot(yeast_features, aes(x = feature, y = length)) +
  stat_summary(fun.data = mean_sdl)
Change bin size for histogram
# Histogram of feature lengths with the default binning.
ggplot(yeast_features, aes(x = length)) +
  geom_histogram()