# Goal: Joint distributions, marginal distributions, useful tables.

# First let me invent some fake data
set.seed(102)                           # This yields a good illustration.
x <- sample(1:3, 15, replace=TRUE)
education <- factor(x, labels=c("None", "School", "College"))
x <- sample(1:2, 15, replace=TRUE)
gender <- factor(x, labels=c("Male", "Female"))
age <- runif(15, min=20,max=60)

D <- data.frame(age, gender, education)
rm(x,age,gender,education)
print(D)

table(D$education) # Table about education and gender -- table(D$gender, D$education) # Joint distribution of education and gender -- table(D$gender, D$education)/nrow(D) # Add in the marginal distributions also addmargins(table(D$gender, D$education)) addmargins(table(D$gender, D$education))/nrow(D) # Generate a good LaTeX table out of it -- library(xtable) xtable(addmargins(table(D$gender, D$education))/nrow(D), digits=c(0,2,2,2,2)) # You have to do | and \hline manually. # Study age by education category by(D$age, D$gender, mean) by(D$age, D$gender, sd) by(D$age, D$gender, summary) # Two-way table showing average age depending on education & gender a <- matrix(by(D$age, list(D$gender, D$education), mean), nrow=2)
rownames(a) <- levels(D$gender) colnames(a) <- levels(D$education)