##Propensity Score Matching Example
#The data were simulated using the correlation matrix (Sigma) below
#This will require that you install the "MASS" Package in R
install.packages("MASS")
Library(MASS)
#Set seed to ensure you get the same result each time.
set.seed(10001)
#100 cases were simluated.
simdata <- as.data.frame(mvrnorm(100, mu = c(.5,18,2.5,5,3.0),
Sigma = matrix(c(1,0.2,0.4,0.6,0.5,
0.2, 1,0.7,0.2,0.8,
0.4,0.7,1,0.5,0.4,
0.6,0.2,0.5,1,0.25,
0.5,0.8,0.4,0.25,1), ncol = 5),
empirical = TRUE))
#Add column Names to the simulated data
colnames(simdata) <- c("Group","ACT", "HSGPA", "Absence", "GPA")
#Recode Group variable into binary grouping variable.
simdata$Group[simdata$Group< 0.5] <- 0
simdata$Group[simdata$Group>=0.5] <- 1
#You can save this data as a .csv file, if desired.
write.table (simdata, file = "SampleData.csv", sep =",", col.names =,)
#Null model - no covariates
#t-test of GPA by group
t.test(simdata$GPA~simdata$Group)
describeBy(simdata$GPA, simdata$Group)
#load "MatchIt" program for propenisty score matching.
#This program may need to be installed.
install.packages("MatchIt")
library(MatchIt)
#Estimate propensity scores and conduct nearest neighbor matching.
m.out <- matchit(Group ~ ACT + HSGPA + Absence , data = simdata, distance = "logit",
method = "nearest", caliper=.25, replace = TRUE)
#The nonrandom package can be used to obtain an distribution of propensity scores
#However, histograms can also be obtained using the syntax listed later (plot(m.out, type="hist"))
library(nonrandom)
ps <- pscore(data = simdata, Group ~ ACT + HSGPA + Absence)
plot.pscore(x = ps,
main = "PS distribution",
xlab = "",
par.1=list(col="red"),
par.0=list(lwd=2),
par.dens=list(kernel="gaussian"), ylim=c(0,5.5),xlim=c(0,1.0))
#Obtain graphical plots and statistical tables.
#Use the sink() to Print this output to folder to your working directory.
#The sink() line can be removed if you only want to view the output on screen in R.
sink(file="PSMoutput.txt")
plot(m.out, type="hist")
summary(m.out, interactions = FALSE, standardize = TRUE)
sink()
#Extract datafile (m.data) with match cases.
m.data<-match.data(object=m.out, group="all", distance = "distance", weights = "weights")
####Examine the matched dataset
t.test(m.data$GPA~m.data$Group)
describeBy(m.data$GPA, m.data$Group)