16 Principal components

Manifolds and planes

# Get the party names
Parties <-  read.csv("VotesS1-tally.csv", as.is=TRUE, 
                      header=FALSE, nrows=1,
                     stringsAsFactors = FALSE )
# Get rid of the first column and turn into a vector.
Parties <- as.character(Parties[,-1])

Votes <- read.csv( "VotesS1-tally.csv", skip=1 )
Votes <- subset(Votes, complete.cases(Votes) )
rownames( Votes ) <- Votes$VOTE
Votes <- Votes[,-1]
memberNames <- names(Votes)
nBallots <- nrow( Votes )
VotesM <- matrix( unlist( Votes ), nrow=nBallots )
# VotesM <- VotesM[1:nBallots,1:ncol(Votes)] # drop the ballot names
# Put in alphabetical order
inds <- order( memberNames )
VAlpha <- VotesM[,inds]
Parties <- Parties[inds]
# remove <- which( apply( VAlpha, 2, function(x) sum(abs(x))) < 100 )
# memberNames <- memberNames[-remove]
# Parties <- Parties[-remove]
# Valpha <- VAlpha[,-remove]
res <- svd( VAlpha )
showVotes <- Votes[,inds]
a1 <- res$u[,1,drop=FALSE]
b1 <- res$v[,1,drop=FALSE] %>% t
a2 <- res$u[,2,drop=FALSE]
b2 <- res$v[,2,drop=FALSE] %>% t
library(reshape2) # for melt
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
melt( VAlpha ) %>% 
  mutate( Vote=factor(value, labels=c("Nay","Abstain","Aye"))) %>% 
  ggplot( aes(x=Var1, y=Var2, fill=Vote)) +
  geom_tile() +
  xlab("Ballot") + ylab("Member of Parliament") + 
  scale_fill_manual( values=c(rgb(.2,.2,.2,.6), rgb(.95,.95,.95), rgb(209/255,148/255,12/255)))

Random2 <- data.frame( ballot48=VAlpha[48,], ballot118=VAlpha[118,] )
ggplot( Random2, aes(x=ballot48, y=ballot118)) +
  geom_point( alpha=.7, position=position_jitter(width = .1, height = .1) )+
  geom_point( alpha=.01, size=10, color="red" )
\label{fig:ballot-values} Positions of members of parliament on two ballots.

Figure 16.1: Positions of members of parliament on two ballots.

\label{fig:many-ballots} Positions of members of parliament on two ballot indices made up by the sum of groups of ballots.

Figure 16.2: Positions of members of parliament on two ballot indices made up by the sum of groups of ballots.

set.seed(101)
Best <- data.frame( one=c(b1), two=c(b2), three=res$v[,3], four=res$v[,4], five=res$v[,5] )
clusts <- kmeans( Best, centers=6)
Best$cluster <- as.factor(clusts$cluster)
# I lost the first member: Canavan.  Figure out later.
Best$actualParty <- Parties
ggplot( data=Best, aes( x=one, y=two )) +
    geom_point( shape=1, color='red', size=7, aes(x=0,y=0)) + 
  geom_point( size=5, alpha=.6, aes(color=cluster,shape=cluster) ) + 
  xlab("Best Vector from SVD") + 
  ylab("Next Best Vector from SVD") + 
  ggtitle("Political Positions of Members of Parliament")
\label{fig:ballot-PCA} The position of each member of Parliament using the two 'best' ways of summing the ballots.

Figure 16.3: The position of each member of Parliament using the two ‘best’ ways of summing the ballots.

inds1 <- order(c(res$u[,1]))
inds2 <- order(c(res$v[,1]))
VA <- VAlpha[ inds1, inds2 ]
foo <- data.frame( vote=factor(c(t(VA)), labels=c("Nay","Abstain","Aye")), 
  y=rep(1:134,times=773),
  x=rep(1:773, each=134) )
ggplot( foo, aes(x=x, y=y, fill=vote)) +
  geom_tile() +
  xlab("Ballot") + ylab("Member of Parliament") + 
  scale_fill_manual( values=c(rgb(.2,.2,.2,.6), rgb(.95,.95,.95), rgb(209/255,148/255,12/255)))