English Dialect Maps

Interpreting American English Dialect Maps

Mija Van Der Wege
June 13, 2013

Data Sampling

SurveyMapsbyState

Individual vs. Aggregate Visualizations

Key Questions:

  • What conclusions are revealed by the individual and the aggregate plots?
  • How do we interpret heat maps vs. bubbles plotted on maps?
  • How does this inform our data visualization choices?

DrinkNames DrinkNames

Multivariate Spatial Patterns

Key Questions:

  • How can we use these data to determine dialect clusters?
  • What kinds of visualizations might reveal clusters?

Crayon Vowels

Working with Student-Generated Data

Figuring out the data: variables, cases, messiness

  • names()
  • levels()
  • summary()
# Attach the DCF package. library() stops with an error if the package is
# missing, whereas require() only returns FALSE and lets the script continue.
library(DCF)
# Restore the saved workspace objects; psyc200_data (used below) is expected
# to come from this file.
load("~/psyc200.RData")
# List the variables (columns) available in the data set.
names(psyc200_data)
 [1] "sex"          "race"         "neopiE"       "neopiN"      
 [5] "neopiC"       "neopiA"       "neopiO"       "weaponIAT"   
 [9] "careerIAT"    "empathy"      "system"       "visualRT"    
[13] "visualGoNoGo"

Summarizing Data

# Per-variable overview: level counts for factors; quartiles, mean, and
# number of NA's for numeric columns.
summary(psyc200_data)
 sex    race        neopiE         neopiN         neopiC    
 F:42   C :48   Min.   : 1.0   Min.   : 0.0   Min.   : 0.0  
 M:26   NC:20   1st Qu.:37.0   1st Qu.:15.0   1st Qu.:43.0  
                Median :54.0   Median :40.0   Median :75.0  
                Mean   :54.7   Mean   :39.7   Mean   :64.5  
                3rd Qu.:73.0   3rd Qu.:56.0   3rd Qu.:89.0  
                Max.   :97.0   Max.   :97.0   Max.   :99.0  
                NA's   :3      NA's   :3      NA's   :3     
     neopiA         neopiO       weaponIAT      careerIAT   
 Min.   : 1.0   Min.   : 0.0   Min.   :0.00   Min.   :0.00  
 1st Qu.:49.0   1st Qu.:29.0   1st Qu.:1.00   1st Qu.:1.00  
 Median :71.0   Median :64.0   Median :3.00   Median :3.00  
 Mean   :63.9   Mean   :54.5   Mean   :2.47   Mean   :2.29  
 3rd Qu.:82.0   3rd Qu.:77.0   3rd Qu.:3.00   3rd Qu.:3.00  
 Max.   :97.0   Max.   :99.0   Max.   :4.00   Max.   :4.00  
 NA's   :3      NA's   :3                                   
    empathy         system        visualRT    visualGoNoGo
 Min.   :23.0   Min.   :11.0   Min.   :191   Min.   :291  
 1st Qu.:43.0   1st Qu.:16.0   1st Qu.:255   1st Qu.:348  
 Median :50.0   Median :24.0   Median :277   Median :382  
 Mean   :49.1   Mean   :25.9   Mean   :288   Mean   :390  
 3rd Qu.:57.0   3rd Qu.:32.0   3rd Qu.:309   3rd Qu.:422  
 Max.   :70.0   Max.   :62.0   Max.   :449   Max.   :594  
 NA's   :1      NA's   :1      NA's   :3     NA's   :3    

Transforming Data

# Recode the numeric IAT scores (0-4) as labelled factors.
# The levels are listed explicitly so each label stays tied to its score even
# if some score never occurs in the data; without `levels`, factor() assigns
# the labels to whichever sorted unique values happen to be present.
iat_labels <- c("reverse", "none", "slight", "moderate", "strong")
psyc200_data <- transform(
  psyc200_data,
  weaponIATcat = factor(weaponIAT, levels = 0:4, labels = iat_labels),
  careerIATcat = factor(careerIAT, levels = 0:4, labels = iat_labels)
)
# Re-check the data set: the two new factor columns appear at the end.
summary(psyc200_data)
 sex    race        neopiE         neopiN         neopiC    
 F:42   C :48   Min.   : 1.0   Min.   : 0.0   Min.   : 0.0  
 M:26   NC:20   1st Qu.:37.0   1st Qu.:15.0   1st Qu.:43.0  
                Median :54.0   Median :40.0   Median :75.0  
                Mean   :54.7   Mean   :39.7   Mean   :64.5  
                3rd Qu.:73.0   3rd Qu.:56.0   3rd Qu.:89.0  
                Max.   :97.0   Max.   :97.0   Max.   :99.0  
                NA's   :3      NA's   :3      NA's   :3     
     neopiA         neopiO       weaponIAT      careerIAT   
 Min.   : 1.0   Min.   : 0.0   Min.   :0.00   Min.   :0.00  
 1st Qu.:49.0   1st Qu.:29.0   1st Qu.:1.00   1st Qu.:1.00  
 Median :71.0   Median :64.0   Median :3.00   Median :3.00  
 Mean   :63.9   Mean   :54.5   Mean   :2.47   Mean   :2.29  
 3rd Qu.:82.0   3rd Qu.:77.0   3rd Qu.:3.00   3rd Qu.:3.00  
 Max.   :97.0   Max.   :99.0   Max.   :4.00   Max.   :4.00  
 NA's   :3      NA's   :3                                   
    empathy         system        visualRT    visualGoNoGo   weaponIATcat
 Min.   :23.0   Min.   :11.0   Min.   :191   Min.   :291   reverse : 2   
 1st Qu.:43.0   1st Qu.:16.0   1st Qu.:255   1st Qu.:348   none    :16   
 Median :50.0   Median :24.0   Median :277   Median :382   slight  :14   
 Mean   :49.1   Mean   :25.9   Mean   :288   Mean   :390   moderate:20   
 3rd Qu.:57.0   3rd Qu.:32.0   3rd Qu.:309   3rd Qu.:422   strong  :16   
 Max.   :70.0   Max.   :62.0   Max.   :449   Max.   :594                 
 NA's   :1      NA's   :1      NA's   :3     NA's   :3                   
   careerIATcat
 reverse : 5   
 none    :17   
 slight  :11   
 moderate:23   
 strong  :12   


Visualizing Patterns

# Cross-tabulate the two IAT categories: rows are weaponIATcat (first
# argument), columns are careerIATcat (second argument).
table(psyc200_data$weaponIATcat,psyc200_data$careerIATcat)

           reverse none slight moderate strong
  reverse        1    0      0        1      0
  none           3    4      1        6      2
  slight         1    5      2        3      3
  moderate       0    6      4        6      4
  strong         0    2      4        7      3

Correlating Variables

# Pull out the five neopi columns, in the order they should appear in the
# correlation matrix.
neopi_cols <- c("neopiE", "neopiN", "neopiA", "neopiC", "neopiO")
NEOpi <- psyc200_data[neopi_cols]
# Pairwise correlations, computed only on rows with no missing values.
cor(NEOpi, use = "complete.obs")
        neopiE   neopiN  neopiA  neopiC   neopiO
neopiE  1.0000 -0.49173  0.2382  0.1663  0.35560
neopiN -0.4917  1.00000 -0.5355 -0.3897 -0.02323
neopiA  0.2382 -0.53546  1.0000  0.3565  0.01650
neopiC  0.1663 -0.38970  0.3565  1.0000 -0.35206
neopiO  0.3556 -0.02323  0.0165 -0.3521  1.00000

Grouping Data

# Mean of each neopi scale within each group, ignoring missing values.
# The same five summaries are computed twice: once by sex, once by race.
SexPersonality <- groupBy(
  psyc200_data,
  by = sex,
  c(
    meanE = mean(neopiE, na.rm = TRUE),
    meanN = mean(neopiN, na.rm = TRUE),
    meanC = mean(neopiC, na.rm = TRUE),
    meanA = mean(neopiA, na.rm = TRUE),
    meanO = mean(neopiO, na.rm = TRUE)
  )
)
RacePersonality <- groupBy(
  psyc200_data,
  by = race,
  c(
    meanE = mean(neopiE, na.rm = TRUE),
    meanN = mean(neopiN, na.rm = TRUE),
    meanC = mean(neopiC, na.rm = TRUE),
    meanA = mean(neopiA, na.rm = TRUE),
    meanO = mean(neopiO, na.rm = TRUE)
  )
)
# Show the by-sex table of group means.
print(SexPersonality)
  sex meanE meanN meanC meanA meanO
1   F 53.95 41.74 65.23 61.62 53.05
2   M 55.88 36.73 63.38 67.19 56.58
# Show the by-race table of group means.
print(RacePersonality)
  race meanE meanN meanC meanA meanO
1    C 54.72 43.21 64.45 65.45 52.13
2   NC 54.72 30.67 64.61 59.67 60.56

Visualizing Patterns

Bar chart?

# Box-and-whisker plot of neopiN, one box per race group.
bwplot(neopiN ~ race, data=psyc200_data)

plot of chunk unnamed-chunk-7

Visualizing Patterns

# Keep only the visualRT and visualGoNoGo columns for plotting.
GoNoGo <- psyc200_data[c("visualRT", "visualGoNoGo")]
# Scatterplot of visualGoNoGo against visualRT with a fitted lm trend line.
ggplot(data = GoNoGo, aes(x = visualRT, y = visualGoNoGo)) +
  geom_point() +
  stat_smooth(method = lm)

plot of chunk unnamed-chunk-8

# Five-number summary, mean, and NA count for visualRT.
summary(psyc200_data$visualRT)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    191     255     277     288     309     449       3 
# Five-number summary, mean, and NA count for visualGoNoGo.
summary(psyc200_data$visualGoNoGo)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    291     348     382     390     423     594       3