# plot_eigenwords.R
# Plots the first two principal components (PCs) of the eigenfeatures of a
# selected set of words.
#
# Usage (e.g.): Rscript plot_eigenwords.R [eigenwords_file]
#
# Requires the helper script "pick_words.py" to be executable in the current
# working directory (it is invoked below as "./pick_words.py").

# specify filename and test_words for your application
filename <- "eigenwords_google.txt"  # eigenwords are space delimited, no header

# An optional first command-line argument overrides the default filename.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) >= 1 && !is.na(args[1])) {
  filename <- args[1]
}

# pick the words to be plotted
test_words <- "one two three four five six seven eight nine ten 1 2 3 4 5 6 7 8 9 10 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009"
#test_words = 'house home dog truck boat word river cat car sleep eat push drink listen carry talk disagree agree'
#test_words = 'i you he she they we us them him her our his hers'
#test_words = 'man woman boy girl lawyer doctor guy farmer teacher citizen mother wife father son husband brother daughter sister boss uncle pressure temperature permeability density stress viscosity gravity tension miles pounds degrees inches barrels tons acres meters bytes'
#test_words = 'mary patricia tricia linda barbara elizabeth liz betsy jennifer maria susan margaret dorothy lisa nancy karen betty helen john robert bob michael mike william david richard charles joseph joe thomas tom christopher daniel dan paul donald george'
#test_words = 'strikers workers employee crowd audience ballplayer athlete teacher professor mentor advisor'
#test_words = 'apples pears plums oranges peaches fruit cake pie dessert truck boat car motorcycle'

##################################################################################

# Fail early with a clear message rather than letting the helper script or
# read.table() die on a missing or empty input file.
# (The file type itself is not validated here.)
if (!file.exists(filename) || dir.exists(filename)) {
  stop("eigenword file not found: ", filename, call. = FALSE)
}
if (!isTRUE(file.info(filename)$size > 0)) {
  stop("eigenword file is empty: ", filename, call. = FALSE)
}

# FYI: "colors" here really refers to "words"
# select the words we want into 'colors.txt'
# Both arguments are shell-quoted so that a filename containing spaces or
# shell metacharacters is passed through intact. The word list keeps one
# leading and one trailing space, exactly as the helper has always received
# it (pick_words.py is not visible from this script, so its input is left
# unchanged).
status <- system2(
  "./pick_words.py",
  args = c(shQuote(filename), shQuote(paste0(" ", test_words, " ")))
)
if (status != 0) {
  # Stop here: otherwise a stale 'colors.txt' from an earlier run could be
  # read and plotted as if it were the current selection.
  stop("./pick_words.py failed with exit status ", status, call. = FALSE)
}
if (!file.exists("colors.txt") || !isTRUE(file.info("colors.txt")$size > 0)) {
  stop("'colors.txt' is missing or empty; there are no words to plot",
       call. = FALSE)
}

colors <- read.table("colors.txt", quote = "", comment.char = "")
n.words <- nrow(colors)
print(paste("number of words found: ", n.words))
p <- ncol(colors)
print(paste("eigenword length: ", p - 1))

# Two PCs require at least two rows (words) and two coordinate columns;
# with fewer, svd()$v has fewer than two columns and the indexing below fails.
if (n.words < 2 || p < 3) {
  stop("need at least two words and an eigenword length of at least two ",
       "to plot two PCs", call. = FALSE)
}

# first item is the word; the remaining columns are its eigenword coordinates
c.mat <- as.matrix(colors[, 2:p, drop = FALSE])
if (!is.numeric(c.mat)) {
  stop("'colors.txt' contains non-numeric eigenword values", call. = FALSE)
}

# mean center each coordinate column
c.mat <- sweep(c.mat, 2, colMeans(c.mat))

c.svd <- svd(c.mat)
basis <- c.svd$v[, 1:2]

# project onto the first two PCs
# 'scaling' holds per-PC scale factors; the identity leaves the projection
# unchanged.
scaling <- diag(c(1, 1))
xy <- c.mat %*% basis %*% scaling

# Set up the axes without drawing point markers; only the word labels are shown.
plot(xy, type = "n", asp = 1, xlab = "PC 1", ylab = "PC 2")

# plot the names of the words
text(xy, labels = as.character(colors[, 1]), cex = 1.5)