Pinafore
diff --git a/‎2014_acl_reganchor.tex
+120 b/‎2014_acl_reganchor.tex
+120
diff --git a/‎2014_acl_reganchor/figures.R b/‎2014_acl_reganchor/figures.R
diff --git a/‎2014_acl_reganchor/figures/20news_700_Likelihood.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_700_Likelihood.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/20news_700_Likelihood_Beta.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_700_Likelihood_Beta.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/20news_700_Likelihood_Beta_Corrected.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_700_Likelihood_Beta_Corrected.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/20news_700_TI.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_700_TI.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/20news_700_TI_Beta.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_700_TI_Beta.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/20news_700_TI_Beta_Corrected.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_700_TI_Beta_Corrected.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/20news_L2_0_1.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_L2_0_1.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/20news_M_HL_chart.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_M_HL_chart.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/20news_M_HL_chart_updated.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_M_HL_chart_updated.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/20news_M_TI_chart.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_M_TI_chart.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/20news_M_TI_chart_updated.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_M_TI_chart_updated.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/20news_TestData.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/20news_TestData.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/Beta_Dist.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/Beta_Dist.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/Beta_function_005_3.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/Beta_function_005_3.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/Beta_function_05_05.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/Beta_function_05_05.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/Beta_function_2_09.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/Beta_function_2_09.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/Beta_function_2_3.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/Beta_function_2_3.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/Beta_function_combined.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/Beta_function_combined.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/Convergence_C.pdf
6.3 KB b/‎2014_acl_reganchor/figures/Convergence_C.pdf
6.3 KB
diff --git a/‎2014_acl_reganchor/figures/DiffC.pdf
6.15 KB b/‎2014_acl_reganchor/figures/DiffC.pdf
6.15 KB
diff --git a/‎2014_acl_reganchor/figures/Gaussian_Dist.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/Gaussian_Dist.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/HL.pdf
11.7 KB b/‎2014_acl_reganchor/figures/HL.pdf
11.7 KB
diff --git a/‎2014_acl_reganchor/figures/HL_HL.pdf
17.9 KB b/‎2014_acl_reganchor/figures/HL_HL.pdf
17.9 KB
diff --git a/‎2014_acl_reganchor/figures/HL_L2.jpg
35.3 KB b/‎2014_acl_reganchor/figures/HL_L2.jpg
35.3 KB
diff --git a/‎2014_acl_reganchor/figures/HL_TI.pdf
12.2 KB b/‎2014_acl_reganchor/figures/HL_TI.pdf
12.2 KB
diff --git a/‎2014_acl_reganchor/figures/M_100_700_topics.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/M_100_700_topics.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/M_100_700_topics_save.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/M_100_700_topics_save.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/M_HL.pdf
5.2 KB b/‎2014_acl_reganchor/figures/M_HL.pdf
5.2 KB
diff --git a/‎2014_acl_reganchor/figures/M_TI.pdf
5.23 KB b/‎2014_acl_reganchor/figures/M_TI.pdf
5.23 KB
diff --git a/‎2014_acl_reganchor/figures/NIPS_Beta_0_1.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/NIPS_Beta_0_1.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/NIPS_M_100_Likelihood.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/NIPS_M_100_Likelihood.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/NIPS_M_100_TI.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/NIPS_M_100_TI.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/Norm_0_1.pdf.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/Norm_0_1.pdf.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/Sensitive_M.pdf
6.43 KB b/‎2014_acl_reganchor/figures/Sensitive_M.pdf
6.43 KB
diff --git a/‎2014_acl_reganchor/figures/TI.pdf
11.6 KB b/‎2014_acl_reganchor/figures/TI.pdf
11.6 KB
diff --git a/‎2014_acl_reganchor/figures/TI_HL.pdf
12.4 KB b/‎2014_acl_reganchor/figures/TI_HL.pdf
12.4 KB
diff --git a/‎2014_acl_reganchor/figures/TI_L2.jpg
29.9 KB b/‎2014_acl_reganchor/figures/TI_L2.jpg
29.9 KB
diff --git a/‎2014_acl_reganchor/figures/TI_TI.pdf
17.9 KB b/‎2014_acl_reganchor/figures/TI_TI.pdf
17.9 KB
diff --git a/‎2014_acl_reganchor/figures/TI_TI_wiki.pdf
17.9 KB b/‎2014_acl_reganchor/figures/TI_TI_wiki.pdf
17.9 KB
diff --git a/‎2014_acl_reganchor/figures/Top20words.pdf
92.8 KB b/‎2014_acl_reganchor/figures/Top20words.pdf
92.8 KB
diff --git a/‎2014_acl_reganchor/figures/Top20words_anchors.pdf
88.6 KB b/‎2014_acl_reganchor/figures/Top20words_anchors.pdf
88.6 KB
diff --git a/‎2014_acl_reganchor/figures/UMD_logo_circ.png
50.5 KB b/‎2014_acl_reganchor/figures/UMD_logo_circ.png
50.5 KB
diff --git a/‎2014_acl_reganchor/figures/anchor_selection_M.jpg
19 KB b/‎2014_acl_reganchor/figures/anchor_selection_M.jpg
19 KB
diff --git a/‎2014_acl_reganchor/figures/density.pdf
41.2 KB b/‎2014_acl_reganchor/figures/density.pdf
41.2 KB
diff --git a/‎2014_acl_reganchor/figures/density_plot.png
10.1 KB b/‎2014_acl_reganchor/figures/density_plot.png
10.1 KB
diff --git a/‎2014_acl_reganchor/figures/diff_plots.R
+73 b/‎2014_acl_reganchor/figures/diff_plots.R
+73
diff --git a/‎2014_acl_reganchor/figures/isComparable.jpg
3.56 KB b/‎2014_acl_reganchor/figures/isComparable.jpg
3.56 KB
diff --git a/‎2014_acl_reganchor/figures/results.pdf
14.1 KB b/‎2014_acl_reganchor/figures/results.pdf
14.1 KB
diff --git a/‎2014_acl_reganchor/figures/results_option3.pdf
13.5 KB b/‎2014_acl_reganchor/figures/results_option3.pdf
13.5 KB
diff --git a/‎2014_acl_reganchor/figures/score_parallel.pdf
7.56 KB b/‎2014_acl_reganchor/figures/score_parallel.pdf
7.56 KB
diff --git a/‎2014_acl_reganchor/figures/terp.png.REMOVED.git-id
+1 b/‎2014_acl_reganchor/figures/terp.png.REMOVED.git-id
+1
diff --git a/‎2014_acl_reganchor/figures/topic_density.pdf
45.8 KB b/‎2014_acl_reganchor/figures/topic_density.pdf
45.8 KB
diff --git a/‎2014_acl_reganchor/figures/umd-logo1.jpg
16.7 KB b/‎2014_acl_reganchor/figures/umd-logo1.jpg
16.7 KB
diff --git a/‎2014_acl_reganchor/figures/umd-logo2.jpg
101 KB b/‎2014_acl_reganchor/figures/umd-logo2.jpg
101 KB
@@ -0,0 +1,120 @@
+%
+% File acl2014.tex
+%
+% Contact: [email protected], [email protected]
+%%
+%% Based on the style files for ACL-2013, which were, in turn,
+%% based on the style files for ACL-2012, which were, in turn,
+%% based on the style files for ACL-2011, which were, in turn,
+%% based on the style files for ACL-2010, which were, in turn,
+%% based on the style files for ACL-IJCNLP-2009, which were, in turn,
+%% based on the style files for EACL-2009 and IJCNLP-2008...
+
+%% Based on the style files for EACL 2006 by
+%%[email protected] or [email protected]
+%% and that of ACL 08 by Joakim Nivre and Noah Smith
+
+\documentclass[11pt]{article}
+\usepackage{style/acl2014}
+\usepackage{times}
+\usepackage{url}
+\usepackage{latexsym}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\usepackage{booktabs}
+\usepackage{algorithm}
+\usepackage[noend]{algorithmic}
+%\usepackage[caption=false]{subfig}
+\usepackage[table]{xcolor}
+\usepackage{subfigure}
+
+\usepackage{style/mfirstuc}
+\newcommand{\etal}[2]{\makefirstuc{#1}~et~al.~\cite{#1-#2}}
+\newcommand{\cd}[1]{\bar{\bm{Q}}_{#1, \cdot}  }
+\newcommand{\citet}[1]{\newcite{#1}}
+
+\newif\ifcomment\commentfalse
+\input{style/preamble}
+
+\newcommand{\red}[1]{{\color{red}{\bf #1}}}
+\newcommand{\blue}[1]{{\color{blue}{\bf #1}}}
+\newcommand{\green}[1]{{\color{green}{\bf #1}}}
+\newcommand{\purple}[1]{{\color{purple}{\bf #1}}}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\title{Anchors Regularized: Adding Robustness and Extensibility \\
+to Scalable Topic-Modeling Algorithms}
+
+\author{Thang Nguyen  \\
+  iSchool and \abr{umiacs}, \\
+  University of Maryland \\
+  and National Library of Medicine, \\
+  National Institutes of Health \\
+  \email{[email protected]} \\\And
+  Yuening Hu \\
+  Computer Science \\
+  University of Maryland \\
+  \email{[email protected]} \\ \And
+  Jordan Boyd-Graber \\
+  iSchool and \abr{umiacs} \\
+  University of Maryland \\
+  \email{[email protected]} \\
+}
+
+\date{}
+
+
+
+\begin{document}
+
+%\maketitle
+
+% TODO
+% 1.  Explain different corpora for TI
+% 2.  Hyperparameter selection for HL
+% 3.  Discussion of HL equivalence, VB and Gibbs competitive
+% 4.  Explain why NIPS has poor WIKITI
+% 5.  Remove informed prior equation
+% 6.  Rewrite final discussion
+
+%\jbgcomment{Took a stab at improving the abstract, but not sure it's all the way
+%there yet.}
+
+\begin{abstract}
+  Spectral methods offer scalable alternatives to Markov chain Monte
+  Carlo and expectation maximization.  However, these new methods lack
+  the rich priors associated with probabilistic models.  We examine
+  Arora et al.'s anchor words algorithm for topic modeling and develop
+  new, regularized algorithms that not only mathematically resemble
+  Gaussian and Dirichlet priors but also improve the interpretability
+  of topic models.  Our new regularization approaches make these
+  efficient algorithms more flexible; we also show that these methods can
+  be combined with informed priors.
+\end{abstract}
+
+\input{2014_acl_reganchor/sections/intro}
+\input{2014_acl_reganchor/sections/background}
+\input{2014_acl_reganchor/sections/model}
+\input{2014_acl_reganchor/sections/experiments}
+\input{2014_acl_reganchor/sections/discussion}
+\input{2014_acl_reganchor/sections/conclusion}
+
+\section*{Acknowledgments}
+
+We would like to thank the anonymous reviewers, Hal Daum\'e III, Ke Wu,
+and Ke Zhai for their helpful comments.  This work was supported by
+\abr{nsf} Grant IIS-1320538.  Boyd-Graber is also supported by
+\abr{nsf} Grant CCF-1018625.  Any opinions, findings, conclusions, or
+recommendations expressed here are those of the authors and do not
+necessarily reflect the view of the sponsor.
+
+\newpage
+
+%\bibliographystyle{style/icml2013}
+\bibliographystyle{style/acl2014}
+%\bibliographystyle{apalike}
+%\footnotesize
+\bibliography{bib/journal-full,bib/thang,bib/jbg,bib/ynhu}
+
+\end{document}
@@ -0,0 +1 @@
+f3ef1ca20b78d9066d345f1ed397685f7b3573ab
@@ -0,0 +1 @@
+b99ebb9b0977d5bcdb78d100af1fb2f0c23c7071
@@ -0,0 +1 @@
+bb98e2cc9c6003dc0ac1b8327b40efdd6b668189
@@ -0,0 +1 @@
+51b3fddc6a5dfc8c110d085be5443e47ba66bd35
@@ -0,0 +1 @@
+7ff71ef45294e3196b3a5772698a51393d5c565b
@@ -0,0 +1 @@
+5f34ca6f520f2b5f404b57cee95a24626972c7bd
@@ -0,0 +1 @@
+2cac3f72fdb6cd9b25eee492f0ad370d2e850d05
@@ -0,0 +1 @@
+929643606b09f827d6ec8131e9fa30768315f0c0
@@ -0,0 +1 @@
+0e4dbd1fdad2ba1843a870cdb579dd2c98ff47e1
@@ -0,0 +1 @@
+37b07a36f124e013d1937dc60e52ef6ea8c4f715
@@ -0,0 +1 @@
+34b1ef3c16049f2b65051287c5ac6a7a10eaedb0
@@ -0,0 +1 @@
+ea8ce91056ffd5d02ec57345c3277ebe5e144752
@@ -0,0 +1 @@
+dc951b22efd866289e52857c49b445be264e397b
@@ -0,0 +1 @@
+ea59da73ff2e256a491005381262afe56444e1e3
@@ -0,0 +1 @@
+021b1ced8ee4aa70be85efe22a63d6dced0b6b21
@@ -0,0 +1 @@
+7807fd34fd814e3eac3a84fa177b145c92414de5
@@ -0,0 +1 @@
+051e622d0da12660659257c198fc4d6387907685
@@ -0,0 +1 @@
+07f7aa3de444a1963b64cf48ce48ca8ea75728ce
@@ -0,0 +1 @@
+e9250684498f480de292b8e7cd7b9f0581fac0fb
@@ -0,0 +1 @@
+980d48cc6187a013839572f8d7328a49599e8b7f
@@ -0,0 +1 @@
+739e4bdbd0a19e185cdb23f956e330f31505e838
@@ -0,0 +1 @@
+c67b1e28c45fdb142c64475d22c4da52d92bd917
@@ -0,0 +1 @@
+c8f012f334dbf9489ac2b2804fe6d775f970b030
@@ -0,0 +1 @@
+18f1409aef727c3134513370a57968e3b7eb1552
@@ -0,0 +1 @@
+86965f939b41322590525594277b4fb6299fa198
@@ -0,0 +1,73 @@
+
+library(ggplot2)
+library(GGally)
+
+#' Draw a two-axis parallel-coordinates plot with a text label at both ends.
+#'
+#' Stacks `top` and `bottom`, plots `old_col` against `new_col` with
+#' `ggparcoord()` (no rescaling, lines grouped by `topic_group`), and writes
+#' each row's label just left of the first axis and just right of the second.
+#'
+#' @param top,bottom Data frames with the same columns; combined via `rbind()`.
+#' @param old_col,new_col Columns (index or name) shown on the first and
+#'   second axis.
+#' @param word_label Column (index or name) holding the label text per row.
+#' @param topic_group Column (index or name) passed to `ggparcoord()` as
+#'   `groupColumn`.
+#' @return The plot returned by `ggparcoord()` with the labels added.
+parallel_plot <- function(top, bottom, old_col, new_col, word_label, topic_group) {
+  whole <- rbind(top, bottom)
+
+  p <- ggparcoord(data = whole, columns = c(old_col, new_col),
+                  scale = "globalminmax", groupColumn = topic_group)
+
+  # Nothing to label; also avoids handing zero-length vectors to annotate().
+  if (nrow(whole) == 0) {
+    return(p)
+  }
+
+  labels <- as.character(whole[[word_label]])
+
+  # annotate() draws every label exactly once. A geom_text() layer with
+  # constant x/y inherits the plot data and redraws the label once per data
+  # row, and one such layer per word was being stacked onto the plot.
+  # x = 0.9 / 2.1 place the text just outside the two axes (at x = 1 and 2).
+  p +
+    annotate("text", x = 0.9, y = as.numeric(whole[[old_col]]),
+             label = labels, colour = "black") +
+    annotate("text", x = 2.1, y = as.numeric(whole[[new_col]]),
+             label = labels, colour = "black")
+}
+
+# ---- Load and rank the per-topic word scores --------------------------------
+
+# Read one score table and rank its words within each anchor.
+# The code reads column V1 as the anchor, V2 as the word and V3 as the score.
+# After the flip, the highest-scoring word of an anchor has the smallest rank;
+# the flip uses the maximum rank over the whole table.
+load_scores <- function(path, type) {
+  raw <- read.table(path)
+  scores <- data.frame(score = raw$V3,
+                       type = type,
+                       anchor = raw$V1,
+                       word = raw$V2,
+                       key = sprintf("%s_%s", raw$V1, raw$V2))
+  scores$rank <- ave(scores$score, scores$anchor, FUN = rank)
+  scores$rank <- max(scores$rank) - scores$rank
+  scores
+}
+
+# Position of the column called `name` in `df`.
+col_index <- function(df, name) {
+  which(colnames(df) == name)
+}
+
+orig <- load_scores("orig.txt", "orig")
+beta <- load_scores("beta.txt", "beta")
+
+# ---- Combine both runs -------------------------------------------------------
+
+words <- rbind(orig, beta)
+
+# One row per anchor/word pair present in both runs (.x = orig, .y = beta).
+diffs <- merge(orig, beta, by = "key")
+
+diffs$word <- diffs$word.x
+diffs$anchor <- diffs$anchor.x
+diffs$rank <- diffs$rank.x - diffs$rank.y
+diffs$orig_rank <- diffs$rank.x
+diffs$beta_rank <- diffs$rank.y
+diffs$orig_score <- diffs$score.x
+diffs$beta_score <- diffs$score.y
+diffs$score <- diffs$score.x - diffs$score.y
+
+# ---- Density plot --------------------------------------------------------------
+
+# ggplot() has no `scales` argument (it was silently ignored), so it is not
+# passed here; the rendered figure is unchanged.
+# NOTE(review): the y axis shows log(score) but is labelled "p(word|topic)" --
+# confirm whether the label should say "log p(word|topic)".
+density_plot <- ggplot(words, aes(rank, log(score))) +
+  geom_line() +
+  facet_grid(type ~ anchor) +
+  ylim(c(-25, 0)) +
+  ylab("p(word|topic)") +
+  xlab("Rank of word in topic") +
+  scale_x_continuous(labels = NULL)  # NULL (what c() evaluates to) hides tick labels
+
+# ---- Largest changes between the two runs ------------------------------------
+
+diff_rank <- diffs[order(diffs$rank), ]
+diff_score <- diffs[order(diffs$score), ]
+
+num_words <- 15
+
+# head()/tail() return exactly num_words rows at each end (fewer if the table
+# is shorter). The previous index arithmetic `(n - num_words):n` selected
+# num_words + 1 bottom rows, and `1:num_words` produced NA rows on short tables.
+top_diff_rank <- head(diff_rank, num_words)
+bottom_diff_rank <- tail(diff_rank, num_words)
+
+top_diff_score <- head(diff_score, num_words)
+bottom_diff_score <- tail(diff_score, num_words)
+
+rank_diff <- parallel_plot(top_diff_rank, bottom_diff_rank,
+                           col_index(top_diff_rank, "orig_rank"),
+                           col_index(top_diff_rank, "beta_rank"),
+                           col_index(top_diff_rank, "word"),
+                           col_index(top_diff_rank, "anchor"))
+score_diff <- parallel_plot(top_diff_score, bottom_diff_score,
+                            col_index(top_diff_score, "orig_score"),
+                            col_index(top_diff_score, "beta_score"),
+                            col_index(top_diff_score, "word"),
+                            col_index(top_diff_score, "anchor"))
+
+
@@ -0,0 +1 @@
+d71c33128faad75cd7fa7917d5d9e9290c074dea
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+f3ef1ca20b78d9066d345f1ed397685f7b3573ab`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+b99ebb9b0977d5bcdb78d100af1fb2f0c23c7071`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+bb98e2cc9c6003dc0ac1b8327b40efdd6b668189`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+51b3fddc6a5dfc8c110d085be5443e47ba66bd35`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+7ff71ef45294e3196b3a5772698a51393d5c565b`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+5f34ca6f520f2b5f404b57cee95a24626972c7bd`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+2cac3f72fdb6cd9b25eee492f0ad370d2e850d05`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+929643606b09f827d6ec8131e9fa30768315f0c0`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+0e4dbd1fdad2ba1843a870cdb579dd2c98ff47e1`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+37b07a36f124e013d1937dc60e52ef6ea8c4f715`