
Is there an implementation of "t-SNE" for dimensionality reduction in Mathematica? This technique visualizes high-dimensional data by giving each datapoint a location in a two- or three-dimensional map. It generally gives better results than other embedding techniques.


Update 2

In response to Alexey Golyshev's answer: what you suggest is just a roundabout way of doing what I've done in my answer (I'm using the same R code from the package).


Update 1 (example with RLink)

As per the suggestion in the comments, I attempted to use RLink, but it is too slow and fragile; on 50k vectors of length only 128 it runs out of memory.


The code below defines a Mathematica function called tsne (which relies on three internal R functions: .x2p, .Hbeta, and .whiten). To call it:

mnist = ExampleData[{"MachineLearning", "MNIST"}, "TrainingData"];
imgs = mnist[[;; , 1]];
lbls = mnist[[;; , 2]];
features = Standardize /@ Flatten /@ ImageData /@ imgs;
mmu = MaxMemoryUsed[];
AbsoluteTiming[clustering = tsne[features];]
Print["mem: ", MaxMemoryUsed[] - mmu]
cls = GatherBy[Thread[{lbls, clustering}], First][[All, All, 2]];
ListPlot@cls
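Note that this implementation is O(n²) in time and memory (it builds a full n×n distance matrix), so running it on the entire training set is impractical; for a quick test, subsample first. A minimal sketch (the sample size of 1000 is my choice):

SeedRandom[1];
idx = RandomSample[Range[Length[imgs]], 1000];
sub = Standardize /@ Flatten /@ ImageData /@ imgs[[idx]];
AbsoluteTiming[subClusters = tsne[sub];]
ListPlot[GatherBy[Thread[{lbls[[idx]], subClusters}], First][[All, All, 2]]]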

Here is the full RLink code:

Needs["RLink`"]

InstallR[]

".Hbeta <-
  function(D, beta){
  \tP = exp(-D * beta)
  \tsumP = sum(P)
  \tif (sumP == 0){
  \t\tH = 0
  \t\tP = D * 0
  \t} else {
  \t\tH = log(sumP) + beta * sum(D %*% P) /sumP
  \t\tP = P/sumP
  \t}
  \tr = {}
  \tr$H = H
  \tr$P = P
  \tr
  }
  " // REvaluate;

".whiten <-
  function(X, row.norm=FALSE, verbose=FALSE, n.comp=ncol(X))
  {  
  \tn.comp; # forces an eval/save of n.comp
  \tif (verbose) message(\"Centering\")
     n = nrow(X)
  \tp = ncol(X)
  \tX <- scale(X, scale = FALSE)
     X <- if (row.norm) 
         t(scale(X, scale = row.norm))
     else t(X)

     if (verbose) message(\"Whitening\")
     V <- X %*% t(X)/n
     s <- La.svd(V)
     D <- diag(c(1/sqrt(s$d)))
     K <- D %*% t(s$u)
     K <- matrix(K[1:n.comp, ], n.comp, p)
     X = t(K %*% X)
  \tX
  }
  " // REvaluate;

".x2p <-
  function(X,perplexity = 15,tol = 1e-5){
  \tif (class(X) == 'dist') {
  \t\tD = X
  \t\tn = attr(D,'Size')
  \t} else{
  \t\tD = dist(X)
  \t\tn = attr(D,'Size')
  \t}

  \tD = as.matrix(D)
  \tP = matrix(0, n, n )\t\t
  \tbeta = rep(1, n)
  \tlogU = log(perplexity)
  \t
  \tfor (i in 1:n){
  \t\tbetamin = -Inf
  \t\tbetamax = Inf
  \t\tDi = D[i, -i]
  \t\thbeta = .Hbeta(Di, beta[i])
  \t\tH = hbeta$H; 
  \t\tthisP = hbeta$P
  \t\tHdiff = H - logU;
  \t\ttries = 0;

  \t\twhile(abs(Hdiff) > tol && tries < 50){
  \t\t\tif (Hdiff > 0){
  \t\t\t\tbetamin = beta[i]
  \t\t\t\tif (is.infinite(betamax)) beta[i] = beta[i] * 2
  \t\t\t\telse beta[i] = (beta[i] + betamax)/2
  \t\t\t} else{
  \t\t\t\tbetamax = beta[i]
  \t\t\t\tif (is.infinite(betamin))  beta[i] = beta[i]/ 2
  \t\t\t\telse beta[i] = ( beta[i] + betamin) / 2
  \t\t\t}
  \t\t\t
  \t\t\thbeta = .Hbeta(Di, beta[i])
  \t\t\tH = hbeta$H
  \t\t\tthisP = hbeta$P
  \t\t\tHdiff = H - logU
  \t\t\ttries = tries + 1
  \t\t}\t
  \t\t\tP[i,-i]  = thisP\t
  \t}\t
  \t
  \tr = {}
  \tr$P = P
  \tr$beta = beta
  \tsigma = sqrt(1/beta)
  \t
  \tmessage('sigma summary: ', \
paste(names(summary(sigma)),':',summary(sigma),'|',collapse=''))

  \tr 
  }" // REvaluate;

tsne = RFunction@
   "function(X, initial_config = NULL, k=2, initial_dims=30, \
perplexity=30, max_iter = 1000, min_cost=0, \
epoch_callback=NULL,whiten=TRUE, epoch=100 ){
   \tif ('dist' %in% class(X)) {
   \t\tn = attr(X,'Size')
   \t\t}
   \telse \t{
   \t\tX = as.matrix(X)
   \t\tX = X - min(X)
   \t\tX = X/max(X)
   \t\tinitial_dims = min(initial_dims,ncol(X))
   \t\tif (whiten) X<-.whiten(as.matrix(X),n.comp=initial_dims)
   \t\tn = nrow(X)
   \t}

   \tmomentum = .5
   \tfinal_momentum = .8
   \tmom_switch_iter = 250

   \tepsilon = 500
   \tmin_gain = .01
   \tinitial_P_gain = 4

   \teps = 2^(-52) # typical machine precision

   \tif (!is.null(initial_config) && is.matrix(initial_config)) {
   \t\tif (nrow(initial_config) != n | ncol(initial_config) != k){
   \t\t\tstop('initial_config argument does not match necessary \
configuration for X')
   \t\t}
   \t\tydata = initial_config
   \t\tinitial_P_gain = 1

   \t} else {
   \t\tydata = matrix(rnorm(k * n),n)
   \t}

   \tP = .x2p(X,perplexity, 1e-5)$P
   \tP = .5 * (P + t(P))

   \tP[P < eps]<-eps
   \tP = P/sum(P)

   \tP = P * initial_P_gain
   \tgrads =  matrix(0,nrow(ydata),ncol(ydata))
   \tincs =  matrix(0,nrow(ydata),ncol(ydata))
   \tgains = matrix(1,nrow(ydata),ncol(ydata))

   \tfor (iter in 1:max_iter){
   \t\tif (iter %% epoch == 0) { # epoch
   \t\t\tcost =  sum(apply(P * log((P+eps)/(Q+eps)),1,sum))
   \t\t\tmessage(\"Epoch: Iteration #\",iter,\" error is: \",cost)
   \t\t\tif (cost < min_cost) break
   \t\t\tif (!is.null(epoch_callback)) epoch_callback(ydata)

   \t\t}

   \t\tsum_ydata = apply(ydata^2, 1, sum)
   \t\tnum =  1/(1 + sum_ydata +    sweep(-2 * ydata %*% t(ydata),2, \
-t(sum_ydata)))
   \t\tdiag(num)=0
   \t\tQ = num / sum(num)
   \t\tif (any(is.nan(num))) message ('NaN in grad. descent')
   \t\tQ[Q < eps] = eps
   \t\tstiffnesses = 4 * (P-Q) * num
   \t\tfor (i in 1:n){
   \t\t\tgrads[i,] = apply(sweep(-ydata, 2, -ydata[i,]) * \
stiffnesses[,i],2,sum)
   \t\t}

   \t\tgains = ((gains + .2) * abs(sign(grads) != sign(incs)) +
   \t\t\t\t gains * .8 * abs(sign(grads) == sign(incs)))

   \t\tgains[gains < min_gain] = min_gain

   \t\tincs = momentum * incs - epsilon * (gains * grads)
   \t\tydata = ydata + incs
   \t\tydata = sweep(ydata,2,apply(ydata,2,mean))
   \t\tif (iter == mom_switch_iter) momentum = final_momentum

   \t\tif (iter == 100 && is.null(initial_config)) P = P/4

   \t}
   \tydata
   }

   ";
M.R.

2 Answers


In version 11.1, t-SNE is built into the dimensionality reduction functionality as the "TSNE" Method.

data = RandomReal[{-1, 1}, {1000, 3}];
ListPointPlot3D[data]


reduced = DimensionReduce[data, Method -> "TSNE"];
ListPlot[reduced]


You can also use it with the DimensionReduction function to build a DimensionReducerFunction:

reducer = DimensionReduction[data, Method -> "TSNE"]
reducer[{1, 1, 1}]

{-15.3963, 9.94978}

You can set the perplexity like so: DimensionReduction[data, Method -> {"TSNE", "Perplexity" -> 100}].
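For comparison with the MNIST example in the question, a sketch combining the built-in method with the digit labels (the subsample size and perplexity are my choices):

mnist = ExampleData[{"MachineLearning", "MNIST"}, "TrainingData"];
sample = RandomSample[mnist, 2000];
coords = DimensionReduce[Flatten@*ImageData /@ sample[[All, 1]],
   Method -> {"TSNE", "Perplexity" -> 30}];
ListPlot[GatherBy[Thread[{sample[[All, 2]], coords}], First][[All, All, 2]]]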

Carl Lange
  1. Install R 3.2.5 (R 3.3.x is incompatible with Mathematica 10.4).

  2. Open R and enter the command install.packages('Rtsne').

https://cran.r-project.org/web/packages/Rtsne/index.html

Rtsne: T-Distributed Stochastic Neighbor Embedding using a Barnes-Hut Implementation

An R wrapper around the fast T-distributed Stochastic Neighbor Embedding implementation by Van der Maaten.

  3. Open Mathematica and evaluate:
Needs["JLink`"]
SetOptions[InstallJava,JVMArguments->"-Xmx3g"];
Needs["RLink`"]
InstallR["RHomeLocation"->"C:\\Program Files\\R\\R-3.2.5"]
REvaluate["require(\"Rtsne\")"]

{True}

SeedRandom[2016];
data=RandomReal[{-1,1},{50000,128}];

RSet["data",data];

dim=REvaluate["Rtsne(data)"];//AbsoluteTiming

{1362.54,Null}

ListPlot[dim[[1,5]],ImageSize->Large]
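Note that dim[[1,5]] picks the embedding out of Rtsne's return value by position. A sketch that stores the result in R and extracts the Y matrix by its documented name instead (depending on how RLink translates the result, you may need to adjust the part specification):

REvaluate["res <- Rtsne(data); 'ok'"]; (* keep the full result on the R side *)
coords = REvaluate["res$Y"]; (* the 2D embedding, by name rather than position *)
ListPlot[coords, ImageSize -> Large]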


Alexey Golyshev
  • I'm on OSX, I tried installing 3.2.5 from https://r.research.att.com/ and all seems well, but InstallR["RHomeLocation" -> "/usr/bin/", "RVersion" -> "3.2.5"] complains "InstallR::fail: Failed to install R. The following error was encountered: Unable to load dynamic libraries" – M.R. Aug 04 '16 at 18:37
  • I'm on OSX 10.10.5 to be exact, any suggestions? – M.R. Aug 04 '16 at 18:42
  • Isn't this equivalent to what I did? I pulled out the R code from this exact package. – M.R. Aug 05 '16 at 00:09
    @M.R. Try this code: SetEnvironment["DYLD_LIBRARY_PATH" -> "/Library/Frameworks/R.framework/Resources/lib"]; InstallR["RHomeLocation" -> "/Library/Frameworks/R.framework/Resources", "RVersion" -> 3] Source: http://szhorvat.net/pelican/setting-up-rlink-for-mathematica.html – Alexey Golyshev Aug 05 '16 at 02:57