library(NNS)
library(data.table)
require(knitr)
require(rgl)
require(meboot)
require(tdigest)
require(dtw)

Below are some examples demonstrating unsupervised learning with NNS clustering and nonlinear regression using the resulting clusters. As always, for a more thorough description and definition, please view the References.
NNS.part

NNS.part is both a partitional and hierarchical clustering method. NNS iteratively partitions the joint distribution into partial moment quadrants, and then assigns a quadrant identification (1:4) at each partition.
NNS.part returns a data.table of observations along with their final quadrant identification. It also returns the regression points, which are the quadrant means used in NNS.reg.
# Evenly spaced grid on [-5, 5] in steps of 0.05, and its cube as the response
x <- seq(-5, 5, 0.05)
y <- x^3
for(i in 1 : 4){NNS.part(x, y, order = i, min.obs.stop = FALSE, Voronoi = TRUE, obs.req = 0)}

NNS.part also offers a partitioning based on \(x\) values only, NNS.part(x, y, type = "XONLY", ...), which uses the entire bandwidth in its regression point derivation and shares the same limit condition as partitioning via both \(x\) and \(y\) values.
Note the partition identifications are limited to 1’s and 2’s (left and right of the partition respectively), not the 4 values per the \(x\) and \(y\) partitioning.
## $order
## [1] 4
## 
## $dt
##          x         y quadrant prior.quadrant
##   1: -5.00 -125.0000    q1111           q111
##   2: -4.95 -121.2874    q1111           q111
##   3: -4.90 -117.6490    q1111           q111
##   4: -4.85 -114.0841    q1111           q111
##   5: -4.80 -110.5920    q1111           q111
##  ---                                        
## 197:  4.80  110.5920    q2222           q222
## 198:  4.85  114.0841    q2222           q222
## 199:  4.90  117.6490    q2222           q222
## 200:  4.95  121.2874    q2222           q222
## 201:  5.00  125.0000    q2222           q222
## 
## $regression.points
##    quadrant          x           y
## 1:     q111 -4.3742412 -79.8807307
## 2:     q112 -3.0992681 -28.0828202
## 3:     q121 -1.8507319  -5.8599732
## 4:     q122 -0.5992681  -0.2594580
## 5:     q211  0.6507319   0.3130212
## 6:     q212  1.9007319   6.3553668
## 7:     q221  3.1507319  29.4685900
## 8:     q222  4.3992681  81.4792796

NNS.reg

NNS.reg can fit any \(f(x)\), for both uni- and multivariate cases. NNS.reg returns a self-evident list of values provided below.
## $R2
## [1] 0.999999
## 
## $SE
## [1] 0.052872
## 
## $Prediction.Accuracy
## NULL
## 
## $equation
## NULL
## 
## $x.star
## NULL
## 
## $derivative
##     Coefficient X.Lower.Range X.Upper.Range
##  1: 75.12876261    -5.0000000    -4.8748036
##  2: 66.87261756    -4.8748036    -4.5748036
##  3: 59.19995270    -4.5748036    -4.4000000
##  4: 57.20187731    -4.4000000    -4.2498395
##  5: 51.09749347    -4.2498395    -4.0001605
##  6: 45.08470225    -4.0001605    -3.8500000
##  7: 43.89250000    -3.8500000    -3.8000000
##  8: 42.54783400    -3.8000000    -3.6498395
##  9: 37.24680659    -3.6498395    -3.3998395
## 10: 32.32238084    -3.3998395    -3.2500000
## 11: 31.23341561    -3.2500000    -3.1248764
## 12: 27.38778086    -3.1248764    -3.0000000
## 13: 26.65801543    -3.0000000    -2.8751236
## 14: 22.97063159    -2.8751236    -2.7500000
## 15: 21.95745269    -2.7500000    -2.5751964
## 16: 17.57702016    -2.5751964    -2.2748036
## 17: 13.51623598    -2.2748036    -1.9748036
## 18: 10.04434030    -1.9748036    -1.6998395
## 19:  7.31760408    -1.6998395    -1.4248036
## 20:  4.92860180    -1.4248036    -1.1498395
## 21:  3.06178098    -1.1498395    -0.8748036
## 22:  1.61322416    -0.8748036    -0.5998395
## 23:  0.78299574    -0.5998395    -0.4500000
## 24:  0.54250000    -0.4500000    -0.4000000
## 25:  0.42250000    -0.4000000    -0.3500000
## 26:  0.31750000    -0.3500000    -0.3000000
## 27:  0.22750000    -0.3000000    -0.2500000
## 28:  0.15250000    -0.2500000    -0.2000000
## 29:  0.09250000    -0.2000000    -0.1500000
## 30:  0.04750000    -0.1500000    -0.1000000
## 31:  0.01750000    -0.1000000    -0.0500000
## 32:  0.00250000    -0.0500000     0.0000000
## 33:  0.04482682     0.0000000     0.1997732
## 34:  0.40112542     0.1997732     0.5251964
## 35:  1.35467954     0.5251964     0.8251964
## 36:  2.83286071     0.8251964     1.1251964
## 37:  4.58369870     1.1251964     1.3000000
## 38:  5.42978118     1.3000000     1.4498395
## 39:  7.22829448     1.4498395     1.6000000
## 40:  8.08483135     1.6000000     1.7498395
## 41: 10.33276345     1.7498395     1.9000000
## 42: 11.38480094     1.9000000     2.0751964
## 43: 14.80855629     2.0751964     2.3751964
## 44: 18.79384425     2.3751964     2.5500000
## 45: 19.89250000     2.5500000     2.6000000
## 46: 20.82283262     2.6000000     2.7498395
## 47: 24.75420137     2.7498395     2.9998395
## 48: 29.11046431     2.9998395     3.1500000
## 49: 30.35129296     3.1500000     3.3001605
## 50: 35.31087601     3.3001605     3.5748036
## 51: 41.22828132     3.5748036     3.7500000
## 52: 42.75250000     3.7500000     3.8000000
## 53: 43.94792051     3.8000000     3.9501605
## 54: 49.88835362     3.9501605     4.2251964
## 55: 57.15940448     4.2251964     4.4000000
## 56: 59.19995270     4.4000000     4.5748036
## 57: 66.78519399     4.5748036     4.8751964
## 58: 75.36516319     4.8751964     5.0000000
##     Coefficient X.Lower.Range X.Upper.Range
## 
## $Point.est
## NULL
## 
## $regression.points
##              x             y
##  1: -5.0000000 -1.250000e+02
##  2: -4.8748036 -1.155942e+02
##  3: -4.5748036 -9.553237e+01
##  4: -4.4000000 -8.518400e+01
##  5: -4.2498395 -7.659454e+01
##  6: -4.0001605 -6.383657e+01
##  7: -3.8500000 -5.706662e+01
##  8: -3.8000000 -5.487200e+01
##  9: -3.6498395 -4.848300e+01
## 10: -3.3998395 -3.917129e+01
## 11: -3.2500000 -3.432812e+01
## 12: -3.1248764 -3.042009e+01
## 13: -3.0000000 -2.700000e+01
## 14: -2.8751236 -2.367104e+01
## 15: -2.7500000 -2.079688e+01
## 16: -2.5751964 -1.695863e+01
## 17: -2.2748036 -1.167862e+01
## 18: -1.9748036 -7.623753e+00
## 19: -1.6998395 -4.861919e+00
## 20: -1.4248036 -2.849316e+00
## 21: -1.1498395 -1.494127e+00
## 22: -0.8748036 -6.520275e-01
## 23: -0.5998395 -2.084487e-01
## 24: -0.4500000 -9.112500e-02
## 25: -0.4000000 -6.400000e-02
## 26: -0.3500000 -4.287500e-02
## 27: -0.3000000 -2.700000e-02
## 28: -0.2500000 -1.562500e-02
## 29: -0.2000000 -8.000000e-03
## 30: -0.1500000 -3.375000e-03
## 31: -0.1000000 -1.000000e-03
## 32: -0.0500000 -1.250000e-04
## 33:  0.0000000  0.000000e+00
## 34:  0.1997732  8.955199e-03
## 35:  0.5251964  1.394907e-01
## 36:  0.8251964  5.458945e-01
## 37:  1.1251964  1.395753e+00
## 38:  1.3000000  2.197000e+00
## 39:  1.4498395  3.010596e+00
## 40:  1.6000000  4.096000e+00
## 41:  1.7498395  5.307427e+00
## 42:  1.9000000  6.859000e+00
## 43:  2.0751964  8.853576e+00
## 44:  2.3751964  1.329614e+01
## 45:  2.5500000  1.658138e+01
## 46:  2.6000000  1.757600e+01
## 47:  2.7498395  2.069608e+01
## 48:  2.9998395  2.688463e+01
## 49:  3.1500000  3.125588e+01
## 50:  3.3001605  3.581344e+01
## 51:  3.5748036  4.551133e+01
## 52:  3.7500000  5.273438e+01
## 53:  3.8000000  5.487200e+01
## 54:  3.9501605  6.147124e+01
## 55:  4.2251964  7.519233e+01
## 56:  4.4000000  8.518400e+01
## 57:  4.5748036  9.553237e+01
## 58:  4.8751964  1.155942e+02
## 59:  5.0000000  1.250000e+02
##              x             y
## 
## $Fitted.xy
##          x         y     y.hat  NNS.ID gradient     residuals
##   1: -5.00 -125.0000 -125.0000 q444444 75.12876  0.0000000000
##   2: -4.95 -121.2874 -121.2436 q444444 75.12876  0.0438131307
##   3: -4.90 -117.6490 -117.4871 q444444 75.12876  0.1618762613
##   4: -4.85 -114.0841 -113.9355 q444441 66.87262  0.1486568920
##   5: -4.80 -110.5920 -110.5918 q444441 66.87262  0.0001627699
##  ---                                                         
## 197:  4.80  110.5920  110.5721 q111114 66.78519 -0.0198502380
## 198:  4.85  114.0841  113.9114 q111114 66.78519 -0.1727155383
## 199:  4.90  117.6490  117.4635 q111111 75.36516 -0.1855163193
## 200:  4.95  121.2874  121.2317 q111111 75.36516 -0.0556331597
## 201:  5.00  125.0000  125.0000 q111111 75.36516  0.0000000000

Multivariate regressions return a plot of \(y\) and \(\hat{y}\), as well as the regression points ($RPM) and partitions ($rhs.partitions) for each regressor.
# Bivariate test surface: x^3 + 3y - y^3 - 3x
f <- function(x, y) {
  x^3 + 3 * y - y^3 - 3 * x
}

# Reuse the x sequence for y, build every (x, y) pair, and evaluate f on them
y <- x
z <- expand.grid(x, y)
g <- f(z[, 1], z[, 2])
NNS.reg(z, g, order = "max", ncores = 1)
## $R2
## [1] 1
## 
## $rhs.partitions
##         Var1 Var2
##     1: -5.00   -5
##     2: -4.95   -5
##     3: -4.90   -5
##     4: -4.85   -5
##     5: -4.80   -5
##    ---           
## 40397:  4.80    5
## 40398:  4.85    5
## 40399:  4.90    5
## 40400:  4.95    5
## 40401:  5.00    5
## 
## $RPM
##        Var1  Var2         y.hat
##     1: -4.8 -4.80 -7.105427e-15
##     2: -4.8 -2.55 -8.726063e+01
##     3: -4.8 -2.50 -8.806700e+01
##     4: -4.8 -2.45 -8.883587e+01
##     5: -4.8 -2.40 -8.956800e+01
##    ---                         
## 40397: -2.6 -2.80  3.776000e+00
## 40398: -2.6 -2.75  2.770875e+00
## 40399: -2.6 -2.70  1.807000e+00
## 40400: -2.6 -2.65  8.836250e-01
## 40401: -2.6 -2.60  1.776357e-15
## 
## $Point.est
## NULL
## 
## $Fitted.xy
##         Var1 Var2          y      y.hat      NNS.ID residuals
##     1: -5.00   -5   0.000000   0.000000     201.201         0
##     2: -4.95   -5   3.562625   3.562625     402.201         0
##     3: -4.90   -5   7.051000   7.051000     603.201         0
##     4: -4.85   -5  10.465875  10.465875     804.201         0
##     5: -4.80   -5  13.808000  13.808000    1005.201         0
##    ---                                                       
## 40397:  4.80    5 -13.808000 -13.808000 39597.40401         0
## 40398:  4.85    5 -10.465875 -10.465875 39798.40401         0
## 40399:  4.90    5  -7.051000  -7.051000 39999.40401         0
## 40400:  4.95    5  -3.562625  -3.562625 40200.40401         0
## 40401:  5.00    5   0.000000   0.000000 40401.40401         0

NNS.reg can inter- or extrapolate any point of interest. The point.est parameter, NNS.reg(x, y, point.est = ...), accepts data of any size with the same dimensions as \(x\); the resulting estimates are retrieved with $Point.est.
NNS.reg also provides a dimension reduction regression by including the parameter NNS.reg(x, y, dim.red.method = "cor", ...), reducing all regressors to a single dimension using the returned equation $equation.
NNS.reg(iris[ , 1 : 4], iris[ , 5], dim.red.method = "cor", location = "topleft", ncores = 1)$equation
##        Variable Coefficient
## 1: Sepal.Length   0.7980781
## 2:  Sepal.Width  -0.4402896
## 3: Petal.Length   0.9354305
## 4:  Petal.Width   0.9381792
## 5:  DENOMINATOR   4.0000000

Thus, our model for this regression would be: \[ Species = \frac{0.798*Sepal.Length - 0.44*Sepal.Width + 0.935*Petal.Length + 0.938*Petal.Width}{4} \]
NNS.reg(x, y, dim.red.method = "cor", threshold = ...) offers a method of reducing regressors further by controlling the absolute value of required correlation.
NNS.reg(iris[ , 1 : 4], iris[ , 5], dim.red.method = "cor", threshold = .75, location = "topleft", ncores = 1)$equation
##        Variable Coefficient
## 1: Sepal.Length   0.7980781
## 2:  Sepal.Width   0.0000000
## 3: Petal.Length   0.9354305
## 4:  Petal.Width   0.9381792
## 5:  DENOMINATOR   3.0000000

Thus, our model for this further reduced dimension regression would be: \[ Species = \frac{0.798*Sepal.Length + 0*Sepal.Width + 0.935*Petal.Length + 0.938*Petal.Width}{3} \]
and the point.est = (...) operates in the same manner as the full regression above, again called with $Point.est.
NNS.reg(iris[ , 1 : 4], iris[ , 5], dim.red.method = "cor", threshold = .75, point.est = iris[1 : 10, 1 : 4], location = "topleft", ncores = 1)$Point.est
##  [1] 1 1 1 1 1 1 1 1 1 1

For a classification problem, we simply set NNS.reg(x, y, type = "CLASS", ...).
NNS.reg(iris[ , 1 : 4], iris[ , 5], type = "CLASS", point.est = iris[1:10, 1 : 4], location = "topleft", ncores = 1)$Point.est
##  [1] 1 1 1 1 1 1 1 1 1 1

NNS.stack

The NNS.stack() routine cross-validates, for a given objective function, the n.best parameter in the multivariate NNS.reg function as well as the threshold parameter in the dimension reduction NNS.reg version. NNS.stack can be used for classification NNS.stack(..., type = "CLASS", ...) or continuous dependent variables NNS.stack(..., type = NULL, ...).
Any objective function obj.fn can be called using expression() with the terms predicted and actual.
Note: For mixed data type regressors / features, it is suggested to use NNS.stack(..., order = "max", ...).
NNS.stack(IVs.train = iris[ , 1 : 4], 
          DV.train = iris[ , 5], 
          IVs.test = iris[1:10, 1 : 4],
          obj.fn = expression( mean(round(predicted) == actual) ),
          objective = "max",
          type = "CLASS", folds = 1, ncores = 1)
## $OBJfn.reg
## [1] 0.9565217
## 
## $NNS.reg.n.best
## [1] 3
## 
## $OBJfn.dim.red
## [1] 0.9565217
## 
## $NNS.dim.red.threshold
## [1] 0.78
## 
## $reg
##  [1] 1 1 1 1 1 1 1 1 1 1
## 
## $dim.red
##  [1] 1 1 1 1 1 1 1 1 1 1
## 
## $stack
##  [1] 1 1 1 1 1 1 1 1 1 1

If the user is so motivated, detailed arguments and further examples are provided within the following: