Updated: 2025-12-12; 2022-01-27; 2022-01-17; 2020-09-13; 2020-08-01
Created: 2020-03-17
require(GridOnClusters)
#> Loading required package: GridOnClusters
# Example 1 data: 500 points on a nonlinear curve in three dimensions.
# y and z are functions of x; the added noise term has standard deviation 0.
x <- rnorm(500)
y <- sin(x) + rnorm(500, sd = 0)
z <- cos(x) + rnorm(500, sd = 0)
data <- cbind(x, y, z)

# Values passed as `k` (number of clusters) to every call below.
ks <- 2:20

# kmeans+silhouette clustering, "Sort+split" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split"
)
plot(res)

# kmeans+silhouette clustering, "DP exact likelihood" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "DP exact likelihood"
)
plot(res)

# Ball+BIC clustering, "DP approx likelihood 1-way" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way"
)
plot(res)
Example 1. Nonlinear curves using kmeans+silhouette and Ball+BIC clustering with a range (2 to 20) for the number of clusters.
# Example 2 data: 100 points; y is a smooth function of x and z is a noisy
# indicator of whether x falls outside [-0.5, 0.5].
x <- rnorm(100)
y <- log1p(abs(x))
z <- ifelse(x >= -0.5 & x <= 0.5, 0, 1) + rnorm(100, mean = 0, sd = 0.1)
data <- cbind(x, y, z)

# Values passed as `k` (number of clusters) to every call below.
ks <- 2:5

# Disabled alternative: Ball+BIC clustering with the "Sort+split" grid.
# res <- discretize.jointly(data, k = ks, cluster_method = "Ball+BIC",
#                           grid_method = "Sort+split", min_level = 1)
# plot(res)

# kmeans+silhouette clustering, "Sort+split" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

# Disabled alternative: kmeans+silhouette with the "DP exact likelihood" grid.
# res <- discretize.jointly(data, k = ks, cluster_method = "kmeans+silhouette",
#                           grid_method = "DP exact likelihood", min_level = 1)
# plot(res)

# Ball+BIC clustering, "DP exact likelihood" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)
Example 2. Using a range (2 to 5) for the number of clusters with kmeans+silhouette and Ball+BIC clustering.
# Example 3: supply cluster labels from a clustering method other than
# kmeans+silhouette, instead of letting discretize.jointly() cluster the data.
x <- rnorm(100)
y <- log1p(abs(x))
z <- sin(x)
data <- cbind(x, y, z)

# Pre-cluster the data using partitioning around medoids (PAM), 4 clusters.
cluster_label <- cluster::pam(
  x = data, diss = FALSE, metric = "euclidean", k = 4
)$clustering

# "Sort+split" grid on the PAM labels. Plot it before `res` is reassigned so
# that this result is actually shown and its title names the right method.
res <- discretize.jointly(
  data,
  cluster_label = cluster_label,
  grid_method = "Sort+split",
  min_level = 1
)
plot(
  res,
  main = "Original data\nPAM clustering",
  main.table = "Discretized data\nPAM & Sort+split"
)

# "DP exact likelihood" grid on the same PAM labels, titled accordingly.
res <- discretize.jointly(
  data,
  cluster_label = cluster_label,
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(
  res,
  main = "Original data\nPAM clustering",
  main.table = "Discretized data\nPAM & DP exact likelihood"
)
Example 3. Using the partition around medoids clustering method.
# Example 4 data: two dimensions, 4 * n points in total. The first 2 * n
# points are drawn with standard deviation `sd`; the next 2 * n points use
# sd / 3, with y shifted by 200.
ks <- 2:20
n <- 40 * 10
sd <- 60 * 4

x <- rnorm(2 * n, sd = sd)
y <- rnorm(2 * n, sd = sd)
x <- c(x, rnorm(2 * n, sd = sd / 3))
y <- c(y, rnorm(2 * n, sd = sd / 3) + 200)
data <- cbind(x, y)

# Ball+BIC clustering, "Sort+split" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

# kmeans+silhouette clustering, "Sort+split" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

# kmeans+silhouette clustering, "DP approx likelihood 1-way" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Ball+BIC clustering, "DP approx likelihood 1-way" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)
Example 4. Random patterns using kmeans+silhouette and Ball+BIC clustering with a range for the number of clusters.
# Example 5 data: two-column Gaussian blobs of n points each, with different
# centers and spreads in the first column.
n <- 50 * 8
ks <- 2:20

# Blob centered at 5 in column 1 (sd 2), wide in column 2 (sd 40).
X.C1 <- matrix(
  c(rnorm(n, mean = 5, sd = 2), rnorm(n, mean = 0, sd = 40)),
  ncol = 2, byrow = FALSE
)

# Tight blob centered at 70 in column 1 (sd 1 in both columns).
X.C2 <- matrix(
  c(rnorm(n, mean = 70, sd = 1), rnorm(n, mean = 0, sd = 1)),
  ncol = 2, byrow = FALSE
)

# Broad blob centered at 150 in column 1 (sd 30 in both columns).
X.C3 <- matrix(
  c(rnorm(n, mean = 150, sd = 30), rnorm(n, mean = 0, sd = 30)),
  ncol = 2, byrow = FALSE
)

# NOTE(review): X.C2 is generated above but is not stacked into `data`;
# confirm whether leaving it out is intentional.
data <- rbind(X.C1, X.C3)

# Ball+BIC clustering, "Sort+split" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

# kmeans+silhouette clustering, "Sort+split" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "Sort+split",
  min_level = 1
)
plot(res)

# kmeans+silhouette clustering, "DP approx likelihood 1-way" grid; this is
# the only call in the example that sets `cutoff`.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "kmeans+silhouette",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1,
  cutoff = 1
)
plot(res)

# Ball+BIC clustering, "DP approx likelihood 1-way" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)
Example 5. Multi-cluster random patterns using kmeans+silhouette and Ball+BIC clustering with a range for the number of clusters.
# Example 6 data: four groups of n points each. X1 and X2 take means 0 or 10
# in the four combinations (0, 10), (0, 0), (10, 10), (10, 0); X3 and X4 are
# drawn independently of the group.
n <- 100
ks <- 2:10

# Group labels 1..4, n points per group, in the same order as X1 / X2.
label <- rep(c(1, 2, 3, 4), each = n)

X1 <- c(
  rnorm(n, mean = 0, sd = 2),
  rnorm(n, mean = 0, sd = 2),
  rnorm(n, mean = 10, sd = 2),
  rnorm(n, mean = 10, sd = 2)
)
X2 <- c(
  rnorm(n, mean = 10, sd = 2),
  rnorm(n, mean = 0, sd = 2),
  rnorm(n, mean = 10, sd = 2),
  rnorm(n, mean = 0, sd = 2)
)
X3 <- rnorm(4 * n, mean = 20, sd = 10)
X4 <- rnorm(4 * n, mean = 3, sd = 20)
data <- cbind(X1, X2, X3, X4)

# Disabled alternatives, kept for reference:
# res <- discretize.jointly(data, k = ks, cluster_method = "kmeans+silhouette",
#                           grid_method = "DP approx likelihood", min_level = 1)
# plot(res)
#
# res <- discretize.jointly(data, k = ks, cluster_method = "kmeans+silhouette",
#                           grid_method = "DP Compressed majority", min_level = 1)
# plot(res)
# res <- discretize.jointly(data, k = ks, cluster_method = "Ball+BIC",
#                           grid_method = "DP exact likelihood", min_level = 1)
# plot(res)

# Ball+BIC clustering, "DP approx likelihood 1-way" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Same grid method, using the known group labels.
# NOTE(review): both `k` and `cluster_label` are supplied here; presumably the
# labels take precedence over `k` -- confirm against the package docs.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Disabled alternatives, kept for reference:
# res <- discretize.jointly(data, k = ks, cluster_label = label,
#                           grid_method = "Sort+split", min_level = 1)
# plot(res)
#
# res <- discretize.jointly(data, k = ks, cluster_method = "kmeans+silhouette",
#                           grid_method = "Sort+split", min_level = 1)
#plot(res)
Example 6. Exclusive or (XOR) pattern between Dim1 and Dim2 (Dim1 ⊕ Dim2); Dim3 and Dim4 are random.
# Example 7 data: three labelled groups of sizes n, 5 * n and 5 * n, centered
# at 0, 15 and 35 in X1 and all centered at 0 in X2.
n <- 20
ks <- 2:10  # assigned here but not passed to any call in this example

label <- rep(c(1, 2, 3), times = c(n, 5 * n, 5 * n))

X1 <- c(
  rnorm(n, mean = 0, sd = 1),
  rnorm(5 * n, mean = 15, sd = 3),
  rnorm(5 * n, mean = 35, sd = 3)
)
X2 <- c(
  rnorm(n, mean = 0, sd = 1),
  rnorm(5 * n, mean = 0, sd = 3),
  rnorm(5 * n, mean = 0, sd = 3)
)
data <- cbind(X1, X2)

# Known labels, "DP exact likelihood" grid.
res <- discretize.jointly(
  data,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)

# Known labels, "DP approx likelihood 1-way" grid.
res <- discretize.jointly(
  data,
  cluster_label = label,
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Known labels, "DP exact likelihood" grid with entropy = TRUE; stored
# separately from `res`.
res.entropy <- discretize.jointly(
  data,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1,
  entropy = TRUE
)
plot(res.entropy)
Example 7. Three round clusters well separated along the x-axis.
# Example 8 data: four Gaussian groups of sizes n, 5 * n, n and 3 * n with
# different centers and standard deviations in X1 and X2.
n <- 200
ks <- 2:10

# Group labels for the four components; not passed to the calls below, which
# cluster with Ball+BIC instead.
label <- rep(c(1, 2, 3, 4), times = c(n, 5 * n, n, 3 * n))

X1 <- c(
  rnorm(n, mean = 0, sd = 3),
  rnorm(5 * n, mean = 15, sd = 3),
  rnorm(n, mean = 5, sd = 2),
  rnorm(3 * n, mean = 15, sd = 4)
)
X2 <- c(
  rnorm(n, mean = 0, sd = 4),
  rnorm(5 * n, mean = 15, sd = 3),
  rnorm(n, mean = 10, sd = 2),
  rnorm(3 * n, mean = 0, sd = 4)
)
data <- cbind(X1, X2)

# Ball+BIC clustering, "DP exact likelihood" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)

# Ball+BIC clustering, "DP approx likelihood 1-way" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Ball+BIC clustering, "DP exact likelihood" grid with entropy = TRUE.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_method = "Ball+BIC",
  grid_method = "DP exact likelihood",
  min_level = 1,
  entropy = TRUE
)
plot(res)
Example 8. Four spheres with different centers and radii.
Here we evaluate whether a method can recognize the two spheres.
# Example 9 data: a group of n points with sd 1 centered at (0, 0) and a
# group of 5 * n points with sd 3 centered at (6, 0).
n <- 200
ks <- 2:10

label <- rep(c(1, 2), times = c(n, 5 * n))

X1 <- c(rnorm(n, mean = 0, sd = 1), rnorm(5 * n, mean = 6, sd = 3))
X2 <- c(rnorm(n, mean = 0, sd = 1), rnorm(5 * n, mean = 0, sd = 3))
data <- cbind(X1, X2)

# NOTE(review): every call below supplies both `k` and `cluster_label`;
# presumably the labels take precedence over `k` -- confirm against the
# package docs.

# Known labels, "DP exact likelihood" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1
)
plot(res)

# Known labels, "DP approx likelihood 1-way" grid.
res <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP approx likelihood 1-way",
  min_level = 1
)
plot(res)

# Known labels, "DP exact likelihood" grid with entropy = TRUE; stored
# separately from `res`.
res.entropy <- discretize.jointly(
  data,
  k = ks,
  cluster_label = label,
  grid_method = "DP exact likelihood",
  min_level = 1,
  entropy = TRUE
)
plot(res.entropy)
Example 9. A small dense sphere overlapping a large sphere.