All files init_forgy.js

39.65% Statements 46/116
100% Branches 1/1
0% Functions 0/1
39.65% Lines 46/116

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 1171x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x 1x                                                                                                                                             1x 1x 1x 1x 1x  
/**
* @license Apache-2.0
*
* Copyright (c) 2018 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
 
'use strict';
 
// MODULES //
 
var factory = require( '@stdlib/random/base/discrete-uniform' ).factory;
var incrmean = require( '@stdlib/stats/incr/mean' );
 
 
// MAIN //
 
/**
* Initializes centroids by randomly assigning each data point to cluster and computing centroids.
*
* ## References
*
* -   Forgy, E. 1965. "Cluster Analysis of Multivariate Data: Efficiency versus Interpretability of Classification." _Biometrics_ 21 (3): 768–69.
*
* @private
* @param {ndarray} out - output centroids `kxd` matrix
* @param {ndarray} buffer - buffer containing data points
* @param {*} seed - PRNG seed
* @returns {ndarray} centroids
*/
function forgy( out, buffer, seed ) {
	var randi;
	var obuf;
	var npts;
	var buf;
	var sb1;
	var sb2;
	var so1;
	var so2;
	var acc;
	var oo;
	var oa;
	var ob;
	var c;
	var M;
	var N;
	var i;
	var j;

	M = out.shape[ 0 ];
	N = out.shape[ 1 ];

	obuf = out.data;
	so1 = out.strides[ 0 ];
	so2 = out.strides[ 1 ];
	oo = out.offset;

	buf = buffer.data;
	npts = buffer.shape[ 0 ];
	sb1 = buffer.strides[ 0 ];
	sb2 = buffer.strides[ 1 ];
	ob = buffer.offset;

	// Initialize a PRNG for randomly assigning data points to clusters:
	randi = factory( 0, M-1, {
		'seed': seed
	});

	// Initialize a strided (MxN) array for storing accumulated centroids...
	acc = [];
	for ( i = 0; i < M*N; i++ ) {
		acc.push( incrmean() );
	}

	// Randomly assign each data point to a cluster and update the respective cluster's centroid...
	for ( i = 0; i < npts; i++ ) {
		// Generate a random cluster index:
		c = randi();

		// Compute the accumulator index offset:
		oa = N * c;

		// Update the respective cluster centroid:
		for ( j = 0; j < N; j++ ) {
			acc[ oa+j ]( buf[ ob+(sb2*j) ] );
		}
		// Update the data point index offset:
		ob += sb1;
	}
	// Update the output matrix...
	oa = 0;
	for ( i = 0; i < M; i++ ) {
		for ( j = 0; j < N; j++ ) {
			obuf[ oo+(so2*j) ] = acc[ oa ]();
			oa += 1; // We can simply increment the array pointer as we know that the accumulator array is row-major single-segment contiguous
		}
		oo += so1;
	}
	return out;
}
 
 
// EXPORTS //
 
module.exports = forgy;