Implementing the QT Algorithm using C#

0 comments
Quality Threshold (or QT) is an algorithm used in cluster analysis which is described in this Wikipedia article (http://en.wikipedia.org/wiki/Cluster_analysis).

The basic idea of cluster analysis is to partition a set of points into clusters which have some relationship to each other. In the case of QT the user chooses the maximum diameter (i.e. distance between points) that a cluster can have and each point is then considered in turn, along with its neighbors, and allocated to a cluster.

The source code is listed below in case it is of interest to anyone else involved in this specialized field.


Source code including sample data

using
System;using System.Collections.Generic;
struct Point{
    public int X, Y;
    public Point(int x, int y)
    {
        this.X = x;
        this.Y = y;
    }
    public override string ToString()
    {
        return String.Format("({0}, {1})", X, Y);
    }

    public static int DistanceSquared(Point p1, Point p2)
    {
        int diffX = p2.X - p1.X;
        int diffY = p2.Y - p1.Y;
        return diffX * diffX + diffY * diffY;
    }
}
static class QT{
    static void Main()
    {
        List<Point> points = new List<Point>();
        // sample data        points.Add(new Point(0,100));
        points.Add(new Point(0,200));
        points.Add(new Point(0,275));
        points.Add(new Point(100, 150));
        points.Add(new Point(200,100));
        points.Add(new Point(250,200));
        double maxDiameter = 150.0;
        List<List<Point>> clusters = GetClusters(points, maxDiameter);
        Console.Clear();
         // print points to console        Console.WriteLine("The {0} points are :\n", points.Count);
        foreach(Point p in points) Console.Write(" {0} ", p);
        Console.WriteLine();
        // print clusters to console         for(int i = 0; i < clusters.Count; i++)
        {
           int count = clusters[i].Count;
           string plural = (count != 1) ? "s" : "";
           Console.WriteLine("\nCluster {0} consists of the following {1} point{2} :\n", i + 1, count, plural);
           foreach(Point p in clusters[i]) Console.Write(" {0} ", p);
           Console.WriteLine();
        }
        Console.ReadKey();
    }
    static List<List<Point>> GetClusters(List<Point> points, double maxDiameter)
    {
        if (points == null) return null;
        points = new List<Point>(points); // leave original List unaltered         List<List<Point>> clusters = new List<List<Point>>();
        while (points.Count > 0)
        {
            List<Point> bestCandidate = GetBestCandidate(points, maxDiameter);
            clusters.Add(bestCandidate);          
        }

        return clusters;
    }
    /*         GetBestCandidate() returns first candidate cluster encountered if there is more than one
        with the maximum number of points.
    */
    static List<Point> GetBestCandidate(List<Point> points, double maxDiameter)
    {
        double maxDiameterSquared = maxDiameter * maxDiameter; // square maximum diameter                List<List<Point>> candidates = new List<List<Point>>(); // stores all candidate clusters        List<Point> currentCandidate = null; // stores current candidate cluster        int[] candidateNumbers = new int[points.Count]; // keeps track of candidate numbers to which points have been allocated        int totalPointsAllocated = 0; // total number of points already allocated to candidates        int currentCandidateNumber = 0; // current candidate number
        for(int i = 0; i < points.Count; i++)
        {
            if (totalPointsAllocated == points.Count) break; // no need to continue further             if (candidateNumbers[i] > 0) continue; // point already allocated to a candidate            currentCandidateNumber++;
            currentCandidate = new List<Point>(); // create a new candidate cluster            currentCandidate.Add(points[i]); // add the current point to it            candidateNumbers[i] = currentCandidateNumber;
            totalPointsAllocated++;
            Point latestPoint = points[i]; // latest point added to current cluster            int[] diametersSquared = new int[points.Count]; // diameters squared of each point when added to current cluster
            // iterate through any remaining points            // successively selecting the point closest to the group until the threshold is exceeded            while (true)
            {
                if (totalPointsAllocated == points.Count) break; // no need to continue further                                int closest = -1; // index of closest point to current candidate cluster                int minDiameterSquared = Int32.MaxValue; // minimum diameter squared, initialized to impossible value 
                for (int j = i + 1; j < points.Count; j++)
                { 
                   if(candidateNumbers[j] > 0) continue; // point already allocated to a candidate           
                   // update diameters squared to allow for latest point added to current cluster                    int distSquared  = Point.DistanceSquared(latestPoint, points[j]);
                   if (distSquared > diametersSquared[j]) diametersSquared[j] = distSquared;
                   // check if closer than previous closest point                   if (diametersSquared[j] < minDiameterSquared)
                   {
                       minDiameterSquared = diametersSquared[j];
                       closest = j;
                   }
                }

                // if closest point is within maxDiameter, add it to the current candidate and mark it accordingly                if ((double)minDiameterSquared <= maxDiameterSquared)
                {
                   currentCandidate.Add(points[closest]);
                   candidateNumbers[closest] = currentCandidateNumber;
                   totalPointsAllocated++;
                }
                else // otherwise finished with current candidate                {
                   break;
                }                
            }
            // add current candidate to candidates list            candidates.Add(currentCandidate);
        }
        // now find the candidate cluster with the largest number of points        int maxPoints = -1; // impossibly small value        int bestCandidateNumber = 0; // ditto        for(int i = 0; i < candidates.Count; i++)
        {
           if (candidates[i].Count > maxPoints)
           {
               maxPoints = candidates[i].Count;
               bestCandidateNumber = i + 1; // counting from 1 rather than 0           }
        }
        // iterating backwards to avoid indexing problems, remove points in best candidate from points list        // this will automatically be persisted to caller as List<Point> is a reference type        for(int i = candidateNumbers.Length - 1; i >= 0; i--)
        {           
            if (candidateNumbers[i] == bestCandidateNumber) points.RemoveAt(i);
        }

        // return best candidate to caller                        return candidates[bestCandidateNumber - 1];
    }
}

READ MORE>>

0 comments: