Saturday, 19 July 2014

DataManager finding mean, mode (assuming single value) and median

import java.lang.Number ;
import java.util.Comparator ;

/**
 * Pairs a numeric value with a count of how many times it has been seen.
 *
 * <p>Every instance also acts as a {@link Comparator} that orders
 * {@code ValueFrequency} objects by ascending numeric value, so any instance
 * can be passed to a sort routine as the ordering.
 *
 * @param <T> numeric type of the tracked value
 */
public class ValueFrequency<T extends Number> implements Comparator<ValueFrequency<T>> {

    private T value;
    private long frequency;

    /**
     * Creates an entry for {@code v} with a frequency of zero.
     *
     * @param v the value this entry tracks
     */
    public ValueFrequency(T v) {
        // Assign fields directly: calling overridable setters from a
        // constructor would run subclass code on a half-built object.
        this.value = v;
        this.frequency = 0;
    }

    public void setFrequency(long frq) {
        frequency = frq;
    }

    public void setValue(T val) {
        value = val;
    }

    public long getFrequency() {
        return frequency;
    }

    public T getValue() {
        return value;
    }

    /** Increments the frequency by one. */
    public void bump() {
        frequency += 1;
    }

    /**
     * Returns the value multiplied by its frequency, i.e. this entry's
     * contribution to the sum of all samples.
     *
     * @return {@code value * frequency} as a float
     */
    public float getValueFreq() {
        return value.floatValue() * frequency;
    }

    @Override
    public String toString() {
        return "ValueFrequency: " + getValue() + " freq " + getFrequency();
    }

    /**
     * Orders two entries by ascending numeric value.
     *
     * <p>Integral boxed types are compared as {@code long}; everything else
     * is compared as {@code double}. Subtracting {@code intValue()}s is not
     * used because it overflows for distant values and discards the
     * fractional part of floating-point values.
     *
     * @param a first entry
     * @param b second entry
     * @return negative, zero or positive as {@code a}'s value is less than,
     *         equal to or greater than {@code b}'s
     */
    @Override
    public int compare(ValueFrequency<T> a, ValueFrequency<T> b) {
        Number left = a.getValue();
        Number right = b.getValue();
        if (isIntegral(left) && isIntegral(right)) {
            return Long.compare(left.longValue(), right.longValue());
        }
        return Double.compare(left.doubleValue(), right.doubleValue());
    }

    /** Tells whether {@code n} is a boxed integer type that fits exactly in a long. */
    private static boolean isIntegral(Number n) {
        return n instanceof Long
                || n instanceof Integer
                || n instanceof Short
                || n instanceof Byte;
    }
}


import java.util.Map ;
import java.util.HashMap ;
import java.util.List ;
import java.util.ArrayList ;
import java.util.Random ;

import java.lang.Number ;
import java.util.Collections ;

// Shiva, this is the basic outline of what we discussed last night
// I have not checked it and it is certainly unfinished - with space for big improvements!
// Compile this, together with ValueFrequency.java, and then run this class
// javac ValueFrequency.java
// javac DataManager.java
// java DataManager

public class DataManager<T extends Number> {

private List<ValueFrequency<T>>  vfList = new ArrayList<>() ; // This is crap! Some nice OrderedList Object with a very fast sort
private Map<T,ValueFrequency<T>> map = new HashMap<>() ; // Quick way of getting to our ValueFrequency Objects
private float mean ;
private long numsamples ;
private ValueFrequency<T> mode ; 


public void addValue(T value) {
ValueFrequency<T> vf ;
if (!map.containsKey(value)) {
vf = new ValueFrequency<T>(value) ;
map.put(value,vf) ;
vfList.add(vf) ;
} else {
vf = map.get(value) ;
}

vf.bump() ;

if (mode == null) {
mode = vf ;
} else {
// @TODO
// Assuming only ONE 'mode' exists here - this needs improving for a 'Set' of possible 'MODEs'
if (vf.getFrequency() > mode.getFrequency()) {
mode = vf ;
}
}
// Update mean based on last mean calculation

mean = (mean * numsamples + value.floatValue()) / (numsamples + 1) ;
numsamples +=1 ;

}

public float getMean() { return mean ; }

public T getMode() { return mode != null ? mode.getValue() : null; } // assuming one MODE value

public float getMedian() {

if (mode == null) 
return 0 ;

Collections.sort(vfList, mode) ;

if ((numsamples & 1) == 0) {
// Even number in our current sample size
// (numsamples) / 2 is our required index 
// ()(numsamples) / 2 ) + 1 is our 2nd required index 
// Median is the average of these two values 
ValueFrequency<T> vf1 = getValueFrequencyByIndex((numsamples/2) - 1) ;  // minus 1 cos the array indexes in Java start at 0!
ValueFrequency<T> vf2 = getValueFrequencyByIndex((numsamples/2)) ; 
// @TODO - Check for null return - maybe catch?
return (vf1.getValue().floatValue() + vf2.getValue().floatValue()) / 2 ;

} else {
System.out.println("Odd Number");
// Odd number in our current sample size
// (numsamples + 1) / 2 is our required index 
ValueFrequency<T> vf1 = getValueFrequencyByIndex((numsamples + 1)/2  - 1 ) ;
// @TODO - Check for null return 
return vf1.getValue().floatValue() ;
}

}

//@TODO Maybe this is NOT needed if we add cummFrequency to ValueFrequency and calculate
// this as we add/insert values?
// Assumed that vfList is sorted!
public ValueFrequency<T> getValueFrequencyByIndex(long index) {
System.out.println("getValueFrequencyByIndex Index " + index) ;
long cummFrequency = 0 ;
for(ValueFrequency<T> vf : vfList) {
cummFrequency += vf.getFrequency() ;
if (index < cummFrequency) 
return vf ;
}
return null ;
}

public long getTotalFrequency()  { return numsamples ; }

@Override
public String toString() {
return "mean: " + getMean()  
+ " mode: " + getMode() 
+ " meanian " + getMedian() 
+ "\n" ;
}

public void debug() {
long index = 0 ;
long cummFrequency = 0 ;

if (mode != null) {
Collections.sort(vfList, mode) ; // Use one of our objects - for comparison function
}


for(ValueFrequency<T> vf : vfList) {
cummFrequency += vf.getFrequency() ;
System.out.println(" ValueFrequency at index " + index + " = " + vf + " cumm Freq = " + cummFrequency) ;
index++ ;
}

}


static public void main(String [] args)
{
DataManager<Integer> dm = new DataManager<Integer>() ;

// Throwing Dice 20,000,000 times!
Random rnd = new Random(10) ;  // start off with same seed each time - remove 
for (int i = 0 ; i < 20000000 ; i++) {
// Throw two die and make a note of the sum! i.e a 'throw' should produce a result between 2 and 12
dm.addValue((rnd.nextInt(6) + 1) + (rnd.nextInt(6) + 1)) ;
}

dm.debug() ;
System.out.println(dm) ;
}


}

No comments:

Post a Comment