DataManager finding mean, mode (assuming single value) and median
import java.lang.Number ;
import java.util.Comparator ;
/**
 * Pairs a numeric value with the number of times that value has been observed.
 *
 * <p>The class also implements {@link Comparator}, so any instance can be passed to a sort
 * routine to order {@code ValueFrequency} objects by ascending value.
 *
 * @param <T> the numeric type of the stored value
 */
public class ValueFrequency<T extends Number> implements Comparator<ValueFrequency<T>> {
    private T value;
    private long frequency;

    /**
     * Creates an entry for {@code v} with a frequency of zero.
     *
     * @param v the value this entry counts
     */
    public ValueFrequency(T v) {
        // Assign the fields directly rather than through the overridable setters.
        this.value = v;
        this.frequency = 0;
    }

    /**
     * Replaces the stored frequency.
     *
     * @param frq the new frequency
     */
    public void setFrequency(long frq) {
        frequency = frq;
    }

    /**
     * Replaces the stored value.
     *
     * @param val the new value
     */
    public void setValue(T val) {
        value = val;
    }

    /**
     * Returns how many times the value has been counted.
     *
     * @return the current frequency
     */
    public long getFrequency() {
        return frequency;
    }

    /**
     * Returns the value this entry counts.
     *
     * @return the stored value
     */
    public T getValue() {
        return value;
    }

    /** Increments the frequency by one. */
    public void bump() {
        frequency += 1;
    }

    /**
     * Returns the value multiplied by its frequency.
     *
     * @return {@code value.floatValue() * frequency}
     */
    public float getValueFreq() {
        return value.floatValue() * frequency;
    }

    @Override
    public String toString() {
        return "ValueFrequency: " + getValue() + " freq " + getFrequency();
    }

    /**
     * Orders two entries by ascending value.
     *
     * <p>The values are compared as {@code double}s. Subtracting {@code intValue()} results, as a
     * shortcut, would overflow for operands that are far apart and would treat fractional values
     * that share an integer part (for example 1.2 and 1.7) as equal.
     *
     * @param a the first entry
     * @param b the second entry
     * @return a negative number, zero, or a positive number when the value of {@code a} is less
     *     than, equal to, or greater than the value of {@code b}
     */
    @Override
    public int compare(ValueFrequency<T> a, ValueFrequency<T> b) {
        return Double.compare(a.getValue().doubleValue(), b.getValue().doubleValue());
    }
}
import java.util.Map ;
import java.util.HashMap ;
import java.util.List ;
import java.util.ArrayList ;
import java.util.Random ;
import java.lang.Number ;
import java.util.Collections ;
// Shiva, this is the basic outline of what we discussed last night
// I have not checked it and it is certainly unfinished - with space for big improvements!
// Compile this, together with ValueFrequency.java, and then run this class
// javac ValueFrequency.java
// javac DataManager.java
// java DataManager
/**
 * Collects numeric samples and reports their mean, mode (a single mode is assumed) and median.
 *
 * <p>One {@link ValueFrequency} record is kept per distinct sample value, so storage grows with
 * the number of distinct values rather than with the number of samples.
 *
 * @param <T> the numeric type of the samples
 */
public class DataManager<T extends Number> {
    // One record per distinct value; sorted by value on demand (see sortByValue()).
    private final List<ValueFrequency<T>> vfList = new ArrayList<>();
    // Direct lookup from a sample value to its frequency record.
    private final Map<T, ValueFrequency<T>> map = new HashMap<>();
    // Running total of all samples. Kept as a double because a float running mean loses
    // whole samples once mean * count no longer fits in float's 24-bit significand.
    private double sum;
    private long numsamples;
    // Record with the highest frequency seen so far; null until the first sample arrives.
    private ValueFrequency<T> mode;

    /**
     * Records one sample.
     *
     * @param value the sample to add
     * @throws NullPointerException if {@code value} is null
     */
    public void addValue(T value) {
        // Reject null before touching any state so a bad call cannot leave a half-added record.
        if (value == null) {
            throw new NullPointerException("value");
        }
        ValueFrequency<T> vf = map.get(value);
        if (vf == null) {
            vf = new ValueFrequency<T>(value);
            map.put(value, vf);
            vfList.add(vf);
        }
        vf.bump();
        // @TODO Only ONE mode is tracked; ties keep the value that reached the count first.
        if (mode == null || vf.getFrequency() > mode.getFrequency()) {
            mode = vf;
        }
        sum += value.doubleValue();
        numsamples += 1;
    }

    /**
     * Returns the arithmetic mean of all samples.
     *
     * @return the mean, or 0 when no samples have been added
     */
    public float getMean() {
        if (numsamples == 0) {
            return 0;
        }
        return (float) (sum / numsamples);
    }

    /**
     * Returns the most frequent sample value (a single mode is assumed).
     *
     * @return the mode, or {@code null} when no samples have been added
     */
    public T getMode() {
        return mode != null ? mode.getValue() : null;
    }

    /**
     * Returns the median of all samples.
     *
     * @return the median, or 0 when no samples have been added
     */
    public float getMedian() {
        if (mode == null) {
            return 0;
        }
        sortByValue();
        long upperMiddle = numsamples / 2;
        if ((numsamples & 1) == 0) {
            // Even sample count: average the two middle samples (0-based positions n/2 - 1 and n/2).
            ValueFrequency<T> lower = entryAt(upperMiddle - 1);
            ValueFrequency<T> upper = entryAt(upperMiddle);
            return (lower.getValue().floatValue() + upper.getValue().floatValue()) / 2;
        }
        // Odd sample count: the single middle sample sits at 0-based position (n + 1) / 2 - 1 == n / 2.
        return entryAt(upperMiddle).getValue().floatValue();
    }

    /**
     * Returns the record that holds the sample at the given 0-based position in value order.
     *
     * @param index 0-based position among all samples, counting repeated values
     * @return the matching record, or {@code null} when {@code index} is negative or not less
     *     than the total number of samples
     */
    public ValueFrequency<T> getValueFrequencyByIndex(long index) {
        sortByValue();
        return entryAt(index);
    }

    /**
     * Returns the number of samples added so far.
     *
     * @return the total sample count
     */
    public long getTotalFrequency() {
        return numsamples;
    }

    @Override
    public String toString() {
        return "mean: " + getMean()
            + " mode: " + getMode()
            + " median " + getMedian()
            + "\n";
    }

    /** Prints every distinct value, in value order, with its frequency and cumulative frequency. */
    public void debug() {
        long index = 0;
        long cummFrequency = 0;
        sortByValue();
        for (ValueFrequency<T> vf : vfList) {
            cummFrequency += vf.getFrequency();
            System.out.println(" ValueFrequency at index " + index + " = " + vf + " cumm Freq = " + cummFrequency);
            index++;
        }
    }

    /** Sorts the records by value, using one of the records as the comparator. */
    private void sortByValue() {
        // mode is null only while the list is empty, in which case there is nothing to sort.
        if (mode != null) {
            Collections.sort(vfList, mode);
        }
    }

    /** Walks the cumulative frequencies of the already-sorted records to locate a sample position. */
    private ValueFrequency<T> entryAt(long index) {
        if (index < 0) {
            return null;
        }
        long cummFrequency = 0;
        for (ValueFrequency<T> vf : vfList) {
            cummFrequency += vf.getFrequency();
            if (index < cummFrequency) {
                return vf;
            }
        }
        return null;
    }

    /**
     * Demo: throws two dice 20,000,000 times and prints the statistics of the sums.
     *
     * @param args unused
     */
    static public void main(String[] args) {
        DataManager<Integer> dm = new DataManager<Integer>();
        // Fixed seed so every run produces the same sequence - remove for real randomness.
        Random rnd = new Random(10);
        for (int i = 0; i < 20000000; i++) {
            // Each throw of two dice yields a sum between 2 and 12.
            dm.addValue((rnd.nextInt(6) + 1) + (rnd.nextInt(6) + 1));
        }
        dm.debug();
        System.out.println(dm);
    }
}
import java.lang.Number ;
import java.util.Comparator ;
/**
 * Pairs a numeric value with the number of times that value has been observed.
 *
 * <p>The class also implements {@link Comparator}, so any instance can be passed to a sort
 * routine to order {@code ValueFrequency} objects by ascending value.
 *
 * @param <T> the numeric type of the stored value
 */
public class ValueFrequency<T extends Number> implements Comparator<ValueFrequency<T>> {
    private T value;
    private long frequency;

    /**
     * Creates an entry for {@code v} with a frequency of zero.
     *
     * @param v the value this entry counts
     */
    public ValueFrequency(T v) {
        // Assign the fields directly rather than through the overridable setters.
        this.value = v;
        this.frequency = 0;
    }

    /**
     * Replaces the stored frequency.
     *
     * @param frq the new frequency
     */
    public void setFrequency(long frq) {
        frequency = frq;
    }

    /**
     * Replaces the stored value.
     *
     * @param val the new value
     */
    public void setValue(T val) {
        value = val;
    }

    /**
     * Returns how many times the value has been counted.
     *
     * @return the current frequency
     */
    public long getFrequency() {
        return frequency;
    }

    /**
     * Returns the value this entry counts.
     *
     * @return the stored value
     */
    public T getValue() {
        return value;
    }

    /** Increments the frequency by one. */
    public void bump() {
        frequency += 1;
    }

    /**
     * Returns the value multiplied by its frequency.
     *
     * @return {@code value.floatValue() * frequency}
     */
    public float getValueFreq() {
        return value.floatValue() * frequency;
    }

    @Override
    public String toString() {
        return "ValueFrequency: " + getValue() + " freq " + getFrequency();
    }

    /**
     * Orders two entries by ascending value.
     *
     * <p>The values are compared as {@code double}s. Subtracting {@code intValue()} results, as a
     * shortcut, would overflow for operands that are far apart and would treat fractional values
     * that share an integer part (for example 1.2 and 1.7) as equal.
     *
     * @param a the first entry
     * @param b the second entry
     * @return a negative number, zero, or a positive number when the value of {@code a} is less
     *     than, equal to, or greater than the value of {@code b}
     */
    @Override
    public int compare(ValueFrequency<T> a, ValueFrequency<T> b) {
        return Double.compare(a.getValue().doubleValue(), b.getValue().doubleValue());
    }
}
import java.util.Map ;
import java.util.HashMap ;
import java.util.List ;
import java.util.ArrayList ;
import java.util.Random ;
import java.lang.Number ;
import java.util.Collections ;
// Shiva, this is the basic outline of what we discussed last night
// I have not checked it and it is certainly unfinished - with space for big improvements!
// Compile this, together with ValueFrequency.java, and then run this class
// javac ValueFrequency.java
// javac DataManager.java
// java DataManager
/**
 * Collects numeric samples and reports their mean, mode (a single mode is assumed) and median.
 *
 * <p>One {@link ValueFrequency} record is kept per distinct sample value, so storage grows with
 * the number of distinct values rather than with the number of samples.
 *
 * @param <T> the numeric type of the samples
 */
public class DataManager<T extends Number> {
    // One record per distinct value; sorted by value on demand (see sortByValue()).
    private final List<ValueFrequency<T>> vfList = new ArrayList<>();
    // Direct lookup from a sample value to its frequency record.
    private final Map<T, ValueFrequency<T>> map = new HashMap<>();
    // Running total of all samples. Kept as a double because a float running mean loses
    // whole samples once mean * count no longer fits in float's 24-bit significand.
    private double sum;
    private long numsamples;
    // Record with the highest frequency seen so far; null until the first sample arrives.
    private ValueFrequency<T> mode;

    /**
     * Records one sample.
     *
     * @param value the sample to add
     * @throws NullPointerException if {@code value} is null
     */
    public void addValue(T value) {
        // Reject null before touching any state so a bad call cannot leave a half-added record.
        if (value == null) {
            throw new NullPointerException("value");
        }
        ValueFrequency<T> vf = map.get(value);
        if (vf == null) {
            vf = new ValueFrequency<T>(value);
            map.put(value, vf);
            vfList.add(vf);
        }
        vf.bump();
        // @TODO Only ONE mode is tracked; ties keep the value that reached the count first.
        if (mode == null || vf.getFrequency() > mode.getFrequency()) {
            mode = vf;
        }
        sum += value.doubleValue();
        numsamples += 1;
    }

    /**
     * Returns the arithmetic mean of all samples.
     *
     * @return the mean, or 0 when no samples have been added
     */
    public float getMean() {
        if (numsamples == 0) {
            return 0;
        }
        return (float) (sum / numsamples);
    }

    /**
     * Returns the most frequent sample value (a single mode is assumed).
     *
     * @return the mode, or {@code null} when no samples have been added
     */
    public T getMode() {
        return mode != null ? mode.getValue() : null;
    }

    /**
     * Returns the median of all samples.
     *
     * @return the median, or 0 when no samples have been added
     */
    public float getMedian() {
        if (mode == null) {
            return 0;
        }
        sortByValue();
        long upperMiddle = numsamples / 2;
        if ((numsamples & 1) == 0) {
            // Even sample count: average the two middle samples (0-based positions n/2 - 1 and n/2).
            ValueFrequency<T> lower = entryAt(upperMiddle - 1);
            ValueFrequency<T> upper = entryAt(upperMiddle);
            return (lower.getValue().floatValue() + upper.getValue().floatValue()) / 2;
        }
        // Odd sample count: the single middle sample sits at 0-based position (n + 1) / 2 - 1 == n / 2.
        return entryAt(upperMiddle).getValue().floatValue();
    }

    /**
     * Returns the record that holds the sample at the given 0-based position in value order.
     *
     * @param index 0-based position among all samples, counting repeated values
     * @return the matching record, or {@code null} when {@code index} is negative or not less
     *     than the total number of samples
     */
    public ValueFrequency<T> getValueFrequencyByIndex(long index) {
        sortByValue();
        return entryAt(index);
    }

    /**
     * Returns the number of samples added so far.
     *
     * @return the total sample count
     */
    public long getTotalFrequency() {
        return numsamples;
    }

    @Override
    public String toString() {
        return "mean: " + getMean()
            + " mode: " + getMode()
            + " median " + getMedian()
            + "\n";
    }

    /** Prints every distinct value, in value order, with its frequency and cumulative frequency. */
    public void debug() {
        long index = 0;
        long cummFrequency = 0;
        sortByValue();
        for (ValueFrequency<T> vf : vfList) {
            cummFrequency += vf.getFrequency();
            System.out.println(" ValueFrequency at index " + index + " = " + vf + " cumm Freq = " + cummFrequency);
            index++;
        }
    }

    /** Sorts the records by value, using one of the records as the comparator. */
    private void sortByValue() {
        // mode is null only while the list is empty, in which case there is nothing to sort.
        if (mode != null) {
            Collections.sort(vfList, mode);
        }
    }

    /** Walks the cumulative frequencies of the already-sorted records to locate a sample position. */
    private ValueFrequency<T> entryAt(long index) {
        if (index < 0) {
            return null;
        }
        long cummFrequency = 0;
        for (ValueFrequency<T> vf : vfList) {
            cummFrequency += vf.getFrequency();
            if (index < cummFrequency) {
                return vf;
            }
        }
        return null;
    }

    /**
     * Demo: throws two dice 20,000,000 times and prints the statistics of the sums.
     *
     * @param args unused
     */
    static public void main(String[] args) {
        DataManager<Integer> dm = new DataManager<Integer>();
        // Fixed seed so every run produces the same sequence - remove for real randomness.
        Random rnd = new Random(10);
        for (int i = 0; i < 20000000; i++) {
            // Each throw of two dice yields a sum between 2 and 12.
            dm.addValue((rnd.nextInt(6) + 1) + (rnd.nextInt(6) + 1));
        }
        dm.debug();
        System.out.println(dm);
    }
}
No comments:
Post a Comment