1.前言:
在數值統計分析中,常要計算各類的統計值,利用程式來分析工程資料,並且可做後續繪圖或是進階應用
2.說明:
取待分析資料數列存在DataTable中,利用程式作統計分析,包含下列統計值:
平均值、總和、樣本標準差、變異數、中位數、第一四分位數、第三四分位數、四分位距、最小值、最大值、全距。
分析資料為未排序的1到20數列,存在dt.csv檔案中。
讀取資料
public DataTable TxtConvertToDataTable(string File, string TableName, string delimiter)
{
DataTable dt = new DataTable();
DataSet ds = new DataSet();
StreamReader s = new StreamReader(File, System.Text.Encoding.Default);
string[] columns = s.ReadLine().Split(delimiter.ToCharArray());
ds.Tables.Add(TableName);
foreach (string col in columns)
{
bool added = false;
string next = "";
int i = 0;
while (!added)
{
string columnname = col + next;
columnname = columnname.Replace("#", "");
columnname = columnname.Replace("'", "");
columnname = columnname.Replace("&", "");
if (!ds.Tables[TableName].Columns.Contains(columnname))
{
ds.Tables[TableName].Columns.Add(columnname.ToUpper());
added = true;
}
else
{
i++;
next = "_" + i.ToString();
}
}
}
string AllData = s.ReadToEnd();
string[] rows = AllData.Split("\n".ToCharArray());
foreach (string r in rows)
{
string[] items = r.Split(delimiter.ToCharArray());
ds.Tables[TableName].Rows.Add(items);
}
s.Close();
dt = ds.Tables[0];
return dt;
}
資料排序
private ArrayList ArySortedData(DataTable dt, string ColumnName)
{
ArrayList ary = new ArrayList();
foreach (DataRow row in dt.Rows)
{
//將資料轉成double型態
ary.Add(double.Parse(row[ColumnName].ToString().Trim()));
}
ary.Sort();//從小到大排序
return ary;
}
統計分析方法
private Hashtable StSummary(ArrayList aryData)
{
Hashtable stSummary = new Hashtable();
double _value = 0;
double Mean = 0;//平均值
double Sum = 0;//總和
double StandardDeviation = 0;//樣本標準差
double Variance = 0;//變異數
double Median = 0;//中位數
double FirstQuartile = 0;//第一四分位數
double ThirdQuartile = 0;//第三四分位數
double IQR = 0;//四分位距
double Minimum = double.MaxValue;//最小值
double Maximum = double.MinValue;//最大值
double Range = 0;//全距
double SumOfSquare = 0;//平方總和
//計算值取小數點8位
if (aryData.Count > 0)
{
for (int i = 0; i < aryData.Count; i++)
{
_value = double.Parse(aryData[i].ToString());
if (Maximum < _value) Maximum = _value;
if (Minimum > _value) Minimum = _value;
Sum += _value;
}
Mean = Math.Round(Sum / aryData.Count, 8);
Sum = Math.Round(Sum, 8);
Range = Math.Round(Maximum - Minimum, 8);
Maximum = Math.Round(Maximum, 8);
Minimum = Math.Round(Minimum, 8);
for (int i = 0; i < aryData.Count; i++)
{
_value = double.Parse(aryData[i].ToString());
SumOfSquare += Math.Pow((_value - Mean), 2);
}
if (aryData.Count > 1)
{
Variance = SumOfSquare / (aryData.Count - 1);
StandardDeviation = Math.Sqrt(SumOfSquare / (aryData.Count - 1));
Variance = Math.Round(Variance, 8);
StandardDeviation = Math.Round(StandardDeviation, 8);
}
else
{
Variance = double.NaN;
StandardDeviation = double.NaN;
}
Median = Math.Round(GetMedian(aryData), 8);
if (aryData.Count > 1)
{
FirstQuartile = GetFirstQuartile(aryData);
ThirdQuartile = GetThirdQuartile(aryData);
}
else
{
FirstQuartile = double.Parse(aryData[0].ToString());
ThirdQuartile = double.Parse(aryData[0].ToString());
}
IQR = ThirdQuartile - FirstQuartile;
FirstQuartile = Math.Round(FirstQuartile, 8);
ThirdQuartile = Math.Round(ThirdQuartile, 8);
IQR = Math.Round(IQR, 8);
}
else
{
Mean = double.NaN;
Sum = double.NaN;
StandardDeviation = double.NaN;
Variance = double.NaN;
Median = double.NaN;
FirstQuartile = double.NaN;
ThirdQuartile = double.NaN;
IQR = double.NaN;
Minimum = double.NaN;
Maximum = double.NaN;
Range = double.NaN;
}
stSummary.Add("Mean", Mean);
stSummary.Add("Sum", Sum);
stSummary.Add("StandardDeviation", StandardDeviation);
stSummary.Add("Variance", Variance);
stSummary.Add("Median", Median);
stSummary.Add("FirstQuartile", FirstQuartile);
stSummary.Add("ThirdQuartile", ThirdQuartile);
stSummary.Add("IQR", IQR);
stSummary.Add("Minimum", Minimum);
stSummary.Add("Maximum", Maximum);
stSummary.Add("Range", Range);
return stSummary;
}
取中位數
private double GetMedian(ArrayList aryData)
{
double _value = 0;
if (aryData.Count % 2 == 0)//數量為偶數
{
int _index = aryData.Count / 2;
double valLeft = double.Parse(aryData[_index - 1].ToString());
double valRight = double.Parse(aryData[_index].ToString());
_value = (valLeft + valRight) / 2;
}
else//數量為奇數
{
int _index = (aryData.Count + 1) / 2;
_value = double.Parse(aryData[_index- 1].ToString());
}
return _value;
}
四分位數的計算方式可參考下列網址的說明,下圖為各種計算方法列表。
http://mathworld.wolfram.com/Quartile.html
本範例使用的方法為Freund and Perles,Microsoft Excel 2007 以及早期版本使用QUARTILE功能也是此方法,R語言也是。Excel 2010及之後的版本包含兩個功能QUARTILE.INC(使用Freund and Perles)及QUARTILE.EXC(使用Minitab)。
可參考下列網址的說明:
http://smallbusiness.chron.com/convert-data-set-quartile-50676.html
取第一四分位數與第三四分位數
private double GetFirstQuartile(ArrayList aryData)
{
double _value = 0;
double _firstQuartilePosition = (double.Parse(aryData.Count.ToString()) + 3) / 4;
double _lowerValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString()) - 1].ToString());
double _upperValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString())].ToString());
double _factor = _firstQuartilePosition - Math.Floor(_firstQuartilePosition);
_value = _lowerValue + _factor * (_upperValue - _lowerValue);
return _value;
}
private double GetThirdQuartile(ArrayList aryData)
{
double _value = 0;
double _firstQuartilePosition = (3 * double.Parse(aryData.Count.ToString()) + 1) / 4;
double _lowerValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString()) - 1].ToString());
double _upperValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString())].ToString());
double _factor = _firstQuartilePosition - Math.Floor(_firstQuartilePosition);
_value = _lowerValue + _factor * (_upperValue - _lowerValue);
return _value;
}
3.應用:
private void btCalc_Click(object sender, EventArgs e)
{
DataTable dt = TxtConvertToDataTable(@"D:\TMP\dt.csv", "tmp", ",");
ArrayList aryData = ArySortedData(dt, "data");
Hashtable stSummary = StSummary(aryData);
string outputData = "平均值="+stSummary["Mean"].ToString()+"\n"+
"總和="+stSummary["Sum"].ToString()+"\n"+
"樣本標準差="+stSummary["StandardDeviation"].ToString()+"\n"+
"變異數="+stSummary["Variance"].ToString()+"\n"+
"中位數="+stSummary["Median"].ToString()+"\n"+
"第一四分位數="+stSummary["FirstQuartile"].ToString()+"\n"+
"第三四分位數="+stSummary["ThirdQuartile"].ToString()+"\n"+
"四分位距="+stSummary["IQR"].ToString()+"\n"+
"最小值="+stSummary["Minimum"].ToString()+"\n"+
"最大值="+stSummary["Maximum"].ToString()+"\n"+
"全距="+stSummary["Range"].ToString();
MessageBox.Show(outputData);
}
文章標籤
全站熱搜

Wow i really love reading your update..nice one tho