1.前言:
在數值統計分析中,常要計算各類的統計值,利用程式來分析工程資料,並且可做後續繪圖或是進階應用
2.說明:
取待分析資料數列存在DataTable中,利用程式作統計分析,包含下列統計值:
平均值、總和、樣本標準差、變異數、中位數、第一四分位數、第三四分位數、四分位距、最小值、最大值、全距。
分析資料為未排序的1到20數列,存在dt.csv檔案中。
讀取資料
public DataTable TxtConvertToDataTable(string File, string TableName, string delimiter) { DataTable dt = new DataTable(); DataSet ds = new DataSet(); StreamReader s = new StreamReader(File, System.Text.Encoding.Default); string[] columns = s.ReadLine().Split(delimiter.ToCharArray()); ds.Tables.Add(TableName); foreach (string col in columns) { bool added = false; string next = ""; int i = 0; while (!added) { string columnname = col + next; columnname = columnname.Replace("#", ""); columnname = columnname.Replace("'", ""); columnname = columnname.Replace("&", ""); if (!ds.Tables[TableName].Columns.Contains(columnname)) { ds.Tables[TableName].Columns.Add(columnname.ToUpper()); added = true; } else { i++; next = "_" + i.ToString(); } } } string AllData = s.ReadToEnd(); string[] rows = AllData.Split("\n".ToCharArray()); foreach (string r in rows) { string[] items = r.Split(delimiter.ToCharArray()); ds.Tables[TableName].Rows.Add(items); } s.Close(); dt = ds.Tables[0]; return dt; }
資料排序
private ArrayList ArySortedData(DataTable dt, string ColumnName) { ArrayList ary = new ArrayList(); foreach (DataRow row in dt.Rows) { //將資料轉成double型態 ary.Add(double.Parse(row[ColumnName].ToString().Trim())); } ary.Sort();//從小到大排序 return ary; }
統計分析方法
private Hashtable StSummary(ArrayList aryData) { Hashtable stSummary = new Hashtable(); double _value = 0; double Mean = 0;//平均值 double Sum = 0;//總和 double StandardDeviation = 0;//樣本標準差 double Variance = 0;//變異數 double Median = 0;//中位數 double FirstQuartile = 0;//第一四分位數 double ThirdQuartile = 0;//第三四分位數 double IQR = 0;//四分位距 double Minimum = double.MaxValue;//最小值 double Maximum = double.MinValue;//最大值 double Range = 0;//全距 double SumOfSquare = 0;//平方總和 //計算值取小數點8位 if (aryData.Count > 0) { for (int i = 0; i < aryData.Count; i++) { _value = double.Parse(aryData[i].ToString()); if (Maximum < _value) Maximum = _value; if (Minimum > _value) Minimum = _value; Sum += _value; } Mean = Math.Round(Sum / aryData.Count, 8); Sum = Math.Round(Sum, 8); Range = Math.Round(Maximum - Minimum, 8); Maximum = Math.Round(Maximum, 8); Minimum = Math.Round(Minimum, 8); for (int i = 0; i < aryData.Count; i++) { _value = double.Parse(aryData[i].ToString()); SumOfSquare += Math.Pow((_value - Mean), 2); } if (aryData.Count > 1) { Variance = SumOfSquare / (aryData.Count - 1); StandardDeviation = Math.Sqrt(SumOfSquare / (aryData.Count - 1)); Variance = Math.Round(Variance, 8); StandardDeviation = Math.Round(StandardDeviation, 8); } else { Variance = double.NaN; StandardDeviation = double.NaN; } Median = Math.Round(GetMedian(aryData), 8); if (aryData.Count > 1) { FirstQuartile = GetFirstQuartile(aryData); ThirdQuartile = GetThirdQuartile(aryData); } else { FirstQuartile = double.Parse(aryData[0].ToString()); ThirdQuartile = double.Parse(aryData[0].ToString()); } IQR = ThirdQuartile - FirstQuartile; FirstQuartile = Math.Round(FirstQuartile, 8); ThirdQuartile = Math.Round(ThirdQuartile, 8); IQR = Math.Round(IQR, 8); } else { Mean = double.NaN; Sum = double.NaN; StandardDeviation = double.NaN; Variance = double.NaN; Median = double.NaN; FirstQuartile = double.NaN; ThirdQuartile = double.NaN; IQR = double.NaN; Minimum = double.NaN; Maximum = double.NaN; Range = double.NaN; } stSummary.Add("Mean", Mean); stSummary.Add("Sum", Sum); stSummary.Add("StandardDeviation", StandardDeviation); stSummary.Add("Variance", Variance); stSummary.Add("Median", Median); stSummary.Add("FirstQuartile", FirstQuartile); stSummary.Add("ThirdQuartile", ThirdQuartile); stSummary.Add("IQR", IQR); stSummary.Add("Minimum", Minimum); stSummary.Add("Maximum", Maximum); stSummary.Add("Range", Range); return stSummary; }
取中位數
private double GetMedian(ArrayList aryData) { double _value = 0; if (aryData.Count % 2 == 0)//數量為偶數 { int _index = aryData.Count / 2; double valLeft = double.Parse(aryData[_index - 1].ToString()); double valRight = double.Parse(aryData[_index].ToString()); _value = (valLeft + valRight) / 2; } else//數量為奇數 { int _index = (aryData.Count + 1) / 2; _value = double.Parse(aryData[_index- 1].ToString()); } return _value; }
四分位數的計算方式可參考下列網址的說明,下圖為各種計算方法列表。
http://mathworld.wolfram.com/Quartile.html
本範例使用的方法為Freund and Perles,Microsoft Excel 2007 以及早期版本使用QUARTILE功能也是此方法,R語言也是。Excel 2010及之後的版本包含兩個功能QUARTILE.INC(使用Freund and Perles)及QUARTILE.EXC(使用Minitab)。
可參考下列網址的說明:
http://smallbusiness.chron.com/convert-data-set-quartile-50676.html
取第一四分位數與第三四分位數
private double GetFirstQuartile(ArrayList aryData) { double _value = 0; double _firstQuartilePosition = (double.Parse(aryData.Count.ToString()) + 3) / 4; double _lowerValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString()) - 1].ToString()); double _upperValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString())].ToString()); double _factor = _firstQuartilePosition - Math.Floor(_firstQuartilePosition); _value = _lowerValue + _factor * (_upperValue - _lowerValue); return _value; } private double GetThirdQuartile(ArrayList aryData) { double _value = 0; double _firstQuartilePosition = (3 * double.Parse(aryData.Count.ToString()) + 1) / 4; double _lowerValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString()) - 1].ToString()); double _upperValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString())].ToString()); double _factor = _firstQuartilePosition - Math.Floor(_firstQuartilePosition); _value = _lowerValue + _factor * (_upperValue - _lowerValue); return _value; }
3.應用:
private void btCalc_Click(object sender, EventArgs e) { DataTable dt = TxtConvertToDataTable(@"D:\TMP\dt.csv", "tmp", ","); ArrayList aryData = ArySortedData(dt, "data"); Hashtable stSummary = StSummary(aryData); string outputData = "平均值="+stSummary["Mean"].ToString()+"\n"+ "總和="+stSummary["Sum"].ToString()+"\n"+ "樣本標準差="+stSummary["StandardDeviation"].ToString()+"\n"+ "變異數="+stSummary["Variance"].ToString()+"\n"+ "中位數="+stSummary["Median"].ToString()+"\n"+ "第一四分位數="+stSummary["FirstQuartile"].ToString()+"\n"+ "第三四分位數="+stSummary["ThirdQuartile"].ToString()+"\n"+ "四分位距="+stSummary["IQR"].ToString()+"\n"+ "最小值="+stSummary["Minimum"].ToString()+"\n"+ "最大值="+stSummary["Maximum"].ToString()+"\n"+ "全距="+stSummary["Range"].ToString(); MessageBox.Show(outputData); }
文章標籤
全站熱搜