1.前言:
在數值統計分析中,常要計算各類的統計值,利用程式來分析工程資料,並且可做後續繪圖或是進階應用

2.說明:
取待分析資料數列存在DataTable中,利用程式作統計分析,包含下列統計值:
平均值、總和、樣本標準差、變異數、中位數、第一四分位數、第三四分位數、四分位距、最小值、最大值、全距。

分析資料為未排序的1到20數列,存在dt.csv檔案中。

讀取資料

public DataTable TxtConvertToDataTable(string File, string TableName, string delimiter)
{
	DataTable dt = new DataTable();
	DataSet ds = new DataSet();
	StreamReader s = new StreamReader(File, System.Text.Encoding.Default);
	string[] columns = s.ReadLine().Split(delimiter.ToCharArray());
	ds.Tables.Add(TableName);
	foreach (string col in columns)
	{
		bool added = false;
		string next = "";
		int i = 0;
		while (!added)
		{
			string columnname = col + next;
			columnname = columnname.Replace("#", "");
			columnname = columnname.Replace("'", "");
			columnname = columnname.Replace("&", "");

			if (!ds.Tables[TableName].Columns.Contains(columnname))
			{
				ds.Tables[TableName].Columns.Add(columnname.ToUpper());
				added = true;
			}
			else
			{
				i++;
				next = "_" + i.ToString();
			}
		}
	}

	string AllData = s.ReadToEnd();
	string[] rows = AllData.Split("\n".ToCharArray());

	foreach (string r in rows)
	{
		string[] items = r.Split(delimiter.ToCharArray());
		ds.Tables[TableName].Rows.Add(items);
	}

	s.Close();

	dt = ds.Tables[0];

	return dt;
}

資料排序

private ArrayList ArySortedData(DataTable dt, string ColumnName)
{
	ArrayList ary = new ArrayList();
	foreach (DataRow row in dt.Rows)
	{
		//將資料轉成double型態
		ary.Add(double.Parse(row[ColumnName].ToString().Trim()));
	}
	ary.Sort();//從小到大排序
	return ary;
}

統計分析方法

private Hashtable StSummary(ArrayList aryData)
{
	Hashtable stSummary = new Hashtable();

	double _value = 0;
	double Mean = 0;//平均值
	double Sum = 0;//總和
	double StandardDeviation = 0;//樣本標準差
	double Variance = 0;//變異數
	double Median = 0;//中位數
	double FirstQuartile = 0;//第一四分位數
	double ThirdQuartile = 0;//第三四分位數
	double IQR = 0;//四分位距
	double Minimum = double.MaxValue;//最小值
	double Maximum = double.MinValue;//最大值
	double Range = 0;//全距
	double SumOfSquare = 0;//平方總和

	//計算值取小數點8位
	if (aryData.Count > 0)
	{
		for (int i = 0; i < aryData.Count; i++)
		{
			_value = double.Parse(aryData[i].ToString());
			if (Maximum < _value) Maximum = _value;
			if (Minimum > _value) Minimum = _value;
			Sum += _value;
		}

		Mean = Math.Round(Sum / aryData.Count, 8);
		Sum = Math.Round(Sum, 8);
		Range = Math.Round(Maximum - Minimum, 8);
		Maximum = Math.Round(Maximum, 8);
		Minimum = Math.Round(Minimum, 8);                

		for (int i = 0; i < aryData.Count; i++)
		{
			_value = double.Parse(aryData[i].ToString());
			SumOfSquare += Math.Pow((_value - Mean), 2);
		}

		if (aryData.Count > 1)
		{
			Variance = SumOfSquare / (aryData.Count - 1);
			StandardDeviation = Math.Sqrt(SumOfSquare / (aryData.Count - 1));
			Variance = Math.Round(Variance, 8);
			StandardDeviation = Math.Round(StandardDeviation, 8);
		}
		else
		{
			Variance = double.NaN;
			StandardDeviation = double.NaN;
		}

		Median = Math.Round(GetMedian(aryData), 8);

		if (aryData.Count > 1)
		{
			FirstQuartile = GetFirstQuartile(aryData);
			ThirdQuartile = GetThirdQuartile(aryData);
		}
		else
		{
			FirstQuartile = double.Parse(aryData[0].ToString());
			ThirdQuartile = double.Parse(aryData[0].ToString());
		}
		
		IQR = ThirdQuartile - FirstQuartile;
		FirstQuartile = Math.Round(FirstQuartile, 8);
		ThirdQuartile = Math.Round(ThirdQuartile, 8);
		IQR = Math.Round(IQR, 8);
	}
	else
	{
		Mean = double.NaN;
		Sum = double.NaN;
		StandardDeviation = double.NaN;
		Variance = double.NaN;
		Median = double.NaN;
		FirstQuartile = double.NaN;
		ThirdQuartile = double.NaN;
		IQR = double.NaN;
		Minimum = double.NaN;
		Maximum = double.NaN;
		Range = double.NaN;
	}           

	stSummary.Add("Mean", Mean);
	stSummary.Add("Sum", Sum);
	stSummary.Add("StandardDeviation", StandardDeviation);
	stSummary.Add("Variance", Variance);
	stSummary.Add("Median", Median);
	stSummary.Add("FirstQuartile", FirstQuartile);
	stSummary.Add("ThirdQuartile", ThirdQuartile);
	stSummary.Add("IQR", IQR);
	stSummary.Add("Minimum", Minimum);
	stSummary.Add("Maximum", Maximum);
	stSummary.Add("Range", Range);

	return stSummary;
}

取中位數

private double GetMedian(ArrayList aryData)
{
	double _value = 0;

	if (aryData.Count % 2 == 0)//數量為偶數
	{
		int _index = aryData.Count / 2;
		double valLeft = double.Parse(aryData[_index - 1].ToString());
		double valRight = double.Parse(aryData[_index].ToString());
		_value = (valLeft + valRight) / 2;
	}
	else//數量為奇數
	{
		int _index = (aryData.Count + 1) / 2;
		_value = double.Parse(aryData[_index- 1].ToString());
	}

	return _value;
}

四分位數的計算方式可參考下列網址的說明,下圖為各種計算方法列表。
http://mathworld.wolfram.com/Quartile.html

quartile_20130926  

本範例使用的方法為Freund and Perles,Microsoft Excel 2007 以及早期版本使用QUARTILE功能也是此方法,R語言也是。Excel 2010及之後的版本包含兩個功能QUARTILE.INC(使用Freund and Perles)及QUARTILE.EXC(使用Minitab)。
可參考下列網址的說明:
http://smallbusiness.chron.com/convert-data-set-quartile-50676.html

取第一四分位數與第三四分位數

private double GetFirstQuartile(ArrayList aryData)
{
	double _value = 0;
	double _firstQuartilePosition = (double.Parse(aryData.Count.ToString()) + 3) / 4;
	double _lowerValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString()) - 1].ToString());
	double _upperValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString())].ToString());
	double _factor = _firstQuartilePosition - Math.Floor(_firstQuartilePosition);
	_value = _lowerValue + _factor * (_upperValue - _lowerValue);

	return _value;
}

private double GetThirdQuartile(ArrayList aryData)
{
	double _value = 0;
	double _firstQuartilePosition = (3 * double.Parse(aryData.Count.ToString()) + 1) / 4;
	double _lowerValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString()) - 1].ToString());
	double _upperValue = double.Parse(aryData[int.Parse(Math.Floor(_firstQuartilePosition).ToString())].ToString());
	double _factor = _firstQuartilePosition - Math.Floor(_firstQuartilePosition);
	_value = _lowerValue + _factor * (_upperValue - _lowerValue);

	return _value;
}

3.應用:

private void btCalc_Click(object sender, EventArgs e)
{
	DataTable dt = TxtConvertToDataTable(@"D:\TMP\dt.csv", "tmp", ",");
	ArrayList aryData = ArySortedData(dt, "data");
	Hashtable stSummary = StSummary(aryData);
	string outputData = "平均值="+stSummary["Mean"].ToString()+"\n"+
		"總和="+stSummary["Sum"].ToString()+"\n"+
		"樣本標準差="+stSummary["StandardDeviation"].ToString()+"\n"+
		"變異數="+stSummary["Variance"].ToString()+"\n"+
		"中位數="+stSummary["Median"].ToString()+"\n"+
		"第一四分位數="+stSummary["FirstQuartile"].ToString()+"\n"+
		"第三四分位數="+stSummary["ThirdQuartile"].ToString()+"\n"+
		"四分位距="+stSummary["IQR"].ToString()+"\n"+
		"最小值="+stSummary["Minimum"].ToString()+"\n"+
		"最大值="+stSummary["Maximum"].ToString()+"\n"+
		"全距="+stSummary["Range"].ToString();
	MessageBox.Show(outputData);
}
arrow
arrow
    文章標籤
    C# Statistics
    全站熱搜

    西夏普 發表在 痞客邦 留言(1) 人氣()