算法实现/伪随机数/卡方检验
外观
以下程序用于计算 N 个小于 r 的正整数的卡方值。
'Calculates the chi-square value for N positive integers less than r
'Source: "Algorithms in C" - Robert Sedgewick - pp. 517
'NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
'since it could be wrong in about one out of ten times."
Public Function IsChiRandom(ByVal randomNums() As Integer, ByVal r As Integer) As Boolean
'Calculate the number of samples - N
Dim N As Integer = randomNums.Length
'According to Sedgewick: "This is valid if N is greater than about 10r"
If N <= (10 * r) Then
Return False
End If
Dim N_r As Double = N / r
Dim HT As Hashtable
Dim chi_square As Double = 0
'PART A: Get frequency of randoms
HT = Me.RandomFrequency(randomNums)
'PART B: Calculate chi-square - this approach is in Sedgewick
Dim f As Integer
For Each Item As DictionaryEntry In HT
f = Integer.Parse(Item.Value) - N_r
chi_square += Math.Pow(f, 2)
Next
chi_square = chi_square / N_r
'PART C: According to Sedgewick: "The statistic should be within 2(r)^1/2 of r
'This is valid if N is greater than about 10r"
If (r - 2 * Math.Sqrt(r) <= chi_square) And (r + 2 * Math.Sqrt(r) >= chi_square) Then
Return True
Else
Return False
End If
End Function
'Gets the frequency of occurrence of a randomly generated array of integers
'Output: A hashtable, key being the random number and value its frequency
Private Function RandomFrequency(ByVal randomNums() As Integer) As Hashtable
Dim HT As New Hashtable
Dim N As Integer = randomNums.Length
Dim count As Integer = 0
For i As Integer = 0 To N - 1
If HT.ContainsKey(randomNums(i)) Then
HT.Item(randomNums(i)) = HT.Item(randomNums(i)) + 1
Else
HT.Add(randomNums(i), 1)
End If
Next
Return HT
End Function
// Calculates the chi-square value for N positive integers less than r
// Source: "Algorithms in C" - Robert Sedgewick - pp. 517
// NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
// since it could be wrong in about one out of ten times."
public bool IsRandom(int[] randomNums, int r)
{
//Calculate the number of samples - N
int N = randomNums.Length;
//According to Sedgewick: "This is valid if N is greater than about 10r"
if (N <= 10*r)
return false;
double N_r = N / r;
double chi_square = 0;
Hashtable HT;
//PART A: Get frequency of randoms
HT = this.RandomFrequency (randomNums);
//PART B: Calculate chi-square - this approach is in Sedgewick
double f;
foreach (DictionaryEntry Item in HT)
{
f = (int)(Item.Value) - N_r;
chi_square += Math.Pow(f, 2);
}
chi_square = chi_square / N_r;
//PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
//This is valid if N is greater than about 10r"
if((r - 2 * Math.Sqrt (r) <= chi_square) & (r + 2 * Math.Sqrt (r) >= chi_square))
return true;
else
return false;
}
//Gets the frequency of occurrence of a randomly generated array of integers
//Output: A hashtable, key being the random number and value its frequency
private Hashtable RandomFrequency(int[] randomNums)
{
Hashtable HT = new Hashtable();
int N = randomNums.Length;
for(int i = 0; i <= N-1; i++)
{
if (HT.ContainsKey(randomNums[i]))
HT[randomNums[i]] = (int) HT[randomNums[i]] + 1;
else
HT[randomNums[i]] = 1;
}
return HT;
}
/**
* Calculates the chi-square value for N positive integers less than r
* Source: "Algorithms in C" - Robert Sedgewick - pp. 517
* NB: Sedgewick recommends: "...to be sure, the test should be tried a few times,
* since it could be wrong in about one out of ten times."
* @param randomNums a uniformly-randomly-generated array of integers
* @param r upper bound for the random range
* @return whether it is likely to be uniformly randomly generated
*/
public static boolean isRandom(int[] randomNums, int r)
{
//According to Sedgewick: "This is valid if N is greater than about 10r"
if (randomNums.length <= 10 * r)
return false;
//PART A: Get frequency of randoms
Map<Integer,Integer> ht = getFrequencies(randomNums);
//PART B: Calculate chi-square - this approach is in Sedgewick
double n_r = (double)randomNums.length / r;
double chiSquare = 0;
for (int v : ht.values())
{
double f = v - n_r;
chiSquare += f * f;
}
chiSquare /= n_r;
//PART C: According to Swdgewick: "The statistic should be within 2(r)^1/2 of r
//This is valid if N is greater than about 10r"
return Math.abs(chiSquare - r) <= 2 * Math.sqrt(r);
}
/**
* @param nums an array of integers
* @return a Map, key being the number and value its frequency
*/
private static Map<Integer,Integer> getFrequencies(int[] nums)
{
Map<Integer,Integer> freqs = new HashMap<Integer,Integer>();
for (int x : nums)
{
if (freqs.containsKey(x))
freqs.put(x, freqs.get(x) + 1);
else
freqs.put(x, 1);
}
return freqs;
}
from math import sqrt
from collections import Counter
def is_random(random_nums, r: int):
"""Calculates the chi-square value for n positive integers less than r
Arguments:
- random_nums: list of uniformly-randomly generated integers
- r: int, upper bound for the random range
Source: "Algorithms in C" - Robert Sedgewick - pp. 517
NB: Sedgewick recommends:
...to be sure, the test should be tried a few times, since it could be
wrong in about one out of ten times.
Example:
>>> import os
>>> r = 256
>>> sample = os.urandom(r * 11)
>>> is_random(sample, r)
True
"""
# Calculate the number of samples - n
n = len(random_nums)
# According to Sedgewick:
# This is valid if n is greater than about 10r
if n <= 10 * r:
return False
n_r = n / r
# PART A: Get frequency of randoms
ht = Counter(random_nums)
# PART B: Calculate chi-square - this approach is in Sedgewick
chi_square = sum((v - n_r)**2 for v in ht.values()) / n_r
# PART C: According to Sedgewick:
# The statistic should be within 2(r)^1/2 of r
# This is valid if N is greater than about 10r
return abs(chi_square - r) <= 2 * sqrt(r)