应用程序 VBA/比较洗牌方法的偏差
外观
< 应用程序 VBA
此 VBA 代码模块展示了即使使用随机生成器,偏差也会如何影响洗牌算法。该模块适用于 MS Excel,并在工作表上打印两个统计面板以显示所选方法的不同之处。好的版本是 Knuth 洗牌算法,另一个则是通过对整个数组进行方法性交换(有时是多次交换)的方法。
- 将整个代码清单复制到 Excel 标准模块中,保存它,然后运行顶部过程。
- 洗牌算法运行许多循环(用户设置),并在统计信息中呈现离散结果的 bin 计数。请注意,为了保持显示和运行时间可管理,已使用了非常少的元素(3 或 4 个)。您可能想知道,离散结果组合的数量等于元素数量的阶乘。也就是说,使用六个元素,例如 A、B、C、D、E 和 F,将产生 6!= 120 个离散组合,以及运行时间的相应增加。
- 特别地,应注意的是,较小的变异系数 (CV) 表示一种接近随机的方法,而较高的 CV 表示该方法存在偏差。
- 使用多次交换的方法存在偏差,而 Knuth 方法则没有。
Option Explicit
Option Base 1
Private Sub RunShuffleBiasDemo()
'Run this to compare the bias for shuffling
'character arrays using two methods.
'Note large sets are too time consuming,
'since there are n! combinations.
'THIS SUB CLEARS AND WRITES TO SHEET1
Dim vArr As Variant, vRet As Variant
Dim nT As Long, bOK As Boolean, vT As Variant
Dim nCycles As Long, n As Long, sT As String
'load a typical 1D data array with test data
'3 elements makes 3! = 6 bins
'4 elements needs 4! = 24 bins etc.
vArr = Array("A", "B", "C")
'set number of cycles for test
'Typical time to complete 3 element test:
'25 secs for 100000 cycles
'225 secs for 1000000 cycles
nCycles = 100000
'dimension the collection array
ReDim vT(1 To nCycles)
'clear the worksheet
ClearWorksheet "Sheet1", 3 'contents and formats
'runs number of shuffle samples
For n = 1 To nCycles
'give way to commands-eg; break
DoEvents
'pass array to a random shuffling proc
bOK = KnuthArrShuffle(vArr, vRet)
'make a single string from array elements
sT = Arr1DToStr(vRet)
'save shuffle instance in an array
vT(n) = sT
Next n
'pass array to the proc with label for the display
CountUniqueArrayValues2 vT, 2, 2, "Test Set: Rnd Knuth"
'run a number of shuffle samples
For n = 1 To nCycles
'give way to commands-eg; break
DoEvents
'pass array to a random shuffling proc
bOK = BiasedMultiSwapArrShuffle(vArr, vRet)
'make a single string from array elements
sT = Arr1DToStr(vRet)
'save shuffle instance in an array
vT(n) = sT
Next n
'pass array to the proc with label for the display
CountUniqueArrayValues2 vT, 2, 7, "Test Set: Rnd Biased?"
'report end
MsgBox "Display done."
End Sub
Private Sub CountUniqueArrayValues2(vI As Variant, Optional nRow As Long = 1, _
Optional nCol As Long = 1, Optional sLabel As String = "")
'Counts instances of data number values in vI. Generates various stats
'for the bin quantities.
'THIS SUB CLEARS AND WRITES TO SHEET1
Dim vRV As Variant, vRQ As Variant, vDS As Variant
Dim LB As Long, UB As Long, vDB As Variant
Dim n As Long, bOK As Boolean
'make bins and count contents
bOK = DiscreteItemsCount(vI, vRV, vRQ)
LB = LBound(vRV, 1): UB = UBound(vRV, 1)
ReDim vDS(1 To 12, 1 To 3)
ReDim vDB(LB To UB + 2, 1 To 3)
If bOK Then 'load bins and stats arrays
vDB(1, 1) = sLabel: vDB(1, 2) = "Value": vDB(1, 3) = "Quantity"
For n = LB To UB
DoEvents
vDB(n + 2, 1) = "Bin # " & n 'headings
vDB(n + 2, 2) = vRV(n) 'value
vDB(n + 2, 3) = vRQ(n) 'quantity
Next n
On Error Resume Next 'avoids Mode() error when no value stands out
With Application.WorksheetFunction
vDS(1, 1) = sLabel: vDS(1, 2) = "": vDS(1, 3) = "Quantity"
vDS(3, 1) = "Average": vDS(3, 3) = Format(.Average(vRQ), "#0.000")
vDS(4, 1) = "Median": vDS(4, 3) = .Median(vRQ)
vDS(5, 1) = "Mode": vDS(5, 3) = .Mode(vRQ)
vDS(6, 1) = "Minimum": vDS(6, 3) = .Min(vRQ)
vDS(7, 1) = "Maximum": vDS(7, 3) = .Max(vRQ)
vDS(8, 1) = "Std.Deviation": vDS(8, 3) = Format(.StDevP(vRQ), "#0.000")
vDS(9, 1) = "StDev/Av %": vDS(9, 3) = Format(.StDevP(vRQ) * 100 / .Average(vRQ), "#0.000")
vDS(10, 1) = "Variance": vDS(10, 3) = Format(.VarP(vRQ), "#0.000")
vDS(11, 1) = "No.Unique Values": vDS(11, 3) = UBound(vRQ) - LBound(vRQ) + 1
vDS(12, 1) = "No.Samples": vDS(12, 3) = UBound(vI) - LBound(vI) + 1
End With
Err.Clear
Else
MsgBox "Problems getting bin count - closing"
Exit Sub
End If
'output to sheet
'ClearWorksheet "Sheet1", 3 'clear both contents and formats of the worksheet
Array2DToSheet vDS, "Sheet1", nRow, nCol 'transfer stats panel to sheet with top left at row2, col2
Array2DToSheet vDB, "Sheet1", nRow + 13, nCol 'transfer bins panel to sheet with top left below stats
FormatCells "Sheet1" 'apply font and autofit formats to all cells of the worksheet
End Sub
Private Function DiscreteItemsCount(vIn As Variant, vRetV As Variant, vRetQ As Variant) As Boolean
'Counts number of repeats of element values found in vIn
'Returns with one column for each unitque value and quantity found.
'Returns as 2D vRet, unsorted; row1=input value, row2=item count.
Dim vA As Variant, vTS As Variant, vTB As Variant
Dim s As Long, b As Long, n As Long, bFound As Boolean
Dim LB1 As Long, UB1 As Long, LB2 As Long, UB2 As Long
Dim LBS As Long, UBS As Long
'dimension 2D work array
ReDim vA(1 To 2, 1 To 1)
'get source 1D array bounds
LBS = LBound(vIn): UBS = UBound(vIn)
'get work array bounds
LB1 = LBound(vA, 1): UB1 = UBound(vA, 1) 'd1 rows
LB2 = LBound(vA, 2): UB2 = UBound(vA, 2) 'd2 cols
'intitial values
s = LBS
b = 0
vA(2, 1) = 0
Do 'step through store
DoEvents
'get source element value
vTS = vIn(s)
'check bins
Do
DoEvents
b = b + 1
'get bin element value
vTB = vA(1, b)
If vTS = vTB Then 'found in bins
vA(2, b) = CLng(vA(2, b)) + 1 'update bin
bFound = True
End If
Loop Until b >= UB2 Or bFound = True
If bFound = False Then 'no such bin exists yet
'not found in bins
If vA(2, UB2) <> 0 Then 'first element been used
ReDim Preserve vA(LB1 To UB1, LB2 To UB2 + 1)
UB2 = UBound(vA, 2)
End If
'update new bin
vA(1, UB2) = vTS
vA(2, UB2) = 1
bFound = True
End If
'reset loop variables
bFound = False
b = 0
s = s + 1
Loop Until s > UBS
'transfers -need to be separate for other uses
LB1 = LBound(vA, 1): UB1 = UBound(vA, 1) 'd1 rows
LB2 = LBound(vA, 2): UB2 = UBound(vA, 2) 'd2 cols
ReDim vRetV(LB2 To UB2) 'contains values
ReDim vRetQ(LB2 To UB2) 'contains quantities
For n = LB2 To UB2
vRetV(n) = vA(1, n)
vRetQ(n) = vA(2, n)
Next n
For n = LB2 To UB2
Debug.Print vRetV(n) & vbTab & vRetQ(n)
Next n
Debug.Print vbCrLf
DiscreteItemsCount = True
End Function
Private Sub ClearWorksheet(ByVal sSheet As String, Optional ByVal nOpt As Integer = 1)
'clears worksheet contents, formats, or both
'but does not remove charts from the worksheet
'nOpt options: contents=1, formats=2, all=3
Dim oWSht As Worksheet
Set oWSht = ThisWorkbook.Worksheets(sSheet)
oWSht.Activate
With oWSht.Cells
Select Case nOpt
Case 1 'contents only
.ClearContents
Case 2 'formats only
.ClearFormats
Case 3 'formats and contents
.Clear
Case Else
MsgBox "Illegal option in ClearWorksheet - closing"
Exit Sub
End Select
End With
oWSht.Cells(1, 1).Select
End Sub
Private Sub Array2DToSheet(ByVal vIn As Variant, sShtName As String, nStartRow As Long, nStartCol As Long)
' transfers contents of input 2D array to specified worksheet positions
' Works for any array bounds
Dim oSht As Worksheet, rTarget As Range
Dim nRows As Long, nCols As Long
Dim nNewEndC As Long, nNewEndR As Long
'get reference to sheet for output
Set oSht = ActiveWorkbook.Worksheets(sShtName)
'get the pre-shift end points
nRows = UBound(vIn, 1) - LBound(vIn, 1) + 1
nCols = UBound(vIn, 2) - LBound(vIn, 2) + 1
'modify end point for parameter starting values
nNewEndR = nRows + nStartRow - 1
nNewEndC = nCols + nStartCol - 1
' define the sheet range for the array contents
Set rTarget = oSht.Range(oSht.Cells(nStartRow, nStartCol), oSht.Cells(nNewEndR, nNewEndC))
'transfer the array contents to the sheet range
rTarget.Value = vIn
End Sub
Private Sub FormatCells(sSht As String)
' Applies certain formats to all cells
' of the named parameter worksheet
Dim oSht As Worksheet
Set oSht = ThisWorkbook.Worksheets(sSht)
oSht.Activate
'format all cells of the worksheet
oSht.Cells.Select
With Selection
.Font.Name = "Consolas" 'mono
.Font.Size = 14
.Columns.AutoFit
.Rows.AutoFit
.HorizontalAlignment = xlLeft 'xlRight 'xlCenter
.VerticalAlignment = xlBottom 'xlCenter 'xlTop
End With
oSht.Range("A1").Select
End Sub
Private Function KnuthArrShuffle(vIn As Variant, vR As Variant) As Boolean
' Performs a modified Knuth random shuffle on the elements of the input array.
' The original by Fisher-Yates, was modified for computers by Durstenfeld
' then popularised by Knuth. Returns result in vR with vIn unchanged.
'Note: Some or all elements COULD occupy their original positions,
'but only in accordance with expectation based on the random generator.
'This is best seen for small arrays, say with only 3 elements or so.
Dim vW As Variant
Dim LB As Long, UB As Long, nL As Long
Dim i As Long, j As Long
'initial assignments
LB = LBound(vIn): UB = UBound(vIn)
ReDim vR(LB To UB) 'return array
ReDim vW(LB To UB) 'work array
nL = UB - LB + 1 'length of input array
vW = vIn 'transfer to a work array
'working
Randomize
For i = LB To nL
'first get a random number
j = Int((UB - LB + 1) * Rnd + LB)
'transfer jth of vW to ith of vR
vR(i) = vW(j)
'replace selection with current last of vW
vW(j) = vW(UB)
'remove last of vW by shortening array
ReDim Preserve vW(LB To UB - 1)
'get new UBound of shortened vW
UB = UBound(vW)
'exception; return if last chara
If UB = LB Then
vR(i + 1) = vW(UB)
Exit For
End If
DoEvents 'allow breaks
Next i
KnuthArrShuffle = True
End Function
Private Function BiasedMultiSwapArrShuffle(vIn As Variant, Optional vRet As Variant) As Boolean
'Performs a random shuffle on input array strings.
'Input parameter is a single array. Returns in single vRet
'if provided, else in vIn modified. Multiple shuffles applied.
'Displays more bias than the Knuth method.
Dim vR As Variant
Dim sT As String, sTJ As String, sTS As String, nC As Long
Dim LB1 As Long, UB1 As Long, LB2 As Long, UB2 As Long
Dim i As Integer, j As Long, bSUsed As Boolean, bRUsed As Boolean
'get dimensions of vIn
LB1 = LBound(vIn): UB1 = UBound(vIn)
'dimension a work array
ReDim vR(LB1 To UB1)
'load local array
For i = LB1 To UB1
DoEvents
vR(i) = vIn(i)
Next i
'get dimensions of vR
LB2 = LBound(vR): UB2 = UBound(vR)
'randomize the rnd generator
Randomize
For i = LB2 To UB2
DoEvents
'get rnd number
j = Int((UB2 - LB2 + 1) * Rnd + LB2)
'exchange elements
sT = vR(i) 'swap
vR(i) = vR(j)
vR(j) = sT
Next i
'transfers
If Not IsMissing(vRet) Then
ReDim vRet(LB2 To UB2)
For i = LB2 To UB2
DoEvents
vRet(i) = vR(i)
Next i
Else
For i = LB2 To UB2
DoEvents
vIn(i) = vR(i)
Next i
End If
'return status
BiasedMultiSwapArrShuffle = True
End Function
Private Function Arr1DToStr(vIn As Variant) As String
' Makes a single string from 1D array string elements.
' Works for any array bounds.
Dim nC As Long, sT As String, sAccum As String
Dim LB As Long, UB As Long
LB = LBound(vIn): UB = UBound(vIn)
'join characters of array into string
For nC = LB To UB
DoEvents
sT = vIn(nC)
sAccum = sAccum & sT
Next
Arr1DToStr = sAccum
End Function