跳到内容

应用程序 VBA/比较洗牌方法的偏差

来自维基教科书,开放的书籍,开放的世界

此 VBA 代码模块展示了即使使用随机生成器,偏差也会如何影响洗牌算法。该模块适用于 MS Excel,并在工作表上打印两个统计面板以显示所选方法的不同之处。好的版本是 Knuth 洗牌算法,另一个则是通过对整个数组进行方法性交换(有时是多次交换)的方法。

关于代码的说明

[编辑 | 编辑源代码]
  • 将整个代码清单复制到 Excel 标准模块中,保存它,然后运行顶部过程。
  • 洗牌算法运行许多循环(用户设置),并在统计信息中呈现离散结果的 bin 计数。请注意,为了保持显示和运行时间可管理,已使用了非常少的元素(3 或 4 个)。您可能想知道,离散结果组合的数量等于元素数量的阶乘。也就是说,使用六个元素,例如 A、B、C、D、E 和 F,将产生 6!= 120 个离散组合,以及运行时间的相应增加。
  • 特别地,应注意的是,较小的变异系数 (CV) 表示一种接近随机的方法,而较高的 CV 表示该方法存在偏差。
  • 使用多次交换的方法存在偏差,而 Knuth 方法则没有。

VBA 代码模块

[编辑 | 编辑源代码]
Option Explicit
Option Base 1

Private Sub RunShuffleBiasDemo()
    'Run this to compare the bias for shuffling
    'character arrays using two methods.
    'Note large sets are too time consuming,
    'since there are n! combinations.
    'THIS SUB CLEARS AND WRITES TO SHEET1
    
    Dim vArr As Variant, vRet As Variant
    Dim nT As Long, bOK As Boolean, vT As Variant
    Dim nCycles As Long, n As Long, sT As String
    
    'load a typical 1D data array with test data
    '3 elements makes 3! = 6 bins
    '4 elements needs 4! = 24 bins etc.
    vArr = Array("A", "B", "C")
    
    'set number of cycles for test
    'Typical time to complete 3 element test:
    '25 secs for 100000 cycles
    '225 secs for 1000000 cycles
    nCycles = 100000
    
        
    'dimension the collection array
    ReDim vT(1 To nCycles)
        
    'clear the worksheet
    ClearWorksheet "Sheet1", 3 'contents and formats
        
    'runs number of shuffle samples
    For n = 1 To nCycles
        'give way to commands-eg; break
        DoEvents
        
        'pass array to a random shuffling proc
        bOK = KnuthArrShuffle(vArr, vRet)
                
        'make a single string from array elements
        sT = Arr1DToStr(vRet)
        
        'save shuffle instance in an array
        vT(n) = sT
    Next n
       
    'pass array to the proc with label for the display
    CountUniqueArrayValues2 vT, 2, 2, "Test Set: Rnd Knuth"
    
    'run a number of shuffle samples
    For n = 1 To nCycles
        'give way to commands-eg; break
        DoEvents
        
        'pass array to a random shuffling proc
        bOK = BiasedMultiSwapArrShuffle(vArr, vRet)
        
        'make a single string from array elements
        sT = Arr1DToStr(vRet)
        
        'save shuffle instance in an array
        vT(n) = sT
    Next n
       
    'pass array to the proc with label for the display
    CountUniqueArrayValues2 vT, 2, 7, "Test Set: Rnd Biased?"

    'report end
    MsgBox "Display done."

End Sub

Private Sub CountUniqueArrayValues2(vI As Variant, Optional nRow As Long = 1, _
                                   Optional nCol As Long = 1, Optional sLabel As String = "")
    'Counts instances of data number values in vI. Generates various stats
    'for the bin quantities.
    'THIS SUB CLEARS AND WRITES TO SHEET1
    
    Dim vRV As Variant, vRQ As Variant, vDS As Variant
    Dim LB As Long, UB As Long, vDB As Variant
    Dim n As Long, bOK As Boolean
    
    'make bins and count contents
    bOK = DiscreteItemsCount(vI, vRV, vRQ)
    
    LB = LBound(vRV, 1): UB = UBound(vRV, 1)
    ReDim vDS(1 To 12, 1 To 3)
    ReDim vDB(LB To UB + 2, 1 To 3)
    
    If bOK Then 'load bins and stats arrays
        vDB(1, 1) = sLabel: vDB(1, 2) = "Value": vDB(1, 3) = "Quantity"
        For n = LB To UB
             DoEvents
             vDB(n + 2, 1) = "Bin # " & n 'headings
             vDB(n + 2, 2) = vRV(n)       'value
             vDB(n + 2, 3) = vRQ(n)       'quantity
        Next n
        
        On Error Resume Next 'avoids Mode() error when no value stands out
        With Application.WorksheetFunction
            vDS(1, 1) = sLabel: vDS(1, 2) = "": vDS(1, 3) = "Quantity"
            vDS(3, 1) = "Average": vDS(3, 3) = Format(.Average(vRQ), "#0.000")
            vDS(4, 1) = "Median": vDS(4, 3) = .Median(vRQ)
            vDS(5, 1) = "Mode": vDS(5, 3) = .Mode(vRQ)
            vDS(6, 1) = "Minimum": vDS(6, 3) = .Min(vRQ)
            vDS(7, 1) = "Maximum": vDS(7, 3) = .Max(vRQ)
            vDS(8, 1) = "Std.Deviation": vDS(8, 3) = Format(.StDevP(vRQ), "#0.000")
            vDS(9, 1) = "StDev/Av %": vDS(9, 3) = Format(.StDevP(vRQ) * 100 / .Average(vRQ), "#0.000")
            vDS(10, 1) = "Variance": vDS(10, 3) = Format(.VarP(vRQ), "#0.000")
            vDS(11, 1) = "No.Unique Values": vDS(11, 3) = UBound(vRQ) - LBound(vRQ) + 1
            vDS(12, 1) = "No.Samples": vDS(12, 3) = UBound(vI) - LBound(vI) + 1
        End With
        Err.Clear
    Else
        MsgBox "Problems getting bin count - closing"
        Exit Sub
    End If
    
    'output to sheet
    'ClearWorksheet "Sheet1", 3                    'clear both contents and formats of the worksheet
    Array2DToSheet vDS, "Sheet1", nRow, nCol      'transfer stats panel to sheet with top left at row2, col2
    Array2DToSheet vDB, "Sheet1", nRow + 13, nCol 'transfer bins panel to sheet with top left below stats
    FormatCells "Sheet1"                          'apply font and autofit formats to all cells of the worksheet

End Sub

Private Function DiscreteItemsCount(vIn As Variant, vRetV As Variant, vRetQ As Variant) As Boolean
    'Counts number of repeats of element values found in vIn
    'Returns with one column for each unitque value and quantity found.
    'Returns as 2D vRet, unsorted; row1=input value, row2=item count.

    Dim vA As Variant, vTS As Variant, vTB As Variant
    Dim s As Long, b As Long, n As Long, bFound As Boolean
    Dim LB1 As Long, UB1 As Long, LB2 As Long, UB2 As Long
    Dim LBS As Long, UBS As Long
    
    'dimension 2D work array
    ReDim vA(1 To 2, 1 To 1)
    
    'get source 1D array bounds
    LBS = LBound(vIn): UBS = UBound(vIn)
    
    'get work array bounds
    LB1 = LBound(vA, 1): UB1 = UBound(vA, 1) 'd1 rows
    LB2 = LBound(vA, 2): UB2 = UBound(vA, 2) 'd2 cols
    
    'intitial values
    s = LBS
    b = 0
    vA(2, 1) = 0
    
    Do 'step through store
        DoEvents
        'get source element value
        vTS = vIn(s)
        'check bins
        Do
            DoEvents
            b = b + 1
            'get bin element value
            vTB = vA(1, b)
            If vTS = vTB Then 'found in bins
                vA(2, b) = CLng(vA(2, b)) + 1 'update bin
                bFound = True
            End If
        Loop Until b >= UB2 Or bFound = True
        
        If bFound = False Then 'no such bin exists yet
            'not found in bins
            If vA(2, UB2) <> 0 Then 'first element been used
                ReDim Preserve vA(LB1 To UB1, LB2 To UB2 + 1)
                UB2 = UBound(vA, 2)
            End If
            'update new bin
            vA(1, UB2) = vTS
            vA(2, UB2) = 1
            bFound = True
        End If
        
        'reset loop variables
        bFound = False
        b = 0
        s = s + 1
    Loop Until s > UBS

    'transfers -need to be separate for other uses
    LB1 = LBound(vA, 1): UB1 = UBound(vA, 1) 'd1 rows
    LB2 = LBound(vA, 2): UB2 = UBound(vA, 2) 'd2 cols
    ReDim vRetV(LB2 To UB2) 'contains values
    ReDim vRetQ(LB2 To UB2) 'contains quantities
    
    For n = LB2 To UB2
        vRetV(n) = vA(1, n)
        vRetQ(n) = vA(2, n)
    Next n
        
    For n = LB2 To UB2
        Debug.Print vRetV(n) & vbTab & vRetQ(n)
    Next n
        Debug.Print vbCrLf
   
   DiscreteItemsCount = True

End Function

Private Sub ClearWorksheet(ByVal sSheet As String, Optional ByVal nOpt As Integer = 1)
   'clears worksheet contents, formats, or both
   'but does not remove charts from the worksheet
   'nOpt options: contents=1, formats=2, all=3
      
   Dim oWSht As Worksheet
   Set oWSht = ThisWorkbook.Worksheets(sSheet)
   oWSht.Activate
   
   With oWSht.Cells
    Select Case nOpt
        Case 1 'contents only
            .ClearContents
        Case 2 'formats only
            .ClearFormats
        Case 3 'formats and contents
            .Clear
    Case Else
        MsgBox "Illegal option in ClearWorksheet - closing"
        Exit Sub
    End Select
   End With
   
   oWSht.Cells(1, 1).Select

End Sub

Private Sub Array2DToSheet(ByVal vIn As Variant, sShtName As String, nStartRow As Long, nStartCol As Long)
    ' transfers contents of input 2D array to specified worksheet positions
    ' Works for any array bounds
    
    Dim oSht As Worksheet, rTarget As Range
    Dim nRows As Long, nCols As Long
    Dim nNewEndC As Long, nNewEndR As Long
    
    'get reference to sheet for output
    Set oSht = ActiveWorkbook.Worksheets(sShtName)

    'get the pre-shift end points
    nRows = UBound(vIn, 1) - LBound(vIn, 1) + 1
    nCols = UBound(vIn, 2) - LBound(vIn, 2) + 1
    
    'modify end point for parameter starting values
    nNewEndR = nRows + nStartRow - 1
    nNewEndC = nCols + nStartCol - 1
       
    ' define the sheet range for the array contents
    Set rTarget = oSht.Range(oSht.Cells(nStartRow, nStartCol), oSht.Cells(nNewEndR, nNewEndC))
    
    'transfer the array contents to the sheet range
    rTarget.Value = vIn

End Sub

Private Sub FormatCells(sSht As String)
    ' Applies certain formats to all cells
    ' of the named parameter worksheet
    
    Dim oSht As Worksheet
    
    Set oSht = ThisWorkbook.Worksheets(sSht)
    oSht.Activate
    
    'format all cells of the worksheet
    oSht.Cells.Select
    With Selection
        .Font.Name = "Consolas" 'mono
        .Font.Size = 14
        .Columns.AutoFit
        .Rows.AutoFit
        .HorizontalAlignment = xlLeft 'xlRight 'xlCenter
        .VerticalAlignment = xlBottom 'xlCenter 'xlTop
    End With
    oSht.Range("A1").Select

End Sub

Private Function KnuthArrShuffle(vIn As Variant, vR As Variant) As Boolean
    ' Performs a modified Knuth random shuffle on the elements of the input array.
    ' The original by Fisher-Yates, was modified for computers by Durstenfeld
    ' then popularised by Knuth. Returns result in vR with vIn unchanged.
       
    'Note: Some or all elements COULD occupy their original positions,
    'but only in accordance with expectation based on the random generator.
    'This is best seen for small arrays, say with only 3 elements or so.
        
    Dim vW As Variant
    Dim LB As Long, UB As Long, nL As Long
    Dim i As Long, j As Long
    
    'initial assignments
    LB = LBound(vIn): UB = UBound(vIn)
    ReDim vR(LB To UB) 'return array
    ReDim vW(LB To UB) 'work array
    nL = UB - LB + 1   'length of input array
    vW = vIn 'transfer to a work array
            
    'working
    Randomize
    For i = LB To nL
        'first get a random number
        j = Int((UB - LB + 1) * Rnd + LB)
            
        'transfer jth of vW to ith of vR
        vR(i) = vW(j)
        
        'replace selection with current last of vW
        vW(j) = vW(UB)
        
        'remove last of vW by shortening array
        ReDim Preserve vW(LB To UB - 1)
        
        'get new UBound of shortened vW
        UB = UBound(vW)
        
        'exception; return if last chara
        If UB = LB Then
            vR(i + 1) = vW(UB)
            Exit For
        End If
                
        DoEvents 'allow breaks
    Next i
        
    KnuthArrShuffle = True

End Function

Private Function BiasedMultiSwapArrShuffle(vIn As Variant, Optional vRet As Variant) As Boolean
    'Performs a random shuffle on input array strings.
    'Input parameter is a single array.  Returns in single vRet
    'if provided, else in vIn modified.  Multiple shuffles applied.
    'Displays more bias than the Knuth method.
    
    Dim vR As Variant
    Dim sT As String, sTJ As String, sTS As String, nC As Long
    Dim LB1 As Long, UB1 As Long, LB2 As Long, UB2 As Long
    Dim i As Integer, j As Long, bSUsed As Boolean, bRUsed As Boolean

    'get dimensions of vIn
    LB1 = LBound(vIn): UB1 = UBound(vIn)

    'dimension a work array
    ReDim vR(LB1 To UB1)

    'load local array
    For i = LB1 To UB1
        DoEvents
        vR(i) = vIn(i)
    Next i

    'get dimensions of vR
    LB2 = LBound(vR): UB2 = UBound(vR)

    'randomize the rnd generator
    Randomize

    For i = LB2 To UB2
        
        DoEvents
        'get rnd number
        j = Int((UB2 - LB2 + 1) * Rnd + LB2)

        'exchange elements
        sT = vR(i) 'swap
        vR(i) = vR(j)
        vR(j) = sT
    Next i

    'transfers
    If Not IsMissing(vRet) Then
        ReDim vRet(LB2 To UB2)
        For i = LB2 To UB2
            DoEvents
            vRet(i) = vR(i)
        Next i
    Else
        For i = LB2 To UB2
            DoEvents
            vIn(i) = vR(i)
        Next i
    End If

    'return status
    BiasedMultiSwapArrShuffle = True

End Function

Private Function Arr1DToStr(vIn As Variant) As String
    ' Makes a single string from 1D array string elements.
    ' Works for any array bounds.

    Dim nC As Long, sT As String, sAccum As String
    Dim LB As Long, UB As Long

    LB = LBound(vIn): UB = UBound(vIn)

    'join characters of array into string
    For nC = LB To UB
        DoEvents
        sT = vIn(nC)
        sAccum = sAccum & sT
    Next

    Arr1DToStr = sAccum

End Function

另请参阅

[编辑 | 编辑源代码]
华夏公益教科书