From 7eaf4e726b82dbc1fda59164b6fcf47c644bbe80 Mon Sep 17 00:00:00 2001
From: kayvank
Date: Sat, 28 Feb 2026 17:02:27 -0800
Subject: [PATCH 1/2] Implement Fisher Yates algorithm

---
 src/DataFrame/Operations/Permutation.hs | 28 ++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/DataFrame/Operations/Permutation.hs b/src/DataFrame/Operations/Permutation.hs
index a381f98..6dc2179 100644
--- a/src/DataFrame/Operations/Permutation.hs
+++ b/src/DataFrame/Operations/Permutation.hs
@@ -9,16 +9,17 @@ import qualified Data.List as L
 import qualified Data.Text as T
 import qualified Data.Vector as V
 import qualified Data.Vector.Unboxed as VU
+import qualified Data.Vector.Unboxed.Mutable as VUM
 
 import Control.Exception (throw)
+import Control.Monad.ST (runST)
 import DataFrame.Errors (DataFrameException (..))
-import DataFrame.Internal.Column
+import DataFrame.Internal.Column (Columnable, atIndicesStable)
 import DataFrame.Internal.DataFrame (DataFrame (..))
-import DataFrame.Internal.Expression
-import DataFrame.Internal.Row
-import DataFrame.Operations.Core
-import System.Random
-import System.Random.Shuffle (shuffle')
+import DataFrame.Internal.Expression (Expr (Col))
+import DataFrame.Internal.Row (sortedIndexes', toRowVector)
+import DataFrame.Operations.Core (columnNames, dimensions)
+import System.Random (Random (randomR), RandomGen)
 
 -- | Sort order taken as a parameter by the 'sortBy' function.
 data SortOrder where
@@ -76,4 +77,17 @@ shuffle pureGen df =
         df{columns = V.map (atIndicesStable indexes) (columns df)}
 
 shuffledIndices :: (RandomGen g) => g -> Int -> VU.Vector Int
-shuffledIndices pureGen k = VU.fromList (shuffle' [0 .. (k - 1)] k pureGen)
+shuffledIndices pureGen k = shuffleVec pureGen (VU.fromList [0 .. (k - 1)])
+  where
+    shuffleVec :: (RandomGen g) => g -> VU.Vector Int -> VU.Vector Int
+    shuffleVec g v = runST $ do
+        vm <- VU.thaw v
+        let (n, nGen) = randomR (0, (k - 1)) g
+        go vm n nGen
+        VU.unsafeFreeze vm
+
+    go v (-1) _ = pure ()
+    go v 0 _ = pure ()
+    go v maxInd gen =
+        let (n, nextGen) = randomR (0, maxInd) gen
+         in VUM.swap v 0 n *> go (VUM.tail v) (maxInd - 1) nextGen

From 3e69bd573e37ee63c44a15344a617b2a270d56d1 Mon Sep 17 00:00:00 2001
From: kayvank
Date: Sun, 1 Mar 2026 09:29:28 -0800
Subject: [PATCH 2/2] Clean up build warnings

Issue 170, implement PR comments and clean up build warnings

Also make the shuffle a full Fisher-Yates pass: the walk now starts at
the last index (k - 1) instead of a randomly drawn one, so the whole
index vector is shuffled rather than a random-length prefix, and each
swap target is drawn from the inclusive range [0, maxInd] so an element
may stay in place. This also removes the inverted range (1, 0) that was
drawn for k == 1 and could lead to an out-of-bounds swap.
---
 dataframe.cabal                         |  1 -
 src/DataFrame/Operations/Permutation.hs | 14 ++++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/dataframe.cabal b/dataframe.cabal
index c99a60a..f7f2a2f 100644
--- a/dataframe.cabal
+++ b/dataframe.cabal
@@ -106,7 +106,6 @@ library
                       process ^>= 1.6,
                       snappy-hs ^>= 0.1,
                       random >= 1.2 && < 1.3,
-                      random-shuffle >= 0.0.4 && < 1,
                       regex-tdfa >= 1.3.0 && < 2,
                       scientific >=0.3.1 && <0.4,
                       template-haskell >= 2.0 && < 3,
diff --git a/src/DataFrame/Operations/Permutation.hs b/src/DataFrame/Operations/Permutation.hs
index 6dc2179..b9f9c2c 100644
--- a/src/DataFrame/Operations/Permutation.hs
+++ b/src/DataFrame/Operations/Permutation.hs
@@ -77,17 +77,19 @@ shuffle pureGen df =
         df{columns = V.map (atIndicesStable indexes) (columns df)}
 
 shuffledIndices :: (RandomGen g) => g -> Int -> VU.Vector Int
-shuffledIndices pureGen k = shuffleVec pureGen (VU.fromList [0 .. (k - 1)])
+shuffledIndices pureGen k
+    | k <= 0 = VU.empty
+    | otherwise = shuffleVec pureGen
   where
-    shuffleVec :: (RandomGen g) => g -> VU.Vector Int -> VU.Vector Int
-    shuffleVec g v = runST $ do
-        vm <- VU.thaw v
-        let (n, nGen) = randomR (0, (k - 1)) g
-        go vm n nGen
+    shuffleVec :: (RandomGen g) => g -> VU.Vector Int
+    shuffleVec g = runST $ do
+        vm <- VUM.generate k id
+        -- Start at the last index so every position takes part in the shuffle.
+        go vm (k - 1) g
         VU.unsafeFreeze vm
 
     go v (-1) _ = pure ()
     go v 0 _ = pure ()
     go v maxInd gen =
         let (n, nextGen) = randomR (0, maxInd) gen
          in VUM.swap v 0 n *> go (VUM.tail v) (maxInd - 1) nextGen