haskell - optimizing AVL tree

As part of learning Haskell, I decided to implement an AVL tree. So far, I have only implemented insertion.

The implementation works, but it runs 3-4 times slower than a Java implementation I found when both are given a list of 9999999 random numbers.

It performs almost as well as the Java version when given an input list of [1..9999999] or [9999999,9999998..1] (an ascending or a descending list), so I think the problem may lie in the rl and lr rolls.

I would appreciate any hints on how to make it run faster.

yes, i know it looks kind of ugly.

-- | An AVL tree holding keys of type @a@.
--
-- All fields of 'Branch' are strict, so building a tree by repeated
-- insertion never leaves suspended computations (for example a pending
-- @b + 1@ on the balance factor) inside the structure. The balance factor
-- is additionally unpacked into the constructor.
data Tree a
  = Empty
    -- ^ The tree with no keys.
  | Branch
      { key     :: !a
        -- ^ Key stored at this node.
      , balance :: {-# UNPACK #-} !Int
        -- ^ Balance factor as maintained by 'treeInsert': it goes up by one
        -- when the left subtree grows and down by one when the right
        -- subtree grows.
      , left    :: !(Tree a)
        -- ^ Subtree receiving keys smaller than 'key'.
      , right   :: !(Tree a)
        -- ^ Subtree receiving keys greater than 'key'.
      , up      :: !Bool
        -- ^ Used internally to stop updating balance: 'True' only on a
        -- freshly returned node whose height grew during the insertion.
      }
  deriving (Eq)

-- | Build a single-node tree containing @x@.
--
-- The node has balance 0, two 'Empty' children and its 'up' flag set, because
-- putting a leaf where an 'Empty' tree used to be makes that subtree taller.
-- No class constraint is required: the key is only stored, never compared.
leaf :: a -> Tree a
leaf x = Branch x 0 Empty Empty True

-- insert ------------------------------------
-- | Insert @x@ into an AVL tree and return the rebalanced tree.
--
-- The 'up' flag of the returned root tells the caller whether this subtree
-- became taller. A parent only adjusts its own balance factor while that
-- flag is set; once it is clear, the remaining ancestors are rebuilt with
-- their balance unchanged.
--
-- If a key equal to @x@ is already present it is replaced by @x@ and the
-- shape of the tree is left untouched.
treeInsert :: Ord a => Tree a -> a -> Tree a
treeInsert Empty x = leaf x
treeInsert (Branch y b l r _) x
  | x < y =
      -- Scrutinising the recursive result with 'case' evaluates the new left
      -- subtree before this node is rebuilt.
      case treeInsert l x of
        nl@(Branch _ _ _ _ grew)
          | not grew  -> Branch y b nl r False
            -- Already left-heavy: one more level on the left needs a roll,
            -- after which the height is back to what it was.
          | b == 1    -> roll (Branch y 2 nl r False)
            -- Going from -1 to 0 evens the node out, so the growth stops
            -- here; going from 0 to 1 makes this node taller as well.
          | otherwise -> Branch y (b + 1) nl r (b /= (-1))
        Empty -> error "treeInsert: insertion produced an empty left subtree"
  | x > y =
      case treeInsert r x of
        nr@(Branch _ _ _ _ grew)
          | not grew  -> Branch y b l nr False
          | b == (-1) -> roll (Branch y (-2) l nr False)
          | otherwise -> Branch y (b - 1) l nr (b /= 1)
        Empty -> error "treeInsert: insertion produced an empty right subtree"
  | otherwise = Branch x b l r False

-- rolls -------------------------------------
-- | Restore the AVL shape of a node whose balance factor has reached 2 or -2.
--
-- Four shapes are handled, selected by the balance of the root and of its
-- heavy child: the single @ll@ and @rr@ rolls and the double @lr@ and @rl@
-- rolls. Every node that is rebuilt has its 'up' flag cleared.
--
-- Any other shape is not expected to be passed in by 'treeInsert'; it is
-- reported with an explicit error instead of an anonymous pattern-match
-- failure.
roll :: Tree a -> Tree a
-- ll roll
roll (Branch y 2 (Branch ly 1 ll lr _) r _) =
  Branch ly 0 ll (Branch y 0 lr r False) False
-- rr roll
roll (Branch y (-2) l (Branch ry (-1) rl rr _) _) =
  Branch ry 0 (Branch y 0 l rl False) rr False
-- lr roll: the left child's right child becomes the new root
roll (Branch y 2 (Branch ly (-1) ll (Branch lry lrb lrl lrr _) _) r _) =
  case doubleRollBalances lrb of
    (lb, rb) ->
      Branch lry 0 (Branch ly lb ll lrl False) (Branch y rb lrr r False) False
-- rl roll: the right child's left child becomes the new root
roll (Branch y (-2) l (Branch ry 1 (Branch rly rlb rll rlr _) rr _) _) =
  case doubleRollBalances rlb of
    (lb, rb) ->
      Branch rly 0 (Branch y lb l rll False) (Branch ry rb rlr rr False) False
roll _ = error "roll: tree does not match any of the ll, rr, lr or rl shapes"

-- | Balance factors of the new (left, right) children after a double roll,
-- given the balance factor of the grandchild that is promoted to the root.
-- The table is the same for the lr and the rl roll.
doubleRollBalances :: Int -> (Int, Int)
doubleRollBalances 0    = (0, 0)
doubleRollBalances 1    = (0, -1)
doubleRollBalances (-1) = (1, 0)
doubleRollBalances _    =
  error "roll: grandchild balance factor outside of -1, 0, 1"

-- construct a tree --------------------------
-- | Insert every element of a list, from left to right, into a starting tree.
--
-- The strict left fold evaluates the intermediate tree after each insertion,
-- so no chain of pending 'treeInsert' calls is built up for long inputs.
-- @Ord a@ is the only constraint needed; @Eq a@ is implied by it.
construct :: Ord a => Tree a -> [a] -> Tree a
construct = foldl' treeInsert

-- rands -------------------------------------
-- | Produce @n@ pseudo-random numbers from a fixed seed.
--
-- The values are the first @n@ elements of the stream that 'randomRs' yields
-- for the bounds @(low, high)@, using a generator created from @seed@ with
-- 'mkStdGen'. The same arguments therefore always give the same list.
rands :: Int -> Int -> Int -> Int -> [Int]
rands n low high seed = take n samples
  where
    generator = mkStdGen seed
    samples   = randomRs (low, high) generator

-- test run
-- | Test run: build a tree from 9999999 random numbers and print how long
-- that took.
main :: IO ()
main = do
    -- The current POSIX time, rounded, seeds the generator.
    seed <- fmap round getPOSIXTime
    let size = 9999999
        tree = construct Empty (rands size 1 size seed)
    start <- getPOSIXTime
    -- 'tree' is only a lazy binding until this point; forcing it here means
    -- the measured interval covers producing the random list as well as the
    -- insertions themselves.
    end <- tree `seq` getPOSIXTime
    print $ end - start

