Hola Habr
Entonces, la última vez que demostramos empíricamente que puedes escribir fácilmente un tipo de wc de juguete en Haskell, que es significativamente más rápido que la implementación de wc de GNU Coreutils. Está claro que esta no es una comparación completamente honesta: nuestro programa no puede hacer nada más que contar bytes, cadenas y palabras, mientras que el wc real es mucho más poderoso: tiene varias estadísticas más, admite opciones, puede leer desde stdin ... En resumen, realmente Resultó solo un juguete.
Hoy lo arreglaremos. Nuestro objetivo principal es permitir al usuario seleccionar estadísticas específicas para el cálculo, sin contar lo que el usuario no necesita. Y lo más importante: nos esforzaremos por la modularidad, destacando cada estadística en una unidad aislada separada.
De hecho, si miramos la versión C , bueno, personalmente, no lo llamaría una muestra de código legible y compatible, ya que todo sucede allí en una gran función en 370 líneas. Intentaremos evitar esto.
La función principal de la versión C no encajaba en la pantalla 4k en orientación vertical con la cuarta fuente.
Además de esta modularización, nosotros, entre otras cosas:
- expresemos la idea de que algunas estadísticas, como contar el número de bytes, pueden funcionar de manera más eficiente en toda la entrada, mientras que otras deberían mirar cada byte;
- implementamos aún más estadísticas, disfrutando la oportunidad de hablar sobre cada una de ellas individualmente (lo que se llama razonamiento local);
- escribiremos algunas pruebas, disfrutando el razonamiento local una vez más;
- probaremos algunas técnicas mecanografiadas de forma casi dependiente, después de haber recibido correctamente el código de funcionamiento correcto pero de frenado encantador;
- Template Haskell;
- () () .
, :
{-# LANGUAGE Strict #-}
{-# LANGUAGE RecordWildCards #-}
module Data.WordCount where
import qualified Data.ByteString.Lazy as BS
data State = State
{ bs :: Int
, ws :: Int
, ls :: Int
, wasSpace :: Int
}
wc :: BS.ByteString -> (Int, Int, Int)
wc s = (bs, ws + 1 - wasSpace, ls)
where
State { .. } = BS.foldl' go (State 0 0 0 1) s
go State { .. } c = State (bs + 1) (ws + addWord) (ls + addLine) isSp
where
isSp | c == 32 || c - 9 <= 4 = 1
| otherwise = 0
addLine | c == 10 = 1
| otherwise = 0
addWord = (1 - wasSpace) * isSp
{-# INLINE wc #-}
, , . ?
, . , BS.foldl'
!
foldl, «, ». , ! , , ByteString
. , : ( , length
), count
( count 10
). , , !
, , . :
{-# LANGUAGE Strict #-}
import qualified Control.Foldl as L
import qualified Data.ByteString as BS
data WordState = WordState { ws :: Int, wasSpace :: Int }
wordsCount :: L.Fold BS.ByteString Int
wordsCount = L.Fold (BS.foldl' go) (WordState 0 1) (\WordState { .. } -> ws + 1 - wasSpace)
where
go WordState { .. } c = WordState (ws + addWord) isSp
where
isSp | c == 32 || c - 9 <= 4 = 1
| otherwise = 0
addWord = (1 - wasSpace) * isSp
, :
import qualified Control.Foldl.ByteString as BL
import qualified Data.ByteString.Lazy as BSL
main :: IO ()
main = do
[path] <- getArgs
contents <- unsafeMMapFile path
let res = BL.fold ((,,) <$> BL.length <*> BL.count 10 <*> wordsCount) (BSL.fromStrict contents) :: (Int, Int, Int)
print res
! ?
, ( 1.8- , tmpfs
- IO, ), 2.5 . , , , ( wc
, ), IDE, .
, 2.5 . , .
, ?
let res = BL.fold ((,) <$> BL.length <*> wordsCount) contents :: (Int, Int)
print res
1.55 . . ?
let res = BL.fold (BL.count 10) contents :: Int
print res
1.05 .
. . ! , ( '\n'
) , .
. , , foldl
.
foldl
. . foldl
ByteString
', ByteString
', . — 256 , L1-, L2 L1 ( L1 ).
, , 16-32 , L1, .
, , , , : BL.fold ((,) <$> wordsCount <*> wordsCount) contents
( , ) BL.fold wordsCount contents
. .
, , « », , .
...
, , . - . ?
- «» ( , ) f1
, f2
, f3
, f1
f3
. , , ,
options <- parseCliOptions
let theFold = foldl' f (zip options [f1, f2, f3]) emptyFold
where
f acc (True, stat) = acc `compose` stat
f acc (False, _) = acc
, , .
, — . , , , .
?
— . , , , :
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses, UndecidableInstances, FlexibleInstances #-}
{-# LANGUAGE TypeFamilyDependencies, FunctionalDependencies, PolyKinds, DataKinds, GADTs, TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
- ? :
:
class Statistic s res st | res -> s, st -> s
, s -> res, s -> st
where
initState :: st
extractState :: st -> res
step :: st -> Word8 -> st
s
( DataKinds
), , , . , - :
initState :: proxy1 s -> proxy2 res -> st
extractState :: proxy s -> st -> res
step :: proxy1 s -> proxy2 res -> st -> Word8 -> st
, , , .
, , , , .
, - . , , , . , , . , . , (, , ) «» ( SIMD-, ). , . , , .
, , , ? GADT! - , GADT , . :
data StatCompTyOf = Chunked | ByteOnly
data StatComputation st compTy where
ChunkedComputation :: (st -> Word8 -> st)
-> (st -> BS.ByteString -> st)
-> StatComputation st 'Chunked
ByteOnlyComputation :: (st -> Word8 -> st)
-> StatComputation st 'ByteOnly
( ) BS
— , .
Statistic
, comp
step
computation
:
class Statistic s res st comp | res -> s, st -> s
, s -> res, s -> st, s -> comp
where
initState :: st
extractState :: st -> res
computation :: StatComputation st comp
. s
, res
, st
, .
, , . , SIMD- , 16-32 . .
? :
:
data Statistics = Bytes | Chars | Words | MaxLL | Lines deriving (Eq, Ord)
, wc
.
— Statistic
. , — , :
newtype Tagged a = Tagged Word64 deriving (Eq, Show, Num)
a
, Tagged 'Bytes
Tagged 'Chars
.
: :
instance Statistic 'Bytes (Tagged 'Bytes) (Tagged 'Bytes) 'Chunked where
initState = 0
extractState = id
computation = ChunkedComputation (\st _ -> st + 1) (\st str -> st + fromIntegral (BS.length str))
, , :
- ,
Bytes
, Tagged 'Bytes
, . , . - ( , ) 0.
- , , — .
computation
, 'Chunked
. , .
.
, , ,
., , :
instance Statistic 'Lines (Tagged 'Lines) (Tagged 'Lines) 'Chunked where
initState = 0
extractState = id
computation = ChunkedComputation (\st c -> st + if c == 10 then 1 else 0) (\st str -> st + fromIntegral (BS.count 10 str))
? :
data WordsState = WordsState { ws :: Word64, wasSpace :: Word64 }
instance Statistic 'Words (Tagged 'Words) WordsState 'ByteOnly where
initState = WordsState 0 1
extractState WordsState { .. } = Tagged (ws + 1 - wasSpace)
computation = ByteOnlyComputation step
where
step WordsState { .. } c = WordsState (ws + (1 - wasSpace) * isSp) isSp
where
isSp | c == 32 || c - 9 <= 4 = 1
| otherwise = 0
, .
, , . — UTF-8- ?
:
instance Statistic 'Chars (Tagged 'Chars) (Tagged 'Chars) 'ByteOnly where
initState = 0
extractState = id
computation = ByteOnlyComputation $ \cnt c ->
cnt + 1 - fromIntegral ( ((c .&. 0b10000000) `shiftR` 7)
.&. (1 - ((c .&. 0b01000000) `shiftR` 6))
)
UTF-8: , 10xxxxxx
. , UTF-8 , .
? (, , , ASCII):
instance Statistic 'MaxLL (Tagged 'MaxLL) MaxLLState 'ByteOnly where
initState = MaxLLState 0 0
extractState MaxLLState { .. } = Tagged $ max maxLen curLen
computation = ByteOnlyComputation step
where
step MaxLLState { .. } 9 = MaxLLState maxLen $ curLen + 8 - (curLen `rem` 8)
step MaxLLState { .. } 8 = MaxLLState maxLen $ max 0 (curLen - 1)
step MaxLLState { .. } c | c == 10
|| c == 12
|| c == 13 = MaxLLState (max maxLen curLen) 0
| c < 32 = MaxLLState maxLen curLen
step MaxLLState { .. } _ = MaxLLState maxLen (curLen + 1)
, backspace, wc
!
, . : .
a
— , b
— , — , . :
infixr 5 :::
data a ::: b = a ::: b deriving (Show)
(,)
, , , , , .
, .
-, ? , , . :
type family CombineCompTy a b where
CombineCompTy 'Chunked 'Chunked = 'Chunked
CombineCompTy _ _ = 'ByteOnly
Statistic
? - :
instance (Statistic sa resa sta compa, Statistic sb resb stb compb)
=> Statistic (sa '::: sb) (resa ::: resb) (sta ::: stb) (CombineCompTy compa compb) where
initState = initState ::: initState
extractState (a ::: b) = extractState a ::: extractState b
computation =
case (computation :: StatComputation sta compa, computation :: StatComputation stb compb) of
(ByteOnlyComputation a, ChunkedComputation b _)
-> ByteOnlyComputation $ combine a b
(ChunkedComputation a _, ByteOnlyComputation b)
-> ByteOnlyComputation $ combine a b
(ByteOnlyComputation a, ByteOnlyComputation b)
-> ByteOnlyComputation $ combine a b
(ChunkedComputation stepA chunkA, ChunkedComputation stepB chunkB)
-> ChunkedComputation (combine stepA stepB) (combine chunkA chunkB)
where
combine fa fb = \(a ::: b) w -> fa a w ::: fb b w
, sa
— resa
, sta
compa
, sb
/resb
/stb
/compb
, sa ::: sb
— , — resa ::: resb
, — sta ::: stb
, — CombineCompTy compa compb
.
( , ) :::
--- ( , ) :::
--- . sa
sb
— , , () , — , () .
, … . . , comp
, :
instance (Statistic sa resa sta compa,
Statistic sb resb stb compb,
comp ~ CombineCompTy compa compb)
=> Statistic (sa '::: sb) (resa ::: resb) (sta ::: stb) comp where
.
. :
- .
- , - .
, — , !
, CombineCompTy
( ). , , ( ), .
- , , , , , case
.
? , StatComputation st comp
, , , comp ~ CombineCompTy compa compb
. , compa
compb
. CombineCompTy
, , Chunked
compa
, compb
, .
compa
compb
? . , computation
, GADT. , StatComputation
. ChunkedComputation
, comp
Chunked
. ByteOnlyComputation
, ByteOnly
.
, CombineCompTy
_
-, , compa
, compb
.
, : Words '::: Words
, . ⊤ , , , .
, . ?
, Statistic
, ByteString
, . GADT, computation
. ChunkedComputation
, . ByteOnlyComputation
, BS.foldl'
. :
wc :: forall s res st comp. Statistic s res st comp => BS.ByteString -> res
wc s = extractState $! runCompute computation
where
runCompute :: StatComputation st comp -> st
runCompute (ByteOnlyComputation step) = BS.foldl' step initState s
runCompute (ChunkedComputation _ chunker) = chunker initState s
, (s
, st
, comp
) res
. , , runCompute
, . st
comp
wc
, ( ) forall
ScopedTypeVariables
.
:
let result = wc someBS :: Tagged 'Words ::: Tagged 'Lines
TypeApplications
s
:
let result = wc @('Words '::: 'Lines) someBS
, , , , .
, ?
, wc @'Words
, . — 1.51 , , , . , .
? wc @('Words '::: 'Words)
!
. , , , — , … : 1.34 . wc @('Words '::: 'Words '::: 'Words)
? 1.30 . , 'Words
.
— . #haskell
— . .
. GHC Core — , . , , , - . , … -. , , , .
, , . , ? wc @('Bytes '::: 'Words '::: 'Lines)
! — 1.53 . 1.45 , , , , .
, , . , .
— ! , .
, , , words
, 127
. , , .
QuickCheck-, ASCII, UTF-8-:
import qualified Data.ByteString.Char8 as BS
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.WordCount
wrapUnicode :: UnicodeString -> (BS.ByteString, T.Text)
wrapUnicode ustr = (T.encodeUtf8 txt, txt)
where
txt = T.pack $ getUnicodeString ustr
replaceNonAsciiSpaces :: Char -> Char
replaceNonAsciiSpaces ch | ch >= chr 127 && isSpace ch = '_'
| otherwise = ch
main :: IO ()
main = hspec $ parallel $ modifyMaxSuccess (const 10000) $ modifyMaxSize (const 1000) $ do
describe "ASCII support" $ do
it "Counts bytes correctly" $ property $
\(getASCIIString -> str) -> wc @'Bytes (BS.pack str) `shouldBe` genericLength str
it "Counts chars correctly" $ property $
\(getASCIIString -> str) -> wc @'Chars (BS.pack str) `shouldBe` genericLength str
it "Counts words correctly" $ property $
\(getASCIIString -> str) -> wc @'Words (BS.pack str) `shouldBe` genericLength (words str)
it "Counts lines correctly" $ property $
\(getASCIIString -> str) -> wc @'Lines (BS.pack str) `shouldBe` genericLength (filter (== '\n') str)
describe "UTF8 support" $ do
it "Counts bytes correctly" $ property $
\(wrapUnicode -> (bs, _)) -> wc @'Bytes bs `shouldBe` fromIntegral (BS.length bs)
it "Counts chars correctly" $ property $
\(wrapUnicode -> (bs, txt)) -> wc @'Chars bs `shouldBe` fromIntegral (T.length txt)
it "Counts words correctly" $ property $
\(wrapUnicode -> (bs, txt)) -> wc @'Words bs `shouldBe` genericLength (T.words $ T.map replaceNonAsciiSpaces txt)
it "Counts lines correctly" $ property $
\(wrapUnicode -> (bs, txt)) -> wc @'Lines bs `shouldBe` fromIntegral (T.count "\n" txt)
!
:
- , , . , - UTF-8- .
- , … .
- : 10 ( ) 3-5 ( ).
, - C GNU Coreutils.
optparse-applicative. , , :
data Options = Options
{ countBytes :: Bool
, countChars :: Bool
, countLines :: Bool
, countMaxLineLength :: Bool
, countWords :: Bool
, files :: [FilePath]
}
options :: Parser Options
options = Options
<$> switch (long "bytes" <> short 'c' <> help "print the byte counts")
<*> switch (long "chars" <> short 'm' <> help "print the character counts")
<*> switch (long "lines" <> short 'l' <> help "print the newline counts")
<*> switch (long "max-line-length" <> short 'L' <> help "print the maximum display width")
<*> switch (long "words" <> short 'w' <> help "print the word counts")
<*> some (argument str (metavar "FILES..."))
main
, Statistics
, , :
main :: IO ()
main = do
Options { .. } <- execParser $ info (options <**> helper) (fullDesc <> progDesc "Print newline, word, and byte counts for each file")
let selectedStats = map snd $ filter fst [ (countBytes, Bytes), (countChars, Chars)
, (countWords, Words), (countMaxLineLength, MaxLL)
, (countLines, Lines)
]
let stats | null selectedStats = [Bytes, Words, Lines]
| otherwise = selectedStats
, , - .
, . ? , wc
. , , . !
, , , , - , — !
, , , .
, , :
data SomeStats where
MkSomeStats :: Statistic s res st comp => proxy s -> SomeStats
proxy s
— Statistic
. — .
. ? - :
wc' :: SomeStats -> BS.ByteString -> ?
wc' (MkSomeStats (_ :: proxy s)) input = wc @s input
… ?
? ? , res
Statistic
, .
, - ?
data SomeStats where
MkSomeStats :: Statistic s res st comp => proxy1 s -> proxy2 res -> SomeStats
wc' :: SomeStats -> BS.ByteString -> res
wc' (MkSomeStats (_ :: proxy1 s) (_ :: proxy2 res)) input = wc @s input
, : wc'
res
, SomeStats
, .
?
. wc
, ! , . , , , , show
, , res
Show
:
data SomeStats where
MkSomeStats :: (Statistic s res st comp, Show res) => proxy s -> SomeStats
wc
:
wc' :: SomeStats -> BS.ByteString -> String
wc' (MkSomeStats (_ :: proxy s)) input = show $ wc @s input
.
, stats
SomeStats
? :
promoteStat :: Statistics -> SomeStats
promoteStat Bytes = MkSomeStats (Proxy :: Proxy 'Bytes)
promoteStat Chars = MkSomeStats (Proxy :: Proxy 'Chars)
promoteStat Words = MkSomeStats (Proxy :: Proxy 'Words)
promoteStat MaxLL = MkSomeStats (Proxy :: Proxy 'MaxLL)
promoteStat Lines = MkSomeStats (Proxy :: Proxy 'Lines)
, , Statistics
, . - : , Bytes
() 'Bytes
, , .
, promoteStat
:
promoteStats :: [Statistics] -> SomeStats
promoteStats [s] = promoteStat s
promoteStats (s:ss) =
case (promoteStat s, promoteStats ss) of
(MkSomeStats (_ :: proxy1 st), MkSomeStats (_ :: proxy2 sst))
-> MkSomeStats (Proxy :: Proxy (st '::: sst))
, , promoteStat
.
, . : promoteStat
, promoteStats
. - , . promoteStat
promoteStats
, st
, , sst
— , .
, Statistic
( ). Statistic
, st ::: sst
Statistic
- , ! , , rest
resst
( , st
sst
) Show
. , rest ::: resst
Show
, st ::: sst
!
, , MkSomeStats (Proxy :: Proxy (st '::: sst))
. , !
, : . , , NonEmpty
.
, :
main :: IO ()
main = do
forM_ files $ \path -> do
contents <- unsafeMMapFile path
putStrLn $ wc' (promoteStats stats) contents
() ?
, 1.05 — , BS.count 10
.
, . , , ? , … 14 .
, 14 .
, , :
74,873,139,008 bytes allocated in the heap
, . , — GC (60,512 bytes maximum residency
).
. , , ? 27 , 120 .
, ? ?
«42 », : 41 194 . , RTS 60 .
?
, :
wc' (MkSomeStats (_ :: proxy s)) input = show $ wc @s input
, computation
s
— -, , . , Statistic
.
?
wc'
computation
, SomeStats
wc
, . , , , . , 1.8 — !
promoteStats
. ?
, stats
,
promoteStats [s] = promoteStat s
s
, , Words
, promoteStat
promoteStat Words = MkSomeStats (Proxy :: Proxy 'Words)
promoteStats
SomeStats
, Statistic
Words
.
. , ?
promoteStats (s:ss) =
case (promoteStat s, promoteStats ss) of
(MkSomeStats (_ :: proxy1 st), MkSomeStats (_ :: proxy2 sst)) -> MkSomeStats (Proxy :: Proxy (st '::: sst))
computation
: case
, promoteStat
promoteStats
, , , computation
Statistic
«» sa ::: sb
, , , .
, 13-14 , 28 — . , 65-70 .
, ( ) .
, . 13 (14 1 -) 1.8 — , 7 . .
: , () wc
. -
main = do
case stats of
[Words] -> print $ wc @'Words contents
[Bytes] -> print $ wc @'Bytes contents
[Lines] -> print $ wc @'Lines contents
[Words, Bytes] -> print $ wc @('Words '::: 'Bytes) contents
[Lines, Bytes] -> print $ wc @('Lines '::: 'Bytes) contents
. , , case
. , — .
, Template Haskell. (-) dispatch
, :
contents <- unsafeMMapFile path
putStrLn $ $(dispatch 'wc 'contents) stats
$(dispatch 'wc 'contents)
, case
- stats
, .
dispatch
— Template Haskell, , :
dispatch :: Name -> Name -> Q Exp
dispatch fun bs = reify ''Statistics >>= \case
TyConI (DataD _ _ _ _ cons _) -> do
let consNames = [ name | NormalC name _ <- cons ]
let powerset = filterM (const [True, False]) consNames
let matches = buildMatch fun bs <$> filter (not . null) powerset
fallbackMatch <- (\body -> Match WildP (NormalB body) []) <$> [e| error "Unexpected input" |]
pure $ LamCaseE $ matches <> [fallbackMatch]
_ -> fail "unsupported type"
buildMatch :: Name -> Name -> [Name] -> Match
buildMatch fun bs consNames = Match (ListP $ (`ConP` []) <$> consNames) (NormalB $ VarE 'show `AppE` (wcCall `AppE` VarE bs)) []
where
wcCall = VarE fun `AppTypeE` foldr1 f (PromotedT <$> consNames)
f accTy promotedTy = PromotedT '(:::) `AppT` accTy `AppT` promotedTy
, Statistics
(, , ), ( , powerset
) case
- buildMatch
. -case
( {-# LANGUAGE LambdaCase #-}
).
, , , , .
, (, TH) — , stats
. . ,
let selectedStats = map snd $ filter fst [ (countBytes, Bytes), (countChars, Chars)
, (countWords, Words), (countMaxLineLength, MaxLL)
, (countLines, Lines)
]
, Ord
. , stats
. , , - stats
, , .
!
main :: IO ()
main = do
Options { .. } <- execParser $ info (options <**> helper) (fullDesc <> progDesc "Print newline, word, and byte counts for each file")
let selectedStats = map snd $ filter fst [ (countBytes, Bytes), (countChars, Chars)
, (countWords, Words), (countMaxLineLength, MaxLL)
, (countLines, Lines)
]
let stats | null selectedStats = [Bytes, Words, Lines]
| otherwise = selectedStats
forM_ files $ \path -> do
contents <- unsafeMMapFile path
putStrLn $ $(dispatch 'wc 'contents) stats
? (31 — ), - . , wc
GNU Coreutils , -. , : 5 1.8 , . wc
Coreutils LC_ALL=C LANG=C
, .
:
:
- , , .
- , C (, , ) , . .
- ¹ ,
time
0.00user
. - ² (1.06 ) —
bytestring
. — bytestring
, (count
) SIMD-. count
C, , , , ( , SIMD), . - ³
wc
UTF-8-, , , , UTF-8-.
, .
, , wc
, . ?
: forM_
forConcurrently_
async:
main :: IO ()
main = do
Options { .. } <- execParser $ info (options <**> helper) (fullDesc <> progDesc "Print newline, word, and byte counts for each file")
let selectedStats = map snd $ filter fst [ (countBytes, Bytes), (countChars, Chars)
, (countWords, Words), (countMaxLineLength, MaxLL)
, (countLines, Lines)
]
let stats | null selectedStats = [Bytes, Words, Lines]
| otherwise = selectedStats
forConcurrently_ files $ \path -> do
contents <- unsafeMMapFile path
putStrLn $ $(dispatch 'wc 'contents) stats
, (, RTS, ). , -j
, .
?
1.22 — , .
, ( ), 1.47 , . , .
,
Tagged 123 ::: (Tagged 456 ::: Tagged 789)
, ! Statistic
:
class Statistic s res st comp | res -> s, st -> s
, s -> res, s -> st, s -> comp where
prettyPrint :: res -> String
, :
instance Statistic 'Bytes (Tagged 'Bytes) (Tagged 'Bytes) 'Chunked where
prettyPrint (Tagged n) = show n <> " bytes"
, :
prettyPrint (a ::: b) = prettyPrint a <> "\n" <> prettyPrint b
buildMatch
, prettyPrint
show
:
buildMatch fun bs consNames = Match (ListP $ (`ConP` []) <$> consNames) (NormalB $ VarE 'prettyPrint `AppE` (wcCall `AppE` VarE bs)) []
!
, mmap
. , : , hwc <(cat foo | grep bar)
.
, , — - mmap
. mmap
. ByteString
, . ByteString
— , :
import qualified Data.ByteString.Lazy as BSL
wcLazy :: forall s res st comp. Statistic s res st comp => BSL.ByteString -> res
wcLazy s = extractState $! runCompute computation
where
runCompute :: StatComputation st comp -> st
runCompute (ByteOnlyComputation step) = BSL.foldl' step initState s
runCompute (ChunkedComputation _ chunker) = BSL.foldlChunks chunker initState s
main
:
forConcurrently_ files $ \path -> do
stat <- getFileStatus path
if isRegularFile stat || isSymbolicLink stat
then countStrict stats $ unsafeMMapFile path
else countLazy stats $ BSL.readFile path
:
where
countStrict stats act = do
contents <- act
putStrLn $ $(dispatch 'wc 'contents) stats
countLazy stats act = do
contents <- act
putStrLn $ $(dispatch 'wcLazy 'contents) stats
!
, wcLazy
, , , .
stdin
, Coreutils wc
, — stdin
. , main
:
main = do
when (null files) $ countLazy stats BSL.getContents
-
cat testfile.txt | /usr/bin/time hwc-exe -cw
? , , 1.40 — , 1.22 .
, - : ? , , !
« » ( - ), stack build
7.9 , 2.24 ( strip
).
Template Haskell : 23 . , ( 31 ): 2.34 , — 4.3%.
… stack build
( ). stack build --fast
.
« » 5.6 --fast
, TH … 7.8 . . .
, , C. , : wc
GNU Coreutils 0.06 , — 21 . wc
24 . 100 , !
, , .
? , wc
, Unix-, -wc
, GNU Coreutils wc
. — ( !), , , , . , - « , », C C++, C.
, :
wc
, , , ;- ;
- , , ;
- , , , , , ;
- Template Haskell;
- , , ;
- , .
.
, :
- . , , , 16-32 ( L1). , 7 , 1.8 0.4 , 0.04%. Template Haskell!
- Nuestra implementación aún no es el equivalente completo
wc
, hace algo diferente. Por ejemplo, las estadísticas de recuento de caracteres no admiten codificaciones que no sean UTF-8 o ASCII, mientras que wc
funciona con una configuración regional arbitraria, delegando todo el trabajo de procesamiento de caracteres glibc. Pero dado que nuestra implementación hace que sea muy fácil agregar nuevas estadísticas, es bastante simple agregar soporte para algo así, y lo más importante, no afectará, por ejemplo, el recuento de la cantidad de líneas o palabras. Pero sobre "no pague por lo que no usa", ya escribí un poco más alto.