praxis

Various programming exercises.
git clone git://git.jtobin.io/praxis.git
Log | Files | Refs

Kwic.hs (1332B)


      1 -- The KWIC index system accepts an ordered set of lines, each line is an
      2 -- ordered set of words, and each word is an ordered set of characters. Any
      3 -- line may be "circularly shifted" by repeatedly removing the first word and
      4 -- appending it at the end of the line. The KWIC index system outputs a listing
      5 -- of all circular shifts of all lines in alphabetical order. This is a small
      6 -- system. Except under extreme circumstances (huge data base, no supporting
      7 -- software), such a system could be produced by a good programmer within a
      8 -- week or two.
      9 --
     10 -- (24 minutes in 2013)
     11 -- - j
     12 
     13 import Control.Monad
     14 import Data.List
     15 import System.Environment
     16 import System.Exit
     17 
     18 main :: IO ()
     19 main = do
     20   args    <- getArgs
     21   file    <- parseArgs args
     22   content <- fmap lines (readFile file)
     23   mapM_ putStrLn $ kwicSystem content
     24 
     25 parseArgs :: [a] -> IO a
     26 parseArgs []    = putStrLn "USAGE: ./kwic <filename>" >> exitSuccess
     27 parseArgs (x:_) = return x
     28 
     29 kwicSystem :: [String] -> [String]
     30 kwicSystem = map unwords
     31            . sort 
     32            . concatMap (allRotations . words) 
     33 
     34 rotateByOneWord :: [a] -> [a]
     35 rotateByOneWord ws = take (length ws) . drop 1 . cycle $ ws
     36 
     37 allRotations :: [a] -> [[a]]
     38 allRotations ws = go (length ws - 1) [ws]
     39   where go 0 acc = init acc
     40         go j acc = go (pred j) (rotateByOneWord (head acc) : acc)
     41