commit 3be4f8720a791167dcd859710a8566ac5021e9ab
parent 06fbb63bfcfad95a759a14f06cdeec19a7d313f9
Author: Jared Tobin <jared@jtobin.ca>
Date: Wed, 25 Sep 2013 23:02:03 +1200
Add KWIC system.
Diffstat:
2 files changed, 46 insertions(+), 0 deletions(-)
diff --git a/20130925_kwic/Kwic.hs b/20130925_kwic/Kwic.hs
@@ -0,0 +1,41 @@
+-- The KWIC index system accepts an ordered set of lines, each line is an
+-- ordered set of words, and each word is an ordered set of characters. Any
+-- line may be "circularly shifted" by repeatedly removing the first word and
+-- appending it at the end of the line. The KWIC index system outputs a listing
+-- of all circular shifts of all lines in alphabetical order. This is a small
+-- system. Except under extreme circumstances (huge data base, no supporting
+-- software), such a system could be produced by a good programmer within a
+-- week or two.
+--
+-- (24 minutes in 2013)
+-- - j
+
+import Control.Monad
+import Data.List
+import System.Environment
+import System.Exit
+
+main :: IO ()
+main = do
+ args <- getArgs
+ file <- parseArgs args
+ content <- fmap lines (readFile file)
+ mapM_ putStrLn $ kwicSystem content
+
+parseArgs :: [a] -> IO a
+parseArgs [] = putStrLn "USAGE: ./kwic <filename>" >> exitSuccess
+parseArgs (x:_) = return x
+
+kwicSystem :: [String] -> [String]
+kwicSystem = map unwords
+ . sort
+ . concatMap (allRotations . words)
+
+rotateByOneWord :: [a] -> [a]
+rotateByOneWord ws = take (length ws) . drop 1 . cycle $ ws
+
+allRotations :: [a] -> [[a]]
+allRotations ws = go (length ws - 1) [ws]
+ where go 0 acc = init acc
+ go j acc = go (pred j) (rotateByOneWord (head acc) : acc)
+
diff --git a/20130925_kwic/OrderedLines.txt b/20130925_kwic/OrderedLines.txt
@@ -0,0 +1,5 @@
+this is the first line
+and this is the second line
+and now the third line
+gimme the fourth
+and finally the fifth