urbit-hob

Haskell utilities for phonetic base wrangling.
Log | Files | Refs | README | LICENSE

commit d0d48f337a597457810b83ca23d0cfcea621c287
parent 62b5179313302a77052a6cda42881983c2a6abf0
Author: Jared Tobin <jared@jtobin.io>
Date:   Wed, 18 Sep 2019 10:06:29 -0230

co, ob: general refactoring

* Simplifies the structure of pretty much everything
* Improves error handling
* Prunes unnecessary stuff

Diffstat:
Alib/Data/Serialize/Extended.hs | 22++++++++++++++++++++++
Mlib/Urbit/Ob/Co.hs | 133++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mlib/Urbit/Ob/Ob.hs | 2+-
Mtest/Co/Tests/Property.hs | 17+++++++++++++++--
Mtest/Co/Tests/Unit.hs | 2+-
Mtest/Ob.hs | 2+-
Mtest/Ob/Tests/Property.hs | 11+++++++++--
Mtest/Ob/Tests/Unit.hs | 10++++++----
Murbit-hob.cabal | 6+++++-
9 files changed, 134 insertions(+), 71 deletions(-)

diff --git a/lib/Data/Serialize/Extended.hs b/lib/Data/Serialize/Extended.hs @@ -0,0 +1,22 @@ + +module Data.Serialize.Extended ( + roll + , unroll + ) where + +import Data.Bits +import qualified Data.ByteString as BS +import Data.List (unfoldr) +import Numeric.Natural (Natural) + +-- | Simple little-endian ByteString encoding for Naturals. +unroll :: Natural -> BS.ByteString +unroll = BS.pack . unfoldr step where + step 0 = Nothing + step i = Just (fromIntegral i, i `shiftR` 8) + +-- | Simple little-endian ByteString decoding for Naturals. +roll :: BS.ByteString -> Natural +roll = foldr unstep 0 . BS.unpack where + unstep b a = a `shiftL` 8 .|. fromIntegral b + diff --git a/lib/Urbit/Ob/Co.hs b/lib/Urbit/Ob/Co.hs @@ -1,66 +1,78 @@ -{-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-} module Urbit.Ob.Co ( Patp - , render , patp , fromPatp + + , render + , parse ) where import qualified Data.ByteString as BS -import qualified Data.Vector as V -import qualified Data.Serialize as C +import Data.Char (isAsciiLower) +import Data.Foldable (foldrM) +import qualified Data.Serialize.Extended as C import qualified Data.Text as T -import Data.Word (Word8, Word16) +import qualified Data.Vector as V +import Data.Word (Word8) import Numeric.Natural (Natural) -import Urbit.Ob.Ob (fein, fynd) +import Prelude hiding (log) +import qualified Urbit.Ob.Ob as Ob (fein, fynd) +-- | A patp type. +-- +-- Bytes are stored little-endian. newtype Patp = Patp BS.ByteString deriving (Eq, Show) --- | Convert a Natural to a Patp value. +unPatp :: Patp -> BS.ByteString +unPatp (Patp p) = p + patp :: Natural -> Patp -patp n - | met 3 sxz <= 1 = Patp (BS.cons 0 (BS.singleton sxz8)) - | otherwise = Patp (C.encode sxz) - where - sxz = fein n - sxz8 = fromIntegral sxz :: Word8 +patp = Patp . BS.reverse . C.unroll . Ob.fein --- | Convert a Patp value to a Natural. fromPatp :: Patp -> Natural -fromPatp (Patp p) = decoded where - decoded = case BS.length p of - 2 -> case C.decode p :: Either String Word16 of - Left e -> internalErr "fromPatp" e - Right x -> fynd (fromIntegral x) - _ -> case C.decode p :: Either String Natural of - Left e -> internalErr "fromPatp" e - Right x -> fynd x - --- | Render a Patp value as Text. +fromPatp = Ob.fynd . C.roll . BS.reverse . unPatp + render :: Patp -> T.Text -render (Patp p) = prefixed where - prefix = V.unsafeIndex prefixes . fromIntegral - suffix = V.unsafeIndex suffixes . fromIntegral - - prefixed = case T.uncons encoded of - Just ('-', pp) -> T.cons '~' pp - Just _ -> T.cons '~' encoded - _ -> internalErr "render" mempty - - encoded = foldr alg mempty pruned where - alg (idx, x) acc - | odd idx = suffix x <> acc - | otherwise = "-" <> prefix x <> acc - - pruned = - let len = BS.length p - indexed = zip [len, pred len..] (BS.unpack p) - padding (idx, val) = idx /= 1 && val == 0 - in dropWhile padding indexed +render (Patp bs) = render' bs + +render' :: BS.ByteString -> T.Text +render' bs = + T.cons '~' + . snd + . BS.foldr alg (0 :: Int, mempty) + $ padded + where + alg val (idx, acc) = + let syl = if even idx then suffix val else prefix val + glue + | idx `mod` 8 == 0 = if idx == 0 then mempty else "--" + | even idx = "-" + | otherwise = mempty + in (succ idx, syl <> glue <> acc) + + padded = + let len = BS.length bs + in if (odd len && len > 2) || len == 0 + then BS.cons 0 bs + else bs + +parse :: T.Text -> Either T.Text Patp +parse p = + fmap (Patp . snd) + $ foldrM alg (0 :: Int, mempty) syls + where + alg syl (idx, acc) = do + word <- if even idx then fromSuffix syl else fromPrefix syl + return (succ idx, BS.cons word acc) + + syls = + T.chunksOf 3 + . T.filter isAsciiLower + $ p prefixes :: V.Vector T.Text prefixes = V.fromList @@ -87,6 +99,16 @@ prefixes = V.fromList ,"lap","tal","pit","nam","bon","ros","ton","fod","pon","sov","noc","sor" ,"lav","mat","mip","fip"] +prefix :: Integral a => a -> T.Text +prefix = V.unsafeIndex prefixes . fromIntegral + +fromPrefix :: T.Text -> Either T.Text Word8 +fromPrefix syl = case V.findIndex (== syl) prefixes of + Nothing -> Left (msg syl) + Just x -> Right (fromIntegral x :: Word8) + where + msg s = "urbit-hob (fromPrefix): invalid prefix \"" <> s <> "\"" + suffixes :: V.Vector T.Text suffixes = V.fromList ["zod","nec","bud","wes","sev","per","sut","let","ful","pen","syt","dur" @@ -112,20 +134,13 @@ suffixes = V.fromList ,"lyr","tes","mud","nyt","byr","sen","weg","fyr","mur","tel","rep","teg" ,"pec","nel","nev","fes"] -rsh :: Integral a => a -> a -> a -> a -rsh a b c = c `div` 2 ^ (2 ^ a * b) - -met :: Integral a => a -> a -> a -met = loop 0 where - loop !acc a b - | b == 0 = acc - | otherwise = loop (succ acc) a (rsh a 1 b) - -internalErr :: String -> String -> a -internalErr fn msg = error $ mconcat - [ "urbit-hob (" - , fn - , "): internal error -- please report this as a bug!\n" - , msg - ] +suffix :: Integral a => a -> T.Text +suffix = V.unsafeIndex suffixes . fromIntegral + +fromSuffix :: T.Text -> Either T.Text Word8 +fromSuffix syl = case V.findIndex (== syl) suffixes of + Nothing -> Left (msg syl) + Just x -> Right (fromIntegral x :: Word8) + where + msg s = "urbit-hob (fromSuffix): invalid suffix \"" <> s <> "\"" diff --git a/lib/Urbit/Ob/Ob.hs b/lib/Urbit/Ob/Ob.hs @@ -14,8 +14,8 @@ module Urbit.Ob.Ob ( import Data.Bits import Data.Word (Word32) -import Urbit.Ob.Muk (muk) import Prelude hiding (tail) +import Urbit.Ob.Muk (muk) -- | Conceal structure v3. fein :: (Integral a, Bits a) => a -> a diff --git a/test/Co/Tests/Property.hs b/test/Co/Tests/Property.hs @@ -3,18 +3,23 @@ module Co.Tests.Property ( tests ) where -import Numeric.Natural +import qualified Data.Text as T +import Data.Word (Word32) +import Numeric.Natural (Natural) import Test.Hspec import Test.Hspec.Core.QuickCheck (modifyMaxSuccess) import Test.QuickCheck import qualified Urbit.Ob.Co as Co nats :: Gen Natural -nats = fmap fromIntegral (arbitrary `suchThat` (>= (0 :: Int))) +nats = fmap fromIntegral (arbitrary :: Gen Word32) patps :: Gen Co.Patp patps = fmap Co.patp nats +patpStrings :: Gen T.Text +patpStrings = fmap Co.render patps + tests :: Spec tests = do describe "fromPatp" $ @@ -27,3 +32,11 @@ tests = do it "inverts fromPatp" $ forAll patps $ \x -> Co.patp (Co.fromPatp x) == x + describe "render" $ + modifyMaxSuccess (const 1000) $ + it "inverts parse" $ + forAll patpStrings $ \x -> + case Co.parse x of + Left _ -> False + Right p -> Co.render p == x + diff --git a/test/Co/Tests/Unit.hs b/test/Co/Tests/Unit.hs @@ -29,7 +29,7 @@ tests = it "matches 128-bit reference values" $ do let big_128_01 = 0x00000000000000010000000000000000 - patp_128_01 = "~doznec--fipfes-fipfes-fipfes-fipfes" + patp_128_01 = "~doznec--dozzod-dozzod-dozzod-dozzod" Co.render (Co.patp big_128_01) `shouldBe` patp_128_01 let big_128_02 = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/test/Ob.hs b/test/Ob.hs @@ -11,5 +11,5 @@ main = hspec $ do context "small input space" S.tests context "medium input space" M.tests - context "32-bit input space" P.tests + context "propert tests" P.tests context "unit tests" U.tests diff --git a/test/Ob/Tests/Property.hs b/test/Ob/Tests/Property.hs @@ -4,6 +4,7 @@ module Ob.Tests.Property ( ) where import Data.Word (Word32, Word64) +import Numeric.Natural (Natural) import Test.Hspec import Test.Hspec.Core.QuickCheck (modifyMaxSuccess) import Test.QuickCheck @@ -15,18 +16,24 @@ planets = arbitrary `suchThat` (> 0xFFFF) word64 :: Gen Word64 word64 = arbitrary +nat :: Gen Natural +nat = do + a <- fmap fromIntegral word64 + b <- fmap fromIntegral word64 + return (a * b) + tests :: Spec tests = do describe "fynd" $ modifyMaxSuccess (const 1000) $ it "inverts fein" $ - forAll word64 $ \x -> + forAll nat $ \x -> Ob.fynd (Ob.fein x) == x describe "fein" $ modifyMaxSuccess (const 1000) $ it "inverts fynd" $ - forAll word64 $ \x -> + forAll nat $ \x -> Ob.fein (Ob.fynd x) == x describe "feis" $ diff --git a/test/Ob/Tests/Unit.hs b/test/Ob/Tests/Unit.hs @@ -13,19 +13,21 @@ tests = do it "matches reference values" $ do Ob.fein 123456789 `shouldBe` (1897766331 :: Int) Ob.fein 15663360 `shouldBe` (1208402137 :: Int) + Ob.fein 0x10000000000000000 `shouldBe` (0x10000000000000000 :: Integer) describe "fynd" $ it "matches reference values" $ do Ob.fynd 1208402137 `shouldBe` (15663360 :: Int) Ob.fynd 1897766331 `shouldBe` (123456789 :: Int) + Ob.fynd 0x10000000000000000 `shouldBe` (0x10000000000000000 :: Integer) describe "feis" $ it "matches reference values" $ do - Ob.feis 123456789 `shouldBe` (2060458291 :: Word32) - Ob.feis 15663360 `shouldBe` (1195593620 :: Word32) + Ob.feis 123456789 `shouldBe` 2060458291 + Ob.feis 15663360 `shouldBe` 1195593620 describe "tail" $ it "matches reference values" $ do - Ob.tail 123456789 `shouldBe` (1107963580 :: Word32) - Ob.tail 1195593620 `shouldBe` (15663360 :: Word32) + Ob.tail 123456789 `shouldBe` 1107963580 + Ob.tail 1195593620 `shouldBe` 15663360 diff --git a/urbit-hob.cabal b/urbit-hob.cabal @@ -54,10 +54,13 @@ library , Urbit.Ob.Co , Urbit.Ob.Muk , Urbit.Ob.Ob + + other-modules: + Data.Serialize.Extended + build-depends: base >= 4.7 && < 6 , bytestring >= 0.10 && < 1 - , cereal >= 0.5 && < 1 , murmur3 >= 1.0 && < 2 , text >= 1.2 && < 2 , vector >= 0.12 && < 1 @@ -96,5 +99,6 @@ Test-suite co , hspec , hspec-core , QuickCheck + , text , urbit-hob