urbit-hob

Haskell utilities for phonemic base wrangling.
git clone git://git.jtobin.io/urbit-hob.git
Log | Files | Refs | README | LICENSE

Co.hs (9111B)


      1 {-# LANGUAGE DeriveGeneric #-}
      2 {-# LANGUAGE OverloadedStrings #-}
      3 
      4 -- |
      5 -- Module: Urbit.Ob.Co
      6 -- Copyright: (c) 2019 Jared Tobin
      7 -- License: MIT
      8 --
      9 -- Maintainer: Jared Tobin <jared@jtobin.io>
     10 -- Stability: unstable
     11 -- Portability: ghc
     12 --
     13 -- General functions for atom printing.
     14 --
     15 -- Roughly analogous to the +co arm in hoon.hoon.
     16 
     17 module Urbit.Ob.Co (
     18     Patp(..)
     19   , Patq(..)
     20 
     21   , patp
     22   , patq
     23 
     24   , fromPatp
     25   , fromPatq
     26 
     27   , renderPatp
     28   , renderPatq
     29 
     30   , parsePatp
     31   , parsePatq
     32   ) where
     33 
     34 import qualified Data.ByteString as BS
     35 import Data.Char (isAsciiLower)
     36 import Data.Foldable (foldrM)
     37 import qualified Data.Serialize.Extended as C
     38 import qualified Data.Text as T
     39 import qualified Data.Vector as V
     40 import Data.Word (Word8)
     41 import GHC.Generics (Generic)
     42 import Numeric.Natural (Natural)
     43 import Prelude hiding (log)
     44 import qualified Urbit.Ob.Ob as Ob (fein, fynd)
     45 
     46 -- | Hoon's \@p encoding.
     47 --
     48 --   This encoding is an /obfuscated/ representation of some underlying number,
     49 --   but a pronounceable, memorable, and unique one.
     50 --
     51 --   The representation exists for any natural number, but it's typically used
     52 --   only for naming Azimuth points, and thus normal 32-bit Urbit ships.
     53 --
     54 --   (It's also used for naming comets, i.e. self-signed 128-bit Urbit ships.)
     55 --
     56 newtype Patp = Patp {
     57     unPatp :: BS.ByteString
     58   } deriving (Eq, Ord, Generic)
     59 
     60 instance Show Patp where
     61   show = T.unpack . renderPatp
     62 
     63 -- | Hoon's \@q encoding.
     64 --
     65 --   Unlike \@p, the \@q encoding is a /non-obfuscated/ representation of an
     66 --   atom.
     67 --
     68 --   It's typically used for serializing arbitrary data in a memorable and
     69 --   pronounceable fashion.
     70 --
     71 newtype Patq = Patq {
     72     unPatq :: BS.ByteString
     73   } deriving (Eq, Ord, Generic)
     74 
     75 instance Show Patq where
     76   show = T.unpack . renderPatq
     77 
     78 -- | Convert a 'Natural' to \@p.
     79 --
     80 --   >>> patp 0
     81 --   ~zod
     82 --   >>> patp 256
     83 --   ~marzod
     84 --   >>> patp 65536
     85 --   ~dapnep-ronmyl
     86 --   >>> patp 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
     87 --   ~fipfes-fipfes-fipfes-fipfes--fipfes-fipfes-fipfes-fipfes
     88 --
     89 patp :: Natural -> Patp
     90 patp = Patp . BS.reverse . C.unroll . Ob.fein
     91 
     92 -- | Convert a 'Natural' to \@q.
     93 --
     94 --   >>> patq 0
     95 --   ~zod
     96 --   >>> patq 256
     97 --   ~marzod
     98 --   >>> patq 65536
     99 --   ~nec-dozzod
    100 --   >>> patp 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
    101 --   ~fipfes-fipfes-fipfes-fipfes-fipfes-fipfes-fipfes-fipfes
    102 --
    103 patq :: Natural -> Patq
    104 patq = Patq . BS.reverse . C.unroll
    105 
    106 -- | Convert a \@p value to its corresponding 'Natural'.
    107 --
    108 --   >>> let zod = patp 0
    109 --   >>> fromPatp zod
    110 --   0
    111 --
    112 fromPatp :: Patp -> Natural
    113 fromPatp = Ob.fynd . C.roll . BS.reverse . unPatp
    114 
    115 -- | Convert a \@q value to its corresponding 'Natural'.
    116 --
    117 --   >>> let zod = patq 0
    118 --   >>> fromPatq zod
    119 --   0
    120 --
    121 fromPatq :: Patq -> Natural
    122 fromPatq = C.roll . BS.reverse . unPatq
    123 
    124 -- | Render a \@p value as 'T.Text'.
    125 --
    126 --   >>> renderPatp (patp 0)
    127 --   "~zod"
    128 --   >>> renderPatp (patp 15663360)
    129 --   "~nidsut-tomdun"
    130 renderPatp :: Patp -> T.Text
    131 renderPatp (Patp bs) = render Padding LongSpacing bs
    132 
    133 -- | Render a \@p value as 'T.Text'.
    134 --
    135 --   >>> renderPatq (patq 0)
    136 --   "~zod"
    137 --   >>> renderPatq (patq 15663360)
    138 --   "~mun-marzod"
    139 renderPatq :: Patq -> T.Text
    140 renderPatq (Patq bs) = render NoPadding ShortSpacing bs
    141 
    142 -- | Parse a \@p value existing as 'T.Text'.
    143 --
    144 --   >>> parsePatp "~nidsut-tomdun"
    145 --   Right ~nidsut-tomdun
    146 --   > parsePatp "~fipfes-fipfes-fipfes-doznec"
    147 --   Right ~fipfes-fipfes-fipfes-doznec
    148 --
    149 parsePatp :: T.Text -> Either T.Text Patp
    150 parsePatp = fmap Patp . parse
    151 
    152 -- | Parse a \@q value existing as 'T.Text'.
    153 --
    154 --   >>> parsePatq "~nec-dozzod"
    155 --   Right ~nec-dozzod
    156 --   > parsePatq "~fipfes-fipfes-fipfes-doznec"
    157 --   Right ~fipfes-fipfes-fipfes-doznec
    158 --
    159 parsePatq :: T.Text -> Either T.Text Patq
    160 parsePatq = fmap Patq . parse
    161 
    162 -- Padding option for rendering.
    163 data Padding =
    164     Padding
    165   | NoPadding
    166   deriving Eq
    167 
    168 -- Spacing option for rendering.
    169 data Spacing =
    170     LongSpacing
    171   | ShortSpacing
    172   deriving Eq
    173 
    174 -- General-purpose renderer.
    175 render :: Padding -> Spacing -> BS.ByteString -> T.Text
    176 render padding spacing bs =
    177       T.cons '~'
    178     . snd
    179     . BS.foldr alg (0 :: Int, mempty)
    180     $ padded
    181   where
    182     alg val (idx, acc) =
    183       let syl = if even idx then suffix val else prefix val
    184           glue
    185             | idx `mod` 8 == 0 = if idx == 0 then mempty else dash
    186             | even idx         = "-"
    187             | otherwise        = mempty
    188       in  (succ idx, syl <> glue <> acc)
    189 
    190     padded
    191       | padCondition = BS.cons 0 bs
    192       | otherwise    = bs
    193 
    194     dash = case spacing of
    195       LongSpacing  -> "--"
    196       ShortSpacing -> "-"
    197 
    198     padCondition =
    199       let len = BS.length bs
    200       in  case padding of
    201             NoPadding -> len == 0
    202             Padding   -> (odd len && len > 2) || len == 0
    203 
    204 -- General-purpose (non-strict) parser.
    205 parse :: T.Text -> Either T.Text BS.ByteString
    206 parse p =
    207       fmap snd
    208     $ foldrM alg (0 :: Int, mempty) syls
    209   where
    210     alg syl (idx, acc) = do
    211       word <- if even idx then fromSuffix syl else fromPrefix syl
    212       return (succ idx, BS.cons word acc)
    213 
    214     syls =
    215         T.chunksOf 3
    216       . T.filter isAsciiLower
    217       $ p
    218 
    219 prefixes :: V.Vector T.Text
    220 prefixes = V.fromList
    221   ["doz","mar","bin","wan","sam","lit","sig","hid","fid","lis","sog","dir"
    222   ,"wac","sab","wis","sib","rig","sol","dop","mod","fog","lid","hop","dar"
    223   ,"dor","lor","hod","fol","rin","tog","sil","mir","hol","pas","lac","rov"
    224   ,"liv","dal","sat","lib","tab","han","tic","pid","tor","bol","fos","dot"
    225   ,"los","dil","for","pil","ram","tir","win","tad","bic","dif","roc","wid"
    226   ,"bis","das","mid","lop","ril","nar","dap","mol","san","loc","nov","sit"
    227   ,"nid","tip","sic","rop","wit","nat","pan","min","rit","pod","mot","tam"
    228   ,"tol","sav","pos","nap","nop","som","fin","fon","ban","mor","wor","sip"
    229   ,"ron","nor","bot","wic","soc","wat","dol","mag","pic","dav","bid","bal"
    230   ,"tim","tas","mal","lig","siv","tag","pad","sal","div","dac","tan","sid"
    231   ,"fab","tar","mon","ran","nis","wol","mis","pal","las","dis","map","rab"
    232   ,"tob","rol","lat","lon","nod","nav","fig","nom","nib","pag","sop","ral"
    233   ,"bil","had","doc","rid","moc","pac","rav","rip","fal","tod","til","tin"
    234   ,"hap","mic","fan","pat","tac","lab","mog","sim","son","pin","lom","ric"
    235   ,"tap","fir","has","bos","bat","poc","hac","tid","hav","sap","lin","dib"
    236   ,"hos","dab","bit","bar","rac","par","lod","dos","bor","toc","hil","mac"
    237   ,"tom","dig","fil","fas","mit","hob","har","mig","hin","rad","mas","hal"
    238   ,"rag","lag","fad","top","mop","hab","nil","nos","mil","fop","fam","dat"
    239   ,"nol","din","hat","nac","ris","fot","rib","hoc","nim","lar","fit","wal"
    240   ,"rap","sar","nal","mos","lan","don","dan","lad","dov","riv","bac","pol"
    241   ,"lap","tal","pit","nam","bon","ros","ton","fod","pon","sov","noc","sor"
    242   ,"lav","mat","mip","fip"]
    243 
    244 prefix :: Integral a => a -> T.Text
    245 prefix = V.unsafeIndex prefixes . fromIntegral
    246 
    247 fromPrefix :: T.Text -> Either T.Text Word8
    248 fromPrefix syl = case V.findIndex (== syl) prefixes of
    249     Nothing -> Left msg
    250     Just x  -> Right (fromIntegral x :: Word8)
    251   where
    252     msg = "(urbit-hob) bad parse: invalid prefix \"" <> syl <> "\""
    253 
    254 suffixes :: V.Vector T.Text
    255 suffixes = V.fromList
    256   ["zod","nec","bud","wes","sev","per","sut","let","ful","pen","syt","dur"
    257   ,"wep","ser","wyl","sun","ryp","syx","dyr","nup","heb","peg","lup","dep"
    258   ,"dys","put","lug","hec","ryt","tyv","syd","nex","lun","mep","lut","sep"
    259   ,"pes","del","sul","ped","tem","led","tul","met","wen","byn","hex","feb"
    260   ,"pyl","dul","het","mev","rut","tyl","wyd","tep","bes","dex","sef","wyc"
    261   ,"bur","der","nep","pur","rys","reb","den","nut","sub","pet","rul","syn"
    262   ,"reg","tyd","sup","sem","wyn","rec","meg","net","sec","mul","nym","tev"
    263   ,"web","sum","mut","nyx","rex","teb","fus","hep","ben","mus","wyx","sym"
    264   ,"sel","ruc","dec","wex","syr","wet","dyl","myn","mes","det","bet","bel"
    265   ,"tux","tug","myr","pel","syp","ter","meb","set","dut","deg","tex","sur"
    266   ,"fel","tud","nux","rux","ren","wyt","nub","med","lyt","dus","neb","rum"
    267   ,"tyn","seg","lyx","pun","res","red","fun","rev","ref","mec","ted","rus"
    268   ,"bex","leb","dux","ryn","num","pyx","ryg","ryx","fep","tyr","tus","tyc"
    269   ,"leg","nem","fer","mer","ten","lus","nus","syl","tec","mex","pub","rym"
    270   ,"tuc","fyl","lep","deb","ber","mug","hut","tun","byl","sud","pem","dev"
    271   ,"lur","def","bus","bep","run","mel","pex","dyt","byt","typ","lev","myl"
    272   ,"wed","duc","fur","fex","nul","luc","len","ner","lex","rup","ned","lec"
    273   ,"ryd","lyd","fen","wel","nyd","hus","rel","rud","nes","hes","fet","des"
    274   ,"ret","dun","ler","nyr","seb","hul","ryl","lud","rem","lys","fyn","wer"
    275   ,"ryc","sug","nys","nyl","lyn","dyn","dem","lux","fed","sed","bec","mun"
    276   ,"lyr","tes","mud","nyt","byr","sen","weg","fyr","mur","tel","rep","teg"
    277   ,"pec","nel","nev","fes"]
    278 
    279 suffix :: Integral a => a -> T.Text
    280 suffix = V.unsafeIndex suffixes . fromIntegral
    281 
    282 fromSuffix :: T.Text -> Either T.Text Word8
    283 fromSuffix syl = case V.findIndex (== syl) suffixes of
    284     Nothing -> Left msg
    285     Just x  -> Right (fromIntegral x :: Word8)
    286   where
    287     msg = "(urbit-hob) bad parse: invalid suffix \"" <> syl <> "\""
    288