[Initial addition Dec 11 2007 jupdike@gmail.com**20071211222454] { addfile ./ANSI.hs hunk ./ANSI.hs 1 +-- Partially taken from Hugs AnsiScreen.hs library: +module ANSI + ( highlightOn + , highlightOff + , highlight + , cleareol, clearbol, clearline, clearDown, clearUp, cls + , goto + , cursorUp, cursorDown, cursorLeft, cursorRight + , savePosition, restorePosition + , Highlight(..) + , Colour(..) + , colourCycle + , enableScrollRegion, scrollUp, scrollDown + , lineWrap + ) where + +import List (intersperse,isPrefixOf) +import Char (isDigit) +import ColourHighlight + + +-- Basic screen control codes: + +type Pos = (Int,Int) + +at :: Pos -> String -> String +goto :: Int -> Int -> String +home :: String +cls :: String + +at (x,y) s = goto x y ++ s +goto x y = '\ESC':'[':(show y ++(';':show x ++ "H")) +home = goto 1 1 + +cursorUp = "\ESC[A" +cursorDown = "\ESC[B" +cursorRight = "\ESC[C" +cursorLeft = "\ESC[D" + +cleareol = "\ESC[K" +clearbol = "\ESC[1K" +clearline = "\ESC[2K" +clearDown = "\ESC[J" +clearUp = "\ESC[1J" +-- Choose whichever of the following lines is suitable for your system: +cls = "\ESC[2J" -- for PC with ANSI.SYS +--cls = "\^L" -- for Sun window + +savePosition = "\ESC7" +restorePosition = "\ESC8" + + +-- data Colour -- imported from ColourHighlight +-- data Highlight -- imported from ColourHighlight + +instance Enum Highlight where + fromEnum Normal = 0 + fromEnum Bold = 1 + fromEnum Dim = 2 + fromEnum Underscore = 4 + fromEnum Blink = 5 + fromEnum ReverseVideo = 7 + fromEnum Concealed = 8 + fromEnum (Foreground c) = 30 + fromEnum c + fromEnum (Background c) = 40 + fromEnum c + +highlight :: [Highlight] -> String -> String +highlight attrs s = highlightOn attrs ++ s ++ highlightOff + +highlightOn [] = highlightOn [Normal] +highlightOn attrs = "\ESC[" + ++ concat (intersperse ";" (map (show.fromEnum) attrs)) + ++"m" +highlightOff = "\ESC[0m" + + +-- An infinite supply of colours. +colourCycle :: [Colour] +colourCycle = cycle [Red,Blue,Magenta,Green,Cyan] + + +-- Scrolling +enableScrollRegion :: Int -> Int -> String +enableScrollRegion start end = "\ESC["++show start++';':show end++"r" + +scrollDown = "\ESCD" +scrollUp = "\ESCM" + +-- Line-wrapping mode +lineWrap True = "\ESC[7h" +lineWrap False = "\ESC[7l" + addfile ./ColourHighlight.hs hunk ./ColourHighlight.hs 1 +module ColourHighlight + ( Colour(..) + , Highlight(..) + ) where + +data Colour = Black | Red | Green | Yellow | Blue | Magenta | Cyan | White + deriving (Eq,Show,Read,Enum) + +data Highlight = + Normal + | Bold + | Dim + | Underscore + | Blink + | ReverseVideo + | Concealed + | Foreground Colour + | Background Colour + | Note String + deriving (Eq,Show,Read) + addfile ./Colourise.hs hunk ./Colourise.hs 1 +module Colourise + ( module ColourHighlight + , ColourPrefs(..) + , readColourPrefs + , colourise + ) where + +import ColourHighlight +import System (getEnv) +import Char +import List + +--myhead :: [a] -> a +--myhead [] = error "my head, emptylist" +--myhead (x:xs) = x + +data ColourPrefs = ColourPrefs + { keyword, keyglyph, layout, comment + , conid, varid, conop, varop + , string, char, number, variddecl + , selection, variantselection :: [Highlight] + } deriving (Eq,Show,Read) + +defaultColourPrefs = ColourPrefs + { keyword = [Foreground Green,Underscore] + , keyglyph = [Foreground Red] + , layout = [Foreground Cyan] + , comment = [Foreground Blue] + , conid = [Normal] + , varid = [Normal] + , variddecl= [Normal] + , conop = [Foreground Red,Bold] + , varop = [Foreground Cyan] + , string = [Foreground Magenta] + , char = [Foreground Magenta] + , number = [Foreground Magenta] + , selection = [Bold, Foreground Magenta] + , variantselection = [Dim, Foreground Red, Underscore] + } + +readColourPrefs :: IO ColourPrefs +readColourPrefs = catch + (do val <- readFile ".hscolour" + return (read val)) + (\e-> catch + (do home <- getEnv "HOME" + val <- readFile (home++"/.hscolour") + return (read val)) + (\e-> return defaultColourPrefs)) + +colourise :: ColourPrefs -> String -> [(String,[Highlight])] +colourise pref str = map (\s->(s,classify pref s)) list + where list = filter (/=[]) $ glue $ tokenise $ str + +-- Lex Haskell source code into a token stream +tokenise :: String -> [String] +tokenise [] = [] +tokenise ('-':'-':s) = ("--"++comment):tokenise rest where (comment,rest) = span (/='\n') s +tokenise (c:s) | isSpace c + = (c:ss): tokenise rest where (ss,rest) = span isSpace s +tokenise s | length lexs > 0 = let (tok,rest) = head lexs in tok: tokenise rest + | otherwise = error $ "Problem parsing: " ++ show s + where lexs = (Prelude.lex s) + +-- what can an identifier token start with? +small = "abcdefghijklmnopqrstuvwxyz_" -- ++ unicodeSmall + +-- Glue sequences of tokens into more useful blobs +--glue (q:".":n:rest) | Char.isUpper (head q) -- qualified names +-- = glue ((q++"."++n): rest) + + +-- identifier at beginning of line (i.e. top-level declaration) +glue (wsNl:s:ss) | last wsNl == '\n' && head s `elem` small && s `notElem` keywords + = --error $ "Found something:"++wsNl++s + init wsNl : ('\n':s) : glue ss +glue ("`":rest) = -- `varid` -> varop + case glue rest of + (qn:"`":rest) -> ("`"++qn++"`"): glue rest + _ -> ("`": rest) +glue (s:ss) | all (=='-') s && length s >=2 -- eol comment + = (s++concat c): glue rest + where (c,rest) = break ('\n'`elem`) ss +glue ("{":"-":ss) = ("{-"++c): glue rest -- nested comment + where (c,rest) = nestcomment 0 ss +glue (s:ss) = s: glue ss +glue [] = [] + +nestcomment :: Int -> [String] -> (String,[String]) +nestcomment n ("{":"-":ss) | n>=0 = (("{-"++cs),rm) + where (cs,rm) = nestcomment (n+1) ss +nestcomment n ("-":"}":ss) | n>0 = (("-}"++cs),rm) + where (cs,rm) = nestcomment (n-1) ss +nestcomment n ("-":"}":ss) | n==0 = ("-}",ss) +nestcomment n (s:ss) | n>=0 = ((s++cs),rm) + where (cs,rm) = nestcomment n ss +nestcomment n [] = error "no closing comment -}" + +{- +data TokenType = + KeyWord | KeyGlyph | Layout | Comment | ConId | VarId | ConOp | VarOp | + String | Char | Number | Error + deriving (Eq) +-} + +-- Classify tokens +classify :: ColourPrefs -> String -> [Highlight] +classify pref s@(h:_) + | h=='\n' && + (length s > 1) && + ((head (tail s)) `elem` small) = variddecl pref + | isSpace h = [Normal] + | all (=='-') s = comment pref + | "--" `isPrefixOf` s + && any isSpace s = comment pref -- not fully correct + | "{-" `isPrefixOf` s = comment pref + | s `elem` keywords = keyword pref + | s `elem` keyglyphs = keyglyph pref + | s `elem` layoutchars = layout pref + | isUpper h = conid pref + | isLower h = varid pref + | h `elem` symbols = varop pref + | h==':' = conop pref + | h=='`' = varop pref + | h=='"' = string pref + | h=='\'' = char pref + | isDigit h = number pref + | otherwise = selection pref +classify _ [] = [] + +-- Haskell keywords +keywords = + ["case","class","data","default","deriving","do","else" + ,"if","import","in","infix","infixl","infixr","instance","let","module" + ,"newtype","of","then","type","where","_","foreign","ccall","as"] +keyglyphs = + ["..","::","=","\\","|","<-","->","@","~","=>"] +layoutchars = + map (:[]) ";{}[]()," +symbols = + "!#$%&*+./<=>?@\\^|-~" addfile ./Doc.lhs hunk ./Doc.lhs 1 +%ignore + +> {-# OPTIONS_GHC -fglasgow-exts #-} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +> module Doc where +> import Data.Generics +> import ListToTree + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%endignore + +==== Document and related types + +=== Basic special markup and types for structured documents + +== Basic types + +A document is a list of top-level elements (for example, paragraphs, +lists, etc.): + +> type Document = [TopElem] + +A Top-level element is a list of simple elements (for example, plain +text, bolded text, italic text, linked text or images, entries in an +unordered list, etc.) + +> type Elems = [Elem] +> data TopElem = Par Elems +> -- | Pre Elems +> | Div DivInfo [Elems] +> -- | Tbl [[Elems]] +> | RawBeg Char Int Elems -- raw beginner-style paragraph, converted to a SpecPar +> | SpecPar Special [Elems] -- special paragraph type +> -- special possibly tree-shaped paragraph type (outlines). top level is a list with empy node +> | TreePar Special (Tree Elems) +> deriving (Show, Eq, Data, Typeable) + +Where ""Tree a"" is imported from ""ListToTree"" module: + +< data Tree a = Tree a [Tree a] +< deriving (Show, Eq, Data, Typeable) + +> data Special = Head1 | Head2 | Head3 | Head4 -- Heading types +> | Code -- code with > or < signs on the left (removed by the parser--only unlit cares which) +> | Pre -- Quoted literal text, with a " at the far left +> | Comment -- MML internal comment and/or directives +> | BList -- bulleted list +> | NList -- numbered list +> | SList -- star list -- in this case, like poetry with bold +> | Poetry -- outline/list with no bullets +> | BQuote -- block quote, indented paragraph +> deriving (Show, Eq, Data, Typeable) + +Divs let you create subdocuments, for example to place text in +floating figures or multiple columns, or special centered text, etc. + +> data DivInfo = Custom String +> deriving (Show, Eq, Data, Typeable) + +An atomic element is either simple text, an entity (explained below), +a link, marked-up text (italic or bold, which may contain any number +of atomic elements) or an image element. + +> type Entity = String -- "ntilde", "ndash", "nbsp" for example + +> data Elem = Txt String +> | Ent Entity +> | TT Elems +> | Ital Elems +> | Bold Elems +> | VarInt Elems +> | UrlElem Url +> | RawLnk Elems -- to be processed into a Lnk or Img +> | Lnk Url Elems +> | Img Url (Maybe Url) (Maybe AltTxt) +> | Fnote Integer Elems -- footnote +> deriving (Show, Eq, Data, Typeable) + +== Urls, Links, Images + +A Url could be just a String but it could also be something more +involved, for example: + +> data Url = Url ProtocolStr UrlStr +> -- | Intern Integer String +> deriving (Show, Eq, Data, Typeable) + +Alternate Text, Protocol Strings, Url Strings are just strings, but +type synonyms are a good thing. + +> type AltTxt = String +> type ProtocolStr = String +> type UrlStr = String + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Clean up a paragraph by stripping leading and trailing whitespace off +of Txt elems, but only in certain situations (e.g. ""[[ http://abc ]]"") + +> stripWs, stripStart, stripEnd :: Elems -> Elems +> stripWs = filter (/= Txt "") . stripStart +> stripStart [] = [] +> stripStart (Txt t1:es) = (Txt (stripLeft t1)) : stripEnd es +> stripStart (e:es) = e : (stripEnd es) +> stripEnd [] = [] +> stripEnd (Txt t:[]) = Txt (stripRight t) : [] +> stripEnd (e:[]) = e : [] +> stripEnd (e:es) = e : stripEnd es + +> strip, stripRight, stripLeft :: String -> String + +> stripRight s = reverse (stripLeft (reverse s)) -- I hope lazy evaluation makes this faster than +> -- intuition makes it seem... though it is probably +> -- 2n instead of n, for n == length s +> stripLeft = dropWhile (`elem` " \t") +> strip = stripRight . stripLeft -- order matters here! addfile ./Doc2Html.lhs hunk ./Doc2Html.lhs 1 +> module Doc2Html where +> import Monad ( join ) +> import List ( intersperse ) +> import HsColour + +> import HTMLEnt + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +My modules + +> import ListToTree +> import Doc +> import GroupSquash ( groupSquash ) + +> import XHTML + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + > elems2string ps = concat (intersperse "\n" (map htmlEntityEncode (extractStrings ps))) + +> elems2string ps = concat (intersperse "\n" (extractStrings ps)) + +> doc2xml :: Document -> [XML] +> doc2xml d = map topsToXML d + +> topsToXML :: TopElem -> XML +> topsToXML (Par es) = pp $ elemsToXMLs es +> topsToXML (RawBeg c i es) = error$ "Unsupported special paragraph: "++show c +> topsToXML (SpecPar Comment ps) = comment $ extractStrings ps +> topsToXML (SpecPar Pre ps) = "pre" `around` [txt $ htmlEntityEncode $ elems2string ps] +> topsToXML (SpecPar Code ps) = elm "pre" ["class"-->"code"] [txt $ tail $ cssColourise $ '\n':elems2string ps] + +> topsToXML (SpecPar Head4 ps) = h1 $ concat $ intersperse [br] (map elemsToXMLs ps) +> topsToXML (SpecPar Head3 ps) = h2 $ concat $ intersperse [br] (map elemsToXMLs ps) +> topsToXML (SpecPar Head2 ps) = h3 $ concat $ intersperse [br] (map elemsToXMLs ps) +> topsToXML (SpecPar Head1 ps) = h4 $ concat $ intersperse [br] (map elemsToXMLs ps) +> topsToXML (SpecPar BQuote ps) = ulNone $ map (li . elemsToXMLs) ps + +> topsToXML (TreePar special (Tree _ treelist)) = head $ subtreeToXML 0 special treelist +> topsToXML x = error$ "Some unknown TopElem: " ++ show x ++"\n\n" + +> subtreeToXML _ _ [] = [] +> subtreeToXML l special@NList treelist = [ol l $ map (li . (treeToXMLs l special)) treelist] +> subtreeToXML l special@BList treelist = [ul $ map (li . (treeToXMLs l special)) treelist] +> subtreeToXML l special@SList treelist = [ulNone $ map (li . (singleton . bb) . (treeToXMLs l special)) treelist] +> subtreeToXML l special@Poetry treelist = [ulNone $ map (li . (treeToXMLs l special)) treelist] + +> treeToXMLs l special (Tree a treelist) = elemsToXMLs a ++ subtreeToXML (l+1) special treelist + +> singleton x = [x] +> li = around "li" +> olLoop = [ around "ol" +> , elm "ol" ["style"-->"list-style-type: upper-alpha;"] +> , elm "ol" ["style"-->"list-style-type: upper-roman;"] +> , elm "ol" ["style"-->"list-style-type: lower-alpha;"] +> , elm "ol" ["style"-->"list-style-type: lower-roman;"] ] +> ol l = olLoop !! (l `mod` (length olLoop)) +> ul = around "ul" +> ulNone = elm "ul" ["style"-->"list-style-type: none;"] + +> extractStrings ps = map (getStrings "") ps +> where getStrings acc [] = acc +> getStrings acc ((Txt s):rest) = getStrings (acc++s) rest +> getStrings acc (_:rest) = getStrings acc rest + + > xflatnl :: Elems -> [XML] + > xflatnl es = [txt "nothing"] + +> elemsToXMLs :: Elems -> [XML] +> elemsToXMLs = map elemToXML + +> elemToXML (Txt t) = txt $ t -- htmlEntityEncode t +> elemToXML (Ent x) = case length x of +> 1 -> txt $ htmlEntityEncode x +> _ -> txt $ "&"++x++";" -- single chars are self +> elemToXML (Ital es) = ii $ elemsToXMLs es +> elemToXML (Bold es) = bb $ elemsToXMLs es + +> -- TODO Variable Interpolation: this should generate an error OR an +> -- empty string for normal use because it should have been replaced +> -- by now. Maybe literally printing it is a good thing because then +> -- the author knows where variables failed to be expanded ... but +> -- this is not where a lookup occurs, so this point should never +> -- really be reached. + +> elemToXML (VarInt ((Txt keyname):_)) = hole $ keyname +> elemToXML (VarInt es) = tt $ [txt "<<"] ++ elemsToXMLs es ++ [txt ">>"] + + > elemToXML (VarInt es) | isNothing val = tt $ [txt "<<"] ++ key ++ [txt ">>"] + > | otherwise = val + > where xml = lookup key env + > key = elemsToXMLs es + +> -- TODO This gets flattened, but it should either do something different or cause an error, right? +> elemToXML (RawLnk es) = tt $ elemsToXMLs es + +> elemToXML (TT es) = tt $ elemsToXMLs es +> elemToXML (Fnote n es) = lnk showN [sup $ [txt showN]] where showN = show n + +Deal with the different shaped UrlElem, Img, and Lnk elements and do +the best thing possible with the given amount of information. + +> elemToXML (UrlElem url) = let (Url p u) = url +> pu = p++u in aa pu pu +> elemToXML (Img (Url _ i) targ a) = +> let altt = case a of Nothing -> [] +> Just s -> ["alt"-->s] in +> case targ of Nothing -> imgWith $ altt++["src"-->i] +> Just (Url p u) -> lnk pu [imgWith $ altt ++ ["src"-->i]] +> where pu = p++u + +> -- TODO use strip on elemsToXMLs es +> elemToXML (Lnk (Url p u) es) = lnk pu $ elemsToXMLs es where pu = p++u + addfile ./GroupSquash.lhs hunk ./GroupSquash.lhs 1 +> module GroupSquash ( groupSquash ) where +> import List ( groupBy ) + +Group-and-squasher utility function: + +> groupSquash :: (a->a->Bool) -> ([a]->[a]) -> [a] -> [a] +> groupSquash eq squasher items = +> foldr1 (++) $ map squasher $ groupBy eq items + +Example usage of this sweet utility function: + +< let squasher xs = case head xs of +< 's' -> "$" +< 'p' -> map (const 'q') xs +< _ -> xs +< in +< groupSquash (==) squasher "Mississippi" + +" ==> Mi$i$iqqi addfile ./HTML.hs hunk ./HTML.hs 1 +module HTML where + +import XHTML + +type TitleStr = String +type StylesheetStr = String +type DTDStr = String + +dtdXHTML = "\n\n" + +--dtdXHTML = "" + +--dtdXHTML = "\n" + +type OtherHeads = [XML] + +pageString :: DTDStr -> TitleStr -> StylesheetStr -> OtherHeads -> XML -> String +pageString d t s other body = + let title = "title" `around` [txt t] + style = "link" `with` ["href"-->s, "rel"-->"stylesheet", "media"-->"screen", + "type"-->"text/css"] + head = "head" `around` ([title, style]++other) + html = elm "html" ["xmlns"-->"http://www.w3.org/1999/xhtml", "xml:lang"-->"en", "lang"-->"en"] [head, body] --"html" `around` [head, body] + in + d ++ showXHTML 100 html + +xhtmlPage title stylesheet otherheads body = cleanup $ pageString dtdXHTML title stylesheet otherheads body + +xmlToStr xml = cleanup $ showXHTMLs 100 xml + +cleanup = unlines . map (unindentclosingtag 2) . lines + +-- was ... . filter nonblank . lines + +nonblank [] = False +nonblank ('\r':rest) = nonblank rest +nonblank ('\n':rest) = nonblank rest +nonblank (' ':rest) = nonblank rest +nonblank _ = True + +unindentclosingtag n line = + if isonlyclosingtag line + then removeindent n line + else line + +isonlyclosingtag (' ':xs) = isonlyclosingtag xs +isonlyclosingtag x = ioct2 x + +ioct2 ('<':xs) = ioct3 xs +ioct2 _ = False + +ioct3 ('/':xs) = True +ioct3 _ = False + +removeindent 0 x = x +removeindent n (' ':xs) = removeindent (n-1) xs +removeindent n x = x addfile ./HTMLEnt.hs hunk ./HTMLEnt.hs 1 +module HTMLEnt where + +htmlEntityEncode :: String -> String +htmlEntityEncode s = foldr (++) [] (map htmlEntChar s) +htmlEntChar :: Char -> String +htmlEntChar '<' = "<" +htmlEntChar '>' = ">" +htmlEntChar '&' = "&" +htmlEntChar c = [c] + addfile ./HsColour.hs hunk ./HsColour.hs 1 +module HsColour ( cssColourise ) where + +import ANSI +import Colourise +--import System + +{- +import IO (hFlush,stdout) + +main = do + p <- System.getProgName + a <- System.getArgs + pref <- readColourPrefs + case a of + [] -> help p + ["-h"] -> help p + ["-help"] -> help p + ["-tty"] -> Prelude.interact (tty pref) + ["-html"] -> Prelude.interact (html pref) + ["-css"] -> Prelude.interact css + [a] -> do readFile a >>= putStr . tty pref + ["-tty",a] -> do readFile a >>= putStr . tty pref + ["-html",a] -> do readFile a >>= putStr . html pref + ["-css",a] -> do readFile a >>= putStr . css + _ -> help p + hFlush stdout + where + tty pref = concat . map renderTTY . colourise pref + html pref = ("
"++) . (++"
") + . concat . map renderHTML . colourise pref + help p = error ("Usage: "++p++" [-tty|-html|-css] [file.hs]") +-} + +-- cssColourise = {- (cssPrefix++) . (++cssSuffix) . -} + +cssColourise str = concatMap renderCSS $ colourise cssPref str + +renderTTY :: (String,[Highlight]) -> String +renderTTY (s,h) = highlight h s + +renderHTML :: (String,[Highlight]) -> String +renderHTML (s,h) = fontify h (escape s) + +-- Html stuff +fontify [] s = s +fontify (h:hs) s = font h (fontify hs s) + +font Normal s = s +font Bold s = ""++s++"" +font Dim s = ""++s++"" +font Underscore s = ""++s++"" +font Blink s = ""++s++"" +font ReverseVideo s = s +font Concealed s = s +font (Foreground c) s = ""++s++"" +font (Background c) s = ""++s++"" + +escape ('<':cs) = "<"++escape cs +escape ('>':cs) = ">"++escape cs +escape ('&':cs) = "&"++escape cs +escape (c:cs) = c: escape cs +escape [] = [] + +-- CSS stuff + +cssPref = ColourPrefs + { keyword = [Note "keyword"] + , keyglyph = [Note "keyglyph"] + , layout = [Note "layout"] + , comment = [Note "comment"] + , conid = [Note "conid"] + , varid = [Note "varid"] + , variddecl= [Note "variddecl"] + , conop = [Note "conop"] + , varop = [Note "varop"] + , string = [Note "str"] + , char = [Note "chr"] + , number = [Note "num"] + , selection = [Note "sel"] + , variantselection = [Note "varsel"] + } + +renderCSS :: (String,[Highlight]) -> String +renderCSS ('\n':text, [Note cls]) = "\n" ++ escape text ++ "" --move initial newlines out of span +renderCSS (text,[Note cls]) = "" ++ escape text ++ "" +renderCSS (text,[Normal]) = escape text + + +cssPrefix = "
"
+cssSuffix = "
" addfile ./ListToTree.lhs hunk ./ListToTree.lhs 1 +==== List To Tree module + +% ignore + +> {-# OPTIONS_GHC -fglasgow-exts #-} + +> module ListToTree +> ( Tree ( Tree ) +> , pairListToTreeList +> , EmptyNode +> , pairListToTree +> ) +> where +> import Data.Generics + +% end ignore + +There was a thread on haskell-cafe about doing this exact task. The +code I wrote is embarassingly complicated compared to their simple +answer. Thanks to Cale G. and Chris K. + +> data Tree a = Tree a [Tree a] deriving (Show, Eq, Data, Typeable) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Actual exported function + +> pairListToTreeList :: [(Int, a)] -> [Tree a] +> pairListToTreeList [] = [] +> pairListToTreeList ((n,a):aa) = +> let (kids, sibs) = span (\x-> fst x > n) aa +> in (Tree a (pairListToTreeList kids)) : pairListToTreeList sibs + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Or straight to a tree, if an empty node for that tree type has +meaning. + +> class EmptyNode a where +> empty :: a +> instance EmptyNode [a] where +> empty = [] +> pairListToTree :: EmptyNode a => [(Int, a)] -> Tree a +> pairListToTree list = Tree empty (pairListToTreeList list) + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +Example test code + +> test = do print $ pairListToTree list + +> list :: [(Int, String)] +> list = [(1, "topA"), +> (2, "secondC"), +> (2, "secondD"), +> (3, "thirdG"), +> (3, "thirdH"), +> (2, "secondE"), +> (1, "topB"), +> (2, "secondF")] + +yields + +< Tree "" [Tree "topA" [Tree "secondC" [], +< Tree "secondD" +< [Tree "thirdG" [], +< Tree "thirdH" []], +< Tree "secondE" []] +< , Tree "topB" [Tree "secondF" []]] addfile ./MD5.lhs hunk ./MD5.lhs 1 + +> module MD5 (md5) where + +> import Char +> import Bits +> import Word + +> type ABCD = (Word32, Word32, Word32, Word32) +> type XYZ = (Word32, Word32, Word32) +> type Rotation = Int + +MD5 test suite: +MD5 ("") = d41d8cd98f00b204e9800998ecf8427e +MD5 ("a") = 0cc175b9c0f1b6a831c399e269772661 +MD5 ("abc") = 900150983cd24fb0d6963f7d28e17f72 +MD5 ("message digest") = f96b697d7cb7938d525a2f31aaf161d0 +MD5 ("abcdefghijklmnopqrstuvwxyz") = c3fcd3d76192e4007dfb496cca67e13b +MD5 ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") = +d174ab98d277d9f5a5611c2c9f419d9f +MD5 ("123456789012345678901234567890123456789012345678901234567890123456 +78901234567890") = 57edf4a22be3c955ac49da2e2107b67a + +md5test :: IO() +md5test = foldr (\x y -> putStr (md5 x) >> putStr "\n" >> y) (putStr "") ["", "a", "abc", "message digest", "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", "12345678901234567890123456789012345678901234567890123456789012345678901234567890"] + +MD5> md5test +d41d8cd98f00b204e9800998ecf8427e +0cc175b9c0f1b6a831c399e269772661 +900150983cd24fb0d6963f7d28e17f72 +f96b697d7cb7938d525a2f31aaf161d0 +c3fcd3d76192e4007dfb496cca67e13b +d174ab98d277d9f5a5611c2c9f419d9f +57edf4a22be3c955ac49da2e2107b67a + +MD5> + +> md5 :: String -> String +> md5 s = s5 +> where s1_2 = md5_step_1_2_pad_length s +> abcd = md5_step_3_init +> abcd' = md5_step_4_main abcd s1_2 +> s5 = md5_step_5_display abcd' + +> md5_step_1_2_pad_length :: String -> String +> md5_step_1_2_pad_length s = md5_step_1_2_work 0 s + +> md5_step_1_2_work :: Integer -> String -> String +> md5_step_1_2_work c64 "" = padding ++ len +> where padding = '\128':replicate (fromInteger (((448 - (c64 + 1)) `mod` 512) `div` 8)) '\000' +> len = map chr $ size_split 8 (fromInteger c64) +> md5_step_1_2_work c64 (c:cs) = c:md5_step_1_2_work c64' cs +> where c64' = (c64 + 8) `mod` (2^64) + +> size_split :: Int -> Word32 -> [Int] +> size_split 0 _ = [] +> size_split p n = (fromIntegral d):size_split (p-1) n' +> where (n', d) = divMod n 256 + +> md5_step_3_init :: ABCD +> md5_step_3_init = (0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476) + +> md5_step_4_main :: ABCD -> String -> ABCD +> md5_step_4_main abcd "" = abcd +> md5_step_4_main (a, b, c, d) s = md5_step_4_main abcd4 s' +> where (s64, s') = takeDrop 64 s +> (r1, r2, r3, r4) = rounds +> abcd0 = (a, b, c, d) +> abcd1 = md5_step_4_round md5_step_4_f abcd0 s16_0 r1 +> abcd2 = md5_step_4_round md5_step_4_g abcd1 s16_1 r2 +> abcd3 = md5_step_4_round md5_step_4_h abcd2 s16_2 r3 +> (a', b', c', d') = md5_step_4_round md5_step_4_i abcd3 s16_3 r4 +> abcd4 = (a + a', b + b', c + c', d + d') +> s16_0 = get_word_32s s64 +> s16_1 = map (\x -> s16_0 !! x) [(5 * x + 1) `mod` 16 | x <- [0..15]] +> s16_2 = map (\x -> s16_0 !! x) [(3 * x + 5) `mod` 16 | x <- [0..15]] +> s16_3 = map (\x -> s16_0 !! x) [(7 * x) `mod` 16 | x <- [0..15]] + +> get_word_32s :: String -> [Word32] +> get_word_32s "" = [] +> get_word_32s ss = this:rest +> where (s, ss') = takeDrop 4 ss +> this = sum $ zipWith (*) (map (fromIntegral.ord) s) +> [256^x | x <- [0..3] :: [Int]] +> rest = get_word_32s ss' + +> md5_step_4_round :: (XYZ -> Word32) -> ABCD -> [Word32] +> -> [(Rotation, Word32)] -> ABCD +> md5_step_4_round f (a, b, c, d) s ns = foldl (doit f) (a, b, c, d) ns' +> where ns' = zipWith (\x (y, z) -> (x,y,z)) s ns + +> doit :: (XYZ -> Word32) -> ABCD -> (Word32, Rotation, Word32) -> ABCD +> doit f (a, b, c, d) (k, s, i) = (d, a', b, c) +> where mid_a = a + f(b,c,d) + k + i +> rot_a = rotL mid_a s +> a' = b + rot_a + +> md5_step_4_f :: XYZ -> Word32 +> md5_step_4_f (x, y, z) = (x .&. y) .|. ((complement x) .&. z) + +> md5_step_4_g :: XYZ -> Word32 +> md5_step_4_g (x, y, z) = (x .&. z) .|. (y .&. (complement z)) + +> md5_step_4_h :: XYZ -> Word32 +> md5_step_4_h (x, y, z) = x `xor` y `xor` z + +> md5_step_4_i :: XYZ -> Word32 +> md5_step_4_i (x, y, z) = y `xor` (x .|. (complement z)) + +> rounds :: ([(Rotation, Word32)], +> [(Rotation, Word32)], +> [(Rotation, Word32)], +> [(Rotation, Word32)]) +> rounds = (r1, r2, r3, r4) +> where r1 = [(s11, 0xd76aa478), (s12, 0xe8c7b756), (s13, 0x242070db), +> (s14, 0xc1bdceee), (s11, 0xf57c0faf), (s12, 0x4787c62a), +> (s13, 0xa8304613), (s14, 0xfd469501), (s11, 0x698098d8), +> (s12, 0x8b44f7af), (s13, 0xffff5bb1), (s14, 0x895cd7be), +> (s11, 0x6b901122), (s12, 0xfd987193), (s13, 0xa679438e), +> (s14, 0x49b40821)] +> r2 = [(s21, 0xf61e2562), (s22, 0xc040b340), (s23, 0x265e5a51), +> (s24, 0xe9b6c7aa), (s21, 0xd62f105d), (s22, 0x2441453), +> (s23, 0xd8a1e681), (s24, 0xe7d3fbc8), (s21, 0x21e1cde6), +> (s22, 0xc33707d6), (s23, 0xf4d50d87), (s24, 0x455a14ed), +> (s21, 0xa9e3e905), (s22, 0xfcefa3f8), (s23, 0x676f02d9), +> (s24, 0x8d2a4c8a)] +> r3 = [(s31, 0xfffa3942), (s32, 0x8771f681), (s33, 0x6d9d6122), +> (s34, 0xfde5380c), (s31, 0xa4beea44), (s32, 0x4bdecfa9), +> (s33, 0xf6bb4b60), (s34, 0xbebfbc70), (s31, 0x289b7ec6), +> (s32, 0xeaa127fa), (s33, 0xd4ef3085), (s34, 0x4881d05), +> (s31, 0xd9d4d039), (s32, 0xe6db99e5), (s33, 0x1fa27cf8), +> (s34, 0xc4ac5665)] +> r4 = [(s41, 0xf4292244), (s42, 0x432aff97), (s43, 0xab9423a7), +> (s44, 0xfc93a039), (s41, 0x655b59c3), (s42, 0x8f0ccc92), +> (s43, 0xffeff47d), (s44, 0x85845dd1), (s41, 0x6fa87e4f), +> (s42, 0xfe2ce6e0), (s43, 0xa3014314), (s44, 0x4e0811a1), +> (s41, 0xf7537e82), (s42, 0xbd3af235), (s43, 0x2ad7d2bb), +> (s44, 0xeb86d391)] +> s11 = 7 +> s12 = 12 +> s13 = 17 +> s14 = 22 +> s21 = 5 +> s22 = 9 +> s23 = 14 +> s24 = 20 +> s31 = 4 +> s32 = 11 +> s33 = 16 +> s34 = 23 +> s41 = 6 +> s42 = 10 +> s43 = 15 +> s44 = 21 + +> takeDrop :: Int -> [a] -> ([a], [a]) +> takeDrop _ [] = ([], []) +> takeDrop 0 xs = ([], xs) +> takeDrop n (x:xs) = (x:ys, zs) +> where (ys, zs) = takeDrop (n-1) xs + +> md5_step_5_display :: ABCD -> String +> md5_step_5_display (a,b,c,d) = concat $ map display_32bits_as_hex [a,b,c,d] + +> display_32bits_as_hex :: Word32 -> String +> display_32bits_as_hex x0 = map getc [y2,y1,y4,y3,y6,y5,y8,y7] +> where (x1, y1) = divMod x0 16 +> (x2, y2) = divMod x1 16 +> (x3, y3) = divMod x2 16 +> (x4, y4) = divMod x3 16 +> (x5, y5) = divMod x4 16 +> (x6, y6) = divMod x5 16 +> (y8, y7) = divMod x6 16 +> getc n = (['0'..'9'] ++ ['a'..'f']) !! (fromIntegral n) + +> rotL :: Word32 -> Rotation -> Word32 +> rotL a s = shiftL a s .|. shiftL a (s-32) + addfile ./Mmlparse.lhs hunk ./Mmlparse.lhs 1 +==== MML +==== A Wiki-like Minimal Markup Language + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +=== Parser module + +%if False + +> {-# OPTIONS_GHC -fglasgow-exts #-} + +> module Mmlparse where +> import Text.ParserCombinators.Parsec hiding ( Parser ) +> import Text.ParserCombinators.Parsec.Prim hiding ( Parser ) +> import List +> import Monad +> import Data.Generics +> +> import Doc +> import Doc2Html +> import HTMLEnt +> import GroupSquash ( groupSquash ) +> +> import ListToTree + +%endif + +> type Parser a = GenParser Char PState a + +> type PState = Integer +> emptyState :: PState +> emptyState = 0 + +Simple helper function to flatten a list of lists +and remove duplicates, e.g. ""["//", "**", "[[", "]]"] --> "/*[]" "" + +> flattenUnion :: (Eq a) => [[a]] -> [a] +> flattenUnion = nub . concat + +%if False + +5-tuple pattern matcher helper functions (so we can pass them around +without having to define lambdas everywhere) + +> t1of5 :: (a,b,c,d,e) -> a +> t2of5 :: (a,b,c,d,e) -> b +> t3of5 :: (a,b,c,d,e) -> c +> t4of5 :: (a,b,c,d,e) -> d +> t5of5 :: (a,b,c,d,e) -> e +> t1of5 (a,_,_,_,_) = a +> t2of5 (_,b,_,_,_) = b +> t3of5 (_,_,c,_,_) = c +> t4of5 (_,_,_,d,_) = d +> t5of5 (_,_,_,_,e) = e + +%endif + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% + +=== Basic parsers + +== Normal text + +Alphanumeric plus certain puncuation are consider normal, non-special +characters in both TeX and HTML, but everything else does something +weird so we have to deal with it separately + +> lowercase = ['a'..'z'] +> uppercase = ['A'..'Z'] +> alpha = lowercase ++ uppercase +> digits = ['0'..'9'] +> spacing = " \t" +> normalPunct = "!?.,:;()" -- for now... +> normalChars = alpha ++ digits ++ normalPunct -- ++ spacing + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% + +== Urls, Links, Images + +What does the parser recognize as a url and not just as normal text? +The phantom protocol `lnk' is useful for relative links and links to +images, etc. + +> normalProtocols = words "https:// http:// ftp:// mailto: file:/// urn: ldap:// news: telnet://" +> phantomProtocols = words "lnk:" -- later deleted away +> linktoProtocols = phantomProtocols ++ normalProtocols + +== ""img:"" + +Specifies that a url is to be treated as an image in the html +`img src' sense. This protocol is a phantom protocol as well, +meaning that it will get deleted later. You could just as well +have a compound url like this: ""img:http://www.google.com/images/logo_sm.gif"" + +> imageProtocols = words "img:" + +These are all the currently supported protocols, but it +is trivial to add new ones. + +> protocols = imageProtocols ++ linktoProtocols +> protocol :: Parser Elem +> protocol = do p <- choice $ map (try . string) protocols +> url <- many1 $ oneOf validUrlChars +> return $ UrlElem $ Url p url + +What characters are valid in a url? Almost anything on the keyboard +but spaces and ""{}^"|<>[]"" (""[]()"" may normally be valid but +that is bad since we want to use those to group links, plus delimiters are not +``naturally'' part of a URL). From p.12 of + http://www.ietf.org/rfc/rfc3986.txt : + +> validUrlChars = "%:,/?#@!$&'*+=-._~" ++ alpha ++ digits + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% + +== Paragraphs and line endings + +Newline characters (or other line enders, if necessary) + +> enders :: [Char] +> enders = "\n" + +> anEnder :: Parser Char +> anEnder = (oneOf enders "a line ending character") + +A solitary line ending character keeps everything in the same +paragraph; the newline character is returned. This line ender +should not be followed by any ``special beginner'' characters +(bullets in a list, etc.) + +> lineEnd :: Parser Char +> lineEnd = try (do c <- anEnder +> notFollowedBy $ choice [anEnder, oneOf (filter (`notElem` spacing) beginners)] +> return c) +> "one line ending character, no special beginner chars following" + +One or more line ending characters mark a new paragraph. This assumes +that the parsers that eat the internals of the paragraph keep eating +after they swallow a single line ender, unless the next line begins +with special characters (""oneOf beginners"", as above) + +> parEnd :: Parser () +> parEnd = try (do count 1 anEnder -- count 2 anEnder +> skipMany anEnder) "two newlines, OR a newline and one of "++beginners + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% + +== Small simple parsers + +Nothing strange. This string of text is tagged with Txt and we move on +to the next element. + +> plainWordElem :: Parser Elem +> plainWordElem = (do s <- many1 $ (oneOf normalChars "normal char") +> return $ Txt $ s) "normal characters" + +A single line ending. This is preserved as Txt as well. + +> lineEndElem :: Parser Elem +> lineEndElem = (do c <- lineEnd +> return $ Txt $ " ") "a line ending character" -- [c] + +Multiple whitespace is treated as a single space because HTML and TeX +treat whitespace this way. + +> spaceElem :: Parser Elem +> spaceElem = (do s <- many1 $ oneOf spacing +> return $ Txt $ " ") "whitespace" + +The text begins with one of the protocols, ""http://"", ""ftp:"", etc. and +should be treated differently than normal text. + +> urlElem :: Parser Elem +> urlElem = try (do u <- protocol +> return $ u) "a url element" + +== Quasi normal text + +" Deal with single * single / single [ ==> Ent (Literal Char) +" vs **bold** //italic// [[link and/or image]] ==> Bold, Ital, RawLnk + +One //quasiNormal// character is normal and gets returned as a a +Literal Entity. Two //quasiNormal// chars seen sequentially opens an +Ital, Bold, or RawLnk element and a new "many1 piece" parser. + +> quasiNormalCharElem :: Parser Elem +> quasiNormalCharElem = try (do c <- oneOf quasiNormal +> notFollowedBy (oneOf [c]) +> return $ Ent [c]) +> ("only one of "++quasiNormal) + +Pulled from the markuppers below, both opening and closing strings, +this is a simple of list of quasiNormal characters. e.g. ""/*["_$"" + +> quasiNormal :: [Char] +> quasiNormal = flattenUnion $ openerStrs ++ map t2of5 allMarkuppers + +== Markup! + +" //italic// **bold** [[link and/or image]] +" $$math$$ ""typewriter + verbatim"" __foot note__ + +> type OpenerString = String +> type CloserString = String +> type Markupper = (OpenerString, CloserString, +> PState -> Elems -> Elem, +> [String] -> Parser Elems, +> PState->PState) + +Deleting ""s"" from the current list of valid markuppers at this level +makes it so that for example, + +" // followed later by text and then another // then more text + +treats the second ""//"" as a closing delimiter instead of an opener, +which would recurse infinitely... + +> deleter s = \os -> many1 $ pieceMaker $ delete s os + +> allMarkuppers = [ ("//", "//", \_->Ital, deleter "//", id), +> ("**", "**", \_->Bold, deleter "**", id), +> ("<<", ">>", \_->VarInt, \_ -> varInterpParser, id), +> ("[[", "]]", \_-> (lnkRawToReal . RawLnk . stripWs), deleter "[[", id), +> ("\"\"", "\"\"", \_->TT, \_ -> verbatimParser, id ), +> ("__", "__", Fnote, deleter "__", (+1)) ] + +Work on these some more: +" ("$$", "$$", \_->RawLnk, deleter "$$", id) + +Pull out all the opener strings so we can iterate over them in +""markupElem"": + +> openerStrs :: [OpenerString] +> openerStrs = map t1of5 allMarkuppers + +Take an opener string and return its corresponding (1) closer string, +(2) markupTag constructor, or (3) middleParser custom innards +recursive parser. + +> matchingCloser :: OpenerString -> CloserString +> matchingCloser s = case find (\x -> t1of5 x == s) allMarkuppers +> of Nothing -> error $ "Bad markupper opening string: " ++ s +> (Just x) -> t2of5 x +> markupTag :: OpenerString -> (PState -> Elems -> Elem) +> markupTag s = case find (\x -> t1of5 x == s) allMarkuppers +> of Nothing -> error $ "Bad markupper opening string: " ++ s +> (Just x) -> t3of5 x +> middleParser :: OpenerString -> [String] -> Parser Elems +> middleParser s os = case find (\x -> t1of5 x == s) allMarkuppers of +> Nothing -> error $ "Bad markupper opening string: " ++ s +> (Just x) -> (t4of5 x) os +> stateUpdater :: OpenerString -> (PState -> PState) +> stateUpdater s = case find (\x -> t1of5 x == s) allMarkuppers of +> Nothing -> error $ "Bad markupper opening string: " ++ s +> (Just x) -> t5of5 x + +//pieceMaker// takes a list of Strings that describe possible valid +markup openers for recursing on nested sub-paragraphs (using +//markupElem//) and returns a Parser Elem that can be used with many1 +to give you "many pieces" + +> markupElem :: [OpenerString] -> Parser Elem +> markupElem openers = (do s <- try $ choice $ map string openers +> updateState $ stateUpdater s +> ps <- middleParser s openers +> close <- option "" $ string $ matchingCloser s +> n <- getState +> return $ (markupTag s) n $ ps) "markedup text" + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%% + +== Entities + +to deal with non-normal text low level elements, even some +kinds of ASCII text: the characters that break TeX or HTML we escape +ourselves this way. For example, We need entities like ""< & + $ % > +(~ or nbsp) _ # \"" to be escpaed for either TeX or HTML. Plus we can +add special characters or accent characters too. + +> multicharEntityElem :: Parser Elem +> multicharEntityElem = choice $ map specialMaker specialEntSeqs + +> singlecharEntityElem :: Parser Elem +> singlecharEntityElem = choice $ map normalMaker escapeNeededChars + +Sort the list such that the longest strings are first (so longer +matches occur before shorter ones), i.e. "example" matches before "ex" +but if it fails, "ex" can then try and match. The other way around, +"example" would never match. + +> specialEntSeqs = sortBy cmpLengthFst list +> where { list = +> [ ("~~", "nbsp"), -- a non breaking space, ~ in TeX and   in HTML +> ("`", "lsquo"), -- left and right single quote (back tick and apostrophe, pretty much) +> ("'", "rsquo"), -- ` ' +> ("``", "ldquo"), -- left and right double quotes +> ("''", "rdquo"), -- `` '' +> ("--", "ndash"), -- a dash a little longer than a hyphen, expressing range, e.g. pp. 4--5. +> ("---", "mdash"), -- even longer than that, expressing---if you can believe it---caesura +> ("->", "rarr"), -- some arrows for convenience +> ("<-", "larr"), +> ("=>", "rArr"), +> ("<=", "lArr"), +> ("-->", "rarr"), +> ("<--", "larr"), +> ("==>", "rArr"), +> ("<==", "lArr"), +> ("\\ae\\", "aelig"), -- aelig i.e. a and e tied together as one character +> ("\\AE\\", "AElig"), -- AElig i.e. A and E tied together as one character, uppercase +> ("\\~n\\", "ntilde"),-- ~n "enye" the letter in Espa\~n\ol +> ("\\!\\", "iexcl") -- \!\ay! inverted exclamation point +> ] ++ map umlmaker "aAeEiIoOuUyY" -- \"X\ where X is a vowel -- make umlauted vowels +> where { umlmaker x = ("\\\""++[x]++"\\", [x]++"uml") } } +> +> cmpLengthFst x y = compare (length (fst y)) (length (fst x)) +> specialMaker (s,tag) = try $ do { s <- string s; return $ Ent tag } + +> escapeNeededChars = "~%@#+-=<|>&" -- $_/*[]\" --- this is not true: quasiNormal takes care of $, _ etc. + +> normalMaker :: Char -> Parser Elem +> normalMaker c = do { s <- string [c]; return $ Ent [c] } + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%% + +Blessed are the pieceMakers... + +> pieceMaker :: [OpenerString] -> Parser Elem +> pieceMaker openers = (choice [ spaceElem, urlElem, plainWordElem, +> markupElem openers, +> multicharEntityElem, +> quasiNormalCharElem, +> singlecharEntityElem, +> lineEndElem ]) "a low level element (possible fix: "++ +> "close markup before paragraph or line ending)" + +""<>"" is a sort of template replacement to stick the +document represented by ""identifier"" from the environment into the +document at this point. ""varInterpParser"" parses strings of +characters satisfying: + +> varInterpParser :: Parser Elems +> varInterpParser = do c <- oneOf alpha +> s <- many1 $ oneOf $ alpha++digits++"_" +> return$ [ Txt $ [c]++s ] + +Verbatim text is contained in ""double-quotes"" and inside the doulbe +quotes, one quote character is considered automatically escaped. The +only characters not literally quoted are quote and backslash. ""\\\""" +is interpreted as a literal quote and backslash-backslash, (""\\\\"") +which is treated as a single backslash, e.g. ""\\\"\\\""" --> ""\"\""" + +> aDoubleQuote :: Parser Char +> aDoubleQuote = char '"' + +> verbatimParser :: Parser Elems +> verbatimParser = do es <- many1 $ choice [ verbLitParser, onlyOneQuote, lineEndElem, verbEscapeSeq ] +> return es + +> onlyOneQuote, verbLitParser, verbEscapeSeq :: Parser Elem + +> verbEscapeSeq = do char '\\' +> c <- oneOf "\\\"" -- either backslash or quote +> return $ Txt [c] + +> onlyOneQuote = try (do c <- aDoubleQuote +> notFollowedBy aDoubleQuote +> return $ Txt [c]) "exactly one double quote character" + +> verbLitParser = (do s <- many1 $ verbLitCharParser +> return $ Txt $ htmlEntityEncode s) +> "at least one non-quote, non-backslash, non-new-line character." +> verbLitCharParser :: Parser Char +> verbLitCharParser = (oneOf $ verbLitChars) +> "any character except double-quote, backslash, or new line" + +> verbLitChars = spacing++normalChars++"{}^|<>[]%:/?#@!$&`'()*+,;=-._~" + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% + +Clean up a paragraph by stripping leading and trailing whitespace off +of Txt elems, but only in certain situations (e.g. ""[[ http://abc ]]"") + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% + +== Special kinds of paragraphs: += headings, lists/outlines, block quotes, code, comments + +" = headings +" # list/outline with numbers. explicit numbers allow arbitrary count start (later to be done) +" - list/outline with bullets +" * list/outline with bullets (how is this different?) +" : list/outline with no bullets (each : is one identation) +" indented with spaces or tabs = (blockquote) paragraph. +" > code +" < code +" % comment +" " pre-formatted, literal + +Normal beginners + +> normalBegs = spacing ++ "=#*-:" +> beginners = literalBegs ++ normalBegs + +> beginnerStr :: Parser (Char,Int) +> beginnerStr = do seq <- choice beginnerSeqs +> option "" (many1 $ oneOf spacing) +> return $ (head seq, length seq) + +> beginnerSeqs :: [Parser String] +> beginnerSeqs = map (many1 . char) normalBegs + +> normalBeginnerPar :: Parser TopElem +> normalBeginnerPar = try $ +> do (kind,number) <- beginnerStr +> es <- many paragraphParts -- this is normal paragraph innards +> return $ RawBeg kind number es + +The characters ""%"", "">"", ""\""" and ""<"" code blocks read all +characters literally and don't try to treat code or comments as +marked-up text! They also have two addtional requirements: (1) use +only one of them, and (2) put a single space after them, i.e. ""% comment"" + +> literalBegs = "%<>\"" + +> literalSeqs :: [Parser String] +> literalSeqs = map (many1 . char) literalBegs + +> literalBegStr :: Parser (Char,Int) +> literalBegStr = do seq <- choice literalSeqs +> oneOf spacing +> return $ (head seq, length seq) + +> literalBeginnerPar = try $ +> do (kind,number) <- literalBegStr +> es <- literalPar -- this parser reads all text literally +> return $ RawBeg kind number es + +> specialBeginnerPar :: Parser TopElem +> specialBeginnerPar = choice [literalBeginnerPar, normalBeginnerPar] + +> literalPar :: Parser Elems +> literalPar = do cs <- many $ noneOf enders +> return $ [Txt cs] + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% + +> simplePar :: Parser TopElem +> simplePar = do ps <- many1 paragraphParts +> return $ Par ps +> +> paragraphParts :: Parser Elem +> paragraphParts = pieceMaker openerStrs + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% + +=== Putting it all together + +== Paragraphs + +> topLevelPar :: Parser TopElem +> topLevelPar = choice [specialBeginnerPar, simplePar] + +Paragraphs separated by parEnd tokens form a Document + +> aDoc :: Parser Document +> aDoc = (topLevelPar `sepEndBy` parEnd) +> "top level paragraphs separated by paragraph endings "++ +> "(usually two or more newlines)" + +""doc"" of a string is the parsed structured document from the input +string or a little document telling where the error was if it fails + +> doc :: String -> Document +> doc input = case runParser aDoc emptyState "" (filter (/='\r') input) of +> Left err -> [Par [Txt $ "Parse error at " ++ (show err)]] +> Right ps -> postParParse $ filter nonEmpty ps +> where nonEmpty (Par p) = p /= [] +> nonEmpty (RawBeg _ _ p) = p /= [] +> nonEmpty (SpecPar _ ps) = ps/= [] --TODO filter sublists + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% + +Post-process and collect up like TopElem paragraphs and specialize +them, getting rid of all ""RawBeg"" TopElems + +> postParParse :: [TopElem] -> [TopElem] +> postParParse = groupSquash parEq squashTopElems + +Group-and-squasher utility function: + +< groupSquash :: (a->a->Bool) -> ([a]->[a]) -> [a] -> [a] +< groupSquash eq squasher items = +< foldr1 (++) $ map squasher $ groupBy eq items + +Example usage of this sweet utility function: + +< let squasher xs = case head xs of +< 's' -> "$" +< 'p' -> map (const 'q') xs +< _ -> xs +< in +< groupSquash (==) squasher "Mississippi" + +" ==> Mi$i$iqqi + +Now back to the Document. Are two TopElems alike? + +> parEq :: TopElem -> TopElem -> Bool +> parEq (Par _) (Par _) = True +> parEq (RawBeg _ _ _) (RawBeg _ _ _) = True +> parEq _ _ = False + +Given a list of TopElems of one type (i.e. all Pars or Begs), flatten +the Begs and return the Pars or Pres as is + +> squashTopElems :: [TopElem] -> [TopElem] +> squashTopElems ps = +> case head ps of +> (Par _) -> ps +> (RawBeg _ _ _) -> groupSquash begEq begSquash ps +> _ -> error$ "Unsupported" + +Group and squash RawBeg-type TopElems, again with the comparison and +squasher functions. + +> begEq :: TopElem -> TopElem -> Bool +> begEq (RawBeg '=' i _) (RawBeg '=' i' _) = i==i' -- heading nesting levels are different blocks +> begEq (RawBeg '%' i _) (RawBeg '%' i' _) = i==i' -- comments %% and %%% are different blocks +> begEq (RawBeg '<' i _) (RawBeg '<' i' _) = i==i' -- code blocks < and << are different blocks +> begEq (RawBeg '>' i _) (RawBeg '>' i' _) = i==i' -- code blocks > and >> are different blocks +> begEq (RawBeg ' ' i _) (RawBeg ' ' i' _) = False --i==i' -- ?block quotes ' ' and ' ' are diff't blocks? +> begEq (RawBeg '\t' i _) (RawBeg '\t' i' _) = i==i' -- ditto +> begEq (RawBeg c _ _) (RawBeg c' _ _) = c==c' -- everything else: nesting won't matter for eq +> begEq _ _ = False + +Given a list of Begs of one type (i.e. all same char), flatten the +Begs into a Head1, etc. elem + +> begSquash :: [TopElem] -> [TopElem] +> begSquash ps = +> let chopped = map extractEs ps -- # * - : where nesting does something +> outline = pairsToOutline $ map extractNestEs ps -- where we need the number value: +> -- =, %, <, >, space don't +> in case head ps of +> (RawBeg ' ' _ _) -> [SpecPar BQuote chopped] +> (RawBeg '<' _ _) -> [SpecPar Code chopped] +> (RawBeg '>' _ _) -> [SpecPar Code chopped] +> (RawBeg '%' _ _) -> [SpecPar Comment chopped] +> (RawBeg '"' _ _) -> [SpecPar Pre chopped] + +> (RawBeg '=' 1 _) -> [SpecPar Head1 chopped] +> (RawBeg '=' 2 _) -> [SpecPar Head2 chopped] +> (RawBeg '=' 3 _) -> [SpecPar Head3 chopped] +> (RawBeg '=' 4 _) -> [SpecPar Head4 chopped] +> (RawBeg '=' i _) -> error$ "Head level of 1,2,3, or 4 expected. Found head"++show i++"." + +> (RawBeg '-' _ _) -> [TreePar BList outline] +> (RawBeg '*' _ _) -> [TreePar SList outline] +> (RawBeg ':' _ _) -> [TreePar Poetry outline] +> (RawBeg '#' _ _) -> [TreePar NList outline] +> (RawBeg _ _ _) -> error$ "Unsupported type of special paragraph: " ++ show ps + +> extractEs :: TopElem -> Elems +> extractEs (RawBeg _ _ es) = es +> extractEs _ = error "We are only dealing with RawBegs here. This shouldn't happen." + +> extractNestEs :: TopElem -> (Int, Elems) +> extractNestEs (RawBeg _ i es) = (i, es) +> extractNestEs _ = error "We are only dealing with RawBegs here. This shouldn't happen." + +> pairsToOutline :: [(Int, Elems)] -> (Tree Elems) +> pairsToOutline = pairListToTree + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% + +=== Restructure RawLnks + +Turn the handy simple img/lnk notation into sensibly structured data + +== Flatten Elems += into a string for use as AltTxt to an image + +> altTextOf :: Elems -> Maybe AltTxt +> altTextOf es = case toRawString es of { "" -> Nothing ; x -> Just x } + +> toRawString :: (Data a, Typeable a) => a -> String +> toRawString e = strip $ stringize e + +Collect and concatenate all strings in the given document or element +or toplevel or whatever! + +> stringize :: (Data a, Typeable a) => a -> String +> stringize t = str t +> where str :: (Data a, Typeable a) => a -> String +> str s = case cast s of +> Just s -> s -- stop recursion on a string, hopefully +> Nothing -> findLits s +> findLits :: (Typeable a, Data a) => a -> String +> findLits t = case cast_ent t of -- render literals literally! +> Just ent -> case length ent of { 1 -> ent; _ -> entToString ent } +> Nothing -> concat (gmapQ stringize t) -- recurse and look for strings + +> cast_ent :: (Typeable a) => a -> Maybe String +> cast_ent e = case cast_elem e of +> Just elm -> pulloutEnt elm +> Nothing -> Nothing +> cast_elem :: (Typeable a) => a -> Maybe Elem +> cast_elem = cast +> pulloutEnt :: Elem -> Maybe String +> pulloutEnt (Ent e) = Just e +> pulloutEnt _ = Nothing + +> entToString :: Entity -> String +> entToString e = +> case revlookup e specialEntSeqs of +> Nothing -> error$ "Unrecognized entity in attempt to convert to raw text: "++ show e +> Just s -> s + +Look up a key in a [(value, key)] environment (specialEntSeqs is +shaped this way) + +> revlookup :: Eq a => a -> [(b, a)] -> Maybe b +> revlookup k env = lookup k flipenv +> where flipenv = map flip2tup env +> flip2tup (a,b) = (b,a) + +== Strip phantom protocol + +Throw away the made up protocols such as ""lnk:"" + +> phantomStrip :: Url -> Url +> phantomStrip (Url p u) = if p `elem` phantomProtocols then (Url "" u) else (Url p u) + +> unphantom :: ProtocolStr -> ProtocolStr +> unphantom p = if p `elem` phantomProtocols then "" else p + +Pull out possible img or target for a link + +> imgOfUrl, targetUrl :: Elem -> Maybe Url +> imgOfUrl (UrlElem (Url p u)) = if p `elem` imageProtocols then Just (Url "" u) else Nothing +> imgOfUrl _ = Nothing +> targetUrl (UrlElem (Url p u)) = if p `elem` linktoProtocols then Just (Url p u) else Nothing +> targetUrl _ = Nothing + +Ensure that a given element is not a UrlElem so we can pass that along +unaltered. + +> nonUrl :: Elem -> Elems -- singleton or empty list, i.e. [e] or [] +> nonUrl (UrlElem _) = [] +> nonUrl e = [e] + +Convert a RawLnk to Img or Lnk, if possible + +> lnkRawToReal = realLnkToElem . rawLnkToLnk + +Collect up relevant information in a link grouping. We automate this +so that (1) we don't have to be too verbose, and (2) we don't have to +remember the order of "arguments". All combinations should "do the +right thing". + +> rawLnkToLnk :: Elem -> Elem +> rawLnkToLnk (RawLnk es) = realLnk es [] Nothing Nothing +> rawLnkToLnk x = x -- do nothing to non-RawLnk elements +> +> realLnk :: Elems -> Elems -> (Maybe Url) -> (Maybe Url) -> Elem +> -- ran out of input elements, see what we have accumulated as im or target +> realLnk [] es im target = +> case im of +> { Nothing -> case target of Nothing -> RawLnk es +> Just t -> let t'@(Url p u) = phantomStrip t in +> if es /= [] then Lnk t' es +> else Lnk t' [Txt (p++u)] +> ; Just i -> Img i target (altTextOf es) } +> -- see what the head of input elements is and update our im or target accumulator +> realLnk (e:es) tailEs im target = +> let im' = imgOfUrl e +> targ' = case im' of Nothing -> targetUrl e +> _ -> Nothing +> e' = nonUrl e -- singleton or empty list, i.e. [e] or [] +> in realLnk es (tailEs ++ e') (im `mplus` im') (target `mplus` targ') + +Finish processing the ""Elem""s with Urls, Imgs, or Lnks to be in final form. + +> realLnkToElem :: Elem -> Elem +> realLnkToElem (UrlElem url) = +> let (Url p u) = url in +> if p `elem` imageProtocols then Img (Url "" u) Nothing Nothing --"" +> else let (Url p' u') = phantomStrip url in UrlElem (Url p' u') +> --"" ++p'++u'++ "" +> realLnkToElem (Img (Url _ i) targ a) = +> -- let altt = case a of Nothing -> "" +> -- Just s -> "alt=\""++s++"\"" in +> case targ of Nothing -> Img (Url "" i) Nothing a --"" +> Just (Url p u) -> Img (Url "" i) (Just (Url (unphantom p) u)) a +> -- ""++ +> -- " -- altt ++" />" +> realLnkToElem (Lnk (Url p u) es) = Lnk (Url p u) (stripWs es) +> -- "" ++ strip (elemsToHtmlString es) ++ "" +> realLnkToElem x = x -- identity on everything else + +%%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% %%%%%%%% + +> test p s = runParser p emptyState "" s + +Example usage: + +< test entityElem "<--" + addfile ./PPrint.hs hunk ./PPrint.hs 1 +----------------------------------------------------------- +-- Daan Leijen (c) 2000, http://www.cs.uu.nl/~daan +-- +-- $version: $ +-- +-- Pretty print module based on Philip Wadlers "prettier printer" +-- "A prettier printer" +-- Draft paper, April 1997, revised March 1998. +-- http://cm.bell-labs.com/cm/cs/who/wadler/papers/prettier/prettier.ps +-- +-- Haskell98 compatible +----------------------------------------------------------- +module PPrint + ( Doc + , Pretty, pretty + + , show, putDoc, hPutDoc + + , (<>) + , (<+>) + , (), () + , (<$>), (<$$>) + + , sep, fillSep, hsep, vsep + , cat, fillCat, hcat, vcat + , punctuate + + , align, hang, indent + , fill, fillBreak + + , list, tupled, semiBraces, encloseSep + , angles, langle, rangle + , parens, lparen, rparen + , braces, lbrace, rbrace + , brackets, lbracket, rbracket + , dquotes, dquote, squotes, squote + + , comma, space, dot, backslash + , semi, colon, equals + + , string, bool, int, integer, float, double, rational + + , softline, softbreak + , empty, char, text, line, linebreak, nest, group + , column, nesting, width + + , SimpleDoc(..) + , renderPretty, renderCompact + , displayS, displayIO + ) where + +import IO (Handle,hPutStr,hPutChar,stdout) + +infixr 5 ,,<$>,<$$> +infixr 6 <>,<+> + + +----------------------------------------------------------- +-- list, tupled and semiBraces pretty print a list of +-- documents either horizontally or vertically aligned. +----------------------------------------------------------- +list = encloseSep lbracket rbracket comma +tupled = encloseSep lparen rparen comma +semiBraces = encloseSep lbrace rbrace semi + +encloseSep left right sep ds + = case ds of + [] -> left <> right + [d] -> left <> d <> right + _ -> align (cat (zipWith (<>) (left : repeat sep) ds) <> right) + + +----------------------------------------------------------- +-- punctuate p [d1,d2,...,dn] => [d1 <> p,d2 <> p, ... ,dn] +----------------------------------------------------------- +punctuate p [] = [] +punctuate p [d] = [d] +punctuate p (d:ds) = (d <> p) : punctuate p ds + + +----------------------------------------------------------- +-- high-level combinators +----------------------------------------------------------- +sep = group . vsep +fillSep = fold () +hsep = fold (<+>) +vsep = fold (<$>) + +cat = group . vcat +fillCat = fold () +hcat = fold (<>) +vcat = fold (<$$>) + +fold f [] = empty +fold f ds = foldr1 f ds + +x <> y = x `beside` y +x <+> y = x <> space <> y +x y = x <> softline <> y +x y = x <> softbreak <> y +x <$> y = x <> line <> y +x <$$> y = x <> linebreak <> y + +softline = group line +softbreak = group linebreak + +squotes = enclose squote squote +dquotes = enclose dquote dquote +braces = enclose lbrace rbrace +parens = enclose lparen rparen +angles = enclose langle rangle +brackets = enclose lbracket rbracket +enclose l r x = l <> x <> r + +lparen = char '(' +rparen = char ')' +langle = char '<' +rangle = char '>' +lbrace = char '{' +rbrace = char '}' +lbracket = char '[' +rbracket = char ']' + +squote = char '\'' +dquote = char '"' +semi = char ';' +colon = char ':' +comma = char ',' +space = char ' ' +dot = char '.' +backslash = char '\\' +equals = char '=' + + +----------------------------------------------------------- +-- Combinators for prelude types +----------------------------------------------------------- + +-- string is like "text" but replaces '\n' by "line" +string "" = empty +string ('\n':s) = line <> string s +string s = case (span (/='\n') s) of + (xs,ys) -> text xs <> string ys + +bool :: Bool -> Doc +bool b = text (show b) + +int :: Int -> Doc +int i = text (show i) + +integer :: Integer -> Doc +integer i = text (show i) + +float :: Float -> Doc +float f = text (show f) + +double :: Double -> Doc +double d = text (show d) + +rational :: Rational -> Doc +rational r = text (show r) + + +----------------------------------------------------------- +-- overloading "pretty" +----------------------------------------------------------- +class Pretty a where + pretty :: a -> Doc + prettyList :: [a] -> Doc + prettyList = list . map pretty + +instance Pretty a => Pretty [a] where + pretty = prettyList + +instance Pretty Doc where + pretty = id + +instance Pretty () where + pretty () = text "()" + +instance Pretty Bool where + pretty b = bool b + +instance Pretty Char where + pretty c = char c + prettyList s = string s + +instance Pretty Int where + pretty i = int i + +instance Pretty Integer where + pretty i = integer i + +instance Pretty Float where + pretty f = float f + +instance Pretty Double where + pretty d = double d + + +--instance Pretty Rational where +-- pretty r = rational r + +instance (Pretty a,Pretty b) => Pretty (a,b) where + pretty (x,y) = tupled [pretty x, pretty y] + +instance (Pretty a,Pretty b,Pretty c) => Pretty (a,b,c) where + pretty (x,y,z)= tupled [pretty x, pretty y, pretty z] + +instance Pretty a => Pretty (Maybe a) where + pretty Nothing = empty + pretty (Just x) = pretty x + + + +----------------------------------------------------------- +-- semi primitive: fill and fillBreak +----------------------------------------------------------- +fillBreak f x = width x (\w -> + if (w > f) then nest f linebreak + else text (spaces (f - w))) + +fill f d = width d (\w -> + if (w >= f) then empty + else text (spaces (f - w))) + +width d f = column (\k1 -> d <> column (\k2 -> f (k2 - k1))) + + +----------------------------------------------------------- +-- semi primitive: Alignment and indentation +----------------------------------------------------------- +indent i d = hang i (text (spaces i) <> d) + +hang i d = align (nest i d) + +align d = column (\k -> + nesting (\i -> nest (k - i) d)) --nesting might be negative :-) + + + +----------------------------------------------------------- +-- Primitives +----------------------------------------------------------- +data Doc = Empty + | Char Char -- invariant: char is not '\n' + | Text !Int String -- invariant: text doesn't contain '\n' + | Line !Bool -- True <=> when undone by group, do not insert a space + | Cat Doc Doc + | Nest !Int Doc + | Union Doc Doc -- invariant: first lines of first doc longer than the first lines of the second doc + | Column (Int -> Doc) + | Nesting (Int -> Doc) + +data SimpleDoc = SEmpty + | SChar Char SimpleDoc + | SText !Int String SimpleDoc + | SLine !Int SimpleDoc + + +empty = Empty + +char '\n' = line +char c = Char c + +text "" = Empty +text s = Text (length s) s + +line = Line False +linebreak = Line True + +beside x y = Cat x y +nest i x = Nest i x +column f = Column f +nesting f = Nesting f +group x = Union (flatten x) x + +flatten :: Doc -> Doc +flatten (Cat x y) = Cat (flatten x) (flatten y) +flatten (Nest i x) = Nest i (flatten x) +flatten (Line break) = if break then Empty else Text 1 " " +flatten (Union x y) = flatten x +flatten (Column f) = Column (flatten . f) +flatten (Nesting f) = Nesting (flatten . f) +flatten other = other --Empty,Char,Text + + + +----------------------------------------------------------- +-- Renderers +----------------------------------------------------------- + +----------------------------------------------------------- +-- renderPretty: the default pretty printing algorithm +----------------------------------------------------------- + +-- list of indentation/document pairs; saves an indirection over [(Int,Doc)] +data Docs = Nil + | Cons !Int Doc Docs + +renderPretty :: Float -> Int -> Doc -> SimpleDoc +renderPretty rfrac w x + = best 0 0 (Cons 0 x Nil) + where + -- r :: the ribbon width in characters + r = max 0 (min w (round (fromIntegral w * rfrac))) + + -- best :: n = indentation of current line + -- k = current column + -- (ie. (k >= n) && (k - n == count of inserted characters) + best n k Nil = SEmpty + best n k (Cons i d ds) + = case d of + Empty -> best n k ds + Char c -> let k' = k+1 in seq k' (SChar c (best n k' ds)) + Text l s -> let k' = k+l in seq k' (SText l s (best n k' ds)) + Line _ -> SLine i (best i i ds) + Cat x y -> best n k (Cons i x (Cons i y ds)) + Nest j x -> let i' = i+j in seq i' (best n k (Cons i' x ds)) + Union x y -> nicest n k (best n k (Cons i x ds)) + (best n k (Cons i y ds)) + + Column f -> best n k (Cons i (f k) ds) + Nesting f -> best n k (Cons i (f i) ds) + + --nicest :: r = ribbon width, w = page width, + -- n = indentation of current line, k = current column + -- x and y, the (simple) documents to chose from. + -- precondition: first lines of x are longer than the first lines of y. + nicest n k x y | fits width x = x + | otherwise = y + where + width = min (w - k) (r - k + n) + + +fits w x | w < 0 = False +fits w SEmpty = True +fits w (SChar c x) = fits (w - 1) x +fits w (SText l s x) = fits (w - l) x +fits w (SLine i x) = True + + +----------------------------------------------------------- +-- renderCompact: renders documents without indentation +-- fast and fewer characters output, good for machines +----------------------------------------------------------- +renderCompact :: Doc -> SimpleDoc +renderCompact x + = scan 0 [x] + where + scan k [] = SEmpty + scan k (d:ds) = case d of + Empty -> scan k ds + Char c -> let k' = k+1 in seq k' (SChar c (scan k' ds)) + Text l s -> let k' = k+l in seq k' (SText l s (scan k' ds)) + Line _ -> SLine 0 (scan 0 ds) + Cat x y -> scan k (x:y:ds) + Nest j x -> scan k (x:ds) + Union x y -> scan k (y:ds) + Column f -> scan k (f k:ds) + Nesting f -> scan k (f 0:ds) + + + +----------------------------------------------------------- +-- Displayers: displayS and displayIO +----------------------------------------------------------- +displayS :: SimpleDoc -> ShowS +displayS SEmpty = id +displayS (SChar c x) = showChar c . displayS x +displayS (SText l s x) = showString s . displayS x +displayS (SLine i x) = showString ('\n':indentation i) . displayS x + +displayIO :: Handle -> SimpleDoc -> IO () +displayIO handle simpleDoc + = display simpleDoc + where + display SEmpty = return () + display (SChar c x) = do{ hPutChar handle c; display x} + display (SText l s x) = do{ hPutStr handle s; display x} + display (SLine i x) = do{ hPutStr handle ('\n':indentation i); display x} + + +----------------------------------------------------------- +-- default pretty printers: show, putDoc and hPutDoc +----------------------------------------------------------- +instance Show Doc where + showsPrec d doc = displayS (renderPretty 0.4 80 doc) + +putDoc :: Doc -> IO () +putDoc doc = hPutDoc stdout doc + +hPutDoc :: Handle -> Doc -> IO () +hPutDoc handle doc = displayIO handle (renderPretty 0.4 80 doc) + + + +----------------------------------------------------------- +-- insert spaces +-- "indentation" used to insert tabs but tabs seem to cause +-- more trouble than they solve :-) +----------------------------------------------------------- +spaces n | n <= 0 = "" + | otherwise = replicate n ' ' + +indentation n = spaces n + +--indentation n | n >= 8 = '\t' : indentation (n-8) +-- | otherwise = spaces n addfile ./Template.hs hunk ./Template.hs 1 +module Template ( template ) where + +import IO +import Data.Generics +import Time +import Locale ( defaultTimeLocale ) +import List ( intersperse, transpose ) +import System.Random hiding ( split ) +import System.Directory -- ( doesFileExist ) +import Data.Maybe ( catMaybes ) +import Char ( isLower ) + +import Doc +import Mmlparse +import Doc2Html +import XHTML +import HTML +import Useful + +-- horizontal align 2 elements +a <<>> b = matrix2tableClass "horiz" [[[a], [b]]] + +horz a b c = matrix2tableClass "horiz" [[[a], [b], [c]]] + +-- vertical align 2 elements +a /// b = matrix2tableClass "vert" [[ [a] ], + [ [b] ]] + +-- something more nifty +mytable a b = elm "table" ["class"-->"mytable"] rows + where rows = [elm "tr" [] tds] + tds = [elm "td" ["class"-->"goleft"] [a], + elm "td" ["class"-->"goright"] [b]] + +iframe = around "iframe" + +--elm "a" ["id"-->name, "href"-->target] [elm "img" ["src"-->"images/search-blank.gif", "alt"-->name] []] + +searchbar_form = elm "form" form_attrs [formtable] + where form_attrs = ["action"-->"javascript:submit();", "id"-->"searchform", "name"-->"f"] + formtable = horz xbutton (txt "") xinput + xinput = "input" `with` ["size"-->"60", "id"-->"field", + "name"-->"q", "type"-->"text"] +-- xbutton = "input" `with` ["name"-->"btnG", "id"-->"image", "class"-->"search", +-- "type"-->"image", "src"-->"images/search-blank.gif", "maxlength"-->"256"] + xbutton = "input" `with` ["name"-->"btnG", + "type"-->"submit", "value"-->"   Search   ", "maxlength"-->"256"] + +tdClass name cs = elm "td" ["class"-->name] cs +tdClass2 name cs = elm "td" ["colspan"-->"2", "class"-->name] cs +tdClassN n name cs = elm "td" ["colspan"-->show n, "class"-->name] cs +tr = around "tr" + +center el = around "center" [el] + +pageLayout loggedin atitle quote author left message pageName xs = {-center-} elm "table" ["class"-->"bigtable"] [row1, row2, row3] + where row1 = tr [tdClass2 "paneTopLeft" (topleft_top loggedin pageName atitle)] + row2 = tr [tdClass "paneLeft" left, tdClass "paneContent" (message ++ xs)] + row3 = tr [tdClass "paneLeftBottom" bottom_left, tdClass "paneFooter" (footer quote author)] + +makeFlipper name target = elm "a" ["id"-->name, "href"-->target] [elm "img" ["src"-->"images/blank.gif", "alt"-->name] []] + +loginBtn p = makeFlipper "login" ("index.cgi?action=login&page=" ++ p) +logoutBtn p = makeFlipper "logout" ("index.cgi?action=logout&page=" ++ p) +newBtn p = makeFlipper "add" ("index.cgi?action=add&page=" ++ p) +pdfBtn p = makeFlipper "pdf" ("index.cgi?action=viewpdf&page=" ++ p) +editBtn p = makeFlipper "edit" ("index.cgi?action=edit&page=" ++ p) + +loggedoutBtns p = [pdfBtn p, loginBtn p] +loggedinBtns p = [editBtn p, pdfBtn p, newBtn p, logoutBtn p] + +fig8 = elm "a" ["href"-->"index.cgi?Fig8"] [imgSrcAlt "images/fig8-small.png" "Hypnotic Figure Eight Klein Bottle Emblem"] + +spacePad = txt " " +blankline = txt " 
" +-- header heading top banner part, whatumcallit +topleft_top loggedin p atitle = [mytable (fig8 <<>> (h1 [spacePad, spacePad, spacePad, txt ""] -- /// + -- imgSrcAlt "images/banner.png" "home of Jared Updike on the web + )) + (elm "p" ["class"-->"goright"] (intersperse spacePad buttons))] + where buttons = if loggedin then loggedinBtns p else loggedoutBtns p + +ppClass c es = elm "p" ["class"-->c] es + +bottom_left = [pp extras] + +footer quote author = [pp [txt " "]] + +{- +footer quote author = [ppClass "goright" [txt quote], + ppClass "goright" [ii [txt$ "   —"++author]], + pp [txt "Copyright (C) 2007 Jared Updike"]] +-} +hiddeninput k v = "input" `with` ["type"-->"hidden", "name"-->k, "value"-->v] + + +loginform pageName = + elm "form" ["method"-->"post", "id"-->"loginform", "name"-->"f", + "action"-->("index.cgi?action=validate&page="++pageName), "onsubmit"-->"scramble();"] + ([matrix2tableClass "horiz" [[ [txt "Username "],[xusername] ] , + [ [txt "Password "],[xpassword] ] , + [ [] ,[xsubmit ] ]] ]) -- ++ xhiddens) + where xusername = "input" `with` ["size"-->"20", "name"-->"username", "type"-->"text"] + xpassword = "input" `with` ["size"-->"20", "name"-->"password", "type"-->"password"] + xsubmit = "input" `with` ["type"-->"submit", "value"-->"Log In"] + --xhiddens = [hiddeninput "action" "validate", + -- hiddeninput "page" pageName] + +jscriptAts = ["type"-->"text/javascript"] + +script fname = "script" `with` (jscriptAts ++ ["src"-->fname]) + +scripts somescripts = + (if sIFRuse then sIFRinclude else []) ++ somescripts ++ + [ comment ["[if gte IE 5.5000]>", + "", + " gets added + +---- sIFR stuff ---- +-------------------- +sIFRuse = False + +sIFRinclude = ["link" `with` ["href"-->"css/sIFR-screen.css", "rel"-->"stylesheet", "media"-->"screen", + "type"-->"text/css"], + "link" `with` ["href"-->"css/sIFR-print.css", "rel"-->"stylesheet", "media"-->"print", + "type"-->"text/css"], + script "scripts/sifr.js", + script "scripts/sifr-addons.js"] + +sIFR = Comm "" [blah] "" --elm "script" jscriptAts [Comm "//"] + where blah = "\n" + +white = "ffffff" +black = "000000" +skyblue = "5eb4ff" + +sIFRscriptlines = "if (typeof sIFR == \"function\") {\n" ++ replacers ++ "};\n" + where replacers = concatMap replacer [("body h1", black, white), + ("body h2", black, white), + ("body h3", white, skyblue), + ("body h4", black, white)] + replacer (selector,col,bgcol) = "sIFR.replaceElement(named({sSelector:\""++selector++"\", sFlashSrc:\"tradegothic.swf\", sColor:\"#"++col++"\", sLinkColor:\"#000000\", sBgColor:\"#"++bgcol++"\", sHoverColor:\"#CCCCCC\"}));\n" +------------------------ +---- end sIFR stuff ---- + +validateUrlCSS = "http://jigsaw.w3.org/css-validator/validator?uri=http://www.updike.org/" +validateUrlXHTML = "http://validator.w3.org/check?uri=http://www.updike.org/" +--"http://validator.w3.org/check?uri=referer" + +extras = [lnk validateUrlXHTML [validateIconXHTML], + lnk validateUrlCSS [validateIconCSS], + txt "   "] + where validateIconXHTML = imgSrcAlt "images/w3c-valid-xhtml.gif" "Valid XHTML 1.0 Transitional!" + validateIconCSS = imgSrcAlt "images/w3c-valid-css.gif" "Valid CSS!" + + +----------- Variable substitution + +type Env = [(String, Elem)] +type XMLEnv = [(String, XML)] + +-- stage 1: substitute in the structured Document + +defaultEnv = [("edit", Txt ".")] + +varSubst :: Env -> Elem -> Elem +varSubst env = varInt + where varInt (VarInt es) = lookupper es + varInt x = x + lookupper es = + let k = (stringize es) in + case lookup k env of + Nothing -> VarInt es + Just v -> v +subst :: Env -> Document -> Document +subst env = everywhere (mkT (varSubst env)) + +-- stage 2: substitute XML + +xmlvarSubst :: XMLEnv -> XML -> XML +xmlvarSubst env = varInt + where varInt (Hole es) = lookupper es + varInt x = x + lookupper k = + case lookup k env of + Nothing -> tt [txt $ "<<" ++ k ++ ">>"] + Just v -> v +xmlSubst :: XMLEnv -> [XML] -> [XML] +xmlSubst env = everywhere (mkT (xmlvarSubst env)) + +tryfilter = replace "%s" "QUERY
" . replace "?" "?
" . eat "http://" . eat "http://www." + +items2table items = matrix2table$ [[bb [txt "Keyword   "]], [bb [txt "URL"]]] : pairs + where pairs = [[[txt a], [txt$ tryfilter b]] | (a:b:_) <- (map (split ',') items)] + +getLink :: Document -> [Elem] +getLink = everything (++) ([] `mkQ` lnkMatcher) + where lnkMatcher :: Elem -> [Elem] + lnkMatcher e@(Lnk _ _) = [e] + lnkMatcher _ = [] + +-- bullet = txt "∘" --ring operator or composition dot +bullet = txt "•" -- => bullet = txt "•" -- black bullet +--bullet = txt "○" -- white circle + +linkLike :: FilePath -> [String] -> String -> IO [XML] +linkLike root ortags link = do + cnts <- readFile (root++"links/"++link++".tags") + let linktags = filter (/="") $ lines cnts -- (\x -> x /= "" && (not $ isLower $ head x)) $ lines cnts + let cond = any (`elem` ortags) linktags || + (filter (not . isLower . head) ortags == [] && filter (not . isLower . head) linktags == []) + if cond + then do cs <- readFile (root++"links/"++link++".txt") + let d = doc cs + let ls = getLink d + return $ if length ls > 0 then + [lnk ("index.cgi?action=edit&page=links/"++link) [bullet], txt "   ", elemToXML $ head ls, br] + else [] + else return [] + +linksLike :: FilePath -> [String] -> IO [XML] +linksLike root tags = do + files <- getDirectoryContents (root ++ "/links") + let possible_links = map (replace ".tags" "") $ filter (`contains` ".tags") files + links <- mapM (linkLike root tags) possible_links + return$ concat links + +getXmlEnv pageName root newPage = do + let pageFilename = root++pageName++".txt" +-- cnts <- readFile "scripts/search.js" +-- let items = tail $ takeWhile (/="//END") +-- $ dropWhile (/="//BEGIN") +-- $ lines +-- $ filter (/= '"') cnts + allstr <- readFile (root++"tags.txt") + let alltags = lines allstr + --let tagstrs = unlines alltagstr --zip (map show [1..22]) (map odd [1..22]) + fileExists <- doesFileExist (root++pageName++".tags") + somestr <- if fileExists then readFile (root++pageName++".tags") else return "" + let sometags = filter (/="") $ lines somestr + let tags = zip alltags (map (`elem` sometags) alltags) + editpageform <- editform tags pageName pageFilename newPage + links <- linksLike root sometags + return [("searchbar", searchbar_form) + ,("loginform", loginform pageName) +-- ,("searchhelp", items2table items) + ,("editpage", editpageform) + ,("renamepage", renamepageform pageName) + ,("numlinks", txt $ show (length links `div` 4)) + ,("links", pp links) --(map (\x->"li" `around` [x]) links)) + ] + +textarea cnts = elm "textarea" ["name"-->"content", "cols"-->"72", "rows"-->"20"] [txt $ cnts] + +maketagcheckbox ("", _) = [spacePad] +maketagcheckbox (label, checked) = ["input" `with` checkedAtt++["type"-->"checkbox", + "name"-->("TAG_"++(tagencode label))], + txt (' ':label), spacePad, spacePad] + where checkedAtt = if checked then ["checked"-->"on"] else [] + +tagcheckboxtable labels = matrix2table rows + where n = length labels + n' = numrows*numcols + labels' = labels ++ replicate (n'-n) empty + empty = ("", False) + numrows = (n+numcols-1) `div` numcols + numcols = 3 + cols = let f [] = [] + f xs = let (x,rest) = splitAt numrows xs + in x : f rest + in f $ map maketagcheckbox labels + rows = transpose cols + +renamepageform oldpagename = + elm "form" ["method"-->"post", "id"-->"loginform", "name"-->"f", + "action"-->("index.cgi?action=movepage&page="++oldpagename)] + ([matrix2tableClass "horiz" [[ [txt "Old page name "],[txt oldpagename] ] , + [ [txt "New page name "],[xnewpage] ] , + [ [ ] ,[xsubmit ] ]] ]) -- ++ xhiddens) + where xnewpage = "input" `with` ["size"-->"40", "name"-->"newpagename", "type"-->"text"] + xsubmit = "input" `with` ["type"-->"submit", "value"-->"Rename page"] + +editform tags page filename newPage = do +-- let (newPage, newtitle) = case newpagetitle of { Nothing -> (False, "") ; Just a -> (True, a) } + cnts <- if newPage then return "" else do { cnts' <- readFile filename; return $ fixEndingNewlines cnts' } + let editTitleLine = [ txt page, spacePad, spacePad, spacePad, spacePad, spacePad, spacePad, + elm "a" ["style"-->"font-size: 68%;", + "href"-->("index.cgi?action=rename&page="++page)] [txt "(Rename)"] ] + let addTitleLine = [ xtitle ] + let firstLine = if newPage then addTitleLine else editTitleLine +-- let targetpage = if newPage then page else newtitle + return $ elm "form" ["method"-->"post", "id"-->"loginform", "name"-->"f", + "action"-->("index.cgi?action=modify")] --, "onsubmit"-->"scramble();"] + ([matrix2tableClass "horiz" + [[ firstLine + ], + [ [textarea cnts] ], + [ [bb [txt "Tags"]] ], + [ xtags ], + [ [bb [txt "Other Tags"], spacePad, spacePad, spacePad, xtagsbox] ], + [ [xsubmit, + spacePad, spacePad, spacePad, spacePad, spacePad, spacePad, + spacePad, spacePad, spacePad, spacePad, spacePad, spacePad, + aa ("index.cgi?"++page) "Cancel"] ] + ] + ] ++ xhiddens) + where xtitle = "input" `with` ["size"-->"20", "name"-->"page", "type"-->"text", + "value"-->page] + xtagsbox = "input" `with` ["size"-->"65", "name"-->"tagsbox", "type"-->"text", + "value"-->""] + xsubmit = "input" `with` ["type"-->"submit", "value"-->"Submit these changes"] + --xcancel = "input" `with` ["type"-->"button", "value"-->"Cancel", + -- "onClick"-->("index.cgi?"++page)] + xhiddens = [hiddeninput "action" "modify", + hiddeninput "page" page] + xtags = [ tagcheckboxtable tags ] + +-- find holes in the document + +findVarSubsts :: Document -> [String] +findVarSubsts = everything (++) ([] `mkQ` varIntMatcher) + where varIntMatcher :: Elem -> [String] + varIntMatcher (VarInt es) = [stringize es] + varIntMatcher _ = [] + +lmap :: (Eq a) => [(a,b)] -> [a] -> [b] +lmap env xs = catMaybes (map (\k-> lookup k env) xs) + +-- find the first Head4 for use as the Title + +findHead4sSubsts :: Document -> [String] +findHead4sSubsts = everything (++) ([] `mkQ` head4Matcher) + where head4Matcher :: TopElem -> [String] + head4Matcher (SpecPar Head4 es) = [stringize es] + head4Matcher _ = [] + +-- certain widgets need certain javascript files loaded (these +-- javascripts are also responsible to move the cursor where they want +-- it in a startup() function + +widgetsScriptsEnv = + [("searchbar", script "scripts/search.js") + ,("loginform", script "scripts/login.js") + ,("renamepage", script "scripts/rename.js") + ] + +noTimeDiff = TimeDiff { tdYear = 0, tdMonth = 0, tdDay = 0, tdHour = 0, tdMin = 0, tdSec = 0, tdPicosec = 0 } + +--- MAIN I/O ----------------------------------- + +template loggedin messagestr pageName root filename = do + mytemplate <- readFile "/home/public/template.html" + + titlestr <- readFile (root++"titles.txt") + let possibletitles = filter (/="") (lines titlestr) + rint <- (randomIO :: IO Int) + let rindex = rint `mod` (length possibletitles) + let atitle = possibletitles !! rindex + leftstr <- readFile (root++"navigation.txt") + let navigation = doc2xml $ doc leftstr + --linkstr <- readFile (root++"links.txt") + --let links = doc2xml $ doc linkstr + let newPage = filename `endsWith` "add.txt" + + time' <- getClockTime + let time = addToClockTime (noTimeDiff { tdHour = -8 }) time' + ctime <- toCalendarTime time + -- let CalendarTime { ctHour = utchour } = ctime' + -- let ctime = ctime' -- { ctHour = (utchour+16) `mod` 24 } -- PST is -8 hours + -- --let datestr = formatCalendarTime defaultTimeLocale "%Y%m%d%H%M%S" ctime + let datestr1 = formatCalendarTime defaultTimeLocale "%A, " ctime + let datestr2 = formatCalendarTime defaultTimeLocale "%d %B %Y" ctime + + -- chop leading zero off day of the month number + let datestr = datestr1 ++ (if head datestr2 == '0' then tail datestr2 else datestr2) + + let env = ("date", Txt datestr):defaultEnv + + let quote = "Quote me as saying I was mis-quoted." + let author = "Groucho Marx" + + -- read in the input file or stdin if filename is - + s <- if filename=="-" then getContents else readFile filename + let d = subst env $ doc s + xmlEnv <- getXmlEnv pageName root newPage + let xs = xmlSubst xmlEnv (doc2xml d) -- get rid of all holes + let xmlString = xmlToStr xs + let left = xmlSubst xmlEnv (navigation) -- ++ links) + let leftString = xmlToStr left + + let message = if messagestr /= "" then [ppClass "message" [aa ("index.cgi?"++pageName) "X", txt " ", txt messagestr]] else [] + let body = hbody ["onload"-->"startup();"] + ([pageLayout loggedin atitle quote author left message pageName xs] ++ (if sIFRuse then [sIFR] else [])) + let somescripts = lmap widgetsScriptsEnv (findVarSubsts d) + let title_maybe = findHead4sSubsts d + let title = if title_maybe == [] then "Untitled - add a title" else head title_maybe + let page = xhtmlPage title "css/alpha.css" (scripts somescripts) body + --putStrLn $ page + putStrLn $ templateReplace mytemplate [ ("@content", xmlString) + , ("@navigation", leftString) + , ("@title", title) + ] + +templateReplace templ [] = templ +templateReplace templ ((a,b):rest) = templateReplace (replace a b templ) rest + +-- main = template False "" "-" + +-- tester = template False "" "/home/jared/alpha/login.txt" addfile ./Useful.hs hunk ./Useful.hs 1 +module Useful where + +import List ( intersperse, tails ) +import Numeric ( readHex ) +import Char ( chr ) + +------------------------------- +-- Really useful functions + +hex2num :: (Num a) => String -> a +hex2num s = let (result, _):_ = readHex s in result + +toEnv s = let cleanup = replace "; " ";" s in map tuple (split ';' cleanup) + +tuple :: String -> (String, String) +tuple line = case split '=' line of + a:b:_ -> (a,b) + a:_ -> (a,"") + _ -> ("","") -- not good, probably won't happen... + +tabtuple :: String -> (String, String) +tabtuple line = case split '\t' line of + a:b:_ -> (a,b) + a:_ -> (a,"") + _ -> ("","") -- not good, probably won't happen... + +------------------------------- +-- Titles and tags +tagencode s = concat $ intersperse "_" $ map fix (words s) + where fix "&" = "N" + fix x = x + +tagdecode s = concat $ intersperse " " $ map unfix (split '_' s) + where unfix "N" = "&" + unfix x = x + +------------------------------- +-- Strings + +split :: Char -> String -> [String] +split _ "" = [] +split c s = let (l, s') = break (== c) s + in l : case s' of + [] -> [] + (_:s'') -> split c s'' + +beginsWith [] [] = True +beginsWith _ [] = True +beginsWith [] _ = False +beginsWith (a:aa) (b:bb) + | a == b = aa `beginsWith` bb + | otherwise = False + +dropping [] [] = [] +dropping [] _ = [] +dropping x [] = x +dropping s@(a:aa) (b:bb) | a == b = dropping aa bb + | otherwise = s + +-- replace all occurrences of 'this' with 'that' in the string 'str' +-- like Python replace +replace _ _ [] = [] +replace this that str + | str `beginsWith` this = let after = (str `dropping` this) + in that ++ replace this that after + | otherwise = + let x:xs = str + in x : replace this that xs + +eat s = replace s "" + +-- sometimes newlines get out of hand on the end of form POST submissions, +-- so trim all the end newlines and add a single newline +fixEndingNewlines = reverse . ('\n':) . dropWhile (=='\n') . reverse . filter (/= '\r') + +endsWith a b = beginsWith (reverse a) (reverse b) + +a `contains` b = any (`beginsWith` b) $ tails a addfile ./XHTML.hs hunk ./XHTML.hs 1 +module XHTML ( module XML, + ii, bb, tt, sup, -- make [XML] into italic, tt, sup, or bold XML + pp, h1, h2, h3, h4, -- turn [XML] into top-level HTML element + br, --
+ aa, -- target linkname --> XML link + lnk, -- target cs --> XML cs + img, -- src --> + imgSrcAlt, -- src alt --> alt + imgWith, -- ats --> + + hbody, -- ats cs --> cs + + showXHTML, + showXHTMLs, + + table1row, + rowize, + matrix2table, + matrix2tableClass ) where + +import XML +import Maybe +import PPrint + +-- markuppers +ii, bb, tt :: [XML] -> XML +ii = around "i" +bb = around "b" +tt = around "tt" +sup = around "sup" + +-- different kinds of paragraphs +pre, pp, h1, h2, h3, h4 :: [XML] -> XML +pp = around "p" +h1 = around "h1" +h2 = around "h2" +h3 = around "h3" +h4 = around "h4" +pre = around "pre" + +-- links and images +aa :: String -> String -> XML +aa target linkname = target `lnk` [txt linkname] + +lnk :: String -> [XML] -> XML +lnk target cs = elm "a" ["href"-->target] cs + +br :: XML +br = "br" `around` [] + +img :: String -> XML +img s = imgWith ["src"-->s] + +imgSrcAlt :: String -> String -> XML +imgSrcAlt s a = imgWith ["src"-->s, "alt"-->a] + +imgWith :: [XML.Att] -> XML +imgWith ats = "img" `with` src : alt : border : ats' + where ats' = ats `without` ["src", "border", "alt"] + alt = defaulter "alt" (rhs src) + src = defaulter "src" "no source" + border = defaulter "border" "0" + defaulter k v = k --> (fromMaybe v (getAtt k ats)) -- only if doesn't exist in ats + without origAts removelabels = filter (\x -> (lhs x) `notElem` removelabels) origAts + +rhs (Att _ rh) = rh +lhs (Att lh _) = lh + +getAtt :: String -> [Att] -> Maybe String +getAtt attName list = listToMaybe (map rhs (filter matcher list)) + where matcher (Att key val) = key==attName + +rowize :: [XMLs] -> XML +rowize r = "tr" `around` tds + where tds = (map (\x->"td" `around` x) r) + +table1row :: XMLs -> XML +table1row rs = "table" `around` [rowize (map (:[]) rs)] + +matrix2table :: [[XMLs]] -> XML +matrix2table rs = "table" `around` (map rowize rs) + +matrix2tableClass :: String -> [[XMLs]] -> XML +matrix2tableClass c rs = elm "table" ["class"-->c] (map rowize rs) + +hbody ats cs = elm "body" ats cs + +-- because HTML //IS ACTUALLY// space/indentation/newline sensitive!!!! +-- adding 'optional' whitespace/indentation/newlines between raw text and a +-- tag can sometimes have different meaning +-- e.g. this text +-- ---this means that we have to be tight where there's raw +-- text nearby, but we can loosen up and make a more indented +-- nicer shape everywhere else + +isTxt (Text _) = True +isTxt _ = False + +rawEnough (Text _) = True +rawEnough (Comm _ _ _) = True +rawEnough (Elm "textarea" _ _) = True +rawEnough (Elm _ _ cs) = or $ map isTxt cs +rawEnough (Hole x) = error $ "Holes shouldn't get through! ==> " ++ show x + +xhtmls2doc :: [XML] -> Doc +xhtmls2doc xs = glueHard $ map xhtml2doc xs + + +xhtml2doc x | rawEnough x = tightXML2doc x + | otherwise = looseXML2doc x + +tightXML2doc :: XML -> Doc +tightXML2doc e@(Comm l ss r) = xml2doc e --text l <> align (vsep ((map text ss) )) <> text r +--this should stay as xml2doc (not XHTML2doc) because this happened to loop forever... + +tightXML2doc (Text " ") = softline +tightXML2doc (Text "\n") = softline +tightXML2doc (Text t) = text t + +-- textarea problems... +tightXML2doc (Elm n@("textarea") ats [Text s]) = + text"<"<>text n<> attrs2doc ats<> text ">"<> text s <>text" text n <> text ">" + +-- hacks for IE sucking... (problems in the element) +-- +tightXML2doc (Elm n@("script") ats []) + = text"<"<>text n<> attrs2doc ats<> text"> text n <> text ">" +-- same for link +tightXML2doc (Elm n@("link") ats []) = + text"<"<>text n<> attrs2doc ats<> text"> text n <> text ">" + +tightXML2doc (Elm n ats []) = text"<"<>text n<> attrs2doc ats<> text" />" <> softbreak + +-- change

...

arbitrary junk

to

...

where arbitrary junk gets thrown away +tightXML2doc e@(Elm "p" _ ((ee@(Elm "p" _ cs)):_)) = wrap ee guts + where guts = (glue $ map xhtml2doc cs) -- <> linebreak + +-- change

...
arbitrary junk

to
...
where arbitrary junk gets thrown away +tightXML2doc e@(Elm "p" _ ((ee@(Elm "form" _ cs)):_)) = wrap ee guts + where guts = (glue $ map xhtml2doc cs) -- <> linebreak +-- TODO was xml2doc --^ here + + +tightXML2doc e@(Elm _ _ cs) = wrap e guts + where guts = (glue $ map xhtml2doc cs) -- <> linebreak + + +looseXML2doc (Text _) = error "txt node is never considered loose" +looseXML2doc e@(Comm l ss r) = xhtml2doc e --text l <+> align (vsep ((map text ss) ++ [text r])) +-- TODO was xhtml2doc --^ here + +looseXML2doc e@(Elm _ _ []) = tightXML2doc e + +-- change

...
arbitrary junk

to
...
where arbitrary junk gets thrown away +looseXML2doc e@(Elm "p" _ ((ee@(Elm "form" _ cs)):_)) = wrap ee (linebreak <> looseXMLs2doc cs) + +looseXML2doc e@(Elm _ _ cs) = wrap e (linebreak <> looseXMLs2doc cs) + +looseXMLs2doc :: [XML] -> Doc +looseXMLs2doc xs = glueHard $ map xhtml2doc xs + + +showXHTMLs w d = displayS (renderPretty 1.0 w (xhtmls2doc d)) "" + +showXHTML w d = displayS (renderPretty 1.0 w (xhtml2doc d)) "" + +--showXHTMLs w d = displayS (renderPretty 1.0 w (xhtmls2doc d)) "" + +--showXML w d = displayS (renderPretty 1.0 w (xhtml2doc d)) "" +--showXMLs w d = displayS (renderPretty 1.0 w (xhtmls2doc d)) "" + addfile ./XML.hs hunk ./XML.hs 1 +{-# OPTIONS_GHC -fglasgow-exts #-} + +module XML where --(XML, Att, intersperse, txt, elm, with, around, comment, (-->), showXML, showXMLs) where + +import List ( intersperse ) +import Data.Generics +import PPrint + +import HTMLEnt ( htmlEntityEncode ) + +-- ADT for XML + +data XML = Elm String [Att] [XML] + | Text String + | Comm String [String] String + | Hole String -- where this string is the name of the variable to interpolate + deriving (Eq, Show, Data, Typeable) +data Att = Att String String + deriving (Eq, Show, Data, Typeable) + +type XMLs = [XML] + +-- constructors and helper functions + +infixr 0 `with` +infix 6 --> + +hole = Hole +txt = Text +elm = Elm +n `with` attrs = elm n attrs [] +n `around` contents = elm n [] contents +x --> y | x == "value" = Att x y -- yet another hack... so that "  Search  " + -- stays the same, instead of "&amp; Search &amp;" + | otherwise = Att x (htmlEntityEncode y) +comment ss = Comm "" + +{- + +=== Example usage: + +< xbutton = "input" `with` ["name"-->"btnG", "id"-->"image", +< "type"-->"image", "src"-->"search.png", "maxlength"-->"256"] +< xinput = "input" `with` ["size"-->"60", "id"-->"field", +< "name"-->"q", "type"-->"text"] + +which gives + +" + +and + +" + +when used with ""showXML xbutton"" and ""showXML xinput"" + +-} + +-- insert a full blank line +x <$$$> y = x <$> empty <$> y + +glue xs = foldr (<>) empty xs +glueHard xs = foldr (<$$>) empty (map (hang 2) xs) + +wrap (Elm n ats _) guts = ((text"<"<>text n<> attrs2doc ats<> text">") <> -- <>linebreak<> + guts <> + (text"text n<>text">")) +wrap _ _ = error "only use wrap for tags with contents" + +sep `myjoin` ss = concat $ intersperse sep ss +nlJoin = myjoin "\n" + +xmls2doc :: [XML] -> Doc +xmls2doc xs = glueHard $ map xml2doc xs + +xml2doc :: XML -> Doc +xml2doc (Comm l ss r) = text l <> align (vsep ((map text ss) ++ [text r])) +--text l <$> text (nlJoin ss) <$> text r + +xml2doc (Text " ") = softline +xml2doc (Text "\n") = softline +xml2doc (Text t) = text t +xml2doc (Elm n ats []) = text"<"<>text n<> attrs2doc ats<> text" />" <> softbreak + +xml2doc e@(Elm _ _ cs) = wrap e guts + where guts = (glue $ map xml2doc cs) -- <> linebreak + + +{--((text"<"<>text n<> attrs2doc ats<> text">") <> -- <>linebreak<> + guts <> + (text"text n<>text">")) -- <> softbreak -} + + +attrs2doc [] = empty +attrs2doc ats = space <> hcat (intersperse space (map attr2doc ats)) +attr2doc (Att k v) = text k <>equals<> quote v + where quote s = dquotes $ text s + +--showXML w d = displayS (renderPretty 1.0 w (xml2doc d)) "" +--showXMLs w d = displayS (renderPretty 1.0 w (xmls2doc d)) "" + addfile ./blah.hs hunk ./blah.hs 1 +import Time + +noTimeDiff = TimeDiff { tdYear = 0, tdMonth = 0, tdDay = 0, tdHour = 0, tdMin = 0, tdSec = 0, tdPicosec = 0 } + +dif = noTimeDiff { tdHour = -8 } + +adder ct = addToClockTime dif ct addfile ./g hunk ./g 1 +#!/bin/bash + +ghc --make index -o index.cgi +#strip index.cgi +chown :web index.cgi +cp index.cgi /home/htdocs + +# && upx -9 ~/alpha/index.cgi + addfile ./index.hs hunk ./index.hs 1 +module Main ( main ) where + +import System.IO +import System.Environment +import System.Directory +import Network.URI (unEscapeString) +import List (unionBy, union, sortBy, sort) +import Control.Monad (liftM) +import System.Time +import System.Locale +import Char (toLower, toUpper) + +import Doc (strip) +import Template (template) +import MD5 (md5) + +import Useful -- really useful functions ( split, tuple, replace, etc. ) + +contenttype = "Content-Type: text/html; charset=utf-8" +httpheader = contenttype ++ "\n\n" + +------------------------------- +-- Cookies and login + +getKey key cookieStr = + let cookieEnv = toEnv cookieStr + in case lookup key cookieEnv of + Nothing -> "" + Just val -> val + +getsecretcode environ = + case lookup "REMOTE_ADDR" environ of + Nothing -> "" + Just ipaddr -> playwith ipaddr + +playwith s = let { + ; hashonce = md5 s -- hash once with md5 + ; digit, digit2 :: Int + ; digit = hex2num [head hashonce] -- get numeric value of first hex digit + ; digit2 = hex2num [hashonce !! digit] -- goto index 'digit' in the string + } in hashonce --(iterate md5 hashonce) !! digit2 -- hash 'digit2' times more + +isLoggedIn environ = + case lookup "HTTP_COOKIE" environ of + Nothing -> False + Just cookieStr -> getKey "LoggedIn" cookieStr == getsecretcode environ -- something here is wrong? this is bad + +-- end Cookies and login +---------------------------- + +parseQueryString qstr = case qstr of + "" -> [("page", "Home"), ("action", "view")] + qstr -> result + where result | '=' `elem` qstr = map tuple (split '&' qstr) + | otherwise = [("page", qstr), ("action", "view")] + +parseCnts cs = map tuple (split '&' cs) + +--------------- +-- Note: this needs a trailing slash on the end! +--root = "/Users/jared/Documents/UpdikeOrg/" +--root = "/Library/WebServer/CGI-Executables/" +--root = "/Users/jared/Documents/WebData/" +--root = "/Library/WebServer/Documents/WebData/" +--root = "/home/htdocs/webdata/" +root = "/home/protected/webdata/" +--------------- + +procValidate env = do + let username = dlookup "" "username" env + let password = dlookup "" "password" env + let page = dlookup "Home" "page" env + let file = dlookup "" "file" env + let ip = dlookup "" "ip" env + ps <- myreadFile "/home/protected/passwd" + let userPasswdsEnv = map tuple (lines ps) + let realPasswd = dlookup "" username userPasswdsEnv + let valid = password == realPasswd + + if valid + then let secretcode = playwith ip in + if secretcode == "" then do + let message = "Could not give your browser a cookie because \ + \this server could not uniquely identify your computer." + template False message page root file + else do + putStrLn$ "Set-Cookie: LoggedIn="++ secretcode ++"; Max-Age=360000; Version=1; Path=/" + putStr httpheader + template True "Log in successful." page root file + else do + putStr httpheader + template False "Access denied." page root file + +basehandler ioerr = do + putStr httpheader + putStrLn $ "
IO ERROR: "++ show ioerr + +writehandler ioerr = do + putStrLn "
WRITE " + basehandler ioerr + return () + +fxandler ioerr = do + putStrLn "
FILE EXISTS " + basehandler ioerr + return () + +readhandler ioerr = do + putStrLn "
READ " + basehandler ioerr + return "" + +procView env = do +{- + putStr httpheader + + let page = dlookup "Home" "page" env + --let loggedin = read $ dlookup "False" "loggedin" env + let file = dlookup "" "file" env + let f = (root++page++".txt") + fileExists <- doesFileExist f `catch` fxhandler + if fileExists + then putStrLn$ "File "++f++" exists" + else putStrLn$ "File "++f++" doesn't exist" ---- Apache says the file (/Users/jared/Documents/UpdikeOrg/Home.txt) doesn't exist, + ---- but when I run it as me it says it does exist + str <- myreadFile f + putStrLn str +-} + + let page = dlookup "Home" "page" env + let loggedin = read $ dlookup "False" "loggedin" env + let file = dlookup "" "file" env + fileExists <- doesFileExist (root++page++".txt") + putStr httpheader + if fileExists + then do tagstr <- myreadFile (root++page++".tags") + let hidden = or $ map (=="hidden") $ lines tagstr + if hidden && not loggedin + then template loggedin "You don't have permission to access this page." "Home" root (root++"Home.txt") + else template loggedin "" page root file + else if loggedin + then template loggedin ("Page `"++ page ++"' doesn't exist but you can add it.") page root (root++"add.txt") + else template loggedin ("Page "++ page ++" doesn't exist. Login to add it.") "Home" root (root++"login.txt") `catch` basehandler + + +procLogin env = do + let page = dlookup "Home" "page" env + let loggedin = read $ dlookup "False" "loggedin" env + let file = dlookup "" "file" env + putStr httpheader + if loggedin + then template loggedin "You are already logged in." page root file + else template loggedin "" page root (root++"login.txt") + +procLogout env = do + let page = dlookup "Home" "page" env + let file = dlookup "" "file" env + putStrLn "Set-Cookie: LoggedIn=rutabaga; Max-Age=0; Version=1; Path=/" + putStr httpheader + fileExists <- doesFileExist (root++page++".txt") + if fileExists + then template False "Logged out." page root file + else template False "Logged out." "Home" root file + +procEdit env = do + let page = dlookup "Home" "page" env + let loggedin = read $ dlookup "False" "loggedin" env + let file = dlookup "" "file" env + putStr httpheader + fileExists <- doesFileExist (root++page++".txt") + if not loggedin + then if fileExists + then template loggedin "You are not logged in." page root (root++"login.txt") + else template loggedin ("Page "++ page ++" doesn't exist. Login to add it.") "Home" root (root++"login.txt") + else if fileExists + then template loggedin "" page root (root++"edit.txt") + else template loggedin "" "Home" root (root++"Home.txt") + +procRename env = do + let page = dlookup "Home" "page" env + let loggedin = read $ dlookup "False" "loggedin" env + let file = dlookup "" "file" env + putStr httpheader + fileExists <- doesFileExist (root++page++".txt") + if not loggedin + then if fileExists + then template loggedin "You are not logged in." page root (root++"login.txt") + else template loggedin ("Page "++ page ++" doesn't exist. Login to add it.") "Home" root (root++"login.txt") + else if fileExists + then template loggedin "" page root (root++"rename.txt") + else template loggedin ("Page "++ page ++" doesn't exist so you can't rename it.") "Home" root (root++"Home.txt") + +procAdd env = do + let page = dlookup "Home" "page" env + let loggedin = read $ dlookup "False" "loggedin" env + let file = dlookup "" "file" env + let datestr = dlookup "untitled" "datestr" env + putStr httpheader + fileExists <- doesFileExist (root++page++".txt") + if not loggedin + then if fileExists + then template loggedin "You are not logged in." page root (root++"login.txt") + else template loggedin ("Page "++ page ++" doesn't exist. Login to add it.") "Home" root (root++"login.txt") + else template loggedin "" datestr root (root++"add.txt") + +lowCmp x y = map toLower x `compare` map toLower y + +updateTagsWith root tags = do + allstr <- myreadFile (root++"tags.txt") + let alltags = lines allstr + let newtags = sortBy lowCmp $ union tags alltags + mywriteFile (root++"tags.tmp") $ unlines newtags + removeFile (root++"tags.txt") + renameFile (root++"tags.tmp") (root++"tags.txt") + +mywriteFile a b = writeFile a b `catch` writehandler +myreadFile x = do s <- readFile x `catch` readhandler + return s + +setFolder folder page = if length pieces > 1 then page else folder++"/"++page + where pieces = split '/' page + +procModify env = do + let pageraw = unEscapeString $ dlookup "Home" "page" env + let newCnts = fixEndingNewlines $ unEscapeString $ replace "+" " " $ dlookup "" "content" env + let tagsbox = unEscapeString $ replace "\n" "" $ replace "+" " " $ dlookup "" "tagsbox" env + let loggedin = read $ dlookup "False" "loggedin" env + let file = dlookup "" "file" env + let checktags = map (tagdecode . drop 4 . fst) $ filter (\(x,y)-> x `beginsWith` "TAG_" && y=="on") env + let tags = sortBy lowCmp $ union checktags (filter (/="") $ map strip $ split ',' tagsbox) + let page = if "link" `elem` tags then setFolder "links" pageraw else pageraw + putStr httpheader + if not loggedin + then template loggedin "You are not logged in." page root (root++"login.txt") + else if newCnts == "" then template loggedin "Nothing to change." page root (root++"login.txt") + else do + mywriteFile (root++page++".txt") newCnts + mywriteFile (root++page++".tags") $ unlines tags + updateTagsWith root tags + --putStrLn newCnts + template loggedin "Page updated." page root file + +procMovepage env = do + let page = dlookup "Home" "page" env + let newpagename = dlookup "" "newpagename" env + let loggedin = read $ dlookup "False" "loggedin" env + + let oldfile = unEscapeString $ dlookup "" "file" env + let newfile = unEscapeString $ root++newpagename++".txt" + + let oldtagsfile = unEscapeString $ replace ".txt" ".tags" oldfile + let newtagsfile = unEscapeString $ root++newpagename++".tags" + + putStr httpheader + if not loggedin + then template loggedin "You are not logged in." page root (root++"login.txt") + else do renameFile oldfile newfile + renameFile oldtagsfile newtagsfile + template loggedin ("Page "++page++" renamed to "++newpagename++".") newpagename root (root++newpagename++".txt") + +--------------- + +upperfirst [] = [] +upperfirst (x:xs) = toUpper x : xs + +firstThatExists [] _ = return Nothing +firstThatExists (s:ss) root = do + exists <- doesFileExist (root++s) + if exists then return$ Just s else + firstThatExists ss root + +findBestMatch page = do + let blahPlus ext = page ++ ext + let lblahPlus ext = map toLower page ++ ext + let ublahPlus ext = (upperfirst . map toLower) page ++ ext + firstThatExists [ blahPlus ".txt" + , blahPlus ".lhs" + , blahPlus ".hs" + , lblahPlus ".txt" + , lblahPlus ".lhs" + , lblahPlus ".hs" + , ublahPlus ".txt" + , ublahPlus ".lhs" + , ublahPlus ".hs" + ] root + +dlookup d k env = + case lookup k env of + Nothing -> d + Just a -> a + +actionsEnv = [("view", procView), ("login", procLogin), ("validate", procValidate), + ("logout", procLogout), ("edit", procEdit), ("modify", procModify), ("add", procAdd), + ("rename", procRename), ("movepage", procMovepage)] + +ff osEnviron = case lookup "HTTP_COOKIE" osEnviron of + Nothing -> [] + Just cookieStr -> toEnv cookieStr + +main = do + -- handle get and post input as well as cookies + cnts <- getContents -- POST method + mywriteFile "/home/protected/POST.log" cnts + -- let cnts = "" + osEnviron <- getEnvironment + let qstr = unEscapeString $ dlookup "" "QUERY_STRING" osEnviron -- GET method + let ip = dlookup "" "REMOTE_ADDR" osEnviron + let loggedin = ("loggedin", show $ isLoggedIn osEnviron) +{- + putStrLn httpheader + putStrLn $ show osEnviron + + let k = ff osEnviron + putStrLn $ "----------
" ++ show k +-} + {- + time <- getClockTime + ctime <- toCalendarTime time + let CalendarTime { ctTZ = gimme } = ctime + let datestr = show gimme + --let datestr = formatCalendarTime defaultTimeLocale "%Y%m%d%H%M%S" ctime + -} + + let env' = ("ip", ip) : loggedin : + unionBy (\x y -> fst x == fst y) (parseCnts cnts) (parseQueryString qstr) + + let curpage = dlookup "Home" "page" env' + mBest <- findBestMatch curpage + let (bestFname,newpage) = case mBest of + Nothing -> ("Home.txt", curpage) + Just s -> (s, takeWhile (/= '.') s) + let env = ("page", newpage):("file", root++bestFname) : filter (\x -> fst x /= "page") env' + + let actionName = dlookup "view" "action" env + case lookup actionName actionsEnv of + Nothing -> procView env -- default to 'view' if given some invalid action + Just action -> action env + }