1 {-|
    2 
    3 Parsers for standard ledger and timelog files.
    4 
    5 -}
    6 
    7 module Ledger.Parse
    8 where
    9 import Prelude hiding (readFile, putStr, print)
   10 import Control.Monad
   11 import Control.Monad.Error
   12 import Text.ParserCombinators.Parsec
   13 import Text.ParserCombinators.Parsec.Char
   14 import Text.ParserCombinators.Parsec.Language
   15 import Text.ParserCombinators.Parsec.Combinator
   16 import qualified Text.ParserCombinators.Parsec.Token as P
   17 import System.Directory
   18 import System.IO.UTF8
   19 import System.IO (stdin)
   20 import qualified Data.Map as Map
   21 import Data.Time.LocalTime
   22 import Data.Time.Calendar
   23 import Ledger.Utils
   24 import Ledger.Types
   25 import Ledger.Dates
   26 import Ledger.AccountName
   27 import Ledger.Amount
   28 import Ledger.LedgerTransaction
   29 import Ledger.Posting
   30 import Ledger.Commodity
   31 import Ledger.TimeLog
   32 import Ledger.RawLedger
   33 import System.FilePath(takeDirectory,combine)
   34 
   35 
   36 -- utils
   37 
   38 -- | Some context kept during parsing.
   39 data LedgerFileCtx = Ctx {
   40       ctxYear     :: !(Maybe Integer)  -- ^ the default year most recently specified with Y
   41     , ctxCommod   :: !(Maybe String)   -- ^ I don't know
   42     , ctxAccount  :: ![String]         -- ^ the current stack of "container" accounts specified by !account
   43     } deriving (Read, Show)
   44 
   45 emptyCtx :: LedgerFileCtx
   46 emptyCtx = Ctx { ctxYear = Nothing, ctxCommod = Nothing, ctxAccount = [] }
   47 
   48 -- containing accounts "nest" hierarchically
   49 
   50 pushParentAccount :: String -> GenParser tok LedgerFileCtx ()
   51 pushParentAccount parent = updateState addParentAccount
   52     where addParentAccount ctx0 = ctx0 { ctxAccount = normalize parent : ctxAccount ctx0 }
   53           normalize = (++ ":") 
   54 
   55 popParentAccount :: GenParser tok LedgerFileCtx ()
   56 popParentAccount = do ctx0 <- getState
   57                       case ctxAccount ctx0 of
   58                         [] -> unexpected "End of account block with no beginning"
   59                         (_:rest) -> setState $ ctx0 { ctxAccount = rest }
   60 
   61 getParentAccount :: GenParser tok LedgerFileCtx String
   62 getParentAccount = liftM (concat . reverse . ctxAccount) getState
   63 
   64 setYear :: Integer -> GenParser tok LedgerFileCtx ()
   65 setYear y = updateState (\ctx -> ctx{ctxYear=Just y})
   66 
   67 getYear :: GenParser tok LedgerFileCtx (Maybe Integer)
   68 getYear = liftM ctxYear getState
   69 
   70 printParseError :: (Show a) => a -> IO ()
   71 printParseError e = do putStr "ledger parse error at "; print e
   72 
   73 -- let's get to it
   74 
   75 parseLedgerFile :: LocalTime -> FilePath -> ErrorT String IO RawLedger
   76 parseLedgerFile t "-" = liftIO (hGetContents stdin) >>= parseLedger t "-"
   77 parseLedgerFile t f   = liftIO (readFile f) >>= parseLedger t f
   78 
   79 -- | Parses the contents of a ledger file, or gives an error.  Requires
   80 -- the current (local) time to calculate any unfinished timelog sessions,
   81 -- we pass it in for repeatability.
   82 parseLedger :: LocalTime -> FilePath -> String -> ErrorT String IO RawLedger
   83 parseLedger reftime inname intxt = do
   84   case runParser ledgerFile emptyCtx inname intxt of
   85     Right m  -> liftM (rawLedgerConvertTimeLog reftime) $ m `ap` (return rawLedgerEmpty)
   86     Left err -> throwError $ show err
   87 
   88 
   89 ledgerFile :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger))
   90 ledgerFile = do items <- many ledgerItem
   91                 eof
   92                 return $ liftM (foldr (.) id) $ sequence items
   93     where 
   94       -- As all ledger line types can be distinguished by the first
   95       -- character, excepting transactions versus empty (blank or
   96       -- comment-only) lines, can use choice w/o try
   97       ledgerItem = choice [ ledgerDirective
   98                           , liftM (return . addLedgerTransaction) ledgerTransaction
   99                           , liftM (return . addModifierTransaction) ledgerModifierTransaction
  100                           , liftM (return . addPeriodicTransaction) ledgerPeriodicTransaction
  101                           , liftM (return . addHistoricalPrice) ledgerHistoricalPrice
  102                           , ledgerDefaultYear
  103                           , emptyLine >> return (return id)
  104                           , liftM (return . addTimeLogEntry)  timelogentry
  105                           ]
  106 
  107 ledgerDirective :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger))
  108 ledgerDirective = do char '!' <?> "directive"
  109                      directive <- many nonspace
  110                      case directive of
  111                        "include" -> ledgerInclude
  112                        "account" -> ledgerAccountBegin
  113                        "end"     -> ledgerAccountEnd
  114 
  115 ledgerInclude :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger))
  116 ledgerInclude = do many1 spacenonewline
  117                    filename <- restofline
  118                    outerState <- getState
  119                    outerPos <- getPosition
  120                    let inIncluded = show outerPos ++ " in included file " ++ show filename ++ ":\n"
  121                    return $ do contents <- expandPath outerPos filename >>= readFileE outerPos
  122                                case runParser ledgerFile outerState filename contents of
  123                                  Right l   -> l `catchError` (\err -> throwError $ inIncluded ++ err)
  124                                  Left perr -> throwError $ inIncluded ++ show perr
  125     where readFileE outerPos filename = ErrorT $ do (liftM Right $ readFile filename) `catch` leftError
  126               where leftError err = return $ Left $ currentPos ++ whileReading ++ show err
  127                     currentPos = show outerPos
  128                     whileReading = " reading " ++ show filename ++ ":\n"
  129 
  130 expandPath :: (MonadIO m) => SourcePos -> FilePath -> m FilePath
  131 expandPath pos fp = liftM mkRelative (expandHome fp)
  132   where
  133     mkRelative = combine (takeDirectory (sourceName pos))
  134     expandHome inname | "~/" `isPrefixOf` inname = do homedir <- liftIO getHomeDirectory
  135                                                       return $ homedir ++ drop 1 inname
  136                       | otherwise                = return inname
  137 
  138 ledgerAccountBegin :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger))
  139 ledgerAccountBegin = do many1 spacenonewline
  140                         parent <- ledgeraccountname
  141                         newline
  142                         pushParentAccount parent
  143                         return $ return id
  144 
  145 ledgerAccountEnd :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger))
  146 ledgerAccountEnd = popParentAccount >> return (return id)
  147 
  148 -- parsers
  149 
  150 -- | Parse a RawLedger from either a ledger file or a timelog file.
  151 -- It tries first the timelog parser then the ledger parser; this means
  152 -- parse errors for ledgers are useful while those for timelogs are not.
  153 
  154 {-| Parse a ledger file. Here is the ledger grammar from the ledger 2.5 manual:
  155 
  156 @
  157 The ledger file format is quite simple, but also very flexible. It supports
  158 many options, though typically the user can ignore most of them. They are
  159 summarized below.  The initial character of each line determines what the
  160 line means, and how it should be interpreted. Allowable initial characters
  161 are:
  162 
  163 NUMBER      A line beginning with a number denotes an entry. It may be followed by any
  164             number of lines, each beginning with whitespace, to denote the entry’s account
  165             transactions. The format of the first line is:
  166 
  167                     DATE[=EDATE] [*|!] [(CODE)] DESC
  168 
  169             If ‘*’ appears after the date (with optional effective date), it indicates the entry
  170             is “cleared”, which can mean whatever the user wants it t omean. If ‘!’ appears
  171             after the date, it indicates d the entry is “pending”; i.e., tentatively cleared from
  172             the user’s point of view, but not yet actually cleared. If a ‘CODE’ appears in
  173             parentheses, it may be used to indicate a check number, or the type of the
  174             transaction. Following these is the payee, or a description of the transaction.
  175             The format of each following transaction is:
  176 
  177                       ACCOUNT     AMOUNT    [; NOTE]
  178 
  179             The ‘ACCOUNT’ may be surrounded by parentheses if it is a virtual
  180             transactions, or square brackets if it is a virtual transactions that must
  181             balance. The ‘AMOUNT’ can be followed by a per-unit transaction cost,
  182             by specifying ‘ AMOUNT’, or a complete transaction cost with ‘\@ AMOUNT’.
  183             Lastly, the ‘NOTE’ may specify an actual and/or effective date for the
  184             transaction by using the syntax ‘[ACTUAL_DATE]’ or ‘[=EFFECTIVE_DATE]’ or
  185             ‘[ACTUAL_DATE=EFFECtIVE_DATE]’.
  186 
  187 =           An automated entry. A value expression must appear after the equal sign.
  188             After this initial line there should be a set of one or more transactions, just as
  189             if it were normal entry. If the amounts of the transactions have no commodity,
  190             they will be applied as modifiers to whichever real transaction is matched by
  191             the value expression.
  192  
  193 ~           A period entry. A period expression must appear after the tilde.
  194             After this initial line there should be a set of one or more transactions, just as
  195             if it were normal entry.
  196 
  197 !           A line beginning with an exclamation mark denotes a command directive. It
  198             must be immediately followed by the command word. The supported commands
  199             are:
  200 
  201            ‘!include’
  202                         Include the stated ledger file.
  203            ‘!account’
  204                         The account name is given is taken to be the parent of all transac-
  205                         tions that follow, until ‘!end’ is seen.
  206            ‘!end’       Ends an account block.
  207  
  208 ;          A line beginning with a colon indicates a comment, and is ignored.
  209  
  210 Y          If a line begins with a capital Y, it denotes the year used for all subsequent
  211            entries that give a date without a year. The year should appear immediately
  212            after the Y, for example: ‘Y2004’. This is useful at the beginning of a file, to
  213            specify the year for that file. If all entries specify a year, however, this command
  214            has no effect.
  215            
  216  
  217 P          Specifies a historical price for a commodity. These are usually found in a pricing
  218            history file (see the ‘-Q’ option). The syntax is:
  219 
  220                   P DATE SYMBOL PRICE
  221 
  222 N SYMBOL   Indicates that pricing information is to be ignored for a given symbol, nor will
  223            quotes ever be downloaded for that symbol. Useful with a home currency, such
  224            as the dollar ($). It is recommended that these pricing options be set in the price
  225            database file, which defaults to ‘~/.pricedb’. The syntax for this command is:
  226 
  227                   N SYMBOL
  228 
  229         
  230 D AMOUNT   Specifies the default commodity to use, by specifying an amount in the expected
  231            format. The entry command will use this commodity as the default when none
  232            other can be determined. This command may be used multiple times, to set
  233            the default flags for different commodities; whichever is seen last is used as the
  234            default commodity. For example, to set US dollars as the default commodity,
  235            while also setting the thousands flag and decimal flag for that commodity, use:
  236 
  237                   D $1,000.00
  238 
  239 C AMOUNT1 = AMOUNT2
  240            Specifies a commodity conversion, where the first amount is given to be equiv-
  241            alent to the second amount. The first amount should use the decimal precision
  242            desired during reporting:
  243 
  244                   C 1.00 Kb = 1024 bytes
  245 
  246 i, o, b, h
  247            These four relate to timeclock support, which permits ledger to read timelog
  248            files. See the timeclock’s documentation for more info on the syntax of its
  249            timelog files.
  250 @
  251 
  252 See "Tests" for sample data.
  253 -}
  254 
  255 emptyLine :: GenParser Char st ()
  256 emptyLine = do many spacenonewline
  257                optional $ (char ';' <?> "comment") >> many (noneOf "\n")
  258                newline
  259                return ()
  260 
  261 ledgercomment :: GenParser Char st String
  262 ledgercomment = 
  263     try (do
  264           char ';'
  265           many spacenonewline
  266           many (noneOf "\n")
  267         ) 
  268     <|> return "" <?> "comment"
  269 
  270 ledgerModifierTransaction :: GenParser Char LedgerFileCtx ModifierTransaction
  271 ledgerModifierTransaction = do
  272   char '=' <?> "modifier transaction"
  273   many spacenonewline
  274   valueexpr <- restofline
  275   postings <- ledgerpostings
  276   return $ ModifierTransaction valueexpr postings
  277 
  278 ledgerPeriodicTransaction :: GenParser Char LedgerFileCtx PeriodicTransaction
  279 ledgerPeriodicTransaction = do
  280   char '~' <?> "periodic transaction"
  281   many spacenonewline
  282   periodexpr <- restofline
  283   postings <- ledgerpostings
  284   return $ PeriodicTransaction periodexpr postings
  285 
  286 ledgerHistoricalPrice :: GenParser Char LedgerFileCtx HistoricalPrice
  287 ledgerHistoricalPrice = do
  288   char 'P' <?> "historical price"
  289   many spacenonewline
  290   date <- ledgerdate
  291   many spacenonewline
  292   symbol1 <- commoditysymbol
  293   many spacenonewline
  294   (Mixed [Amount c price pri]) <- someamount
  295   restofline
  296   return $ HistoricalPrice date symbol1 (symbol c) price
  297 
  298 -- like ledgerAccountBegin, updates the LedgerFileCtx
  299 ledgerDefaultYear :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger))
  300 ledgerDefaultYear = do
  301   char 'Y' <?> "default year"
  302   many spacenonewline
  303   y <- many1 digit
  304   let y' = read y
  305   guard (y' >= 1000)
  306   setYear y'
  307   return $ return id
  308 
  309 -- | Try to parse a ledger entry. If we successfully parse an entry, ensure it is balanced,
  310 -- and if we cannot, raise an error.
  311 ledgerTransaction :: GenParser Char LedgerFileCtx LedgerTransaction
  312 ledgerTransaction = do
  313   date <- ledgerdate <?> "transaction"
  314   status <- ledgerstatus
  315   code <- ledgercode
  316   description <- liftM rstrip (many1 (noneOf ";\n") <?> "description")
  317   comment <- ledgercomment
  318   restofline
  319   postings <- ledgerpostings
  320   let t = LedgerTransaction date status code description comment postings ""
  321   case balanceLedgerTransaction t of
  322     Right t' -> return t'
  323     Left err -> error err
  324 
  325 ledgerdate :: GenParser Char LedgerFileCtx Day
  326 ledgerdate = try ledgerfulldate <|> ledgerpartialdate
  327 
  328 ledgerfulldate :: GenParser Char LedgerFileCtx Day
  329 ledgerfulldate = do
  330   (y,m,d) <- ymd
  331   many spacenonewline
  332   return $ fromGregorian (read y) (read m) (read d)
  333 
  334 -- | Match a partial M/D date in a ledger. Warning, this terminates the
  335 -- program if it finds a match when there is no default year specified.
  336 ledgerpartialdate :: GenParser Char LedgerFileCtx Day
  337 ledgerpartialdate = do
  338   (_,m,d) <- md
  339   many spacenonewline
  340   y <- getYear
  341   when (y==Nothing) $ error "partial date found, but no default year specified"
  342   return $ fromGregorian (fromJust y) (read m) (read d)
  343 
  344 ledgerdatetime :: GenParser Char LedgerFileCtx LocalTime
  345 ledgerdatetime = do 
  346   day <- ledgerdate
  347   h <- many1 digit
  348   char ':'
  349   m <- many1 digit
  350   s <- optionMaybe $ do
  351       char ':'
  352       many1 digit
  353   many spacenonewline
  354   let tod = TimeOfDay (read h) (read m) (maybe 0 (fromIntegral.read) s)
  355   return $ LocalTime day tod
  356 
  357 ledgerstatus :: GenParser Char st Bool
  358 ledgerstatus = try (do { char '*' <?> "status"; many1 spacenonewline; return True } ) <|> return False
  359 
  360 ledgercode :: GenParser Char st String
  361 ledgercode = try (do { char '(' <?> "code"; code <- anyChar `manyTill` char ')'; many1 spacenonewline; return code } ) <|> return ""
  362 
  363 ledgerpostings :: GenParser Char LedgerFileCtx [Posting]
  364 ledgerpostings = many1 $ try ledgerposting
  365 
  366 ledgerposting :: GenParser Char LedgerFileCtx Posting
  367 ledgerposting = do
  368   many1 spacenonewline
  369   status <- ledgerstatus
  370   account <- transactionaccountname
  371   let (ptype, account') = (postingTypeFromAccountName account, unbracket account)
  372   amount <- postingamount
  373   many spacenonewline
  374   comment <- ledgercomment
  375   restofline
  376   parent <- getParentAccount
  377   return (Posting status account' amount comment ptype)
  378 
  379 -- Qualify with the parent account from parsing context
  380 transactionaccountname :: GenParser Char LedgerFileCtx AccountName
  381 transactionaccountname = liftM2 (++) getParentAccount ledgeraccountname
  382 
  383 -- | account names may have single spaces inside them, and are terminated by two or more spaces
  384 ledgeraccountname :: GenParser Char st String
  385 ledgeraccountname = do
  386     accountname <- many1 (nonspace <|> singlespace)
  387     return $ striptrailingspace accountname
  388     where 
  389       singlespace = try (do {spacenonewline; do {notFollowedBy spacenonewline; return ' '}})
  390       -- couldn't avoid consuming a final space sometimes, harmless
  391       striptrailingspace s = if last s == ' ' then init s else s
  392 
  393 -- accountnamechar = notFollowedBy (oneOf "()[]") >> nonspace
  394 --     <?> "account name character (non-bracket, non-parenthesis, non-whitespace)"
  395 
  396 postingamount :: GenParser Char st MixedAmount
  397 postingamount =
  398   try (do
  399         many1 spacenonewline
  400         a <- someamount <|> return missingamt
  401         return a
  402       ) <|> return missingamt
  403 
  404 someamount :: GenParser Char st MixedAmount
  405 someamount = try leftsymbolamount <|> try rightsymbolamount <|> nosymbolamount 
  406 
  407 leftsymbolamount :: GenParser Char st MixedAmount
  408 leftsymbolamount = do
  409   sym <- commoditysymbol 
  410   sp <- many spacenonewline
  411   (q,p,comma) <- amountquantity
  412   pri <- priceamount
  413   let c = Commodity {symbol=sym,side=L,spaced=not $ null sp,comma=comma,precision=p}
  414   return $ Mixed [Amount c q pri]
  415   <?> "left-symbol amount"
  416 
  417 rightsymbolamount :: GenParser Char st MixedAmount
  418 rightsymbolamount = do
  419   (q,p,comma) <- amountquantity
  420   sp <- many spacenonewline
  421   sym <- commoditysymbol
  422   pri <- priceamount
  423   let c = Commodity {symbol=sym,side=R,spaced=not $ null sp,comma=comma,precision=p}
  424   return $ Mixed [Amount c q pri]
  425   <?> "right-symbol amount"
  426 
  427 nosymbolamount :: GenParser Char st MixedAmount
  428 nosymbolamount = do
  429   (q,p,comma) <- amountquantity
  430   pri <- priceamount
  431   let c = Commodity {symbol="",side=L,spaced=False,comma=comma,precision=p}
  432   return $ Mixed [Amount c q pri]
  433   <?> "no-symbol amount"
  434 
  435 commoditysymbol :: GenParser Char st String
  436 commoditysymbol = many1 (noneOf "-.0123456789;\n ") <?> "commodity symbol"
  437 
  438 priceamount :: GenParser Char st (Maybe MixedAmount)
  439 priceamount =
  440     try (do
  441           many spacenonewline
  442           char '@'
  443           many spacenonewline
  444           a <- someamount
  445           return $ Just a
  446           ) <|> return Nothing
  447 
  448 -- gawd.. trying to parse a ledger number without error:
  449 
  450 -- | parse a ledger-style numeric quantity and also return the number of
  451 -- digits to the right of the decimal point and whether thousands are
  452 -- separated by comma.
  453 amountquantity :: GenParser Char st (Double, Int, Bool)
  454 amountquantity = do
  455   sign <- optionMaybe $ string "-"
  456   (intwithcommas,frac) <- numberparts
  457   let comma = ',' `elem` intwithcommas
  458   let precision = length frac
  459   -- read the actual value. We expect this read to never fail.
  460   let int = filter (/= ',') intwithcommas
  461   let int' = if null int then "0" else int
  462   let frac' = if null frac then "0" else frac
  463   let sign' = fromMaybe "" sign
  464   let quantity = read $ sign'++int'++"."++frac'
  465   return (quantity, precision, comma)
  466   <?> "commodity quantity"
  467 
  468 -- | parse the two strings of digits before and after a possible decimal
  469 -- point.  The integer part may contain commas, or either part may be
  470 -- empty, or there may be no point.
  471 numberparts :: GenParser Char st (String,String)
  472 numberparts = numberpartsstartingwithdigit <|> numberpartsstartingwithpoint
  473 
  474 numberpartsstartingwithdigit :: GenParser Char st (String,String)
  475 numberpartsstartingwithdigit = do
  476   let digitorcomma = digit <|> char ','
  477   first <- digit
  478   rest <- many digitorcomma
  479   frac <- try (do {char '.'; many digit >>= return}) <|> return ""
  480   return (first:rest,frac)
  481                      
  482 numberpartsstartingwithpoint :: GenParser Char st (String,String)
  483 numberpartsstartingwithpoint = do
  484   char '.'
  485   frac <- many1 digit
  486   return ("",frac)
  487                      
  488 
  489 {-| Parse a timelog entry. Here is the timelog grammar from timeclock.el 2.6:
  490 
  491 @
  492 A timelog contains data in the form of a single entry per line.
  493 Each entry has the form:
  494 
  495   CODE YYYY/MM/DD HH:MM:SS [COMMENT]
  496 
  497 CODE is one of: b, h, i, o or O.  COMMENT is optional when the code is
  498 i, o or O.  The meanings of the codes are:
  499 
  500   b  Set the current time balance, or \"time debt\".  Useful when
  501      archiving old log data, when a debt must be carried forward.
  502      The COMMENT here is the number of seconds of debt.
  503 
  504   h  Set the required working time for the given day.  This must
  505      be the first entry for that day.  The COMMENT in this case is
  506      the number of hours in this workday.  Floating point amounts
  507      are allowed.
  508 
  509   i  Clock in.  The COMMENT in this case should be the name of the
  510      project worked on.
  511 
  512   o  Clock out.  COMMENT is unnecessary, but can be used to provide
  513      a description of how the period went, for example.
  514 
  515   O  Final clock out.  Whatever project was being worked on, it is
  516      now finished.  Useful for creating summary reports.
  517 @
  518 
  519 Example:
  520 
  521 i 2007/03/10 12:26:00 hledger
  522 o 2007/03/10 17:26:02
  523 
  524 -}
  525 timelogentry :: GenParser Char LedgerFileCtx TimeLogEntry
  526 timelogentry = do
  527   code <- oneOf "bhioO"
  528   many1 spacenonewline
  529   datetime <- ledgerdatetime
  530   comment <- liftM2 (++) getParentAccount restofline
  531   return $ TimeLogEntry (read [code]) datetime comment
  532 
  533 
  534 -- misc parsing
  535 
  536 -- | Parse a --display expression which is a simple date predicate, like
  537 -- "d>[DATE]" or "d<=[DATE]", and return a transaction-matching predicate.
  538 datedisplayexpr :: GenParser Char st (Transaction -> Bool)
  539 datedisplayexpr = do
  540   char 'd'
  541   op <- compareop
  542   char '['
  543   (y,m,d) <- smartdate
  544   char ']'
  545   let ltdate = parsedate $ printf "%04s/%02s/%02s" y m d
  546   let matcher = \(Transaction{tdate=d}) -> 
  547                   case op of
  548                     "<"  -> d <  ltdate
  549                     "<=" -> d <= ltdate
  550                     "="  -> d == ltdate
  551                     "==" -> d == ltdate -- just in case
  552                     ">=" -> d >= ltdate
  553                     ">"  -> d >  ltdate
  554   return matcher              
  555 
  556 compareop = choice $ map (try . string) ["<=",">=","==","<","=",">"]
  557