1 {-| 2 3 Parsers for standard ledger and timelog files. 4 5 -} 6 7 module Ledger.Parse 8 where 9 import Prelude hiding (readFile, putStr, print) 10 import Control.Monad 11 import Control.Monad.Error 12 import Text.ParserCombinators.Parsec 13 import Text.ParserCombinators.Parsec.Char 14 import Text.ParserCombinators.Parsec.Language 15 import Text.ParserCombinators.Parsec.Combinator 16 import qualified Text.ParserCombinators.Parsec.Token as P 17 import System.Directory 18 import System.IO.UTF8 19 import System.IO (stdin) 20 import qualified Data.Map as Map 21 import Data.Time.LocalTime 22 import Data.Time.Calendar 23 import Ledger.Utils 24 import Ledger.Types 25 import Ledger.Dates 26 import Ledger.AccountName 27 import Ledger.Amount 28 import Ledger.LedgerTransaction 29 import Ledger.Posting 30 import Ledger.Commodity 31 import Ledger.TimeLog 32 import Ledger.RawLedger 33 import System.FilePath(takeDirectory,combine) 34 35 36 -- utils 37 38 -- | Some context kept during parsing. 39 data LedgerFileCtx = Ctx { 40 ctxYear :: !(Maybe Integer) -- ^ the default year most recently specified with Y 41 , ctxCommod :: !(Maybe String) -- ^ I don't know 42 , ctxAccount :: ![String] -- ^ the current stack of "container" accounts specified by !account 43 } deriving (Read, Show) 44 45 emptyCtx :: LedgerFileCtx 46 emptyCtx = Ctx { ctxYear = Nothing, ctxCommod = Nothing, ctxAccount = [] } 47 48 -- containing accounts "nest" hierarchically 49 50 pushParentAccount :: String -> GenParser tok LedgerFileCtx () 51 pushParentAccount parent = updateState addParentAccount 52 where addParentAccount ctx0 = ctx0 { ctxAccount = normalize parent : ctxAccount ctx0 } 53 normalize = (++ ":") 54 55 popParentAccount :: GenParser tok LedgerFileCtx () 56 popParentAccount = do ctx0 <- getState 57 case ctxAccount ctx0 of 58 [] -> unexpected "End of account block with no beginning" 59 (_:rest) -> setState $ ctx0 { ctxAccount = rest } 60 61 getParentAccount :: GenParser tok LedgerFileCtx String 62 getParentAccount = liftM (concat . reverse . ctxAccount) getState 63 64 setYear :: Integer -> GenParser tok LedgerFileCtx () 65 setYear y = updateState (\ctx -> ctx{ctxYear=Just y}) 66 67 getYear :: GenParser tok LedgerFileCtx (Maybe Integer) 68 getYear = liftM ctxYear getState 69 70 printParseError :: (Show a) => a -> IO () 71 printParseError e = do putStr "ledger parse error at "; print e 72 73 -- let's get to it 74 75 parseLedgerFile :: LocalTime -> FilePath -> ErrorT String IO RawLedger 76 parseLedgerFile t "-" = liftIO (hGetContents stdin) >>= parseLedger t "-" 77 parseLedgerFile t f = liftIO (readFile f) >>= parseLedger t f 78 79 -- | Parses the contents of a ledger file, or gives an error. Requires 80 -- the current (local) time to calculate any unfinished timelog sessions, 81 -- we pass it in for repeatability. 82 parseLedger :: LocalTime -> FilePath -> String -> ErrorT String IO RawLedger 83 parseLedger reftime inname intxt = do 84 case runParser ledgerFile emptyCtx inname intxt of 85 Right m -> liftM (rawLedgerConvertTimeLog reftime) $ m `ap` (return rawLedgerEmpty) 86 Left err -> throwError $ show err 87 88 89 ledgerFile :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger)) 90 ledgerFile = do items <- many ledgerItem 91 eof 92 return $ liftM (foldr (.) id) $ sequence items 93 where 94 -- As all ledger line types can be distinguished by the first 95 -- character, excepting transactions versus empty (blank or 96 -- comment-only) lines, can use choice w/o try 97 ledgerItem = choice [ ledgerDirective 98 , liftM (return . addLedgerTransaction) ledgerTransaction 99 , liftM (return . addModifierTransaction) ledgerModifierTransaction 100 , liftM (return . addPeriodicTransaction) ledgerPeriodicTransaction 101 , liftM (return . addHistoricalPrice) ledgerHistoricalPrice 102 , ledgerDefaultYear 103 , emptyLine >> return (return id) 104 , liftM (return . addTimeLogEntry) timelogentry 105 ] 106 107 ledgerDirective :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger)) 108 ledgerDirective = do char '!' <?> "directive" 109 directive <- many nonspace 110 case directive of 111 "include" -> ledgerInclude 112 "account" -> ledgerAccountBegin 113 "end" -> ledgerAccountEnd 114 115 ledgerInclude :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger)) 116 ledgerInclude = do many1 spacenonewline 117 filename <- restofline 118 outerState <- getState 119 outerPos <- getPosition 120 let inIncluded = show outerPos ++ " in included file " ++ show filename ++ ":\n" 121 return $ do contents <- expandPath outerPos filename >>= readFileE outerPos 122 case runParser ledgerFile outerState filename contents of 123 Right l -> l `catchError` (\err -> throwError $ inIncluded ++ err) 124 Left perr -> throwError $ inIncluded ++ show perr 125 where readFileE outerPos filename = ErrorT $ do (liftM Right $ readFile filename) `catch` leftError 126 where leftError err = return $ Left $ currentPos ++ whileReading ++ show err 127 currentPos = show outerPos 128 whileReading = " reading " ++ show filename ++ ":\n" 129 130 expandPath :: (MonadIO m) => SourcePos -> FilePath -> m FilePath 131 expandPath pos fp = liftM mkRelative (expandHome fp) 132 where 133 mkRelative = combine (takeDirectory (sourceName pos)) 134 expandHome inname | "~/" `isPrefixOf` inname = do homedir <- liftIO getHomeDirectory 135 return $ homedir ++ drop 1 inname 136 | otherwise = return inname 137 138 ledgerAccountBegin :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger)) 139 ledgerAccountBegin = do many1 spacenonewline 140 parent <- ledgeraccountname 141 newline 142 pushParentAccount parent 143 return $ return id 144 145 ledgerAccountEnd :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger)) 146 ledgerAccountEnd = popParentAccount >> return (return id) 147 148 -- parsers 149 150 -- | Parse a RawLedger from either a ledger file or a timelog file. 151 -- It tries first the timelog parser then the ledger parser; this means 152 -- parse errors for ledgers are useful while those for timelogs are not. 153 154 {-| Parse a ledger file. Here is the ledger grammar from the ledger 2.5 manual: 155 156 @ 157 The ledger file format is quite simple, but also very flexible. It supports 158 many options, though typically the user can ignore most of them. They are 159 summarized below. The initial character of each line determines what the 160 line means, and how it should be interpreted. Allowable initial characters 161 are: 162 163 NUMBER A line beginning with a number denotes an entry. It may be followed by any 164 number of lines, each beginning with whitespace, to denote the entry’s account 165 transactions. The format of the first line is: 166 167 DATE[=EDATE] [*|!] [(CODE)] DESC 168 169 If ‘*’ appears after the date (with optional effective date), it indicates the entry 170 is “cleared”, which can mean whatever the user wants it t omean. If ‘!’ appears 171 after the date, it indicates d the entry is “pending”; i.e., tentatively cleared from 172 the user’s point of view, but not yet actually cleared. If a ‘CODE’ appears in 173 parentheses, it may be used to indicate a check number, or the type of the 174 transaction. Following these is the payee, or a description of the transaction. 175 The format of each following transaction is: 176 177 ACCOUNT AMOUNT [; NOTE] 178 179 The ‘ACCOUNT’ may be surrounded by parentheses if it is a virtual 180 transactions, or square brackets if it is a virtual transactions that must 181 balance. The ‘AMOUNT’ can be followed by a per-unit transaction cost, 182 by specifying ‘ AMOUNT’, or a complete transaction cost with ‘\@ AMOUNT’. 183 Lastly, the ‘NOTE’ may specify an actual and/or effective date for the 184 transaction by using the syntax ‘[ACTUAL_DATE]’ or ‘[=EFFECTIVE_DATE]’ or 185 ‘[ACTUAL_DATE=EFFECtIVE_DATE]’. 186 187 = An automated entry. A value expression must appear after the equal sign. 188 After this initial line there should be a set of one or more transactions, just as 189 if it were normal entry. If the amounts of the transactions have no commodity, 190 they will be applied as modifiers to whichever real transaction is matched by 191 the value expression. 192 193 ~ A period entry. A period expression must appear after the tilde. 194 After this initial line there should be a set of one or more transactions, just as 195 if it were normal entry. 196 197 ! A line beginning with an exclamation mark denotes a command directive. It 198 must be immediately followed by the command word. The supported commands 199 are: 200 201 ‘!include’ 202 Include the stated ledger file. 203 ‘!account’ 204 The account name is given is taken to be the parent of all transac- 205 tions that follow, until ‘!end’ is seen. 206 ‘!end’ Ends an account block. 207 208 ; A line beginning with a colon indicates a comment, and is ignored. 209 210 Y If a line begins with a capital Y, it denotes the year used for all subsequent 211 entries that give a date without a year. The year should appear immediately 212 after the Y, for example: ‘Y2004’. This is useful at the beginning of a file, to 213 specify the year for that file. If all entries specify a year, however, this command 214 has no effect. 215 216 217 P Specifies a historical price for a commodity. These are usually found in a pricing 218 history file (see the ‘-Q’ option). The syntax is: 219 220 P DATE SYMBOL PRICE 221 222 N SYMBOL Indicates that pricing information is to be ignored for a given symbol, nor will 223 quotes ever be downloaded for that symbol. Useful with a home currency, such 224 as the dollar ($). It is recommended that these pricing options be set in the price 225 database file, which defaults to ‘~/.pricedb’. The syntax for this command is: 226 227 N SYMBOL 228 229 230 D AMOUNT Specifies the default commodity to use, by specifying an amount in the expected 231 format. The entry command will use this commodity as the default when none 232 other can be determined. This command may be used multiple times, to set 233 the default flags for different commodities; whichever is seen last is used as the 234 default commodity. For example, to set US dollars as the default commodity, 235 while also setting the thousands flag and decimal flag for that commodity, use: 236 237 D $1,000.00 238 239 C AMOUNT1 = AMOUNT2 240 Specifies a commodity conversion, where the first amount is given to be equiv- 241 alent to the second amount. The first amount should use the decimal precision 242 desired during reporting: 243 244 C 1.00 Kb = 1024 bytes 245 246 i, o, b, h 247 These four relate to timeclock support, which permits ledger to read timelog 248 files. See the timeclock’s documentation for more info on the syntax of its 249 timelog files. 250 @ 251 252 See "Tests" for sample data. 253 -} 254 255 emptyLine :: GenParser Char st () 256 emptyLine = do many spacenonewline 257 optional $ (char ';' <?> "comment") >> many (noneOf "\n") 258 newline 259 return () 260 261 ledgercomment :: GenParser Char st String 262 ledgercomment = 263 try (do 264 char ';' 265 many spacenonewline 266 many (noneOf "\n") 267 ) 268 <|> return "" <?> "comment" 269 270 ledgerModifierTransaction :: GenParser Char LedgerFileCtx ModifierTransaction 271 ledgerModifierTransaction = do 272 char '=' <?> "modifier transaction" 273 many spacenonewline 274 valueexpr <- restofline 275 postings <- ledgerpostings 276 return $ ModifierTransaction valueexpr postings 277 278 ledgerPeriodicTransaction :: GenParser Char LedgerFileCtx PeriodicTransaction 279 ledgerPeriodicTransaction = do 280 char '~' <?> "periodic transaction" 281 many spacenonewline 282 periodexpr <- restofline 283 postings <- ledgerpostings 284 return $ PeriodicTransaction periodexpr postings 285 286 ledgerHistoricalPrice :: GenParser Char LedgerFileCtx HistoricalPrice 287 ledgerHistoricalPrice = do 288 char 'P' <?> "historical price" 289 many spacenonewline 290 date <- ledgerdate 291 many spacenonewline 292 symbol1 <- commoditysymbol 293 many spacenonewline 294 (Mixed [Amount c price pri]) <- someamount 295 restofline 296 return $ HistoricalPrice date symbol1 (symbol c) price 297 298 -- like ledgerAccountBegin, updates the LedgerFileCtx 299 ledgerDefaultYear :: GenParser Char LedgerFileCtx (ErrorT String IO (RawLedger -> RawLedger)) 300 ledgerDefaultYear = do 301 char 'Y' <?> "default year" 302 many spacenonewline 303 y <- many1 digit 304 let y' = read y 305 guard (y' >= 1000) 306 setYear y' 307 return $ return id 308 309 -- | Try to parse a ledger entry. If we successfully parse an entry, ensure it is balanced, 310 -- and if we cannot, raise an error. 311 ledgerTransaction :: GenParser Char LedgerFileCtx LedgerTransaction 312 ledgerTransaction = do 313 date <- ledgerdate <?> "transaction" 314 status <- ledgerstatus 315 code <- ledgercode 316 description <- liftM rstrip (many1 (noneOf ";\n") <?> "description") 317 comment <- ledgercomment 318 restofline 319 postings <- ledgerpostings 320 let t = LedgerTransaction date status code description comment postings "" 321 case balanceLedgerTransaction t of 322 Right t' -> return t' 323 Left err -> error err 324 325 ledgerdate :: GenParser Char LedgerFileCtx Day 326 ledgerdate = try ledgerfulldate <|> ledgerpartialdate 327 328 ledgerfulldate :: GenParser Char LedgerFileCtx Day 329 ledgerfulldate = do 330 (y,m,d) <- ymd 331 many spacenonewline 332 return $ fromGregorian (read y) (read m) (read d) 333 334 -- | Match a partial M/D date in a ledger. Warning, this terminates the 335 -- program if it finds a match when there is no default year specified. 336 ledgerpartialdate :: GenParser Char LedgerFileCtx Day 337 ledgerpartialdate = do 338 (_,m,d) <- md 339 many spacenonewline 340 y <- getYear 341 when (y==Nothing) $ error "partial date found, but no default year specified" 342 return $ fromGregorian (fromJust y) (read m) (read d) 343 344 ledgerdatetime :: GenParser Char LedgerFileCtx LocalTime 345 ledgerdatetime = do 346 day <- ledgerdate 347 h <- many1 digit 348 char ':' 349 m <- many1 digit 350 s <- optionMaybe $ do 351 char ':' 352 many1 digit 353 many spacenonewline 354 let tod = TimeOfDay (read h) (read m) (maybe 0 (fromIntegral.read) s) 355 return $ LocalTime day tod 356 357 ledgerstatus :: GenParser Char st Bool 358 ledgerstatus = try (do { char '*' <?> "status"; many1 spacenonewline; return True } ) <|> return False 359 360 ledgercode :: GenParser Char st String 361 ledgercode = try (do { char '(' <?> "code"; code <- anyChar `manyTill` char ')'; many1 spacenonewline; return code } ) <|> return "" 362 363 ledgerpostings :: GenParser Char LedgerFileCtx [Posting] 364 ledgerpostings = many1 $ try ledgerposting 365 366 ledgerposting :: GenParser Char LedgerFileCtx Posting 367 ledgerposting = do 368 many1 spacenonewline 369 status <- ledgerstatus 370 account <- transactionaccountname 371 let (ptype, account') = (postingTypeFromAccountName account, unbracket account) 372 amount <- postingamount 373 many spacenonewline 374 comment <- ledgercomment 375 restofline 376 parent <- getParentAccount 377 return (Posting status account' amount comment ptype) 378 379 -- Qualify with the parent account from parsing context 380 transactionaccountname :: GenParser Char LedgerFileCtx AccountName 381 transactionaccountname = liftM2 (++) getParentAccount ledgeraccountname 382 383 -- | account names may have single spaces inside them, and are terminated by two or more spaces 384 ledgeraccountname :: GenParser Char st String 385 ledgeraccountname = do 386 accountname <- many1 (nonspace <|> singlespace) 387 return $ striptrailingspace accountname 388 where 389 singlespace = try (do {spacenonewline; do {notFollowedBy spacenonewline; return ' '}}) 390 -- couldn't avoid consuming a final space sometimes, harmless 391 striptrailingspace s = if last s == ' ' then init s else s 392 393 -- accountnamechar = notFollowedBy (oneOf "()[]") >> nonspace 394 -- <?> "account name character (non-bracket, non-parenthesis, non-whitespace)" 395 396 postingamount :: GenParser Char st MixedAmount 397 postingamount = 398 try (do 399 many1 spacenonewline 400 a <- someamount <|> return missingamt 401 return a 402 ) <|> return missingamt 403 404 someamount :: GenParser Char st MixedAmount 405 someamount = try leftsymbolamount <|> try rightsymbolamount <|> nosymbolamount 406 407 leftsymbolamount :: GenParser Char st MixedAmount 408 leftsymbolamount = do 409 sym <- commoditysymbol 410 sp <- many spacenonewline 411 (q,p,comma) <- amountquantity 412 pri <- priceamount 413 let c = Commodity {symbol=sym,side=L,spaced=not $ null sp,comma=comma,precision=p} 414 return $ Mixed [Amount c q pri] 415 <?> "left-symbol amount" 416 417 rightsymbolamount :: GenParser Char st MixedAmount 418 rightsymbolamount = do 419 (q,p,comma) <- amountquantity 420 sp <- many spacenonewline 421 sym <- commoditysymbol 422 pri <- priceamount 423 let c = Commodity {symbol=sym,side=R,spaced=not $ null sp,comma=comma,precision=p} 424 return $ Mixed [Amount c q pri] 425 <?> "right-symbol amount" 426 427 nosymbolamount :: GenParser Char st MixedAmount 428 nosymbolamount = do 429 (q,p,comma) <- amountquantity 430 pri <- priceamount 431 let c = Commodity {symbol="",side=L,spaced=False,comma=comma,precision=p} 432 return $ Mixed [Amount c q pri] 433 <?> "no-symbol amount" 434 435 commoditysymbol :: GenParser Char st String 436 commoditysymbol = many1 (noneOf "-.0123456789;\n ") <?> "commodity symbol" 437 438 priceamount :: GenParser Char st (Maybe MixedAmount) 439 priceamount = 440 try (do 441 many spacenonewline 442 char '@' 443 many spacenonewline 444 a <- someamount 445 return $ Just a 446 ) <|> return Nothing 447 448 -- gawd.. trying to parse a ledger number without error: 449 450 -- | parse a ledger-style numeric quantity and also return the number of 451 -- digits to the right of the decimal point and whether thousands are 452 -- separated by comma. 453 amountquantity :: GenParser Char st (Double, Int, Bool) 454 amountquantity = do 455 sign <- optionMaybe $ string "-" 456 (intwithcommas,frac) <- numberparts 457 let comma = ',' `elem` intwithcommas 458 let precision = length frac 459 -- read the actual value. We expect this read to never fail. 460 let int = filter (/= ',') intwithcommas 461 let int' = if null int then "0" else int 462 let frac' = if null frac then "0" else frac 463 let sign' = fromMaybe "" sign 464 let quantity = read $ sign'++int'++"."++frac' 465 return (quantity, precision, comma) 466 <?> "commodity quantity" 467 468 -- | parse the two strings of digits before and after a possible decimal 469 -- point. The integer part may contain commas, or either part may be 470 -- empty, or there may be no point. 471 numberparts :: GenParser Char st (String,String) 472 numberparts = numberpartsstartingwithdigit <|> numberpartsstartingwithpoint 473 474 numberpartsstartingwithdigit :: GenParser Char st (String,String) 475 numberpartsstartingwithdigit = do 476 let digitorcomma = digit <|> char ',' 477 first <- digit 478 rest <- many digitorcomma 479 frac <- try (do {char '.'; many digit >>= return}) <|> return "" 480 return (first:rest,frac) 481 482 numberpartsstartingwithpoint :: GenParser Char st (String,String) 483 numberpartsstartingwithpoint = do 484 char '.' 485 frac <- many1 digit 486 return ("",frac) 487 488 489 {-| Parse a timelog entry. Here is the timelog grammar from timeclock.el 2.6: 490 491 @ 492 A timelog contains data in the form of a single entry per line. 493 Each entry has the form: 494 495 CODE YYYY/MM/DD HH:MM:SS [COMMENT] 496 497 CODE is one of: b, h, i, o or O. COMMENT is optional when the code is 498 i, o or O. The meanings of the codes are: 499 500 b Set the current time balance, or \"time debt\". Useful when 501 archiving old log data, when a debt must be carried forward. 502 The COMMENT here is the number of seconds of debt. 503 504 h Set the required working time for the given day. This must 505 be the first entry for that day. The COMMENT in this case is 506 the number of hours in this workday. Floating point amounts 507 are allowed. 508 509 i Clock in. The COMMENT in this case should be the name of the 510 project worked on. 511 512 o Clock out. COMMENT is unnecessary, but can be used to provide 513 a description of how the period went, for example. 514 515 O Final clock out. Whatever project was being worked on, it is 516 now finished. Useful for creating summary reports. 517 @ 518 519 Example: 520 521 i 2007/03/10 12:26:00 hledger 522 o 2007/03/10 17:26:02 523 524 -} 525 timelogentry :: GenParser Char LedgerFileCtx TimeLogEntry 526 timelogentry = do 527 code <- oneOf "bhioO" 528 many1 spacenonewline 529 datetime <- ledgerdatetime 530 comment <- liftM2 (++) getParentAccount restofline 531 return $ TimeLogEntry (read [code]) datetime comment 532 533 534 -- misc parsing 535 536 -- | Parse a --display expression which is a simple date predicate, like 537 -- "d>[DATE]" or "d<=[DATE]", and return a transaction-matching predicate. 538 datedisplayexpr :: GenParser Char st (Transaction -> Bool) 539 datedisplayexpr = do 540 char 'd' 541 op <- compareop 542 char '[' 543 (y,m,d) <- smartdate 544 char ']' 545 let ltdate = parsedate $ printf "%04s/%02s/%02s" y m d 546 let matcher = \(Transaction{tdate=d}) -> 547 case op of 548 "<" -> d < ltdate 549 "<=" -> d <= ltdate 550 "=" -> d == ltdate 551 "==" -> d == ltdate -- just in case 552 ">=" -> d >= ltdate 553 ">" -> d > ltdate 554 return matcher 555 556 compareop = choice $ map (try . string) ["<=",">=","==","<","=",">"] 557