项目作者: hrbrmstr

项目描述 :
📇 Extract contents from Outlook '.msg' files in R
高级语言: C
项目地址: git://github.com/hrbrmstr/msgxtractr.git
创建时间: 2017-08-23T11:23:49Z
项目社区:https://github.com/hrbrmstr/msgxtractr

开源协议:

下载


Build Status
AppVeyor Build Status
codecov

msgxtractr : Read Outlook ‘.msg’ Files

‘Microsoft’ ‘Outlook’ messages can be saved in ‘.msg’ files. Tools are
provided that enable extraction of metadata, envelope, headers, body and
attachments from these files.

The following functions are implemented:

  • read_msg: Read in an Outlook ‘.msg’ file
  • save_attachments: Save all attachments from a ‘msg’ object
  • tidy_msg: Turn a ‘msg’ object into a ‘tibble’

Installation

  1. devtools::install_github("hrbrmstr/msgxtractr")

Usage

  1. library(msgxtractr)
  2. # current version
  3. packageVersion("msgxtractr")
  1. ## [1] '0.3.0'
  1. str(msg1 <- read_msg(system.file("extdata/unicode.msg", package="msgxtractr")))
  1. ## List of 8
  2. ## $ headers : tibble [1 × 18] (S3: tbl_df/tbl/data.frame)
  3. ## ..$ Return-path : chr "<brizhou@gmail.com>"
  4. ## ..$ Received :List of 1
  5. ## .. ..$ : chr [1:4] "from st11p00mm-smtpin007.mac.com ([17.172.84.240])\nby ms06561.mac.com (Oracle Communications Messaging Server "| __truncated__ "from mail-vc0-f182.google.com ([209.85.220.182])\nby st11p00mm-smtpin007.mac.com\n(Oracle Communications Messag"| __truncated__ "by mail-vc0-f182.google.com with SMTP id ie18so3484487vcb.13 for\n<brianzhou@me.com>; Mon, 18 Nov 2013 00:26:25 -0800 (PST)" "by 10.58.207.196 with HTTP; Mon, 18 Nov 2013 00:26:24 -0800 (PST)"
  6. ## ..$ Original-recipient : chr "rfc822;brianzhou@me.com"
  7. ## ..$ Received-SPF : chr "pass (st11p00mm-smtpin006.mac.com: domain of brizhou@gmail.com\ndesignates 209.85.220.182 as permitted sender)\"| __truncated__
  8. ## ..$ DKIM-Signature : chr "v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com;\ns=20120113; h=mime-version:date:message-id:subject:f"| __truncated__
  9. ## ..$ MIME-version : chr "1.0"
  10. ## ..$ X-Received : chr "by 10.221.47.193 with SMTP id ut1mr14470624vcb.8.1384763184960;\nMon, 18 Nov 2013 00:26:24 -0800 (PST)"
  11. ## ..$ Date : chr "Mon, 18 Nov 2013 10:26:24 +0200"
  12. ## ..$ Message-id : chr "<CADtJ4eNjQSkGcBtVteCiTF+YFG89+AcHxK3QZ=-Mt48xygkvdQ@mail.gmail.com>"
  13. ## ..$ Subject : chr "Test for TIF files"
  14. ## ..$ From : chr "Brian Zhou <brizhou@gmail.com>"
  15. ## ..$ To : chr "brianzhou@me.com"
  16. ## ..$ Cc : chr "Brian Zhou <brizhou@gmail.com>"
  17. ## ..$ Content-type : chr "multipart/mixed; boundary=001a113392ecbd7a5404eb6f4d6a"
  18. ## ..$ Authentication-results : chr "st11p00mm-smtpin007.mac.com; dkim=pass\nreason=\"2048-bit key\" header.d=gmail.com header.i=@gmail.com\nheader."| __truncated__
  19. ## ..$ x-icloud-spam-score : chr "33322\nf=gmail.com;e=gmail.com;pp=ham;spf=pass;dkim=pass;wl=absent;pwl=absent"
  20. ## ..$ X-Proofpoint-Virus-Version: chr "vendor=fsecure\nengine=2.50.10432:5.10.8794,1.0.14,0.0.0000\ndefinitions=2013-11-18_02:2013-11-18,2013-11-17,19"| __truncated__
  21. ## ..$ X-Proofpoint-Spam-Details : chr "rule=notspam policy=default score=0 spamscore=0\nsuspectscore=0 phishscore=0 bulkscore=0 adultscore=0 classifie"| __truncated__
  22. ## $ sender :List of 2
  23. ## ..$ sender_email: chr "brizhou@gmail.com"
  24. ## ..$ sender_name : chr "Brian Zhou"
  25. ## $ recipients :List of 2
  26. ## ..$ :List of 3
  27. ## .. ..$ display_name : NULL
  28. ## .. ..$ address_type : chr "SMTP"
  29. ## .. ..$ email_address: chr "brianzhou@me.com"
  30. ## ..$ :List of 3
  31. ## .. ..$ display_name : NULL
  32. ## .. ..$ address_type : chr "SMTP"
  33. ## .. ..$ email_address: chr "brizhou@gmail.com"
  34. ## $ subject : chr "Test for TIF files"
  35. ## $ body :List of 2
  36. ## ..$ text: chr "This is a test email to experiment with the MS Outlook MSG Extractor\r\n\r\n\r\n-- \r\n\r\n\r\nKind regards\r\n"| __truncated__
  37. ## ..$ html: NULL
  38. ## $ attachments :List of 2
  39. ## ..$ :List of 4
  40. ## .. ..$ filename : chr "importOl.tif"
  41. ## .. ..$ long_filename: chr "import OleFileIO.tif"
  42. ## .. ..$ mime : chr "image/tiff"
  43. ## .. ..$ content : raw [1:969674] 49 49 2a 00 ...
  44. ## ..$ :List of 4
  45. ## .. ..$ filename : chr "raisedva.tif"
  46. ## .. ..$ long_filename: chr "raised value error.tif"
  47. ## .. ..$ mime : chr "image/tiff"
  48. ## .. ..$ content : raw [1:1033142] 49 49 2a 00 ...
  49. ## $ display_envelope:List of 2
  50. ## ..$ display_cc: chr "Brian Zhou"
  51. ## ..$ display_to: chr "brianzhou@me.com"
  52. ## $ times :List of 3
  53. ## ..$ creation_time: NULL
  54. ## ..$ last_mod_time: NULL
  55. ## ..$ last_mod_name: NULL
  56. ## - attr(*, "class")= chr "msg"
  1. print(msg1)
  1. ## Mon, 18 Nov 2013 10:26:24 +0200
  2. ## From: Brian Zhou <brizhou@gmail.com>
  3. ## To: brianzhou@me.com
  4. ## Subject: Test for TIF files
  5. ## Attachments: 2
  1. str(msg2 <- read_msg(system.file("extdata/TestMessage-ansi.msg", package="msgxtractr")))
  1. ## List of 8
  2. ## $ headers : NULL
  3. ## $ sender : list()
  4. ## $ recipients :List of 3
  5. ## ..$ :List of 3
  6. ## .. ..$ display_name : NULL
  7. ## .. ..$ address_type : NULL
  8. ## .. ..$ email_address: NULL
  9. ## ..$ :List of 3
  10. ## .. ..$ display_name : NULL
  11. ## .. ..$ address_type : NULL
  12. ## .. ..$ email_address: NULL
  13. ## ..$ :List of 3
  14. ## .. ..$ display_name : NULL
  15. ## .. ..$ address_type : NULL
  16. ## .. ..$ email_address: NULL
  17. ## $ subject : NULL
  18. ## $ body :List of 2
  19. ## ..$ text: NULL
  20. ## ..$ html: NULL
  21. ## $ attachments :List of 1
  22. ## ..$ :List of 4
  23. ## .. ..$ filename : NULL
  24. ## .. ..$ long_filename: NULL
  25. ## .. ..$ mime : NULL
  26. ## .. ..$ content : raw [1:10934] 50 4b 03 04 ...
  27. ## $ display_envelope: list()
  28. ## $ times :List of 3
  29. ## ..$ creation_time: NULL
  30. ## ..$ last_mod_time: NULL
  31. ## ..$ last_mod_name: NULL
  32. ## - attr(*, "class")= chr "msg"
  1. str(msg3 <- read_msg(system.file("extdata/TestMessage-default.msg", package="msgxtractr")))
  1. ## List of 8
  2. ## $ headers : NULL
  3. ## $ sender :List of 2
  4. ## ..$ sender_email: chr "sender@example.com"
  5. ## ..$ sender_name : chr "Sender"
  6. ## $ recipients :List of 3
  7. ## ..$ :List of 3
  8. ## .. ..$ display_name : NULL
  9. ## .. ..$ address_type : chr "SMTP"
  10. ## .. ..$ email_address: chr "recipient1@example.com"
  11. ## ..$ :List of 3
  12. ## .. ..$ display_name : NULL
  13. ## .. ..$ address_type : chr "SMTP"
  14. ## .. ..$ email_address: chr "cc1@example.com"
  15. ## ..$ :List of 3
  16. ## .. ..$ display_name : NULL
  17. ## .. ..$ address_type : chr "SMTP"
  18. ## .. ..$ email_address: chr "recipient2@example.com"
  19. ## $ subject : chr "New Message!"
  20. ## $ body :List of 2
  21. ## ..$ text: chr "This is some bold html!"
  22. ## ..$ html: chr "<HTML><HEAD>\r\n<META content=\"text/html; charset=UTF-8\" http-equiv=Content-Type>\r\n<META name=GENERATOR con"| __truncated__
  23. ## $ attachments :List of 1
  24. ## ..$ :List of 4
  25. ## .. ..$ filename : chr "TestAttachment1.xlsx"
  26. ## .. ..$ long_filename: chr "TestAttachment1.xlsx"
  27. ## .. ..$ mime : NULL
  28. ## .. ..$ content : raw [1:10934] 50 4b 03 04 ...
  29. ## $ display_envelope:List of 2
  30. ## ..$ display_cc: chr "CC1"
  31. ## ..$ display_to: chr "Recipient 1; Recipient 2"
  32. ## $ times :List of 3
  33. ## ..$ creation_time: NULL
  34. ## ..$ last_mod_time: NULL
  35. ## ..$ last_mod_name: NULL
  36. ## - attr(*, "class")= chr "msg"
  1. str(msg4 <- read_msg(system.file("extdata/TestMessage-unicode.msg", package="msgxtractr")))
  1. ## List of 8
  2. ## $ headers : NULL
  3. ## $ sender :List of 2
  4. ## ..$ sender_email: chr "sender@example.com"
  5. ## ..$ sender_name : chr "Sender"
  6. ## $ recipients :List of 3
  7. ## ..$ :List of 3
  8. ## .. ..$ display_name : NULL
  9. ## .. ..$ address_type : chr "SMTP"
  10. ## .. ..$ email_address: chr "recipient1@example.com"
  11. ## ..$ :List of 3
  12. ## .. ..$ display_name : NULL
  13. ## .. ..$ address_type : chr "SMTP"
  14. ## .. ..$ email_address: chr "cc1@example.com"
  15. ## ..$ :List of 3
  16. ## .. ..$ display_name : NULL
  17. ## .. ..$ address_type : chr "SMTP"
  18. ## .. ..$ email_address: chr "recipient2@example.com"
  19. ## $ subject : chr "New Message!"
  20. ## $ body :List of 2
  21. ## ..$ text: chr "This is some bold html!"
  22. ## ..$ html: chr "<HTML><HEAD>\r\n<META content=\"text/html; charset=UTF-8\" http-equiv=Content-Type>\r\n<META name=GENERATOR con"| __truncated__
  23. ## $ attachments :List of 1
  24. ## ..$ :List of 4
  25. ## .. ..$ filename : chr "TestAttachment1.xlsx"
  26. ## .. ..$ long_filename: chr "TestAttachment1.xlsx"
  27. ## .. ..$ mime : NULL
  28. ## .. ..$ content : raw [1:10934] 50 4b 03 04 ...
  29. ## $ display_envelope:List of 2
  30. ## ..$ display_cc: chr "CC1"
  31. ## ..$ display_to: chr "Recipient 1; Recipient 2"
  32. ## $ times :List of 3
  33. ## ..$ creation_time: NULL
  34. ## ..$ last_mod_time: NULL
  35. ## ..$ last_mod_name: NULL
  36. ## - attr(*, "class")= chr "msg"
  1. str(tidy_msg(msg1), 2)
  1. ## tibble [1 × 8] (S3: tbl_df/tbl/data.frame)
  2. ## $ headers :List of 1
  3. ## $ sender :List of 1
  4. ## $ recipients :List of 1
  5. ## $ subject : chr "Test for TIF files"
  6. ## $ body :List of 1
  7. ## $ attachments :List of 1
  8. ## $ display_envelope:List of 1
  9. ## $ times :List of 1
  1. str(tidy_msg(msg2), 2)
  1. ## tibble [1 × 4] (S3: tbl_df/tbl/data.frame)
  2. ## $ recipients :List of 1
  3. ## $ body :List of 1
  4. ## $ attachments:List of 1
  5. ## $ times :List of 1
  1. str(tidy_msg(msg3), 2)
  1. ## tibble [1 × 7] (S3: tbl_df/tbl/data.frame)
  2. ## $ sender :List of 1
  3. ## $ recipients :List of 1
  4. ## $ subject : chr "New Message!"
  5. ## $ body :List of 1
  6. ## $ attachments :List of 1
  7. ## $ display_envelope:List of 1
  8. ## $ times :List of 1
  1. str(tidy_msg(msg4), 2)
  1. ## tibble [1 × 7] (S3: tbl_df/tbl/data.frame)
  2. ## $ sender :List of 1
  3. ## $ recipients :List of 1
  4. ## $ subject : chr "New Message!"
  5. ## $ body :List of 1
  6. ## $ attachments :List of 1
  7. ## $ display_envelope:List of 1
  8. ## $ times :List of 1

Code of Conduct

Please note that this project is released with a Contributor Code of
Conduct. By participating in this project you agree to abide by its
terms.