Thanks @chrillek for your help.
This is, for now, the final version of the script. It:
- Adds the mail to DEVONthink
- Adds all PDF documents to DEVONthink - based on MIME-type
- Archives added mails based on the year the mail was received
- Handles duplicates in the selection (by not parsing/moving them)
- Takes ~1:20 min for 194 mails including 4 duplicates - removing the duplicate check does not reduce the runtime significantly
- Logs found duplicates and sanitises the log output
If you’re interested in using this script:
- Create a file at “~/Library/Scripts/Applications/Mail/Add mail and PDF attachments to DEVONthink.scpt” with the content given below.
- Open Mail.app
- Select mails
- Run the script from the global script menu
- Mails from the selection that are left over afterwards might be duplicates (same date sent, subject and sender)
-- Import attachments of selected messages to DEVONthink.
-- Created by Christian Grunenberg on Fri May 18 2012.
-- Copyright (c) 2012-2020. All rights reserved.
use AppleScript version "2.4" -- Yosemite (10.10) or later
use scripting additions
property pNoSubjectString : "(no subject)"
property pMailboxAccount : "CHANGE ME"
-- property pArchiveMailbox : "Archive"
property pArchiveMailbox : "Archive.Test"
-- Entry point: imports every selected Mail message (and its PDF attachments)
-- into DEVONthink and archives each message afterwards.
tell application "Mail"
	try
		-- Abort early when DEVONthink has no database to import into.
		tell application id "DNtp"
			if not (exists current database) then error "No database is in use."
		end tell
		set pickedMessages to the selection
		if (length of pickedMessages) < 1 then error "One or more messages must be selected."
		my createMailboxForThisYear(pMailboxAccount, pArchiveMailbox)
		-- Keys of the messages handled so far; isUnique appends to this list
		-- so that duplicates inside the selection are skipped.
		set seenMessageKeys to {}
		repeat with pickedMessage in pickedMessages
			my parseMail(pickedMessage, seenMessageKeys, pMailboxAccount, pArchiveMailbox)
		end repeat
	on error errorText number errorCode
		-- Error -128 means the user cancelled; no alert in that case.
		if errorCode is not -128 then display alert "Mail" message errorText as warning
	end try
end tell
-- Makes sure the archive sub-mailbox for the current calendar year exists.
--   theMailboxAccount: name of the Mail account that holds the archive mailbox
--   theArchiveMailbox: name of the parent archive mailbox; the year is appended as "/<year>"
-- The mailbox is only created when it is not present yet, so repeated runs of
-- the script do not re-issue the creation request.
on createMailboxForThisYear(theMailboxAccount, theArchiveMailbox)
	set theYearMailboxName to theArchiveMailbox & "/" & (year of (current date))
	tell application "Mail"
		if not (exists mailbox theYearMailboxName of account theMailboxAccount) then
			make new mailbox in account theMailboxAccount with properties {name:theYearMailboxName}
		end if
	end tell
end createMailboxForThisYear
-- Marks a message as read and moves it to "<archive>/<year received>".
--   theMessage: the Mail message to archive
--   theMailboxAccount: name of the Mail account that holds the archive mailbox
--   theArchiveMailbox: name of the parent archive mailbox
-- The target mailbox is derived from the year the message was received, which
-- can differ from the current year. It is therefore created on demand here;
-- otherwise moving an older message would fail because its year mailbox was
-- never created.
on archiveMail(theMessage, theMailboxAccount, theArchiveMailbox)
	tell application "Mail"
		tell theMessage to set theDateReceived to the date received
		set theYear to (year of theDateReceived)
		set theTargetName to theArchiveMailbox & "/" & theYear
		if not (exists mailbox theTargetName of account theMailboxAccount) then
			make new mailbox in account theMailboxAccount with properties {name:theTargetName}
		end if
		set the read status of theMessage to true
		set mailbox of theMessage to mailbox theTargetName of account theMailboxAccount
	end tell
end archiveMail
-- Processes one selected message: imports it and its attachments into
-- DEVONthink, then archives it in Mail.
--   theMessage: the Mail message to process
--   messageList: list of keys of already processed messages (extended by isUnique)
--   theMailboxAccount / theArchiveMailbox: passed through to archiveMail
-- Messages that isUnique reports as duplicates are left untouched.
on parseMail(theMessage, messageList, theMailboxAccount, theArchiveMailbox)
	-- Guard clause: nothing to do for a duplicate.
	if not (my isUnique(theMessage, messageList)) then return
	tell application "Mail"
		set importedMailRecord to my importMail(theMessage)
		repeat with oneAttachment in mail attachments of theMessage
			my parseAttachment(oneAttachment, importedMailRecord, theMessage)
		end repeat
	end tell
	my archiveMail(theMessage, theMailboxAccount, theArchiveMailbox)
end parseMail
-- Imports a Mail message into DEVONthink as an .eml record.
--   theMessage: the Mail message to import
-- Returns the DEVONthink record that was created.
-- The record is first created as "<subject>.eml", the import smart rule
-- trigger is fired, and the record is then renamed to
-- "<author>-<message id>-<subject>".
on importMail(theMessage)
	tell application "Mail"
		tell theMessage
			set mailSubject to subject
			set mailSender to the sender
			set mailSource to the source
			set mailId to the id
			set sentOn to the date sent
			set receivedOn to the date received
			set wasRead to the read status
		end tell
	end tell
	-- Messages without a subject get a placeholder name.
	if mailSubject is equal to "" then set mailSubject to pNoSubjectString
	tell application id "DNtp"
		set recordProperties to {name:mailSubject & ".eml", type:unknown, creation date:sentOn, modification date:receivedOn, URL:mailSender, source:(mailSource as string), unread:(not wasRead)}
		set theRecord to create record with recordProperties
		perform smart rule trigger import event record theRecord
		-- Take the author from the record's metadata; fall back to the
		-- e-mail address key when the authors key is not available.
		set recordMetaData to meta data of theRecord
		try
			set authorLabel to |kMDItemAuthors| of recordMetaData
		on error
			set authorLabel to |kMDItemAuthorEmailAddresses| of recordMetaData
		end try
		set the name of theRecord to authorLabel & "-" & mailId & "-" & mailSubject
	end tell
	return theRecord
end importMail
-- Saves one attachment to the temporary folder and imports it into DEVONthink
-- when its content is a PDF.
--   theAttachment: the Mail attachment to process
--   theMailRecord: DEVONthink record of the mail the attachment belongs to
--   theMessage: the Mail message the attachment belongs to
-- Attachments that are not downloaded yet are skipped. The temporary copy is
-- removed once it has been processed (PDF or not, success or failure) so the
-- temporary folder does not fill up with every attachment of every mail.
on parseAttachment(theAttachment, theMailRecord, theMessage)
	tell application "Mail"
		if not downloaded of theAttachment then return
		set theFolder to (POSIX path of (path to temporary items))
		set theFile to theFolder & (name of theAttachment)
		tell theAttachment to save in theFile
	end tell
	-- Remember a failure instead of propagating it immediately, so the
	-- temporary file is cleaned up on every path.
	set theFailure to missing value
	try
		if my isPDF(theFile) then my importAttachment(theFile, theMailRecord, theMessage)
	on error errorMessage number errorNumber
		set theFailure to {errorMessage, errorNumber}
	end try
	try
		-- Best effort: a failed cleanup must not abort the import run.
		do shell script "rm -f " & quoted form of theFile
	end try
	if theFailure is not missing value then error (item 1 of theFailure) number (item 2 of theFailure)
end parseAttachment
-- Imports a saved attachment file into DEVONthink and links it to its mail.
--   theFile: POSIX path of the saved attachment
--   theMailRecord: DEVONthink record of the mail the attachment belongs to
--   theMessage: the Mail message the attachment belongs to
-- The new record takes over the URL of the mail record and the read state of
-- the message, and is renamed to
-- "<author>-<message id>-<subject>-<original name>".
on importAttachment(theFile, theMailRecord, theMessage)
	tell application "Mail"
		tell theMessage
			-- Only the properties that are actually used are fetched; in
			-- particular the raw message source is not needed here.
			set {theMessageId, theSubject, theReadFlag} to {the id, subject, the read status}
		end tell
	end tell
	-- Same placeholder as used for the mail record itself.
	if theSubject is equal to "" then set theSubject to pNoSubjectString
	tell application id "DNtp"
		set theMetaData to meta data of theMailRecord
		set theUrl to URL of theMailRecord
		-- Take the author from the mail record's metadata; fall back to the
		-- e-mail address key when the authors key is not available.
		try
			set theAuthor to |kMDItemAuthors| of theMetaData
		on error
			set theAuthor to |kMDItemAuthorEmailAddresses| of theMetaData
		end try
		set theAttachmentRecord to import theFile
		set unread of theAttachmentRecord to (not theReadFlag)
		set URL of theAttachmentRecord to theUrl
		set theOldName to the (name of theAttachmentRecord)
		set name of theAttachmentRecord to theAuthor & "-" & theMessageId & "-" & theSubject & "-" & theOldName
		perform smart rule trigger import event record theAttachmentRecord
	end tell
end importAttachment
-- Tells whether a message has not been seen before in the current run.
--   theMessage: the Mail message to check
--   messageList: list of keys of the messages seen so far; the key of a new
--                message is appended to it
-- Returns true for a new message, false for a duplicate. Two messages count
-- as duplicates when sender, subject and date sent are equal. Duplicates are
-- written to the log with sanitised sender and subject.
on isUnique(theMessage, messageList)
	tell application "Mail"
		tell theMessage
			set {sentOn, senderText, subjectText} to {the date sent, the sender, subject}
		end tell
	end tell
	set messageKey to (senderText & subjectText & sentOn)
	-- The key is wrapped in a list so that "contains" compares whole items.
	if messageList contains {messageKey} then
		log "[WARN] msg=\"Duplicate in selection detected\" sender=\"" & my sanitizeString(senderText) & "\"" & " date-sent=\"" & (the short date string of sentOn) & " " & (the time string of sentOn) & "\" subject=\"" & my sanitizeString(subjectText) & "\""
		return false
	end if
	set end of messageList to messageKey
	return true
end isUnique
-- Tells whether the file at the given POSIX path is a PDF.
--   thePath: POSIX path of the file to inspect
-- Returns true when the "file" command reports the MIME type
-- "application/pdf", false otherwise. The check is based on the file's
-- content as seen by "file", not on its name.
on isPDF(thePath)
	set detectedType to do shell script "file -b --mime-type " & quoted form of thePath
	return (detectedType is "application/pdf")
end isPDF
-- Strips every character that is not alphanumeric, whitespace, ".", "_" or
-- "-" from a string, for safe use in log output.
--   theString: the text to clean
-- Returns the cleaned text.
-- The text is handed to sed with printf rather than echo: echo in the shell
-- used by "do shell script" interprets backslash sequences, so input such as
-- "\c" would truncate the output before sed ever sees it.
on sanitizeString(theString)
	set theCleanedString to do shell script "printf '%s\\n' " & quoted form of (theString as text) & " | sed 's/[^[:alnum:][:space:]._-]//g'"
	return theCleanedString
end sanitizeString