Script: Convert RTF to MultiMarkdown

pete31 · April 25, 2020, 6:19am

The version above is old!

This script converts RTF to MultiMarkdown.

It needs Pandoc and RegexAndStuffLib installed (put the “RegexAndStuffLib” script in /Users/Username/Library/Script Libraries/).

There’s an option to remove the empty lines that Pandoc produces (removing unwanted lines afterwards is not ideal, but I couldn’t find a Pandoc option that avoids them …). If the resulting Markdown record doesn’t look similar to the RTF record in unrendered view, try again with removeEmptyLines set to false.

Make sure to uncomment / add all properties you’d like the markdown record to take over from the rtf.

-- Convert RTF to MultiMarkdown (via textutil and pandoc)
-- This script needs Pandoc (https://pandoc.org/installing.html) and RegexAndStuffLib (https://latenightsw.com/support/freeware/) installed.
-- It does not support RTFD

use scripting additions
use script "RegexAndStuffLib" version "1.0.6"

property removeEmptyLines : true

-- Main routine: converts every RTF record in the current selection (or the
-- record shown in the frontmost document window) to a MultiMarkdown record
-- inside a group chosen by the user. Records whose conversion fails get label 1.
tell application id "DNtp"
	try
		-- Collect the records to convert from the frontmost window.
		set windowClass to class of window 1
		if {viewer window, search window} contains windowClass then
			set currentRecord_s to selection of window 1
		else if windowClass = document window then
			set currentRecord_s to content record of window 1 as list
		end if
		
		set theOutputGroup to display group selector
		
		-- "defaults read" returns text (e.g. "0" or "1"), never a number.
		set displaySuffix to do shell script "defaults read com.devon-technologies.think3 DisplaySuffix"
		
		show progress indicator "Converting... " steps (count of currentRecord_s) with cancel button
		
		repeat with thisRecord in currentRecord_s
			-- Honour the cancel button of the progress indicator.
			if cancelled progress then exit repeat
			
			if type of thisRecord = rtf then
				try
					-- Compare with text: AppleScript does not coerce "0" to 0 for "=",
					-- so comparing with the integer 0 would never be true.
					if displaySuffix = "0" then
						set theName to name of thisRecord
					else
						set theName to my basename(name of thisRecord)
					end if
					
					step progress indicator theName
					
					-- A "/" in the name would break the temporary file path, so the
					-- name is percent-encoded here and decoded again on the new record.
					if theName contains "/" then
						set theName to my encode_Text(theName, true, true) -- encode in case the name contains e.g. an url
						set encodedName to true
					else
						set encodedName to false
					end if
					
					set thePath to path of thisRecord
					set theOutputPath to (POSIX path of (path to temporary items folder) & theName & ".md") as string
					
					-- RTF -> HTML (textutil) -> MultiMarkdown (pandoc), written to the temporary file.
					set convertToMultiMarkdown to do shell script "textutil " & quoted form of thePath & " -convert html -stdout | /usr/local/bin/pandoc -f html-native_divs-native_spans -t markdown_mmd --wrap=preserve -o " & quoted form of theOutputPath
					
					-- Bring the temporary file into the output group, then remove the temporary file.
					set newRecord to indicate theOutputPath to theOutputGroup
					consolidate record newRecord
					
					tell application "Finder" to delete file (POSIX file theOutputPath as alias)
					
					tell newRecord
						-- Properties the Markdown record takes over from the RTF record.
						set URL to (URL of thisRecord)
						set comment to (comment of thisRecord)
						#set creation date to (creation date of thisRecord)
						#set addition date to (addition date of thisRecord)
						#set modification date to (modification date of thisRecord)
						
						set theText to plain text
						set theParagraphs to paragraphs of theText
						
						-- Escape colons in the first line (presumably so MultiMarkdown does not
						-- read it as a metadata line). Empty text is skipped, and a
						-- one-paragraph text is handled without asking for "paragraphs 2 thru -1",
						-- which would raise an error.
						if (count of theParagraphs) > 0 then
							set firstLine to item 1 of theParagraphs
							if firstLine contains ":" then
								set escapedFirstLine to regex change firstLine search pattern (":") replace template ("\\\\:")
								if (count of theParagraphs) > 1 then
									set escapedText_List to ((escapedFirstLine as list) & (items 2 thru -1 of theParagraphs)) as list
									set escapedText to my string_From_List(escapedText_List, linefeed)
								else
									set escapedText to escapedFirstLine as string
								end if
								set plain text to escapedText
								set theText to plain text
							end if
						end if
						
						if removeEmptyLines = true then
							-- Replace each pair of newlines with two spaces plus a linefeed,
							-- then blank out lines that consist only of spaces.
							set cleanText_1 to regex change theText search pattern ("\\n\\n") replace template (space & space & linefeed)
							set cleanText_2 to regex change cleanText_1 search pattern ("^ +$") replace template ("")
							set plain text to cleanText_2
						end if
						
						if encodedName = true then
							set name to my decode_Text(name)
						end if
					end tell
					
				on error
					-- Best effort: mark the source record instead of aborting the whole run.
					set label of thisRecord to 1
				end try
			end if
		end repeat
		
		hide progress indicator
		
		open window for record theOutputGroup
		activate
		
	on error error_message number error_number
		hide progress indicator
		-- -128 is "user cancelled"; anything else is reported.
		if the error_number is not -128 then display alert "DEVONthink" message error_message as warning
		return
	end try
end tell

-- Returns filename without its last dot-separated extension.
-- A name without any dot, or whose only dot is the leading character
-- (e.g. ".hidden"), is returned unchanged.
on basename(filename)
	set filename to filename as string
	if filename does not contain "." then return filename
	
	-- Split on "." with explicitly set delimiters so the result does not depend
	-- on whatever text item delimiters happen to be active; restore them afterwards.
	set savedDelimiters to AppleScript's text item delimiters
	set AppleScript's text item delimiters to "."
	set nameParts to text items of filename
	set theBasename to (items 1 thru -2 of nameParts) as string
	set AppleScript's text item delimiters to savedDelimiters
	
	-- Everything before the last dot is empty: treat the dot as part of the name.
	if theBasename is "" then return filename
	return theBasename
end basename

-- Percent-encodes theText character by character.
-- Letters and digits are always kept. The "common" special characters are kept
-- only when encodeCommonSpecialCharacters is false, and the "extended" ones
-- (. - _ :) only when encodeExtendedSpecialCharacters is false. Every other
-- character is replaced by the result of encodeCharacter.
on encode_Text(theText, encodeCommonSpecialCharacters, encodeExtendedSpecialCharacters)
	-- Build the set of characters that pass through unchanged.
	set keptCharacters to "abcdefghijklmnopqrstuvwxyz0123456789"
	if encodeCommonSpecialCharacters is false then
		set keptCharacters to keptCharacters & "$+!'/?;&@=#%><{}\"~`^\\|*"
	end if
	if encodeExtendedSpecialCharacters is false then
		set keptCharacters to keptCharacters & ".-_:"
	end if
	
	set encodedResult to ""
	repeat with characterIndex from 1 to (count of theText)
		set currentCharacter to item characterIndex of theText
		if currentCharacter is in keptCharacters then
			set encodedResult to (encodedResult & currentCharacter)
		else
			set encodedResult to (encodedResult & encodeCharacter(currentCharacter)) as string
		end if
	end repeat
	return encodedResult
end encode_Text

-- Returns the percent-encoded form of theCharacter: one "%XX" group per byte
-- of the character's UTF-8 representation (e.g. "/" -> "%2F", "é" -> "%C3%A9").
-- Uses the text's Unicode code points instead of the deprecated "ASCII number",
-- which gives wrong values for non-ASCII characters.
on encodeCharacter(theCharacter)
	set theHexList to {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"}
	
	-- "id of" yields one integer for a single code point and a list when the
	-- character consists of several code points; normalise to a list.
	set theCodePoints to id of (theCharacter as string)
	if class of theCodePoints is not list then set theCodePoints to {theCodePoints}
	
	set theEncodedCharacter to ""
	repeat with thisCodePoint in theCodePoints
		set cp to contents of thisCodePoint
		
		-- Split the code point into its UTF-8 bytes.
		if cp < 128 then
			set theBytes to {cp}
		else if cp < 2048 then
			set theBytes to {192 + (cp div 64), 128 + (cp mod 64)}
		else if cp < 65536 then
			set theBytes to {224 + (cp div 4096), 128 + ((cp div 64) mod 64), 128 + (cp mod 64)}
		else
			set theBytes to {240 + (cp div 262144), 128 + ((cp div 4096) mod 64), 128 + ((cp div 64) mod 64), 128 + (cp mod 64)}
		end if
		
		-- Emit each byte as "%" followed by two upper-case hex digits.
		repeat with thisByte in theBytes
			set theFirstItem to item ((thisByte div 16) + 1) of theHexList
			set theSecondItem to item ((thisByte mod 16) + 1) of theHexList
			set theEncodedCharacter to theEncodedCharacter & "%" & theFirstItem & theSecondItem
		end repeat
	end repeat
	
	return theEncodedCharacter as string
end encodeCharacter

-- Decodes percent-encoded text by piping it through perl's URI::Escape
-- (uri_unescape) and returns the decoded text.
-- Any failure is re-raised with the prefix "Can't urlDecode: " and the
-- original error number.
on decode_Text(theText)
	try
		set pipeToPerl to " | perl -MURI::Escape -lne 'print uri_unescape($_)'"
		set decodedText to do shell script "/bin/echo " & quoted form of theText & pipeToPerl
		return decodedText
	on error eMsg number eNum
		error "Can't urlDecode: " & eMsg number eNum
	end try
end decode_Text

-- Joins the items of theList into one string, coercing each item to a string
-- and placing theDelimiter between consecutive items (none after the last).
-- An empty list gives "".
on string_From_List(theList, theDelimiter)
	set joinedText to ""
	
	repeat with itemIndex from 1 to (count of theList)
		set currentPiece to (item itemIndex of theList) as string
		set joinedText to joinedText & currentPiece
		-- Every item except the last one is followed by the delimiter.
		if itemIndex ≠ (count of theList) then set joinedText to joinedText & theDelimiter
	end repeat
	
	return joinedText
end string_From_List