Adding Incoming Links to Markdown Notes in DEVONthink 3

Hiya! I’m a long time lurker, first time poster.

TL;DR: I created an AppleScript that adds incoming links to the bottom of a DEVONthink Markdown note. It’s loosely based on Bernardo_V’s work which can be found here.


I’m working to go all-in on using DEVONthink 3 (DT3) as my tool for persisting evergreen notes or slip-box notes. One of the things I really like about Andy’s notes is the “Links to this note” section he includes at the bottom of each note, which includes a short summary of the note as well as the linked title.

Suffice it to say, I endeavored to mimic this sort of functionality in DT3 and the initial results are very good. Here’s a screen grab of my slip-box note that includes the auto-generated “Incoming Links” section:

In order to make this work, I opted to go with the wiki-style inter-note linking method as (a) I wanted to be able to easily differentiate between external and internal links and (b) this method doesn’t tightly couple my inter-note linking to DEVONthink.

Here’s the AppleScript that is responsible for adding the “Incoming Links” section, which is executed when the note is opened:

(*
	This script adds Incoming links to a Markdown note executed via a DEVONthink 3 Smart Rule.
	
	WARNING: It's assumed that the "Incoming Links" section is _always_ the last section
	in the selected note. If the note includes text _after_ the "Incoming Links" section
	it will be **removed**.
	
	The script is loosely based on a similar script created by Bernardo_V and posted
	to the DEVONthink Discourse forums.
	See: https://discourse.devontechnologies.com/t/return-links-back-links/54390/2
*)
use AppleScript version "2.4" -- Yosemite (10.10) or later
use scripting additions

property _sectionTitle : "Incoming Links"
property _newLine : "
"

(*
	Smart Rule entry point: refreshes the incoming-links section of every record
	handed over by the rule.
	
	theRecords -- list of records the Smart Rule matched
*)
on performSmartRule(theRecords)
	repeat with _recordRef in theRecords
		-- The loop variable is a reference to a list slot and AppleScript's equality
		-- operators do not dereference it, so hand over the actual record. Otherwise
		-- the `missing value` guard in updateIncomingLinks can never match.
		my updateIncomingLinks(contents of _recordRef)
	end repeat
end performSmartRule

(*
	Rebuilds the trailing "Incoming Links" section of a Markdown record.
	
	Searches for Markdown records that mention the record's name, reference URL or
	one of its aliases outside of their own incoming-links section, renders one
	bullet per hit, and replaces the record's current section with the sorted
	bullets. Everything from the section heading to the end of the note is discarded.
	
	_record -- the DEVONthink record to update; `missing value` is ignored
*)
on updateIncomingLinks(_record)
	tell application id "DNtp"
		if _record is not equal to missing value then
			set _text to plain text of _record
			-- Search the database that actually holds the record; the frontmost
			-- ("current") database may be a different one when a Smart Rule fires.
			set _database to database of _record
			
			-- Search for all notes that reference the selected note.
			set _terms to {name of _record, reference URL of _record}
			set _terms to _terms & my stringToList(aliases of _record, {", ", "; "})
			set _searchTerms to my buildSearchString(_terms)
			
			set _results to search _searchTerms in _database
			
			set _incomingLinks to {}
			repeat with _result in _results
				-- Note: There doesn't appear to be a way to exclude from the search results
				-- records where the given terms only appear inside the "Incoming Links"
				-- section using the built-in search operators.
				--
				-- Therefore, we check whether the result contains one or more of the
				-- search terms after redacting its incoming links section.
				if _result's id is not equal to _record's id and my textContains(my removeIncomingLinks(_result's plain text), _terms) then
					set _summaryText to every paragraph of rich text of _result
					-- Blank out the first paragraph before summarising; guard against
					-- records whose rich text has no paragraphs at all.
					if (count of _summaryText) is greater than 0 then set item 1 of _summaryText to ""
					set the end of _incomingLinks to my generateReferenceAndSummary(_result's name, _terms, _summaryText as text)
				end if
			end repeat
			set _incomingLinks to my sortlist(_incomingLinks)
			
			-- Concatenate the bullets explicitly so the result does not depend on
			-- whatever text item delimiters happen to be active.
			set _linksText to ""
			repeat with _link in _incomingLinks
				set _linksText to _linksText & _link
			end repeat
			
			-- Remove and replace the "Incoming Links" section of the selected note
			set _newText to my removeIncomingLinks(_text)
			if (count of _incomingLinks) is greater than 0 then
				set _newText to _newText & return & return & "## " & _sectionTitle & _linksText
			end if
			
			-- Only touch the record when the text really changed, so that merely
			-- running the rule does not rewrite the note every time.
			considering case
				set _changed to (_newText is not equal to _text)
			end considering
			if _changed then
				set the plain text of _record to _newText
			end if
		end if
	end tell
end updateIncomingLinks

(*
	Renders one bullet of the incoming-links list: the wiki link produced by
	generateReference followed by the excerpt produced by generateSummary.
*)
on generateReferenceAndSummary(_aliases, _terms, _text)
	set _linkPart to my generateReference(_aliases, _text)
	set _summaryPart to my generateSummary(_terms, _text)
	return _linkPart & _summaryPart
end generateReferenceAndSummary

(*
	Builds the Markdown bullet with the wiki-style link for a linking note.
	
	_aliases -- ", " / "; " delimited string; the first entry that is not purely
	            numeric becomes the link target
	_text    -- unused; kept for compatibility with existing callers
	
	Returns: return & "* [[<name>]]"
	
	NOTE(review): the caller passes the note's name here, so a name that itself
	contains ", " or "; " is cut down to its first segment.
*)
on generateReference(_aliases, _text)
	set _candidates to my stringToList(_aliases, {", ", "; "})
	set _names to my removeNonNumericValues(_candidates)
	
	if (count of _names) is greater than 0 then
		set _linkName to first item of _names
	else if _aliases is missing value then
		set _linkName to ""
	else
		-- Every entry was numeric (e.g. a note called "2021") or the input was
		-- empty: fall back to the raw value instead of failing on `first item of {}`.
		set _linkName to _aliases as text
	end if
	
	return (return & "* [[" & _linkName & "]]")
end generateReference

(*
	Extracts a short excerpt (up to 15 words either side) around the first of the
	given terms that occurs in the text, wrapped in a <small> element.
	
	_terms -- list of strings to look for, tried in order
	_text  -- text of the linking note
	
	Returns " <small>…...</small>", or "" when no excerpt could be produced.
*)
on generateSummary(_terms, _text)
	set _summary to ""
	
	-- Flatten and clean the text once, up front. Doing this inside the loop would
	-- re-process already processed text whenever more than one term is tried.
	set _flatText to my replaceText(_text, {_newLine, "	"}, " ")
	set _flatText to my replaceText(_flatText, {"[", "]", "•", "↩"}, "")
	
	repeat with _term in _terms
		set _termText to (contents of _term) as text
		if _flatText contains _termText then
			try
				-- The term is literal text, so neutralise regex metacharacters, and let
				-- `quoted form of` handle all shell quoting (apostrophes, $, backticks…).
				set _pattern to "((\\w+\\W+){0,15}" & my escapeRegexTerm(_termText) & "(\\W+\\w+){0,15})"
				set _excerpt to (do shell script "grep -o -E -i " & quoted form of _pattern & " <<< " & quoted form of _flatText & " | head -1") as Unicode text
				if _excerpt is not equal to "" then
					set _summary to " <small>" & _excerpt & "...</small>"
					exit repeat
				end if
			end try
		end if
	end repeat
	
	return _summary
end generateSummary

(* Backslash-escapes extended-regex metacharacters so the term is matched literally. *)
on escapeRegexTerm(_term)
	set _escaped to ""
	repeat with _char in (characters of _term)
		set _c to contents of _char
		if _c is in {"\\", ".", "[", "]", "(", ")", "{", "}", "*", "+", "?", "^", "$", "|"} then
			set _escaped to _escaped & "\\" & _c
		else
			set _escaped to _escaped & _c
		end if
	end repeat
	return _escaped
end escapeRegexTerm

(*
	Returns the given text without its incoming-links section, i.e. everything
	before the first blank line followed by "## " & _sectionTitle.
	
	The heading is recognised whether the preceding line breaks are carriage
	returns (as written by this script), linefeeds or CR/LF pairs, so a note whose
	line endings were normalised elsewhere does not end up with a duplicated section.
	If the text cannot be split (e.g. it is not text), it is returned unchanged.
*)
on removeIncomingLinks(_text)
	set _heading to "## " & _sectionTitle
	set _old to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to {return & linefeed & return & linefeed & _heading, return & return & _heading, linefeed & linefeed & _heading}
		set _sections to every text item of _text
		set AppleScript's text item delimiters to _old
		set _text to item 1 of _sections
	on error
		-- Always restore the caller's delimiters, even when splitting failed.
		set AppleScript's text item delimiters to _old
	end try
	
	return _text
end removeIncomingLinks

(*
	Returns the entries of the list that are NOT made up solely of digits; purely
	numeric entries are dropped. (Despite its name, the handler keeps the
	non-numeric values.)
	
	_list -- list of strings
*)
on removeNonNumericValues(_list)
	set _newList to {}
	
	repeat with _item in _list
		set _value to contents of _item
		try
			-- `quoted form of` keeps the shell from expanding $(...), backticks or
			-- quotes contained in the value; [0-9] is portable across grep variants.
			do shell script "grep -q -E '^[0-9]+$' <<< " & quoted form of (_value as text)
		on error
			-- grep exits non-zero when the value is not purely numeric: keep it.
			set end of _newList to _value
		end try
	end repeat
	
	return _newList
end removeNonNumericValues

(* Reports whether the text contains at least one of the strings in the list. *)
on textContains(_text, _strings)
	set _found to false
	repeat with _candidate in _strings
		if _text contains _candidate then
			set _found to true
			exit repeat
		end if
	end repeat
	return _found
end textContains

(*
	Splits a delimited string into a list using the given delimiters.
	An empty string or `missing value` yields an empty list.
*)
on stringToList(_text, _delims)
	if _text is equal to "" or _text is equal to missing value then return {}
	
	set _savedDelims to AppleScript's text item delimiters
	set AppleScript's text item delimiters to _delims
	set _parts to text items of _text
	set AppleScript's text item delimiters to _savedDelims
	
	return _parts
end stringToList

(*
	Builds the DEVONthink search string used to find incoming links: every term
	becomes a quoted, parenthesised clause, the clauses are OR-ed together behind
	the "content:" prefix and the search is limited to Markdown records.
	
	Variants using NOT AFTER the section title were tried previously and are not used.
*)
on buildSearchString(_terms)
	set _clauses to {}
	repeat with _term in _terms
		set end of _clauses to "(\"" & _term & "\")"
	end repeat
	
	set _savedDelims to AppleScript's text item delimiters
	set AppleScript's text item delimiters to " OR "
	set _joined to _clauses as text
	set AppleScript's text item delimiters to _savedDelims
	
	return "content:" & _joined & " kind:markdown"
end buildSearchString

(*
	Replaces every occurrence of `old` (a string or a list of strings) in
	theString with `new`, using text item delimiters.
*)
on replaceText(theString, old, new)
	set savedDelimiters to AppleScript's text item delimiters
	set AppleScript's text item delimiters to old
	set thePieces to text items of theString
	set AppleScript's text item delimiters to new
	set theResult to thePieces as text
	set AppleScript's text item delimiters to savedDelimiters
	return theResult
end replaceText

(*
	Returns a new list holding the items of theList, coerced to text, in ascending
	order (selection sort; equal items keep their original relative order).
	
	theList -- list of values that can be coerced to text
*)
on sortlist(theList)
	set theItemCount to length of theList
	set theIndexList to {}
	set theSortedList to {}
	repeat theItemCount times
		-- `missing value` marks "nothing picked yet". An empty string must not be
		-- used for this, because "" is a legitimate list item.
		set theLowItem to missing value
		set theLowItemIndex to 0
		repeat with a from 1 to theItemCount
			if a is not in theIndexList then
				set theCurrentItem to item a of theList as text
				if theLowItem is missing value then
					set theLowItem to theCurrentItem
					set theLowItemIndex to a
				else if theCurrentItem comes before theLowItem then
					set theLowItem to theCurrentItem
					set theLowItemIndex to a
				end if
			end if
		end repeat
		set end of theSortedList to theLowItem
		set end of theIndexList to theLowItemIndex
	end repeat
	return theSortedList
end sortlist

WARNING: Should you try using this script, please know that it assumes the “Incoming Links” section is always the last section in the selected note. If the note includes text after the “Incoming Links” section it will be removed by the script.

Here’s how I set up the Smart Rule that executes the script:

Enjoy!

5 Likes

Nice!
I like the idea of adding information pertaining to the linked pages. Well thought.

There is, however, some encoding issue regarding diacritical marks.


* [[Mete]] <small>tags: #Corpora/03_Aristóteles/Obras
hashtags:
aliases: Mete., Meteorologica, Metereologica
links: 02 Física 184a10-436a01   Mete I   Mete II   Mete III   Mete IV...</small>
* [[Mu]] <small>tags: #Corpora/03_Aristóteles/Obras
aliases: de Mundo, Mu.,
links: 02 Física 184a10-436a01...</small>
* [[Mundo]] <small>tags: #Corpora/03_Aristóteles/Mundo
hashtags:
aliases: Mundo,
links: 02 Física 184a10-436a01 De Mundo 391at2 1ΠΕΡΙ ΚΟΣΜΟΥ1
391a01 Πολλάκις μὲν ἔμοιγε θεῖόν τι καὶ δαιμόνιον ὄντως χρῆμα, 391a02 ὦ Ἀλέξανδρε, ἡ φιλοσοφία ἔδοξεν εἶναι, μάλιστα δὲ ἐν οἷς 391a03 μόνη διαραμένη πρὸς τὴν τῶν ὄντων θέαν ἐσπούδασε γνῶναι 391a04 τὴν ἐν αὐτοῖς ἀλήθειαν, καὶ τῶν ἄλλων ταύτης ἀποστάντων 391a05 διὰ τὸ ὕψος καὶ τὸ μέγεθος, αὕτη τὸ πρᾶγμα οὐκ ἔδεισεν 391a06 οὐδ᾽ αὑτὴν τῶν καλλίστων ἀπηξίωσεν, ἀλλὰ καὶ συγγενεστάτην 391a07 ἑαυτῇ καὶ μάλιστα πρέπουσαν ἐνόμισεν εἶναι τὴν ἐκείνων 391a08 μάθησιν. Ἐπειδὴ γὰρ οὐχ οἷόν τε ἦν τῷ σώματι εἰς τὸν οὐράνιον 391a09 ἀφικέσθαι τόπον καὶ τὴν γῆν ἐκλιπόντα τὸν ἱερὸν 391a10...</small>
* [[Phys]] <small>tags: #Corpora/03_Aristóteles/Obras
hashtags: aliases: Phys., Physica, Ph.,
links: 02 Física 184a10-436a01   Phys I Princípios   Phys II Natureza   Phys III Movimento e infinito   Phys IV Lugar...</small>

I will try to take a look later and see if I can find the cause of this.

Edit: thanks for fixing the typo, Jim @BLUEFROG :wink:

1 Like

This behavior is probably symptomatic of the fact that I’m using the rich text property of the Markdown note, which removes a lot of the Markdown “cruft” (e.g. references to footnotes).

It’s probably safer to use the plain text property and perform any desired text replacements on it to cleanup the text for summarization.

No problem :slight_smile:

I was wrong; I think the diacritical marks getting munged is related to the shell script call that summarizes the text containing the term. I changed it to interpret the returned result as Unicode text. That seemed to fix the issue, YMMV.

This is really great. Thanks @JacobIO!

My pleasure! I found a few bugs in the script that I originally posted. I’ll work to update my post in the next couple of days to include the updated AppleScript.

Here’s an updated version of the script that fixes a number of bugs:

(*
	This script adds Incoming links to a Markdown note executed via a DEVONthink 3 Smart Rule.
	
	WARNING: It's assumed that the "Incoming Links" section is _always_ the last section
	in the selected note. If the note includes text _after_ the "Incoming Links" section
	it will be **removed**.
	
	The script is loosely based on a similar script created by Bernardo_V and posted
	to the DEVONthink Discourse forums.
	See: https://discourse.devontechnologies.com/t/return-links-back-links/54390/2
*)
use AppleScript version "2.4" -- Yosemite (10.10) or later
use scripting additions

property _sectionTitle : "Links to this note"
property _newLine : "
"

-- Manual entry point (e.g. when the script is run directly): update the record
-- shown in DEVONthink's first think window.
tell application id "DNtp" to set _record to (content record of think window 1)
my updateIncomingLinks(_record)

(*
	Smart Rule entry point: refreshes the links section of every record handed
	over by the rule.
	
	theRecords -- list of records the Smart Rule matched
*)
on performSmartRule(theRecords)
	repeat with _recordRef in theRecords
		-- The loop variable is a reference to a list slot and AppleScript's equality
		-- operators do not dereference it, so hand over the actual record. Otherwise
		-- the `missing value` guard in updateIncomingLinks can never match.
		my updateIncomingLinks(contents of _recordRef)
	end repeat
end performSmartRule

(*
	Rebuilds the trailing links section (headed "## " & _sectionTitle) of a
	Markdown record.
	
	Searches for Markdown records that mention the record's name, reference URL or
	one of its aliases outside of their own links section, renders one bullet per
	hit, and replaces the record's current section with the sorted bullets.
	Everything from the section heading to the end of the note is discarded.
	
	_record -- the DEVONthink record to update; `missing value` is ignored
*)
on updateIncomingLinks(_record)
	tell application id "DNtp"
		if _record is not equal to missing value then
			set _text to plain text of _record
			-- Search the database that actually holds the record; the frontmost
			-- ("current") database may be a different one when a Smart Rule fires.
			set _database to database of _record
			
			-- Search for all notes that reference the selected note.
			set _terms to {name of _record, reference URL of _record}
			set _terms to _terms & my stringToList(aliases of _record, {", ", "; "})
			set _searchTerms to my buildSearchString(_terms)
			
			set _results to search _searchTerms in _database
			
			set _incomingLinks to {}
			repeat with _result in _results
				-- Note: There doesn't appear to be a way to exclude from the search results
				-- records where the given terms only appear inside the links section
				-- using the built-in search operators.
				--
				-- Therefore, we check whether the result contains one or more of the
				-- search terms after redacting its links section.
				if _result's id is not equal to _record's id and my textContains(my removeIncomingLinks(_result's plain text), _terms) then
					set _summaryText to every paragraph of rich text of _result
					-- Blank out the first paragraph before summarising; guard against
					-- records whose rich text has no paragraphs at all.
					if (count of _summaryText) is greater than 0 then set item 1 of _summaryText to ""
					-- The link always targets the note's name (passed as a one-item list).
					set the end of _incomingLinks to my generateReferenceAndSummary({_result's name}, _terms, _summaryText as text)
				end if
			end repeat
			set _incomingLinks to my sortlist(_incomingLinks)
			
			-- Concatenate the bullets explicitly so the result does not depend on
			-- whatever text item delimiters happen to be active.
			set _linksText to ""
			repeat with _link in _incomingLinks
				set _linksText to _linksText & _link
			end repeat
			
			-- Remove and replace the links section of the selected note, touching the
			-- record only when its text really changes.
			set _strippedText to my removeIncomingLinks(_text)
			if (count of _incomingLinks) is greater than 0 then
				set _newText to _strippedText & _newLine & _newLine & "## " & _sectionTitle & _linksText
				considering case
					set _changed to (_newText is not equal to _text)
				end considering
				if _changed then set the plain text of _record to _newText
			else
				considering case
					set _changed to (_strippedText is not equal to _text)
				end considering
				if _changed then
					set the plain text of _record to _strippedText
					tell _record to save
				end if
			end if
		end if
	end tell
end updateIncomingLinks

(*
	Renders one bullet of the links list: a line break, "* ", the wiki link from
	generateReference, a space and the excerpt from generateSummary.
*)
on generateReferenceAndSummary(_aliases, _terms, _text)
	set _linkPart to my generateReference(_aliases)
	set _bullet to return & "* " & _linkPart & " "
	set _summaryPart to my generateSummary(_terms, _text)
	return _bullet & _summaryPart
end generateReferenceAndSummary

(*
	Builds the wiki-style link ("[[name]]") for a linking note.
	
	_aliases -- either a ", " / "; " delimited string or a list of such strings
	            (the caller in this script passes a one-item list holding the
	            note's name). The first entry that is not purely numeric becomes
	            the link target.
*)
on generateReference(_aliases)
	-- Normalise the input to a flat list of candidate names.
	set _candidates to {}
	if class of _aliases is list then
		repeat with _alias in _aliases
			set _candidates to _candidates & my stringToList(contents of _alias, {", ", "; "})
		end repeat
	else
		set _candidates to my stringToList(_aliases, {", ", "; "})
	end if
	
	set _names to my removeNonNumericValues(_candidates)
	-- A note whose name is purely numeric (e.g. "2021") would leave nothing to link
	-- to; fall back to the unfiltered candidates instead of failing on an empty list.
	if (count of _names) is 0 then set _names to _candidates
	
	if (count of _names) is 0 then return "[[]]"
	return ("[[" & first item of _names & "]]")
end generateReference

(*
	Extracts a short excerpt (up to 15 words either side) around the first of the
	given terms that occurs in the text.
	
	_terms -- list of strings to look for, tried in order
	_text  -- text of the linking note
	
	Returns the excerpt, or "" when none could be produced.
*)
on generateSummary(_terms, _text)
	set _summary to ""
	
	-- Flatten and clean the text once, up front. Escaping it inside the loop would
	-- escape already escaped text again whenever more than one term is tried.
	set _flatText to my replaceText(_text, {_newLine, "	"}, " ")
	set _flatText to my replaceText(_flatText, {"[", "]", "•", "↩"}, "")
	
	repeat with _term in _terms
		set _termText to (contents of _term) as text
		if _flatText contains _termText then
			try
				-- The term is literal text, so neutralise regex metacharacters, and let
				-- `quoted form of` handle all shell quoting (apostrophes, $, backticks…).
				set _pattern to "((\\w+\\W+){0,15}" & my escapeRegexTerm(_termText) & "(\\W+\\w+){0,15})"
				set _excerpt to (do shell script "grep -o -E -i " & quoted form of _pattern & " <<< " & quoted form of _flatText & " | head -1") as text
				if _excerpt is not equal to "" then
					set _summary to _excerpt
					exit repeat
				end if
			end try
		end if
	end repeat
	
	return _summary
end generateSummary

(* Backslash-escapes extended-regex metacharacters so the term is matched literally. *)
on escapeRegexTerm(_term)
	set _escaped to ""
	repeat with _char in (characters of _term)
		set _c to contents of _char
		if _c is in {"\\", ".", "[", "]", "(", ")", "{", "}", "*", "+", "?", "^", "$", "|"} then
			set _escaped to _escaped & "\\" & _c
		else
			set _escaped to _escaped & _c
		end if
	end repeat
	return _escaped
end escapeRegexTerm

(*
	Returns the given text without its links section, i.e. everything before the
	first blank line followed by "## " & _sectionTitle. Both the line break stored
	in _newLine and carriage returns are recognised in front of the heading.
	If the text cannot be split (e.g. it is not text), it is returned unchanged.
*)
on removeIncomingLinks(_text)
	-- Derive the heading from the _sectionTitle property so that changing the
	-- title in one place keeps writing and removing the section in sync.
	set _heading to "## " & _sectionTitle
	set _old to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to {_newLine & _newLine & _heading, return & return & _heading}
		set _sections to every text item of _text
		set AppleScript's text item delimiters to _old
		set _text to item 1 of _sections
	on error
		-- Always restore the caller's delimiters, even when splitting failed.
		set AppleScript's text item delimiters to _old
	end try
	
	return _text
end removeIncomingLinks

(*
	Returns the entries of the list that are NOT made up solely of digits; purely
	numeric entries are dropped. (Despite its name, the handler keeps the
	non-numeric values.)
	
	_list -- list of strings
*)
on removeNonNumericValues(_list)
	set _newList to {}
	
	repeat with _item in _list
		set _value to contents of _item
		try
			-- `quoted form of` keeps the shell from expanding $(...), backticks or
			-- quotes contained in the value; [0-9] is portable across grep variants.
			do shell script "grep -q -E '^[0-9]+$' <<< " & quoted form of (_value as text)
		on error
			-- grep exits non-zero when the value is not purely numeric: keep it.
			set end of _newList to _value
		end try
	end repeat
	
	return _newList
end removeNonNumericValues

(* Answers true as soon as one of the listed strings occurs in the text, false otherwise. *)
on textContains(_text, _strings)
	set _matched to false
	repeat with _needle in _strings
		if _text contains _needle then
			set _matched to true
			exit repeat
		end if
	end repeat
	return _matched
end textContains

(*
	Turns a delimited string into a list by splitting it on the given delimiters.
	Returns an empty list for an empty string or `missing value`.
*)
on stringToList(_text, _delims)
	if _text is equal to "" or _text is equal to missing value then return {}
	
	set _previousDelims to AppleScript's text item delimiters
	set AppleScript's text item delimiters to _delims
	set _pieces to text items of _text
	set AppleScript's text item delimiters to _previousDelims
	
	return _pieces
end stringToList

(*
	Builds the DEVONthink search string used to find incoming links: every term
	becomes a quoted, parenthesised clause, the clauses are OR-ed together behind
	the "content:" prefix and the search is limited to Markdown records.
*)
on buildSearchString(_terms)
	set _clauses to {}
	repeat with _term in _terms
		set end of _clauses to "(\"" & _term & "\")"
	end repeat
	
	set _previousDelims to AppleScript's text item delimiters
	set AppleScript's text item delimiters to " OR "
	set _query to _clauses as text
	set AppleScript's text item delimiters to _previousDelims
	
	return "content:" & _query & " kind:markdown"
end buildSearchString

(*
	Converts an HTML string to text by piping it through `textutil`.
	`quoted form of` yields the same single-quoted, '\''-escaped shell word that
	wrapping the result of escapeSingleQuotes in single quotes does.
*)
on htmlToText(_html)
	set _command to "textutil -stdout -stdin -convert txt <<< " & quoted form of _html
	set _output to do shell script _command
	return _output as Unicode text
end htmlToText

(*
	Makes a string safe to place between single quotes on a shell command line by
	turning every ' into '\'' (close the quote, emit an escaped quote, reopen).
*)
on escapeSingleQuotes(_text)
	set _savedDelims to AppleScript's text item delimiters
	set AppleScript's text item delimiters to "'"
	set _pieces to text items of _text
	set AppleScript's text item delimiters to "'\\''"
	set _escaped to _pieces as text
	set AppleScript's text item delimiters to _savedDelims
	return _escaped
end escapeSingleQuotes

(*
	Substitutes `new` for every occurrence of `old` (one string or a list of
	strings) in theString by splitting and re-joining with text item delimiters.
*)
on replaceText(theString, old, new)
	set previousDelimiters to AppleScript's text item delimiters
	set AppleScript's text item delimiters to old
	set theFragments to text items of theString
	set AppleScript's text item delimiters to new
	set theReplaced to theFragments as text
	set AppleScript's text item delimiters to previousDelimiters
	return theReplaced
end replaceText

(*
	Returns a new list holding the items of theList, coerced to text, in ascending
	order (selection sort; equal items keep their original relative order).
	
	theList -- list of values that can be coerced to text
*)
on sortlist(theList)
	set theItemCount to length of theList
	set theIndexList to {}
	set theSortedList to {}
	repeat theItemCount times
		-- `missing value` marks "nothing picked yet". An empty string must not be
		-- used for this, because "" is a legitimate list item.
		set theLowItem to missing value
		set theLowItemIndex to 0
		repeat with a from 1 to theItemCount
			if a is not in theIndexList then
				set theCurrentItem to item a of theList as text
				if theLowItem is missing value then
					set theLowItem to theCurrentItem
					set theLowItemIndex to a
				else if theCurrentItem comes before theLowItem then
					set theLowItem to theCurrentItem
					set theLowItemIndex to a
				end if
			end if
		end repeat
		set end of theSortedList to theLowItem
		set end of theIndexList to theLowItemIndex
	end repeat
	return theSortedList
end sortlist
1 Like