Additional resources - Automatically capture and annotate items: DEVONthink helper, Smart rule scripts, JS/Markdown helper

Part of workflow discussed here: Automatically capture and annotate items (to use with Obsidian)

Applescript: Devonthink Helper

use AppleScript version "2.4" -- Yosemite (10.10) or later
use framework "Foundation"
use scripting additions
use script "RegexAndStuffLib" version "1.0.7"


-- Map a record type given as text to one of the canonical names
-- "markdown", "bookmark" or "pdf".
-- recordType: the type as text; both the readable name and the raw
--             «constant ****xxxx» text form are accepted.
-- Returns missing value when the type is none of the listed ones.
on getRecordType(recordType)
	-- The body previously read an undefined variable (theRecordType), which made
	-- every call fail; it now uses the handler's actual parameter.
	if recordType is in {"markdown", "«constant ****mkdn»"} then
		return "markdown"
	else if recordType is in {"bookmark", "«constant ****DTnx»"} then
		return "bookmark"
	else if recordType is in {"pdf", "PDF document", "«constant ****pdf »"} then
		return "pdf"
	else
		return missing value
	end if
end getRecordType


-- Collect the tags of theRecord followed by the tags of its first parent,
-- that parent's first parent, and so on, for as long as a "parent 1" exists.
-- theRecord: a DEVONthink record.
-- Returns a list of tags (duplicates are not removed).
on getAncestorTags(theRecord)
	tell application id "DNtp"
		set collectedTags to tags of theRecord
		if (exists parent 1 of theRecord) then
			-- Recurse upwards; "my" routes the call back to this script, not to DEVONthink.
			set collectedTags to collectedTags & my getAncestorTags(parent 1 of theRecord)
		end if
		return collectedTags
	end tell
end getAncestorTags


-- Return the text after the last "." of theFilename, including the dot itself.
-- theFilename: a file name as text.
-- Returns "" when theFilename contains no ".".
on getExtension(theFilename)
	-- Split on "." and put the caller's delimiters back straight away.
	set savedDelimiters to AppleScript's text item delimiters
	set AppleScript's text item delimiters to "."
	set nameParts to text items of theFilename
	set AppleScript's text item delimiters to savedDelimiters
	
	if (count of nameParts) > 1 then return "." & last item of nameParts
	return ""
end getExtension


-- Pipe theText through `echo … | sed -E 's|^\n$||'` and return the shell output
-- with its line endings left untouched.
-- theText: the text to pass to the shell (quoted with "quoted form of").
-- NOTE(review): sed handles its input one line at a time, so the pattern ^\n$
-- presumably never matches and the text comes back unchanged apart from the
-- newline echo appends - confirm what trimming was intended.
on trimText(theText)
	set sedFilter to " | sed -E 's|^\\n$||'"
	set shellCommand to "echo " & quoted form of theText & sedFilter
	return do shell script shellCommand without altering line endings
end trimText


-- Format a date as "yyyy-MM-dd HH:mm" (e.g. "2021-03-05 14:07").
-- theDate: an AppleScript date (bridged to NSDate) or an NSDate.
-- Returns the formatted date as text.
on formatDate(theDate)
	set theFormatter to current application's NSDateFormatter's new()
	-- Pin the locale: without it a fixed-format pattern is still subject to the
	-- user's calendar and 12/24-hour preferences, which can change the output.
	theFormatter's setLocale:(current application's NSLocale's localeWithLocaleIdentifier:"en_US_POSIX")
	theFormatter's setDateFormat:"yyyy-MM-dd HH:mm"
	return (theFormatter's stringFromDate:(theDate)) as string
end formatDate

(*
on urlencode(str)
	return percent encode str encoded characters "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_."
end urlencode
*)

-- Source https://discourse.devontechnologies.com/t/applescript-encoding-urls-and-smart-rules/59555/2
-- Source https://forum.latenightsw.com/t/open-in-script-debugger-links-gone/682/9
on urlencode(str) -- http://harvey.nu/applescript_url_encode_routine.html
	-- Percent-encode str, leaving only ASCII letters, digits, "-", "_" and "."
	-- unescaped. Returns the encoded value as text.
	set unescapedChars to "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_."
	set allowedSet to current application's NSCharacterSet's characterSetWithCharactersInString:unescapedChars
	set sourceString to current application's NSString's stringWithString:str
	return (sourceString's stringByAddingPercentEncodingWithAllowedCharacters:allowedSet) as text
end urlencode


-- Replace every occurrence of find in subject with replace.
-- Works by splitting subject on find and re-joining the pieces with replace;
-- the caller's text item delimiters are restored before returning.
-- Returns the resulting text.
on replaceText(find, replace, subject)
	set {savedDelimiters, AppleScript's text item delimiters} to {AppleScript's text item delimiters, find}
	set pieces to text items of subject
	
	set AppleScript's text item delimiters to replace
	set rejoined to pieces as text
	
	set AppleScript's text item delimiters to savedDelimiters
	return rejoined
end replaceText


-- Make str safe to use as a record / file title.
-- Each of % & / ? < > \ * | : is replaced by "-"; "#" is removed entirely
-- (to prevent unwanted tagging); one trailing "." is dropped.
-- str: the title to clean (coerced to text).
-- Returns the cleaned text, which may be "".
on sanitize(str)
	set replaceChars to {"%", "&", "/", "?", "<", ">", "\\", "*", "|", ":"}
	set stripChars to {"#"}
	set theResult to ""
	repeat with charRef in (characters of (str as text))
		-- Dereference the loop variable so the list membership tests compare plain text.
		set char to contents of charRef
		if char is in replaceChars then
			set theResult to theResult & "-"
		else if char is not in stripChars then
			set theResult to theResult & char
		end if
	end repeat
	if theResult ends with "." then
		if (length of theResult) > 1 then
			-- "text 1 thru -2" does not depend on the current text item delimiters,
			-- unlike "characters 1 thru -2 … as string".
			set theResult to text 1 thru -2 of theResult
		else
			-- A lone "." has nothing before it; the range above would raise an error.
			set theResult to ""
		end if
	end if
	return theResult
end sanitize


-- Selection sort: return the items of my_list, each coerced to text, in
-- ascending order according to AppleScript's "comes before" comparison.
-- my_list: a list whose items can be coerced to text.
-- Returns a new list; my_list itself is not modified.
on simpleSort(my_list)
	set index_list to {} -- positions of my_list that were already emitted
	set sorted_list to {}
	set item_count to number of items in my_list
	repeat item_count times
		-- Index 0 means "no candidate yet". The previous "" sentinel could not be
		-- told apart from a genuine empty string, so lists containing "" were
		-- returned in the wrong order.
		set low_item_index to 0
		set low_item to missing value
		repeat with i from 1 to item_count
			if i is not in index_list then
				set this_item to item i of my_list as text
				if low_item_index is 0 then
					set low_item to this_item
					set low_item_index to i
				else if this_item comes before low_item then
					set low_item to this_item
					set low_item_index to i
				end if
			end if
		end repeat
		set end of sorted_list to low_item
		set end of index_list to low_item_index
	end repeat
	return sorted_list
end simpleSort


-- Return the items of theList with later duplicates removed, keeping the
-- order in which each item first appears.
on uniqueList(theList)
	set seenItems to {}
	repeat with entryRef in theList
		set entry to contents of entryRef
		if entry is not in seenItems then set end of seenItems to entry
	end repeat
	return seenItems
end uniqueList

Smart rule script: content tagged or moved

  • Search Content folder
  • Kind is Any Document
  • On Moving, On Classifying, On Tagging, On Demand
use DT : script "DEVONthink helper"
use ma : script "Markdown Annotation helper"
use script "RegexAndStuffLib" version "1.0.7"

-- Smart rule entry point: for every record that has an annotation, rewrite the
-- annotation's plain text via the Markdown Annotation helper using the record's
-- current tags and path.
-- theRecords: the records handed over by the smart rule.
-- NOTE(review): the meaning of the boolean arguments passed to updateTags /
-- updatePath is defined in the "Markdown Annotation helper" script - confirm there.
on performSmartRule(theRecords)
	tell application id "DNtp"
		repeat with currentRecord in theRecords
			set currentTags to tags of currentRecord
			set currentPath to path of currentRecord
			if exists annotation of currentRecord then
				set noteRecord to annotation of currentRecord
				set noteText to ma's updateTags(plain text of noteRecord, currentTags, false, true)
				set plain text of noteRecord to ma's updatePath(noteText, currentPath, false)
			end if
		end repeat
	end tell
end performSmartRule

Smart rule script: (Re)process content

  • Search Content folder
  • Kind is Markdown, Bookmark, PDF/PS
  • On Import, On Demand
  • See Applescript: Process incoming annotation (in this post)

Smart rule script: Content renamed

  • Kind is Any Document
  • On Renaming
use DT : script "DEVONthink helper"
use ma : script "Markdown Annotation helper"

-- Smart rule entry point: after a record was renamed, store a sanitized name on
-- the record and, when it has an annotation, rename the annotation to
-- "<title>.md" and update the title inside the annotation text.
-- theRecords: the records handed over by the smart rule.
-- NOTE(review): the whole name (including any extension it contains) is
-- sanitized and the extension is then appended again - if DEVONthink names can
-- include the extension this doubles it ("a.pdf" -> "a.pdf.pdf"); confirm.
on performSmartRule(theRecords)
	tell application id "DNtp"
		repeat with renamedRecord in theRecords
			set currentName to name of renamedRecord
			set cleanTitle to DT's sanitize(currentName)
			set name of renamedRecord to cleanTitle & DT's getExtension(currentName)
			-- The path is read only after the new name has been set.
			set itemPath to path of renamedRecord
			if exists annotation of renamedRecord then
				set noteRecord to annotation of renamedRecord
				set name of noteRecord to cleanTitle & ".md"
				set plain text of noteRecord to ma's updateTitle(plain text of noteRecord, cleanTitle, itemPath, false)
			end if
		end repeat
	end tell
end performSmartRule

Smart rule script: Content tagged or moved

  • Search Content folder
  • Kind is Any Document
  • On Moving, On Classifying, On Tagging, On Demand
use DT : script "DEVONthink helper"
use ma : script "Markdown Annotation helper"
use script "RegexAndStuffLib" version "1.0.7"

-- Smart rule entry point: push each record's current tags and path into the
-- text of its annotation (records without an annotation are left alone).
-- theRecords: the records handed over by the smart rule.
-- NOTE(review): what the boolean arguments of updateTags / updatePath select is
-- defined in the "Markdown Annotation helper" script - confirm there.
on performSmartRule(theRecords)
	tell application id "DNtp"
		repeat with contentRecord in theRecords
			set recordTags to tags of contentRecord
			set recordPath to path of contentRecord
			if exists annotation of contentRecord then
				set annotationRecord to annotation of contentRecord
				set annotationText to plain text of annotationRecord
				set annotationText to ma's updatePath(ma's updateTags(annotationText, recordTags, false, true), recordPath, false)
				set plain text of annotationRecord to annotationText
			end if
		end repeat
	end tell
end performSmartRule

Smart rule script: annotation renamed

  • Search Annotation folder
  • Kind is Markdown
  • On Renaming
use ma : script "Markdown Annotation helper"
use DT : script "DEVONthink helper"
use script "RegexAndStuffLib" version "1.0.7"

-- Smart rule entry point: after an annotation was renamed, sanitize the new
-- name, rename the linked item accordingly (keeping the item's extension),
-- update the title inside the annotation text and store the annotation under
-- "<title>.md".
-- theRecords: the renamed annotation records handed over by the smart rule.
on performSmartRule(theRecords)
	tell application id "DNtp"
		repeat with theRecord in theRecords
			set theRecordName to name of theRecord
			set maText to plain text of theRecord
			set maTitle to DT's sanitize(theRecordName)
			-- Fallback so maPath is always defined: previously it was only set when a
			-- linked item was found, and updateTitle then failed with an undefined
			-- variable. The annotation's own path mirrors what the "annotation
			-- updated" rule passes. NOTE(review): confirm this is the wanted fallback.
			set maPath to path of theRecord
			set itemUUID to ma's getItemUUID(maText)
			if itemUUID is not equal to "" then
				set itemRecord to get record with uuid itemUUID
				-- The UUID may point to a record that no longer exists.
				if itemRecord is not missing value then
					set itemRecordExtension to DT's getExtension(name of itemRecord)
					set name of itemRecord to maTitle & itemRecordExtension
					set maPath to path of itemRecord
				end if
			end if
			set maText to ma's updateTitle(maText, maTitle, maPath, false)
			set plain text of theRecord to maText
			set name of theRecord to maTitle & ".md"
		end repeat
	end tell
end performSmartRule

Smart rule script: annotation updated

  • Search Annotation folder
  • Kind is Markdown
  • After Saving
use ma : script "Markdown Annotation helper"
use DT : script "DEVONthink helper"
use script "RegexAndStuffLib" version "1.0.7"

-- Smart rule entry point: after an annotation was saved, copy its excerpt and
-- tags to the linked item (when one can be resolved) and rewrite the
-- annotation text through the Markdown Annotation helper.
-- theRecords: the saved annotation records handed over by the smart rule.
on performSmartRule(theRecords)
	tell application id "DNtp"
		repeat with theRecord in theRecords
			set maText to plain text of theRecord
			set maPath to path of theRecord
			set maExcerpt to ma's getExcerpt(maText)
			set maURL to ma's getURL(maText)
			set maTags to ma's getTags(maText)
			
			-- reference: on updateText(maText, theDate, theTitle, theURL, thePDFURL, theAnnotationURL, theTags, theExcerpt, stripTagsFromContent)
			-- NOTE(review): the reference above lists 9 parameters while the call at the
			-- end of this loop passes 10 - verify against the helper's current signature.
			
			set itemUUID to ma's getItemUUID(maText)
			if itemUUID is not equal to "" then
				set itemRecord to get record with uuid itemUUID
				
				-- Skip the item update when the UUID no longer resolves to a record;
				-- setting properties on missing value would abort the whole rule.
				if itemRecord is not missing value then
					if maExcerpt is not equal to "" then
						set comment of itemRecord to maExcerpt
					end if
					
					if maTags is not equal to {} then
						set tags of itemRecord to maTags
					end if
				end if
			end if
			
			set maText to ma's updateText(maText, missing value, missing value, maExcerpt, maURL, missing value, missing value, maPath, maTags, true)
			set plain text of theRecord to maText
		end repeat
	end tell
end performSmartRule

Smart rule script: set original tags

  • Search Capture folder (before tagging / classifying)
  • Kind is Any Document
  • On Creation, On Import, On Tagging
use script "RegexAndStuffLib" version "1.0.7"
use scripting additions

-- Smart rule entry point: store each record's tags, except "00 processed", as a
-- comma-separated string in the custom meta data field "originaltags".
-- theRecords: the records handed over by the smart rule.
on performSmartRule(theRecords)
	tell application id "DNtp"
		repeat with currentRecord in theRecords
			set allTags to tags of currentRecord
			set keptTags to {}
			repeat with tagRef in allTags
				set tagName to tagRef as string
				if tagName is not equal to "00 processed" then set end of keptTags to tagName
			end repeat
			-- "join strings" comes from RegexAndStuffLib.
			set joinedTags to join strings keptTags using delimiter ","
			add custom meta data joinedTags for "originaltags" to currentRecord
		end repeat
	end tell
end performSmartRule

Smart rule script: apply original tags

  • Search Content folder
  • Kind is Any Document
  • On Classifying, On Demand
  • After Execute Script also: Apply Rule "On replicating / tagging"
use AppleScript version "2.4" -- Yosemite (10.10) or later
use script "RegexAndStuffLib" version "1.0.7"
use scripting additions

-- Smart rule entry point: add the tags stored in the custom meta data field
-- "originaltags" (a comma-separated string) back to each record's tags.
-- theRecords: the records handed over by the smart rule.
on performSmartRule(theRecords)
	tell application id "DNtp"
		repeat with theRecord in theRecords
			set recordTags to tags of theRecord
			set originalTags to get custom meta data for "originaltags" from theRecord
			-- An empty string is what gets stored for a record that had no tags;
			-- splitting it would yield a single empty tag name, so skip it as well.
			if originalTags is not missing value and originalTags is not "" then
				-- "split string" comes from RegexAndStuffLib.
				set originalTagsList to split string originalTags using delimiters ","
				set tags of theRecord to (recordTags & originalTagsList)
			end if
		end repeat
	end tell
end performSmartRule

Javascript for use with Markdown documents

  • The Keyboard Maestro in this script is a simple KM macro that opens the supplied link - this is to prevent the restriction in Obsidian to open file:// links (and it also prevents that DT links to the item in DT if it recognizes the file:// URL for an item in the database; I want to specifically open the item in the original app / Finder)
// Once the document is parsed, prepend an info header to <body>: links to the
// DEVONthink item, the Obsidian note and the file (via a Keyboard Maestro
// trigger), the tag list, and the document title as an <h1>.
document.addEventListener('DOMContentLoaded', () => {
	// Content of <meta name="..."> or '' when the element is absent, so a missing
	// element no longer ends up as the literal text "null" in the generated HTML.
	const metaContent = (name) => {
		const node = document.querySelector(`meta[name="${name}"]`);
		return node ? node.content : '';
	};

	const titleNode = document.querySelector('title');
	const title = titleNode ? titleNode.innerText : '';
	const itemurl = metaContent('itemurl');
	const path = metaContent('path');

	// Drop the first and last character of the tags value
	// (presumably surrounding delimiters - confirm against the generated meta tag).
	const rawTags = metaContent('tags');
	const tags = rawTags.substring(1, rawTags.length - 1);

	// Everything is declared locally; the original leaked `body` and `info` as globals.
	const bodyElement = document.querySelector('body');
	const info = `<p><a href="${itemurl}">Item</a> | <a href="obsidian://open?vault=Notes&file=${encodeURIComponent(title)}">Obsidian</a> | <a href="kmtrigger://macro=Open&value=%2FUsers%2Fmdbraber%2FLibrary%2FMobile%20Documents%2Fcom~apple~CloudDocs%2F${encodeURIComponent(path)}">File</a><br/>Tags: <i>${tags}</i></p><h1>${title}</h1>`;

	bodyElement.innerHTML = info + "\n" + bodyElement.innerHTML;
});

1 Like

From your code

info = "<p><a href=\"" + itemurl + "\">Item</a> | <a href=\"obsidian://open?vault=Notes&file="+ encodeURIComponent(title) + "\">Obsidian</a> | <a href=\"kmtrigger://macro=Open&value=%2FUsers%2Fmdbraber%2FLibrary%2FMobile%20Documents%2Fcom~apple~CloudDocs%2F"+ encodeURIComponent(path) + "\">File</a><br/>Tags: <i>"+ tags +"</i></p><h1>" + title + "</h1>";

A template string would look like that (note the single backquotes!)

// No quote between `file=` and the interpolated title: a quote there would end the href attribute early.
info = `<p><a href="${itemurl}">Item</a> | <a href="obsidian://open?vault=Notes&file=${encodeURIComponent(title)}">Obsidian</a> …`

Also, you might want to use JavaScript’s URL encoding for the kmtrigger: URL. In fact, I’d do that once at the top of the script and store the stuff in a const for better legibility.

As a side note: Using variables (`body`, `info`) without declaring them is considered a no-go nowadays. As is declaring something as a (global!) `var` that is in fact only a block-local `const` (`itemurl`, `annotationurl` etc.).

More a question of style: Instead of using all these vars derived from meta data, I’d use an object like so:

// Collect the content of the known <meta name="..."> elements into one object;
// names without a matching element are simply left out.
const metadata = {};
// "annotationurl" (not "annotation") is the meta name the original script queries.
["date", "url", "itemurl", "annotationurl", "path", "tags"].forEach( tag => {
  const node = document.querySelector(`meta[name="${tag}"]`); /* there again: a template string! */
  if (node)  metadata[tag] = node.content;
});

and access the object properties later.

However, there’s also a logical flaw in your script: You set all these values from the meta elements only if the meta elements are defined – but later, in your info = part, you use these variables regardless. In other words: if there is no path in meta data, the path variable is not defined. But you still try to use it to set the value of info. Which will cause a runtime error.

So apparently you assume all these metadata to always exist, which makes the whole if acrobatics superfluous…

Thanks! As you can see I’m no JS expert and there’s a lot to improve on that quickly-written code. The whole JS is still a WIP. I’ll definitely add your tips when expanding this!

I have a JavaScript group in DEVONthink. Into it goes every little bit @chrillek thankfully explains here, his Scripting with JXA | JavaScript for Automation (JXA) , the great resources @mhucka shared (and stuff I randomly find elsewhere). So if one day (hopefully never) AppleScript dies I’ll know where to start :slight_smile:

1 Like

:slight_smile: Do you cut-and-paste the things as links, or do you save code snippets for later reference (the problem with clipping these pages as PDF is that you don’t get the full code)

Bookmarks only if I think the content will be online in some years. Often print to PDF (in Safari).

What works most often is

  • selecting what you need in Safari
  • dragging it onto DEVONthink’s icon in the dock
    (this creates a webarchive)

Converting the webarchive to PDF in DEVONthink works fine for most contents, but I’m actually not sure whether I do this with code snippets.

Anyway, it should be possible to get the content as webarchive or as PDF (via a webarchive) - and AppleScript will live longer than JavaScript :wink:

But I’m by no means someone who often captures code (no programmer), so probably better to ask other users for best practice.

1 Like

Thanks for the tips! I don’t often use drag and dropping, but this might be interesting. When doing this with text from this thread it indeed creates a .webarchive - but interestingly the URL is https://discourse.devontechnologies.com - not the full URL of this thread e.g. Is that the expected behavior?

Or if one day you want to script a web page (think markdown!)

1 Like

OK, you’ve found a new one :laughing:

… one of the problematic URLs (over here) was that capturing webarchives from discourse forums uses e.g. https://discourse.devontechnologies.com/latest instead of the actual thread’s (or post’s) URL.

This Script: Create webarchive from selection with correct URL fixes this “error” (be aware, it uses UI scripting).

Although I’m still sure it’s really an error in case of discourse forums, I meanwhile think that not including what comes after a # in a URL maybe does make sense (e.g. headings may change - I guess a URL that includes a # will not be redirected in this case, so one would end up with nothing.)

But given that one knows that a URL that’s not accessible may be not accessible due to a # heading that changed, I still think it’s far more convenient to be able to directly jump to a record’s online source.

(Wrote the script when I was very disappointed to find out that I couldn’t open a record’s URL to its place in a very very long Markdown document. But again, there’s probably a reason why they decided to do it this way - however that doesn’t mean we shouldn’t customize it)

I don’t think that’s correct - URLs with # work fine here (DT 3.8.3), e.g. try selecting some text from this URL and it will include the full URL fine: python - How can I do multiple substitutions using regex? - Stack Overflow

I think what’s happening is that there’s some hardcoded logic for Discourse forums (or just for this Discourse forum) - I’ve also seen it with clipping PDFs where @cgrunenberg confirmed it is hardcoded behavior: Set preferred width when capturing PDFs - #10 by cgrunenberg

TBH I don’t think exceptions like that should be hardcoded, but then again as you say: there might be some other reasoning in play here too.

Nope. The part after the hash sign is (kind of) indicative only. To be more precise: everything before the hash is resolved by the server. Everything after it is resolved by the browser.

So if the post after the hash sign has changed or is missing, the server does not care and delivers the page anyway (which is a good thing, because you do not want a web server to look into every document it delivers for performance reasons).

The browser looks for a HTML element with an ID identical to the text following the hash sign. If it exists, the browser positions the document so that the element appears at the top of the viewport. If it does not exist the browser does nothing at all.

1 Like

I stand corrected: it sometimes works, it sometimes doesn’t e.g. with Github. It seems that when navigating to a URL from a page link (e.g. navigating to a heading) it doesn’t become part of the URL, but it does get included when following the link to an external URL with # heading…

1 Like

OK, for a moment you got me thinking … why did you write that script?

Please see this …

… and that’s actually what I wrote in the script’s thread: GitHub.

You won’t find StackOverflow mentioned there … :slight_smile:

Edit: Didn’t see your reply before posting @mdbraber

Yes, I think that might have been the reason why a lot of webarchives I captured didn’t get the “right” URL. It’s obviously super annoying to come back to a bunch of records and every record’s URL is pointing to something but not to the actual place I would expect to land when opening its URL.

Based on @chrillek’s feedback (thanks!) I’ve updated the JS code to do the following:

  • Create links to Item, Obsidian, Annotation and URL
  • List tags
  • Create a H1 header for the item
  • Style via:: and excerpt:: inline tags
  • Link blockquotes (i.e. highlights from PDFs) as DT links with a search parameter

// Once the document is parsed and an "itemurl" meta element is present:
//   - prepend a header with Item / Obsidian / File / URL links, the tag list
//     and the title as <h1>,
//   - style the "excerpt::" and "via::" paragraphs,
//   - turn every blockquote paragraph into a link to the item with a search
//     parameter containing the quoted text.
document.addEventListener('DOMContentLoaded', () => {
	// Escape a value for use in HTML text or inside a double-quoted attribute.
	const escapeHtml = (value) => String(value)
		.replace(/&/g, '&amp;')
		.replace(/</g, '&lt;')
		.replace(/>/g, '&gt;')
		.replace(/"/g, '&quot;');

	// First node matching an XPath expression, or null.
	const firstMatch = (xpath) =>
		document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;

	const metadata = {};
	const titleNode = document.querySelector('title');
	metadata.title = titleNode ? titleNode.innerText : '';
	// "annotationurl" (not "annotation") is the meta name used by the first version of this script.
	["date", "url", "itemurl", "annotationurl", "path", "tags"].forEach(tag => {
		const node = document.querySelector(`meta[name="${tag}"]`);
		if (node) metadata[tag] = node.content;
	});

	// Without an item URL the document is left untouched.
	if (!metadata.itemurl) return;

	const bodyElement = document.querySelector('body');
	const fileBase = '/Users/mdbraber/Library/Mobile Documents/com~apple~CloudDocs/Content/';

	let info = `<p><a href="${escapeHtml(metadata.itemurl)}">Item</a>`
		+ ` | <a href="obsidian://open?vault=Notes&file=${encodeURIComponent(metadata.title)}.md">Obsidian</a>`;
	if (metadata.path) {
		// Only link the file when a path is known (previously "undefined" was appended).
		// NOTE(review): as before, the path itself is not URI-encoded - confirm whether
		// the meta value is already encoded before adding encodeURIComponent here.
		info += ` | <a href="kmtrigger://macro=Open&value=${encodeURIComponent(fileBase) + escapeHtml(metadata.path)}">File</a>`;
	}
	if (metadata.url) info += ` | <a href="${escapeHtml(metadata.url)}">URL</a>`;
	if (metadata.tags) info += `<br/>Tags: <i>${escapeHtml(metadata.tags)}</i>`;
	info += `</p><h1>${escapeHtml(metadata.title)}</h1>`;

	bodyElement.innerHTML = info + "\n" + bodyElement.innerHTML;

	const excerpt = firstMatch("//p[contains(.,'excerpt::')]");
	const via = firstMatch("//p[contains(.,'via::')]");

	// slice(10) / slice(6) drop the leading "excerpt:: " / "via:: " markers.
	if (excerpt) excerpt.innerHTML = `<b>${excerpt.innerHTML.slice(10)}</b>`;

	if (via) {
		via.innerHTML = `<i>${via.innerHTML.slice(6)}</i><br/><br/><hr/>`;
	} else if (excerpt) {
		// Guarded: with neither paragraph present this used to throw a TypeError,
		// which also prevented the blockquote links below from being created.
		excerpt.innerHTML += "<br/><br/><hr/>";
	}

	document.querySelectorAll('blockquote > p').forEach(quote => {
		const quoteText = quote.innerText;
		const link = document.createElement('a');
		link.setAttribute('href', `${metadata.itemurl}?search=${encodeURIComponent(quoteText)}`);
		// textContent keeps characters such as "<" or "&" in the quote literal.
		link.textContent = quoteText;
		quote.innerHTML = '';
		quote.appendChild(link);
	});
});