This handler extracts all URLs of a PDF and optionally filters them.
Filtering
- Filter by URL start.
- Filter by URL end.
- Filter by URL start and URL end.
- The filter can either "include" or "exclude" URLs that match the passed lists.
Handler
-- Script Library - Extract PDF URLs
use AppleScript version "2.4"
use framework "Foundation"
use framework "Quartz"
use scripting additions
on extractPDF_URLs(theRecord, theMode, theURLBeginsWith_list, theURLEndsWith_list)
	-- Extracts the URLs of all link annotations of a PDF record, removes duplicates,
	-- optionally filters them by URL start and/or URL end, and returns them sorted.
	--
	-- Parameters:
	--   theRecord              DEVONthink record; must be a PDF document.
	--   theMode                "include" keeps URLs matching the lists, "exclude" drops them,
	--                          "" ignores both lists (no filtering).
	--   theURLBeginsWith_list  list of text; URL prefixes to match (may be {}).
	--   theURLEndsWith_list    list of text; URL suffixes to match (may be {}).
	--
	-- Returns: an AppleScript list of URL strings (possibly empty). Returns without a value
	-- after showing an alert when the record is not a PDF or theMode is invalid.
	-- Errors: every failure is reported with an alert and then re-raised as error -128.
	try
		tell application id "DNtp"
			try
				set theType to type of theRecord as string
				if theType is not in {"PDF document", "«constant ****pdf »"} then
					display alert "Error" message "Please make sure to select a PDF document" as warning giving up after 60
					return
				end if
				set thePath to path of theRecord
				if theMode = "include" then
					set isExclude to false
					set theConnector to " OR "
				else if theMode = "exclude" then
					set isExclude to true
					set theConnector to " AND "
				else if theMode = "" then
					-- An empty mode means "no filtering": discard whatever lists were passed.
					set isExclude to false
					set theConnector to " OR "
					set theURLBeginsWith_list to {}
					set theURLEndsWith_list to {}
				else
					display alert "Error" message "Please make sure to use either \"include\" or \"exclude\"" as warning giving up after 60
					return
				end if
			on error error_message number error_number
				if the error_number is not -128 then display alert "DEVONthink" message error_message as warning
				error number -128
			end try
		end tell
		
		set thePDF to current application's PDFDocument's alloc()'s initWithURL:(current application's |NSURL|'s fileURLWithPath:thePath)
		-- initWithURL: yields missing value when the file cannot be read as a PDF.
		if thePDF is missing value then error "Could not open the PDF at path: " & thePath
		
		-- Collect the URL of every link annotation on every page.
		set thePDF_PageCount to thePDF's pageCount()
		set thePDFAnnotationSubtypeLink to (current application's PDFAnnotationSubtypeLink)
		set thePDF_URLs_all to (current application's NSMutableArray's array())
		repeat with i from 0 to (thePDF_PageCount - 1)
			set thePage to (thePDF's pageAtIndex:i)
			set thePage_Annotations to thePage's annotations()
			set thePage_Annotations_Count to thePage_Annotations's |count|()
			repeat with j from 0 to (thePage_Annotations_Count - 1)
				set thisAnnotation to (thePage_Annotations's objectAtIndex:j)
				if ((thisAnnotation's valueForAnnotationKey:(current application's PDFAnnotationKeySubtype))'s isEqualTo:thePDFAnnotationSubtypeLink) then
					set thisURL to thisAnnotation's |URL|()
					-- Link annotations without a URL are skipped.
					if thisURL ≠ missing value then
						(thePDF_URLs_all's addObject:(thisURL's absoluteString()))
					end if
				end if
			end repeat
		end repeat
		
		if thePDF_URLs_all's |count|() > 0 then
			-- NSOrderedSet removes duplicates while keeping first-seen order.
			set thePDF_URLs to (current application's NSOrderedSet's orderedSetWithArray:thePDF_URLs_all)'s array()
			if theURLBeginsWith_list ≠ {} or theURLEndsWith_list ≠ {} then
				-- Build one parenthesised group per non-empty list. Filter values are passed as
				-- predicate arguments (%@) instead of being spliced into the format string, so
				-- values containing quotes or backslashes cannot break the predicate.
				set theArguments to current application's NSMutableArray's array()
				set theGroups to current application's NSMutableArray's array()
				repeat with thisFilter in {{"BEGINSWITH", theURLBeginsWith_list}, {"ENDSWITH", theURLEndsWith_list}}
					set {theOperator, theValues} to contents of thisFilter
					if theValues ≠ {} then
						set theClauses to (current application's NSMutableArray's array())
						repeat with thisValue in theValues
							set thisClause to "(self " & theOperator & "[cd] %@)"
							if isExclude then set thisClause to "(NOT " & thisClause & ")"
							(theClauses's addObject:thisClause)
							(theArguments's addObject:(contents of thisValue))
						end repeat
						(theGroups's addObject:("(" & ((theClauses's componentsJoinedByString:theConnector) as text) & ")"))
					end if
				end repeat
				-- The begins-with group and the ends-with group must both hold.
				set thePredicateString to (theGroups's componentsJoinedByString:" AND ") as text
				set thePredicate to current application's NSPredicate's predicateWithFormat:thePredicateString argumentArray:theArguments
				set thePDF_URLs to thePDF_URLs's filteredArrayUsingPredicate:thePredicate
			end if
			set thePDF_URLs to thePDF_URLs's sortedArrayUsingSelector:"localizedStandardCompare:"
		else
			set thePDF_URLs to thePDF_URLs_all
		end if
		return thePDF_URLs as list
	on error error_message number error_number
		-- -128 means the problem was already reported above (or the user cancelled),
		-- so do not show a second, meaningless "User canceled." alert.
		if error_number is not -128 then
			activate
			display alert "Error: Handler \"extractPDF_URLs\"" message error_message as warning
		end if
		error number -128
	end try
end extractPDF_URLs
Examples
Note: All examples use the handler as a Script Library.
Include
Example - Extract all URLs
To extract all URLs without any filtering, pass empty lists.
Click
-- Example - Extract all URLs
tell application id "DNtp"
	try
		set theSelection to selected records
		if theSelection = {} then error "Please select a PDF record"
		-- Two empty lists: nothing to filter by, so every URL is returned.
		set thePDF_URLs to script "Extract PDF URLs"'s extractPDF_URLs(item 1 of theSelection, "include", {}, {})
	on error errorText number errorCode
		if errorCode is not -128 then display alert "DEVONthink" message errorText as warning
		return
	end try
end tell
Example - Extract URLs that start with “http://” or “https://”
Click
-- Example - Extract URLs that start with "http://" or "https://"
tell application id "DNtp"
	try
		set theSelection to selected records
		if theSelection = {} then error "Please select a PDF record"
		-- Only the URL start is filtered; the URL end list stays empty.
		set thePrefixes to {"http://", "https://"}
		set thePDF_URLs to script "Extract PDF URLs"'s extractPDF_URLs(item 1 of theSelection, "include", thePrefixes, {})
	on error errorText number errorCode
		if errorCode is not -128 then display alert "DEVONthink" message errorText as warning
		return
	end try
end tell
Example - Extract URLs that end with “.pdf”
Click
-- Example - Extract URLs that end with ".pdf"
tell application id "DNtp"
	try
		set theSelection to selected records
		if theSelection = {} then error "Please select a PDF record"
		-- Only the URL end is filtered; the URL start list stays empty.
		set theSuffixes to {".pdf"}
		set thePDF_URLs to script "Extract PDF URLs"'s extractPDF_URLs(item 1 of theSelection, "include", {}, theSuffixes)
	on error errorText number errorCode
		if errorCode is not -128 then display alert "DEVONthink" message errorText as warning
		return
	end try
end tell
Exclude
Example - Extract URLs that don’t start with “mailto:” or “tel:” or “bitcoin:” or “file:” and don’t end with “.gif” or “.jpg” or “.jpeg” or “.png”
Click
-- Example - Extract URLs that don't start with "mailto:" or "tel:" or "bitcoin:" or "file:" and don't end with ".gif" or ".jpg" or ".jpeg" or ".png"
tell application id "DNtp"
	try
		set theSelection to selected records
		if theSelection = {} then error "Please select a PDF record"
		-- In "exclude" mode both lists name the URLs to drop.
		set theUnwantedPrefixes to {"mailto:", "tel:", "bitcoin:", "file:"}
		set theUnwantedSuffixes to {".gif", ".jpg", ".jpeg", ".png"}
		set thePDF_URLs to script "Extract PDF URLs"'s extractPDF_URLs(item 1 of theSelection, "exclude", theUnwantedPrefixes, theUnwantedSuffixes)
	on error errorText number errorCode
		if errorCode is not -128 then display alert "DEVONthink" message errorText as warning
		return
	end try
end tell
Setup
Usage as Script Library:
It is best used as a Script Library.
- In Finder:
- press ⇧⌘G
- paste
~/Library/Script Libraries
- if no window is opened
- paste
~/Library/
- create folder
Script Libraries
- paste
- In Script Editor.app
- create new document
- paste the handler
- press ⌘S
- press ⇧⌘G
- paste
~/Library/Script Libraries/Extract PDF URLs.scpt
- save
To call the Script Library use:
-- Calls the handler from the installed Script Library "Extract PDF URLs".
-- thisRecord is not defined in this snippet; the calling script must set it to a PDF record first.
set theMode to "include" -- "include", "exclude", or "" for no filtering
set theURLBeginsWith_list to {} -- URL starts to match; {} = do not filter by start
set theURLEndsWith_list to {} -- URL ends to match; {} = do not filter by end
set thePDF_URLs to script "Extract PDF URLs"'s extractPDF_URLs(thisRecord, theMode, theURLBeginsWith_list, theURLEndsWith_list)
Usage inline:
If you want to use it inline, make sure to add these statements at the top of your script:
use AppleScript version "2.4"
use framework "Foundation"
use framework "Quartz"
use scripting additions
To call the handler inline, use "my":
-- Calls the handler when it has been pasted into the same script ("my" targets the current script).
-- thisRecord is not defined in this snippet; the calling script must set it to a PDF record first.
set theMode to "include" -- "include", "exclude", or "" for no filtering
set theURLBeginsWith_list to {} -- URL starts to match; {} = do not filter by start
set theURLEndsWith_list to {} -- URL ends to match; {} = do not filter by end
set thePDF_URLs to my extractPDF_URLs(thisRecord, theMode, theURLBeginsWith_list, theURLEndsWith_list)